TLFi-light

Le Trésor de la Langue Française informatisé dans votre terminal - retour accueil

git clone git://bebou.netlib.re/TLFi-light
Log | Files | Refs |

commit aee46afaaf3b274bde81b4419d1180f4c0ad0e66
Auterice: Arthur Pons <arthur.pons@unistra.fr>
Date:   Fri, 14 Jun 2024 15:39:24 +0200

Premier commit getwords parsewords

getwords récupère la liste de tous les mots du dico
parseword en parse un de manière un peu dégueu mais je sais pas vraiment
faire mieux

Diffstat:
A getwords  | 21 +++++++++++++++++++++
A parseword | 25 +++++++++++++++++++++++++
2 files changed, 46 insertions(+), 0 deletions(-)

diff --git a/getwords b/getwords
@@ -0,0 +1,21 @@
+#! /bin/sh
+
+for letter in A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
+do
+ echo "Letter $letter"
+ for i in $(seq 0 80 100000)
+ do
+ printf " number $i"
+ [ -s $letter$i ] && { echo " - already exists, skippin"; continue; }
+ printf " - download $letter$i"
+ curl -Ls "https://www.cnrtl.fr/portailindex/LEXI/TLFI/$letter/$i" |
+ sed -n '/Liste des formes/ p' |
+ grep -oE '/definition/[^"]*' |
+ cut -d '/' -f3 > $letter$i
+ [ ! -s $letter$i ] && { echo " - no more words, next letter";break; }
+ echo
+ done
+done
+
+
+
diff --git a/parseword b/parseword
@@ -0,0 +1,25 @@
+#! /bin/sh
+
+dom="\x1b[91m"
+def="\x1b[93m"
+synt="\x1b[92m"
+cit="\x1b[94m"
+ital="\x1b[3m"
+norm="\x1b[0m"
+cat html-pyramide |
+ sed -n '/vtoolbar/,$ p' |
+ sed -E "s,<span class=\"tlf_cdomaine\">,$dom,g;
+ s,<span class=\"tlf_cdefinition\">,\n$def,g;
+ s,<span class=\"tlf_cplan\">,\n\
+ \n,g;
+ s,<span class=\"tlf_csyntagme\">,$synt,g;
+ s,<div class=\"tlf_(tabulation|parothers)\">,\n\
+ \n ,g;
+ s,<div class=\"tlf_paraputir\">,\n\
+ \n,g;
+ s,<span class=\"tlf_cexemple\">,$ital,g;" |
+ #s,</span>,$norm,g" |
+ sed -E 's,<[^>]*>,,g' |
+ sed -E "s#\([^)]*p\. [0-9]+\)#$cit&$norm#g" |
+ grep -v '^ **$'
+