Le Trésor de la Langue Française informatisé dans votre terminal - retour accueil
git clone git://bebou.netlib.re/TLFi-light
Log | Files | Refs |
commit aee46afaaf3b274bde81b4419d1180f4c0ad0e66
Auterice: Arthur Pons <arthur.pons@unistra.fr>
Date: Fri, 14 Jun 2024 15:39:24 +0200

Premier commit getwords parsewords

getwords récupère la liste de tous les mots du dico
parseword en parse un de manière un peu dégueu mais je sais pas vraiment faire mieux

Diffstat:
A | getwords | | | 21 | +++++++++++++++++++++ |
A | parseword | | | 25 | +++++++++++++++++++++++++ |
2 files changed, 46 insertions(+), 0 deletions(-)
diff --git a/getwords b/getwords
@@ -0,0 +1,21 @@
+#! /bin/sh
+
+for letter in A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
+do
+ echo "Letter $letter"
+ for i in $(seq 0 80 100000)
+ do
+ printf " number $i"
+ [ -s $letter$i ] && { echo " - already exists, skippin"; continue; }
+ printf " - download $letter$i"
+ curl -Ls "https://www.cnrtl.fr/portailindex/LEXI/TLFI/$letter/$i" |
+ sed -n '/Liste des formes/ p' |
+ grep -oE '/definition/[^"]*' |
+ cut -d '/' -f3 > $letter$i
+ [ ! -s $letter$i ] && { echo " - no more words, next letter";break; }
+ echo
+ done
+done
+
+
+
diff --git a/parseword b/parseword
@@ -0,0 +1,25 @@
+#! /bin/sh
+
+dom="\x1b[91m"
+def="\x1b[93m"
+synt="\x1b[92m"
+cit="\x1b[94m"
+ital="\x1b[3m"
+norm="\x1b[0m"
+cat html-pyramide |
+ sed -n '/vtoolbar/,$ p' |
+ sed -E "s,<span class=\"tlf_cdomaine\">,$dom,g;
+ s,<span class=\"tlf_cdefinition\">,\n$def,g;
+ s,<span class=\"tlf_cplan\">,\n\
+ \n,g;
+ s,<span class=\"tlf_csyntagme\">,$synt,g;
+ s,<div class=\"tlf_(tabulation|parothers)\">,\n\
+ \n ,g;
+ s,<div class=\"tlf_paraputir\">,\n\
+ \n,g;
+ s,<span class=\"tlf_cexemple\">,$ital,g;" |
+ #s,</span>,$norm,g" |
+ sed -E 's,<[^>]*>,,g' |
+ sed -E "s#\([^)]*p\. [0-9]+\)#$cit&$norm#g" |
+ grep -v '^ **$'
+