Un traducteur md -> html minimal en awk - retour accueil
git clone git://bebou.netlib.re/katdown
Log | Files | Refs | README |
commit ae44c2fd6ba1788d5dbcab3f9d48deec068d67d3 parent 6b85157e0a54400201cc9ba425b37d87883e4a73 Auteurice: Graham Marlow <graham@mgmarlow.com> Date: Fri, 8 Mar 2024 09:07:35 -0800 New architecture - Make use of Awk's strengths with proper match expressions, taking advantage of the `next` statement to skip after match. - Handle multi-line paragraphs by collecting lines of input before flushing. - Use a portable match expression that does not rely on gawk's matching pattern arrays. There are still two areas of improvement: 1. Still need to figure out how to handle inline links. 2. Probably need to flushp() before every match expression, to ensure that paragraphs are properly handled if there is not a newline between a paragraph and some other Markdown element. Diffstat:
M | awkdown.awk | | | 109 | ++++++++++++++++++++++++++++++------------------------------------------------- |
1 file changed, 41 insertions(+), 68 deletions(-)
diff --git a/awkdown.awk b/awkdown.awk @@ -8,87 +8,60 @@ BEGIN { print "<body>" } -{ data[NR] = $0 } +/^# / { print "<h1>" substr($0, 3) "</h1>"; next } +/^## / { print "<h2>" substr($0, 4) "</h2>"; next } +/^### / { print "<h3>" substr($0, 5) "</h3>"; next } +/^#### / { print "<h4>" substr($0, 6) "</h4>"; next } +/^---$/ { print "<hr />"; next } +inpre && /^```/ { print "</pre>"; inpre = 0; next } +/^```/ { print "<pre>"; inpre = 1; next } +/^-/ { if (!inul) print "<ul>"; inul = 1; print "<li>" substr($0, 3) "</li>"; next } +inul && !/^-/ { print "</ul>"; inul = 0; next } +/^[0-9]+./ { if (!inol) print "<ol>"; inol = 1; print "<li>" substr($0, length($1)+2) "</li>"; next } +inol && !/^[0-9]+./ { print "</ol>"; inol = 0; next } +/^> / { if (!inquote) print "<blockquote>"; inquote = 1; print substr($0, 3); next } +inquote && !/^> / { print "</blockquote>"; inquote = 0; next } +/./ { for (i=1; i<=NF; i++) collect($i) } +/^$/ { flushp() } +END { flushp(); flushtags() } END { - for (i = 1; i <= NR; i++) - render(data[i]) - - # Catch any trailing unclosed tags - if (inquote || inul || inol || inpre) render() - print "</body>" print "</html>" } -function render(line) { - if (line !~ /^> / && inquote) { - print "</blockquote>" - inquote = 0 - } +function collect(v) { + line = line sep v + sep = " " +} - if (line !~ /^- / && inul) { - print "</ul>" - inul = 0 +function flushp() { + if (line) { + print "<p>" render(line) "</p>" + line = sep = "" } +} - if (line !~ /^[0-9]+. 
/ && inol) { - print "</ol>" - inol = 0 +function render(line) { + if (match(line, /_(.*)_/)) { + gsub(/_(.*)_/, sprintf("<em>%s</em>", substr(line, RSTART+1, RLENGTH-2)), line) } - if (match(line, /_(.*)_/, pats)) { - gsub(/_(.*)_/, sprintf("<em>%s</em>", pats[1]), line) + if (match(line, /\*(.*)\*/)) { + gsub(/\*(.*)\*/, sprintf("<strong>%s</strong>", substr(line, RSTART+1, RLENGTH-2)), line) } - if (match(line, /\*(.*)\*/, pats)) { - gsub(/\*(.*)\*/, sprintf("<strong>%s</strong>", pats[1]), line) - } +# if (match(line, /(.*)\[(.+)\]\((.+)\)(.*)/, pats)) { +# gsub(/(.*)\[(.+)\]\((.+)\)(.*)/, +# sprintf("%s<a href='%s'>%s</a>%s", pats[1], pats[3], pats[2], pats[4]), line) +# } - if (match(line, /(.*)\[(.+)\]\((.+)\)(.*)/, pats)) { - gsub(/(.*)\[(.+)\]\((.+)\)(.*)/, - sprintf("%s<a href='%s'>%s</a>%s", pats[1], pats[3], pats[2], pats[4]), line) - } + return line +} - if (line ~ /^# /) { - print "<h1>" substr(line, 3) "</h1>" - } else if (line ~ /^## /) { - print "<h2>" substr(line, 4) "</h2>" - } else if (line ~ /^### /) { - print "<h3>" substr(line, 5) "</h3>" - } else if (line ~ /^#### /) { - print "<h4>" substr(line, 6) "</h4>" - } else if (line ~ /^- /) { - if (!inul) { - print "<ul>" - inul = 1 - } - print "<li>" substr(line, 3) "</li>" - } else if (line ~ /^[0-9]+. /) { - if (!inol) { - print "<ol>" - inol = 1 - } - print "<li>" substr(line, 4) "</li>" - } else if (line ~ /^> /) { - if (!inquote) { - print "<blockquote>" - inquote = 1 - } - print substr(line, 3) - } else if (line ~ /^```/) { - if (!inpre) { - print "<pre>" - inpre = 1 - } else { - print "</pre>" - inpre = 0 - } - } else if (inpre) { - print line - } else if (line ~ /^---$/) { - print "<hr />" - } else if (line !~ /^$/) { - print "<p>" line "</p>" - } +function flushtags() { + if (inquote) print "</blockquote>" + if (inol) print "</ol>" + if (inul) print "</ul>" + if (inpre) print "</pre>" }