#!/bin/awk -f
# aux/emparse -- parse em source
#
# Reads em markup on standard input and writes HTML on standard output.
# Input is processed line by line: a blank line ends the current block, and
# the first line after it decides which kind of block is opened next.
#
# Environment:
#   linktext   text used for hyperlink references (default "link")
#   ref<ID>    target URL for the inline reference [<ID>]
#
# NOTE(review): the copy this was restored from had its HTML tags, character
# entities and runs of whitespace stripped.  Tag names, attributes and the
# indentation widths in the patterns below are reconstructed from the
# surrounding logic; confirm them against the em syntax description.

BEGIN {
    expectblock = 1     # true while the next line should open a new block
    itemlevel = 1       # current list nesting depth (1 = top-level list)
    linktext = ENVIRON["linktext"]
    if (!linktext)
        linktext = "link"
}

END { breakblock() }

# A blank line closes whatever is open.  "next" (rather than getline) makes
# sure consecutive or trailing blank lines never fall through to the rules
# below and open an empty paragraph.
/^$/ { breakblock(); next }

# --- block openers ---------------------------------------------------------
# Only the first matching opener fires, because newblock() clears
# expectblock.  Order matters: "pre" is the catch-all for indented text.
# NOTE(review): assumed indentation: a tab for tables, two spaces for
# everything else.
expectblock && /^\t/                 { newblock("table") }
expectblock && /^  - /               { newblock("ul") }
expectblock && /^  [0-9a-z]+\. /     { newblock("ol") }
expectblock && /^  \[[0-9a-z]+\] /   { newblock("nl") }
expectblock && /^  .*: /             { newblock("dl") }
expectblock && /^  /                 { newblock("pre") }

# A horizontal rule or heading is complete on its own line, so expectblock
# stays set and the following line still opens a proper block.
expectblock && /^---$/               { printf "<hr>\n"; next }
expectblock && /^= .* =$/            { heading(1, $0); next }
expectblock && /^== .* ==$/          { heading(2, $0); next }
expectblock && /^=== .* ===$/        { heading(3, $0); next }
expectblock && /^==== .* ====$/      { heading(4, $0); next }
expectblock && /^===== .* =====$/    { heading(5, $0); next }
expectblock && /^====== .* ======$/  { heading(6, $0); next }

expectblock { newblock("p") }

# --- block bodies ----------------------------------------------------------
# Preformatted text is escaped but gets no inline formatting.
openblock == "pre" { sub(/^  /, ""); printf "%s\n", escape($0); next }

openblock == "ul" && /^  - /                    { item(1, "ul", $0) }
openblock == "ol" && /^  [0-9a-z]+\. /          { item(1, "ol", $0) }
openblock == "nl" && /^  \[[0-9a-z]+\] [^ ]+$/  { next }  # hyperlink reference
openblock == "nl" && /^  \[[0-9a-z]+\] /        { item(1, "nl", $0) }  # text reference
openblock == "dl" && /^  .*: /                  { term($0) }

# Nested list items, levels 2 to 6.  item() strips the marker from $0, so at
# most one of these rules matches any given line.
# NOTE(review): assumed two additional spaces per nesting level.
(openblock == "ul" || openblock == "ol") && /^    - /                   { item(2, "ul", $0) }
(openblock == "ul" || openblock == "ol") && /^    [0-9a-z]+\. /         { item(2, "ol", $0) }
(openblock == "ul" || openblock == "ol") && /^      - /                 { item(3, "ul", $0) }
(openblock == "ul" || openblock == "ol") && /^      [0-9a-z]+\. /       { item(3, "ol", $0) }
(openblock == "ul" || openblock == "ol") && /^        - /               { item(4, "ul", $0) }
(openblock == "ul" || openblock == "ol") && /^        [0-9a-z]+\. /     { item(4, "ol", $0) }
(openblock == "ul" || openblock == "ol") && /^          - /             { item(5, "ul", $0) }
(openblock == "ul" || openblock == "ol") && /^          [0-9a-z]+\. /   { item(5, "ol", $0) }
(openblock == "ul" || openblock == "ol") && /^            - /           { item(6, "ul", $0) }
(openblock == "ul" || openblock == "ol") && /^            [0-9a-z]+\. / { item(6, "ol", $0) }

# Whatever is left of the line is ordinary text.
{ format($0) }

# format -- print one line of text with inline formatting applied.
#
# The line is HTML-escaped, split on single spaces, and each word is checked
# for a bracketed reference ([id], optionally followed by punctuation) and
# for opening/closing `code`, *strong* and _emphasis_ markers.  Empty words
# (from runs of spaces) are dropped.  words, count, k and tok are locals.
function format(line,    words, count, k, tok) {
    line = escape(line)
    count = split(line, words, "[ ]")
    for (k = 1; k <= count; k++) {
        tok = words[k]
        if (tok == "")
            continue

        # Inline reference, possibly followed by ")" and/or punctuation.
        if (match(tok, /^\[[0-9a-z]+\]$/))
            tok = ref(substr(tok, 2, RLENGTH - 2))
        else if (match(tok, /^\[[0-9a-z]+\][.,:;?!)]$/))
            tok = ref(substr(tok, 2, RLENGTH - 3)) substr(tok, RLENGTH)
        else if (match(tok, /^\[[0-9a-z]+\]\)[.,:;?!]$/))
            tok = ref(substr(tok, 2, RLENGTH - 4)) substr(tok, RLENGTH - 1)

        # Opening marker at the start of the word, optionally after "(".
        # NOTE(review): tag names are reconstructed.
        if (match(tok, /^`/))
            tok = "<code>" substr(tok, 2)
        else if (match(tok, /^\*/))
            tok = "<strong>" substr(tok, 2)
        else if (match(tok, /^_/))
            tok = "<em>" substr(tok, 2)
        else if (match(tok, /^\(`/))
            tok = "(<code>" substr(tok, 3)
        else if (match(tok, /^\(\*/))
            tok = "(<strong>" substr(tok, 3)
        else if (match(tok, /^\(_/))
            tok = "(<em>" substr(tok, 3)

        # Closing marker at the end of the word, optionally followed by
        # ")" and/or one punctuation character, which are kept.
        if (match(tok, /`$/))
            tok = substr(tok, 1, RSTART - 1) "</code>"
        else if (match(tok, /\*$/))
            tok = substr(tok, 1, RSTART - 1) "</strong>"
        else if (match(tok, /_$/))
            tok = substr(tok, 1, RSTART - 1) "</em>"
        else if (match(tok, /`[.,:;?!)]$/))
            tok = substr(tok, 1, RSTART - 1) "</code>" substr(tok, RSTART + 1)
        else if (match(tok, /\*[.,:;?!)]$/))
            tok = substr(tok, 1, RSTART - 1) "</strong>" substr(tok, RSTART + 1)
        else if (match(tok, /_[.,:;?!)]$/))
            tok = substr(tok, 1, RSTART - 1) "</em>" substr(tok, RSTART + 1)
        else if (match(tok, /`\)[.,:;?!]$/))
            tok = substr(tok, 1, RSTART - 1) "</code>" substr(tok, RSTART + 1)
        else if (match(tok, /\*\)[.,:;?!]$/))
            tok = substr(tok, 1, RSTART - 1) "</strong>" substr(tok, RSTART + 1)
        else if (match(tok, /_\)[.,:;?!]$/))
            tok = substr(tok, 1, RSTART - 1) "</em>" substr(tok, RSTART + 1)

        printf "%s", tok
        if (k < count)
            printf " "
    }
    printf "\n"
}

# beginblock -- print the opening tag for a block.  "nl" (numbered
# references) is not an HTML element, so it is emitted as an ordered list.
function beginblock(name) {
    if (name == "nl")
        printf "<ol>\n"
    else
        printf "<%s>\n", name
}

# endblock -- print the closing tag matching beginblock(name).
function endblock(name) {
    if (name == "nl")
        printf "</ol>\n"
    else
        printf "</%s>\n", name
}

# newblock -- open a block and record it as the current one.
function newblock(name) {
    beginblock(name)
    openblock = name
    openitem = 0
    expectblock = 0
}

# breakblock -- close any open list item, nested lists, definition and
# block, then arm expectblock so the next line opens a new block.
function breakblock() {
    if (openitem) {
        printf "</li>\n"
        # Close nested lists innermost first, each with the tag it was
        # opened with (recorded by item() in listtype[]).
        while (itemlevel > 1) {
            printf "</%s>\n", listtype[itemlevel]
            itemlevel--
        }
    }
    if (opendef)
        printf "</dd>\n"
    if (openblock)
        endblock(openblock)
    itemlevel = 1
    openitem = 0
    opendef = 0
    openblock = ""
    expectblock = 1
}

# heading -- print a heading of the given level (1-6); line is the raw
# "=== text ===" source line.
function heading(level, line) {
    sub(/^=* /, "", line)
    sub(/ =*$/, "", line)
    line = escape(line)
    printf "<h%d>%s</h%d>\n", level, line, level
    # should inline formatting be supported in headings?
}

# item -- start a list item at nesting depth `level` of list kind `type`
# ("ul", "ol" or "nl").  The item marker is removed from $0 so the main
# rule can format the remaining text; the `line` argument is unused and
# kept only for the callers' sake.  v is a local variable.
function item(level, type, line,    v) {
    if (openitem)
        printf "</li>"
    openitem = 1

    # Open or close as many nested lists as needed to reach `level`,
    # remembering each list's tag so it can be closed correctly later.
    while (itemlevel < level) {
        itemlevel++
        listtype[itemlevel] = type
        printf "<%s>\n", type
    }
    while (itemlevel > level) {
        printf "</%s>\n", listtype[itemlevel]
        itemlevel--
    }

    if (type == "ul") {
        sub(/^ +- /, "")
        printf "<li>"
    }
    if (type == "ol") {
        match($0, /[0-9a-z]+\./)
        v = substr($0, RSTART, RLENGTH - 1)     # label without the dot
        sub(/^ +[0-9a-z]+\. /, "")
        # NOTE(review): attribute reconstructed; only one argument (v)
        # survived in the damaged source.
        printf "<li value=\"%s\">", v
    }
    if (type == "nl") {
        match($0, /\[[0-9a-z]+\]/)
        v = substr($0, RSTART + 1, RLENGTH - 2) # label without brackets
        sub(/^ +\[[0-9a-z]+\] /, "")
        # NOTE(review): attributes reconstructed; two arguments (v, v)
        # survived in the damaged source.
        printf "<li id=\"ref%s\" value=\"%s\">", v, v
    }
}

# term -- start a definition-list entry.  The text before the first ": "
# becomes the term; $0 is reduced to the definition text so the main rule
# can format it.  The `line` argument is unused; t and skip are locals.
function term(line,    t, skip) {
    if (opendef)
        printf "</dd>"
    opendef = 1
    t = $0
    sub(/^  /, "", t)
    sub(/: .*$/, "", t)
    # Drop the indent, the term and the ": " separator, using the same
    # split point as the term extraction above.
    skip = index($0, t ": ") + length(t) + 2
    $0 = substr($0, skip)
    printf "<dt>%s</dt>\n<dd>", escape(t)
}

# ref -- render inline reference v.  If the environment provides a URL in
# ref<v>, return a hyperlink labelled "(linktext)"; otherwise return the
# reference unchanged as "[v]".  url is a local variable.
function ref(v,    url) {
    if (ENVIRON["ref" v] != "") {
        url = escape(ENVIRON["ref" v])
        gsub(/"/, "\\&quot;", url)      # keep the attribute value intact
        return "<a href=\"" url "\">(" linktext ")</a>"
    }
    return "[" v "]"
}

# escape -- return s with &, < and > replaced by HTML entities.  awk passes
# scalars by value, so callers must use the return value.
function escape(s) {
    gsub(/&/, "\\&amp;", s)     # "\\&" yields a literal ampersand
    gsub(/</, "\\&lt;", s)
    gsub(/>/, "\\&gt;", s)
    return s
}