#!/bin/awk -f
# aux/emparse -- parse em source
#
# Reads "em" markup line by line and writes HTML to standard output.
# Block-level state lives in globals:
#   expectblock  1 when the next non-blank line should open a new block
#   openblock    name of the block currently open ("p", "ul", "pre", ...), or 0
#   openitem     1 while a list item is open
#   opendef      1 while a definition-list entry is open
#   openformat   name of the inline tag currently open ("tt", "i", "b"), or ""
#   itemlevel    current list nesting depth (1 = outermost)
#   leveltype[]  list type opened at each nesting depth > 1
#
# Environment: `linktext` is the anchor text used for hyperlink references
# (defaults to "link"); `ref<ID>` holds the URL for reference [ID].
#
# NOTE(review): this file was restored from a copy in which HTML tags and
# entities inside string literals had been stripped and whitespace runs looked
# collapsed.  Literals marked "restored" were inferred from the printf argument
# lists and the openformat names; patterns marked "collapsed" are kept exactly
# as found.  Both need checking against a pristine copy.

BEGIN {
    expectblock = 1
    itemlevel = 1
    linktext = ENVIRON["linktext"]
    if (!linktext)
        linktext = "link"
}

END { breakblock() }

# A blank line ends the current block.  The following line is pulled in with
# getline and is then processed by the rules below; at end of input there is
# nothing left to process, so skip the remaining rules instead of opening an
# empty paragraph.
/^$/ {
    breakblock()
    if ((getline) <= 0)
        next
}

# --- block openers (only tried while expectblock is set) ---------------------
# NOTE(review): "collapsed" -- the table and pre patterns are textually
# identical, so as written the table rule fires first for every line that
# starts with a space and clears expectblock before the list/dl/pre rules are
# tried.  The original leading whitespace (tab? several spaces?) could not be
# recovered; TODO restore.
expectblock && /^ /               { newblock("table") }
expectblock && /^ - /             { newblock("ul") }
expectblock && /^ [0-9a-z]+\. /   { newblock("ol") }
expectblock && /^ \[[0-9a-z]+\] / { newblock("nl") }
expectblock && /^ .*: /           { newblock("dl") }
expectblock && /^ /               { newblock("pre") }
expectblock && /^---$/            { expectblock = 0; printf "<hr>\n"; next }  # restored: <hr>
expectblock && /^= .* =$/             { heading(1, $0); next }
expectblock && /^== .* ==$/           { heading(2, $0); next }
expectblock && /^=== .* ===$/         { heading(3, $0); next }
expectblock && /^==== .* ====$/       { heading(4, $0); next }
expectblock && /^===== .* =====$/     { heading(5, $0); next }
expectblock && /^====== .* ======$/   { heading(6, $0); next }
expectblock { newblock("p") }

# --- lines inside an open block ----------------------------------------------
# Preformatted text is escaped and emitted verbatim, with no inline formatting.
openblock == "pre" { sub("^ ", ""); escape(); printf "%s\n", $0; next }

# The item()/term() rules strip the marker from $0 and then fall through to
# the last rule, which formats whatever text remains on the line.
openblock == "ul" && /^ - /                   { item(1, "ul", $0) }
openblock == "ol" && /^ [0-9a-z]+\. /         { item(1, "ol", $0) }
openblock == "nl" && /^ \[[0-9a-z]+\] [^ ]+$/ { next }               # hyperlink reference
openblock == "nl" && /^ \[[0-9a-z]+\] /       { item(1, "nl", $0) }  # text reference
openblock == "dl" && /^ .*: /                 { term($0) }

# NOTE(review): "collapsed" -- the patterns for nesting levels 2-6 are
# textually identical; presumably each level originally required a different
# indentation width.  Kept as found; TODO restore the per-level indentation.
(openblock == "ul" || openblock == "ol") && /^ -/           { item(2, "ul", $0) }
(openblock == "ul" || openblock == "ol") && /^ [0-9a-z]+\./ { item(2, "ol", $0) }
(openblock == "ul" || openblock == "ol") && /^ -/           { item(3, "ul", $0) }
(openblock == "ul" || openblock == "ol") && /^ [0-9a-z]+\./ { item(3, "ol", $0) }
(openblock == "ul" || openblock == "ol") && /^ -/           { item(4, "ul", $0) }
(openblock == "ul" || openblock == "ol") && /^ [0-9a-z]+\./ { item(4, "ol", $0) }
(openblock == "ul" || openblock == "ol") && /^ -/           { item(5, "ul", $0) }
(openblock == "ul" || openblock == "ol") && /^ [0-9a-z]+\./ { item(5, "ol", $0) }
(openblock == "ul" || openblock == "ol") && /^ -/           { item(6, "ul", $0) }
(openblock == "ul" || openblock == "ol") && /^ [0-9a-z]+\./ { item(6, "ol", $0) }

{ format($0) }

# format(line) -- escape `line`, apply inline formatting, print it with "\n".
# The line is split on single spaces; empty words (from runs of spaces) are
# dropped together with their separator.  A word of the form [id], optionally
# followed by closing punctuation, is replaced through ref().  A leading
# ` * or _ (optionally after "(") opens <tt>, <i> or <b>; a trailing one
# (optionally before closing punctuation) closes the open tag.  openformat
# persists across calls, so a span may continue onto the next line.
# w, n, i and orig are locals.
function format(line,    w, n, i, orig) {
    line = htmlescape(line)
    n = split(line, w, "[ ]")
    for (i = 1; i <= n; i++) {          # split() fills w[1..n]
        if (w[i] == "")
            continue

        # references: [id]   [id].   [id]).
        if (match(w[i], "^\\[[0-9a-z]+\\]$"))
            w[i] = ref(substr(w[i], 2, RLENGTH-2))
        else if (match(w[i], "^\\[[0-9a-z]+\\][.,:;?!)]$"))
            w[i] = ref(substr(w[i], 2, RLENGTH-3)) substr(w[i], RLENGTH)
        else if (match(w[i], "^\\[[0-9a-z]+\\]\\)[.,:;?!]$"))
            w[i] = ref(substr(w[i], 2, RLENGTH-4)) substr(w[i], RLENGTH-1)

        # opening markers (tags restored from the openformat names)
        if (!openformat) {
            if (match(w[i], "^`")) {
                w[i] = "<tt>" substr(w[i], 2)
                openformat = "tt"
            } else if (match(w[i], "^\\*")) {
                w[i] = "<i>" substr(w[i], 2)
                openformat = "i"
            } else if (match(w[i], "^_")) {
                w[i] = "<b>" substr(w[i], 2)
                openformat = "b"
            } else if (match(w[i], "^\\(`")) {
                w[i] = "(<tt>" substr(w[i], 3)
                openformat = "tt"
            } else if (match(w[i], "^\\(\\*")) {
                w[i] = "(<i>" substr(w[i], 3)
                openformat = "i"
            } else if (match(w[i], "^\\(_")) {
                w[i] = "(<b>" substr(w[i], 3)
                openformat = "b"
            }
        }

        # closing markers; any change to the word means the span was closed
        if (openformat) {
            orig = w[i]
            if (match(w[i], "`$"))
                w[i] = substr(w[i], 1, RSTART-1) "</tt>"
            else if (match(w[i], "\\*$"))
                w[i] = substr(w[i], 1, RSTART-1) "</i>"
            else if (match(w[i], "_$"))
                w[i] = substr(w[i], 1, RSTART-1) "</b>"
            else if (match(w[i], "`[.,:;?!)]$"))
                w[i] = substr(w[i], 1, RSTART-1) "</tt>" substr(w[i], RSTART+1)
            else if (match(w[i], "\\*[.,:;?!)]$"))
                w[i] = substr(w[i], 1, RSTART-1) "</i>" substr(w[i], RSTART+1)
            else if (match(w[i], "_[.,:;?!)]$"))
                w[i] = substr(w[i], 1, RSTART-1) "</b>" substr(w[i], RSTART+1)
            else if (match(w[i], "`\\)[.,:;?!]$"))
                w[i] = substr(w[i], 1, RSTART-1) "</tt>" substr(w[i], RSTART+1)
            else if (match(w[i], "\\*\\)[.,:;?!]$"))
                w[i] = substr(w[i], 1, RSTART-1) "</i>" substr(w[i], RSTART+1)
            else if (match(w[i], "_\\)[.,:;?!]$"))
                w[i] = substr(w[i], 1, RSTART-1) "</b>" substr(w[i], RSTART+1)
            if (w[i] != orig)
                openformat = ""
        }

        printf "%s", w[i]
        if (i < n)
            printf " "
    }
    printf "\n"
}

# opentag(name) -- print the opening tag for block `name`.
# NOTE(review): the markup for "nl" was lost; <ol> is a guess, TODO confirm.
function opentag(name) {
    if (name == "nl")
        printf "<ol>\n"
    else
        printf "<%s>\n", name
}

# closetag(name) -- print the closing tag for block `name`.
# NOTE(review): the markup for "nl" was lost; </ol> mirrors opentag's guess.
function closetag(name) {
    if (name == "nl")
        printf "</ol>\n"
    else
        printf "</%s>\n", name      # restored: </%s>
}

# newblock(name) -- open block `name` and record it as the current block.
function newblock(name) {
    opentag(name)
    openblock = name
    expectblock = 0
}

# breakblock() -- close everything that is open and reset the block state.
# An inline span still open here is closed and reported on stderr.
function breakblock() {
    if (openformat) {
        printf "</%s>", openformat  # restored: </%s>
        printf "(%s:%d) warning: open <%s> closed at block break\n", ARGV[1], NR, openformat > "/dev/stderr"
    }
    if (openitem) {
        printf "</li>\n"            # restored: </li>
        # Close nested lists with the type that opened each level (recorded by
        # item()), not with the outer block's type.
        for (; itemlevel > 1; itemlevel--)
            printf "</%s>\n", leveltype[itemlevel]
    }
    if (opendef)
        printf "</dd>\n"            # restored: </dd>
    if (openblock)
        closetag(openblock)
    openitem = 0
    opendef = 0
    openblock = 0
    openformat = ""
    itemlevel = 1
    delete leveltype
    expectblock = 1
}

# heading(level, line) -- print `line`, minus its "=" decoration, as <hN>.
function heading(level, line) {
    sub("^=* ", "", line)
    sub(" =*$", "", line)
    line = htmlescape(line)
    printf "<h%d>%s</h%d>\n", level, line, level    # restored: <h%d>/</h%d>
    # should inline formatting be supported in headings?
}

# item(level, type, line) -- start a list item of `type` at nesting `level`.
# Closes the previous item, opens or closes nested lists until itemlevel equals
# `level`, strips the item marker from $0 and prints the item's opening tag.
# The item text is left in $0 for the caller's fall-through to format().
# `line` is unused (the function works on $0); v is a local.
# NOTE(review): the <li ...> attributes were lost; those below are guesses that
# consume the same printf arguments as the original calls -- TODO confirm.
function item(level, type, line,    v) {
    if (openitem)
        printf "</li>"
    openitem = 1
    if (type != "nl") {
        for (; itemlevel < level; itemlevel++) {
            printf "<%s>\n", type
            leveltype[itemlevel+1] = type
        }
        for (; itemlevel > level; itemlevel--) {
            printf "</%s>\n", leveltype[itemlevel]
            leveltype[itemlevel] = ""
        }
    }
    if (type == "ul") {
        sub("^ +- ", "")
        printf "<li>"
    }
    if (type == "ol") {
        # "\\." keeps the dot literal; "\." in a string is not a portable
        # escape and degrades to "any character".
        match($0, "[0-9a-z]+\\.")
        v = substr($0, RSTART, RLENGTH-1)
        sub("^ +[0-9a-z]+\\. ", "")
        printf "<li value=\"%s\">", v
    }
    if (type == "nl") {
        match($0, "\\[[0-9a-z]+\\]")
        v = substr($0, RSTART+1, RLENGTH-2)
        sub("^ +\\[[0-9a-z]+\\] ", "")
        printf "<li id=\"%s\" value=\"%s\">", v, v
    }
}

# term(line) -- start a definition-list entry from the current record.
# The text before the first ": " becomes the term; the "term: " prefix is
# stripped from $0 so that the definition text remains for format().
# `line` is unused (the function works on $0); t is a local.
# NOTE(review): <dt>/<dd> markup restored by inference -- TODO confirm.
function term(line,    t) {
    if (opendef)
        printf "</dd>"
    opendef = 1
    t = $0
    sub("^ ", "", t)
    sub(": .*$", "", t)
    sub("^ [^:]+: ", "")
    t = htmlescape(t)
    printf "<dt>%s</dt><dd>", t
}

# ref(v) -- render reference `v`: a hyperlink when the environment provides
# ref<v>, otherwise the bracketed label.
# NOTE(review): the anchor markup was lost; the href form is inferred from the
# ENVIRON lookup that guards this branch -- TODO confirm.
function ref(v) {
    if (ENVIRON["ref" v] != "")
        return "(<a href=\"" ENVIRON["ref" v] "\">" linktext "</a>)"
    return "[" v "]"
}

# htmlescape(s) -- return `s` with &, < and > replaced by HTML entities.
# "&" goes first so the entities produced for < and > are not re-escaped.
function htmlescape(s) {
    gsub("&", "\\&amp;", s)
    gsub("<", "\\&lt;", s)
    gsub(">", "\\&gt;", s)
    return s
}

# escape(s) -- with no argument (or ""), escape the current record $0 in place;
# otherwise return the escaped copy of `s`.  Scalars are passed by value, so
# callers must use the return value.
function escape(s) {
    if (s == "") {
        gsub("&", "\\&amp;")
        gsub("<", "\\&lt;")
        gsub(">", "\\&gt;")
        return ""
    }
    return htmlescape(s)
}