diff options
Diffstat (limited to 'emparse')
-rwxr-xr-x | emparse | 218 |
1 files changed, 218 insertions, 0 deletions
#!/bin/awk -f

# aux/emparse -- parse em source
#
# Reads em markup line by line and prints HTML on standard output.
#
# Environment:
#   linktext   text shown for hyperlink references (defaults to "link")
#   ref<ID>    when set, an inline "[ID]" is rendered as a hyperlink to this
#              value; otherwise "[ID]" links to the "#ref<ID>" list item
#
# Global state shared by the rules and functions below:
#   expectblock  1 while the next line may start a new block
#   openblock    name of the block currently open ("p", "ul", "ol", "nl",
#                "dl", "pre", "table") or 0
#   openitem     1 while an <li> is open
#   opendef      1 while a <dd> is open
#   openformat   inline format currently open ("tt", "i", "b") or ""
#   itemlevel    current list nesting depth (1 = top level)
#   leveltype[]  list tag opened for each nesting depth > 1
#
# NOTE(review): several patterns below are textually identical (e.g. the
# "table" and "pre" openers, and the level 2..6 list rules).  The amount of
# leading whitespace in them appears to have been collapsed in this copy of
# the script; the patterns are kept exactly as found -- confirm the intended
# indentation widths against the upstream source.

BEGIN {
    expectblock = 1
    itemlevel = 1
    linktext = ENVIRON["linktext"]
    if (!linktext) linktext = "link"
}
END { breakblock() }

# A blank line ends the current block.  `next` (rather than `getline`) makes
# the following line start again from the first rule, so runs of blank lines
# and a blank line at end of input do not produce an empty paragraph.
/^$/ { breakblock(); next }

# Block openers: only the first matching rule fires, because newblock()
# clears expectblock.
expectblock && /^ / { newblock("table") }
expectblock && /^ - / { newblock("ul") }
expectblock && /^ [0-9a-z]+\. / { newblock("ol") }
expectblock && /^ \[[0-9a-z]+\] / { newblock("nl") }
expectblock && /^ .*: / { newblock("dl") }
expectblock && /^ / { newblock("pre") }
expectblock && /^---$/ { expectblock = 0; printf "<hr/>\n"; next }
expectblock && /^= .* =$/ { heading(1, $0); next }
expectblock && /^== .* ==$/ { heading(2, $0); next }
expectblock && /^=== .* ===$/ { heading(3, $0); next }
expectblock && /^==== .* ====$/ { heading(4, $0); next }
expectblock && /^===== .* =====$/ { heading(5, $0); next }
expectblock && /^====== .* ======$/ { heading(6, $0); next }
expectblock { newblock("p") }

# Preformatted text is escaped and emitted verbatim, without inline formatting.
openblock == "pre" { sub("^ ", ""); escape(); printf "%s\n", $0; next }

# Top-level list items, reference-list entries and definition terms.
openblock == "ul" && /^ - / { item(1, "ul", $0) }
openblock == "ol" && /^ [0-9a-z]+\. / { item(1, "ol", $0) }
openblock == "nl" && /^ \[[0-9a-z]+\] [^ ]+$/ { next } # hyperlink reference
openblock == "nl" && /^ \[[0-9a-z]+\] / { item(1, "nl", $0) } # text reference
openblock == "dl" && /^ .*: / { term($0) }

# Nested list items, levels 2 to 6 (see NOTE(review) in the file header).
(openblock == "ul" || openblock == "ol") && /^ -/ { item(2, "ul", $0) }
(openblock == "ul" || openblock == "ol") && /^ [0-9a-z]+\./ { item(2, "ol", $0) }
(openblock == "ul" || openblock == "ol") && /^ -/ { item(3, "ul", $0) }
(openblock == "ul" || openblock == "ol") && /^ [0-9a-z]+\./ { item(3, "ol", $0) }
(openblock == "ul" || openblock == "ol") && /^ -/ { item(4, "ul", $0) }
(openblock == "ul" || openblock == "ol") && /^ [0-9a-z]+\./ { item(4, "ol", $0) }
(openblock == "ul" || openblock == "ol") && /^ -/ { item(5, "ul", $0) }
(openblock == "ul" || openblock == "ol") && /^ [0-9a-z]+\./ { item(5, "ol", $0) }
(openblock == "ul" || openblock == "ol") && /^ -/ { item(6, "ul", $0) }
(openblock == "ul" || openblock == "ol") && /^ [0-9a-z]+\./ { item(6, "ol", $0) }

{ format($0) } # inline formatting

# closemarker(fmt): regex fragment matching the marker that closes inline
# format `fmt` ("tt" -> backtick, "i" -> asterisk, anything else -> underscore).
function closemarker(fmt) {
    if (fmt == "tt") return "`"
    if (fmt == "i") return "\\*"
    return "_"
}

# format(line): print `line` with HTML escaping, "[id]" references and
# inline `tt`, *i* and _b_ markup applied word by word, then a newline.
# Empty words (runs of spaces) are dropped.  openformat persists across
# calls, so a format may span several lines of the same block.
# n, w, i, orig and m are local variables.
function format(line,    n, w, i, orig, m) {
    # Scalars are passed by value, so the escaped text must be assigned back.
    line = htmlescape(line)
    n = split(line, w, "[ ]")
    for (i = 1; i <= n; i++) {
        if (w[i] == "") continue

        # References: "[id]", "[id]" + punctuation, "[id])" + punctuation.
        if (match(w[i], "^\\[[0-9a-z]+\\]$"))
            w[i] = ref(substr(w[i], 2, RLENGTH-2))
        else if (match(w[i], "^\\[[0-9a-z]+\\][.,:;?!)]$"))
            w[i] = ref(substr(w[i], 2, RLENGTH-3)) substr(w[i], RLENGTH)
        else if (match(w[i], "^\\[[0-9a-z]+\\]\\)[.,:;?!]$"))
            w[i] = ref(substr(w[i], 2, RLENGTH-4)) substr(w[i], RLENGTH-1)

        # Opening marker at the start of the word, optionally after "(".
        if (!openformat) {
            if (match(w[i], "^`")) {
                w[i] = "<tt>" substr(w[i], 2)
                openformat = "tt"
            } else if (match(w[i], "^\\*")) {
                w[i] = "<i>" substr(w[i], 2)
                openformat = "i"
            } else if (match(w[i], "^_")) {
                w[i] = "<b>" substr(w[i], 2)
                openformat = "b"
            } else if (match(w[i], "^\\(`")) {
                w[i] = "(<tt>" substr(w[i], 3)
                openformat = "tt"
            } else if (match(w[i], "^\\(\\*")) {
                w[i] = "(<i>" substr(w[i], 3)
                openformat = "i"
            } else if (match(w[i], "^\\(_")) {
                w[i] = "(<b>" substr(w[i], 3)
                openformat = "b"
            }
        }

        # Closing marker at the end of the word, optionally followed by
        # punctuation.  Only the marker belonging to the open format is
        # honoured, so the closing tag always matches the opening one.
        if (openformat) {
            orig = w[i]
            m = closemarker(openformat)
            if (match(w[i], m "$"))
                w[i] = substr(w[i], 1, RSTART-1) "</" openformat ">"
            else if (match(w[i], m "[.,:;?!)]$"))
                w[i] = substr(w[i], 1, RSTART-1) "</" openformat ">" substr(w[i], RSTART+1)
            else if (match(w[i], m "\\)[.,:;?!]$"))
                w[i] = substr(w[i], 1, RSTART-1) "</" openformat ">" substr(w[i], RSTART+1)
            if (w[i] != orig) openformat = ""
        }

        printf "%s", w[i]
        if (i < n) printf " "
    }
    printf "\n"
}

# opentag(name): print the opening tag for block `name`; "nl" (reference
# list) is rendered as a small-print ordered list.
function opentag(name) {
    if (name == "nl") printf "<ol class=\"reflist\" style=\"font-size: small;\">\n"
    else printf "<%s>\n", name
}

# closetag(name): print the closing tag matching opentag(name).
function closetag(name) {
    if (name == "nl") printf "</ol>\n"
    else printf "</%s>\n", name
}

# newblock(name): open block `name` and stop looking for a block opener.
function newblock(name) {
    opentag(name)
    openblock = name
    expectblock = 0
}

# breakblock(): close everything that is still open (inline format, list
# item, nested lists, definition, block) and reset the parser state so the
# next line may open a new block.
function breakblock() {
    if (openformat) {
        printf "</%s>", openformat
        printf "(%s:%d) warning: open <%s> closed at block break\n", ARGV[1], NR, openformat > "/dev/stderr"
    }
    if (openitem) {
        printf "</li>\n"
        # Close nested lists with the tag each level was opened with.
        for (; itemlevel > 1; itemlevel--)
            closetag(leveltype[itemlevel])
    }
    if (opendef) printf "</dd>\n"
    if (openblock) closetag(openblock)
    openitem = 0
    opendef = 0
    openblock = 0
    openformat = ""
    itemlevel = 1
    delete leveltype
    expectblock = 1
}

# heading(level, line): print `line`, stripped of its surrounding "=" runs
# and HTML-escaped, as an <h1>..<h6> element.
function heading(level, line) {
    sub("^=* ", "", line)
    sub(" =*$", "", line)
    line = htmlescape(line)
    printf "<h%d>%s</h%d>\n", level, line, level
    # should inline formatting be supported in headings?
}

# item(level, type, line): start a list item of `type` ("ul", "ol" or "nl")
# at nesting depth `level`, opening or closing nested lists as needed, and
# strip the item marker from $0 so that only the item text remains for
# format().  `line` is unused; the function works on $0.  v is local.
function item(level, type, line,    v) {
    if (openitem) printf "</li>"
    openitem = 1
    if (type != "nl") {
        for (; itemlevel < level; itemlevel++) {
            printf "<%s>\n", type
            leveltype[itemlevel+1] = type
        }
        for (; itemlevel > level; itemlevel--) {
            printf "</%s>\n", leveltype[itemlevel]
            leveltype[itemlevel] = ""
        }
    }
    if (type == "ul") {
        sub("^ +- ", "")
        printf "<li>"
    }
    if (type == "ol") {
        # "\\." in a string is the regex \. (a literal dot); a bare "\." is
        # not a defined string escape.
        match($0, "[0-9a-z]+\\.")
        v = substr($0, RSTART, RLENGTH-1)
        sub("^ +[0-9a-z]+\\. ", "")
        printf "<li value=\"%s\">", v
    }
    if (type == "nl") {
        match($0, "\\[[0-9a-z]+\\]")
        v = substr($0, RSTART+1, RLENGTH-2)
        sub("^ +\\[[0-9a-z]+\\] ", "")
        printf "<li value=\"%s\" id=\"ref%s\">", v, v
    }
}

# term(line): print the term of a "term: definition" line as <dt>, open the
# <dd>, and strip the term from $0 so only the definition text remains for
# format().  `line` is unused; the function works on $0.
function term(line, t) { # t is a local variable
    if (opendef) printf "</dd>"
    opendef = 1
    t = $0
    sub("^ ", "", t)
    sub(": .*$", "", t)
    sub("^ [^:]+: ", "")
    t = htmlescape(t)
    printf "<dt>%s</dt><dd>", t
}

# ref(v): HTML for reference `v`: a hyperlink labelled with linktext when
# the environment variable "ref<v>" is set, else a link to "#ref<v>".
function ref(v) {
    if (ENVIRON["ref" v] != "")
        return "(<a href=\"" ENVIRON["ref" v] "\">" linktext "</a>)"
    return "[<a href=\"#ref" v "\">" v "</a>]"
}

# htmlescape(s): return `s` with &, < and > replaced by HTML entities.
# "\\&" in the replacement yields a literal ampersand.
function htmlescape(s) {
    gsub("&", "\\&amp;", s)
    gsub("<", "\\&lt;", s)
    gsub(">", "\\&gt;", s)
    return s
}

# escape(s): with no (or an empty) argument, HTML-escape $0 in place; with
# an argument, return the escaped copy.  Scalars are passed by value, so a
# caller must use the return value to see the escaped text.
function escape(s) {
    if (s == "") {
        $0 = htmlescape($0)
        return $0
    }
    return htmlescape(s)
}