#!/bin/awk -f
# aux/emparse -- parse em source
#
# Translates "em" markup read from the input into an HTML fragment written to
# standard output.  Input is handled one line at a time: an empty line ends
# the current block, and the next non-empty line decides which block opens.
#
# Block starters, tested in this order while a new block is expected:
#   <tab>text        table
#   " > text"        blockquote
#   " - text"        ul  (unordered list)
#   " 1. text"       ol  (ordered list; digits or one letter)
#   " [1] text"      nl  (numbered reference list, emitted as <ol>)
#   " term: text"    dl  (definition list)
#   " text"          pre (any other line starting with a space)
#   "---"            horizontal rule
#   "= text ="       heading, one to six "=" on each side
#   anything else    p
#
# NOTE(review): this file was recovered from a copy in which runs of
# whitespace had been collapsed and HTML tags removed from string literals.
# The indentation widths (one space per list level, a tab for tables) and the
# exact tag/attribute text emitted below are reconstructions -- confirm them
# against real em documents before relying on them.
#
# Global state:
#   expectblock  true when the next line should open a new block
#   openblock    name of the block currently open, or 0
#   openitem     true while an <li> (or blockquote <p>) is open
#   opendef      true while a <dd> is open
#   openformat   inline tag ("tt", "i", "b") currently open, or ""
#   itemlevel    current list nesting depth (1 = top level)
#   leveltype[]  list tag opened for each nesting depth above 1
#   ord[]        character -> code table used by listnum()

BEGIN {
	expectblock = 1
	itemlevel = 1
	for (n = 0; n < 256; n++)
		ord[sprintf("%c", n)] = n
}

END { breakblock() }

# An empty line ends the current block.  "next" (rather than getline) makes
# runs of empty lines and an empty last line harmless: with getline the
# following record skipped this rule and opened an empty paragraph.
/^$/ { breakblock(); next }

# Block openers.  newblock() clears expectblock, so only the first matching
# rule fires for a given line.
expectblock && /^\t/ { newblock("table") }
expectblock && /^ > / { newblock("blockquote") }
expectblock && /^ - / { newblock("ul") }
expectblock && /^ [0-9a-z]+\. / { newblock("ol") }
expectblock && /^ \[[0-9a-z]+\] / { newblock("nl") }
expectblock && /^ .*: / { newblock("dl") }
expectblock && /^ / { newblock("pre") }
expectblock && /^---$/ { expectblock = 0; printf "<hr>\n"; next }
expectblock && /^= .* =$/ { heading(1, $0); next }
expectblock && /^== .* ==$/ { heading(2, $0); next }
expectblock && /^=== .* ===$/ { heading(3, $0); next }
expectblock && /^==== .* ====$/ { heading(4, $0); next }
expectblock && /^===== .* =====$/ { heading(5, $0); next }
expectblock && /^====== .* ======$/ { heading(6, $0); next }
expectblock { newblock("p") }

# Preformatted text is escaped and copied through without inline formatting.
openblock == "pre" { sub("^ ", ""); $0 = escape($0); printf "%s\n", $0; next }

# Top-level items.  item() and term() strip the marker from $0 and print the
# opening tag; the remaining text is then handled by the final format() rule.
openblock == "blockquote" && /^ > / { item(1, "blockquote", $0) }
openblock == "ul" && /^ - / { item(1, "ul", $0) }
openblock == "ol" && /^ [0-9a-z]+\. / { item(1, "ol", $0) }
openblock == "nl" && /^ \[[0-9a-z]+\] / { item(1, "nl", $0) }
openblock == "dl" && /^ .*: / { term($0) }

# Nested items (levels 2 to 6) inside ul/ol blocks.  The nesting level is the
# number of leading spaces; deeper indentation is treated as ordinary text.
(openblock == "ul" || openblock == "ol") && match($0, /^  +/) && RLENGTH <= 6 {
	depth = RLENGTH
	marker = substr($0, depth + 1)
	if (marker ~ /^-/)
		item(depth, "ul", $0)
	else if (marker ~ /^[0-9a-z]+\./)
		item(depth, "ol", $0)
}

{ format($0) }

# format(line) -- escape one line of text, apply inline formatting, print it.
#
# The line is split on single spaces and each word is examined for:
#   <target>   a link; target starts with "./", "../", "#" or contains "/"
#   [ref]      a reference to a numbered-list entry
#   `x`  *x*  _x_   opening/closing markers for <tt>, <i> and <b>
# Links, references and closing markers may be followed by one punctuation
# character, or by ")" plus one punctuation character.  An inline span may
# cover several words and several lines; the open tag is remembered in the
# global openformat until its marker is seen or the block ends.
function format(line,    w, n, i, word, before, target, lead, fmt) {
	line = escape(line)
	# "<" and ">" are "&lt;" and "&gt;" by now, which is why the link
	# pattern spells them as entities and the offsets below skip 4 chars.
	target = "^&lt;(\\./.*|\\.\\./.*|#.*|.*/.*)&gt;"
	n = split(line, w, "[ ]")	# TODO: this removes spaces...
	for (i = 1; i <= n; i++) {
		word = w[i]
		if (word == "")
			continue

		if (match(word, target "$"))
			word = link(substr(word, 5, RLENGTH - 8))
		else if (match(word, target "[.,:;?!)]$"))
			word = link(substr(word, 5, RLENGTH - 9)) substr(word, RLENGTH)
		else if (match(word, target "\\)[.,:;?!]$"))
			word = link(substr(word, 5, RLENGTH - 10)) substr(word, RLENGTH - 1)

		if (match(word, "^\\[[0-9a-z]+\\]$"))
			word = ref(substr(word, 2, RLENGTH - 2))
		else if (match(word, "^\\[[0-9a-z]+\\][.,:;?!)]$"))
			word = ref(substr(word, 2, RLENGTH - 3)) substr(word, RLENGTH)
		else if (match(word, "^\\[[0-9a-z]+\\]\\)[.,:;?!]$"))
			word = ref(substr(word, 2, RLENGTH - 4)) substr(word, RLENGTH - 1)

		if (!openformat) {
			# An opening marker is the first character of the word,
			# or the second when the word starts with "(".
			lead = (substr(word, 1, 1) == "(") ? 1 : 0
			fmt = formatof(substr(word, lead + 1, 1))
			if (fmt != "") {
				word = substr(word, 1, lead) "<" fmt ">" substr(word, lead + 2)
				openformat = fmt
			}
		}
		if (openformat) {
			before = word
			word = closeformat(word, openformat)
			if (word != before)
				openformat = ""
		}

		printf "%s", word
		if (i < n)
			printf " "
	}
	printf "\n"
}

# formatof(c) -- inline tag opened by marker character c, or "" if none.
function formatof(c) {
	if (c == "`") return "tt"
	if (c == "*") return "i"
	if (c == "_") return "b"
	return ""
}

# markre(fmt) -- regular expression for the marker that closes tag fmt.
function markre(fmt) {
	if (fmt == "tt") return "`"
	if (fmt == "i") return "\\*"
	return "_"
}

# closeformat(word, fmt) -- return word with the closing marker of fmt
# replaced by its end tag, or word unchanged when it does not end the span.
# Only the marker that opened the span closes it, so a stray "_" inside
# `code` or *emphasis* no longer produces a mismatched end tag.
function closeformat(word, fmt,    m) {
	m = markre(fmt)
	if (match(word, m "$") || match(word, m "[.,:;?!)]$") || match(word, m "\\)[.,:;?!]$"))
		return substr(word, 1, RSTART - 1) "</" fmt ">" substr(word, RSTART + 1)
	return word
}

# opentag(name) -- print the start tag for block `name`.
# "nl" blocks are rendered as ordered lists.
function opentag(name) {
	if (name == "nl")
		printf "<ol>\n"
	else
		printf "<%s>\n", name
}

# closetag(name) -- print the end tag matching opentag(name).
function closetag(name) {
	if (name == "nl")
		printf "</ol>\n"
	else
		printf "</%s>\n", name
}

# newblock(name) -- open block `name` and stop expecting a new block.
function newblock(name) {
	opentag(name)
	openblock = name
	expectblock = 0
}

# breakblock() -- close everything that is still open and reset the state so
# that the next line starts a new block.  An inline span left open is closed
# with a warning on standard error.
function breakblock() {
	if (openformat) {
		printf "</%s>", openformat
		printf "(%s:%d) warning: open <%s> closed at block break\n", ARGV[1], NR, openformat > "/dev/stderr"
	}
	if (openitem) {
		if (openblock == "blockquote")
			printf "</p>\n"
		else
			printf "</li>\n"
		# Close nested lists with the tag that opened each level; the
		# outer block's own tag is closed further down.
		for (; itemlevel > 1; itemlevel--)
			printf "</%s>\n", leveltype[itemlevel]
	}
	if (opendef)
		printf "</dd>\n"
	if (openblock)
		closetag(openblock)
	openitem = 0
	opendef = 0
	openblock = 0
	openformat = ""
	itemlevel = 1
	delete leveltype
	expectblock = 1
}

# heading(level, line) -- print `line` as an <h1>..<h6> heading after
# stripping the surrounding "=" runs.  The text is escaped; the result of
# escape() must be assigned because awk passes scalars by value.
function heading(level, line) {
	sub("^=* ", "", line)
	sub(" =*$", "", line)
	line = escape(line)
	printf "<h%d>%s</h%d>\n", level, line, level
	# should inline formatting be supported in headings?
}

# item(level, type, line) -- start a new item in the current block.
#
#   level  nesting depth of the item (1 = top level)
#   type   "blockquote", "ul", "ol" or "nl"
#   line   unused; the function works on $0 directly
#
# Closes the previous item, opens or closes nested lists until the depth
# matches `level`, removes the item marker from $0 and prints the opening
# tag.  The caller's final format() rule prints the item text.
function item(level, type, line,    v) {
	if (type == "blockquote") {
		if (openitem)
			printf "</p>"
		openitem = 1
		sub("^ > ", "")
		printf "<p>"
		return
	}

	if (openitem)
		printf "</li>"
	openitem = 1

	for (; itemlevel < level; itemlevel++) {
		printf "<%s>\n", type
		leveltype[itemlevel + 1] = type
	}
	for (; itemlevel > level; itemlevel--) {
		printf "</%s>\n", leveltype[itemlevel]
		leveltype[itemlevel] = ""
	}

	if (type == "ul") {
		sub("^ +- ", "")
		printf "<li>"
	}
	if (type == "ol") {
		match($0, "[0-9a-z]+\\.")
		v = substr($0, RSTART, RLENGTH - 1)
		sub("^ +[0-9a-z]+\\. ", "")
		listvalid(v)
		# NOTE(review): attribute spelling reconstructed -- confirm.
		printf "<li value=\"%s\" style=\"list-style-type: %s\">", listnum(v), listtype(v)
	}
	if (type == "nl") {
		match($0, "\\[[0-9a-z]+\\]")
		v = substr($0, RSTART + 1, RLENGTH - 2)
		sub("^ \\[[0-9a-z]+\\] ", "")
		listvalid(v)
		# NOTE(review): the id is assumed to be the anchor that ref()
		# links to -- confirm.
		printf "<li value=\"%s\" style=\"list-style-type: %s\" id=\"ref%s\">", listnum(v), listtype(v), v
	}
}

# term(line) -- start a definition-list entry from " term: definition".
#
# Prints the escaped term as <dt> and opens <dd>; the definition text is left
# in $0 for the caller's format() rule.  The line is split at the first ": "
# so a term containing a bare ":" is still removed from the definition.
# `line` is unused; t is a local variable.
function term(line,    t) {
	if (opendef)
		printf "</dd>"
	opendef = 1
	match($0, ": ")
	t = escape(substr($0, 2, RSTART - 2))
	$0 = substr($0, RSTART + 2)
	printf "<dt>%s</dt>\n<dd>", t
}

# ref(v) -- HTML for a reference to numbered-list entry v.
# When the environment variable "ref<v>" is set its value is used as the link
# target; otherwise the link points at the anchor of the entry in this page.
# NOTE(review): both link targets are reconstructed -- confirm.
function ref(v) {
	if (ENVIRON["ref" v] != "")
		return "<a href=\"" quoteattr(escape(ENVIRON["ref" v])) "\">[" v "]</a>"
	return "<a href=\"#ref" v "\">[" v "]</a>"
}

# link(h) -- HTML anchor whose target and text are both h.
# h arrives already escaped by format(); only double quotes still need
# protecting inside the attribute value.
function link(h) {
	return "<a href=\"" quoteattr(h) "\">" h "</a>"
}

# quoteattr(s) -- s with double quotes replaced by &quot;.
function quoteattr(s) {
	gsub("\"", "\\&quot;", s)
	return s
}

# listvalid(v) -- abort with exit status 1 unless v is a decimal number or a
# single ASCII letter.
function listvalid(v) {
	if (match(v, "^[0-9]+$")) return
	if (match(v, "^[a-z]$")) return
	if (match(v, "^[A-Z]$")) return
	printf "error: '%s' is not a valid list item value\n", v > "/dev/stderr"
	exit 1
}

# listnum(v) -- ordinal of list value v: the number itself, or 1..26 for a
# letter ("a" and "A" are 1).
function listnum(v) {
	if (match(v, "^[0-9]+$")) return v
	if (match(v, "^[a-z]$")) return ord[v] - 96
	if (match(v, "^[A-Z]$")) return ord[v] - 64
}

# listtype(v) -- CSS list-style-type matching the form of list value v.
function listtype(v) {
	if (match(v, "^[0-9]+$")) return "decimal"
	if (match(v, "^[a-z]$")) return "lower-alpha"
	if (match(v, "^[A-Z]$")) return "upper-alpha"
}

# escape(s) -- s with "&", "<" and ">" replaced by HTML character references.
# "&" is handled first so the entities produced for "<" and ">" are not
# escaped again.  In a gsub() replacement a bare "&" means "the matched
# text", hence the "\\&" spelling for a literal ampersand.
function escape(s) {
	gsub("&", "\\&amp;", s)
	gsub("<", "\\&lt;", s)
	gsub(">", "\\&gt;", s)
	return s
}