#!/bin/awk -f
# aux/emparse -- parse em source
# Initialise the parser state and build ord[], a lookup table from a single
# character to its numeric code (0..255), used by listnum().
BEGIN {
    expectblock = 1     # the next line may open a new block
    itemlevel = 1       # list nesting depth starts at the outermost level
    n = 0
    while (n < 256) {
        ord[sprintf("%c", n)] = n
        n++
    }
}
# At end of input, close whatever block is still open.
END { breakblock() }
# A blank line ends the current block. `next` restarts rule processing with
# the following record, so a run of blank lines (or a blank line at end of
# input) is consumed here instead of falling through to the rules below and
# opening an empty block. breakblock() prints nothing when no block is open.
/^$/ { breakblock(); next }
# Block detection. While expectblock is set, the first rule whose pattern
# matches opens a block through newblock(); newblock() clears expectblock, so
# the remaining detection rules are skipped for that line.
# NOTE(review): the "table" pattern is textually identical to the "pre"
# pattern at the end of this group and matches every line that starts with a
# space, so as written it pre-empts all the other space-prefixed rules. The
# leading whitespace in these patterns looks collapsed -- confirm the
# intended indentation for each block type.
expectblock && /^ / { newblock("table") }
# "> " marker: block quote.
expectblock && /^ > / { newblock("blockquote") }
# "- " marker: unordered list.
expectblock && /^ - / { newblock("ul") }
# "<value>. " marker (digits or lowercase letters): ordered list.
expectblock && /^ [0-9a-z]+\. / { newblock("ol") }
# "[<value>] " marker: "nl" block.
expectblock && /^ \[[0-9a-z]+\] / { newblock("nl") }
# "<term>: " prefix: definition list.
expectblock && /^ .*: / { newblock("dl") }
# Any other space-prefixed line: preformatted text.
expectblock && /^ / { newblock("pre") }
# A line consisting of exactly "---" at the start of a block: clear
# expectblock, print the separator and skip the remaining rules.
# The string literal used to be split across two source lines, which awk
# rejects as an unterminated string; it is joined here without changing the
# bytes it spelled out.
# NOTE(review): only newlines are printed -- presumably a rule tag was lost
# from this literal; confirm the intended output.
expectblock && /^---$/ { expectblock = 0; printf "\n\n"; next }
# Headings: one to six "=" characters on both sides of the text select the
# level. heading() prints the heading and `next` skips the remaining rules;
# expectblock stays set, so the following line may open a block.
expectblock && /^= .* =$/ { heading(1, $0); next }
expectblock && /^== .* ==$/ { heading(2, $0); next }
expectblock && /^=== .* ===$/ { heading(3, $0); next }
expectblock && /^==== .* ====$/ { heading(4, $0); next }
expectblock && /^===== .* =====$/ { heading(5, $0); next }
expectblock && /^====== .* ======$/ { heading(6, $0); next }
# Nothing above claimed the line: it opens a paragraph.
expectblock { newblock("p") }
# Inside a "pre" block: strip one leading space, escape the line, print it
# as-is and skip all remaining rules (no inline formatting).
openblock == "pre" { sub("^ ", ""); $0 = escape($0); printf "%s\n", $0; next }
# Item and term starts inside list-like blocks. item() and term() strip the
# marker from $0; these rules have no `next`, so the rest of the line then
# reaches the format() rule at the end.
# NOTE(review): `line` is never assigned anywhere in the visible source, so an
# uninitialized value is passed; item() and term() operate on $0 instead.
openblock == "blockquote" && /^ > / { item(1, "blockquote", line) }
openblock == "ul" && /^ - / { item(1, "ul", line) }
openblock == "ol" && /^ [0-9a-z]+\. / { item(1, "ol", line) }
openblock == "nl" && /^ \[[0-9a-z]+\] / { item(1, "nl", line) }
openblock == "dl" && /^ .*: / { term(line) }
# Nested list items, levels 2 to 6, inside "ul" and "ol" blocks.
# NOTE(review): the five pattern pairs below are textually identical and
# differ only in the level passed to item(); presumably each level originally
# required deeper indentation that has been collapsed -- confirm.
(openblock == "ul" || openblock == "ol") && /^ -/ { item(2, "ul", line) }
(openblock == "ul" || openblock == "ol") && /^ [0-9a-z]+\./ { item(2, "ol", line) }
(openblock == "ul" || openblock == "ol") && /^ -/ { item(3, "ul", line) }
(openblock == "ul" || openblock == "ol") && /^ [0-9a-z]+\./ { item(3, "ol", line) }
(openblock == "ul" || openblock == "ol") && /^ -/ { item(4, "ul", line) }
(openblock == "ul" || openblock == "ol") && /^ [0-9a-z]+\./ { item(4, "ol", line) }
(openblock == "ul" || openblock == "ol") && /^ -/ { item(5, "ul", line) }
(openblock == "ul" || openblock == "ol") && /^ [0-9a-z]+\./ { item(5, "ol", line) }
(openblock == "ul" || openblock == "ol") && /^ -/ { item(6, "ul", line) }
(openblock == "ul" || openblock == "ol") && /^ [0-9a-z]+\./ { item(6, "ol", line) }
# Every line that reaches this point is printed through format().
{ format($0) } # inline formatting
# format(line) -- escape a line, apply inline markup word by word, and print
# the result followed by a newline.
#
# The escaped line is split on single spaces and every non-empty word goes
# through three passes:
#   1. links: &lt;target&gt; where target starts with "./", "../" or "#", or
#      contains "/"; optionally followed by one of .,:;?!) or by ")" plus one
#      of .,:;?! -- replaced by link(target), trailing characters kept;
#   2. references: [label] with the same optional trailers -- replaced by
#      ref(label);
#   3. spans: a leading `, * or _ (optionally after "(") opens a tt, i or b
#      element; a trailing `, * or _ (optionally before the same trailers)
#      closes whichever element is open.
# openformat is global state: an element still open at the end of the line
# stays open for later calls until a closing marker is seen or breakblock()
# clears it.
# n, w, i and mark are local variables.
function format(line,    n, w, i, mark) {
    line = escape(line)
    n = split(line, w, "[ ]") # TODO: this removes spaces...
    for (i = 1; i <= n; i++) {  # split() numbers the words from 1
        if (w[i] == "") continue

        # Links are matched in entity form: the offsets 5 and RLENGTH-8
        # skip the four-character "&lt;" and "&gt;" delimiters.
        if (match(w[i], "^&lt;(\\./.*|\\.\\./.*|#.*|.*/.*)&gt;$"))
            w[i] = link(substr(w[i], 5, RLENGTH-8))
        else if (match(w[i], "^&lt;(\\./.*|\\.\\./.*|#.*|.*/.*)&gt;[.,:;?!)]$"))
            w[i] = link(substr(w[i], 5, RLENGTH-9)) substr(w[i], RLENGTH)
        else if (match(w[i], "^&lt;(\\./.*|\\.\\./.*|#.*|.*/.*)&gt;\\)[.,:;?!]$"))
            w[i] = link(substr(w[i], 5, RLENGTH-10)) substr(w[i], RLENGTH-1)

        # References: [label], optionally followed by punctuation.
        if (match(w[i], "^\\[[0-9a-z]+\\]$"))
            w[i] = ref(substr(w[i], 2, RLENGTH-2))
        else if (match(w[i], "^\\[[0-9a-z]+\\][.,:;?!)]$"))
            w[i] = ref(substr(w[i], 2, RLENGTH-3)) substr(w[i], RLENGTH)
        else if (match(w[i], "^\\[[0-9a-z]+\\]\\)[.,:;?!]$"))
            w[i] = ref(substr(w[i], 2, RLENGTH-4)) substr(w[i], RLENGTH-1)

        # Opening marker: the last matched character is the marker itself,
        # anything before it (an optional "(") is kept in front of the tag.
        if (!openformat && match(w[i], "^\\(?[`*_]")) {
            mark = substr(w[i], RLENGTH, 1)
            if (mark == "`") openformat = "tt"
            else if (mark == "*") openformat = "i"
            else openformat = "b"
            w[i] = substr(w[i], 1, RLENGTH-1) "<" openformat ">" substr(w[i], RLENGTH+1)
        }

        # Closing marker: at the very end of the word, before one trailing
        # punctuation character, or before ")" plus one punctuation character.
        # Any of the three markers closes the element that is open.
        if (openformat) {
            if (match(w[i], "[`*_]$") ||
                match(w[i], "[`*_][.,:;?!)]$") ||
                match(w[i], "[`*_]\\)[.,:;?!]$")) {
                w[i] = substr(w[i], 1, RSTART-1) "</" openformat ">" substr(w[i], RSTART+1)
                openformat = ""
            }
        }

        printf "%s", w[i]
        if (i < n) printf " "
    }
    printf "\n"
}
# opentag(name) -- print the opening line for a block of the given type.
# For "nl" only a newline is printed; any other name is printed as <name>
# followed by a newline.
function opentag(name) {
    if (name != "nl") {
        printf "<%s>\n", name
        return
    }
    printf "\n"
}
# closetag(name) -- print the closing line for a block of the given type.
# Every name except "nl" is printed as </name> followed by a newline, the
# counterpart of the <name> line printed by opentag().
function closetag(name) {
    # The "nl" literal used to be split across two source lines (an
    # unterminated string in awk); it is joined here without changing the
    # bytes it spelled out.
    # NOTE(review): only newlines are printed for "nl" -- presumably markup
    # was lost from this literal; confirm the intended output.
    if (name == "nl") printf "\n\n"
    else printf "</%s>\n", name
}
# newblock(name) -- open a block of the given type: record it in openblock,
# clear expectblock so no further block is opened for this line, and print
# the opening line through opentag().
function newblock(name) {
    expectblock = 0
    openblock = name
    opentag(name)
}
# breakblock() -- close everything that is still open and reset the parser
# state so that the next line may open a new block.
#
# In order: an unterminated inline element (closed with a warning on stderr
# that names ARGV[1] and the current record number), the open item together
# with any nested list levels, the open definition, and the block itself.
# Prints nothing when nothing is open.
function breakblock() {
    if (openformat) {
        printf "</%s>", openformat
        printf "(%s:%d) warning: open <%s> closed at block break\n", ARGV[1], NR, openformat > "/dev/stderr"
    }
    if (openitem) {
        # NOTE(review): both branches print only a newline -- presumably
        # different closing markup was lost from these literals; confirm.
        if (openblock == "blockquote")
            printf "\n"
        else
            printf "\n"
        # Close nested levels innermost first, each with the tag that item()
        # recorded for it, so a list nested inside a list of another type is
        # not closed with the outer block's tag.
        for (; itemlevel > 1; itemlevel--)
            closetag(leveltype[itemlevel])
    }
    # NOTE(review): prints only a newline -- presumably closing markup for
    # the definition was lost from this literal; confirm.
    if (opendef) printf "\n"
    if (openblock) closetag(openblock)
    openitem = 0
    opendef = 0
    openblock = 0
    openformat = ""
    itemlevel = 1
    delete leveltype
    expectblock = 1
}
# heading(level, line) -- print a heading element.
#   level  heading level, 1..6
#   line   the source line, including its "=" markers
# The leading "=... " and trailing " =..." markers are removed, the text is
# escaped, and the result is printed as <hN>text</hN> on a line of its own.
function heading(level, line) {
    sub("^=* ", "", line)
    sub(" =*$", "", line)
    # awk passes scalars by value, so the escaped text must be assigned back.
    line = escape(line)
    # One conversion per argument: level, text, level.
    printf "<h%d>%s</h%d>\n", level, line, level
    # should inline formatting be supported in headings?
}
# item(level, type, line) -- start a new item in the open list-like block.
#   level  nesting depth of the item (1 = outermost)
#   type   "blockquote", "ul", "ol" or "nl"
#   line   unused; the item marker is stripped from $0 directly
# v is a local variable.
#
# Sets openitem, brings itemlevel/leveltype in step with the requested depth
# by printing opening or closing list tags, and strips the marker from $0 so
# that the rules which run afterwards see only the item text. For "ol" and
# "nl" the item value is checked with listvalid(), which exits on an invalid
# value.
function item(level, type, line,    v) {
    if (type == "blockquote") {
        # NOTE(review): both printf calls print an empty string --
        # presumably markup was lost from these literals; confirm.
        if (openitem) printf ""
        openitem = 1
        sub("^ > ", "")
        printf ""
        return
    }
    # NOTE(review): prints an empty string -- presumably the closing markup
    # of the previous item was lost; confirm.
    if (openitem) printf ""
    openitem = 1
    # Going deeper: open one list per missing level and remember its type.
    for (; itemlevel < level; itemlevel++) {
        printf "<%s>\n", type
        leveltype[itemlevel+1] = type
    }
    # Coming back up: close each abandoned level with the type it was
    # opened with.
    for (; itemlevel > level; itemlevel--) {
        printf "</%s>\n", leveltype[itemlevel]
        leveltype[itemlevel] = ""
    }
    if (type == "ul") {
        sub("^ +- ", "")
        # This literal used to be split across two source lines (an
        # unterminated string in awk); joined without changing its bytes.
        # NOTE(review): only a newline is printed -- presumably the item's
        # opening markup was lost; confirm.
        printf "\n"
    }
    if (type == "ol") {
        # In a string used as a regex the dot must be written "\\." --
        # "\." is not a defined string escape.
        match($0, "[0-9a-z]+\\.")
        v = substr($0, RSTART, RLENGTH-1)
        sub("^ +[0-9a-z]+\\. ", "")
        listvalid(v)
        # NOTE(review): the format string is empty, so the two arguments are
        # never printed -- presumably markup was lost; confirm.
        printf "", listnum(v), listtype(v)
    }
    if (type == "nl") {
        match($0, "\\[[0-9a-z]+\\]")
        v = substr($0, RSTART+1, RLENGTH-2)
        sub("^ \\[[0-9a-z]+\\] ", "")
        listvalid(v)
        # NOTE(review): the format string is empty, so the three arguments
        # are never printed -- presumably markup was lost; confirm.
        printf "", listnum(v), listtype(v), v
    }
}
# term(line) -- start a new entry in a definition list.
#   line   unused; the entry is read from $0 directly
# The term is the text between the leading space and the first ": ". It is
# escaped and printed, and "<term>: " is stripped from $0 so that the rules
# which run afterwards see only the definition text. Sets opendef.
function term(line, t) { # t is a local variable
    # NOTE(review): prints an empty string -- presumably the closing markup
    # of the previous definition was lost; confirm.
    if (opendef) printf ""
    opendef = 1
    t = $0
    sub("^ ", "", t)
    sub(": .*$", "", t)
    sub("^ [^:]+: ", "")
    # awk passes scalars by value, so the escaped text must be assigned back.
    t = escape(t)
    printf "%s", t
}
# ref(v) -- render a reference to label v as "[v]".
# The environment variable named "ref" followed by v is consulted first.
# NOTE(review): both outcomes of that lookup return the same text --
# presumably one of them carried markup that was lost; confirm.
function ref(v) {
    if (ENVIRON["ref" v] == "")
        return "[" v "]"
    return "[" v "]"
}
# link(h) -- render a link to target h. Returns h as a string.
# NOTE(review): the target is returned without any markup around it --
# presumably the surrounding text was lost; confirm.
function link(h) {
    return h ""
}
# listvalid(v) -- check a list item value.
# Accepts a run of decimal digits, a single lowercase letter or a single
# uppercase letter. Anything else is reported on stderr and the program
# exits with status 1.
function listvalid(v) {
    if (match(v, "^[0-9]+$") || match(v, "^[a-z]$") || match(v, "^[A-Z]$"))
        return
    printf "error: '%s' is not a valid list item value\n", v > "/dev/stderr"
    exit 1
}
# listnum(v) -- numeric position of a list item value.
# Digits are returned unchanged; a single letter is mapped through the
# global ord[] table so that "a" and "A" give 1, "b" and "B" give 2, and so
# on. Any other value yields an uninitialized result.
function listnum(v) {
    if (match(v, "^[0-9]+$"))
        return v
    else if (match(v, "^[a-z]$"))
        return ord[v]-96
    else if (match(v, "^[A-Z]$"))
        return ord[v]-64
}
# listtype(v) -- numbering style implied by a list item value:
# "decimal" for digits, "lower-alpha" for a single lowercase letter and
# "upper-alpha" for a single uppercase letter. Any other value yields an
# uninitialized result.
function listtype(v) {
    if (match(v, "^[0-9]+$"))
        return "decimal"
    else if (match(v, "^[a-z]$"))
        return "lower-alpha"
    else if (match(v, "^[A-Z]$"))
        return "upper-alpha"
}
# escape(s) -- return s with the HTML-special characters replaced by
# character references: & becomes &amp;, < becomes &lt;, > becomes &gt;.
# The argument is passed by value, so callers must use the return value.
function escape(s) {
    # "&" goes first so the ampersands introduced by the next two
    # substitutions are not escaped a second time. In a gsub() replacement a
    # bare "&" stands for the matched text, hence the "\\&" for a literal one.
    gsub("&", "\\&amp;", s)
    gsub("<", "\\&lt;", s)
    gsub(">", "\\&gt;", s)
    return s
}