#!/bin/awk -f

# aux/emparse -- parse em source

# Initial state: the first input line may open a block, and list nesting
# starts at the outermost level.
BEGIN {
	expectblock = 1
	itemlevel = 1
}

# At end of input, close whatever block is still open.
END {
	breakblock()
}

# Block detection.  While expectblock is set, the shape of the current line
# decides which block is opened; newblock() clears expectblock, so at most one
# of the rules below opens a block for a given line.

# A blank line ends the current block.  `next` is used instead of `getline`
# so that the following line is run through *all* rules again, including this
# one: consecutive blank lines and a trailing blank line then no longer open
# an empty paragraph.
/^$/							{ breakblock(); next }
expectblock && /^  /			{ newblock("table") }
expectblock && /^ > /			{ newblock("blockquote") }
expectblock && /^ - /			{ newblock("ul") }
expectblock && /^ [0-9]+\. /		{ newblock("ol") }
expectblock && /^ \[[0-9]+\] /	{ newblock("nl") }
expectblock && /^ .*: /			{ newblock("dl") }
# A leading tab opens preformatted text.
expectblock && /^\t/			{ newblock("pre") }
# Rules and headings are complete on one line, so they are printed here and
# the remaining rules are skipped for this line.
expectblock && /^---$/			{ expectblock = 0; printf "<hr/>\n"; next }
expectblock && /^= .* =$/			{ heading(1, $0); next }
expectblock && /^== .* ==$/			{ heading(2, $0); next }
expectblock && /^=== .* ===$/		{ heading(3, $0); next }
expectblock && /^==== .* ====$/		{ heading(4, $0); next }
expectblock && /^===== .* =====$/	{ heading(5, $0); next }
expectblock && /^====== .* ======$/	{ heading(6, $0); next }
# Anything else starts a plain paragraph.
expectblock { newblock("p") }

# Inside a preformatted block: strip the leading tab, escape the rest, and
# print it as-is.  No inline formatting is applied.
openblock == "pre" {
	sub("^	", "")
	printf "%s\n", escape($0)
	next
}

# First-level entries.  Only the marker belonging to the currently open block
# is recognised; item() and term() strip that marker from $0 so the rest of
# the line falls through to inline formatting.
{
	if (openblock == "blockquote") {
		if ($0 ~ /^ > /) item(1, "blockquote", line)
	} else if (openblock == "ul") {
		if ($0 ~ /^ - /) item(1, "ul", line)
	} else if (openblock == "ol") {
		if ($0 ~ /^ [0-9]+\. /) item(1, "ol", line)
	} else if (openblock == "nl") {
		if ($0 ~ /^ \[[0-9]+\] /) item(1, "nl", line)
	} else if (openblock == "dl") {
		if ($0 ~ /^ .*: /) term(line)
	}
}

# Nested list entries inside a ul/ol block.  An entry at depth d (2..6) is
# indented by d spaces and starts with "-" (bulleted) or "N." (numbered).
# The tests run shallowest depth first, bullet before number, each against
# the current $0 (item() may have rewritten it).
openblock == "ul" || openblock == "ol" {
	pad = " "
	for (depth = 2; depth <= 6; depth++) {
		pad = pad " "
		if ($0 ~ ("^" pad "-")) item(depth, "ul", line)
		if ($0 ~ ("^" pad "[0-9]+\\.")) item(depth, "ol", line)
	}
}

{ format($0) } # inline formatting

# format(line) -- escape one line, apply inline markup word by word, print it.
#
# The escaped line is split on single spaces/tabs.  Each non-empty word is
# checked for:
#   <./x> <../x> <#x> <a/b>   turned into a hyperlink via link()
#   [N]                       turned into a reference via ref()
#   `..`  *..*  _.._          wrapped in <tt>, <i>, <b>
# Links, references and closing markers may be followed by one punctuation
# character, or by ")" plus one punctuation character; opening markers may be
# preceded by "(".
#
# The open inline element is remembered in the global openformat so a span
# can run over several words and lines.  Only the marker that opened the span
# closes it, so e.g. a "*" or "_" at the end of a word inside `...` no longer
# emits a mismatched </i> or </b>.
#
# n, w, i and mark are local variables.
function format(line,    n, w, i, mark) {
	line = escape(line)
	n = split(line, w, "[ \t]") # TODO: this removes spaces...
	for (i = 1; i <= n; i++) { # split() fills w[1..n]
		if (w[i] == "") continue;

		# "<" and ">" are already escaped: skip the 4 characters of "&lt;"
		# and drop the 4 of "&gt;" plus any trailing punctuation.
		if (match(w[i], "^&lt;(\\./.*|\\.\\./.*|#.*|.*/.*)&gt;$"))
			w[i] = link(substr(w[i], 5, RLENGTH-8))
		else if (match(w[i], "^&lt;(\\./.*|\\.\\./.*|#.*|.*/.*)&gt;[.,:;?!)]$"))
			w[i] = link(substr(w[i], 5, RLENGTH-9)) substr(w[i], RLENGTH)
		else if (match(w[i], "^&lt;(\\./.*|\\.\\./.*|#.*|.*/.*)&gt;\\)[.,:;?!]$"))
			w[i] = link(substr(w[i], 5, RLENGTH-10)) substr(w[i], RLENGTH-1)

		if (match(w[i], "^\\[[0-9]+\\]$"))
			w[i] = ref(substr(w[i], 2, RLENGTH-2))
		else if (match(w[i], "^\\[[0-9]+\\][.,:;?!)]$"))
			w[i] = ref(substr(w[i], 2, RLENGTH-3)) substr(w[i], RLENGTH)
		else if (match(w[i], "^\\[[0-9]+\\]\\)[.,:;?!]$"))
			w[i] = ref(substr(w[i], 2, RLENGTH-4)) substr(w[i], RLENGTH-1)

		if (!openformat) {
			if (match(w[i], "^`")) {
				w[i] = "<tt>" substr(w[i], 2)
				openformat = "tt"
			} else if (match(w[i], "^\\*")) {
				w[i] = "<i>" substr(w[i], 2)
				openformat = "i"
			} else if (match(w[i], "^_")) {
				w[i] = "<b>" substr(w[i], 2)
				openformat = "b"
			} else if (match(w[i], "^\\(`")) {
				w[i] = "(<tt>" substr(w[i], 3)
				openformat = "tt"
			} else if (match(w[i], "^\\(\\*")) {
				w[i] = "(<i>" substr(w[i], 3)
				openformat = "i"
			} else if (match(w[i], "^\\(_")) {
				w[i] = "(<b>" substr(w[i], 3)
				openformat = "b"
			}
		}

		if (openformat) {
			# Regex for the one marker that may close the open span.
			if (openformat == "tt") mark = "`"
			else if (openformat == "i") mark = "\\*"
			else mark = "_"

			# RSTART is the marker position in all three forms; whatever
			# follows it (nothing, punctuation, ")" + punctuation) is kept.
			if (match(w[i], mark "$") ||
			    match(w[i], mark "[.,:;?!)]$") ||
			    match(w[i], mark "\\)[.,:;?!]$")) {
				w[i] = substr(w[i], 1, RSTART-1) "</" openformat ">" substr(w[i], RSTART+1)
				openformat = ""
			}
		}

		printf "%s", w[i]
		if (i < n) printf " "
	}
	printf "\n"
}

# opentag(name) -- print the opening tag for block type `name`.
# The pseudo-type "nl" (numbered reference list) is emitted as an <ol>
# carrying class "reflist"; every other name is used as the tag verbatim.
function opentag(name) {
	if (name != "nl") {
		printf "<%s>\n", name
		return
	}
	printf "<ol class=\"reflist\">\n"
}

# closetag(name) -- print the closing tag for block type `name`.
# "nl" blocks were opened as <ol>, so they are closed as </ol>.
function closetag(name) {
	if (name != "nl") {
		printf "</%s>\n", name
		return
	}
	printf "</ol>\n"
}

# newblock(name) -- start a block of type `name`: record it as the open
# block, stop looking for a block start, and print its opening tag.
function newblock(name) {
	expectblock = 0
	openblock = name
	opentag(name)
}

# breakblock() -- close everything that is open and reset the parser state
# so that the next non-blank line starts a new block.
#
# Closing order: a dangling inline element (with a warning on stderr), the
# open item, any nested lists, an open <dd>, then the block itself.
# Nested lists are closed with the tag recorded in leveltype[] when they were
# opened, so an <ol> nested inside a <ul> is closed with </ol> instead of the
# outer block's tag.
function breakblock() {
	if (openformat) {
		printf "</%s>", openformat
		printf "(%s:%d) warning: open <%s> closed at block break\n", ARGV[1], NR, openformat > "/dev/stderr"
	}
	if (openitem) {
		if (openblock == "blockquote")
			printf "</p>\n"
		else
			printf "</li>\n"
		for (; itemlevel > 1; itemlevel--) {
			# Fall back to the block's own tag if no type was recorded.
			if (leveltype[itemlevel] != "")
				closetag(leveltype[itemlevel])
			else
				closetag(openblock)
		}
	}
	if (opendef) printf "</dd>\n"
	if (openblock) closetag(openblock)
	openitem = 0
	opendef = 0
	openblock = 0
	openformat = ""
	itemlevel = 1
	delete leveltype
	expectblock = 1
}

# heading(level, line) -- print `line` as an <h1>..<h6> heading.
#   level  heading level, used for the tag name
#   line   the source line, including the surrounding "=" runs
# The "=" decoration is stripped and the text is HTML-escaped.
function heading(level, line) {
	sub("^=* ", "", line)
	sub(" =*$", "", line)
	# escape() returns the escaped copy; awk passes strings by value, so the
	# result has to be assigned back.
	line = escape(line)
	printf "<h%d>%s</h%d>\n", level, line, level
	# should inline formatting be supported in headings?
}

# item(level, type, line) -- start a new entry in the open list or blockquote.
#   level  nesting depth of the entry (1 = outermost)
#   type   "blockquote", "ul", "ol" or "nl"
#   line   unused; the function works on $0 directly
# Closes the previous entry, opens or closes nested lists until itemlevel
# equals level, prints the opening tag of the entry, and strips the entry
# marker from $0 so the rest of the line can be formatted by later rules.
# v is a local variable.
function item(level, type, line,    v) {
	if (type == "blockquote") {
		if (openitem) printf "</p>"
		openitem = 1
		sub("^ > ", "")
		printf "<p>"
		return
	}
	if (openitem) printf "</li>"
	openitem = 1
	# Going deeper: open one list per missing level and remember its type so
	# it can be closed with the right tag later.
	for (; itemlevel < level; itemlevel++) {
		printf "<%s>\n", type
		leveltype[itemlevel+1] = type
	}
	# Going shallower: close the lists that are deeper than this entry.
	for (; itemlevel > level; itemlevel--) {
		printf "</%s>\n", leveltype[itemlevel]
		leveltype[itemlevel] = ""
	}
	if (type == "ul") {
		sub("^ +- ", "")
		printf "<li>"
	}
	if (type == "ol") {
		# "\\." is a literal dot.  A single backslash inside a string is an
		# undefined escape: gawk warns and treats it as ".", any character.
		match($0, "[0-9]+\\.")
		v = substr($0, RSTART, RLENGTH-1)
		sub("^ +[0-9]+\\. ", "")
		printf "<li value=\"%s\">", v
	}
	if (type == "nl") {
		match($0, "\\[[0-9]+\\]")
		v = substr($0, RSTART+1, RLENGTH-2)
		sub("^ \\[[0-9]+\\] ", "")
		printf "<li value=\"%s\" id=\"ref%s\">", v, v
	}
}

# term(line) -- start a definition-list entry from $0 (" term: definition").
#   line   unused; the function works on $0 directly
# Closes the previous <dd> if one is open, prints the escaped term as <dt>,
# opens a <dd>, and leaves only the definition text in $0 for later rules.
# The term ends at the first ": " in the line; the same position is used to
# cut $0, so a term that itself contains a colon is removed from the
# definition too.
# t and p are local variables.
function term(line,    t, p) {
	if (opendef) printf "</dd>"
	opendef = 1
	p = index($0, ": ")
	if (p > 0) {
		t = substr($0, 1, p - 1)
		$0 = substr($0, p + 2)
	} else {
		# No separator: the whole line is the term and $0 is left alone.
		t = $0
	}
	sub("^ ", "", t)
	# escape() returns the escaped copy; the result must be assigned back.
	t = escape(t)
	printf "<dt>%s</dt><dd>", t
}

# ref(v) -- build the bracketed hyperlink for reference number v.
# When the environment variable "ref<v>" is set to a non-empty value, it is
# used as the link target; otherwise the link points at the in-page anchor
# "#ref<v>".
function ref(v) {
	if (ENVIRON["ref" v] == "")
		return "[<a href=\"#ref" v "\">" v "</a>]"
	return "[<a href=\"" ENVIRON["ref" v] "\">" v "</a>]"
}

# link(h) -- build a hyperlink whose target and visible text are both h.
#   h  link target; the caller has already escaped &, < and >
# A double quote in h would end the href attribute early, so it is written
# as &quot; inside the attribute.  The visible text is left unchanged.
# attr is a local variable.
function link(h,    attr) {
	attr = h
	gsub("\"", "\\&quot;", attr)
	return "<a href=\"" attr "\">" h "</a>"
}

# escape(s) -- return s with the HTML special characters &, < and >
# replaced by their entities.  "&" is handled first so that the ampersands
# of the entities inserted afterwards are not escaped a second time.
# ("\\&" in a replacement string stands for a literal ampersand.)
function escape(s) {
	gsub(/&/, "\\&amp;", s)
	gsub(/</, "\\&lt;", s)
	gsub(/>/, "\\&gt;", s)
	return s
}