6 files changed, 211 insertions, 2 deletions
diff --git a/BUGS b/BUGS
index cb764d6..7fa9dc6 100644
--- a/BUGS
+++ b/BUGS
@@ -1,3 +1,9 @@
+Tue Jun  8 01:30:39 CEST 2021
+
+Unlike the Plan 9 version, the UNIX version of htindex doesn't
+currently support Latin-1 characters.  At least that seems to be
+the case on NetBSD.  I've commented out the relevant lines for now.
+
 Sun Jan 31 01:16:11 CET 2021
 
 Because excess spaces are stripped when processing inline formatting,
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..f9d42dd
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,41 @@
+# Build, test and install the UNIX port of the em tools.
+# Only portable make syntax is used.
+
+# Installation prefix; override on the command line, e.g.
+# "make PREFIX=/opt/em install".  DESTDIR allows staged installs.
+PREFIX = /usr/local
+BINDIR = $(DESTDIR)$(PREFIX)/bin
+
+# "." is put first in PATH so the in-tree scripts and the helpers they
+# call by bare name are found.
+test.html: .unix test.em
+	export PATH=.:$$PATH; em.sh test.em | htwrap.sh -t | htindex.sh -s > test.html
+
+README.html: .unix README
+	export PATH=.:$$PATH; em.sh README | htindex.sh -s > README.html
+
+# The scripts are installed without their .sh suffix.
+install: .unix
+	mkdir -p $(BINDIR)
+	install em.sh $(BINDIR)/em
+	install htindex.sh $(BINDIR)/htindex
+	install htwrap.sh $(BINDIR)/htwrap
+	install emcollect emparse $(BINDIR)
+
+# -f keeps "make clean" from failing when .unix does not exist.
+clean:
+	rm -f .unix
+
+# Stamp file: rewrite the #! line of the awk scripts from /bin/awk to
+# /usr/bin/awk, once.  sed -i is avoided because its argument syntax
+# differs between sed implementations; writing back with cat keeps the
+# files' modes (they must stay executable).
+.unix:
+	for f in emcollect emparse; do \
+		sed '1s,^#!/bin/awk,#!/usr/bin/awk,' "$$f" > "$$f.tmp" && \
+		cat "$$f.tmp" > "$$f" && \
+		rm -f "$$f.tmp" || exit 1; \
+	done
+	@touch .unix
+
+.PHONY: install clean
diff --git a/em.sh b/em.sh
new file mode 100755
index 0000000..0b4275f
--- /dev/null
+++ b/em.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+
+# em -- limited hypertext markup language
+#
+# usage: em [file]
+#
+# Runs emcollect over the input and exports each "key=url" line it prints
+# as the environment variable refkey, then runs emparse on the same input.
+# Without a file argument the input is read from standard input.
+
+IFS='
+'
+
+# Start from a known state; a stray $tmp inherited from the environment
+# must never be removed.
+tmp=
+
+if [ $# = 0 ]; then
+	# The input is read twice (emcollect, then emparse), so spool stdin.
+	tmp=`mktemp` || exit 1
+	trap 'rm -f "$tmp"' EXIT
+	trap 'exit 1' HUP INT TERM
+	file=$tmp
+	cat > "$file"
+else
+	file=$1
+fi
+
+export file
+emcollect "$file" |
+perl -w -ne '
+	chomp;
+	# Split at the first "=" only, the URL itself may contain "=".
+	my ($ref, $url) = split /=/, $_, 2;
+	next unless defined $url;
+	$ENV{"ref".$ref} = $url;
+	END {
+		# Replace perl with emparse so that it inherits the ref* variables.
+		exec("emparse", $ENV{file});
+		die "$!\n";
+	}
+'
diff --git a/emparse b/emparse
index ed3a4c7..9594f82 100755
--- a/emparse
+++ b/emparse
@@ -191,9 +191,9 @@ function item(level, type, line) {
 		printf "<li>"
 	}
 	if (type == "ol") {
-		match($0, "[0-9a-z]+\.")
+		match($0, "[0-9a-z]+\\.")
 		v = substr($0, RSTART, RLENGTH-1)
-		sub("^ +[0-9a-z]+\. ", "")
+		sub("^ +[0-9a-z]+\\. ", "")
 		listvalid(v)
 		printf "<li value=\"%s\" style=\"list-style-type: %s\">", listnum(v), listtype(v)
 	}
diff --git a/htindex.sh b/htindex.sh
new file mode 100755
index 0000000..54ca8d8
--- /dev/null
+++ b/htindex.sh
@@ -0,0 +1,99 @@
+#!/bin/sh
+
+# htindex -- add ids to headings and print index to stderr
+#
+# Copies the named files (standard input if none) to standard output,
+# adding an id attribute to each <hN> heading.  Unless -s is given, a
+# tab-separated index line is written to standard error per heading.
+
+usage() { echo "usage: $0 [-s] [file ...]" 1>&2; exit 1; }
+
+flags=
+case "$1" in
+-s)	flags=1; shift ;;
+-*)	usage ;;
+esac
+export flags
+
+# Emit the named files, or standard input when there are none.
+# Written as if/else so that a failing cat "$@" does not fall through
+# to a bare cat, which would then wait on standard input.
+source() {
+	if [ $# -gt 0 ]; then
+		cat "$@"
+	else
+		cat
+	fi
+}
+
+source "$@" | awk '
+	# heading opened and closed on one line
+	/<h[0-9]>.*<\/h[0-9]/ {
+		heading($0)
+		next
+	}
+	# heading opened but not closed: start collecting it in s
+	/<h[0-9]>/ {
+		open = 1
+		s = $0
+		next
+	}
+	# closing line of a collected heading
+	open && /<\/h[0-9]>/ {
+		open = 0
+		s = s "\n" $0
+		heading(s)
+		next
+	}
+	open { s = s " " $0; next }
+	{ print }
+
+	# Print heading s with an id attribute derived from its text and,
+	# unless in silent mode, an index line on stderr.  i is a local.
+	function heading(s,    i) {
+		i = s
+		sub("^.*<h[0-9]>", "", i)
+		sub("</h[0-9]>.*$", "", i)
+		i = toascii(i)
+		sub("^<h[0-9]", "& id=\"" i "\"", s)
+		print s
+		if (!ENVIRON["flags"]) { # if not silent mode
+			sub("^<h", "", s)
+			sub(" id=\"", "\t", s)
+			sub("\">", "\t", s)
+			sub("<\\/h[0-9]>$", "", s)
+			print s > "/dev/stderr"
+		}
+	}
+
+	# Reduce heading text s to an id: lower case, "/" and runs of
+	# whitespace become "-", whitespace around "+" and "=" is dropped,
+	# and characters other than a-z, 0-9, _, -, = and + are removed.
+	# Whitespace is spelled \t and \n so that editors expanding tabs
+	# cannot silently change the character classes.
+	function toascii(s) {
+		s = tolower(s)
+		# Latin-1 transliteration, disabled for now:
+		#gsub("[ÀÁÂÃÄÅàáâãäå]", "a", s)
+		#gsub("[Ææ]", "ae", s)
+		#gsub("[Çç]", "c", s)
+		#gsub("[Ðð]", "dh", s)
+		#gsub("[ÈÉÊËèéêë]", "e", s)
+		#gsub("[ÌÍÎÏìíîï]", "i", s)
+		#gsub("[Ññ]", "n", s)
+		#gsub("[ÒÓÔÕÖØòóôõöø]", "o", s)
+		#gsub("[ß]", "ss", s)
+		#gsub("[Þþ]", "th", s)
+		#gsub("[ÙÚÛÜùúûü]", "u", s)
+		#gsub("[×]", "x", s)
+		#gsub("[Ýýÿ]", "y", s)
+		gsub("[/]", "-", s)
+		gsub("[ \t\n]+\\+[ \t\n]+", "+", s)
+		gsub("[ \t\n]+=[ \t\n]+", "=", s)
+		gsub("[ \t\n]+", "-", s)
+		gsub("[^-=+a-z0-9_ \t]", "", s)
+		gsub("-\\+-", "+", s)
+		gsub("--*", "-", s)
+		return s
+	}
+'
diff --git a/htwrap.sh b/htwrap.sh
new file mode 100755
index 0000000..3d68946
--- /dev/null
+++ b/htwrap.sh
@@ -0,0 +1,91 @@
+#!/bin/sh
+
+# htwrap -- create standalone HTML document
+#
+# Wraps the named files (standard input if none) in a doctype, an
+# <html> element and, unless -C is given, a Content-Type <meta> element.
+#
+#	-t		take the <title> from an <h1> on the first input line
+#	-C		omit the Content-Type <meta> element
+#	-c charset	charset to declare (default UTF-8)
+#	-d dir		dir attribute of <html>
+#	-l lang		lang attribute of <html>
+#	-v doctype	5 (default), 4, 4s, x, xhtml, xs or xhtmls
+
+usage() {
+	echo "usage: $0 [-t] [-C] [-c charset] [-d dir] [-l lang] [-v doctype]"\
+		"[file ...]" 1>&2
+	exit 1
+}
+
+# Start with every flag unset so values cannot leak in from the environment.
+flagt= flagC= flagc= flagd= flagl= flagv=
+
+args=`getopt tCc:d:l:v: $*` || usage
+set -- $args
+
+while [ $# -gt 0 ]; do
+	case "$1" in
+	-t)	flagt=$1 ;;
+	-C)	flagC=$1 ;;
+	-c)	flagc=$2; shift ;;
+	-d)	flagd=$2; shift ;;
+	-l)	flagl=$2; shift ;;
+	-v)	flagv=$2; shift ;;
+	--)	shift; break ;;
+	esac
+	shift
+done
+
+case "$flagv" in
+5|'')	echo '<!DOCTYPE html>'
+	;;
+4)	echo '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+"http://www.w3.org/TR/html4/loose.dtd">'
+	;;
+4s)	echo '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+"http://www.w3.org/TR/html4/strict.dtd">'
+	;;
+x|xhtml)
+	echo '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
+	;;
+xs|xhtmls)
+	echo '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
+	;;
+*)
+	echo error: unknown doctype "$flagv" 1>&2
+	usage
+	;;
+esac
+
+# printf rather than echo -n: the -n option of echo is not portable.
+printf '%s' '<html'
+[ -n "$flagl" ] && printf ' lang="%s"' "$flagl"
+[ -n "$flagd" ] && printf ' dir="%s"' "$flagd"
+echo '>'
+
+if [ -z "$flagC" ]; then
+	echo '<meta http-equiv="Content-Type" content="text/html; charset='"${flagc:-UTF-8}"'">'
+fi
+
+# Emit the named files, or standard input when there are none.
+# Written as if/else so that a failing cat "$@" does not fall through
+# to a bare cat, which would then wait on standard input.
+source() {
+	if [ $# -gt 0 ]; then
+		cat "$@"
+	else
+		cat
+	fi
+}
+
+if [ -n "$flagt" ]; then # try to retrieve title from <h1> on first line
+	source "$@" | sed '1s/^<h1>\(.*\)<\/h1>/<title>\1<\/title>\
+&/'
+else
+	source "$@"
+fi
+
+echo '</html>'