aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--BUGS6
-rw-r--r--Makefile18
-rwxr-xr-xem.sh30
-rwxr-xr-xemparse4
-rwxr-xr-xhtindex.sh77
-rwxr-xr-xhtwrap.sh78
6 files changed, 211 insertions, 2 deletions
diff --git a/BUGS b/BUGS
index cb764d6..7fa9dc6 100644
--- a/BUGS
+++ b/BUGS
@@ -1,3 +1,9 @@
+Tue Jun 8 01:30:39 CEST 2021
+
+Unlike the Plan 9 version, the UNIX version of htindex doesn't
+currently support Latin-1 characters. At least that seems to be
+the case on NetBSD. I've commented out the relevant lines for now.
+
Sun Jan 31 01:16:11 CET 2021
Because excess spaces are stripped when processing inline formatting,
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..f9d42dd
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,30 @@
+# Makefile for the UNIX port of em: renders the test page and the README
+# to HTML and installs the tools.
+
+# "." is put first in PATH so that the scripts in this directory, and the
+# emcollect and emparse programs that em.sh runs, are found.
+test.html: .unix test.em
+	export PATH=.:$$PATH; em.sh test.em | htwrap.sh -t | htindex.sh -s > $@
+
+README.html: .unix README
+	export PATH=.:$$PATH; em.sh README | htindex.sh -s > $@
+
+# install and clean name actions, not files: keep a stray file called
+# "install" or "clean" from making them look up to date.
+.PHONY: install clean
+
+install: .unix
+	install em.sh /usr/local/bin/em
+	install htindex.sh /usr/local/bin/htindex
+	install htwrap.sh /usr/local/bin/htwrap
+	install emcollect emparse /usr/local/bin
+
+# -f: do not fail when the stamp file was never created.
+clean:
+	rm -f .unix
+
+# Stamp file: records that the #! line of the awk scripts has been
+# rewritten from /bin/awk to /usr/bin/awk.
+.unix:
+	sed -i '1s,^#!/bin/awk,#!/usr/bin/awk,' emcollect emparse
+	@touch $@
diff --git a/em.sh b/em.sh
new file mode 100755
index 0000000..0b4275f
--- /dev/null
+++ b/em.sh
@@ -0,0 +1,44 @@
+#!/bin/sh
+
+# em -- limited hypertext markup language
+#
+# usage: em.sh [file]
+#
+# Pipes the output of emcollect, run on the input, through perl, which
+# exports the text after the first "=" of every line as the environment
+# variable ref<text before it> and then runs emparse on the same input.
+# Without a file argument, standard input is first saved to a temporary
+# file, since both emcollect and emparse are handed the file by name.
+
+IFS='
+'
+
+tmp=
+if [ $# = 0 ]; then
+	file=`mktemp` || exit 1
+	tmp=$file
+	cat > "$file"
+else
+	file=$1
+fi
+
+export file
+emcollect "$file" |
+perl -w -ne '
+	chomp;
+	# Split at the first "=" only, so that a value containing "=" (such
+	# as a URL with a query string) is kept whole.  The former code
+	# re-joined the fields from index 3 and so dropped field 2.
+	($key, $url) = split /=/, $_, 2;
+	$ENV{"ref".$key} = $url;
+	END {
+		exec("emparse", $ENV{file});
+		die "$!\n";
+	}
+'
+status=$?
+
+# Remove the temporary copy of standard input, if any, and report the
+# exit status of the pipeline rather than that of the cleanup.
+[ -z "$tmp" ] || rm -f "$tmp"
+exit $status
diff --git a/emparse b/emparse
index ed3a4c7..9594f82 100755
--- a/emparse
+++ b/emparse
@@ -191,9 +191,9 @@ function item(level, type, line) {
printf "<li>"
}
if (type == "ol") {
- match($0, "[0-9a-z]+\.")
+ match($0, "[0-9a-z]+\\.")
v = substr($0, RSTART, RLENGTH-1)
- sub("^ +[0-9a-z]+\. ", "")
+ sub("^ +[0-9a-z]+\\. ", "")
listvalid(v)
printf "<li value=\"%s\" style=\"list-style-type: %s\">", listnum(v), listtype(v)
}
diff --git a/htindex.sh b/htindex.sh
new file mode 100755
index 0000000..54ca8d8
--- /dev/null
+++ b/htindex.sh
@@ -0,0 +1,102 @@
+#!/bin/sh
+
+# index -- add ids to headings and print index to stderr
+#
+# usage: htindex.sh [-s] [file ...]
+#
+# Copies the named files (standard input if none) to standard output,
+# adding an id attribute to each <hN> heading tag that starts a line, and
+# prints one index line per heading to standard error unless -s is given.
+
+usage() { echo "usage: $0 [-s] [file ...]" 1>&2; exit 1; }
+
+# Only -s is accepted, and only as the first argument.  The setting is
+# exported because the awk program reads it through ENVIRON.
+flags=
+case "$1" in
+-*)	[ x"$1" = x-s ] && { flags=1; shift; } || usage ;;
+*)	;;
+esac
+export flags
+
+# Concatenate the named files, or standard input when there are none.
+# Plain cat "$@" does both.  The former
+#	[ $# -gt 0 ] && cat "$@" || cat
+# additionally fell back to reading standard input whenever cat failed
+# on one of the files, e.g. a missing one.
+source() {
+	cat "$@"
+}
+
+source "$@" | awk '
+	# Heading opened and closed on the same line.
+	/<h[0-9]>.*<\/h[0-9]/ {
+		heading($0)
+		next
+	}
+	# Heading opened but not closed on this line: start buffering.
+	/<h[0-9]>/ {
+		open = 1
+		s = $0
+		next
+	}
+	# Line closing a buffered heading.
+	open && /<\/h[0-9]>/ {
+		open = 0
+		s = s "\n" $0
+		heading(s)
+		next
+	}
+	# Lines in between are appended to the buffer, separated by a space.
+	open { s = s " " $0; next }
+	{ print }
+	# Flush a heading that was never closed instead of dropping it.
+	END { if (open) print s }
+
+	# Print heading s with an id attribute built from its text; unless
+	# in silent mode, also print "level id text" to standard error.
+	function heading(s) {
+		i = s
+		sub("^.*<h[0-9]>", "", i)
+		sub("</h[0-9]>.*$", "", i)
+		i = toascii(i)
+		sub("^<h[0-9]", "& id=\"" i "\"", s)
+		print s
+		if (!ENVIRON["flags"]) {	# if not silent mode
+			sub("^<h", "", s)
+			sub(" id=\"", " ", s)
+			sub("\">", " ", s)
+			sub("<\\/h[0-9]>$", "", s)
+			print s > "/dev/stderr"
+		}
+	}
+
+	# Turn heading text s into an id: lower case, "/" and white space
+	# become "-", white space around "+" and "=" is dropped, and every
+	# character outside [-=+a-z0-9_ ] is removed.
+	function toascii(s) {
+		s = tolower(s)
+		# Latin-1 transliteration, disabled for now (see BUGS).
+		#gsub("[ÀÁÂÃÄÅàáâãäå]", "a", s)
+		#gsub("[Ææ]", "ae", s)
+		#gsub("[Çç]", "c", s)
+		#gsub("[Ðð]", "dh", s)
+		#gsub("[ÈÉÊËèéêë]", "e", s)
+		#gsub("[ÌÍÎÏìíîï]", "i", s)
+		#gsub("[Ññ]", "n", s)
+		#gsub("[ÒÓÔÕÖØòóôõöø]", "o", s)
+		#gsub("[ß]", "ss", s)
+		#gsub("[Þþ]", "th", s)
+		#gsub("[ÙÚÛÜùúûü]", "u", s)
+		#gsub("[×]", "x", s)
+		#gsub("[Ýýÿ]", "y", s)
+		gsub("[/]", "-", s)
+		gsub("[ \n]+\\+[ \n]+", "+", s)
+		gsub("[ \n]+=[ \n]+", "=", s)
+		gsub("[ \n]+", "-", s)
+		gsub("[^-=+a-z0-9_ ]", "", s)
+		gsub("-\\+-", "+", s)
+		gsub("--*", "-", s)
+		return s
+	}
+'
diff --git a/htwrap.sh b/htwrap.sh
new file mode 100755
index 0000000..3d68946
--- /dev/null
+++ b/htwrap.sh
@@ -0,0 +1,92 @@
+#!/bin/sh
+
+# htwrap -- create standalone HTML document
+#
+# Writes a doctype, an opening <html> tag and a Content-Type <meta>,
+# then the named files (standard input if none), then </html>.
+#
+#	-t		copy an <h1> starting the first input line into a <title>
+#	-C		omit the Content-Type <meta>
+#	-c charset	charset named in the <meta> (default UTF-8)
+#	-d dir		dir attribute of <html>
+#	-l lang		lang attribute of <html>
+#	-v doctype	5 (default), 4, 4s, x, xhtml, xs or xhtmls
+
+usage() {
+	echo "usage: $0 [-t] [-C] [-c charset] [-d dir] [-l lang] [-v doctype] [file ...]" 1>&2
+	exit 1
+}
+
+# Start from empty flags so that same-named variables inherited from
+# the environment cannot act as options.
+flagt= flagC= flagc= flagd= flagl= flagv=
+
+# getopts, unlike the former getopt $* followed by set -- $args, keeps
+# option arguments and file names containing white space intact.
+while getopts tCc:d:l:v: opt; do
+	case "$opt" in
+	t)	flagt=-t ;;
+	C)	flagC=-C ;;
+	c)	flagc=$OPTARG ;;
+	d)	flagd=$OPTARG ;;
+	l)	flagl=$OPTARG ;;
+	v)	flagv=$OPTARG ;;
+	*)	usage ;;
+	esac
+done
+shift $((OPTIND - 1))
+
+case "$flagv" in
+5|'')
+	echo '<!DOCTYPE html>'
+	;;
+4)	echo '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+"http://www.w3.org/TR/html4/loose.dtd">'
+	;;
+4s)	echo '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+"http://www.w3.org/TR/html4/strict.dtd">'
+	;;
+x|xhtml)
+	echo '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
+	;;
+xs|xhtmls)
+	echo '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
+	;;
+*)
+	echo error: unknown doctype "$flagv" 1>&2
+	usage
+	;;
+esac
+
+# printf instead of echo -n, whose handling of -n differs between
+# shells.  The opening quote of the lang value used to be missing, so
+# that -l en produced <html lang=en">.
+printf '%s' '<html'
+[ -n "$flagl" ] && printf ' lang="%s"' "$flagl"
+[ -n "$flagd" ] && printf ' dir="%s"' "$flagd"
+echo '>'
+
+if [ -z "$flagC" ]; then
+	printf '<meta http-equiv="Content-Type" content="text/html; charset=%s">\n' \
+		"${flagc:-UTF-8}"
+fi
+
+# Concatenate the named files, or standard input when there are none.
+# Plain cat "$@" does both; the former
+#	[ $# -gt 0 ] && cat "$@" || cat
+# additionally fell back to reading standard input whenever cat failed
+# on one of the files.
+source() {
+	cat "$@"
+}
+
+if [ -n "$flagt" ]; then	# try to retrieve title from <h1> on first line
+	source "$@" | sed '1s/^<h1>\(.*\)<\/h1>/<title>\1<\/title>\
+&/'
+else
+	source "$@"
+fi
+
+echo '</html>'