diff options
-rw-r--r-- | BUGS | 6 | ||||
-rw-r--r-- | Makefile | 18 | ||||
-rwxr-xr-x | em.sh | 30 | ||||
-rwxr-xr-x | emparse | 4 | ||||
-rwxr-xr-x | htindex.sh | 77 | ||||
-rwxr-xr-x | htwrap.sh | 78 |
6 files changed, 211 insertions, 2 deletions
@@ -1,3 +1,9 @@ +Tue Jun 8 01:30:39 CEST 2021 + +Unlike the Plan 9 version, the UNIX version of htindex doesn't +currently support Latin-1 characters. At least that seems to be +the case on NetBSD. I've commented out the relevant lines for now. + Sun Jan 31 01:16:11 CET 2021 Because excess spaces are stripped when processing inline formatting, diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..f9d42dd --- /dev/null +++ b/Makefile @@ -0,0 +1,18 @@ +test.html: .unix test.em + export PATH=.:$$PATH; em.sh test.em | htwrap.sh -t | htindex.sh -s > test.html + +README.html: .unix README + export PATH=.:$$PATH; em.sh README | htindex.sh -s > README.html + +install: .unix + install em.sh /usr/local/bin/em + install htindex.sh /usr/local/bin/htindex + install htwrap.sh /usr/local/bin/htwrap + install emcollect emparse /usr/local/bin + +clean: + rm .unix + +.unix: + sed -i '1s,^#!/bin/awk,#!/usr/bin/awk,' emcollect emparse + @touch .unix @@ -0,0 +1,30 @@ +#!/bin/sh + +# em -- limited hypertext markup language + +IFS=' +' + +if [ $# = 0 ]; then + file=`mktemp` + tmp=$file + cat > $file +else + file=$1 +fi + +export file +emcollect $file | +perl -w -ne ' + chomp; + @parts = split /=/; + $url = $parts[1]; + $url .= "=$_" for @parts[3..$#parts]; + $ENV{"ref".$parts[0]} = $url; + END { + exec("emparse", $ENV{file}); + die "$!\n"; + } +' + +[ -z "$tmp" ] || rm $tmp @@ -191,9 +191,9 @@ function item(level, type, line) { printf "<li>" } if (type == "ol") { - match($0, "[0-9a-z]+\.") + match($0, "[0-9a-z]+\\.") v = substr($0, RSTART, RLENGTH-1) - sub("^ +[0-9a-z]+\. ", "") + sub("^ +[0-9a-z]+\\. 
", "") listvalid(v) printf "<li value=\"%s\" style=\"list-style-type: %s\">", listnum(v), listtype(v) } diff --git a/htindex.sh b/htindex.sh new file mode 100755 index 0000000..54ca8d8 --- /dev/null +++ b/htindex.sh @@ -0,0 +1,77 @@ +#!/bin/sh + +# index -- add ids to headings and print index to stderr + +usage() { echo "usage: $0 [-s] [file ...]" 1>&2; exit 1; } + +flags= +case "$1" in +-*) [ x"$1" = x-s ] && { flags=1; shift; } || usage ;; +*) ;; +esac +export flags + +source() { + [ $# -gt 0 ] && cat "$@" || cat +} + +source "$@" | awk ' + /<h[0-9]>.*<\/h[0-9]/ { + heading($0) + next + } + /<h[0-9]>/ { + open = 1 + s = $0 + next + } + open && /<\/h[0-9]>/ { + open = 0 + s = s "\n" $0 + heading(s) + next + } + open { s = s " " $0; next } + { print } + + function heading(s) { + i = s + sub("^.*<h[0-9]>", "", i) + sub("</h[0-9]>.*$", "", i) + i = toascii(i) + sub("^<h[0-9]", "& id=\"" i "\"", s) + print s + if (!ENVIRON["flags"]) { # if not silent mode + sub("^<h", "", s) + sub(" id=\"", " ", s) + sub("\">", " ", s) + sub("<\\/h[0-9]>$", "", s) + print s > "/dev/stderr" + } + } + + function toascii(s) { + s = tolower(s) + #gsub("[ÀÁÂÃÄÅàáâãäå]", "a", s) + #gsub("[Ææ]", "ae", s) + #gsub("[Çç]", "c", s) + #gsub("[Ðð]", "dh", s) + #gsub("[ÈÉÊËèéêë]", "e", s) + #gsub("[ÌÍÎÏìíîï]", "i", s) + #gsub("[Ññ]", "n", s) + #gsub("[ÒÓÔÕÖØòóôõöø]", "o", s) + #gsub("[ß]", "ss", s) + #gsub("[Þþ]", "th", s) + #gsub("[ÙÚÛÜùúûü]", "u", s) + #gsub("[×]", "x", s) + #gsub("[Ýýÿ]", "y", s) + gsub("[/]", "-", s) + gsub("[ \n]+\\+[ \n]+", "+", s) + gsub("[ \n]+=[ \n]+", "=", s) + gsub("[ \n]+", "-", s) + gsub("[^-=+a-z0-9_ ]", "", s) + gsub("-\\+-", "+", s) + gsub("--*", "-", s) + return s + } +' diff --git a/htwrap.sh b/htwrap.sh new file mode 100755 index 0000000..3d68946 --- /dev/null +++ b/htwrap.sh @@ -0,0 +1,78 @@ +#!/bin/sh + +# htwrap -- create standalone HTML document + +usage() { + echo "usage: $0 [-t] [-C] [-c charset] [-d dir] [-l lang] [-v doctype]"\ + 1>&2 + exit 1 +} + 
# htwrap main body: parse the command line, then write the doctype, the
# opening <html> tag, an optional charset <meta>, the input document and
# the closing </html> tag to standard output.
#
# Options (see usage):
#   -t          copy an <h1> found on the first input line into a <title>
#   -C          do not emit the charset <meta> element
#   -c charset  charset for the <meta> element (default UTF-8)
#   -d dir      value of the dir attribute on <html>
#   -l lang     value of the lang attribute on <html>
#   -v doctype  one of 5, 4, 4s, x, xhtml, xs, xhtmls (default 5)
# Remaining arguments are input files; without any, stdin is read.

# Start from a clean slate so same-named environment variables cannot
# switch options on.
flagt= flagC= flagc= flagd= flagl= flagv=

# The getopts builtin keeps every argument intact, so file names that
# contain blanks or glob characters survive option parsing (the legacy
# getopt(1) + "set -- $args" idiom re-split them). getopts prints its
# own diagnostic for an unknown option or a missing option argument.
while getopts tCc:d:l:v: opt; do
	case "$opt" in
	t) flagt=-t ;;
	C) flagC=-C ;;
	c) flagc=$OPTARG ;;
	d) flagd=$OPTARG ;;
	l) flagl=$OPTARG ;;
	v) flagv=$OPTARG ;;
	*) usage ;;
	esac
done
shift $((OPTIND - 1))

case "$flagv" in
5|'')
	# HTML5 is also the default when -v is not given.
	echo '<!DOCTYPE html>'
	;;
4)
	echo '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">'
	;;
4s)
	echo '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">'
	;;
x|xhtml)
	echo '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
	;;
xs|xhtmls)
	echo '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
	;;
*)
	printf 'error: unknown doctype %s\n' "$flagv" 1>&2
	usage
	;;
esac

# Opening tag. printf replaces "echo -n", whose behaviour differs between
# sh implementations. The lang attribute now gets its opening quote; it
# used to come out as lang=en" (malformed HTML).
printf '%s' '<html'
if [ -n "$flagl" ]; then
	printf ' lang="%s"' "$flagl"
fi
if [ -n "$flagd" ]; then
	printf ' dir="%s"' "$flagd"
fi
echo '>'

# Charset declaration, unless suppressed with -C.
if [ -z "$flagC" ]; then
	printf '<meta http-equiv="Content-Type" content="text/html; charset=%s">\n' \
		"${flagc:-UTF-8}"
fi

# Print the named files, or stdin when none are given. An explicit
# if/else keeps a failing "cat file" from falling through to a second
# cat that would silently block on stdin.
source() {
	if [ $# -gt 0 ]; then
		cat "$@"
	else
		cat
	fi
}

if [ -n "$flagt" ]; then # try to retrieve title from <h1> on first line
	source "$@" | sed '1s/^<h1>\(.*\)<\/h1>/<title>\1<\/title>\
&/'
else
	source "$@"
fi

echo '</html>'