From b18302253c93727b28da8d128fbb204c21b62815 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?John=20Ankarstr=C3=B6m?=
Date: Mon, 7 Jun 2021 20:58:10 +0200
Subject: Add UNIX support

---
Review notes (free-form area after the "---" separator; not part of the
commit message):

 * em.sh reads lines from emcollect and exports each one to emparse as an
   environment variable named "ref" followed by the text before the first
   "=", with the text after that "=" as its value.  The line is now split
   on the first "=" only.  The previous code split on every "=" and
   re-joined fields 3 and up, which dropped field 2 and therefore
   corrupted any value that itself contained "=".
 * em.sh quotes "$file" and "$tmp".  IFS is set to a newline only, but
   unquoted expansions are still subject to pathname expansion.
 * Makefile: "clean" uses rm -f so that it succeeds when the .unix stamp
   file has not been created yet.
 * Line counts inside each hunk are unchanged, so the hunk headers and the
   diffstat below remain accurate.

 BUGS       |  6 +++++
 Makefile   | 18 +++++++++++++++
 em.sh      | 30 ++++++++++++++++++++++++
 emparse    |  4 ++--
 htindex.sh | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 htwrap.sh  | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 211 insertions(+), 2 deletions(-)
 create mode 100644 Makefile
 create mode 100755 em.sh
 create mode 100755 htindex.sh
 create mode 100755 htwrap.sh

diff --git a/BUGS b/BUGS
index cb764d6..7fa9dc6 100644
--- a/BUGS
+++ b/BUGS
@@ -1,3 +1,9 @@
+Tue Jun 8 01:30:39 CEST 2021
+
+Unlike the Plan 9 version, the UNIX version of htindex doesn't
+currently support Latin-1 characters. At least that seems to be
+the case on NetBSD. I've commented out the relevant lines for now.
+
 Sun Jan 31 01:16:11 CET 2021
 
 Because excess spaces are stripped when processing inline formatting,
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..f9d42dd
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,18 @@
+test.html: .unix test.em
+	export PATH=.:$$PATH; em.sh test.em | htwrap.sh -t | htindex.sh -s > test.html
+
+README.html: .unix README
+	export PATH=.:$$PATH; em.sh README | htindex.sh -s > README.html
+
+install: .unix
+	install em.sh /usr/local/bin/em
+	install htindex.sh /usr/local/bin/htindex
+	install htwrap.sh /usr/local/bin/htwrap
+	install emcollect emparse /usr/local/bin
+
+clean:
+	rm -f .unix
+
+.unix:
+	sed -i '1s,^#!/bin/awk,#!/usr/bin/awk,' emcollect emparse
+	@touch .unix
diff --git a/em.sh b/em.sh
new file mode 100755
index 0000000..0b4275f
--- /dev/null
+++ b/em.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+# em -- limited hypertext markup language
+
+IFS='
+'
+
+if [ $# = 0 ]; then
+	file=`mktemp`
+	tmp=$file
+	cat > "$file"
+else
+	file=$1
+fi
+
+export file
+emcollect "$file" |
+perl -w -ne '
+	chomp;
+	# Split on the first "=" only: the key comes before it and the
+	# URL, which may itself contain "=", is everything after it.
+	($name, $url) = split /=/, $_, 2;
+	$ENV{"ref$name"} = $url;
+	END {
+		exec("emparse", $ENV{file});
+		die "$!\n";
+	}
+'
+
+[ -z "$tmp" ] || rm "$tmp"
diff --git a/emparse b/emparse
index ed3a4c7..9594f82 100755
--- a/emparse
+++ b/emparse
@@ -191,9 +191,9 @@ function item(level, type, line) {
 		printf "
  • " } if (type == "ol") { - match($0, "[0-9a-z]+\.") + match($0, "[0-9a-z]+\\.") v = substr($0, RSTART, RLENGTH-1) - sub("^ +[0-9a-z]+\. ", "") + sub("^ +[0-9a-z]+\\. ", "") listvalid(v) printf "
  • ", listnum(v), listtype(v) } diff --git a/htindex.sh b/htindex.sh new file mode 100755 index 0000000..54ca8d8 --- /dev/null +++ b/htindex.sh @@ -0,0 +1,77 @@ +#!/bin/sh + +# index -- add ids to headings and print index to stderr + +usage() { echo "usage: $0 [-s] [file ...]" 1>&2; exit 1; } + +flags= +case "$1" in +-*) [ x"$1" = x-s ] && { flags=1; shift; } || usage ;; +*) ;; +esac +export flags + +source() { + [ $# -gt 0 ] && cat "$@" || cat +} + +source "$@" | awk ' + /.*<\/h[0-9]/ { + heading($0) + next + } + // { + open = 1 + s = $0 + next + } + open && /<\/h[0-9]>/ { + open = 0 + s = s "\n" $0 + heading(s) + next + } + open { s = s " " $0; next } + { print } + + function heading(s) { + i = s + sub("^.*", "", i) + sub(".*$", "", i) + i = toascii(i) + sub("^", " ", s) + sub("<\\/h[0-9]>$", "", s) + print s > "/dev/stderr" + } + } + + function toascii(s) { + s = tolower(s) + #gsub("[ÀÁÂÃÄÅàáâãäå]", "a", s) + #gsub("[Ææ]", "ae", s) + #gsub("[Çç]", "c", s) + #gsub("[Ðð]", "dh", s) + #gsub("[ÈÉÊËèéêë]", "e", s) + #gsub("[ÌÍÎÏìíîï]", "i", s) + #gsub("[Ññ]", "n", s) + #gsub("[ÒÓÔÕÖØòóôõöø]", "o", s) + #gsub("[ß]", "ss", s) + #gsub("[Þþ]", "th", s) + #gsub("[ÙÚÛÜùúûü]", "u", s) + #gsub("[×]", "x", s) + #gsub("[Ýýÿ]", "y", s) + gsub("[/]", "-", s) + gsub("[ \n]+\\+[ \n]+", "+", s) + gsub("[ \n]+=[ \n]+", "=", s) + gsub("[ \n]+", "-", s) + gsub("[^-=+a-z0-9_ ]", "", s) + gsub("-\\+-", "+", s) + gsub("--*", "-", s) + return s + } +' diff --git a/htwrap.sh b/htwrap.sh new file mode 100755 index 0000000..3d68946 --- /dev/null +++ b/htwrap.sh @@ -0,0 +1,78 @@ +#!/bin/sh + +# htwrap -- create standalone HTML document + +usage() { + echo "usage: $0 [-t] [-C] [-c charset] [-d dir] [-l lang] [-v doctype]"\ + 1>&2 + exit 1 +} + +args=`getopt tCc:d:l:v: $*` +[ $? 
-ne 0 ] && usage +set -- $args + +while [ $# -gt 0 ]; do + case "$1" in + -t) flagt=$1 ;; + -C) flagC=$1 ;; + -c) flagc=$2; shift ;; + -d) flagd=$2; shift ;; + -l) flagl=$2; shift ;; + -v) flagv=$2; shift ;; + --) shift; break ;; + esac + shift +done + +case "$flagv" in +5) echo '' + ;; +4) echo '' + ;; +4s) echo '' + ;; +x|xhtml) + echo '' + ;; +xs|xhtmls) + echo '' + ;; +'') + echo '' + ;; +*) + echo error: unknown doctype "$flagv" 1>&2 + usage + ;; +esac + +echo -n '' + +if [ -z "$flagC" ]; then + if [ ! -z "$flagc" ]; then + echo '' + else + echo '' + fi +fi + +source() { + [ $# -gt 0 ] && cat "$@" || cat +} + +if [ ! -z "$flagt" ]; then # try to retrieve title from

    on first line + source "$@" | sed '1s/^

    \(.*\)<\/h1>/\1<\/title>\ +&/' +else + source "$@" +fi + +echo '</html>' -- cgit v1.2.3