From 3ab42a5fb02a2652cae089bd3f936729fd49382d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?John=20Ankarstr=C3=B6m?= <john@ankarstrom.se>
Date: Sun, 31 Jan 2021 17:53:13 +0000
Subject: Support [A-Za-z] in list item values

Letter-numbered items are awkward to express in HTML/CSS (a numeric
value attribute plus a list-style-type style); all the better if em
can handle it for you.
---
 README.html | 24 ++++++++++----------
 emcollect   |  6 ++---
 emparse     | 75 +++++++++++++++++++++++++++++++++++++++++--------------------
 mkfile      |  2 +-
 test.em     |  6 ++---
 test.html   | 14 ++++++------
 6 files changed, 77 insertions(+), 50 deletions(-)
diff --git a/README.html b/README.html
index 3b19f94..34df622 100644
--- a/README.html
+++ b/README.html
@@ -3,9 +3,9 @@
 maximally readable.</b> It is similar to Markdown, but it has a few key advantages:
 </p>
 <ol>
-<li value="1">It is more readable.
-</li><li value="2">It is simpler to parse.
-</li><li value="3">There is <i>not</i> more than one way to do it: for any given HTML, there is
+<li value="1" style="list-style-type: decimal">It is more readable.
+</li><li value="2" style="list-style-type: decimal">It is simpler to parse.
+</li><li value="3" style="list-style-type: decimal">There is <i>not</i> more than one way to do it: for any given HTML, there is
 never more than a single possible em representation.
 </li>
 </ol>
@@ -52,10 +52,10 @@ Example of *italic text*, _bold text_ and `teletype text`.
 The marks are only valid in certain positions:
 </p>
 <ol>
-<li value="1">At word borders
-</li><li value="2">At the beginning of a word after an opening parenthesis
-</li><li value="3">At the end of a word before any of <tt>.,:;?!)</tt>
-</li><li value="4">At the end of a word before a closing parenthesis followed by any of <tt>.,:;?!</tt>
+<li value="1" style="list-style-type: decimal">At word borders
+</li><li value="2" style="list-style-type: decimal">At the beginning of a word after an opening parenthesis
+</li><li value="3" style="list-style-type: decimal">At the end of a word before any of <tt>.,:;?!)</tt>
+</li><li value="4" style="list-style-type: decimal">At the end of a word before a closing parenthesis followed by any of <tt>.,:;?!</tt>
 </li>
 </ol>
 <hr/>
@@ -252,10 +252,10 @@ This is another paragraph.
 <hr/>
 <h2 id="references">References</h2>
 <ol class="reflist">
-<li value="1" id="ref1"><a href="#hyperlinks">#hyperlinks</a>
-</li><li value="2" id="ref2"><a href="../tree/emparse">../tree/emparse</a>
-</li><li value="3" id="ref3"><a href="../tree/README">../tree/README</a>
-</li><li value="4" id="ref4"><a href="../tree/test.em">../tree/test.em</a>
-</li><li value="5" id="ref5"><a href="#reference-lists">#reference-lists</a>
+<li value="1" style="list-style-type: decimal" id="ref1"><a href="#hyperlinks">#hyperlinks</a>
+</li><li value="2" style="list-style-type: decimal" id="ref2"><a href="../tree/emparse">../tree/emparse</a>
+</li><li value="3" style="list-style-type: decimal" id="ref3"><a href="../tree/README">../tree/README</a>
+</li><li value="4" style="list-style-type: decimal" id="ref4"><a href="../tree/test.em">../tree/test.em</a>
+</li><li value="5" style="list-style-type: decimal" id="ref5"><a href="#reference-lists">#reference-lists</a>
 </li>
 </ol>
diff --git a/emcollect b/emcollect
index c0f7759..de9c2d0 100755
--- a/emcollect
+++ b/emcollect
@@ -7,7 +7,7 @@ function collect(line) {
 	right = $0
 	sub("^ \\[", "", left)
 	sub("\\].*$", "", left)
-	sub("^ \\[[0-9]+\\] <", "", right)
+	sub("^ \\[[0-9a-z]+\\] <", "", right)
 	sub(">$", "", right)
 	printf "%s=%s\n", left, right
 }
@@ -15,5 +15,5 @@ function collect(line) {
 BEGIN { expectblock = 1 }
 
 /^$/							{ expectblock = 1; getline }
-expectblock && /^ \[[0-9]+\] /	{ block = "nl"; expectblock = 0 }
-block = "nl" && /^ \[[0-9]+\] <(\.\/.*|\.\.\/.*|#.*|.*\/.*)>$/	{ collect($0); next }
+expectblock && /^ \[[0-9a-z]+\] /	{ block = "nl"; expectblock = 0 }
+block = "nl" && /^ \[[0-9a-z]+\] <(\.\/.*|\.\.\/.*|#.*|.*\/.*)>$/	{ collect($0); next }
diff --git a/emparse b/emparse
index 3985a86..ed3a4c7 100755
--- a/emparse
+++ b/emparse
@@ -2,15 +2,20 @@
 
 # aux/emparse -- parse em source
 
-BEGIN { expectblock = 1; itemlevel = 1 }
+BEGIN {
+	expectblock = 1
+	itemlevel = 1
+	for (n=0; n < 256; n++)	# build ord[]: one-character string -> its code 0..255
+		ord[sprintf("%c", n)] = n	# looked up by listnum() to number letter items
+}
 END { breakblock() }
 
 /^$/							{ breakblock(); getline }
 expectblock && /^  /			{ newblock("table") }
 expectblock && /^ > /			{ newblock("blockquote") }
 expectblock && /^ - /			{ newblock("ul") }
-expectblock && /^ [0-9]+\. /		{ newblock("ol") }
-expectblock && /^ \[[0-9]+\] /	{ newblock("nl") }
+expectblock && /^ [0-9a-z]+\. /	{ newblock("ol") }
+expectblock && /^ \[[0-9a-z]+\] /	{ newblock("nl") }
 expectblock && /^ .*: /			{ newblock("dl") }
 expectblock && /^	/			{ newblock("pre") }
 expectblock && /^---$/			{ expectblock = 0; printf "<hr/>\n"; next }
@@ -26,20 +31,20 @@ openblock == "pre" { sub("^	", ""); $0 = escape($0); printf "%s\n", $0; next }
 
 openblock == "blockquote" && /^ > /		{ item(1, "blockquote", line) }
 openblock == "ul" && /^ - /				{ item(1, "ul", line) }
-openblock == "ol" && /^ [0-9]+\. /			{ item(1, "ol", line) }
-openblock == "nl" && /^ \[[0-9]+\] /		{ item(1, "nl", line) }
+openblock == "ol" && /^ [0-9a-z]+\. /		{ item(1, "ol", line) }
+openblock == "nl" && /^ \[[0-9a-z]+\] /		{ item(1, "nl", line) }
 openblock == "dl" && /^ .*: /				{ term(line) }
 
-(openblock == "ul" || openblock == "ol") && /^  -/			{ item(2, "ul", line) }
-(openblock == "ul" || openblock == "ol") && /^  [0-9]+\./		{ item(2, "ol", line) }
-(openblock == "ul" || openblock == "ol") && /^   -/			{ item(3, "ul", line) }
-(openblock == "ul" || openblock == "ol") && /^   [0-9]+\./	{ item(3, "ol", line) }
-(openblock == "ul" || openblock == "ol") && /^    -/			{ item(4, "ul", line) }
-(openblock == "ul" || openblock == "ol") && /^    [0-9]+\./	{ item(4, "ol", line) }
-(openblock == "ul" || openblock == "ol") && /^     -/			{ item(5, "ul", line) }
-(openblock == "ul" || openblock == "ol") && /^     [0-9]+\./	{ item(5, "ol", line) }
-(openblock == "ul" || openblock == "ol") && /^      -/		{ item(6, "ul", line) }
-(openblock == "ul" || openblock == "ol") && /^      [0-9]+\./	{ item(6, "ol", line) }
+(openblock == "ul" || openblock == "ol") && /^  -/				{ item(2, "ul", line) }
+(openblock == "ul" || openblock == "ol") && /^  [0-9a-z]+\./		{ item(2, "ol", line) }
+(openblock == "ul" || openblock == "ol") && /^   -/				{ item(3, "ul", line) }
+(openblock == "ul" || openblock == "ol") && /^   [0-9a-z]+\./		{ item(3, "ol", line) }
+(openblock == "ul" || openblock == "ol") && /^    -/				{ item(4, "ul", line) }
+(openblock == "ul" || openblock == "ol") && /^    [0-9a-z]+\./		{ item(4, "ol", line) }
+(openblock == "ul" || openblock == "ol") && /^     -/				{ item(5, "ul", line) }
+(openblock == "ul" || openblock == "ol") && /^     [0-9a-z]+\./		{ item(5, "ol", line) }
+(openblock == "ul" || openblock == "ol") && /^      -/			{ item(6, "ul", line) }
+(openblock == "ul" || openblock == "ol") && /^      [0-9a-z]+\./	{ item(6, "ol", line) }
 
 { format($0) } # inline formatting
 
@@ -56,11 +61,11 @@ function format(line) {
 		else if (match(w[i], "^&lt;(\\./.*|\\.\\./.*|#.*|.*/.*)&gt;\\)[.,:;?!]$"))
 			w[i] = link(substr(w[i], 5, RLENGTH-10)) substr(w[i], RLENGTH-1)
 
-		if (match(w[i], "^\\[[0-9]+\\]$"))
+		if (match(w[i], "^\\[[0-9a-z]+\\]$"))
 			w[i] = ref(substr(w[i], 2, RLENGTH-2))
-		else if (match(w[i], "^\\[[0-9]+\\][.,:;?!)]$"))
+		else if (match(w[i], "^\\[[0-9a-z]+\\][.,:;?!)]$"))
 			w[i] = ref(substr(w[i], 2, RLENGTH-3)) substr(w[i], RLENGTH)
-		else if (match(w[i], "^\\[[0-9]+\\]\\)[.,:;?!]$"))
+		else if (match(w[i], "^\\[[0-9a-z]+\\]\\)[.,:;?!]$"))
 			w[i] = ref(substr(w[i], 2, RLENGTH-4)) substr(w[i], RLENGTH-1)
 
 		if (!openformat) {
@@ -186,16 +191,18 @@ function item(level, type, line) {
 		printf "<li>"
 	}
 	if (type == "ol") {
-		match($0, "[0-9]+\.")
+		match($0, "[0-9a-z]+\.")
 		v = substr($0, RSTART, RLENGTH-1)
-		sub("^ +[0-9]+\. ", "")
-		printf "<li value=\"%s\">", v
+		sub("^ +[0-9a-z]+\. ", "")
+		listvalid(v)
+		printf "<li value=\"%s\" style=\"list-style-type: %s\">", listnum(v), listtype(v)
 	}
 	if (type == "nl") {
-		match($0, "\\[[0-9]+\\]")
+		match($0, "\\[[0-9a-z]+\\]")
 		v = substr($0, RSTART+1, RLENGTH-2)
-		sub("^ \\[[0-9]+\\] ", "")
-		printf "<li value=\"%s\" id=\"ref%s\">", v, v
+		sub("^ \\[[0-9a-z]+\\] ", "")
+		listvalid(v)
+		printf "<li value=\"%s\" style=\"list-style-type: %s\" id=\"ref%s\">", listnum(v), listtype(v), v
 	}
 }
 
@@ -220,6 +227,26 @@ function link(h) {
 	return "<a href=\"" h "\">" h "</a>"
 }
 
+function listvalid(v) {	# check list item value v; returns silently when it is acceptable
+	if (match(v, "^[0-9]+$")) return	# one or more decimal digits
+	if (match(v, "^[a-z]$")) return	# exactly one lowercase letter
+	if (match(v, "^[A-Z]$")) return	# exactly one uppercase letter
+	printf "error: '%s' is not a valid list item value\n", v > "/dev/stderr"
+	exit 1	# any other value (e.g. several letters) stops the run with status 1
+}
+
+function listnum(v) {	# number that item() prints in the <li value="..."> attribute for v
+	if (match(v, "^[0-9]+$")) return v	# digits are passed through unchanged
+	if (match(v, "^[a-z]$")) return ord[v]-96	# "a" has code 97 in ASCII, so a -> 1, b -> 2, ...
+	if (match(v, "^[A-Z]$")) return ord[v]-64	# "A" has code 65 in ASCII, so A -> 1, B -> 2, ...
+}	# no explicit return for other values; item() calls listvalid(v) before this
+
+function listtype(v) {	# CSS list-style-type keyword that item() prints for the form of v
+	if (match(v, "^[0-9]+$")) return "decimal"	# numeric item value
+	if (match(v, "^[a-z]$")) return "lower-alpha"	# single lowercase letter
+	if (match(v, "^[A-Z]$")) return "upper-alpha"	# single uppercase letter
+}	# no explicit return for other values; item() calls listvalid(v) before this
+
 function escape(s) {
 	gsub("&", "\\&amp;", s)
 	gsub("<", "\\&lt;", s)
diff --git a/mkfile b/mkfile
index 21e11ae..e50c6be 100644
--- a/mkfile
+++ b/mkfile
@@ -1,4 +1,4 @@
-test.html:
+test.html: test.em
 	path=(. /bin)
 	ramfs -p
 	em test.em > /tmp/1
diff --git a/test.em b/test.em
index 0f44abb..b0721b2 100644
--- a/test.em
+++ b/test.em
@@ -24,13 +24,13 @@ ordered list item.
  2. Here is another.
 
  1. Here is a list
-  1. With another list inside it [1]
+  a. With another list inside it [1]
    - And yet another list
    - Inside that list
- 2. And now we're back at the first level [2]
+ 2. And now we're back at the first level [b]
 
  [1] Reference list item
- [2] <http://example.com/abc>
+ [b] <http://example.com/abc>
 
  term: definition
  long term: long
diff --git a/test.html b/test.html
index 089f1a6..f3d2c40 100644
--- a/test.html
+++ b/test.html
@@ -29,26 +29,26 @@ unordered list item.
 </li>
 </ul>
 <ol>
-<li value="1">Here is an
+<li value="1" style="list-style-type: decimal">Here is an
 ordered list item.
-</li><li value="2">Here is another.
+</li><li value="2" style="list-style-type: decimal">Here is another.
 </li>
 </ol>
 <ol>
-<li value="1">Here is a list
+<li value="1" style="list-style-type: decimal">Here is a list
 </li><ol>
-<li value="1">With another list inside it [<a href="#ref1">1</a>]
+<li value="1" style="list-style-type: lower-alpha">With another list inside it [<a href="#ref1">1</a>]
 </li><ul>
 <li>And yet another list
 </li><li>Inside that list
 </li></ul>
 </ol>
-<li value="2">And now we're back at the first level [<a href="http://example.com/abc">2</a>]
+<li value="2" style="list-style-type: decimal">And now we're back at the first level [<a href="http://example.com/abc">b</a>]
 </li>
 </ol>
 <ol class="reflist">
-<li value="1" id="ref1">Reference list item
-</li><li value="2" id="ref2"><a href="http://example.com/abc">http://example.com/abc</a>
+<li value="1" style="list-style-type: decimal" id="ref1">Reference list item
+</li><li value="2" style="list-style-type: lower-alpha" id="refb"><a href="http://example.com/abc">http://example.com/abc</a>
 </li>
 </ol>
 <dl>
-- 
cgit v1.2.3