From 3ab42a5fb02a2652cae089bd3f936729fd49382d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?John=20Ankarstr=C3=B6m?=
Date: Sun, 31 Jan 2021 17:53:13 +0000
Subject: Support [A-Za-z] in list item values
HTML/CSS makes this a bit awkward; all the better if em
can handle it for you.
---
README.html | 24 ++++++++++----------
emcollect | 6 ++---
emparse | 75 +++++++++++++++++++++++++++++++++++++++++--------------------
mkfile | 2 +-
test.em | 6 ++---
test.html | 14 ++++++------
6 files changed, 77 insertions(+), 50 deletions(-)
diff --git a/README.html b/README.html
index 3b19f94..34df622 100644
--- a/README.html
+++ b/README.html
@@ -3,9 +3,9 @@
maximally readable. It is similar to Markdown, but it has a few key advantages:
-- It is more readable.
-
- It is simpler to parse.
-
- There is not more than one way to do it: for any given HTML, there is
+
- It is more readable.
+
- It is simpler to parse.
+
- There is not more than one way to do it: for any given HTML, there is
never more than a single possible em representation.
@@ -52,10 +52,10 @@ Example of *italic text*, _bold text_ and `teletype text`.
The marks are only valid in certain positions:
-- At word borders
-
- At the beginning of a word after an opening parenthesis
-
- At the end of a word before any of .,:;?!)
-
- At the end of a word before a closing parenthesis followed by any of .,:;?!
+
- At word borders
+
- At the beginning of a word after an opening parenthesis
+
- At the end of a word before any of .,:;?!)
+
- At the end of a word before a closing parenthesis followed by any of .,:;?!
@@ -252,10 +252,10 @@ This is another paragraph.
References
-- #hyperlinks
-
- ../tree/emparse
-
- ../tree/README
-
- ../tree/test.em
-
- #reference-lists
+
- #hyperlinks
+
- ../tree/emparse
+
- ../tree/README
+
- ../tree/test.em
+
- #reference-lists
diff --git a/emcollect b/emcollect
index c0f7759..de9c2d0 100755
--- a/emcollect
+++ b/emcollect
@@ -7,7 +7,7 @@ function collect(line) {
right = $0
sub("^ \\[", "", left)
sub("\\].*$", "", left)
- sub("^ \\[[0-9]+\\] <", "", right)
+ sub("^ \\[[0-9a-z]+\\] <", "", right)
sub(">$", "", right)
printf "%s=%s\n", left, right
}
@@ -15,5 +15,5 @@ function collect(line) {
BEGIN { expectblock = 1 }
/^$/ { expectblock = 1; getline }
-expectblock && /^ \[[0-9]+\] / { block = "nl"; expectblock = 0 }
-block = "nl" && /^ \[[0-9]+\] <(\.\/.*|\.\.\/.*|#.*|.*\/.*)>$/ { collect($0); next }
+expectblock && /^ \[[0-9a-z]+\] / { block = "nl"; expectblock = 0 }
+block = "nl" && /^ \[[0-9a-z]+\] <(\.\/.*|\.\.\/.*|#.*|.*\/.*)>$/ { collect($0); next }
diff --git a/emparse b/emparse
index 3985a86..ed3a4c7 100755
--- a/emparse
+++ b/emparse
@@ -2,15 +2,20 @@
# aux/emparse -- parse em source
-BEGIN { expectblock = 1; itemlevel = 1 }
+BEGIN {  # one-time setup, run before any input line is read
+ expectblock = 1  # enables the "expectblock && ..." rules, so the first line may open a block
+ itemlevel = 1  # NOTE(review): presumably the current list nesting depth; its use is not visible in this hunk
+ for (n=0; n < 256; n++)  # build ord[]: one entry per character code 0..255
+ ord[sprintf("%c", n)] = n  # key is the character produced by %c, value is its numeric code (read by listnum)
+}
END { breakblock() }
/^$/ { breakblock(); getline }
expectblock && /^ / { newblock("table") }
expectblock && /^ > / { newblock("blockquote") }
expectblock && /^ - / { newblock("ul") }
-expectblock && /^ [0-9]+\. / { newblock("ol") }
-expectblock && /^ \[[0-9]+\] / { newblock("nl") }
+expectblock && /^ [0-9a-z]+\. / { newblock("ol") }
+expectblock && /^ \[[0-9a-z]+\] / { newblock("nl") }
expectblock && /^ .*: / { newblock("dl") }
expectblock && /^ / { newblock("pre") }
expectblock && /^---$/ { expectblock = 0; printf "
\n"; next }
@@ -26,20 +31,20 @@ openblock == "pre" { sub("^ ", ""); $0 = escape($0); printf "%s\n", $0; next }
openblock == "blockquote" && /^ > / { item(1, "blockquote", line) }
openblock == "ul" && /^ - / { item(1, "ul", line) }
-openblock == "ol" && /^ [0-9]+\. / { item(1, "ol", line) }
-openblock == "nl" && /^ \[[0-9]+\] / { item(1, "nl", line) }
+openblock == "ol" && /^ [0-9a-z]+\. / { item(1, "ol", line) }
+openblock == "nl" && /^ \[[0-9a-z]+\] / { item(1, "nl", line) }
openblock == "dl" && /^ .*: / { term(line) }
-(openblock == "ul" || openblock == "ol") && /^ -/ { item(2, "ul", line) }
-(openblock == "ul" || openblock == "ol") && /^ [0-9]+\./ { item(2, "ol", line) }
-(openblock == "ul" || openblock == "ol") && /^ -/ { item(3, "ul", line) }
-(openblock == "ul" || openblock == "ol") && /^ [0-9]+\./ { item(3, "ol", line) }
-(openblock == "ul" || openblock == "ol") && /^ -/ { item(4, "ul", line) }
-(openblock == "ul" || openblock == "ol") && /^ [0-9]+\./ { item(4, "ol", line) }
-(openblock == "ul" || openblock == "ol") && /^ -/ { item(5, "ul", line) }
-(openblock == "ul" || openblock == "ol") && /^ [0-9]+\./ { item(5, "ol", line) }
-(openblock == "ul" || openblock == "ol") && /^ -/ { item(6, "ul", line) }
-(openblock == "ul" || openblock == "ol") && /^ [0-9]+\./ { item(6, "ol", line) }
+(openblock == "ul" || openblock == "ol") && /^ -/ { item(2, "ul", line) }
+(openblock == "ul" || openblock == "ol") && /^ [0-9a-z]+\./ { item(2, "ol", line) }
+(openblock == "ul" || openblock == "ol") && /^ -/ { item(3, "ul", line) }
+(openblock == "ul" || openblock == "ol") && /^ [0-9a-z]+\./ { item(3, "ol", line) }
+(openblock == "ul" || openblock == "ol") && /^ -/ { item(4, "ul", line) }
+(openblock == "ul" || openblock == "ol") && /^ [0-9a-z]+\./ { item(4, "ol", line) }
+(openblock == "ul" || openblock == "ol") && /^ -/ { item(5, "ul", line) }
+(openblock == "ul" || openblock == "ol") && /^ [0-9a-z]+\./ { item(5, "ol", line) }
+(openblock == "ul" || openblock == "ol") && /^ -/ { item(6, "ul", line) }
+(openblock == "ul" || openblock == "ol") && /^ [0-9a-z]+\./ { item(6, "ol", line) }
{ format($0) } # inline formatting
@@ -56,11 +61,11 @@ function format(line) {
else if (match(w[i], "^<(\\./.*|\\.\\./.*|#.*|.*/.*)>\\)[.,:;?!]$"))
w[i] = link(substr(w[i], 5, RLENGTH-10)) substr(w[i], RLENGTH-1)
- if (match(w[i], "^\\[[0-9]+\\]$"))
+ if (match(w[i], "^\\[[0-9a-z]+\\]$"))
w[i] = ref(substr(w[i], 2, RLENGTH-2))
- else if (match(w[i], "^\\[[0-9]+\\][.,:;?!)]$"))
+ else if (match(w[i], "^\\[[0-9a-z]+\\][.,:;?!)]$"))
w[i] = ref(substr(w[i], 2, RLENGTH-3)) substr(w[i], RLENGTH)
- else if (match(w[i], "^\\[[0-9]+\\]\\)[.,:;?!]$"))
+ else if (match(w[i], "^\\[[0-9a-z]+\\]\\)[.,:;?!]$"))
w[i] = ref(substr(w[i], 2, RLENGTH-4)) substr(w[i], RLENGTH-1)
if (!openformat) {
@@ -186,16 +191,18 @@ function item(level, type, line) {
printf ""
}
if (type == "ol") {
- match($0, "[0-9]+\.")
+ match($0, "[0-9a-z]+\.")
v = substr($0, RSTART, RLENGTH-1)
- sub("^ +[0-9]+\. ", "")
- printf "", v
+ sub("^ +[0-9a-z]+\. ", "")
+ listvalid(v)
+ printf "", listnum(v), listtype(v)
}
if (type == "nl") {
- match($0, "\\[[0-9]+\\]")
+ match($0, "\\[[0-9a-z]+\\]")
v = substr($0, RSTART+1, RLENGTH-2)
- sub("^ \\[[0-9]+\\] ", "")
- printf "", v, v
+ sub("^ \\[[0-9a-z]+\\] ", "")
+ listvalid(v)
+ printf "", listnum(v), listtype(v), v
}
}
@@ -220,6 +227,26 @@ function link(h) {
return "" h ""
}
+function listvalid(v) {  # check list item value v: returns silently if acceptable, otherwise reports an error and exits
+ if (match(v, "^[0-9]+$")) return  # one or more decimal digits: valid
+ if (match(v, "^[a-z]$")) return  # exactly one lowercase letter: valid
+ if (match(v, "^[A-Z]$")) return  # exactly one uppercase letter: valid. NOTE(review): the caller patterns visible in this patch only match [0-9a-z], so this branch looks unreachable; confirm
+ printf "error: '%s' is not a valid list item value\n", v > "/dev/stderr"  # anything else (e.g. "ab" or "1a") is reported on stderr
+ exit 1  # stop processing input with exit status 1
+}
+
+function listnum(v) {  # convert list item value v to a number; v is expected to have passed listvalid()
+ if (match(v, "^[0-9]+$")) return v  # decimal digits are returned unchanged
+ if (match(v, "^[a-z]$")) return ord[v]-96  # lowercase letter: character code minus 96, i.e. a=1 .. z=26 assuming ASCII codes in ord[]
+ if (match(v, "^[A-Z]$")) return ord[v]-64  # uppercase letter: character code minus 64, i.e. A=1 .. Z=26 assuming ASCII codes in ord[]
+}  # any other v falls through without an explicit return value
+
+function listtype(v) {  # name the numbering style implied by list item value v; NOTE(review): the strings look like CSS list-style-type keywords, but the consuming printf format is not visible here
+ if (match(v, "^[0-9]+$")) return "decimal"  # one or more decimal digits
+ if (match(v, "^[a-z]$")) return "lower-alpha"  # exactly one lowercase letter
+ if (match(v, "^[A-Z]$")) return "upper-alpha"  # exactly one uppercase letter
+}  # any other v falls through without an explicit return value
+
function escape(s) {
gsub("&", "\\&", s)
gsub("<", "\\<", s)
diff --git a/mkfile b/mkfile
index 21e11ae..e50c6be 100644
--- a/mkfile
+++ b/mkfile
@@ -1,4 +1,4 @@
-test.html:
+test.html: test.em
path=(. /bin)
ramfs -p
em test.em > /tmp/1
diff --git a/test.em b/test.em
index 0f44abb..b0721b2 100644
--- a/test.em
+++ b/test.em
@@ -24,13 +24,13 @@ ordered list item.
2. Here is another.
1. Here is a list
- 1. With another list inside it [1]
+ a. With another list inside it [1]
- And yet another list
- Inside that list
- 2. And now we're back at the first level [2]
+ 2. And now we're back at the first level [b]
[1] Reference list item
- [2]
+ [b]
term: definition
long term: long
diff --git a/test.html b/test.html
index 089f1a6..f3d2c40 100644
--- a/test.html
+++ b/test.html
@@ -29,26 +29,26 @@ unordered list item.
-- Here is an
+
- Here is an
ordered list item.
-
- Here is another.
+
- Here is another.
-- Here is a list
+
- Here is a list
-- With another list inside it [1]
+
- With another list inside it [1]
- And yet another list
- Inside that list
-- And now we're back at the first level [2]
+
- And now we're back at the first level [b]
-- Reference list item
-
- http://example.com/abc
+
- Reference list item
+
- http://example.com/abc
--
cgit v1.2.3