From 3ab42a5fb02a2652cae089bd3f936729fd49382d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20Ankarstr=C3=B6m?= Date: Sun, 31 Jan 2021 17:53:13 +0000 Subject: Support [A-Za-z] in list item values HTML/CSS makes this a bit awkward; all the better if em can handle it for you. --- README.html | 24 ++++++++++---------- emcollect | 6 ++--- emparse | 75 +++++++++++++++++++++++++++++++++++++++++-------------------- mkfile | 2 +- test.em | 6 ++--- test.html | 14 ++++++------ 6 files changed, 77 insertions(+), 50 deletions(-) diff --git a/README.html b/README.html index 3b19f94..34df622 100644 --- a/README.html +++ b/README.html @@ -3,9 +3,9 @@ maximally readable. It is similar to Markdown, but it has a few key advantages:

    -
  1. It is more readable. -
  2. It is simpler to parse. -
  3. There is not more than one way to do it: for any given HTML, there is +
  4. It is more readable. +
  5. It is simpler to parse. +
  6. There is not more than one way to do it: for any given HTML, there is never more than a single possible em representation.
@@ -52,10 +52,10 @@ Example of *italic text*, _bold text_ and `teletype text`. The marks are only valid in certain positions:

    -
  1. At word borders -
  2. At the beginning of a word after an opening parenthesis -
  3. At the end of a word before any of .,:;?!) -
  4. At the end of a word before a closing parenthesis followed by any of .,:;?! +
  5. At word borders +
  6. At the beginning of a word after an opening parenthesis +
  7. At the end of a word before any of .,:;?!) +
  8. At the end of a word before a closing parenthesis followed by any of .,:;?!

@@ -252,10 +252,10 @@ This is another paragraph.

References

    -
  1. #hyperlinks -
  2. ../tree/emparse -
  3. ../tree/README -
  4. ../tree/test.em -
  5. #reference-lists +
  6. #hyperlinks +
  7. ../tree/emparse +
  8. ../tree/README +
  9. ../tree/test.em +
  10. #reference-lists
diff --git a/emcollect b/emcollect index c0f7759..de9c2d0 100755 --- a/emcollect +++ b/emcollect @@ -7,7 +7,7 @@ function collect(line) { right = $0 sub("^ \\[", "", left) sub("\\].*$", "", left) - sub("^ \\[[0-9]+\\] <", "", right) + sub("^ \\[[0-9a-z]+\\] <", "", right) sub(">$", "", right) printf "%s=%s\n", left, right } @@ -15,5 +15,5 @@ function collect(line) { BEGIN { expectblock = 1 } /^$/ { expectblock = 1; getline } -expectblock && /^ \[[0-9]+\] / { block = "nl"; expectblock = 0 } -block = "nl" && /^ \[[0-9]+\] <(\.\/.*|\.\.\/.*|#.*|.*\/.*)>$/ { collect($0); next } +expectblock && /^ \[[0-9a-z]+\] / { block = "nl"; expectblock = 0 } +block = "nl" && /^ \[[0-9a-z]+\] <(\.\/.*|\.\.\/.*|#.*|.*\/.*)>$/ { collect($0); next } diff --git a/emparse b/emparse index 3985a86..ed3a4c7 100755 --- a/emparse +++ b/emparse @@ -2,15 +2,20 @@ # aux/emparse -- parse em source -BEGIN { expectblock = 1; itemlevel = 1 } +BEGIN { + expectblock = 1 + itemlevel = 1 + for (n=0; n < 256; n++) + ord[sprintf("%c", n)] = n +} END { breakblock() } /^$/ { breakblock(); getline } expectblock && /^ / { newblock("table") } expectblock && /^ > / { newblock("blockquote") } expectblock && /^ - / { newblock("ul") } -expectblock && /^ [0-9]+\. / { newblock("ol") } -expectblock && /^ \[[0-9]+\] / { newblock("nl") } +expectblock && /^ [0-9a-z]+\. / { newblock("ol") } +expectblock && /^ \[[0-9a-z]+\] / { newblock("nl") } expectblock && /^ .*: / { newblock("dl") } expectblock && /^ / { newblock("pre") } expectblock && /^---$/ { expectblock = 0; printf "
\n"; next } @@ -26,20 +31,20 @@ openblock == "pre" { sub("^ ", ""); $0 = escape($0); printf "%s\n", $0; next } openblock == "blockquote" && /^ > / { item(1, "blockquote", line) } openblock == "ul" && /^ - / { item(1, "ul", line) } -openblock == "ol" && /^ [0-9]+\. / { item(1, "ol", line) } -openblock == "nl" && /^ \[[0-9]+\] / { item(1, "nl", line) } +openblock == "ol" && /^ [0-9a-z]+\. / { item(1, "ol", line) } +openblock == "nl" && /^ \[[0-9a-z]+\] / { item(1, "nl", line) } openblock == "dl" && /^ .*: / { term(line) } -(openblock == "ul" || openblock == "ol") && /^ -/ { item(2, "ul", line) } -(openblock == "ul" || openblock == "ol") && /^ [0-9]+\./ { item(2, "ol", line) } -(openblock == "ul" || openblock == "ol") && /^ -/ { item(3, "ul", line) } -(openblock == "ul" || openblock == "ol") && /^ [0-9]+\./ { item(3, "ol", line) } -(openblock == "ul" || openblock == "ol") && /^ -/ { item(4, "ul", line) } -(openblock == "ul" || openblock == "ol") && /^ [0-9]+\./ { item(4, "ol", line) } -(openblock == "ul" || openblock == "ol") && /^ -/ { item(5, "ul", line) } -(openblock == "ul" || openblock == "ol") && /^ [0-9]+\./ { item(5, "ol", line) } -(openblock == "ul" || openblock == "ol") && /^ -/ { item(6, "ul", line) } -(openblock == "ul" || openblock == "ol") && /^ [0-9]+\./ { item(6, "ol", line) } +(openblock == "ul" || openblock == "ol") && /^ -/ { item(2, "ul", line) } +(openblock == "ul" || openblock == "ol") && /^ [0-9a-z]+\./ { item(2, "ol", line) } +(openblock == "ul" || openblock == "ol") && /^ -/ { item(3, "ul", line) } +(openblock == "ul" || openblock == "ol") && /^ [0-9a-z]+\./ { item(3, "ol", line) } +(openblock == "ul" || openblock == "ol") && /^ -/ { item(4, "ul", line) } +(openblock == "ul" || openblock == "ol") && /^ [0-9a-z]+\./ { item(4, "ol", line) } +(openblock == "ul" || openblock == "ol") && /^ -/ { item(5, "ul", line) } +(openblock == "ul" || openblock == "ol") && /^ [0-9a-z]+\./ { item(5, "ol", line) } +(openblock == "ul" || openblock == "ol") && /^ -/ { item(6, "ul", line) } +(openblock == "ul" || openblock == "ol") && /^ [0-9a-z]+\./ { item(6, "ol", line) } { format($0) } # inline formatting @@ -56,11 +61,11 @@ function format(line) { else if (match(w[i], "^<(\\./.*|\\.\\./.*|#.*|.*/.*)>\\)[.,:;?!]$")) w[i] = link(substr(w[i], 5, RLENGTH-10)) substr(w[i], RLENGTH-1) - if (match(w[i], "^\\[[0-9]+\\]$")) + if (match(w[i], "^\\[[0-9a-z]+\\]$")) w[i] = ref(substr(w[i], 2, RLENGTH-2)) - else if (match(w[i], "^\\[[0-9]+\\][.,:;?!)]$")) + else if (match(w[i], "^\\[[0-9a-z]+\\][.,:;?!)]$")) w[i] = ref(substr(w[i], 2, RLENGTH-3)) substr(w[i], RLENGTH) - else if (match(w[i], "^\\[[0-9]+\\]\\)[.,:;?!]$")) + else if (match(w[i], "^\\[[0-9a-z]+\\]\\)[.,:;?!]$")) w[i] = ref(substr(w[i], 2, RLENGTH-4)) substr(w[i], RLENGTH-1) if (!openformat) { @@ -186,16 +191,18 @@ function item(level, type, line) { printf "
  • " } if (type == "ol") { - match($0, "[0-9]+\.") + match($0, "[0-9a-z]+\.") v = substr($0, RSTART, RLENGTH-1) - sub("^ +[0-9]+\. ", "") - printf "
  • ", v + sub("^ +[0-9a-z]+\. ", "") + listvalid(v) + printf "
  • ", listnum(v), listtype(v) } if (type == "nl") { - match($0, "\\[[0-9]+\\]") + match($0, "\\[[0-9a-z]+\\]") v = substr($0, RSTART+1, RLENGTH-2) - sub("^ \\[[0-9]+\\] ", "") - printf "
  • ", v, v + sub("^ \\[[0-9a-z]+\\] ", "") + listvalid(v) + printf "
  • ", listnum(v), listtype(v), v } } @@ -220,6 +227,26 @@ function link(h) { return "" h "" } +function listvalid(v) { + if (match(v, "^[0-9]+$")) return + if (match(v, "^[a-z]$")) return + if (match(v, "^[A-Z]$")) return + printf "error: '%s' is not a valid list item value\n", v > "/dev/stderr" + exit 1 +} + +function listnum(v) { + if (match(v, "^[0-9]+$")) return v + if (match(v, "^[a-z]$")) return ord[v]-96 + if (match(v, "^[A-Z]$")) return ord[v]-64 +} + +function listtype(v) { + if (match(v, "^[0-9]+$")) return "decimal" + if (match(v, "^[a-z]$")) return "lower-alpha" + if (match(v, "^[A-Z]$")) return "upper-alpha" +} + function escape(s) { gsub("&", "\\&", s) gsub("<", "\\<", s) diff --git a/mkfile b/mkfile index 21e11ae..e50c6be 100644 --- a/mkfile +++ b/mkfile @@ -1,4 +1,4 @@ -test.html: +test.html: test.em path=(. /bin) ramfs -p em test.em > /tmp/1 diff --git a/test.em b/test.em index 0f44abb..b0721b2 100644 --- a/test.em +++ b/test.em @@ -24,13 +24,13 @@ ordered list item. 2. Here is another. 1. Here is a list - 1. With another list inside it [1] + a. With another list inside it [1] - And yet another list - Inside that list - 2. And now we're back at the first level [2] + 2. And now we're back at the first level [b] [1] Reference list item - [2] + [b] term: definition long term: long diff --git a/test.html b/test.html index 089f1a6..f3d2c40 100644 --- a/test.html +++ b/test.html @@ -29,26 +29,26 @@ unordered list item.
    1. -
    2. Here is an +
    3. Here is an ordered list item. -
    4. Here is another. +
    5. Here is another.
      -
    1. Here is a list +
    2. Here is a list
      1. -
      2. With another list inside it [1] +
      3. With another list inside it [1]
        • And yet another list
        • Inside that list
      -
    3. And now we're back at the first level [2] +
    4. And now we're back at the first level [b]
      -
    1. Reference list item -
    2. http://example.com/abc +
    3. Reference list item +
    4. http://example.com/abc
    -- cgit v1.2.3