diff options
author | John Ankarström <john@ankarstrom.se> | 2020-10-23 02:11:59 +0200 |
---|---|---|
committer | John Ankarström <john@ankarstrom.se> | 2020-10-23 02:11:59 +0200 |
commit | c3143fbdb7ea46539023e11cb30a7b14434030f3 (patch) | |
tree | e2a4394e0b3d8c6a882c82637a08823e6e5b5043 | |
parent | 24da48a86bb542be247127258db5fcafab4ba011 (diff) | |
download | tt-c3143fbdb7ea46539023e11cb30a7b14434030f3.tar.gz |
release 1.2
-rw-r--r-- | CHANGELOG.txt | 11 | ||||
-rw-r--r-- | build.bat | 4 | ||||
-rw-r--r-- | doc/doc.bat | 18 | ||||
-rw-r--r-- | doc/index.html | 802 | ||||
-rw-r--r-- | doc/markdown.exe | bin | 0 -> 2926856 bytes | |||
-rw-r--r-- | src/tt.c | 46 | ||||
-rw-r--r-- | src/tt.input.c | 397 | ||||
-rw-r--r-- | src/tt.options.c | 90 | ||||
-rw-r--r-- | src/tt.output.c | 199 | ||||
-rw-r--r-- | test/truncation/in.txt | 2 | ||||
-rw-r--r-- | test/truncation/test.txt | 2 | ||||
-rw-r--r-- | tt.c | 464 | ||||
-rw-r--r-- | tt.exe | bin | 61440 -> 62464 bytes |
13 files changed, 1781 insertions, 254 deletions
diff --git a/CHANGELOG.txt b/CHANGELOG.txt index c72de1a..677db61 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,7 +1,6 @@ -CHANGELOG ----------------------- 1.1 ---------------------- 2020-05-26 +CHANGELOG ----------------------- 1.2 ---------------------- 2020-05-29 -NEW: Destination lines can now be space-indented. -FIX: Identifiers in destination lines are now currently truncated. - -NEW: test/indented-triple-pass -NEW: test/truncation
\ No newline at end of file +NEW: tt is now itself compiled using tt. The source files, which are + organized "literately" and tangled to tt.c, are located in src/. + A preliminary HTML version of the source exists in doc/. +FIX: Empty references are now handled properly.
\ No newline at end of file diff --git a/build.bat b/build.bat new file mode 100644 index 0000000..f36af50 --- /dev/null +++ b/build.bat @@ -0,0 +1,4 @@ +@echo off +setlocal +cd %~dp0 +cd src && type tt.*.c 2>nul | ..\tt -d// -c -o../ tt.c
\ No newline at end of file diff --git a/doc/doc.bat b/doc/doc.bat new file mode 100644 index 0000000..80a6351 --- /dev/null +++ b/doc/doc.bat @@ -0,0 +1,18 @@ +@echo off +setlocal +cd %~dp0 + +echo ^<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"^> > index.html +echo ^<html lang=en^> >> index.html +echo ^<head^> >> index.html +echo ^<title^>tt.c^<^/title^> >> index.html +echo ^</head^> >> index.html +echo ^<body^> >> index.html + +for /f "tokens=*" %%f in ('dir /b/a-d ..\src') do ( + echo ^<h1^>%%f^</h1^> >> index.html + awk -F """" "/^\/\// { sub(/^\/\/ */, """", $0); print gensub(/->[ ]*([^ ][^ ]*)$/, ""\\\\→\\\\ *\\\\1*"", $0); next } /^[ ]*$/ { print """"; next } { print "" "" $0 }" < ..\src\%%f | markdown.exe | sed -e "s/h2>/h3>/g" -e "s/h1>/h2>/g" >> index.html + echo ^<hr^> >> index.html +) + +echo ^</body^>^</html^> >> index.html
\ No newline at end of file diff --git a/doc/index.html b/doc/index.html new file mode 100644 index 0000000..4b0c068 --- /dev/null +++ b/doc/index.html @@ -0,0 +1,802 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> +<html lang=en> +<head> +<title>tt.c</title> +</head> +<body> +<h1>tt.c</h1> +<p>tt.c -- tangle to, written by John Ankarström → <em>tt.c</em></p> + +<pre><code>#include <stdio.h> + +#ifdef _WIN32 +#include <shlwapi.h> +#pragma comment(lib, "Shlwapi.lib") +#else +#include <ctype.h> +#include <errno.h> +#include <string.h> +#include <strings.h> +#include <stdlib.h> +#include <sys/stat.h> +#endif + +<<definitions>> + +#define err(code, string) do { fprintf(stderr, "%s: %s: %s\n", string, strerror(errno)); exit(code); } while (0) +#define die(...) do { fprintf(stderr, __VA_ARGS__); exit(1); } while (0) +#define true 1 +#define false 0 +#define bool int + +<<declarations>> + +int main(int argc, char *argv[]) { + <<main.declarations>> + <<main.globals>> + <<main.options>> + <<main.input>> + <<main.output>> + return 0; +} + +void reference(char *line) { + <<reference.declarations>> + <<reference.parse>> + <<reference.add>> +} + +bool insertion(char *line) { + <<insertion.declarations>> + <<insertion.parse>> + <<insertion.add>> +} +</code></pre> +<hr> +<h1>tt.input.c</h1> +<h2>References in source input</h2> + +<p>The references found in the source input is stored as an array of strings +in the global refs variable: → <em>declarations</em></p> + +<pre><code>char **refs; /* references */ +int refs_c; /* count */ +int refs_s; /* size (number of elements allocated for) */ +</code></pre> + +<p>It is allocated at the beginning of the execution to contain an array of +ten strings. 
The refs_s variable keeps track of the amount of allocated +space, while refs_c holds the number of actual elements: → <em>main.globals</em></p> + +<pre><code> refs_c = 0; + refs_s = 10; + refs = malloc(refs_s * sizeof(char *)); + if (refs == NULL) err(1, "malloc"); +</code></pre> + +<h2>Insertions in source input</h2> + +<p>tt represents every insertion as an array of strings, where each string +corresponds to a line to be inserted. All insertions are stored in the +global ins array: → <em>declarations</em></p> + +<pre><code>char ***ins; /* insertions */ +</code></pre> + +<p>The position of each insertion in the ins array is always equal to the +position of the corresponding reference in the refs array -- to find what +lines should be inserted at destination X, one must find the value P such +that refs[P] is equal to X. Then, the corresponding insertion will be equal +to ins[P].</p> + +<p>In other words, the ins array should always be of the same length as refs. +As such, the refs_s and refs_c variables are used for ins as well. The ins +array is allocated to hold the same number of elements as refs. Furthermore, +its elements are set to NULL, signifying the absence of any insertion at +that index: → <em>main.globals</em></p> + +<pre><code> ins = malloc(refs_s * sizeof(char **)); + if (ins == NULL) err(1, "malloc"); + for (i = 0; i < refs_s; i++) + ins[i] = NULL; +</code></pre> + +<p>→ <em>main.declarations</em></p> + +<pre><code>int i; +</code></pre> + +<h2>Parsing standard input</h2> + +<p>Text is read from the standard input, line by line, into a line variable. 
+Two additional variables, line_s and line_l, keep track of the amount of +allocated space and the actual number of characters in the string, +respectively: → <em>main.declarations</em></p> + +<pre><code> char *line; + int line_l; /* length */ + int line_s; /* size (number of characters allocated for) */ +</code></pre> + +<p>It initially is allocated to hold 100 characters: → <em>main.input</em></p> + +<pre><code> line_l = 0; + line_s = 100; + line = malloc(1 + line_s * sizeof(char)); + if (line == NULL) err(1, "malloc"); +</code></pre> + +<p>Lines are read character by character until end of file. First, the read +character is assigned to the variable b. When it is certain that it is not +EOF, then it is assigned to the variable c: → <em>main.input</em></p> + +<pre><code> while ((b = getchar()) != EOF) { + c = b; +</code></pre> + +<p>→ <em>main.declarations</em></p> + +<pre><code> char b; + char c; +</code></pre> + +<p>On every iteration, tt checks whether the read character is a newline. If +not, the character is added to the line variable, which is re-allocated if +necessary. 
The line_l, keeping track of the line's length, is incremented +as well: → <em>main.input</em></p> + +<pre><code> if (c != '\n') { + if (line_l + 1 > line_s) { + line_s += 20; + tmp = realloc(line, 1 + line_s * sizeof(char)); + if (tmp == NULL) err(1, "malloc"); + line = tmp; + } + line[line_l++] = c; + continue; + } +</code></pre> + +<p>The tmp variable used in the re-allocation has a type which is identical to +that of the line variable: → <em>main.declarations</em></p> + +<pre><code> char *tmp; +</code></pre> + +<p>If the read character is a newline, then the program "finishes" the line, +adding a final NULL character and resetting line_l: → <em>main.input</em></p> + +<pre><code>finish: + line[line_l] = '\0'; + line_l = 0; +</code></pre> + +<p>Before parsing the line, we make sure to skip it if it is empty and +following a non-code line: → <em>main.input</em></p> + +<pre><code> if (strlen(code_prefix) == 0 && !wascode && strcmp(line, "") == 0) { + continue; + } +</code></pre> + +<p>This is only desirable if CODE_PREFIX is empty, because then, there is no +way for the writer of the source input to, for appearance's sake, leave an +empty line between non-code lines and code lines; any empty line will +will inevitably be interpreted as a code line. 
The code above circumvents +this.</p> + +<p>This aesthetical nicety requires the program to keep track of whether the +previous line was a code line or not: → <em>main.declarations</em></p> + +<pre><code> bool wascode = false; +</code></pre> + +<p>Now, it is time to check whether the read line is a code line (an insertion) +or a documentation line (containing a reference): → <em>main.input</em></p> + +<pre><code> if (!insertion(line)) reference(line); + } +</code></pre> + +<p>The insertion and reference functions modify the ins and refs variables +according to the contents of the line.</p> + +<p>Finally, after the loop is finished -- meaning that EOF has been reached -- +we must ensure that the final character was not a newline; otherwise, the +final line of source input has not been processed, as lines are processed +only when the terminated newline is encountered.</p> + +<p>Thus, if the final character was a newline, tt goes back and finishes the +final line: → <em>main.input</em></p> + +<pre><code> if (c != '\n') { c = '\n'; goto finish; } +</code></pre> + +<h2>Identifying and processing documentation lines containing references</h2> + +<p>The reference function is responsible for processing references in source +input lines: → <em>declarations</em></p> + +<pre><code>void reference(char *line); +</code></pre> + +<h3>Parsing the line</h3> + +<p>Documentation lines are formatted as follows: -></p> + +<pre><code>documentation line ::= DOC_PREFIX anything [reference] + +reference ::= "->" [whitespace] identifier [whitespace] +identifier ::= not whitespace +</code></pre> + +<p>In order to identify whether a given line actually is a documentation line +containing a reference, the line variable is aliased to ln, which will be +modified instead of line: → <em>reference.declarations</em></p> + +<pre><code> char *ln = line; +</code></pre> + +<p>First, we ensure the line begins with the doc_prefix: → <em>reference.parse</em></p> + +<pre><code> if (strncmp(ln, doc_prefix, 
strlen(doc_prefix)) != 0) return; +</code></pre> + +<p>Then, we ensure that a hyphen is present: → <em>reference.parse</em></p> + +<pre><code>hyphen: + if (*ln == '\0') return; + else if (*ln == '-') { ln++; goto lessthan; } + else { ln++; goto hyphen; } +</code></pre> + +<p>After finding the hyphen, we check whether a less-then sign follows it. +If not, we keep looking for another hyphen. → <em>reference.parse</em></p> + +<pre><code>lessthan: + if (*ln != '>') goto hyphen; + else ln++; +</code></pre> + +<p>After finding a less-then sign following a hyphen (->), we ignore all +whitespace, if there is any. If the end of the line has been reached, or is +reached, by this point, then it will be interpreted as an empty reference, +resetting the current reference (meaning that subsequent code lines will not +be attached to any reference): → <em>reference.parse</em></p> + +<pre><code>space: + if (isspace(*ln)) { ln++; goto space; } + if (*ln == '\0') { ref = ""; return; } +</code></pre> + +<p>Now, a valid reference should be a string of non-space characters, +followed optionally by whitespace, but not anything other than whitespace: +→ <em>reference.parse</em></p> + +<pre><code> for (i = 0; i < strlen(ln); i++) + if (isspace(ln[i])) { + for (j = i; j < strlen(ln); j++) + if (!isspace(ln[j])) return; + break; + } +</code></pre> + +<p>→ <em>reference.declarations</em></p> + +<pre><code> int i; + int j; +</code></pre> + +<p>After the loop above, i will be set to the index of the first encountered +space or the end of the line. 
Any trailing whitespace should be ignored: +→ <em>reference.parse</em></p> + +<pre><code> ln[i] = '\0'; +</code></pre> + +<h3>Adding the reference</h3> + +<p>At this point, we have found a valid reference, which should now be added to +the global refs array.</p> + +<p>First, however, it should be mentioned that reference identifiers have a +maximum length of 80 characters: → <em>definitions</em></p> + +<pre><code>#define REFMAX 80 +</code></pre> + +<p>Thus, any reference identifier longer than REFMAX is truncated, with a +warning printed to the standard error stream: → <em>reference.add</em></p> + +<pre><code> if (strlen(ln) > REFMAX) { + fprintf(stderr, "Warning: Truncating identifier exceeding %d characters\n", + REFMAX); + ln[REFMAX] = '\0'; + } +</code></pre> + +<p>It should also be mentioned that the current reference is always stored in a +global variable, from which the code(char <em>) function knows with which +reference to associate each code line: → </em>declarations*</p> + +<pre><code>char *ref; +</code></pre> + +<p>It is allocated in the beginning of the program's execution: → <em>main.globals</em></p> + +<pre><code> ref = malloc(1 + REFMAX * sizeof(char)); + if (ref == NULL) err(1, "malloc"); +</code></pre> + +<p>It is freed before the output section of the program, at which point it is +no longer needed: → <em>main.output</em></p> + +<pre><code> free(ref); +</code></pre> + +<p>The variable is set by our reference function: → <em>reference.add</em></p> + +<pre><code> sprintf(ref, "%s", ln); /* set current reference */ + ref[strlen(ln)] = '\0'; +</code></pre> + +<p>Now remains the work of adding the reference to the global refs variable -- +unless it already exists in refs: → <em>reference.add</em></p> + +<pre><code> for (i = 0; i < refs_c; i++) + if (strcmp(refs[i], ref) == 0) return; +</code></pre> + +<p>If the reference truly is new, we notify the user: → <em>reference.add</em></p> + +<pre><code> fprintf(stderr, "New reference: %s\n", ref); 
+</code></pre> + +<p>Before adding the new reference to refs, we re-allocate refs (and therefore +also ins, which should always be as large as refs), if needed: +→ <em>reference.add</em></p> + +<pre><code> if (++refs_c > refs_s) { + refs_s += 10; + tmp = realloc(refs, refs_s * sizeof(char *)); + if (tmp == NULL) err(1, "malloc"); + refs = tmp; + tmp2 = realloc(ins, refs_s * sizeof(char *)); + if (tmp2 == NULL) err(1, "malloc"); + ins = tmp2; + for (i = refs_s - 10; i < refs_s; i++) /* TODO: is this right? */ + ins[i] = NULL; + } +</code></pre> + +<p>→ <em>reference.declarations</em></p> + +<pre><code> char **tmp; + char ***tmp2; +</code></pre> + +<p>Notice that the code above also increases the refs_c count. Now, everything +else is done, and the reference is ready to be added: → <em>reference.add</em></p> + +<pre><code> refs[refs_c-1] = malloc(1 + REFMAX * sizeof(char)); + sprintf(refs[refs_c-1], "%s", ref); +</code></pre> + +<h2>Identifying and processing code lines</h2> + +<p>The insertion function is responsible for processing code lines: +→ <em>declarations</em></p> + +<pre><code>bool insertion(char *line); +</code></pre> + +<p>It returns true if the given line is a code line (i.e., an insertion).</p> + +<h3>Parsing the code line</h3> + +<p>First of all, if there is no current reference, the insertion should be +ignored: → <em>insertion.parse</em></p> + +<pre><code> if (ref[0] == '\0') return false; +</code></pre> + +<p>If there is a CODE<em>PREFIX, we ensure that the line begins with it. 
+Likewise, if there is a DOC</em>PREFIX, we ensure that the line does not +begin with it: → <em>insertion.parse</em></p> + +<pre><code> if (strlen(code_prefix) > 0) + if (strncmp(line, code_prefix, strlen(code_prefix)) != 0) return false; + if (strlen(doc_prefix) > 0) + if (strncmp(line, doc_prefix, strlen(doc_prefix)) == 0) return false; +</code></pre> + +<p>As you can see, the DOC<em>PREFIX is given precedence over the CODE</em>PREFIX.</p> + +<h3>Adding the code line to the insertions</h3> + +<p>Now that we know the line contains an insertion, we must find the index +of the current reference in the refs array: → <em>insertion.add</em></p> + +<pre><code> for (i = 0; i < refs_c; i++) + if (strcmp(refs[i], ref) == 0) break; +</code></pre> + +<p>→ <em>insertion.declarations</em></p> + +<pre><code> int i; +</code></pre> + +<p>Our goal is to add the insertion to the corresponding position in the ins +array. If there is no insertion at that position, the value will be NULL: +→ <em>insertion.add</em></p> + +<pre><code> if (ins[i] == NULL) { + ins[i] = malloc(1 + 1 * sizeof(char *)); + if (ins[i] == NULL) err(1, "malloc"); + len = 0; + } +</code></pre> + +<p>If ins[i] is not NULL, then it already contains some number of insertion +strings, terminated by a final NULL value. In order to allocate memory +for the new insertion, we find the position of the final NULL value, +corresponding to the length of the ins[i] array: → <em>insertion.add</em></p> + +<pre><code> else { + for (len = 0; ins[i][len] != NULL; len++) ; + tmp = realloc(ins[i], 1 + (len + 1) * sizeof(char *)); + if (tmp == NULL) err(1, "malloc"); + ins[i] = tmp; + } +</code></pre> + +<p>→ <em>insertion.declarations</em></p> + +<pre><code> char **tmp; + int len; +</code></pre> + +<p>Now remains adding the insertion to ins[i]. 
First, we mark the new final +position: → <em>insertion.add</em></p> + +<pre><code> ins[i][len + 1] = NULL; +</code></pre> + +<p>Then, we allocate memory for the string: → <em>insertion.add</em></p> + +<pre><code> ins[i][len] = malloc(1 + strlen(line) * sizeof(char)); + if (ins[i][len] == NULL) err(1, "malloc"); +</code></pre> + +<p>Finally, we copy the string, returning true, signifying that the line +processed indeed was a code line: → <em>insertion.add</em></p> + +<pre><code> strncpy(ins[i][len], line + strlen(code_prefix), + strlen(line) - strlen(code_prefix)); + ins[i][len][strlen(line) - strlen(code_prefix)] = '\0'; + return true; +</code></pre> + +<p>Notice also that we make sure to skip the CODE_PREFIX.</p> +<hr> +<h1>tt.options.c</h1> +<h2>Command-line flags</h2> + +<p>tt can be configured by changing the value of three variables: +→ <em>declarations</em></p> + +<pre><code>char *code_prefix; /* string with which code lines should start */ +char *doc_prefix; /* string with which documentation lines should start */ +char *out_prefix; /* string to which the output file name should be appended */ +</code></pre> + +<p>The default values are the following: → <em>main.options</em></p> + +<pre><code> code_prefix = " "; /* code lines should begin with four spaces */ + doc_prefix = ""; /* other lines are documentation lines */ + out_prefix = "out/"; /* all output files go in the out/ directory */ +</code></pre> + +<p>Each variable is controlled by a single-letter command-line flag, which +should then be immediately -- without any space -- followed by the +desired value. For example, -dfinal. 
would set out_prefix to "final.".</p> + +<p>This convention allows for a very simple parsing loop: → <em>main.options</em></p> + +<pre><code> for (i = 1; i < argc; i++) + if (argv[i][0] == '-') { + switch(argv[i][1]) { + case 'c': + code_prefix = argv[i] + 2; + break; + case 'd': + doc_prefix = argv[i] + 2; + break; + case 'o': + out_prefix = argv[i] + 2; + break; + case '-': + i++; + goto end; + default: + die(USAGE); + } + } else + break; +end: +</code></pre> + +<p>If the given argument begins with a hyphen, it is interpreted as a flag. +If the flag is --, then tt ignores the argument and stops looking for flags. +If the flag is unrecognized, the program dies. If the argument does not +begin with a hyphen, it and anything following it will not be interpreted +as a flag.</p> + +<p>USAGE contains information about how to use tt: → <em>definitions</em></p> + +<pre><code>#define USAGE "usage: %s [-cCODE_PREFIX] [-dDOC_PREFIX] [-oOUTPREFIX] destination ...\n", argv[0] +</code></pre> + +<p>Of course, we can't just trust the user to provide reasonable values, so we +ensure that the code_prefix and doc_prefix are not identical and that the +out_prefix is not empty -- otherwise, tt would overwrite all destination +files: → <em>main.options</em></p> + +<pre><code> if (strcmp(code_prefix, doc_prefix) == 0) + die("code_prefix and doc_prefix cannot be identical\n"); + if (strlen(out_prefix) == 0) + die("out_prefix cannot be empty\n"); +</code></pre> + +<h2>Command-line arguments</h2> + +<p>Having finished parsing command-line flags, it is time to collect the +remaining command-line arguments, which should be one or more destination +files. 
Our loop above, when broken out of or finished naturally, has set +the i variable to the position of the first non-flag argument in argv (or +simply the position after the last flag in argv).</p> + +<p>First, we check if there actually are any further argument, or if i is past +the end of the array: → <em>main.options</em></p> + +<pre><code> if (i == argc) die(USAGE); +</code></pre> + +<p>At least one destination file is required. Then, we save the position of the +first destination file in argv in a special variable for later use: +→ <em>main.options</em></p> + +<pre><code>offset = i; +</code></pre> + +<p>→ <em>main.declarations</em></p> + +<pre><code>int offset; +</code></pre> + +<p>Now, we have successfully finished parsing both flags and arguments, and are +ready to read the lines on the standard input.</p> +<hr> +<h1>tt.output.c</h1> +<h2>Outputting the results</h2> + +<p>At this point, we have collected all references and accompanying insertinos +in the source input. Two tasks remain:</p> + +<ol> +<li>We need to parse the destination files, identifying <<destinations>>.</li> +<li>We need to copy the destination files to the tangled files, overwriting +all <<destinations>> with the corresponding insertions.</li> +</ol> + +<p>Both of these tasks will be performed in the same loop: → <em>main.output</em></p> + +<pre><code> for (k = offset; k < argc; k++) { +</code></pre> + +<p>→ <em>main.declarations</em></p> + +<pre><code> int k; +</code></pre> + +<p>The counter k is set to the offset defined in the options section, which +should be equal to the position of the first destination file in argv. +We loop as long as we haven't reached the end of argv.</p> + +<p>On each iteration of the loop, we can obtain from argv the name of the +destination file and copy it to a new string, adding the out_prefix. 
We'll +call this string tangledfilename: → <em>main.declarations</em></p> + +<pre><code> char *tangledfilename; +</code></pre> + +<p>→ <em>main.output</em></p> + +<pre><code> tangledfilename = malloc(1 + (strlen(out_prefix) + strlen(argv[k]) + 50) * sizeof(char)); + if (tangledfilename == NULL) err(1, "malloc"); + + if (sprintf(tangledfilename, "%s%s", out_prefix, argv[k]) == -1) + err(1, "sprintf"); +</code></pre> + +<p>Now, we can open the tangled file for writing and the original destination +file for reading. We'll call the handle for tangledfile f and the handle for +argv[k] fo, the o standing for "original": → <em>main.declarations</em></p> + +<pre><code> FILE *f; + FILE *fo; +</code></pre> + +<p>→ <em>main.output</em></p> + +<pre><code> f = fopen(tangledfilename, "w"); + if (f == NULL) err(1, "fopen"); + fo = fopen(argv[k], "r"); + if (fo == NULL) err(1, "fopen"); +</code></pre> + +<p>Having successfully opened the files, we have no need for tangledfilename: +→ <em>main.output</em></p> + +<pre><code> free(tangledfilename); +</code></pre> + +<h3>Parsing the current destination file and writing the tangled file</h3> + +<p>The destination file will be parsed in a manner similar to the way in which +the source input was parsed. The same structure will be used: → <em>main.output</em></p> + +<pre><code> line = ""; + line_l = 0; + /* line_s is remembered */ + + while ((b = fgetc(fo)) != EOF) { + c = b; + if (c != '\n') { + if (line_l + 1 > line_s) { + line_s += 20; + tmp = realloc(line, 1 + line_s * sizeof(char)); + if (tmp == NULL) err(1, "malloc"); + line = tmp; + } + line[line_l++] = c; + continue; + } +</code></pre> + +<p>Again, characters will be added to the line variable until a newline is +encountered, at which point the collected line will be finished: +→ <em>main.output</em></p> + +<pre><code>finish2: + line[line_l] = '\0'; + line_l = 0; /* reset line length count */ +</code></pre> + +<p>From here on, however, the loop will look a bit different. 
First, tt takes +note of the line's indentation, saving it to the indent variable: +→ <em>main.declarations</em></p> + +<pre><code> int indent; +</code></pre> + +<p>Only spaces are currently supported: → <em>main.output</em></p> + +<pre><code> ref = line; + for (indent = 0; *ref == ' '; ref++) indent++; +</code></pre> + +<p>Also, as you can see, we re-use the ref variable that was used by the input +parsing, but which is now unused.</p> + +<p>Parsing the <<destination identifier>> is simple: → <em>main.output</em></p> + +<pre><code> if (strncmp(ref, "<<", 2) != 0 + || strncmp(ref + strlen(ref) - 2, ">>", 2) != 0) { + fprintf(f, "%s\n", line); + continue; + } +</code></pre> + +<p>If no potential destination is found, then the line will be written as-is to +the tangled file, and the loop continues parsing the next line of the file. +If a potential destination is found, however, we store it in the ref +variable, removing the << and >> markers: → <em>main.output</em></p> + +<pre><code> ref += 2; + ref[strlen(ref) - 2] = '\0'; +</code></pre> + +<p>There is still one thing to check, before we know that the destination is +valid -- it must not contain any whitespace: → <em>main.output</em></p> + +<pre><code> for (i = 0; i < strlen(ref); i++) + if (isspace(ref[i])) { + fprintf(f, "%s\n", line); + continue; + } +</code></pre> + +<p>Again, if there is whitespace, then the line does not signify a destination +and should be printed as-is to the resulting tangled file.</p> + +<p>As when parsing the input, long identifiers are truncated: → <em>main.output</em></p> + +<pre><code> if (strlen(ref) > REFMAX) + fprintf(stderr, + "Warning: Truncating identifier exceeding %d characters\n", REFMAX); +</code></pre> + +<p>Finally, we check whether the destination actually has been referenced by +the source input, warning the user otherwise: → <em>main.output</em></p> + +<pre><code> for (i = 0; i < refs_c; i++) + if (strncmp(refs[i], ref, REFMAX) == 0) goto found; + fprintf(stderr, 
"Unreferenced destination: %s\n", ref); + continue; +found: +</code></pre> + +<p>Having established that the identified destination is referenced by the +source input, and having stored in the local i variable the reference's +position in the refs variable, we can retrieve the insertion for the +reference by looking at the same position in the ins variable.</p> + +<p>Our first order of business is to make sure that the insertion is not empty +-- in that case, the user is warned, and the loop goes on to the next line: +→ <em>main.output</em></p> + +<pre><code> if (ins[i] == NULL) { + fprintf(stderr, "Warning: Insertion for %s is empty\n", ref); + continue; + } +</code></pre> + +<p>Now, we are ready to write the insertion for the destination to the tangled +file. Because each insertion is stored as an array of strings, each string +containing a single line of the insertion, we use yet another loop: +→ <em>main.output</em></p> + +<pre><code> for (j = 0; ins[i][j] != NULL; j++) { + if (ins[i][j + 1] == NULL) { + if (strlen(ins[i][j]) == 0) + break; /* remove extra newline */ + } + for (m = indent; m > 0; m--) putc(' ', f); + fprintf(f, "%s\n", ins[i][j]); + } + } +</code></pre> + +<p>→ <em>main.declarations</em></p> + +<pre><code>int j; +int m; +</code></pre> + +<p>Apart from simply printing the inserted line to the tangled file, the code +above also skips any empty line at the end of the insertion and adds the +indentation identified when parsing the line in the destination file +containing the destination identifier.</p> + +<p>Now, we have almost finished parsing the current destination file and +writing to the corresponding tangled file, but -- as before -- we still +haven't processed the final line of the file, if that line ends without +a newline. 
To fix that, we just run the finishing code again: +→ <em>main.output</em></p> + +<pre><code> if (c != '\n') { c = '\n'; goto finish2; } +</code></pre> + +<p>Finally, we close the handles to the destination file and tangled file: +→ <em>main.output</em></p> + +<pre><code> fclose(f); + fclose(fo); + } +</code></pre> + +<p>And that is the end of the loop. The loop continues for every destination +file given as an argument, and when it is done, so is the program.</p> +<hr> +</body></html> diff --git a/doc/markdown.exe b/doc/markdown.exe Binary files differnew file mode 100644 index 0000000..7b93052 --- /dev/null +++ b/doc/markdown.exe diff --git a/src/tt.c b/src/tt.c new file mode 100644 index 0000000..8a5c777 --- /dev/null +++ b/src/tt.c @@ -0,0 +1,46 @@ +// tt.c -- tangle to, written by John Ankarström -> tt.c + +#include <stdio.h> + +#ifdef _WIN32 +#include <shlwapi.h> +#pragma comment(lib, "Shlwapi.lib") +#else +#include <ctype.h> +#include <errno.h> +#include <string.h> +#include <strings.h> +#include <stdlib.h> +#include <sys/stat.h> +#endif + +<<definitions>> + +#define err(code, string) do { fprintf(stderr, "%s: %s: %s\n", string, strerror(errno)); exit(code); } while (0) +#define die(...) do { fprintf(stderr, __VA_ARGS__); exit(1); } while (0) +#define true 1 +#define false 0 +#define bool int + +<<declarations>> + +int main(int argc, char *argv[]) { + <<main.declarations>> + <<main.globals>> + <<main.options>> + <<main.input>> + <<main.output>> + return 0; +} + +void reference(char *line) { + <<reference.declarations>> + <<reference.parse>> + <<reference.add>> +} + +bool insertion(char *line) { + <<insertion.declarations>> + <<insertion.parse>> + <<insertion.add>> +}
\ No newline at end of file diff --git a/src/tt.input.c b/src/tt.input.c new file mode 100644 index 0000000..1f88f7d --- /dev/null +++ b/src/tt.input.c @@ -0,0 +1,397 @@ +// References in source input +// ============================================================================ + +// The references found in the source input are stored as an array of strings +// in the global refs variable: -> declarations + +char **refs; /* references */ +int refs_c; /* count */ +int refs_s; /* size (number of elements allocated for) */ + +// It is allocated at the beginning of the execution to contain an array of +// ten strings. The refs_s variable keeps track of the amount of allocated +// space, while refs_c holds the number of actual elements: -> main.globals + + refs_c = 0; + refs_s = 10; + refs = malloc(refs_s * sizeof(char *)); + if (refs == NULL) err(1, "malloc"); + + + +// Insertions in source input +// ============================================================================ + +// tt represents every insertion as an array of strings, where each string +// corresponds to a line to be inserted. All insertions are stored in the +// global ins array: -> declarations + +char ***ins; /* insertions */ + +// The position of each insertion in the ins array is always equal to the +// position of the corresponding reference in the refs array -- to find what +// lines should be inserted at destination X, one must find the value P such +// that refs[P] is equal to X. Then, the corresponding insertion will be equal +// to ins[P]. + +// In other words, the ins array should always be of the same length as refs. +// As such, the refs_s and refs_c variables are used for ins as well. The ins +// array is allocated to hold the same number of elements as refs. 
Furthermore, +// its elements are set to NULL, signifying the absence of any insertion at +// that index: -> main.globals + + ins = malloc(refs_s * sizeof(char **)); + if (ins == NULL) err(1, "malloc"); + for (i = 0; i < refs_s; i++) + ins[i] = NULL; + +// -> main.declarations + +int i; + + + +// Parsing standard input +// ============================================================================ + +// Text is read from the standard input, line by line, into a line variable. +// Two additional variables, line_s and line_l, keep track of the amount of +// allocated space and the actual number of characters in the string, +// respectively: -> main.declarations + + char *line; + int line_l; /* length */ + int line_s; /* size (number of characters allocated for) */ + +// It initially is allocated to hold 100 characters: -> main.input + + line_l = 0; + line_s = 100; + line = malloc(1 + line_s * sizeof(char)); + if (line == NULL) err(1, "malloc"); + +// Lines are read character by character until end of file. First, the read +// character is assigned to the variable b. When it is certain that it is not +// EOF, then it is assigned to the variable c: -> main.input + + while ((b = getchar()) != EOF) { + c = b; + +// -> main.declarations + + int b; + int c; + +// On every iteration, tt checks whether the read character is a newline. If +// not, the character is added to the line variable, which is re-allocated if +// necessary. 
The line_l variable, keeping track of the line's length, is incremented +// as well: -> main.input + + if (c != '\n') { + if (line_l + 1 > line_s) { + line_s += 20; + tmp = realloc(line, 1 + line_s * sizeof(char)); + if (tmp == NULL) err(1, "malloc"); + line = tmp; + } + line[line_l++] = c; + continue; + } + +// The tmp variable used in the re-allocation has a type which is identical to +// that of the line variable: -> main.declarations + + char *tmp; + +// If the read character is a newline, then the program "finishes" the line, +// adding a final NUL character and resetting line_l: -> main.input + +finish: + line[line_l] = '\0'; + line_l = 0; + +// Before parsing the line, we make sure to skip it if it is empty and +// following a non-code line: -> main.input + + if (strlen(code_prefix) == 0 && !wascode && strcmp(line, "") == 0) { + continue; + } + +// This is only desirable if CODE_PREFIX is empty, because then, there is no +// way for the writer of the source input to, for appearance's sake, leave an +// empty line between non-code lines and code lines; any empty line will +// inevitably be interpreted as a code line. The code above circumvents +// this. + +// This aesthetic nicety requires the program to keep track of whether the +// previous line was a code line or not: -> main.declarations + + bool wascode = false; + +// Now, it is time to check whether the read line is a code line (an insertion) +// or a documentation line (containing a reference): -> main.input + + if (!insertion(line)) reference(line); + } + +// The insertion and reference functions modify the ins and refs variables +// according to the contents of the line. + +// Finally, after the loop is finished -- meaning that EOF has been reached -- +// we must check whether the final character was a newline; if it was not, the +// final line of source input has not been processed, as lines are processed +// only when the terminating newline is encountered. 
+ +// Thus, if the final character was not a newline, tt goes back and finishes the +// final line: -> main.input + + if (c != '\n') { c = '\n'; goto finish; } + + + +// Identifying and processing documentation lines containing references +// ============================================================================ + +// The reference function is responsible for processing references in source +// input lines: -> declarations + +void reference(char *line); + + +// Parsing the line +// ---------------------------------------------------------------------------- + +// Documentation lines are formatted as follows: -> + +documentation line ::= DOC_PREFIX anything [reference] + +reference ::= "->" [whitespace] identifier [whitespace] +identifier ::= not whitespace + +// In order to identify whether a given line actually is a documentation line +// containing a reference, the line variable is aliased to ln, which will be +// modified instead of line: -> reference.declarations + + char *ln = line; + +// First, we ensure the line begins with the doc_prefix: -> reference.parse + + if (strncmp(ln, doc_prefix, strlen(doc_prefix)) != 0) return; + +// Then, we ensure that a hyphen is present: -> reference.parse + +hyphen: + if (*ln == '\0') return; + else if (*ln == '-') { ln++; goto lessthan; } + else { ln++; goto hyphen; } + +// After finding the hyphen, we check whether a greater-than sign follows it. +// If not, we keep looking for another hyphen. -> reference.parse + +lessthan: + if (*ln != '>') goto hyphen; + else ln++; + +// After finding a greater-than sign following a hyphen (->), we ignore all +// whitespace, if there is any. 
If the end of the line has been reached, or is +// reached, by this point, then it will be interpreted as an empty reference, +// resetting the current reference (meaning that subsequent code lines will not +// be attached to any reference): -> reference.parse + +space: + if (isspace(*ln)) { ln++; goto space; } + if (*ln == '\0') { ref = ""; return; } + +// Now, a valid reference should be a string of non-space characters, +// followed optionally by whitespace, but not anything other than whitespace: +// -> reference.parse + + for (i = 0; i < strlen(ln); i++) + if (isspace(ln[i])) { + for (j = i; j < strlen(ln); j++) + if (!isspace(ln[j])) return; + break; + } + +// -> reference.declarations + + int i; + int j; + +// After the loop above, i will be set to the index of the first encountered +// space or the end of the line. Any trailing whitespace should be ignored: +// -> reference.parse + + ln[i] = '\0'; + + +// Adding the reference +// ---------------------------------------------------------------------------- + +// At this point, we have found a valid reference, which should now be added to +// the global refs array. 
+ +// First, however, it should be mentioned that reference identifiers have a +// maximum length of 80 characters: -> definitions + +#define REFMAX 80 + +// Thus, any reference identifier longer than REFMAX is truncated, with a +// warning printed to the standard error stream: -> reference.add + + if (strlen(ln) > REFMAX) { + fprintf(stderr, "Warning: Truncating identifier exceeding %d characters\n", + REFMAX); + ln[REFMAX] = '\0'; + } + +// It should also be mentioned that the current reference is always stored in a +// global variable, from which the code(char *) function knows with which +// reference to associate each code line: -> declarations + +char *ref; + +// It is allocated in the beginning of the program's execution: -> main.globals + + ref = malloc(1 + REFMAX * sizeof(char)); + if (ref == NULL) err(1, "malloc"); + +// It is freed before the output section of the program, at which point it is +// no longer needed: -> main.output + + free(ref); + +// The variable is set by our reference function: -> reference.add + + sprintf(ref, "%s", ln); /* set current reference */ + ref[strlen(ln)] = '\0'; + +// Now remains the work of adding the reference to the global refs variable -- +// unless it already exists in refs: -> reference.add + + for (i = 0; i < refs_c; i++) + if (strcmp(refs[i], ref) == 0) return; + +// If the reference truly is new, we notify the user: -> reference.add + + fprintf(stderr, "New reference: %s\n", ref); + +// Before adding the new reference to refs, we re-allocate refs (and therefore +// also ins, which should always be as large as refs), if needed: +// -> reference.add + + if (++refs_c > refs_s) { + refs_s += 10; + tmp = realloc(refs, refs_s * sizeof(char *)); + if (tmp == NULL) err(1, "malloc"); + refs = tmp; + tmp2 = realloc(ins, refs_s * sizeof(char *)); + if (tmp2 == NULL) err(1, "malloc"); + ins = tmp2; + for (i = refs_s - 10; i < refs_s; i++) /* TODO: is this right? 
*/ + ins[i] = NULL; + } + +// -> reference.declarations + + char **tmp; + char ***tmp2; + +// Notice that the code above also increases the refs_c count. Now, everything +// else is done, and the reference is ready to be added: -> reference.add + + refs[refs_c-1] = malloc(1 + REFMAX * sizeof(char)); + sprintf(refs[refs_c-1], "%s", ref); + + + +// Identifying and processing code lines +// ============================================================================ + +// The insertion function is responsible for processing code lines: +// -> declarations + +bool insertion(char *line); + +// It returns true if the given line is a code line (i.e., an insertion). + + +// Parsing the code line +// ---------------------------------------------------------------------------- + +// First of all, if there is no current reference, the insertion should be +// ignored: -> insertion.parse + + if (ref[0] == '\0') return false; + +// If there is a CODE_PREFIX, we ensure that the line begins with it. +// Likewise, if there is a DOC_PREFIX, we ensure that the line does not +// begin with it: -> insertion.parse + + if (strlen(code_prefix) > 0) + if (strncmp(line, code_prefix, strlen(code_prefix)) != 0) return false; + if (strlen(doc_prefix) > 0) + if (strncmp(line, doc_prefix, strlen(doc_prefix)) == 0) return false; + +// As you can see, the DOC_PREFIX is given precedence over the CODE_PREFIX. + + +// Adding the code line to the insertions +// ---------------------------------------------------------------------------- + +// Now that we know the line contains an insertion, we must find the index +// of the current reference in the refs array: -> insertion.add + + for (i = 0; i < refs_c; i++) + if (strcmp(refs[i], ref) == 0) break; + +// -> insertion.declarations + + int i; + +// Our goal is to add the insertion to the corresponding position in the ins +// array. 
If there is no insertion at that position, the value will be NULL: +// -> insertion.add + + if (ins[i] == NULL) { + ins[i] = malloc(1 + 1 * sizeof(char *)); + if (ins[i] == NULL) err(1, "malloc"); + len = 0; + } + +// If ins[i] is not NULL, then it already contains some number of insertion +// strings, terminated by a final NULL value. In order to allocate memory +// for the new insertion, we find the position of the final NULL value, +// corresponding to the length of the ins[i] array: -> insertion.add + + else { + for (len = 0; ins[i][len] != NULL; len++) ; + tmp = realloc(ins[i], 1 + (len + 1) * sizeof(char *)); + if (tmp == NULL) err(1, "malloc"); + ins[i] = tmp; + } + +// -> insertion.declarations + + char **tmp; + int len; + +// Now remains adding the insertion to ins[i]. First, we mark the new final +// position: -> insertion.add + + ins[i][len + 1] = NULL; + +// Then, we allocate memory for the string: -> insertion.add + + ins[i][len] = malloc(1 + strlen(line) * sizeof(char)); + if (ins[i][len] == NULL) err(1, "malloc"); + +// Finally, we copy the string, returning true, signifying that the line +// processed indeed was a code line: -> insertion.add + + strncpy(ins[i][len], line + strlen(code_prefix), + strlen(line) - strlen(code_prefix)); + ins[i][len][strlen(line) - strlen(code_prefix)] = '\0'; + return true; + +// Notice also that we make sure to skip the CODE_PREFIX.
\ No newline at end of file diff --git a/src/tt.options.c b/src/tt.options.c new file mode 100644 index 0000000..f502a7a --- /dev/null +++ b/src/tt.options.c @@ -0,0 +1,90 @@ +// Command-line flags +// ============================================================================ + +// tt can be configured by changing the value of three variables: +// -> declarations + +char *code_prefix; /* string with which code lines should start */ +char *doc_prefix; /* string with which documentation lines should start */ +char *out_prefix; /* string to which the output file name should be appended */ + +// The default values are the following: -> main.options + + code_prefix = " "; /* code lines should begin with four spaces */ + doc_prefix = ""; /* other lines are documentation lines */ + out_prefix = "out/"; /* all output files go in the out/ directory */ + +// Each variable is controlled by a single-letter command-line flag, which +// should then be immediately -- without any space -- followed by the +// desired value. For example, -ofinal. would set out_prefix to "final.". + +// This convention allows for a very simple parsing loop: -> main.options + + for (i = 1; i < argc; i++) + if (argv[i][0] == '-') { + switch(argv[i][1]) { + case 'c': + code_prefix = argv[i] + 2; + break; + case 'd': + doc_prefix = argv[i] + 2; + break; + case 'o': + out_prefix = argv[i] + 2; + break; + case '-': + i++; + goto end; + default: + die(USAGE); + } + } else + break; +end: + +// If the given argument begins with a hyphen, it is interpreted as a flag. +// If the flag is --, then tt ignores the argument and stops looking for flags. +// If the flag is unrecognized, the program dies. If the argument does not +// begin with a hyphen, it and anything following it will not be interpreted +// as a flag. 
+ +// USAGE contains information about how to use tt: -> definitions + +#define USAGE "usage: %s [-cCODE_PREFIX] [-dDOC_PREFIX] [-oOUTPREFIX] destination ...\n", argv[0] + +// Of course, we can't just trust the user to provide reasonable values, so we +// ensure that the code_prefix and doc_prefix are not identical and that the +// out_prefix is not empty -- otherwise, tt would overwrite all destination +// files: -> main.options + + if (strcmp(code_prefix, doc_prefix) == 0) + die("code_prefix and doc_prefix cannot be identical\n"); + if (strlen(out_prefix) == 0) + die("out_prefix cannot be empty\n"); + +// Command-line arguments +// ============================================================================ + +// Having finished parsing command-line flags, it is time to collect the +// remaining command-line arguments, which should be one or more destination +// files. Our loop above, when broken out of or finished naturally, has set +// the i variable to the position of the first non-flag argument in argv (or +// simply the position after the last flag in argv). + +// First, we check if there actually are any further arguments, or if i is past +// the end of the array: -> main.options + + if (i == argc) die(USAGE); + +// At least one destination file is required. Then, we save the position of the +// first destination file in argv in a special variable for later use: +// -> main.options + +offset = i; + +// -> main.declarations + +int offset; + +// Now, we have successfully finished parsing both flags and arguments, and are +// ready to read the lines on the standard input.
\ No newline at end of file diff --git a/src/tt.output.c b/src/tt.output.c new file mode 100644 index 0000000..b1d8e34 --- /dev/null +++ b/src/tt.output.c @@ -0,0 +1,199 @@ +// Outputting the results +// ============================================================================ + +// At this point, we have collected all references and accompanying insertions +// in the source input. Two tasks remain: + +// 1. We need to parse the destination files, identifying <<destinations>>. +// 2. We need to copy the destination files to the tangled files, overwriting +// all <<destinations>> with the corresponding insertions. + +// Both of these tasks will be performed in the same loop: -> main.output + + for (k = offset; k < argc; k++) { + +// -> main.declarations + + int k; + +// The counter k is set to the offset defined in the options section, which +// should be equal to the position of the first destination file in argv. +// We loop as long as we haven't reached the end of argv. + +// On each iteration of the loop, we can obtain from argv the name of the +// destination file and copy it to a new string, adding the out_prefix. We'll +// call this string tangledfilename: -> main.declarations + + char *tangledfilename; + +// -> main.output + + tangledfilename = malloc(1 + (strlen(out_prefix) + strlen(argv[k]) + 50) * sizeof(char)); + if (tangledfilename == NULL) err(1, "malloc"); + + if (sprintf(tangledfilename, "%s%s", out_prefix, argv[k]) == -1) + err(1, "sprintf"); + +// Now, we can open the tangled file for writing and the original destination +// file for reading. 
We'll call the handle for tangledfile f and the handle for +// argv[k] fo, the o standing for "original": -> main.declarations + + FILE *f; + FILE *fo; + +// -> main.output + + f = fopen(tangledfilename, "w"); + if (f == NULL) err(1, "fopen"); + fo = fopen(argv[k], "r"); + if (fo == NULL) err(1, "fopen"); + +// Having successfully opened the files, we have no need for tangledfilename: +// -> main.output + + free(tangledfilename); + + +// Parsing the current destination file and writing the tangled file +// ---------------------------------------------------------------------------- + +// The destination file will be parsed in a manner similar to the way in which +// the source input was parsed. The same structure will be used: -> main.output + + line_l = 0; + /* line_s is remembered */ + + while ((b = fgetc(fo)) != EOF) { + c = b; + if (c != '\n') { + if (line_l + 1 > line_s) { + line_s += 20; + tmp = realloc(line, 1 + line_s * sizeof(char)); + if (tmp == NULL) err(1, "malloc"); + line = tmp; + } + line[line_l++] = c; + continue; + } + +// Again, characters will be added to the line variable until a newline is +// encountered, at which point the collected line will be finished: +// -> main.output + +finish2: + line[line_l] = '\0'; + line_l = 0; /* reset line length count */ + +// From here on, however, the loop will look a bit different. First, tt takes +// note of the line's indentation, saving it to the indent variable: +// -> main.declarations + + int indent; + +// Only spaces are currently supported: -> main.output + + ref = line; + for (indent = 0; *ref == ' '; ref++) indent++; + +// Also, as you can see, we re-use the ref variable that was used by the input +// parsing, but which is now unused. 
+ +// Parsing the <<destination identifier>> is simple: -> main.output + + if (strncmp(ref, "<<", 2) != 0 + || strncmp(ref + strlen(ref) - 2, ">>", 2) != 0) { + fprintf(f, "%s\n", line); + continue; + } + +// If no potential destination is found, then the line will be written as-is to +// the tangled file, and the loop continues parsing the next line of the file. +// If a potential destination is found, however, we store it in the ref +// variable, removing the << and >> markers: -> main.output + + ref += 2; + ref[strlen(ref) - 2] = '\0'; + +// There is still one thing to check, before we know that the destination is +// valid -- it must not contain any whitespace: -> main.output + + for (i = 0; i < strlen(ref); i++) + if (isspace(ref[i])) { + fprintf(f, "%s\n", line); + continue; + } + +// Again, if there is whitespace, then the line does not signify a destination +// and should be printed as-is to the resulting tangled file. + +// As when parsing the input, long identifiers are truncated: -> main.output + + if (strlen(ref) > REFMAX) + fprintf(stderr, + "Warning: Truncating identifier exceeding %d characters\n", REFMAX); + +// Finally, we check whether the destination actually has been referenced by +// the source input, warning the user otherwise: -> main.output + + for (i = 0; i < refs_c; i++) + if (strncmp(refs[i], ref, REFMAX) == 0) goto found; + fprintf(stderr, "Unreferenced destination: %s\n", ref); + continue; +found: + +// Having established that the identified destination is referenced by the +// source input, and having stored in the local i variable the reference's +// position in the refs variable, we can retrieve the insertion for the +// reference by looking at the same position in the ins variable. 
+ +// Our first order of business is to make sure that the insertion is not empty +// -- in that case, the user is warned, and the loop goes on to the next line: +// -> main.output + + if (ins[i] == NULL) { + fprintf(stderr, "Warning: Insertion for %s is empty\n", ref); + continue; + } + +// Now, we are ready to write the insertion for the destination to the tangled +// file. Because each insertion is stored as an array of strings, each string +// containing a single line of the insertion, we use yet another loop: +// -> main.output + + for (j = 0; ins[i][j] != NULL; j++) { + if (ins[i][j + 1] == NULL) { + if (strlen(ins[i][j]) == 0) + break; /* remove extra newline */ + } + for (m = indent; m > 0; m--) putc(' ', f); + fprintf(f, "%s\n", ins[i][j]); + } + } + +// -> main.declarations + +int j; +int m; + +// Apart from simply printing the inserted line to the tangled file, the code +// above also skips any empty line at the end of the insertion and adds the +// indentation identified when parsing the line in the destination file +// containing the destination identifier. + +// Now, we have almost finished parsing the current destination file and +// writing to the corresponding tangled file, but -- as before -- we still +// haven't processed the final line of the file, if that line ends without +// a newline. To fix that, we just run the finishing code again: +// -> main.output + + if (c != '\n') { c = '\n'; goto finish2; } + +// Finally, we close the handles to the destination file and tangled file: +// -> main.output + + fclose(f); + fclose(fo); + } + +// And that is the end of the loop. The loop continues for every destination +// file given as an argument, and when it is done, so is the program.
\ No newline at end of file diff --git a/test/truncation/in.txt b/test/truncation/in.txt index 99a0f74..c8913c1 100644 --- a/test/truncation/in.txt +++ b/test/truncation/in.txt @@ -4,4 +4,4 @@ -> faaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa - far
\ No newline at end of file + far diff --git a/test/truncation/test.txt b/test/truncation/test.txt index 6c2c776..24692ed 100644 --- a/test/truncation/test.txt +++ b/test/truncation/test.txt @@ -1,3 +1,3 @@ <<loooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooon>> -<<faaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaar>>
\ No newline at end of file +<<faaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaar>> @@ -1,4 +1,4 @@ -/* tt.c -- tangle to, written by John Ankarström */ +// tt.c -- tangle to, written by John Ankarström -> tt.c #include <stdio.h> @@ -14,224 +14,88 @@ #include <sys/stat.h> #endif +#define REFMAX 80 +#define USAGE "usage: %s [-cCODE_PREFIX] [-dDOC_PREFIX] [-oOUTPREFIX] destination ...\n", argv[0] + #define err(code, string) do { fprintf(stderr, "%s: %s: %s\n", string, strerror(errno)); exit(code); } while (0) #define die(...) do { fprintf(stderr, __VA_ARGS__); exit(1); } while (0) #define true 1 #define false 0 #define bool int -#define REFMAX 80 - -char *code_prefix; -char *doc_prefix; - -char *ref; /* current reference */ - -char ***ins; /* insertions */ char **refs; /* references */ int refs_c; /* count */ -int refs_s; /* size */ - -void reference(char *line) { - int i, j; - char *ln, **tmp, ***tmp2; - - ln = line; - - /* parse: -> identifier_without_whitespace */ - -start: - if (*ln == '\0') return; - else if (*ln == '-') { ln++; goto arrow; } - else { ln++; goto start; } -arrow: - if (*ln != '>') goto start; - else ln++; -space: - if (*ln == ' ') { ln++; goto space; } - - for (i = 0; i < strlen(ln); i++) - if (isspace(ln[i])) { - for (j = i; j < strlen(ln); j++) - if (!isspace(ln[j])) return; - break; - } - - ln[i] = '\0'; - - if (strlen(ln) > REFMAX) { - fprintf(stderr, "Warning: Truncating identifier exceeding %d characters\n", - REFMAX); - ln[REFMAX] = '\0'; - } - - ref = malloc(1 * strlen(ln) * sizeof(ln)); - sprintf(ref, "%s", ln); /* set current reference */ - ref[strlen(ln)] = 0; - - for (i = 0; i < refs_c; i++) - if (strcmp(refs[i], ref) == 0) return; - - fprintf(stderr, "New reference: %s\n", ref); - - if (++refs_c > refs_s) { - refs_s += 10; - tmp = realloc(refs, refs_s * sizeof(char *)); - if (tmp == NULL) err(1, "malloc"); - refs = tmp; - tmp2 = realloc(ins, refs_s * sizeof(char *)); - if (tmp2 == NULL) err(1, 
"malloc"); - ins = tmp2; - for (i = refs_s - 10; i < refs_s; i++) /* TODO: is this right? */ - ins[i] = NULL; - } - - refs[refs_c-1] = malloc(1 + REFMAX * sizeof(char)); - sprintf(refs[refs_c-1], "%s", ref); -} - -bool code(char *line) { - char **tmp; - int i, j, len; - - if (ref[0] == '\0') return false; - if (strlen(code_prefix) > 0) - if (strncmp(line, code_prefix, strlen(code_prefix)) != 0) return false; - if (strlen(doc_prefix) > 0) - if (strncmp(line, doc_prefix, strlen(doc_prefix)) == 0) return false; - - for (i = 0; i < refs_c; i++) - if (strcmp(refs[i], ref) == 0) break; - - if (ins[i] == NULL) { - ins[i] = malloc(1 + 1 * sizeof(char *)); - if (ins[i] == NULL) err(1, "malloc"); - len = 0; - } else { - for (len = 0; ins[i][len] != NULL; len++) ; - tmp = realloc(ins[i], 1 + (len + 1) * sizeof(char *)); - if (tmp == NULL) err(1, "malloc"); - ins[i] = tmp; - } - - ins[i][len+1] = NULL; - - ins[i][len] = malloc(1 + strlen(line) * sizeof(char)); - if (ins[i][len] == NULL) err(1, "malloc"); - - strncpy(ins[i][len], line + strlen(code_prefix), - strlen(line) - strlen(code_prefix)); - ins[i][len][strlen(line) - strlen(code_prefix)] = 0; - - return true; -} +int refs_s; /* size (number of elements allocated for) */ +char ***ins; /* insertions */ +void reference(char *line); +char *ref; +bool insertion(char *line); +char *code_prefix; /* string with which code lines should start */ +char *doc_prefix; /* string with which documentation lines should start */ +char *out_prefix; /* string to which the output file name should be appended */ int main(int argc, char *argv[]) { - bool finish, iscode, wascode; - char **a, b, c, *line, *out_prefix, *tangledfile, *tmp; - FILE *f, *fo; - int i, j, k, m; - int indent, line_l, line_s, offset; - - ref = malloc(1 + (REFMAX + 4) * sizeof(char)); /* incl. 
<< and >> */ - if (ref == NULL) err(1, "malloc"); - - refs_c = 0; - refs_s = 10; - refs = malloc(refs_s * sizeof(char *)); - ins = malloc(refs_s * sizeof(char **)); - if (refs == NULL || ins == NULL) err(1, "malloc"); - for (i = 0; i < refs_s; i++) - ins[i] = NULL; - - code_prefix = " "; - doc_prefix = ""; - out_prefix = "out/"; - - for (i = 1; i < argc; i++) - if (argv[i][0] == '-') { - switch(argv[i][1]) { - case 'c': - code_prefix = argv[i] + 2; - break; - case 'd': - doc_prefix = argv[i] + 2; - break; - case 'o': - out_prefix = argv[i] + 2; - break; - } - } else - break; - - if (strcmp(code_prefix, doc_prefix) == 0) - die("code_prefix and doc_prefix cannot be identical\n"); - if (strlen(out_prefix) == 0) - die("out_prefix cannot be empty\n"); - + int i; + char *line; + int line_l; /* length */ + int line_s; /* size (number of characters allocated for) */ + int b; + int c; + char *tmp; + bool wascode = false; + int offset; + int k; + char *tangledfilename; + FILE *f; + FILE *fo; + int indent; + int j; + int m; + refs_c = 0; + refs_s = 10; + refs = malloc(refs_s * sizeof(char *)); + if (refs == NULL) err(1, "malloc"); + ins = malloc(refs_s * sizeof(char **)); + if (ins == NULL) err(1, "malloc"); + for (i = 0; i < refs_s; i++) + ins[i] = NULL; + ref = malloc(1 + REFMAX * sizeof(char)); + if (ref == NULL) err(1, "malloc"); + code_prefix = " "; /* code lines should begin with four spaces */ + doc_prefix = ""; /* other lines are documentation lines */ + out_prefix = "out/"; /* all output files go in the out/ directory */ + for (i = 1; i < argc; i++) + if (argv[i][0] == '-') { + switch(argv[i][1]) { + case 'c': + code_prefix = argv[i] + 2; + break; + case 'd': + doc_prefix = argv[i] + 2; + break; + case 'o': + out_prefix = argv[i] + 2; + break; + case '-': + i++; + goto end; + default: + die(USAGE); + } + } else + break; + end: + if (strcmp(code_prefix, doc_prefix) == 0) + die("code_prefix and doc_prefix cannot be identical\n"); + if (strlen(out_prefix) == 0) + 
die("out_prefix cannot be empty\n"); + if (i == argc) die(USAGE); offset = i; - if (offset == argc) die("usage: %s destination ...\n", argv[0]); - - line_l = 0; - line_s = 100; - line = malloc(1 + line_s * sizeof(char)); - if (line == NULL) err(1, "malloc"); - - finish = false; - iscode = false; - wascode = false; - while ((b = getchar()) != EOF) { - c = b; - if (c != '\n') { - if (line_l + 1 > line_s) { - line_s += 20; - tmp = realloc(line, 1 + line_s * sizeof(char)); - if (tmp == NULL) err(1, "malloc"); - line = tmp; - } - line[line_l++] = c; - continue; - } - -finish: - line[line_l] = '\0'; line_l = 0; - - if (strlen(code_prefix) == 0 && !wascode && strcmp(line, "") == 0) { - continue; /* ignore empty lines after doc text */ - } - - iscode = code(line); - if (!iscode) reference(line); - - if (strlen(code_prefix) > 0 && wascode && !iscode) { - code(code_prefix); /* add extra empty line after code block */ - } - wascode = iscode; - } - if (c != '\n' && !finish) { finish = true; goto finish; } - - free(ref); - - for (k = offset; k < argc; k++) { - tangledfile = malloc( - 1 + (strlen(out_prefix) + 50 + sizeof(argv[k])) * sizeof(char) /* ??? 
*/ - ); - if (tangledfile == NULL) err(1, "malloc"); - - if (sprintf(tangledfile, "%s%s", out_prefix, argv[k]) == -1) - err(1, "sprintf"); - - f = fopen(tangledfile, "w"); - if (f == NULL) err(1, "fopen"); - fo = fopen(argv[k], "r"); - if (fo == NULL) err(1, "fopen"); - - free(tangledfile); - - line_l = 0; - finish = false; - while ((b = fgetc(fo)) != EOF) { + line_s = 100; + line = malloc(1 + line_s * sizeof(char)); + if (line == NULL) err(1, "malloc"); + while ((b = getchar()) != EOF) { c = b; if (c != '\n') { if (line_l + 1 > line_s) { @@ -243,56 +107,164 @@ finish: line[line_l++] = c; continue; } - -finish2: + finish: line[line_l] = '\0'; - line_l = 0; /* reset line length count */ - - ref = line; - for (indent = 0; *ref == ' '; ref++) indent++; - - if (strncmp(ref, "<<", 2) != 0 - || strncmp(ref + strlen(ref) - 2, ">>", 2) != 0) { - fprintf(f, "%s\n", line); + line_l = 0; + if (strlen(code_prefix) == 0 && !wascode && strcmp(line, "") == 0) { continue; } - ref += 2; - ref[strlen(ref) - 2] = '\0'; + if (!insertion(line)) reference(line); + } + if (c != '\n') { c = '\n'; goto finish; } + free(ref); + for (k = offset; k < argc; k++) { + tangledfilename = malloc(1 + (strlen(out_prefix) + strlen(argv[k]) + 50) * sizeof(char)); + if (tangledfilename == NULL) err(1, "malloc"); + if (sprintf(tangledfilename, "%s%s", out_prefix, argv[k]) == -1) + err(1, "sprintf"); + f = fopen(tangledfilename, "w"); + if (f == NULL) err(1, "fopen"); + fo = fopen(argv[k], "r"); + if (fo == NULL) err(1, "fopen"); + free(tangledfilename); + line_l = 0; + /* line_s is remembered */ - for (i = 0; i < strlen(ref); i++) - if (isspace(ref[i])) { + while ((b = fgetc(fo)) != EOF) { + c = b; + if (c != '\n') { + if (line_l + 1 > line_s) { + line_s += 20; + tmp = realloc(line, 1 + line_s * sizeof(char)); + if (tmp == NULL) err(1, "malloc"); + line = tmp; + } + line[line_l++] = c; + continue; + } + finish2: + line[line_l] = '\0'; + line_l = 0; /* reset line length count */ + ref = line; + for 
(indent = 0; *ref == ' '; ref++) indent++; + if (strncmp(ref, "<<", 2) != 0 + || strncmp(ref + strlen(ref) - 2, ">>", 2) != 0) { fprintf(f, "%s\n", line); continue; } - - if (strlen(ref) > REFMAX) - fprintf(stderr, - "Warning: Truncating identifier exceeding %d characters\n", REFMAX); - - for (i = 0; i < refs_c; i++) - if (strncmp(refs[i], ref, REFMAX) == 0) goto found; - fprintf(stderr, "Unreferenced destination: %s\n", ref); - continue; -found: - if (ins[i] == NULL) { - fprintf(stderr, "Warning: Insertion for %s is empty\n", ref); + ref += 2; + ref[strlen(ref) - 2] = '\0'; + for (i = 0; i < strlen(ref); i++) + if (isspace(ref[i])) { + fprintf(f, "%s\n", line); + continue; + } + if (strlen(ref) > REFMAX) + fprintf(stderr, + "Warning: Truncating identifier exceeding %d characters\n", REFMAX); + for (i = 0; i < refs_c; i++) + if (strncmp(refs[i], ref, REFMAX) == 0) goto found; + fprintf(stderr, "Unreferenced destination: %s\n", ref); continue; - } - for (j = 0; ins[i][j] != NULL; j++) { - if (ins[i][j + 1] == NULL) { - if (strlen(ins[i][j]) == 0) - break; /* remove extra newline */ + found: + if (ins[i] == NULL) { + fprintf(stderr, "Warning: Insertion for %s is empty\n", ref); + continue; + } + for (j = 0; ins[i][j] != NULL; j++) { + if (ins[i][j + 1] == NULL) { + if (strlen(ins[i][j]) == 0) + break; /* remove extra newline */ + } + for (m = indent; m > 0; m--) putc(' ', f); + fprintf(f, "%s\n", ins[i][j]); } - for (m = indent; m > 0; m--) putc(' ', f); - fprintf(f, "%s\n", ins[i][j]); } + if (c != '\n') { c = '\n'; goto finish2; } + fclose(f); + fclose(fo); } - if (c != '\n' && !finish) { finish = true; goto finish2; } - - fclose(f); - fclose(fo); - } - return 0; } + +void reference(char *line) { + char *ln = line; + int i; + int j; + char **tmp; + char ***tmp2; + if (strncmp(ln, doc_prefix, strlen(doc_prefix)) != 0) return; + hyphen: + if (*ln == '\0') return; + else if (*ln == '-') { ln++; goto lessthan; } + else { ln++; goto hyphen; } + lessthan: + if (*ln != 
'>') goto hyphen; + else ln++; + space: + if (isspace(*ln)) { ln++; goto space; } + if (*ln == '\0') { ref = ""; return; } + for (i = 0; i < strlen(ln); i++) + if (isspace(ln[i])) { + for (j = i; j < strlen(ln); j++) + if (!isspace(ln[j])) return; + break; + } + ln[i] = '\0'; + + if (strlen(ln) > REFMAX) { + fprintf(stderr, "Warning: Truncating identifier exceeding %d characters\n", + REFMAX); + ln[REFMAX] = '\0'; + } + sprintf(ref, "%s", ln); /* set current reference */ + ref[strlen(ln)] = '\0'; + for (i = 0; i < refs_c; i++) + if (strcmp(refs[i], ref) == 0) return; + fprintf(stderr, "New reference: %s\n", ref); + if (++refs_c > refs_s) { + refs_s += 10; + tmp = realloc(refs, refs_s * sizeof(char *)); + if (tmp == NULL) err(1, "malloc"); + refs = tmp; + tmp2 = realloc(ins, refs_s * sizeof(char *)); + if (tmp2 == NULL) err(1, "malloc"); + ins = tmp2; + for (i = refs_s - 10; i < refs_s; i++) /* TODO: is this right? */ + ins[i] = NULL; + } + refs[refs_c-1] = malloc(1 + REFMAX * sizeof(char)); + sprintf(refs[refs_c-1], "%s", ref); +} + +bool insertion(char *line) { + int i; + char **tmp; + int len; + if (ref[0] == '\0') return false; + if (strlen(code_prefix) > 0) + if (strncmp(line, code_prefix, strlen(code_prefix)) != 0) return false; + if (strlen(doc_prefix) > 0) + if (strncmp(line, doc_prefix, strlen(doc_prefix)) == 0) return false; + for (i = 0; i < refs_c; i++) + if (strcmp(refs[i], ref) == 0) break; + if (ins[i] == NULL) { + ins[i] = malloc(1 + 1 * sizeof(char *)); + if (ins[i] == NULL) err(1, "malloc"); + len = 0; + } + else { + for (len = 0; ins[i][len] != NULL; len++) ; + tmp = realloc(ins[i], 1 + (len + 1) * sizeof(char *)); + if (tmp == NULL) err(1, "malloc"); + ins[i] = tmp; + } + ins[i][len + 1] = NULL; + ins[i][len] = malloc(1 + strlen(line) * sizeof(char)); + if (ins[i][len] == NULL) err(1, "malloc"); + strncpy(ins[i][len], line + strlen(code_prefix), + strlen(line) - strlen(code_prefix)); + ins[i][len][strlen(line) - strlen(code_prefix)] = '\0'; + 
return true; +} Binary files differ |