From c3143fbdb7ea46539023e11cb30a7b14434030f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20Ankarstr=C3=B6m?= Date: Fri, 23 Oct 2020 02:11:59 +0200 Subject: release 1.2 --- src/tt.input.c | 397 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 397 insertions(+) create mode 100644 src/tt.input.c (limited to 'src/tt.input.c') diff --git a/src/tt.input.c b/src/tt.input.c new file mode 100644 index 0000000..1f88f7d --- /dev/null +++ b/src/tt.input.c @@ -0,0 +1,397 @@ +// References in source input +// ============================================================================ + +// The references found in the source input is stored as an array of strings +// in the global refs variable: -> declarations + +char **refs; /* references */ +int refs_c; /* count */ +int refs_s; /* size (number of elements allocated for) */ + +// It is allocated at the beginning of the execution to contain an array of +// ten strings. The refs_s variable keeps track of the amount of allocated +// space, while ref_c holds the number of actual elements: -> main.globals + + refs_c = 0; + refs_s = 10; + refs = malloc(refs_s * sizeof(char *)); + if (refs == NULL) err(1, "malloc"); + + + +// Insertions in source input +// ============================================================================ + +// tt represents every insertion as an array of strings, where each string +// corresponds to a line to be inserted. All insertions are stored in the +// global ins array: -> declarations + +char ***ins; /* insertions */ + +// The position of each insertion in the ins array is always equal to the +// position of the corresponding reference in the refs array -- to find what +// lines should be inserted at destination X, one must find the value P such +// that refs[P] is equal to X. Then, the corresponding insertion will be equal +// to ins[P]. + +// In other words, the ins array should always be of the same length as refs. +// As such, the refs_s and refs_c variables are used for ins as well. The ins +// array is allocated to hold the same number of elements as refs. Furthermore, +// its elements are set to NULL, signifying the absence of any insertion at +// that index: -> main.globals + + ins = malloc(refs_s * sizeof(char **)); + if (ins == NULL) err(1, "malloc"); + for (i = 0; i < refs_s; i++) + ins[i] = NULL; + +// -> main.declarations + +int i; + + + +// Parsing standard input +// ============================================================================ + +// Text is read from the standard input, line by line, into a line variable. +// Two additional variables, line_s and line_l, keep track of the amount of +// allocated space and the actual number of characters in the string, +// respectively: -> main.declarations + + char *line; + int line_l; /* length */ + int line_s; /* size (number of characters allocated for) */ + +// It initially is allocated to hold 100 characters: -> main.input + + line_l = 0; + line_s = 100; + line = malloc(1 + line_s * sizeof(char)); + if (line == NULL) err(1, "malloc"); + +// Lines are read character by character until end of file. First, the read +// character is assigned to the variable b. When it is certain that it is not +// EOF, then it is assigned to the variable c: -> main.input + + while ((b = getchar()) != EOF) { + c = b; + +// -> main.declarations + + int b; + int c; + +// On every iteration, tt checks whether the read character is a newline. If +// not, the character is added to the line variable, which is re-allocated if +// necessary. The line_l, keeping track of the line's length, is incremented +// as well: -> main.input + + if (c != '\n') { + if (line_l + 1 > line_s) { + line_s += 20; + tmp = realloc(line, 1 + line_s * sizeof(char)); + if (tmp == NULL) err(1, "malloc"); + line = tmp; + } + line[line_l++] = c; + continue; + } + +// The tmp variable used in the re-allocation has a type which is identical to +// that of the line variable: -> main.declarations + + char *tmp; + +// If the read character is a newline, then the program "finishes" the line, +// adding a final NULL character and resetting line_l: -> main.input + +finish: + line[line_l] = '\0'; + line_l = 0; + +// Before parsing the line, we make sure to skip it if it is empty and +// following a non-code line: -> main.input + + if (strlen(code_prefix) == 0 && !wascode && strcmp(line, "") == 0) { + continue; + } + +// This is only desirable if CODE_PREFIX is empty, because then, there is no +// way for the writer of the source input to, for appearance's sake, leave an +// empty line between non-code lines and code lines; any empty line will +// will inevitably be interpreted as a code line. The code above circumvents +// this. + +// This aesthetical nicety requires the program to keep track of whether the +// previous line was a code line or not: -> main.declarations + + bool wascode = false; + +// Now, it is time to check whether the read line is a code line (an insertion) +// or a documentation line (containing a reference): -> main.input + + if (!insertion(line)) reference(line); + } + +// The insertion and reference functions modify the ins and refs variables +// according to the contents of the line. + +// Finally, after the loop is finished -- meaning that EOF has been reached -- +// we must ensure that the final character was not a newline; otherwise, the +// final line of source input has not been processed, as lines are processed +// only when the terminated newline is encountered. + +// Thus, if the final character was a newline, tt goes back and finishes the +// final line: -> main.input + + if (c != '\n') { c = '\n'; goto finish; } + + + +// Identifying and processing documentation lines containing references +// ============================================================================ + +// The reference function is responsible for processing references in source +// input lines: -> declarations + +void reference(char *line); + + +// Parsing the line +// ---------------------------------------------------------------------------- + +// Documentation lines are formatted as follows: -> + +documentation line ::= DOC_PREFIX anything [reference] + +reference ::= "->" [whitespace] identifier [whitespace] +identifier ::= not whitespace + +// In order to identify whether a given line actually is a documentation line +// containing a reference, the line variable is aliased to ln, which will be +// modified instead of line: -> reference.declarations + + char *ln = line; + +// First, we ensure the line begins with the doc_prefix: -> reference.parse + + if (strncmp(ln, doc_prefix, strlen(doc_prefix)) != 0) return; + +// Then, we ensure that a hyphen is present: -> reference.parse + +hyphen: + if (*ln == '\0') return; + else if (*ln == '-') { ln++; goto lessthan; } + else { ln++; goto hyphen; } + +// After finding the hyphen, we check whether a less-then sign follows it. +// If not, we keep looking for another hyphen. -> reference.parse + +lessthan: + if (*ln != '>') goto hyphen; + else ln++; + +// After finding a less-then sign following a hyphen (->), we ignore all +// whitespace, if there is any. If the end of the line has been reached, or is +// reached, by this point, then it will be interpreted as an empty reference, +// resetting the current reference (meaning that subsequent code lines will not +// be attached to any reference): -> reference.parse + +space: + if (isspace(*ln)) { ln++; goto space; } + if (*ln == '\0') { ref = ""; return; } + +// Now, a valid reference should be a string of non-space characters, +// followed optionally by whitespace, but not anything other than whitespace: +// -> reference.parse + + for (i = 0; i < strlen(ln); i++) + if (isspace(ln[i])) { + for (j = i; j < strlen(ln); j++) + if (!isspace(ln[j])) return; + break; + } + +// -> reference.declarations + + int i; + int j; + +// After the loop above, i will be set to the index of the first encountered +// space or the end of the line. Any trailing whitespace should be ignored: +// -> reference.parse + + ln[i] = '\0'; + + +// Adding the reference +// ---------------------------------------------------------------------------- + +// At this point, we have found a valid reference, which should now be added to +// the global refs array. + +// First, however, it should be mentioned that reference identifiers have a +// maximum length of 80 characters: -> definitions + +#define REFMAX 80 + +// Thus, any reference identifier longer than REFMAX is truncated, with a +// warning printed to the standard error stream: -> reference.add + + if (strlen(ln) > REFMAX) { + fprintf(stderr, "Warning: Truncating identifier exceeding %d characters\n", + REFMAX); + ln[REFMAX] = '\0'; + } + +// It should also be mentioned that the current reference is always stored in a +// global variable, from which the code(char *) function knows with which +// reference to associate each code line: -> declarations + +char *ref; + +// It is allocated in the beginning of the program's execution: -> main.globals + + ref = malloc(1 + REFMAX * sizeof(char)); + if (ref == NULL) err(1, "malloc"); + +// It is freed before the output section of the program, at which point it is +// no longer needed: -> main.output + + free(ref); + +// The variable is set by our reference function: -> reference.add + + sprintf(ref, "%s", ln); /* set current reference */ + ref[strlen(ln)] = '\0'; + +// Now remains the work of adding the reference to the global refs variable -- +// unless it already exists in refs: -> reference.add + + for (i = 0; i < refs_c; i++) + if (strcmp(refs[i], ref) == 0) return; + +// If the reference truly is new, we notify the user: -> reference.add + + fprintf(stderr, "New reference: %s\n", ref); + +// Before adding the new reference to refs, we re-allocate refs (and therefore +// also ins, which should always be as large as refs), if needed: +// -> reference.add + + if (++refs_c > refs_s) { + refs_s += 10; + tmp = realloc(refs, refs_s * sizeof(char *)); + if (tmp == NULL) err(1, "malloc"); + refs = tmp; + tmp2 = realloc(ins, refs_s * sizeof(char *)); + if (tmp2 == NULL) err(1, "malloc"); + ins = tmp2; + for (i = refs_s - 10; i < refs_s; i++) /* TODO: is this right? */ + ins[i] = NULL; + } + +// -> reference.declarations + + char **tmp; + char ***tmp2; + +// Notice that the code above also increases the refs_c count. Now, everything +// else is done, and the reference is ready to be added: -> reference.add + + refs[refs_c-1] = malloc(1 + REFMAX * sizeof(char)); + sprintf(refs[refs_c-1], "%s", ref); + + + +// Identifying and processing code lines +// ============================================================================ + +// The insertion function is responsible for processing code lines: +// -> declarations + +bool insertion(char *line); + +// It returns true if the given line is a code line (i.e., an insertion). + + +// Parsing the code line +// ---------------------------------------------------------------------------- + +// First of all, if there is no current reference, the insertion should be +// ignored: -> insertion.parse + + if (ref[0] == '\0') return false; + +// If there is a CODE_PREFIX, we ensure that the line begins with it. +// Likewise, if there is a DOC_PREFIX, we ensure that the line does not +// begin with it: -> insertion.parse + + if (strlen(code_prefix) > 0) + if (strncmp(line, code_prefix, strlen(code_prefix)) != 0) return false; + if (strlen(doc_prefix) > 0) + if (strncmp(line, doc_prefix, strlen(doc_prefix)) == 0) return false; + +// As you can see, the DOC_PREFIX is given precedence over the CODE_PREFIX. + + +// Adding the code line to the insertions +// ---------------------------------------------------------------------------- + +// Now that we know the line contains an insertion, we must find the index +// of the current reference in the refs array: -> insertion.add + + for (i = 0; i < refs_c; i++) + if (strcmp(refs[i], ref) == 0) break; + +// -> insertion.declarations + + int i; + +// Our goal is to add the insertion to the corresponding position in the ins +// array. If there is no insertion at that position, the value will be NULL: +// -> insertion.add + + if (ins[i] == NULL) { + ins[i] = malloc(1 + 1 * sizeof(char *)); + if (ins[i] == NULL) err(1, "malloc"); + len = 0; + } + +// If ins[i] is not NULL, then it already contains some number of insertion +// strings, terminated by a final NULL value. In order to allocate memory +// for the new insertion, we find the position of the final NULL value, +// corresponding to the length of the ins[i] array: -> insertion.add + + else { + for (len = 0; ins[i][len] != NULL; len++) ; + tmp = realloc(ins[i], 1 + (len + 1) * sizeof(char *)); + if (tmp == NULL) err(1, "malloc"); + ins[i] = tmp; + } + +// -> insertion.declarations + + char **tmp; + int len; + +// Now remains adding the insertion to ins[i]. First, we mark the new final +// position: -> insertion.add + + ins[i][len + 1] = NULL; + +// Then, we allocate memory for the string: -> insertion.add + + ins[i][len] = malloc(1 + strlen(line) * sizeof(char)); + if (ins[i][len] == NULL) err(1, "malloc"); + +// Finally, we copy the string, returning true, signifying that the line +// processed indeed was a code line: -> insertion.add + + strncpy(ins[i][len], line + strlen(code_prefix), + strlen(line) - strlen(code_prefix)); + ins[i][len][strlen(line) - strlen(code_prefix)] = '\0'; + return true; + +// Notice also that we make sure to skip the CODE_PREFIX. \ No newline at end of file -- cgit v1.2.3