// References in source input // ============================================================================ // The references found in the source input is stored as an array of strings // in the global refs variable: -> declarations char **refs; /* references */ int refs_c; /* count */ int refs_s; /* size (number of elements allocated for) */ // It is allocated at the beginning of the execution to contain an array of // ten strings. The refs_s variable keeps track of the amount of allocated // space, while ref_c holds the number of actual elements: -> main.globals refs_c = 0; refs_s = 10; refs = malloc(refs_s * sizeof(char *)); if (refs == NULL) err(1, "malloc"); // Insertions in source input // ============================================================================ // tt represents every insertion as an array of strings, where each string // corresponds to a line to be inserted. All insertions are stored in the // global ins array: -> declarations char ***ins; /* insertions */ // The position of each insertion in the ins array is always equal to the // position of the corresponding reference in the refs array -- to find what // lines should be inserted at destination X, one must find the value P such // that refs[P] is equal to X. Then, the corresponding insertion will be equal // to ins[P]. // In other words, the ins array should always be of the same length as refs. // As such, the refs_s and refs_c variables are used for ins as well. The ins // array is allocated to hold the same number of elements as refs. Furthermore, // its elements are set to NULL, signifying the absence of any insertion at // that index: -> main.globals ins = malloc(refs_s * sizeof(char **)); if (ins == NULL) err(1, "malloc"); for (i = 0; i < refs_s; i++) ins[i] = NULL; // -> main.declarations int i; // Parsing standard input // ============================================================================ // Text is read from the standard input, line by line, into a line variable. // Two additional variables, line_s and line_l, keep track of the amount of // allocated space and the actual number of characters in the string, // respectively: -> main.declarations char *line; int line_l; /* length */ int line_s; /* size (number of characters allocated for) */ // It initially is allocated to hold 100 characters: -> main.input line_l = 0; line_s = 100; line = malloc(1 + line_s * sizeof(char)); if (line == NULL) err(1, "malloc"); // Lines are read character by character until end of file. First, the read // character is assigned to the variable b. When it is certain that it is not // EOF, then it is assigned to the variable c: -> main.input while ((b = getchar()) != EOF) { c = b; // -> main.declarations int b; int c; // First of all, tt ignores any carriage returns: -> main.input if (c == '\r') continue; // On Windows, where carriage returns are used, they will automatically be // removed anyway. On systems that don't use carriage returns, they might not // be stripped from the input, which is why tt ignores them. // Otherwise, on every iteration, tt checks whether the read character is a // newline. If not, the character is added to the line variable, which is // re-allocated if necessary. The line_l, keeping track of the line's length, // is incremented as well: -> main.input if (c != '\n') { if (line_l + 1 > line_s) { line_s += 20; tmp = realloc(line, 1 + line_s * sizeof(char)); if (tmp == NULL) err(1, "malloc"); line = tmp; } line[line_l++] = c; continue; } // The tmp variable used in the re-allocation has a type which is identical to // that of the line variable: -> main.declarations char *tmp; // If the read character is a newline, then the program "finishes" the line, // adding a final NULL character and resetting line_l: -> main.input finish: line[line_l] = '\0'; line_l = 0; // Before parsing the line, we make sure to skip it if it is empty and // following a non-code line: -> main.input if (strlen(code_prefix) == 0 && !wascode && strcmp(line, "") == 0) { continue; } // This is only desirable if CODE_PREFIX is empty, because then, there is no // way for the writer of the source input to, for appearance's sake, leave an // empty line between non-code lines and code lines; any empty line will // will inevitably be interpreted as a code line. The code above circumvents // this. // This aesthetical nicety requires the program to keep track of whether the // previous line was a code line or not: -> main.declarations bool wascode = false; // Now, it is time to check whether the read line is a code line (an insertion) // or a documentation line (containing a reference): -> main.input if (!insertion(line)) reference(line); } // The insertion and reference functions modify the ins and refs variables // according to the contents of the line. // Finally, after the loop is finished -- meaning that EOF has been reached -- // we must ensure that the final character was not a newline; otherwise, the // final line of source input has not been processed, as lines are processed // only when the terminated newline is encountered. // Thus, if the final character was a newline, tt goes back and finishes the // final line: -> main.input if (c != '\n') { c = '\n'; goto finish; } // Identifying and processing documentation lines containing references // ============================================================================ // The reference function is responsible for processing references in source // input lines: -> declarations void reference(char *line); // Parsing the line // ---------------------------------------------------------------------------- // Documentation lines are formatted as follows: -> documentation line ::= DOC_PREFIX anything [reference] reference ::= "->" [whitespace] identifier [whitespace] identifier ::= not whitespace // In order to identify whether a given line actually is a documentation line // containing a reference, the line variable is aliased to ln, which will be // modified instead of line: -> reference.declarations char *ln = line; // First, we ensure the line begins with the doc_prefix: -> reference.parse if (strncmp(ln, doc_prefix, strlen(doc_prefix)) != 0) return; // Then, we ensure that a hyphen is present: -> reference.parse hyphen: if (*ln == '\0') return; else if (*ln == '-') { ln++; goto lessthan; } else { ln++; goto hyphen; } // After finding the hyphen, we check whether a less-then sign follows it. // If not, we keep looking for another hyphen. -> reference.parse lessthan: if (*ln != '>') goto hyphen; else ln++; // After finding a less-then sign following a hyphen (->), we ignore all // whitespace, if there is any. If the end of the line has been reached, or is // reached, by this point, then it will be interpreted as an empty reference, // resetting the current reference (meaning that subsequent code lines will not // be attached to any reference): -> reference.parse space: if (isspace(*ln)) { ln++; goto space; } if (*ln == '\0') { ref = ""; return; } // Now, a valid reference should be a string of non-space characters, // followed optionally by whitespace, but not anything other than whitespace: // -> reference.parse for (i = 0; i < strlen(ln); i++) if (isspace(ln[i])) { for (j = i; j < strlen(ln); j++) if (!isspace(ln[j])) return; break; } // -> reference.declarations int i; int j; // After the loop above, i will be set to the index of the first encountered // space or the end of the line. Any trailing whitespace should be ignored: // -> reference.parse ln[i] = '\0'; // Adding the reference // ---------------------------------------------------------------------------- // At this point, we have found a valid reference, which should now be added to // the global refs array. // First, however, it should be mentioned that reference identifiers have a // maximum length of 80 characters: -> definitions #define REFMAX 80 // Thus, any reference identifier longer than REFMAX is truncated, with a // warning printed to the standard error stream: -> reference.add if (strlen(ln) > REFMAX) { fprintf(stderr, "Warning: Truncating identifier exceeding %d characters\n", REFMAX); ln[REFMAX] = '\0'; } // It should also be mentioned that the current reference is always stored in a // global variable, from which the code(char *) function knows with which // reference to associate each code line: -> declarations char *ref; // It is allocated in the beginning of the program's execution: -> main.globals ref = malloc(1 + REFMAX * sizeof(char)); if (ref == NULL) err(1, "malloc"); // It is freed before the output section of the program, at which point it is // no longer needed: -> main.output free(ref); // The variable is set by our reference function: -> reference.add sprintf(ref, "%s", ln); /* set current reference */ ref[strlen(ln)] = '\0'; // Now remains the work of adding the reference to the global refs variable -- // unless it already exists in refs: -> reference.add for (i = 0; i < refs_c; i++) if (strcmp(refs[i], ref) == 0) return; // If the reference truly is new, we notify the user: -> reference.add fprintf(stderr, "New reference: %s\n", ref); // Before adding the new reference to refs, we re-allocate refs (and therefore // also ins, which should always be as large as refs), if needed: // -> reference.add if (++refs_c > refs_s) { refs_s += 10; tmp = realloc(refs, refs_s * sizeof(char *)); if (tmp == NULL) err(1, "malloc"); refs = tmp; tmp2 = realloc(ins, refs_s * sizeof(char *)); if (tmp2 == NULL) err(1, "malloc"); ins = tmp2; for (i = refs_s - 10; i < refs_s; i++) /* TODO: is this right? */ ins[i] = NULL; } // -> reference.declarations char **tmp; char ***tmp2; // Notice that the code above also increases the refs_c count. Now, everything // else is done, and the reference is ready to be added: -> reference.add refs[refs_c-1] = malloc(1 + REFMAX * sizeof(char)); sprintf(refs[refs_c-1], "%s", ref); // Identifying and processing code lines // ============================================================================ // The insertion function is responsible for processing code lines: // -> declarations bool insertion(char *line); // It returns true if the given line is a code line (i.e., an insertion). // Parsing the code line // ---------------------------------------------------------------------------- // First of all, if there is no current reference, the insertion should be // ignored: -> insertion.parse if (ref[0] == '\0') return false; // If there is a CODE_PREFIX, we ensure that the line begins with it. // Likewise, if there is a DOC_PREFIX, we ensure that the line does not // begin with it: -> insertion.parse if (strlen(code_prefix) > 0) if (strncmp(line, code_prefix, strlen(code_prefix)) != 0) return false; if (strlen(doc_prefix) > 0) if (strncmp(line, doc_prefix, strlen(doc_prefix)) == 0) return false; // As you can see, the DOC_PREFIX is given precedence over the CODE_PREFIX. // Adding the code line to the insertions // ---------------------------------------------------------------------------- // Now that we know the line contains an insertion, we must find the index // of the current reference in the refs array: -> insertion.add for (i = 0; i < refs_c; i++) if (strcmp(refs[i], ref) == 0) break; // -> insertion.declarations int i; // Our goal is to add the insertion to the corresponding position in the ins // array. If there is no insertion at that position, the value will be NULL: // -> insertion.add if (ins[i] == NULL) { ins[i] = malloc(1 + 1 * sizeof(char *)); if (ins[i] == NULL) err(1, "malloc"); len = 0; } // If ins[i] is not NULL, then it already contains some number of insertion // strings, terminated by a final NULL value. In order to allocate memory // for the new insertion, we find the position of the final NULL value, // corresponding to the length of the ins[i] array: -> insertion.add else { for (len = 0; ins[i][len] != NULL; len++) ; tmp = realloc(ins[i], 1 + (len + 1) * sizeof(char *)); if (tmp == NULL) err(1, "malloc"); ins[i] = tmp; } // -> insertion.declarations char **tmp; int len; // Now remains adding the insertion to ins[i]. First, we mark the new final // position: -> insertion.add ins[i][len + 1] = NULL; // Then, we allocate memory for the string: -> insertion.add ins[i][len] = malloc(1 + strlen(line) * sizeof(char)); if (ins[i][len] == NULL) err(1, "malloc"); // Finally, we copy the string, returning true, signifying that the line // processed indeed was a code line: -> insertion.add strncpy(ins[i][len], line + strlen(code_prefix), strlen(line) - strlen(code_prefix)); ins[i][len][strlen(line) - strlen(code_prefix)] = '\0'; return true; // Notice also that we make sure to skip the CODE_PREFIX.