// References in source input
// ============================================================================

// The references found in the source input are stored as an array of strings
// in the global refs variable: -> declarations

char **refs; /* references: identifier strings; refs[P] pairs with ins[P] */
int refs_c; /* count: number of entries of refs currently in use */
int refs_s; /* size (number of elements allocated for), shared with ins */

// It is allocated at the beginning of the execution to contain an array of
// ten strings. The refs_s variable keeps track of the amount of allocated 
// space, while refs_c holds the number of actual elements: -> main.globals

  refs_c = 0;
  refs_s = 10;
  refs = malloc(refs_s * sizeof(char *));
  if (refs == NULL) err(1, "malloc");


// Insertions in source input
// ============================================================================

// tt represents every insertion as an array of strings, where each string
// corresponds to a line to be inserted. All insertions are stored in the
// global ins array: -> declarations

char ***ins; /* insertions: ins[P] is NULL or a NULL-terminated line array */

// The position of each insertion in the ins array is always equal to the
// position of the corresponding reference in the refs array -- to find what
// lines should be inserted at destination X, one must find the value P such
// that refs[P] is equal to X. Then, the corresponding insertion will be equal
// to ins[P].

// In other words, the ins array should always be of the same length as refs.
// As such, the refs_s and refs_c variables are used for ins as well. The ins
// array is allocated to hold the same number of elements as refs. Furthermore,
// its elements are set to NULL, signifying the absence of any insertion at
// that index: -> main.globals

  ins = malloc(refs_s * sizeof(char **));
  if (ins == NULL) err(1, "malloc");
  for (i = 0; i < refs_s; i++)
    ins[i] = NULL;

// -> main.declarations

int i; /* general-purpose loop index (used to initialise ins) */


// Parsing standard input
// ============================================================================

// Text is read from the standard input, line by line, into a line variable.
// Two additional variables, line_s and line_l, keep track of the amount of
// allocated space and the actual number of characters in the string,
// respectively: -> main.declarations

  char *line; /* buffer holding the line currently being read */
  int line_l; /* length (characters stored so far, excluding the '\0') */
  int line_s; /* size (number of characters allocated for, excluding '\0') */

// It initially is allocated to hold 100 characters: -> main.input

  /* Empty buffer with capacity for line_s characters plus the final '\0'. */
  line_s = 100;
  line_l = 0;
  line = malloc(line_s * sizeof(char) + 1);
  if (!line) err(1, "malloc");

// Lines are read character by character until end of file. First, the read
// character is assigned to the variable b. When it is certain that it is not
// EOF, then it is assigned to the variable c: -> main.input

  while ((b = getchar()) != EOF) {  /* b is an int, so EOF stays distinct */
    c = b;

// -> main.declarations

  int b; /* raw getchar() result; an int so that EOF is representable */
  int c = '\n'; /* last character read; starts as '\n' so that the check
                   after the read loop is well defined on empty input */

// First of all, tt ignores any carriage returns: -> main.input

  if (c == '\r') continue; /* drop CR so CRLF input is parsed like LF input */

// On Windows, where carriage returns are used, they will automatically be
// removed anyway. On systems that don't use carriage returns, they might not
// be stripped from the input, which is why tt ignores them.

// Otherwise, on every iteration, tt checks whether the read character is a
// newline. If not, the character is added to the line variable, which is 
// re-allocated if necessary. The line_l, keeping track of the line's length,
// is incremented as well: -> main.input

    if (c != '\n') {
      /* Grow when all line_s character slots are taken; the extra byte of the
         allocation stays reserved for the terminating '\0'. */
      if (line_l >= line_s) {
        line_s += 20;
        tmp = realloc(line, 1 + line_s * sizeof(char));
        if (tmp == NULL) err(1, "realloc");
        line = tmp;
      }
      line[line_l++] = c;
      continue;
    }

// The tmp variable used in the re-allocation has a type which is identical to
// that of the line variable: -> main.declarations

  char *tmp; /* holds realloc's result until it is known to be non-NULL */

// If the read character is a newline, then the program "finishes" the line,
// adding a terminating null character and resetting line_l: -> main.input

finish:
    line[line_l] = '\0'; /* terminate; the buffer always has one spare byte */
    line_l = 0; /* the next line refills the buffer from the start */

// Before parsing the line, we make sure to skip it if it is empty and
// following a non-code line: -> main.input

    /* With no code prefix, swallow an empty line that follows a non-code
       line instead of treating it as code. */
    if (code_prefix[0] == '\0' && !wascode && line[0] == '\0')
      continue;

// This is only desirable if CODE_PREFIX is empty, because then, there is no
// way for the writer of the source input to, for appearance's sake, leave an
// empty line between non-code lines and code lines; any empty line will
// inevitably be interpreted as a code line. The code above circumvents
// this.

// This aesthetical nicety requires the program to keep track of whether the
// previous line was a code line or not: -> main.declarations

  bool wascode = false; /* was the previously processed line a code line? */

// Now, it is time to check whether the read line is a code line (an insertion)
// or a documentation line (containing a reference): -> main.input
    
    /* insertion() returns false for non-code lines; only those are then
       examined for a reference. */
    if (!insertion(line)) reference(line);
  }

// The insertion and reference functions modify the ins and refs variables
// according to the contents of the line.

// Finally, after the loop is finished -- meaning that EOF has been reached --
// we must check whether the final character was a newline. If it was not, the
// final line of source input has not yet been processed, as lines are
// processed only when the terminating newline is encountered.

// Thus, if the final character was not a newline, tt goes back and finishes
// the final line: -> main.input

  /* c is set to '\n' before jumping so that this test fails the next time it
     is reached; the jump into the loop body therefore happens at most once. */
  if (c != '\n') { c = '\n'; goto finish; }


// Identifying and processing documentation lines containing references
// ============================================================================

// The reference function is responsible for processing references in source
// input lines: -> declarations

/* Examine one input line for a reference. A valid reference becomes the
   current reference (ref) and is appended to refs if it is new. The line
   buffer may be modified in place (trailing text is cut off). */
void reference(char *line);


// Parsing the line
// ----------------------------------------------------------------------------

// Documentation lines are formatted as follows: ->

documentation line ::= DOC_PREFIX anything [reference]

reference ::= "->" [whitespace] identifier [whitespace]
identifier ::= not whitespace

// In order to identify whether a given line actually is a documentation line
// containing a reference, the line variable is aliased to ln, which will be
// modified instead of line: -> reference.declarations

  char *ln = line; /* scanning cursor; points into the caller's buffer */

// First, we ensure the line begins with the doc_prefix: -> reference.parse

  /* Not a documentation line: leave the current reference untouched. */
  if (strncmp(ln, doc_prefix, strlen(doc_prefix)) != 0) return;

// Then, we ensure that a hyphen is present: -> reference.parse

hyphen:
  if (*ln == '\0') return; /* end of line reached without finding an arrow */
  else if (*ln == '-') { ln++; goto lessthan; }
  else { ln++; goto hyphen; }

// After finding the hyphen, we check whether a greater-than sign follows it.
// If not, we keep looking for another hyphen. -> reference.parse

lessthan:
  if (*ln != '>') goto hyphen; /* ln is not advanced: this char is re-tested */
  else ln++; /* step past the '>' */

// After finding a greater-than sign following a hyphen (->), we ignore all
// whitespace, if there is any. If the end of the line has been reached, or is
// reached, by this point, then it will be interpreted as an empty reference,
// resetting the current reference (meaning that subsequent code lines will not
// be attached to any reference): -> reference.parse

space:
  /* Cast to unsigned char: isspace() is undefined for negative char values. */
  if (isspace((unsigned char) *ln)) { ln++; goto space; }
  /* Empty reference: clear the current reference in place. ref is a heap
     buffer that is written to and freed later, so it must never be
     re-pointed at a string literal. */
  if (*ln == '\0') { ref[0] = '\0'; return; }

// Now, a valid reference should be a string of non-space characters,
// followed optionally by whitespace, but not anything other than whitespace:
// -> reference.parse

  for (i = 0; i < strlen(ln); i++)
    if (isspace(ln[i])) {
      for (j = i; j < strlen(ln); j++)
        if (!isspace(ln[j])) return;
      break;
    }

// -> reference.declarations

  int i; /* identifier scan index; later reused as a loop counter */
  int j; /* scans the text after the identifier for non-whitespace */

// After the loop above, i will be set to the index of the first encountered
// space or the end of the line. Any trailing whitespace should be ignored:
// -> reference.parse

  ln[i] = '\0'; /* cut off trailing whitespace (writes into the line buffer) */


// Adding the reference
// ----------------------------------------------------------------------------

// At this point, we have found a valid reference, which should now be added to
// the global refs array.

// First, however, it should be mentioned that reference identifiers have a
// maximum length of 80 characters: -> definitions

#define REFMAX 80 /* maximum identifier length, excluding the '\0' */

// Thus, any reference identifier longer than REFMAX is truncated, with a
// warning printed to the standard error stream: -> reference.add
  
  /* Over-long identifiers are shortened to REFMAX characters, with a notice. */
  if (strlen(ln) > REFMAX) {
    ln[REFMAX] = '\0';
    fprintf(stderr,
      "Warning: Truncating identifier exceeding %d characters\n", REFMAX);
  }

// It should also be mentioned that the current reference is always stored in a
// global variable, from which the insertion(char *) function knows with which
// reference to associate each code line: -> declarations

char *ref; /* current reference identifier; an empty string means none */

// It is allocated in the beginning of the program's execution: -> main.globals

  ref = malloc(1 + REFMAX * sizeof(char));
  if (ref == NULL) err(1, "malloc");

// It is freed before the output section of the program, at which point it is
// no longer needed: -> main.output

  free(ref); /* releases the buffer allocated for the current reference */

// The variable is set by our reference function: -> reference.add

  sprintf(ref, "%s", ln); /* set current reference */
  ref[strlen(ln)] = '\0';

// Now remains the work of adding the reference to the global refs variable --
// unless it already exists in refs: -> reference.add

  /* Nothing more to do if this identifier has been recorded before. */
  for (i = 0; i < refs_c; i++) {
    if (!strcmp(refs[i], ref)) return;
  }

// If the reference truly is new, we notify the user: -> reference.add

  fprintf(stderr, "New reference: %s\n", ref); /* goes to stderr, not stdout */

// Before adding the new reference to refs, we re-allocate refs (and therefore
// also ins, which should always be as large as refs), if needed:
// -> reference.add

  /* Count the new reference; if it no longer fits, grow refs and ins together
     by ten slots so that both arrays keep the same capacity. */
  if (++refs_c > refs_s) {
    refs_s += 10;
    tmp = realloc(refs, refs_s * sizeof *refs);
    if (tmp == NULL) err(1, "realloc");
    refs = tmp;
    tmp2 = realloc(ins, refs_s * sizeof *ins);
    if (tmp2 == NULL) err(1, "realloc");
    ins = tmp2;
    /* realloc leaves the ten added slots uninitialised; mark them as having
       no insertion. The slots below refs_s - 10 keep their contents. */
    for (i = refs_s - 10; i < refs_s; i++)
      ins[i] = NULL;
  }

// -> reference.declarations

  char **tmp; /* realloc result for refs, checked before being assigned */
  char ***tmp2; /* realloc result for ins, checked before being assigned */

// Notice that the code above also increases the refs_c count. Now, everything
// else is done, and the reference is ready to be added: -> reference.add

  refs[refs_c-1] = malloc(1 + REFMAX * sizeof(char));
  sprintf(refs[refs_c-1], "%s", ref);


// Identifying and processing code lines
// ============================================================================

// The insertion function is responsible for processing code lines:
// -> declarations

/* Try to treat line as a code line belonging to the current reference.
   Returns true if the line was stored as an insertion, false otherwise. */
bool insertion(char *line);

// It returns true if the given line is a code line (i.e., an insertion).


// Parsing the code line
// ----------------------------------------------------------------------------

// First of all, if there is no current reference, the insertion should be
// ignored: -> insertion.parse

  if (ref[0] == '\0') return false; /* empty ref: no current reference */

// If there is a CODE_PREFIX, we ensure that the line begins with it.
// Likewise, if there is a DOC_PREFIX, we ensure that the line does not
// begin with it: -> insertion.parse

  /* A non-empty code prefix must be present at the start of the line. */
  if (strlen(code_prefix) > 0
      && strncmp(line, code_prefix, strlen(code_prefix)) != 0)
    return false;
  /* A non-empty doc prefix must not be present at the start of the line. */
  if (strlen(doc_prefix) > 0
      && strncmp(line, doc_prefix, strlen(doc_prefix)) == 0)
    return false;

// As you can see, the DOC_PREFIX is given precedence over the CODE_PREFIX.


// Adding the code line to the insertions
// ----------------------------------------------------------------------------

// Now that we know the line contains an insertion, we must find the index
// of the current reference in the refs array: -> insertion.add

  for (i = 0; i < refs_c; i++)
    if (strcmp(refs[i], ref) == 0) break;

// -> insertion.declarations

  int i; /* index of the current reference in refs, and of its slot in ins */

// Our goal is to add the insertion to the corresponding position in the ins
// array. If there is no insertion at that position, the value will be NULL:
// -> insertion.add

  if (ins[i] == NULL) {
    /* Two slots: one for the new line and one for the terminating NULL. */
    ins[i] = malloc(2 * sizeof(char *));
    if (ins[i] == NULL) err(1, "malloc");
    len = 0;
  }

// If ins[i] is not NULL, then it already contains some number of insertion
// strings, terminated by a final NULL value. In order to allocate memory
// for the new insertion, we find the position of the final NULL value,
// corresponding to the length of the ins[i] array: -> insertion.add

  else {
    for (len = 0; ins[i][len] != NULL; len++) ;
    /* len existing lines, plus the new line, plus the terminating NULL. */
    tmp = realloc(ins[i], (len + 2) * sizeof(char *));
    if (tmp == NULL) err(1, "realloc");
    ins[i] = tmp;
  }

// -> insertion.declarations

  char **tmp; /* realloc result for ins[i], checked before being assigned */
  int len; /* number of lines already stored in ins[i] */

// Now remains adding the insertion to ins[i]. First, we mark the new final
// position: -> insertion.add

  ins[i][len + 1] = NULL; /* terminator goes right after the new line's slot */

// Then, we allocate memory for the string: -> insertion.add

  /* Sized for the whole line plus '\0', which also covers the copy made
     without the code prefix. */
  ins[i][len] = malloc(1 + strlen(line) * sizeof(char));
  if (ins[i][len] == NULL) err(1, "malloc");

// Finally, we copy the string, returning true, signifying that the line
// processed indeed was a code line: -> insertion.add

  /* Copy everything after the code prefix, terminating '\0' included. */
  memcpy(ins[i][len], line + strlen(code_prefix),
    strlen(line) - strlen(code_prefix) + 1);
  return true;

// Notice also that we make sure to skip the CODE_PREFIX.