/*
 * Converts a fixed list of binary ".doc" files into "MT<name>.html" pages.
 *
 * What the code relies on (all offsets are byte offsets into the file image):
 *   - offset 14: 32-bit little-endian position of the end of the text;
 *   - offset 18 / 20: 16-bit page numbers (128-byte pages) of the paragraph
 *     formatting area and of the area following it;
 *   - text starts at offset 128; character formatting pages start at the
 *     first 128-byte boundary after the text.
 *
 * NOTE(review): many string literals below look as if their HTML markup was
 * stripped from this copy of the file (empty format strings, format strings
 * with unused arguments).  Only the literals whose intended text is pinned
 * down by the surrounding code were restored; the others are kept as found
 * and should be restored from a pristine copy.
 */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define ALLOC(T) (T*)malloc(sizeof(T))
/* Duplicate string S into freshly allocated storage D (D is NULL on failure). */
#define STRCPY(D,S) do { (D) = (char*)malloc(strlen(S)+1); if ((D) != NULL) strcpy((D),(S)); } while (0)

typedef unsigned long lword;
typedef unsigned short word;
typedef unsigned char byte;
typedef unsigned int bool;
#define TRUE 1
#define FALSE 0

enum {
    DOC_BUF_SIZE  = 200000, /* size of the file image buffer              */
    FMT_PAGE_SIZE = 128,    /* size of one formatting page                */
    TEXT_START    = 128,    /* offset of the first text byte              */
    MAX_STYLES    = 10000,  /* capacity of styles[]                       */
    MAX_REFLABELS = 1000,   /* capacity of the reference label table      */
    REF_NAME_SIZE = 30      /* size of the scratch name/section buffers   */
};

/* Input documents and the base name of the HTML page generated for each. */
struct {
    char *in;
    char *out;
} thefiles [] = {
    { "aapte.doc", "" },
    { "intro.doc", "intro" },
    { "main.doc", "main" },
    { "bintree.doc", "bintree" },
    { "listing.doc", "listing" },
    { "definiti.doc", "definitions" },
    { "parser.doc", "parser" },
    { "scanner.doc", "scanner" },
    { "errors.doc", "errors" },
    { "pass2.doc", "pass2" },
    { "trans.doc", "trans" },
    { "depend.doc", "depend" },
    { "allsmp.doc", "allsmp" },
    { "genera.doc", "genera" },
    { "1passrel.doc", "onepassrel" },
    { "gtypes.doc", "gtypes" },
    { "gass.doc", "gass" },
    { "genpro.doc", "genpro" },
    { "printtre.doc", "printtree" },
    { "perform.doc", "perform" },
    { "openfile.doc", "openfiles" },
    { "screen.doc", "screen" },
    { "vt100.doc", "vt100" },
    { "memo2.doc", "two" },
};

/* Image of the document currently being processed (allocated in main). */
byte *buf;

/* Read a 32-bit little-endian value from buf at offset pos. */
lword glword(long pos)
{
    lword b1 = buf[pos],
          b2 = buf[pos+1],
          b3 = buf[pos+2],
          b4 = buf[pos+3];

    return b1 + (b2 << 8) + (b3 << 16) + (b4 << 24);
}

/* Read a 16-bit little-endian value from buf at offset pos. */
word gword(long pos)
{
    word b1 = buf[pos],
         b2 = buf[pos+1];

    return b1 + (b2 << 8);
}

/*
 * One formatting run: text positions f (start) and t (end), the kind of
 * run ('C' or 'S', as passed by scan_file) and the offset in buf of the
 * length-prefixed descriptor bytes.
 */
typedef struct {
    lword f;
    lword t;
    char mode;
    lword descr;
} style_t;

/* All runs of the current document, kept sorted by ascending f. */
style_t *styles[MAX_STYLES];
int nr_styles = 0;

/*
 * Insert a run into styles[], before the first entry whose f is not
 * smaller than the new one.  A full table drops the run with a message;
 * an allocation failure terminates the program.
 */
void add_style(lword f, lword t, char mode, lword descr)
{
    int i = 0, j;
    style_t *s;

    if (nr_styles >= MAX_STYLES) {
        fprintf(stderr, "style table full, run ignored\n");
        return;
    }
    s = ALLOC(style_t);
    if (s == NULL) {
        fprintf(stderr, "allocation failed\n");
        exit(EXIT_FAILURE);
    }
    s->f = f;
    s->t = t;
    s->mode = mode;
    s->descr = descr;

    while (i < nr_styles && styles[i]->f < f)
        i++;
    for (j = nr_styles; j > i; j--)
        styles[j] = styles[j-1];
    styles[i] = s;
    nr_styles++;
}

/*
 * Debug aid: print the 128 bytes at offset p as 8 rows of 16 columns,
 * each byte preceded by its marker character from m.  No-op if f is NULL.
 */
void dump(lword p, char m[], FILE *f)
{
    int i;
    int j;

    if (f == NULL)
        return;

    for (i = 0; i < 8; i++) {
        for (j = 0; j < 16; j++)
            fprintf(f, "%c %02x ", m[i*16 + j], buf[p + i*16 + j]);
        fprintf(f, "\n");
    }
}

/* Set marking[idx] = c, ignoring indices outside the 128-byte page. */
static void mark_byte(char marking[], lword idx, char c)
{
    if (idx < FMT_PAGE_SIZE)
        marking[idx] = c;
}

/*
 * Walk the formatting pages in [begin, end) and register one style run
 * per page entry.  Layout as read by this code: bytes 0..3 hold the text
 * position where the first run starts, byte 127 holds the entry count,
 * entries are 6 bytes from offset 4 (32-bit end position, 16-bit
 * descriptor offset relative to page offset 4; 0xffff means "none").
 * When f is non-NULL a trace and an annotated hex dump are written to it.
 */
void parse_formatting(lword begin, lword end, char mode, FILE *f)
{
    lword cblock;

    for (cblock = begin; cblock < end; cblock += FMT_PAGE_SIZE) {
        char marking[FMT_PAGE_SIZE];
        lword at = glword(cblock);
        lword prev_p = at;
        int i;

        if (f)
            fprintf(f, "block at %lx: %lx\n", cblock, at);

        for (i = 0; i < FMT_PAGE_SIZE; i++)
            marking[i] = ' ';
        marking[FMT_PAGE_SIZE - 1] = '#';

        for (i = 0; i < buf[cblock + 127]; i += 1) {
            lword p = glword(cblock + 4 + i * 6);
            word v = gword(cblock + 8 + i * 6);

            /* The markers are diagnostics only; entries of a corrupt page
               must not write outside marking[]. */
            mark_byte(marking, 4 + (lword)i * 6, 'p');
            mark_byte(marking, 8 + (lword)i * 6, 'r');

            if (v == 0xffff) {
                if (f)
                    fprintf(f, "%6lx : end\n", p);
            } else if (v < 128) {
                int l = buf[cblock + v + 4];
                int k;

                mark_byte(marking, (lword)v + 4, mode);
                add_style(prev_p, p, mode, cblock + v + 4);

                if (f)
                    fprintf(f, "%6lx : [%3d]", p, v);
                for (k = 0; k < l; k++) {
                    if (f)
                        fprintf(f, " %02x", buf[cblock + v + 5 + k]);
                    mark_byte(marking, (lword)v + 5 + k, 'd');
                }
                if (f)
                    fprintf(f, "\n");
            } else if (f) {
                /* the position argument was missing here */
                fprintf(f, "%6lx : ???\n", p);
            }
            prev_p = p;
        }
        dump(cblock, marking, f);
    }
}

lword endtext;
lword begin_char_format;
lword begin_par_format;
lword begin_x1_format;

/*
 * Load a document from fin into buf and rebuild the style table.
 *
 * buf  destination for the file bytes; callers pass the global image
 *      buffer, which is what glword()/gword() read from.
 * fin  open input stream.
 * f    optional trace output, may be NULL.
 *
 * Sets endtext, begin_char_format, begin_par_format, begin_x1_format.
 */
void scan_file(char *buf, FILE *fin, FILE *f)
{
    long fl;
    int k;

    /* release the runs of the previously scanned document */
    for (k = 0; k < nr_styles; k++) {
        free(styles[k]);
        styles[k] = NULL;
    }
    nr_styles = 0;

    fl = (long)fread(buf, 1, DOC_BUF_SIZE, fin);
    if (f) fprintf(f, "bytes read: %ld\n", fl);

    endtext = glword(14);
    if (f) fprintf(f, "end text at: %lx\n", endtext);

    begin_char_format = 128 * ((endtext + 127) / 128);
    if (f) fprintf(f, "begin char format at: %lx\n", begin_char_format);

    begin_par_format = 128 * (lword)gword(18);
    if (f) fprintf(f, "begin par fornat at: %lx\n", begin_par_format);

    begin_x1_format = 128 * (lword)gword(20);
    if (f) fprintf(f, "begin x1 fornat at: %lx\n", begin_x1_format);

    parse_formatting(begin_char_format, begin_par_format, 'C', f);
    parse_formatting(begin_par_format, begin_x1_format, 'S', f);
}

/*
 * Write the text from offset 128 up to and including endtext to f, with
 * <Xn ...> / </Xn> markers where style run n (mode X) starts and ends and
 * with control/high bytes made visible.
 */
void dump_text(lword endtext, FILE *f)
{
    lword p;
    int i = 0;

    for (p = TEXT_START; p <= endtext; p++) {
        for (i = 0; i < nr_styles; i++) {
            if (styles[i]->f == p) {
                int l = buf[styles[i]->descr];
                int k;

                fprintf(f, "<%c%d", styles[i]->mode, i);
                for (k = 0; k < l; k++)
                    fprintf(f, " %02x", buf[styles[i]->descr + k + 1]);
                fprintf(f, ">");
            }
            if (styles[i]->t == p) {
                /* The format string was empty although mode and index are
                   passed; restored as the counterpart of the opening
                   marker above. */
                fprintf(f, "</%c%d>", styles[i]->mode, i);
            }
        }

        if (buf[p] == 255)
            fprintf(f, "_");
        else if (buf[p] == 10)
            ;
        else if (buf[p] == 13 || buf[p] == 11)
            fprintf(f, "\n");
        else if (buf[p] == 9)
            fprintf(f, " ");
        else if (buf[p] < 32 || buf[p] > 128)
            fprintf(f, "\\%d", buf[p]);
        else
            fprintf(f, "%c", buf[p]);
    }
    fprintf(f, "\n");
}

/*
 * Decide whether s names one of the generated pages.  On success *ref is
 * set to the page base name (s itself for table entries, so it is only
 * valid as long as s is) and 1 is returned; otherwise 0.
 */
int is_file_name(char *s, char **ref)
{
    size_t i;

    for (i = 0; i < sizeof(thefiles)/sizeof(thefiles[0]); i++)
        if (strcmp(s, thefiles[i].out) == 0) {
            *ref = s;
            return 1;
        }
    if (strcmp(s, "Faase88") == 0) {
        *ref = "";
        return 1;
    }
    if (strcmp(s, "Faase89") == 0) {
        *ref = "intro";
        return 1;
    }
    return 0;
}

/*
 * Table of (page name, section) pairs that are referenced somewhere.
 *
 * find != 0: only look the pair up.
 * find == 0: look it up and record it when it is not present yet.
 *
 * Returns 1 when the pair was already present, 0 otherwise.  The pair is
 * echoed to stdout on every call.
 */
int reflabel(int find, char *name, char *section)
{
    static struct reflabel_T {
        char *name;
        char *section;
    } *reflabels[MAX_REFLABELS];
    static int nr_reflabels = 0;
    int i;

    printf("%s %s|\n", name, section);

    for (i = 0; i < nr_reflabels; i++)
        if (   strcmp(reflabels[i]->name, name) == 0
            && strcmp(reflabels[i]->section, section) == 0)
            return 1;

    if (!find) {
        struct reflabel_T *entry;

        if (nr_reflabels >= MAX_REFLABELS) {
            fprintf(stderr, "reference label table full\n");
            return 0;
        }
        entry = ALLOC(struct reflabel_T);
        if (entry == NULL) {
            fprintf(stderr, "allocation failed\n");
            exit(EXIT_FAILURE);
        }
        STRCPY(entry->name, name);
        STRCPY(entry->section, section);
        if (entry->name == NULL || entry->section == NULL) {
            fprintf(stderr, "allocation failed\n");
            exit(EXIT_FAILURE);
        }
        reflabels[nr_reflabels++] = entry;
    }
    return 0;
}

/* Non-zero when the word before a number marks it as a section reference;
   "and" only counts directly after a plural form (plr). */
static int introduces_section_ref(const char *before, int plr)
{
    return strcmp(before, "sections") == 0
        || strcmp(before, "subsections") == 0
        || strcmp(before, "section") == 0
        || strcmp(before, "subsection") == 0
        || strcmp(before, "see") == 0
        || (plr && strcmp(before, "and") == 0);
}

/* Non-zero for the plural words that allow a following "and <number>". */
static int is_plural_section_word(const char *before)
{
    return strcmp(before, "sections") == 0
        || strcmp(before, "subsections") == 0;
}

/*
 * First pass over the text of the loaded document: record every section
 * that is referenced, either as "[name section, ...]" (reference into the
 * page called name) or as "section 1.2" style wording (reference into the
 * current page, out).  Progress is echoed to stdout.
 */
void find_reflabels(char *out)
{
    lword p = TEXT_START;
    int ip = 0;

    while (p < endtext) {
        int pstyle = 0;
        int pre = 0;
        int header = 0;
        int plr = 0;

        /* find style for paragraph */
        for (; ip < nr_styles && styles[ip]->f <= p; ip++)
            if (styles[ip]->f == p && styles[ip]->mode == 'S'
                /*&& buf[styles[ip]->descr] == 4*/)
                pstyle = buf[styles[ip]->descr + 1];
        header = pstyle == 177 || pstyle == 179 || pstyle == 181 || pstyle == 183;
        pre = pstyle == 63 || pstyle == 107 || pstyle == 115;

        while (p < endtext && buf[p] != 10) {
            if (!pre && buf[p] == '[') {
                lword r;
                int n;
                char name[REF_NAME_SIZE];
                char section[REF_NAME_SIZE];
                char *ref;

                name[0] = '\0';
                section[0] = '\0';
                for (r = p; buf[r] != ']' && buf[r] != '"'; r++)
                    printf("%c", buf[r]);
                printf("]\n");
                p++;
                while (p < r) {
                    /* scan name; over-long names are truncated instead of
                       overrunning the buffer */
                    n = 0;
                    if (isalpha(buf[p])) {
                        while (p < r && buf[p] != ' ' && buf[p] != 255 && buf[p] != ',') {
                            if (n < REF_NAME_SIZE - 1)
                                name[n++] = (char)buf[p];
                            p++;
                        }
                        if (n > 0)
                            name[n] = '\0';
                    }
                    while (p < r && (buf[p] == ' ' || buf[p] == 255))
                        p++;
                    n = 0;
                    while (p < r && (buf[p] == '.' || isdigit(buf[p]))) {
                        if (n < REF_NAME_SIZE - 1)
                            section[n++] = (char)buf[p];
                        p++;
                    }
                    section[n] = '\0';
                    if (   is_file_name(name, &ref)
                        && (buf[p] == ',' || buf[p] == ']' || buf[p] == '-')) {
                        reflabel(0, ref, section);
                    }
                    while (p < r && !isalpha(buf[p]) && !isdigit(buf[p]))
                        p++;
                }
            } else if (!pre && buf[p] == '(') {
                lword r;
                int n;
                char name[REF_NAME_SIZE];
                char *ref;

                name[0] = '\0';
                for (n = 0, r = p+1; buf[r] != ')' && buf[r] != ' '; r++) {
                    printf("%c", buf[r]);
                    if (n < REF_NAME_SIZE - 1)
                        name[n++] = (char)buf[r];
                }
                name[n] = '\0';
                printf("\n");
                if (buf[r] == ')' && is_file_name(name, &ref))
                    printf(" MT%s.html\n", ref);
                p = r;
            } else if (!pre && isdigit(buf[p]) && (buf[p-1] == ' ' || buf[p-1] == 255)) {
                lword r, s;
                lword t = p - 1;  /* start of the blanks before the number */
                int n;
                char section[REF_NAME_SIZE];
                char before[REF_NAME_SIZE];

                for (n = 0, r = p; isdigit(buf[r]) || buf[r] == '.'; r++) {
                    if (n < REF_NAME_SIZE - 1)
                        section[n++] = (char)buf[r];
                }
                if (n > 0 && section[n-1] == '.')
                    n--;
                section[n] = '\0';

                /* collect the word in front of the number */
                for (s = p-1; buf[s] == ' ' || buf[s] == 255; s--)
                    t = s;
                for (; isalpha(buf[s]); s--)
                    ;
                s++;
                for (n = 0; s < t; s++) {
                    if (n < REF_NAME_SIZE - 1)
                        before[n++] = (char)buf[s];
                }
                before[n] = '\0';

                printf("Section%d |%s|%s|%s|\n", header, out, before, section);
                if (introduces_section_ref(before, plr)) {
                    plr = is_plural_section_word(before);
                    printf("x");
                    reflabel(0, out, section);
                }
                p++;
            } else {
                p++;
            }
        }
        while (buf[p] == 10)
            p++;
    }
}

/* Write one byte of reference text, mapping byte 255 to a blank. */
static void put_ref_char(FILE *f, byte c)
{
    if (c == 255)
        fprintf(f, " ");
    else
        fprintf(f, "%c", c);
}

/*
 * Second pass: write the loaded document to f as the page called out.
 * Paragraph styles ('S' runs) select the block markup, character styles
 * ('C' runs) the inline markup; bracketed references and section numbers
 * are turned into links.
 *
 * NOTE(review): see the file header - most markup literals in this
 * function appear damaged and are reproduced as found.  Line breaks that
 * were embedded raw in string literals are written as "\n" here, which
 * yields the same bytes at run time.
 */
void dump_html(FILE *f, char *out)
{
    lword p;
    int ip = 0;          /* next paragraph style to look at            */
    int ci = 0;          /* current position in styles[] for 'C' runs  */
    int in_math = 0;
    int cur_level = 0;
    char levels[10];
    int cur_table = 0;
    int pstyle = 0;

    /* find style for first paragraph */
    for (; ip < nr_styles && styles[ip]->f <= TEXT_START; ip++)
        if (styles[ip]->f == TEXT_START && styles[ip]->mode == 'S'
            /*&& buf[styles[ip]->descr] == 4*/)
            pstyle = buf[styles[ip]->descr + 1];

    p = TEXT_START;
    if (pstyle == 207) {
        for (; buf[p] != 10; p++)
            ;
        p++;
    }

    /* print title */
    fprintf(f, "\n ");
    {
        int first_word = 1;

        for (; buf[p] != (pstyle == 207 ? 11 : 10); p++)
            if (isalpha(buf[p])) {
                fprintf(f, "%c", first_word ? toupper(buf[p]) : tolower(buf[p]));
                first_word = 0;
            } else {
                fprintf(f, " ");
                first_word = 1;
            }
    }
    fprintf(f, " \n\n\n");

    p = TEXT_START;
    if (pstyle == 207) {
        for (; buf[p] != 10; p++)
            ;
        p++;
    }

    /* print document paragraph by paragraph */
    while (p < endtext) {
        int pre = 0;
        int header = 0;
        int plr = 0;
        int tit = 0;
        int ftit = 1;
        int tt = 0;
        const char *tt_begin, *tt_end;
        int level;
        int table;

        /* find style for paragraph */
        for (; ip < nr_styles && styles[ip]->f <= p; ip++)
            if (styles[ip]->f == p && styles[ip]->mode == 'S'
                /*&& buf[styles[ip]->descr] == 4*/)
                pstyle = buf[styles[ip]->descr + 1];

        header = pstyle == 177 || pstyle == 179 || pstyle == 181 || pstyle == 183;
        table =    pstyle == 77 || pstyle == 81 || pstyle == 199
                || pstyle == 201 || pstyle == 203 || pstyle == 205;

        if (table && !cur_table) {
            fprintf(f, "\n", pstyle == 77 ? '5' : '1');
            cur_table = 1;
        }
        if (cur_table && !table) {
            fprintf(f, "\n");
            cur_table = 0;
        }

        if (pstyle == 83 || pstyle == 93) {
            level = (pstyle == 83) ? 1 : 2;
            if (cur_level < level)
                levels[level-1] = (buf[p] == '1' || buf[p] == 'a') ? 'O' : 'U';
            /* skip the item label up to and including the tab */
            while (buf[p] != 9)
                p++;
            if (buf[p] == 9)
                p++;
        } else {
            level = 0;
        }

        /* The closing format string had lost its conversion although the
           list kind is passed; restored to mirror the "<%cL>" opener. */
        for (; cur_level > level; cur_level--)
            fprintf(f, "</%cL>\n", levels[cur_level-1]);
        for (; cur_level < level; cur_level++)
            fprintf(f, "<%cL>\n", levels[cur_level]);

        if (pstyle == 179 || pstyle == 181 || pstyle == 183) {
            char section[REF_NAME_SIZE];
            int n = 0;
            lword ep = p;

            while (isdigit(buf[ep]) || buf[ep] == '.') {
                if (n < 28)
                    section[n++] = (char)buf[ep];
                ep++;
            }
            if (n > 1 && section[n-1] == '.')
                n--;
            section[n] = '\0';
            if (ep != p && reflabel(1, out, section))
                fprintf(f, "\n", section);
        }

        /*
            83      one level indent
            93 94   two levels indent
            199     (contents)
            77      (references)
            81      indent
        */
        switch (pstyle) {
            case 0:
            case 61:
            case 109:
                fprintf(f, "\n\n");
                break;
            case 63:
                fprintf(f, "\n\n");
                pre = 1; tt++;
                break;
            case 107:
                fprintf(f, "\n    \n");
                pre = 1; tt++;
                break;
            case 115:
                fprintf(f, "\n      \n");
                pre = 1; tt++;
                break;
            case 83:
            case 93:
            case 94:
                fprintf(f, "\n    • ");
                break;
            case 177:
                fprintf(f, "\n\n      ");
                tit = 1;
                break;
            case 179:
                fprintf(f, "\n\n\n      ");
                tit = 1;
                break;
            case 181:
                fprintf(f, "\n\n\n      ");
                tit = 1;
                break;
            case 183:
                fprintf(f, "\n\n\n      ");
                tit = 1;
                break;
            case 77:
            case 81:
            case 199:
            case 201:
            case 203:
            case 205:
                fprintf(f, "\n\n\n");
                break;
            default:
                fprintf(f, "\n\n", pstyle);
        }

        /* process paragraph */
        for (; p < endtext && buf[p] != 10; p++) {
            /* advance to the first character run that has not ended yet */
            while (ci < nr_styles && (styles[ci]->t < p || styles[ci]->mode != 'C'))
                ci++;

            tt_end = tt-1 ? "" : "";
            /* ci may have run off the end of the table: never dereference
               styles[nr_styles] */
            if (ci < nr_styles && styles[ci]->t == p && styles[ci]->mode == 'C'
                /*&& buf[styles[ci]->descr] == 1*/) {
                switch (buf[styles[ci]->descr + 1]) {
                    case 3:
                        fprintf(f, "");
                        break;
                    case 7: /* times roman in PRE */
                        break;
                    case 0:
                    case 17:
                        fprintf(f, "%s", tt_end); tt--;
                        break;
                    case 5:
                    case 9:
                    case 13:
                    case 19:
                    case 15:
                        fprintf(f, "%s", tt_end); tt--;
                        break;
                    case 23:
                    case 29:
                        fprintf(f, "");
                        break;
                    case 25:
                        fprintf(f, "%s", tt_end); tt--;
                        break;
                    case 31:
                        in_math = 0;
                        break;
                    default:
                        fprintf(f, "", buf[styles[ci]->descr + 1]);
                }
                if (ci + 1 < nr_styles)
                    for (ci++; ci < nr_styles && styles[ci]->mode != 'C'; ci++)
                        ;
            }

            tt_begin = tt ? "" : "";
            if (ci < nr_styles && styles[ci]->f == p && styles[ci]->mode == 'C'
                /*&& buf[styles[ci]->descr] == 1*/) {
                int cstyle = buf[styles[ci]->descr + 1];

                switch (cstyle) {
                    case 3:
                        fprintf(f, "");
                        break;
                    case 7: /* times roman in PRE */
                        break;
                    case 0:  /* ?? */
                    case 17: /* parameter in formal expression */
                        fprintf(f, "%s", tt_begin); tt++;
                        break;
                    case 5:  /* formal identifier */
                    case 9:  /* procedure name */
                    case 13: /* variable name */
                    case 15: /* type name */
                    case 19: /* italics */
                        fprintf(f, "%s", tt_begin); tt++;
                        break;
                    case 23:
                    case 29:
                        fprintf(f, "");
                        break;
                    case 25:
                        fprintf(f, "%s", tt_begin); tt++;
                        break;
                    case 31:
                        in_math = 1;
                        break;
                    default:
                        fprintf(f, "", cstyle);
                }
            }

            if (!pre && buf[p] == '[') {
                lword r, s;
                int n;
                char name[REF_NAME_SIZE];
                char section[REF_NAME_SIZE];
                char *ref;

                name[0] = '\0';
                section[0] = '\0';
                for (r = p; buf[r] != ']' && buf[r] != '"'; r++)
                    ;
                fprintf(f, "[");
                p++;
                s = p;
                while (s < r) {
                    /* scan name; over-long names are truncated instead of
                       overrunning the buffer */
                    n = 0;
                    if (isalpha(buf[s])) {
                        while (s < r && buf[s] != ' ' && buf[s] != 255 && buf[s] != ',') {
                            if (n < REF_NAME_SIZE - 1)
                                name[n++] = (char)buf[s];
                            s++;
                        }
                        if (n > 0)
                            name[n] = '\0';
                    }
                    while (s < r && (buf[s] == ' ' || buf[s] == 255))
                        s++;
                    n = 0;
                    while (s < r && (buf[s] == '.' || isdigit(buf[s]))) {
                        if (n < REF_NAME_SIZE - 1)
                            section[n++] = (char)buf[s];
                        s++;
                    }
                    section[n] = '\0';
                    if (   (buf[s] != ',' && buf[s] != ']' && buf[s] != '-')
                        || !is_file_name(name, &ref))
                        break;
                    fprintf(f, "");
                    for (; p < s; p++)
                        put_ref_char(f, buf[p]);
                    fprintf(f, "");
                    while (p < r && !isalpha(buf[p]) && !isdigit(buf[p])) {
                        put_ref_char(f, buf[p]);
                        p++;
                    }
                    s = p;
                }
                p--;
            } else if (!pre && buf[p] == '(') {
                lword r;
                int n;
                char name[REF_NAME_SIZE];
                char *ref;

                name[0] = '\0';
                fprintf(f, "(");
                for (n = 0, r = p+1; buf[r] != ')' && buf[r] != ' '; r++) {
                    if (n < REF_NAME_SIZE - 1)
                        name[n++] = (char)buf[r];
                }
                name[n] = '\0';
                if (buf[r] == ')' && is_file_name(name, &ref)) {
                    fprintf(f, "", ref);
                    p++;
                    for (; p < r; p++)
                        fprintf(f, "%c", buf[p]);
                    fprintf(f, ")");
                }
            } else if (   !header && !pre && isdigit(buf[p])
                       && (buf[p-1] == ' ' || buf[p-1] == 255)) {
                lword r, s;
                lword t = p - 1;  /* start of the blanks before the number */
                int n;
                char section[REF_NAME_SIZE];
                char before[REF_NAME_SIZE];

                for (n = 0, r = p; isdigit(buf[r]) || buf[r] == '.'; r++) {
                    if (n < REF_NAME_SIZE - 1)
                        section[n++] = (char)buf[r];
                }
                if (n > 0 && section[n-1] == '.')
                    n--;
                section[n] = '\0';

                /* collect the word in front of the number */
                for (s = p-1; buf[s] == ' ' || buf[s] == 255; s--)
                    t = s;
                for (; isalpha(buf[s]); s--)
                    ;
                s++;
                for (n = 0; s < t; s++) {
                    if (n < REF_NAME_SIZE - 1)
                        before[n++] = (char)buf[s];
                }
                before[n] = '\0';

                printf("Section |%s|%s|%s|\n", out, before, section);
                if (introduces_section_ref(before, plr)) {
                    plr = is_plural_section_word(before);
                    fprintf(f, "%s", section, section);
                    p = r-1;
                } else {
                    fprintf(f, "%c", buf[p]);
                }
            } else if (buf[p] == 255) {
                if (buf[p+1] != ':')
                    fprintf(f, " ");
            } else if (buf[p] == 13) {
                ;
            } else if (buf[p] == 11) {
                fprintf(f, "%s", pre ? "\n" : "\n\n");
            } else if (buf[p] == 12) {
                fprintf(f, "\n\n\n\n\n");
            } else if (buf[p] == 9) {
                if (pre)
                    fprintf(f, " ");
                else if (cur_table)
                    fprintf(f, "\n\n");
                else
                    fprintf(f, "*tab*");
            } else if (buf[p] == 132) {
                /* entity instead of a raw non-ASCII byte in the source */
                fprintf(f, "&auml;");
            } else if (buf[p] == 177) {
                /* "&emdash;" is not an HTML entity */
                fprintf(f, "&mdash;");
            } else if (buf[p] == 244) {
                fprintf(f, "|");
            } else if (in_math) {
                switch (buf[p]) {
                    case ' ': fprintf(f, " "); break;
                    case '"': fprintf(f, "for all"); break;
                    case '=': fprintf(f, "="); break;
                    case '$': fprintf(f, "exists"); break;
                    case 163: fprintf(f, "<="); break;
                    case 179: fprintf(f, "=>"); break;
                    case 185: fprintf(f, "!="); break;
                    case 197: fprintf(f, "exor"); break;
                    case 198: fprintf(f, "empty"); break;
                    case 199: fprintf(f, "intersection"); break;
                    case 200: fprintf(f, "union"); break;
                    case 203: fprintf(f, "not subset"); break;
                    case 204: fprintf(f, "subset"); break;
                    case 206: fprintf(f, "in"); break;
                    case 207: fprintf(f, "not in"); break;
                    case 217: fprintf(f, "and"); break;
                    case 218: fprintf(f, "or"); break;
                    default:  fprintf(f, "MATH*%d", buf[p]); break;
                }
            } else {
                if (buf[p] < 32 || buf[p] > 128)
                    fprintf(f, "\\%d", buf[p]);
                /* these three branches exist to escape HTML metacharacters;
                   printing the character itself made them no-ops */
                else if (buf[p] == '<')
                    fprintf(f, "&lt;");
                else if (buf[p] == '>')
                    fprintf(f, "&gt;");
                else if (buf[p] == '&')
                    fprintf(f, "&amp;");
                else if (tit && isalpha(buf[p])) {
                    fprintf(f, "%c", ftit ? toupper(buf[p]) : tolower(buf[p]));
                    ftit = 0;
                } else
                    fprintf(f, "%c", buf[p]);
            }
        }

        /* end of the paragraph */
        while (buf[p] == 10)
            p++;

        switch (pstyle) {
            case 0:
            case 61:
            case 109:
                fprintf(f, "\n\n\n");
                break;
            case 63:
                fprintf(f, "\n\n\n");
                tt--;
                break;
            case 107:
            case 115:
                fprintf(f, "\n");
                tt--;
                break;
            case 83:
            case 93:
            case 94:
                fprintf(f, "\n");
                break;
            case 177:
                fprintf(f, "\n\n");
                break;
            case 179:
                fprintf(f, "\n\n");
                break;
            case 181:
                fprintf(f, "\n\n");
                break;
            case 183:
                fprintf(f, "\n\n");
                break;
            case 77:
            case 81:
            case 199:
            case 201:
            case 203:
            case 205:
                fprintf(f, "\n");
                break;
            default:
                fprintf(f, "\n\n\n", pstyle);
        }
    }

    if (cur_table) {
        fprintf(f, "");
        cur_table = 0;
    }
    for (; cur_level > 0; cur_level--)
        fprintf(f, "</%cL>\n", levels[cur_level-1]);

    fprintf(f, "\n\n\n\n\n\n\n\n");
    fprintf(f, "\nMy life as a hacker |");
    fprintf(f, "\nMy home page");
    fprintf(f, "\n\n");
    fprintf(f, "\n ");
    fprintf(f, "\n");
}

/* Pass 1 for one document: load in and record the sections it references. */
void find_all_reflabels(char *in, char *out)
{
    /* the documents are binary: text mode would corrupt them on platforms
       that translate line endings */
    FILE *fin = fopen(in, "rb");

    if (fin == NULL) {
        printf("cannot open: %s\n", in);
        return;
    }

    scan_file((char *)buf, fin, NULL);
    find_reflabels(out);
    fclose(fin);
}

/* Pass 2 for one document: load in and write it as "MT<out>.html". */
void process_file(char *in, char *out)
{
    char out_name[30];
    FILE *fin = fopen(in, "rb");
    FILE *fout;

    if (fin == NULL) {
        printf("cannot open: %s\n", in);
        return;
    }

    printf("=== %s\n", out);
    scan_file((char *)buf, fin, NULL);
    fclose(fin);

    snprintf(out_name, sizeof out_name, "MT%s.html", out);
    fout = fopen(out_name, "w");
    if (fout == NULL) {
        printf("cannot create: %s\n", out_name);
        return;
    }
    dump_html(fout, out);
    fclose(fout);
}

/* Load one document and print its text with style markers to stdout. */
void analyse_file(char *in)
{
    FILE *fin = fopen(in, "rb");

    if (fin == NULL) {
        printf("cannot open: %s\n", in);
        return;
    }

    scan_file((char *)buf, fin, NULL);
    dump_text(endtext, stdout);
    fclose(fin);
}

/*
 * With an argument: analyse that single file.  Without: convert every
 * entry of thefiles, first collecting the referenced sections of all
 * documents so that the second pass knows which anchors are needed.
 */
int main(int argc, char *argv[])
{
    size_t i;

    buf = malloc(DOC_BUF_SIZE);
    if (buf == NULL) {
        printf("allocation failed\n");
        return 1;
    }

    if (argc > 1)
        analyse_file(argv[1]);
    else {
        for (i = 0; i < sizeof(thefiles)/sizeof(thefiles[0]); i++)
            find_all_reflabels(thefiles[i].in, thefiles[i].out);

        for (i = 0; i < sizeof(thefiles)/sizeof(thefiles[0]); i++)
            process_file(thefiles[i].in, thefiles[i].out);
    }
    return 0;
}