#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Allocate storage for one object of type T; yields a T* (NULL on failure).
   The expansion is fully parenthesised so it can be used inside expressions. */
#define ALLOC(T) ((T*)malloc(sizeof(T)))
/* Make D point to a freshly malloc'ed copy of the string S.
   Wrapped in do { } while (0) so that the macro behaves as ONE statement,
   e.g. as the body of an unbraced if.  D is left NULL when the allocation
   fails.  Note: both D and S are evaluated more than once. */
#define STRCPY(D,S) do { \
        (D) = (char*)malloc(strlen(S)+1); \
        if ((D) != NULL) strcpy((D),(S)); \
    } while (0)
/* Short names for the unsigned integer types used when decoding the file. */
typedef unsigned long lword;
typedef unsigned short word;
typedef unsigned char byte;
typedef unsigned int bool;
#define TRUE 1
#define FALSE 0
/* Table of the documents to convert: `in` is the input file name that is
   opened for reading, `out` is the short name used to build the output file
   name and to identify the document in cross references. */
struct {
    char *in;
    char *out;
} thefiles [] =
{
    { .in = "aapte.doc",    .out = "" },
    { .in = "intro.doc",    .out = "intro" },
    { .in = "main.doc",     .out = "main" },
    { .in = "bintree.doc",  .out = "bintree" },
    { .in = "listing.doc",  .out = "listing" },
    { .in = "definiti.doc", .out = "definitions" },
    { .in = "parser.doc",   .out = "parser" },
    { .in = "scanner.doc",  .out = "scanner" },
    { .in = "errors.doc",   .out = "errors" },
    { .in = "pass2.doc",    .out = "pass2" },
    { .in = "trans.doc",    .out = "trans" },
    { .in = "depend.doc",   .out = "depend" },
    { .in = "allsmp.doc",   .out = "allsmp" },
    { .in = "genera.doc",   .out = "genera" },
    { .in = "1passrel.doc", .out = "onepassrel" },
    { .in = "gtypes.doc",   .out = "gtypes" },
    { .in = "gass.doc",     .out = "gass" },
    { .in = "genpro.doc",   .out = "genpro" },
    { .in = "printtre.doc", .out = "printtree" },
    { .in = "perform.doc",  .out = "perform" },
    { .in = "openfile.doc", .out = "openfiles" },
    { .in = "screen.doc",   .out = "screen" },
    { .in = "vt100.doc",    .out = "vt100" },
    { .in = "memo2.doc",    .out = "two" },
};
/* Raw bytes of the input file currently being processed.  The decoding
   routines in this file address it by byte offset (buf[pos]). */
byte *buf;
lword glword(long pos)
{
lword b1 = buf[pos],
b2 = buf[pos+1],
b3 = buf[pos+2],
b4 = buf[pos+3];
return b1 + (b2 << 8) + (b3 << 16) + (b4 << 24);
}
word gword(long pos)
{
word b1 = buf[pos],
b2 = buf[pos+1];
return b1 + (b2 << 8);
}
/* One formatting run as recorded by parse_formatting() via add_style(). */
typedef struct
{ lword f; /* text position at which the run starts (previous run's end) */
lword t; /* text position at which the run ends */
char mode; /* tag given by the caller: scan_file() uses 'C' and 'S' */
lword descr; /* offset in buf of the descriptor: a length byte followed by
                that many descriptor bytes */
} style_t;
/* All recorded runs, kept ordered by ascending f (see add_style). */
style_t *styles[10000];
/* Number of valid entries in styles[]. */
int nr_styles = 0;
/*
 * add_style: record one formatting run in the global styles[] table.
 *   f, t   - start and end position of the run
 *   mode   - tag stored with the run (callers pass 'C' or 'S')
 *   descr  - offset in buf of the run's descriptor
 * The table is kept sorted by ascending f; a new run is inserted in front of
 * the first existing run whose f is not smaller.
 * If the table is full or memory cannot be allocated the run is dropped and a
 * message is written to stderr (the original wrote past the end of styles[]
 * or dereferenced a NULL pointer in these cases).
 */
void add_style(lword f, lword t, char mode, lword descr)
{
    const int capacity = (int)(sizeof(styles) / sizeof(styles[0]));
    style_t *entry;
    int slot = 0;
    int j;

    if (nr_styles >= capacity)
    {
        fprintf(stderr, "add_style: style table full (%d entries), run dropped\n",
                capacity);
        return;
    }
    entry = (style_t*)malloc(sizeof *entry);
    if (entry == NULL)
    {
        fprintf(stderr, "add_style: out of memory, run dropped\n");
        return;
    }
    entry->f = f;
    entry->t = t;
    entry->mode = mode;
    entry->descr = descr;

    /* find the insertion point that keeps the table ordered by f */
    while (slot < nr_styles && styles[slot]->f < f)
        slot++;
    /* open a gap at the insertion point */
    for (j = nr_styles; j > slot; j--)
        styles[j] = styles[j-1];
    styles[slot] = entry;
    nr_styles++;
}
/*
 * dump: debugging aid that prints the 128 bytes of buf starting at offset p
 * as 8 rows of 16 cells.  Each cell shows the marker character m[k] followed
 * by the byte value in hexadecimal.  Does nothing when f is NULL.
 */
void dump(lword p, char m[], FILE *f)
{
    int cell;

    if (f == NULL)
        return;
    for (cell = 0; cell < 8 * 16; cell++)
    {
        fprintf(f, "%c %02x ", m[cell], buf[p + cell]);
        /* finish the row after every 16th cell */
        if (cell % 16 == 15)
            fprintf(f, "\n");
    }
}
/*
 * parse_formatting: decode the 128-byte formatting blocks found in buf at
 * offsets begin, begin+128, ... (all below end) and register every run they
 * describe through add_style().
 *   begin, end - byte offsets in buf delimiting the blocks to decode
 *   mode       - tag stored with every run found (callers pass 'C' or 'S')
 *   f          - optional trace stream; NULL disables all trace output
 *
 * Block layout as read by this code: a 4-byte start position at offset 0,
 * then 6-byte entries from offset 4 (4-byte end position followed by a
 * 2-byte value v), and the number of entries in the last byte (offset 127).
 * v == 0xffff marks an entry without descriptor; v < 128 locates the
 * descriptor at block offset v + 4 (length byte, then the descriptor bytes).
 *
 * Changes from the original: the "???" trace line now passes the position
 * its format string asks for, and writes into the local marking[] array are
 * range-checked so that a damaged block cannot overwrite the stack.
 */
void parse_formatting(lword begin, lword end, char mode, FILE *f)
{
    enum { BLOCK_BYTES = 128 };
    lword cblock;

    for (cblock = begin; cblock < end; cblock += BLOCK_BYTES)
    {
        char marking[BLOCK_BYTES];   /* one marker per block byte, for dump() */
        lword prev_p = glword(cblock);
        int nr_entries = buf[cblock + BLOCK_BYTES - 1];
        int i;

        if (f) fprintf(f, "block at %lx: %lx\n", cblock, prev_p);
        for (i = 0; i < BLOCK_BYTES; i++)
            marking[i] = ' ';
        marking[BLOCK_BYTES - 1] = '#';

        for (i = 0; i < nr_entries; i++)
        {
            lword p = glword(cblock + 4 + i * 6);
            word v = gword(cblock + 8 + i * 6);

            if (4 + i * 6 < BLOCK_BYTES) marking[4 + i * 6] = 'p';
            if (8 + i * 6 < BLOCK_BYTES) marking[8 + i * 6] = 'r';

            if (v == 0xffff)
            {
                if (f) fprintf(f, "%6lx : end\n", p);
            }
            else if (v < 128)
            {
                lword descr = cblock + v + 4;
                int len;
                int k;

                if (v + 4 < BLOCK_BYTES) marking[v + 4] = mode;
                add_style(prev_p, p, mode, descr);
                len = buf[descr];
                if (f) fprintf(f, "%6lx : [%3d]", p, v);
                for (k = 0; k < len; k++)
                {
                    if (f) fprintf(f, " %02x", buf[descr + 1 + k]);
                    if (v + 5 + k < BLOCK_BYTES) marking[v + 5 + k] = 'd';
                }
                if (f) fprintf(f, "\n");
            }
            else
            {
                if (f) fprintf(f, "%6lx : ???\n", p);
            }
            /* the next run starts where this one ends */
            prev_p = p;
        }
        dump(cblock, marking, f);
    }
}
/* Positions derived from the file header by scan_file(). */
/* end-of-text position, read from header offset 14 */
lword endtext;
/* endtext rounded up to a multiple of 128 */
lword begin_char_format;
/* 128 times the 16-bit value at header offset 18 */
lword begin_par_format;
/* 128 times the 16-bit value at header offset 20 */
lword begin_x1_format;
void scan_file(char *buf, FILE *fin, FILE *f)
{
long fl;
nr_styles = 0;
fl = fread(buf, 1, 200000, fin);
if (f) fprintf(f, "bytes read: %ld\n", fl);
endtext = glword(14);
if (f) fprintf(f, "end text at: %lx\n", endtext);
begin_char_format = 128 * ((endtext + 127) / 128);
if (f) fprintf(f, "begin char format at: %lx\n", begin_char_format);
begin_par_format = 128 * (lword)gword(18);
if (f) fprintf(f, "begin par fornat at: %lx\n", begin_par_format);
begin_x1_format = 128 * (lword)gword(20);
if (f) fprintf(f, "begin x1 fornat at: %lx\n", begin_x1_format);
parse_formatting(begin_char_format, begin_par_format, 'C', f);
parse_formatting(begin_par_format, begin_x1_format, 'S', f);
}
/*
 * dump_text: write the text bytes buf[128] .. buf[endtext] (inclusive) to f
 * in a readable form, interleaved with markers for every run in styles[]
 * that starts or ends at the current position.  A start marker lists the
 * run's mode, its index and its descriptor bytes in hexadecimal; an end
 * marker shows mode and index only.
 */
void dump_text(lword endtext, FILE *f)
{
    lword pos;

    for (pos = 128; pos <= endtext; pos++)
    {
        int s;
        byte ch;

        /* markers for runs touching this position */
        for (s = 0; s < nr_styles; s++)
        {
            const style_t *run = styles[s];

            if (run->f == pos)
            {
                int len = buf[run->descr];
                int k;

                fprintf(f, "<%c%d", run->mode, s);
                for (k = 0; k < len; k++)
                    fprintf(f, " %02x", buf[run->descr + k + 1]);
                fprintf(f, ">");
            }
            if (run->t == pos)
                fprintf(f, "%c%d>", run->mode, s);
        }

        /* the character itself */
        ch = buf[pos];
        switch (ch)
        {
            case 255:
                fprintf(f, "_");
                break;
            case 10:
                /* not printed */
                break;
            case 13:
            case 11:
                fprintf(f, "\n");
                break;
            case 9:
                fprintf(f, " ");
                break;
            default:
                if (ch < 32 || ch > 128)
                    fprintf(f, "\\%d", ch);
                else
                    fprintf(f, "%c", ch);
                break;
        }
    }
    fprintf(f, "\n");
}
/*
 * is_file_name: decide whether s names one of the documents.
 * Returns 1 and stores the document's output name in *ref when s equals the
 * `out` name of an entry in thefiles (then *ref is s itself) or one of the
 * two aliases "Faase88" (-> "") and "Faase89" (-> "intro").
 * Returns 0 and leaves *ref untouched otherwise.
 */
int is_file_name(char *s, char **ref)
{
    const size_t nr_files = sizeof(thefiles) / sizeof(thefiles[0]);
    size_t idx;

    for (idx = 0; idx < nr_files; idx++)
    {
        if (!strcmp(s, thefiles[idx].out))
        {
            *ref = s;
            return 1;
        }
    }

    if (!strcmp(s, "Faase88"))
    {
        *ref = "";
        return 1;
    }
    if (!strcmp(s, "Faase89"))
    {
        *ref = "intro";
        return 1;
    }
    return 0;
}
/*
 * reflabel: look up, and optionally register, a (name, section) pair.
 *   find    - non-zero: look up only; zero: register the pair if it is new
 *   name    - document name
 *   section - section number as text
 * Returns 1 when the pair was already registered, 0 otherwise (including the
 * call that registers it).  Every call echoes the pair on stdout.
 * Registered pairs are copied, so the caller's strings need not stay alive.
 *
 * Changes from the original: a full table or a failed allocation now drops
 * the pair with a message on stderr instead of writing past the end of the
 * table or through a NULL pointer.
 */
int reflabel(int find, char *name, char *section)
{
    static struct reflabel_T
    {
        char *name;
        char *section;
    } *reflabels[1000];
    static int nr_reflabels = 0;
    const int capacity = (int)(sizeof(reflabels) / sizeof(reflabels[0]));
    struct reflabel_T *entry;
    size_t name_size;
    size_t section_size;
    int i;

    printf("%s %s|\n", name, section);
    for (i = 0; i < nr_reflabels; i++)
        if (strcmp(reflabels[i]->name, name) == 0
            && strcmp(reflabels[i]->section, section) == 0)
            return 1;

    if (find)
        return 0;

    if (nr_reflabels >= capacity)
    {
        fprintf(stderr, "reflabel: table full (%d entries), label dropped\n",
                capacity);
        return 0;
    }

    /* one allocation holds the record and both string copies */
    name_size = strlen(name) + 1;
    section_size = strlen(section) + 1;
    entry = malloc(sizeof *entry + name_size + section_size);
    if (entry == NULL)
    {
        fprintf(stderr, "reflabel: out of memory, label dropped\n");
        return 0;
    }
    entry->name = (char *)(entry + 1);
    entry->section = entry->name + name_size;
    memcpy(entry->name, name, name_size);
    memcpy(entry->section, section, section_size);

    reflabels[nr_reflabels] = entry;
    nr_reflabels++;
    return 0;
}
/*
 * find_reflabels: scan the text of the document currently in buf (offset 128
 * up to endtext) and register every cross reference it contains through
 * reflabel(0, ...).  `out` is the output name of the current document.
 *
 * Three patterns are examined in paragraphs that are not preformatted
 * (paragraph styles 63, 107 and 115 are skipped):
 *  - "[name section, ...]": each name accepted by is_file_name() is
 *    registered together with the section number that follows it;
 *  - "(name)": only traced on stdout, nothing is registered;
 *  - a number preceded by blank(s): registered for the current document when
 *    the word in front of it is "section(s)", "subsection(s)" or "see", or
 *    "and" directly after a plural form.
 * The function also writes trace output to stdout.
 *
 * Changes from the original: characters of over-long names and section
 * numbers inside "[...]" are now dropped instead of being written past the
 * end of the 30-byte buffers, the searches for the closing delimiter stop at
 * endtext, and two unused locals were removed.
 */
void find_reflabels(char *out)
{
    lword p;
    int ip = 0;

    p = 128;
    while( p < endtext )
    {
        int pstyle = 0;
        int pre = 0;
        int header = 0;
        int plr = 0;   /* set after "sections"/"subsections": allows "and N" */

        /* find style for paragraph */
        for (; ip < nr_styles && styles[ip]->f <= p ; ip++)
            if (styles[ip]->f == p && styles[ip]->mode == 'S')
                pstyle = buf[styles[ip]->descr + 1];
        header = pstyle == 177 || pstyle == 179
              || pstyle == 181 || pstyle == 183;
        pre = pstyle == 63 || pstyle == 107 || pstyle == 115;

        /* walk the paragraph; byte 10 ends it */
        for( ; p < endtext && buf[p] != 10;)
        {
            if (!pre && buf[p] == '[')
            {
                lword r;
                int i;
                char name[30];
                char section[30];
                char *ref;

                name[0] = '\0';
                section[0] = '\0';
                /* r: position of the closing ']' (or of a '"') */
                for (r = p; r < endtext && buf[r] != ']' && buf[r] != '"'; r++)
                    printf("%c", buf[r]);
                printf("]\n");
                p++;
                while (p < r)
                {
                    /* scan name; a previous name is kept when none follows */
                    i = 0;
                    if (isalpha(buf[p]))
                    {
                        while (p < r && buf[p] != ' ' && buf[p] != 255 && buf[p] != ',')
                        {
                            if (i < 29)
                                name[i++] = buf[p];
                            p++;
                        }
                        if (i > 0) name[i] = '\0';
                    }
                    while (p < r && (buf[p] == ' ' || buf[p] == 255))
                        p++;
                    /* scan section number */
                    i = 0;
                    while (p < r && (buf[p] == '.' || isdigit(buf[p])))
                    {
                        if (i < 29)
                            section[i++] = buf[p];
                        p++;
                    }
                    section[i] = '\0';
                    if (is_file_name(name, &ref)
                        && (buf[p] == ',' || buf[p] == ']' || buf[p] == '-'))
                    {
                        reflabel(0, ref, section);
                    }
                    /* skip separators up to the next name or number */
                    while (p < r && !isalpha(buf[p]) && !isdigit(buf[p]))
                        p++;
                }
            }
            else if (!pre && buf[p] == '(')
            {
                lword r;
                int i;
                char name[30];
                char *ref;

                name[0] = '\0';
                for (i = 0, r = p+1;
                     r < endtext && buf[r] != ')' && buf[r] != ' '; r++)
                {
                    printf("%c", buf[r]);
                    if (i < 29)
                        name[i++] = buf[r];
                }
                name[i] = '\0';
                printf("\n");
                if (buf[r] == ')' && is_file_name(name, &ref))
                    printf(" MT%s.html\n", ref);
                p = r;
            }
            else if (!pre && isdigit(buf[p]) && (buf[p-1] == ' '||buf[p-1]==255))
            {
                lword r,s;
                lword t = p;   /* always overwritten: buf[p-1] is a blank */
                int i;
                char section[30];
                char before[30];

                /* the number itself, without a trailing '.' */
                for (i = 0, r = p; isdigit(buf[r]) || buf[r] == '.'; r++)
                {
                    if (i < 29)
                        section[i++] = buf[r];
                }
                if (i > 0 && section[i-1] == '.') i--;
                section[i] = '\0';
                /* the word in front of the number: t ends it, s starts it */
                for (s = p-1; (buf[s] == ' '||buf[s]==255); s--)
                    t = s;
                for (; isalpha(buf[s]); s--);
                s++;
                for (i=0; s < t; s++)
                {
                    if (i < 29)
                        before[i++] = buf[s];
                }
                before[i] = '\0';
                printf("Section%d |%s|%s|%s|\n",header,out, before, section);
                if (   strcmp(before, "sections") == 0
                    || strcmp(before, "subsections") == 0
                    || strcmp(before, "section") == 0
                    || strcmp(before, "subsection") == 0
                    || strcmp(before, "see") == 0
                    || (plr && strcmp(before, "and") == 0 ))
                {
                    plr =    strcmp(before, "sections") == 0
                          || strcmp(before, "subsections") == 0;
                    printf("x");
                    reflabel(0, out, section);
                }
                p++;
            }
            else
                p++;
        }
        /* skip the paragraph end */
        while( buf[p] == 10) p++;
    }
}
/*
 * dump_html: write the document currently held in buf to f, paragraph by
 * paragraph, covering the text from offset 128 up to endtext.
 *   f   - output stream that receives all generated text
 *   out - name of the current document; passed to reflabel() when section
 *         headers are checked and echoed in the "Section" trace on stdout
 * Paragraph styles (mode 'S') and character styles (mode 'C') are taken from
 * the global styles[] table.
 *
 * NOTE(review): several fprintf calls below pass arguments (pstyle, section,
 * ref, ...) to format strings that contain no conversion, many literals are
 * empty or blank, and some literals contain a raw line break.  It looks as
 * if the HTML tags were stripped from the string literals -- confirm against
 * the original source before relying on the emitted text.
 */
void dump_html(FILE *f, char *out)
{
lword p;
/* ip walks styles[] for paragraph styles, i for character styles */
int ip = 0, i = 0;
/* non-zero between the start and the end of a character run of style 31 */
int in_math = 0;
/* current list nesting depth and, per depth, the letter ('O' or 'U') that
   is printed in front of "L>" when the list is opened or closed */
int cur_level = 0;
char levels[10];
/* non-zero while the previous paragraph had one of the table styles */
int cur_table = 0;
int pstyle = 0;
/* find style for first paragraph */
for (; ip < nr_styles && styles[ip]->f <= 128 ; ip++)
if (styles[ip]->f == 128 && styles[ip]->mode == 'S'
/*&& buf[styles[ip]->descr] == 4*/)
pstyle = buf[styles[ip]->descr + 1];
p = 128;
/* with paragraph style 207 the text up to and including the first byte 10
   is skipped */
if (pstyle == 207)
{
for( ; buf[p] != 10; p++);
p++;
}
/* printf title */
/* letters are copied with only the first letter of each word in upper case;
   every other character is replaced by a blank */
fprintf(f, "\n ");
{ int first_word = 1;
for( ; buf[p] != (pstyle == 207 ? 11 : 10); p++)
if (isalpha(buf[p]))
{ fprintf(f, "%c", first_word ? toupper(buf[p]) : tolower(buf[p]));
first_word = 0;
}
else
{ fprintf(f, " ");
first_word = 1;
}
}
fprintf(f, " \n\n\n");
/* restart at the beginning of the text for the body */
p = 128;
if (pstyle == 207)
{
for( ; buf[p] != 10; p++);
p++;
}
/* print document paragraph by paragraph */
while( p < endtext )
{
int pre = 0; /* set for paragraph styles 63, 107 and 115 */
int header = 0; /* set for paragraph styles 177, 179, 181 and 183 */
int plr = 0; /* set after "sections"/"subsections": allows "and N" */
int tit = 0; /* set for header paragraphs: letter case is normalised */
int ftit = 1; /* cleared once the first letter of a title is written */
int tt = 0; /* counter raised at style starts and lowered at style ends */
char *tt_begin, *tt_end;
int level;
int table;
/* find style for paragraph */
for (; ip < nr_styles && styles[ip]->f <= p ; ip++)
if (styles[ip]->f == p && styles[ip]->mode == 'S'
/*&& buf[styles[ip]->descr] == 4*/)
pstyle = buf[styles[ip]->descr + 1];
header = pstyle == 177 || pstyle == 179
|| pstyle == 181 || pstyle == 183;
table = pstyle == 77 || pstyle == 81 || pstyle == 199
|| pstyle == 201 || pstyle == 203 || pstyle == 205;
/* entering a run of table paragraphs */
if (table && !cur_table)
{
fprintf(f, "\n",
pstyle == 77 ? '5' : '1');
cur_table = 1;
}
/* leaving a run of table paragraphs */
if (cur_table && !table)
{
fprintf(f, "
");
cur_table = 0;
}
/* styles 83 and 93 are list items of nesting level 1 and 2; when a deeper
   level is entered its letter is chosen from the item's first character,
   then everything up to and including the tab (byte 9) is skipped */
if (pstyle == 83 || pstyle == 93)
{
level = (pstyle == 83) ? 1 : 2;
if (cur_level < level)
levels[level-1] = (buf[p] == '1' || buf[p] == 'a') ? 'O' : 'U';
while (buf[p] != 9) p++;
if ( buf[p] == 9) p++;
}
else
level = 0;
/* close or open lists until the nesting depth matches */
for (;cur_level > level; cur_level--)
fprintf(f, "%cL>\n", levels[cur_level-1]);
for (;cur_level < level; cur_level++)
fprintf(f, "<%cL>\n", levels[cur_level]);
/* numbered headers: collect the leading section number (without a trailing
   '.') and emit extra output when reflabel() knows it for this document */
if ( pstyle == 179 || pstyle == 181 || pstyle == 183)
{
char section[30];
int i = 0;
lword ep = p;
while( isdigit(buf[ep]) || buf[ep] == '.')
{ if (i < 28)
section[i++] = buf[ep];
ep++;
}
if (i > 1 && section[i-1] == '.') i--;
section[i] = '\0';
if (ep != p && reflabel(1, out, section))
fprintf(f, "\n", section);
}
/* 83 one level indent
93
94 two levels indent
199 (contents)
77 (references)
81 indent
*/
/* paragraph opening, selected by paragraph style */
switch( pstyle )
{
case 0:
case 61:
case 109: fprintf(f, ""); break;
case 63: fprintf(f, "
\n"); pre = 1; tt++; break;
case 107: fprintf(f, " \n"); pre = 1; tt++; break;
case 115: fprintf(f, " \n"); pre = 1; tt++; break;
case 83:
case 93:
case 94: fprintf(f, " "); break;
case 177: fprintf(f, ""); tit = 1; break;
case 179: fprintf(f, "\n"); tit = 1; break;
case 181: fprintf(f, "\n"); tit = 1; break;
case 183: fprintf(f, "\n"); tit = 1; break;
case 77:
case 81:
case 199:
case 201:
case 203:
case 205: fprintf(f, "\n"); break;
default: fprintf(f, " ", pstyle);
}
/* process paragraph */
for( ; p < endtext && buf[p] != 10 ; p++ )
{
/* advance i to the next 'C' run that does not end before p */
/* NOTE(review): i may reach nr_styles here, in which case styles[i] below
   is read beyond the last valid entry */
for (; i < nr_styles && (styles[i]->t < p || styles[i]->mode != 'C');
i++);
tt_end = tt-1 ? "" : "";
/* a character run ends at p: emit its closing text and move to the next
   'C' run */
if (styles[i]->t == p && styles[i]->mode == 'C'
/*&& buf[styles[i]->descr] == 1*/)
{
switch( buf[styles[i]->descr + 1] )
{
case 3: fprintf(f, ""); break;
case 7: /* times roman in PRE */ break;
case 0:
case 17: fprintf(f, "%s", tt_end); tt--; break;
case 5:
case 9:
case 13:
case 19:
case 15: fprintf(f, "%s", tt_end); tt--; break;
case 23:
case 29: fprintf(f, ""); break;
case 25: fprintf(f, "%s", tt_end); tt--; break;
case 31: in_math = 0; break;
default:
fprintf(f, "", buf[styles[i]->descr + 1]);
}
if (i+1 < nr_styles)
for (i++; i < nr_styles && styles[i]->mode != 'C';i++);
}
tt_begin = tt ? "" : "";
/* a character run starts at p: emit its opening text */
if (styles[i]->f == p && styles[i]->mode == 'C'
/*&& buf[styles[i]->descr] == 1*/)
{ int cstyle = buf[styles[i]->descr + 1];
switch( cstyle )
{
case 3: fprintf(f, ""); break;
case 7: /* times roman in PRE */ break;
case 0: /* ?? */
case 17: /* parameter in formal expression */
fprintf(f, "%s", tt_begin); tt++; break;
case 5: /* formal identifier */
case 9: /* procedure name */
case 13: /* variable name */
case 15: /* type name */
case 19: /* italics */
fprintf(f, "%s", tt_begin); tt++; break;
case 23:
case 29: fprintf(f, ""); break;
case 25: fprintf(f, "%s", tt_begin); tt++; break;
case 31: in_math = 1; break;
default:
fprintf(f, "", cstyle);
}
}
/* "[name section, ...]": every part whose name is accepted by
   is_file_name() is copied between two fprintf calls; scanning stops at the
   first part that is not accepted */
if (!pre && buf[p] == '[')
{
lword r,s;
int i;
char name[30];
char section[30];
char *ref;
name[0] = '\0';
section[0] = '\0';
for (r = p; buf[r] != ']' && buf[r] != '"'; r++)
;
fprintf(f, "[");
p++;
s = p;
for (;s < r;)
{
/* scan name */
/* NOTE(review): name[] and section[] are filled without a length check */
i = 0;
if (isalpha(buf[s]))
{ while (s < r && buf[s] != ' ' && buf[s] != 255 && buf[s] != ',')
name[i++] = buf[s++];
if (i > 0) name[i] = '\0';
}
while (s < r && (buf[s] == ' ' || buf[s] == 255))
s++;
i = 0;
while (s < r && (buf[s] == '.' || isdigit(buf[s])))
section[i++] = buf[s++];
section[i] = '\0';
if ( (buf[s] != ',' && buf[s] != ']' && buf[s] != '-')
|| !is_file_name(name, &ref))
break;
fprintf(f, "");
for (;p < s; p++)
if (buf[p] == 255)
fprintf(f, " ");
else
fprintf(f, "%c", buf[p]);
fprintf(f, "");
while (p < r && !isalpha(buf[p]) && !isdigit(buf[p]))
{
if (buf[p] == 255)
fprintf(f, " ");
else
fprintf(f, "%c", buf[p]);
p++;
}
s = p;
}
/* compensate for the p++ of the enclosing loop */
p--;
}
/* "(name)": when name is accepted by is_file_name() the text up to the
   closing parenthesis is copied here; otherwise only "(" is written and the
   following characters are handled by later iterations */
else if (!pre && buf[p] == '(')
{
lword r;
int i;
char name[30];
char *ref;
name[0] = '\0';
fprintf(f, "(");
for (i = 0, r = p+1; buf[r] != ')' && buf[r] != ' '; r++)
{
if (i < 29)
name[i++] = buf[r];
}
name[i] = '\0';
if (buf[r] == ')' && is_file_name(name, &ref))
{
fprintf(f, "", ref);
p++;
for(; p< r; p++)
fprintf(f, "%c", buf[p]);
fprintf(f, ")");
}
}
/* a number preceded by a blank, outside headers: treated as a section
   reference when the word in front of it is "section(s)", "subsection(s)",
   "see", or "and" directly after a plural form */
else if (!header && !pre
&& isdigit(buf[p]) && (buf[p-1] == ' '||buf[p-1] == 255))
{
lword r,s,t;
int i;
char section[30];
char before[30];
for (i = 0, r = p; isdigit(buf[r]) || buf[r] == '.'; r++)
{
if (i < 29)
section[i++] = buf[r];
}
if (i > 0 && section[i-1] == '.') i--;
section[i] = '\0';
/* t is always assigned: buf[p-1] is known to be a blank */
for (s = p-1; (buf[s] == ' '||buf[s] == 255); s--)
t = s;
for (; isalpha(buf[s]); s--);
s++;
for (i=0; s < t; s++)
{
if (i < 29)
before[i++] = buf[s];
}
before[i] = '\0';
printf("Section |%s|%s|%s|\n",out, before, section);
if ( strcmp(before, "sections") == 0
|| strcmp(before, "subsections") == 0
|| strcmp(before, "section") == 0
|| strcmp(before, "subsection") == 0
|| strcmp(before, "see") == 0
|| (plr && strcmp(before, "and") == 0 ))
{ plr = strcmp(before, "sections") == 0
|| strcmp(before, "subsections") == 0;
fprintf(f, "%s", section, section);
p = r-1;
}
else
fprintf(f, "%c", buf[p]);
}
/* remaining cases: translation of single bytes */
else if (buf[p] == 255)
{ if (buf[p+1] != ':')
fprintf(f, " ");
}
else if (buf[p] == 13)
;
else if (buf[p] == 11)
fprintf(f, "%s", pre ? "\n" : " \n");
else if (buf[p] == 12)
fprintf(f, "\n
\n");
else if (buf[p] == 9)
{
if (pre)
fprintf(f, " ");
else if (cur_table)
fprintf(f, " | ");
else
fprintf(f, "*tab*");
}
else if (buf[p] == 132)
fprintf(f, "ä");
else if (buf[p] == 177)
fprintf(f, "&emdash;");
else if (buf[p] == 244)
fprintf(f, "|");
/* inside a style-31 run symbol bytes are written as words */
else if (in_math)
{
switch(buf[p])
{ case ' ': fprintf(f, " "); break;
case '"': fprintf(f, "for all"); break;
case '=': fprintf(f, "="); break;
case '$': fprintf(f, "exists"); break;
case 163: fprintf(f, "<="); break;
case 179: fprintf(f, "=>"); break;
case 185: fprintf(f, "!="); break;
case 197: fprintf(f, "exor"); break;
case 198: fprintf(f, "empty"); break;
case 199: fprintf(f, "intersection"); break;
case 200: fprintf(f, "union"); break;
case 203: fprintf(f, "not subset"); break;
case 204: fprintf(f, "subset"); break;
case 206: fprintf(f, "in"); break;
case 207: fprintf(f, "not in"); break;
case 217: fprintf(f, "and"); break;
case 218: fprintf(f, "or"); break;
default: fprintf(f, "MATH*%d", buf[p]); break;
}
}
else
{
/* bytes below 32 or above 128 are written as a backslash and the decimal
   value; in title paragraphs only the first letter keeps upper case */
if (buf[p] < 32 || buf[p] > 128)
fprintf(f, "\\%d", buf[p]);
else if (buf[p] == '<')
fprintf(f, "<");
else if (buf[p] == '>')
fprintf(f, ">");
else if (buf[p] == '&')
fprintf(f, "&");
else if (tit && isalpha(buf[p]))
{
fprintf(f, "%c", ftit ? toupper(buf[p]) : tolower(buf[p]));
ftit = 0;
}
else
fprintf(f, "%c", buf[p]);
}
}
/* end of the paragraph */
while( buf[p] == 10) p++;
/* paragraph closing, selected by paragraph style */
switch( pstyle )
{
case 0:
case 61:
case 109: fprintf(f, "\n"); break;
case 63: fprintf(f, "\n | |
\n"); tt--; break;
case 107:
case 115: fprintf(f, "\n"); tt--; break;
case 83:
case 93:
case 94: fprintf(f, "\n"); break;
case 177: fprintf(f, "\n\n"); break;
case 179: fprintf(f, "\n\n"); break;
case 181: fprintf(f, "\n\n"); break;
case 183: fprintf(f, "\n\n"); break;
case 77:
case 81:
case 199:
case 201:
case 203:
case 205: fprintf(f, "\n"); break;
default: fprintf(f, "
\n", pstyle);
}
}
/* close whatever is still open at the end of the text */
if (cur_table)
{
fprintf(f, "");
cur_table = 0;
}
for (;cur_level > 0; cur_level--)
fprintf(f, "%cL>\n", levels[cur_level-1]);
/* fixed page footer */
fprintf(f, "\n\n
\n
");
fprintf(f, "\nMy life as a hacker |");
fprintf(f, "\nMy home page");
fprintf(f, "\n");
fprintf(f, "\n ");
fprintf(f, "\n");
}
/*
 * find_all_reflabels: first pass over one document.  Loads the file `in`
 * into the global buffer and registers the cross references found in it
 * (see find_reflabels); `out` is the document's output name.
 * Prints a message and returns when the file cannot be opened.
 *
 * Changes from the original: the input is opened in binary mode ("rb"),
 * because the file is decoded byte by byte and text mode may alter the bytes
 * on some platforms; the buffer is passed to scan_file() with the pointer
 * type that function declares.
 */
void find_all_reflabels(char *in, char *out)
{
    FILE *fin = fopen(in, "rb");

    if (fin == NULL)
    {
        printf("cannot open: %s\n", in);
        return;
    }
    scan_file((char *)buf, fin, NULL);
    /* the whole input is in memory now */
    fclose(fin);
    find_reflabels(out);
}
/*
 * process_file: second pass over one document.  Loads the file `in` into the
 * global buffer and writes its HTML rendering to "MT<out>.html".
 * Prints a message and returns when the input cannot be opened, the output
 * name does not fit, or the output cannot be created.
 *
 * Changes from the original: fclose() is no longer called with a NULL stream
 * when the output file cannot be created; the output name is built with a
 * length-checked snprintf instead of sprintf; the input is opened in binary
 * mode; the buffer is passed to scan_file() with the declared pointer type.
 */
void process_file(char *in, char *out)
{
    char out_name[30];
    FILE *fin = fopen(in, "rb");
    FILE *fout;
    int len;

    if (fin == NULL)
    {
        printf("cannot open: %s\n", in);
        return;
    }
    printf("=== %s\n", out);
    scan_file((char *)buf, fin, NULL);
    /* the whole input is in memory now */
    fclose(fin);

    len = snprintf(out_name, sizeof out_name, "MT%s.html", out);
    if (len < 0 || (size_t)len >= sizeof out_name)
    {
        printf("output name too long for: %s\n", out);
        return;
    }
    fout = fopen(out_name, "w");
    if (fout == NULL)
    {
        printf("cannot create: %s\n", out_name);
        return;
    }
    dump_html(fout, out);
    /* fclose flushes; a failure here means the output is incomplete */
    if (fclose(fout) != 0)
        printf("error writing: %s\n", out_name);
}
/*
 * analyse_file: load the file `in` into the global buffer and print its text
 * with formatting markers on stdout (see dump_text).
 * Prints a message and returns when the file cannot be opened.
 *
 * Changes from the original: the function called fclose() on a stream
 * variable that was never assigned (undefined behaviour) and never closed
 * the input it had opened; it now closes the input.  The unused output-name
 * buffer was removed, the input is opened in binary mode, and the buffer is
 * passed to scan_file() with the declared pointer type.
 */
void analyse_file(char *in)
{
    FILE *fin = fopen(in, "rb");

    if (fin == NULL)
    {
        printf("cannot open: %s\n", in);
        return;
    }
    scan_file((char *)buf, fin, NULL);
    /* the whole input is in memory now */
    fclose(fin);
    dump_text(endtext, stdout);
}
int main(int argc, char *argv[])
{
int i;
buf = (char*)malloc(200000);
if (buf == NULL)
{ printf("allocation failed\n");
return 1;
}
if (argc > 1)
analyse_file(argv[1]);
else
{
for (i = 0; i < sizeof(thefiles)/sizeof(thefiles[0]); i++)
find_all_reflabels(thefiles[i].in, thefiles[i].out);
for (i = 0; i < sizeof(thefiles)/sizeof(thefiles[0]); i++)
process_file(thefiles[i].in, thefiles[i].out);
}
}