/* ChkHTML -- Program for checking HTML files Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001 Frans Faase This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. GNU General Public License: http://home.planet.nl/~faase009/GNU.txt */ #define VERSION "2.8 of January 23, 2002." #define WRITTEN_BY "F.J. Faase. http://home.planet.nl/~faase009/" /* Status: Has been tested on my documents */ #define DYN_DEBUG #include #include /*#include */ #include #include /*#include */ #include /*********** Basic definitions **************/ typedef char bool; #define TRUE (bool)1 #define FALSE (bool)0 typedef unsigned char byte; typedef unsigned int word; bool file_exists(file) char *file; { FILE *f = fopen(file, "r"); if (f != NULL) { fclose(f); return TRUE; } else return FALSE; } #define ALLOC(type) (type *)malloc(sizeof(type)) #define SALLOC(s) (char *)malloc(strlen(s)+1) #define NALLOC(type,n) (type *)malloc((n)*sizeof(type)) #define STRCPY(D,S) D = SALLOC(S); strcpy(D,S) #define STRNCPY(D,S,N) D = NALLOC(char,N+1); strncpy(D,S,N); D[N] = '\0'; /************ lclint macros ************/ #define streq(A,B) (strcmp(A,B) == 0) #define strieq(A,B) (stricmp(A,B) == 0) #define strneq(A,B,C) (strncmp(A,B,C) == 0) #define memeq(A,B,C) (memcmp(A,B,C) == 0) /*********** debug macros *********/ #define NO_DEBUG #ifdef DEBUG #define DEBUG_PRINT(X) printf X #define DEBUG_P(X) printf(X) #define DEBUG_P1(X,A1) printf(X,A1) #define DEBUG_P2(X,A1,A2) printf(X,A1,A2) #define DEBUG_P3(X,A1,A2,A3) printf(X,A1,A2,A3) #define DEBUG_P4(X,A1,A2,A3,A4) printf(X,A1,A2,A3,A4) #define DEBUG_P5(X,A1,A2,A3,A4,A5) printf(X,A1,A2,A3,A4,A5) #define BREAK { int i = i/0; } /* sneaky trick to remain in debugger */ #else #ifdef DYN_DEBUG bool option_debug = FALSE; #define DEBUG_PRINT(X) if (option_debug) printf X #define DEBUG_P(X) if (option_debug) printf(X) #define DEBUG_P1(X,A1) if (option_debug) printf(X,A1) #define DEBUG_P2(X,A1,A2) if (option_debug) printf(X,A1,A2) #define DEBUG_P3(X,A1,A2,A3) if (option_debug) printf(X,A1,A2,A3) #define DEBUG_P4(X,A1,A2,A3,A4) if (option_debug) printf(X,A1,A2,A3,A4) #define DEBUG_P5(X,A1,A2,A3,A4,A5) if (option_debug) printf(X,A1,A2,A3,A4,A5) #define BREAK { int i = i/0; } /* sneaky trick to remain in debugger */ #else #define DEBUG_PRINT(X) #define DEBUG_P(X) #define DEBUG_P1(X,A1) #define DEBUG_P2(X,A1,A2) #define DEBUG_P3(X,A1,A2,A3) #define DEBUG_P4(X,A1,A2,A3,A4) #define DEBUG_P5(X,A1,A2,A3,A4,A5) #define BREAK { printf("\nInternal error, please report\n"); abort(); } #endif #endif #define DEBUG_GN(X) /* fputc(X, fout) */ #define DO_DEBUG_PRINT(X) printf X #define DO_DEBUG_P(X) printf(X) #define DO_DEBUG_P1(X,A1) printf(X,A1) #define DO_DEBUG_P2(X,A1,A2) printf(X,A1,A2) #define DO_DEBUG_P3(X,A1,A2,A3) printf(X,A1,A2,A3) #define DO_DEBUG_P4(X,A1,A2,A3,A4) printf(X,A1,A2,A3,A4) /************** prototypes ***************/ char *rel_URL(char *from, char *to); /************** Program options stored in global variables *******/ bool option_info = FALSE, option_warn = FALSE, option_pedantic = FALSE, option_bibliography = FALSE, is_html_fn = FALSE; long nr_int_links, nr_ext_links, nr_broken_ext_links; char *other_ext_name[30]; long other_ext_size[30]; int other_ext_nr[30]; int nr_other_ext = 0; void add_other_ext(char *ext, long size) { int i; for (i = 0; i < nr_other_ext; i++) if (strcmp(ext, other_ext_name[i]) == 0) { other_ext_size[i] += size; other_ext_nr[i]++; return; } if (i == 30) return; STRCPY(other_ext_name[i], ext); other_ext_size[i] = size; other_ext_nr[i] = 1; nr_other_ext++; } int nr_ext_files(char *ext) { int i; for (i = 0; i < nr_other_ext; i++) if (strcmp(ext, other_ext_name[i]) == 0) return other_ext_nr[i]; return 0; } long size_ext_files(char *ext) { int i; for (i = 0; i < nr_other_ext; i++) if (strcmp(ext, other_ext_name[i]) == 0) return other_ext_size[i]; return 0; } /********** HTML files and there references **********/ #define NAME_EXISTS 1 #define NAME_REPEATED 2 #define NAME_REFERENCED 4 /* coding for ref_t.status: */ #define S_CORRECT 0 #define S_E_FILE 1 /* HTML file does not exist */ #define S_U_FILE 2 /* HTML file name is an URL */ #define S_E_NAME 3 /* NAME in HTML file does not exist */ typedef struct file_t file_t, *file_p; typedef struct section_t section_t, *section_p; typedef struct section_list_t section_list_t, *section_list_p; typedef struct name_t name_t, *name_p; typedef struct href_t href_t, *href_p; struct name_t { name_p next; char *name; long line; char status; }; struct href_t { href_p next; file_p file; char *name; long line; section_p section; bool in_header; }; struct section_t { section_p next; file_p file; section_p parent; int level; char *title; name_p names; href_p hrefs; section_p nested; section_list_p refered_by; bool has_text; bool oneway; }; struct section_list_t { section_list_p next; section_p section; }; /* Information of a file: */ struct file_t { file_p next; char *name; section_p sections; bool exists; /* exists == TRUE implies !is_URL(name) */ char read; int nr_refs; /* number of references to this file */ long size_local; long date_days; short date_mins; bool on_ftp; char *contents; }; section_p cur_section = NULL; void add_section(file_p file, int level) { section_p new_section; new_section = ALLOC(section_t); new_section->next = NULL; new_section->file = file; new_section->parent = NULL; new_section->level = level; new_section->title = ""; new_section->names = NULL; new_section->hrefs = NULL; new_section->nested = NULL; new_section->refered_by = NULL; new_section->has_text = FALSE; new_section->oneway = FALSE; if (file->sections == NULL) { file->sections = new_section; } else { while (cur_section->parent != NULL && cur_section->parent->level >= level) cur_section = cur_section->parent; if (cur_section->level < level) { cur_section->nested = new_section; new_section->parent = cur_section; } else { cur_section->next = new_section; new_section->parent = cur_section->parent; } } cur_section = new_section; } void next_section(section_p *r_section, int *depth) { if (*r_section == NULL) return; if ((*r_section)->nested != NULL) { *r_section = (*r_section)->nested; if (depth) (*depth)++; return; } while ((*r_section) && (*r_section)->next == NULL) { *r_section = (*r_section)->parent; if (depth) (*depth)--; } if ((*r_section) != NULL) *r_section = (*r_section)->next; } /* Coding for file_t.read: */ #define R_UNREAD 0 #define R_READ 1 /* HTML file read, to check for consistency */ #define R_INCLUDED 2 /* HTML file included in LaTeX output */ #define R_INDIR 4 #define R_DOREAD 8 /* List of all files: */ file_p the_files = NULL; /************ Procedures for storing reference information *******/ char *c_top = ":TOP"; char *norm_name(char *name) { return name == NULL ? c_top : name; } bool eq_name(char *a, char *b) { if (a == NULL || a == c_top || *a == '\0') return b == NULL || b == c_top || *b == '\0'; if (b == NULL || b == c_top || *b == '\0') return FALSE; return !strcmp(a, b); } bool is_URL(name) char *name; /* returns TRUE if name is an URL. */ { return memeq(name, "news:", 5) || memeq(name, "http:", 5) || memeq(name, "file:", 5) || memeq(name, "ftp:", 4) || memeq(name, "wais:", 5) || memeq(name, "gopher:", 7) || memeq(name, "mailto:", 7) || memeq(name, "telnet:", 7); } bool is_html(name) char *name; { return streq(name + strlen(name) - 5, ".html") || streq(name + strlen(name) - 4, ".htm"); } bool is_js(name) char *name; { return streq(name + strlen(name) - 3, ".js"); } int stricmp(const char *a, const char *b) { while(*a != '\0' && *b != '\0' && toupper(*a) == toupper(*b)) { a++; b++; } if (toupper(*a) < toupper(*b)) return -1; if (toupper(*a) > toupper(*b)) return 1; return 0; } file_p find_file(file) char *file; /* Returns pointer to HTML file record with the name `file'. If such a record did not exist in the list, it is added alphabetically on the file name. */ { file_p *p_file = &the_files; while (*p_file != NULL && stricmp((*p_file)->name, file) < 0) p_file = &(*p_file)->next; if (*p_file == NULL || stricmp((*p_file)->name, file)) { file_p n = ALLOC(file_t); DEBUG_PRINT(("FILEADDED\n")); n->next = *p_file; STRCPY(n->name, file); n->sections = NULL; n->exists = file_exists(file); n->read = R_UNREAD; n->nr_refs = 0; n->size_local = -1; n->date_days = -1; n->date_mins = -1; n->contents = NULL; n->on_ftp = FALSE; *p_file = n; } return *p_file; } bool name_repeated(section_p section, char *name) { int count; count = 2; for ( ; section; next_section(§ion, NULL)) { name_p names; for (names = section->names; names; names = names->next) { if (streq(names->name, name)) { count--; if (count == 0) return TRUE; } } } return FALSE; } section_p section_with_name(section_p section, char *search_name) { for ( ; section; next_section(§ion, NULL)) { name_p name; for (name = section->names; name; name = name->next) { if (streq(name->name, search_name)) { name->status |= NAME_REFERENCED; return section; } } } return NULL; } /*link_t *last_ref = NULL;*/ /* Return values for find_ref */ #define REF_OKAY 0 #define REF_FILE_NOT_FOUND 1 /* HTML file does not exist */ #define REF_FILE_NOT_INC 2 /* HTML file not included in LaTeX */ #define REF_NAME_NOT_FOUND 4 /* NAME not found in HTML file */ #define REF_NAME_NOT_INC 5 /* NAME not included in LaTeX, but referenced */ #define REF_ILL 6 /* Illegal (or too long) formed URL */ int find_ref(file, name) char *file, *name; /* Checks reference to HTML file `file' with (optional) NAME `name' and returns appropriate return value. */ { file_p tfile = the_files; DEBUG_PRINT(("find_ref(%s, %s)\n", file, name)); if (is_URL(file)/* || !is_html(file)*/) return REF_OKAY; while (tfile != NULL && stricmp(tfile->name, file) != 0) tfile = tfile->next; if (tfile == NULL || stricmp(tfile->name, file)) return REF_FILE_NOT_FOUND; if (!tfile->exists) return REF_FILE_NOT_FOUND; if (name[0] == '\0') return REF_OKAY; { section_p section = section_with_name(tfile->sections, name); return section ? REF_OKAY : REF_NAME_NOT_FOUND; } } /********** Procedures for making/checking cross references ***********/ int nstrcmp(str1, str2) char *str1, *str2; /* This procedure compares two strings, like strcmp, where the string pointers can be NULL. A NULL pointer comes before all other strings. */ { return (str1 == NULL) ? (str2 == NULL ? 0 : -1) : (str2 == NULL) ? 1 : strcmp(str1, str2); } /************ Scanning buffers **************/ /* sizes of buffers used during reading of HTML files: */ #define MAX_SF 80 /* max size of name of source HTML file name */ #define MAX_DF 600 /* max size of name of reference HTML file name */ #define MAX_N 100 /* max size of NAME's */ #define MAX_HC 100 /* max size of HTML commands */ #define MAX_AT 1000 /* max size of text inside an anchor */ #define MAX_AV 600 /* max size of attribute value */ char url_argument[MAX_DF]; /* Temporary buffer used during file name manipulations: */ char df_buffer[MAX_DF+1]; /*********** Manipulating URL's *************/ /* URL of document */ char *document_URL = NULL, *server_URL = NULL, /* server part of document_URL (without last '/'). */ *file_URL = NULL; /* file part of document_URL (starting with '/'). */ char *roots[2] = { "http://home.wxs.nl/~kabuki/", "http://home.wxs.nl/~faase009/", }; bool norm_URL(origin, file) char *origin, *file; /* Normalizes the file name `file' appearing in HTML file `origin', with the following steps: 1. If `file' is empty, use assign `origin' to `file'. Else if `file' is not an URL and does not start with '/' then glue it together with directories in `origin'. 2. If `file' is not an URL glue it together with document URL. 3. If `file' starts with document URL, remove it. 4. If not URL and not html, add index.html, when the file exists. 5. Do any link mappings. */ { /* assume that origin: ['/']( '/')* */ int i; char *s; DEBUG_PRINT(("norm_URL(%s, %s) %s %s\n", origin, file, server_URL, file_URL)); /* Step 1: */ /* if file is empty, use origin: */ if (file[0] == '\0') { if (strlen(origin) < MAX_DF) strcpy(file, origin); else return FALSE; } /* if file is not an URL and does not start with '/' then glue it together with directories in origin: */ else if (file[0] != '/' && !is_URL(file)) { int i = strlen(origin); char *s = file; DEBUG_PRINT(("glue %s with %s", origin, file)); /* remove file-name from origin: */ while (i > 0 && origin[i - 1] != '/') i--; DEBUG_PRINT((" : %s + %s\n", origin, file)); /* cancel last directory in origin with '../': */ while ( i > 1 && origin[i - 1] == '/' && s[0] == '.' && ( (s[1] == '.' && (s[2] == '/' || s[2] == '\0')) || s[1] == '/' || s[1] == '\0')) if (s[1] == '/') s += 2; else if (s[1] == '\0') s++; else { s += s[2] == '/' ? 3 : 2; do i--; while (i > 0 && origin[i - 1] != '/'); } if ( i == 1 && origin[0] == '/' && s[0] == '.' && s[1] == '.' /* && s[2] == '/'*/) return FALSE; else if (i == 0) strcpy(file, s); else if(i + strlen(s) < MAX_DF) { memcpy(df_buffer, origin, i); strcpy(df_buffer + i, s); strcpy(file, df_buffer); } else return FALSE; } DEBUG_PRINT(("After step 1: %s\n", file)); /* Step 2. */ if ( document_URL != NULL && file[0] == '.' && file[1] == '.' && file[2] == '/') { int i = strlen(file_URL) - 1; char *s = file; DEBUG_PRINT(("glue %s + %s\n", file_URL, file)); /* assume that file_URL is of the form: '/' ( '/')* */ while (i > 0 && s[0] == '.' && s[1] == '.' && s[2] == '/') { s += 3; i--; while (i > 0 && file_URL[i] != '/') i--; DEBUG_PRINT(("replace %s with %s\n", s, file_URL + i)); } i += strlen(server_URL); if (i + 1 + strlen(s) < MAX_DF) { memcpy(df_buffer, document_URL, i + 1); strcpy(df_buffer + i + 1, s); strcpy(file, df_buffer); } else return FALSE; } /* if file starts with '/' add server_URL: */ else if (document_URL != NULL && file[0] == '/') { if (strlen(file) + strlen(server_URL) < MAX_DF) { strcpy(df_buffer, server_URL); strcat(df_buffer, file); strcpy(file, df_buffer); } else return FALSE; } DEBUG_PRINT(("After step 2: %s\n", file)); /* Step 3: */ /* if URL starts with document URL, remove it: */ if ( document_URL != NULL && memeq(file, document_URL, strlen(document_URL))) strcpy(file, file + strlen(document_URL)); for (i = 0; i < 2; i++) if (!strncmp(file, roots[i], strlen(roots[i]))) strcpy(file, file + strlen(roots[i])); DEBUG_PRINT(("After step 3: %s\n", file)); /* Step 4: */ if (streq(file, ".")) file[0] = '\0'; /* if not URL and not .html, add index.html, when file exists */ if (!is_URL(file) && !is_html(file) && strlen(file) + 12 < MAX_DF) { strcpy(df_buffer, file); if (df_buffer[0] != '\0' && df_buffer[strlen(df_buffer)-1] != '/') strcat(df_buffer, "/"); strcat(df_buffer, "index.html"); DEBUG_PRINT(("Try: %s\n", df_buffer)); if (file_exists(df_buffer)) strcpy(file, df_buffer); } url_argument[0] = '\0'; s = strstr(file, ".html?"); if (!is_URL(file) && s != NULL) { strcpy(url_argument, s+6); s[5] = '\0'; } DEBUG_PRINT(("After step 4: %s\n", file)); if (streq(file, "Broken.html")) nr_broken_ext_links++; else if (is_URL(file)) { if (strcmp(file, "brexrefs.html")) nr_ext_links++; } else nr_int_links++; return TRUE; } char *rel_URL(char *from, char *to) /* This function returns the shortest string to go file "to" inside file "from" */ { char *to2 = to; bool go; int d; DEBUG_P2("rel_URL(%s, %s)\n", from, to); go = TRUE; while (go) { char *f = from, *t = to; for (; *f != '\0' && *f != '/' && *f == *t; f++, t++); if (*f == '/' && *t == '/') { from = f + 1; to = t + 1; } else go = FALSE; } d = 0; for (; *from != '\0'; from++) if (*from == '/') d++; df_buffer[0] = '\0'; for (; d > 0; d--) if (strlen(df_buffer) + 3 >= MAX_DF) return to; else strcat(df_buffer, "../"); if (strlen(df_buffer) + strlen(to) >= MAX_DF) return to; else strcat(df_buffer, to); DEBUG_P1("relative: %s\n", df_buffer); if (file_URL != NULL && strlen(file_URL) + strlen(to2) < strlen(df_buffer)) { strcpy(df_buffer, file_URL); strcat(df_buffer, to2); } if (streq(df_buffer, "index.html")) return "."; { int l = strlen(df_buffer); if (l > 11 && streq(df_buffer + l - 11, "/index.html")) df_buffer[l - 11] = '\0'; } DEBUG_P1("return: %s\n", df_buffer); return df_buffer; } /******* Translating special characters to LaTeX characters ******/ /********* Generating TeX output procedures **********/ /* Generation state values */ /************* Scanning a HTML file ******************/ #define T_ILL 0 #define T_HTML 1 #define T_HEAD 2 #define T_TITLE 3 #define T_BODY 4 #define T_ADDR 5 #define T_LINK 6 #define T_H 10 #define T_VERB 11 #define T_DIR 12 #define T_LIST 13 #define T_DESC 14 #define T_ITEM 15 #define T_DT 16 #define T_DD 17 #define T_P 18 #define T_A 19 #define T_IMG 20 #define T_CHAR 21 #define T_BR 22 #define T_META 23 #define T_SCRIPT 24 #define NR_TAGS 76 #define TN_H1 5 #define C_NO 0 #define C_YES 1 #define C_OPT 2 struct Codes { char *name; int closing; byte kind; } tags[NR_TAGS] = { #define H_HTML 0 { "html", C_YES, T_HTML }, #define H_HEAD 1 { "head", C_YES, T_HEAD }, #define H_TITLE 2 { "title", C_YES, T_TITLE }, #define H_BODY 3 { "body", C_YES, T_BODY }, #define H_ADDRESS 4 { "address", C_YES, T_ADDR }, #define H_H1 5 { "h1", C_YES, T_H }, #define H_H2 6 { "h2", C_YES, T_H }, #define H_H3 7 { "h3", C_YES, T_H }, #define H_H4 8 { "h4", C_YES, T_H }, #define H_H5 9 { "h5", C_YES, T_H }, #define H_H6 10 { "h6", C_YES, T_H }, #define H_P 11 { "p", C_OPT, T_P }, #define H_UL 12 { "ul", C_YES, T_LIST }, #define H_MENU 13 { "menu", C_YES, T_LIST }, #define H_DIR 14 { "dir", C_YES, T_LIST }, #define H_OL 15 { "ol", C_YES, T_LIST }, #define H_LI 16 { "li", C_OPT, T_ITEM }, #define H_LH 17 { "lh", C_OPT, T_ITEM }, #define H_DL 18 { "dl", C_YES, T_DESC }, #define H_DT 19 { "dt", C_OPT, T_DT }, #define H_DD 20 { "dd", C_OPT, T_DD }, #define H_A 21 { "a", C_OPT, T_A }, #define H_Q 22 { "q", C_YES, T_CHAR }, #define H_I 23 { "i", C_YES, T_CHAR }, #define H_EM 24 { "em", C_YES, T_CHAR }, #define H_B 25 { "b", C_YES, T_CHAR }, #define H_STRONG 26 { "strong", C_YES, T_CHAR }, #define H_TT 27 { "tt", C_YES, T_CHAR }, #define H_SAMP 28 { "samp", C_YES, T_CHAR }, #define H_KDB 29 { "kbd", C_YES, T_CHAR }, #define H_VAR 30 { "var", C_YES, T_CHAR }, #define H_DFN 31 { "dfn", C_YES, T_CHAR }, #define H_CODE 32 { "code", C_YES, T_CHAR }, #define H_BLINK 33 { "blink", C_YES, T_CHAR }, #define H_CITE 34 { "cite", C_YES, T_CHAR }, #define H_BLOCKQUOTE 35 { "blockquote", C_YES, T_CHAR }, #define H_BQ 36 { "bq", C_YES, T_CHAR }, #define H_U 37 { "u", C_YES, T_CHAR }, #define H_S 38 { "s", C_YES, T_CHAR }, #define H_SMALL 39 { "small", C_YES, T_CHAR }, #define H_BIG 40 { "big", C_YES, T_CHAR }, #define H_NOTE 41 { "note", C_YES, T_CHAR }, #define H_AU 42 { "au", C_YES, T_CHAR }, #define H_PERSON 43 { "person", C_YES, T_CHAR }, #define H_ACRONYM 44 { "acronym", C_YES, T_CHAR }, #define H_ABBREV 45 { "abbrev", C_YES, T_CHAR }, #define H_CREDIT 46 { "credit", C_YES, T_CHAR }, #define H_INS 47 { "ins", C_YES, T_CHAR }, #define H_DEL 48 { "del", C_YES, T_CHAR }, #define H_PRE 49 { "pre", C_YES, T_VERB }, #define H_XMP 50 { "xmp", C_YES, T_VERB }, #define H_LISTING 51 { "listing", C_YES, T_VERB }, #define H_BR 52 { "br", C_NO, T_DIR }, #define H_HR 53 { "hr", C_NO, T_DIR }, #define H_IMG 54 { "img", C_NO, T_IMG }, #define H_ISINDEX 55 { "isindex", C_NO, T_DIR }, #define H_SELECT 56 { "select", C_YES, T_DIR }, #define H_LINK 57 { "link", C_NO, T_LINK }, #define H_CENTER 58 { "center", C_YES, T_CHAR }, #define H_META 59 { "meta", C_NO, T_META }, #define H_TABLE 60 { "table", C_YES, T_DIR }, #define H_TH 61 { "th", C_OPT, T_DIR }, #define H_TR 62 { "tr", C_OPT, T_DIR }, #define H_TD 63 { "td", C_OPT, T_DIR }, #define H_SUP 64 { "sup", C_YES, T_DIR }, #define H_SUB 65 { "sub", C_YES, T_DIR }, #define H_CAPTION 66 { "caption", C_OPT, T_CHAR }, #define H_SCRIPT 67 { "script", C_OPT, T_SCRIPT }, #define H_FONT 68 { "font", C_OPT, T_DIR }, #define H_FORM 69 { "form", C_OPT, T_CHAR }, #define H_INPUT 70 { "input", C_NO, T_CHAR }, #define H_TEXTAREA 71 { "textarea",C_OPT, T_CHAR }, #define H_NOBR 72 { "nobr", C_OPT, T_DIR }, #define H_NOSCRIPT 73 { "noscript", C_YES, T_DIR }, #define H_MARQUEE 74 { "marquee", C_YES, T_DIR }, #define H_STYLE 75 { "style", C_YES, T_DIR }, }; /* Generation state values */ bool in_html, in_head, in_title, in_body, in_header, in_address; bool active_href; char dest_file[MAX_DF + 1], href_name[MAX_N + 1]; int href_status; #define MAX_STACK 30 struct { int ln; byte tagnr; byte tagkind; bool closing; } stack[MAX_STACK]; int stack_depth = 0; bool scan_def_string(char *def, int *rp, int *rl, char *str) { int p = *rp, l = *rl; l = 0; p++; for (;;) { if (def[p] == '\0') { *rp = p; *rl = l; return FALSE; } else if (def[p] == '"') if (def[p+1] == '"') { if (str != NULL) str[l] = '"'; l++; p++; } else { if (str != NULL) str[l] = '\0'; p++; *rp = p; *rl = l; return TRUE; } else if (strneq(def + p, "&", 5)) { if (str != NULL) str[l] = '&'; l++; p += 5; } else if (strneq(def + p, "<", 4)) { if (str != NULL) str[l] = '<'; l++; p += 4; } else if (strneq(def + p, ">", 4)) { if (str != NULL) str[l] = '>'; l++; p += 4; } else if (strneq(def + p, "‐", 6)) { if (str != NULL) str[l] = '-'; l++; p += 6; } else if (strneq(def + p, "\\nl", 3)) { if (str != NULL) str[l] = '\n'; l++; p += 3; } else { if (str != NULL) str[l] = def[p]; l++; p++; } } } void latex_open(FILE *fout, FILE *freport, int tagnr, int ln, char *html_fn, bool closing) { int tagkind = tags[tagnr].kind; DEBUG_P3("latex_open(,tagnr=%d, %s, ln=%d )\n", tagnr, tags[tagnr].name, ln); if (tags[tagnr].closing == C_NO) return; if (stack_depth < MAX_STACK) { DEBUG_P3("push(%d,%d,) : %d\n", ln, tagnr, stack_depth); stack[stack_depth].ln = ln; stack[stack_depth].tagnr = tagnr; stack[stack_depth].tagkind = tagkind; stack[stack_depth].closing = closing; stack_depth++; } else if (freport != NULL) fprintf(freport, "%s (%d) : remove <%s>, more than %d nested tags.\n", html_fn, ln, tags[tagnr].name, MAX_STACK); DEBUG_P2("latex_open(,tagnr=%d, ln=%d)\n", tagnr, ln); switch (tagnr) { case H_HTML : in_html = TRUE; break; case H_HEAD : in_head = TRUE; break; case H_BODY : in_body = TRUE; break; case H_TITLE : in_title = TRUE; break; case H_ADDRESS : in_address = TRUE; break; } switch (tagkind) { case T_H : in_header = TRUE; break; } } void latex_close(FILE *fout, FILE *freport, int ln, char *html_fn) { if (stack_depth <= 0) return; stack_depth--; /*DEBUG_P1("latex_close: %s\n", tags[tagnr].name);*/ switch (stack[stack_depth].tagnr) { case H_HTML : in_html = FALSE; break; case H_HEAD : in_head = FALSE; break; case H_BODY : in_body = FALSE; break; case H_TITLE : in_title = FALSE; break; case H_ADDRESS : in_address = FALSE; break; } switch (stack[stack_depth].tagkind) { case T_H : in_header = FALSE; case T_A : active_href = FALSE; break; } } char tmp_tagname[30]; char *tagname(int tagnr, bool closing_tag) { if (closing_tag) { tmp_tagname[0] = '/'; strcpy(tmp_tagname + 1, tags[tagnr].name); return tmp_tagname; } else return tags[tagnr].name; } #define REMOVE_TAG(R) remove_tag(freport, tagnr, closing_tag, R, html_fn, ln) void remove_tag(FILE *freport, int tagnr, bool closing_tag, char *r, char *html_fn, int ln) { if (freport != NULL) fprintf(freport, "%s (%d) : remove <%s>%s.\n", html_fn, ln, tagname(tagnr, closing_tag), r); } #define ADD_TAG(T,C,R) add_tag(freport, T, C, R, html_fn, ln) void add_tag(FILE *freport, int tagnr, bool closing_tag, char *r, char *html_fn, int ln) { if (freport != NULL) { fprintf(freport, "%s (%d) : add <%s>", html_fn, ln, tagname(tagnr, closing_tag)); if (closing_tag) fprintf(freport, " (with <%s> in line %d)", tags[tagnr].name, stack[stack_depth-1].ln); fprintf(freport, "%s.\n", r ? r : ""); } } #define REPLACE_TAG() replace_tag(freport, tagnr, closing_tag, html_fn, ln) void replace_tag(FILE *freport, int tagnr, bool closing_tag, char *html_fn, int ln) { if (freport != NULL) fprintf(freport, "%s (%d) : replace <%s> by .\n", html_fn, ln, tagname(tagnr, closing_tag), tags[stack[stack_depth-1].tagnr].name); } void latex_closes(FILE *fout, FILE *freport, int tagnr, int tagkind, bool closing_tag, bool till, int ln, char *html_fn) { int j; bool found = FALSE; for (j = stack_depth - 1; j >= 0; j--) if ( tagkind != T_ILL ? stack[j].tagkind == tagkind : stack[j].tagnr == tagnr) { found = TRUE; break; } if (found && tags[tagnr].kind == T_CHAR) found = j > stack_depth - 5; if (found) { while (stack_depth > j + 1) { char mess[60]; sprintf(mess, ", before <%s>", tagname(tagnr, closing_tag)); if (stack[stack_depth-1].closing == C_YES) ADD_TAG(stack[stack_depth-1].tagnr, TRUE, mess); latex_close(fout, freport, ln, html_fn); } } if (till) return; if (found) { if (stack[stack_depth-1].tagnr != tagnr) REPLACE_TAG(); latex_close(fout, freport, ln, html_fn); } else if (stack_depth == 0) REMOVE_TAG(", no open tag"); else { char mess[60]; sprintf(mess, ", does not match <%s>", tags[stack[stack_depth-1].tagnr].name); REMOVE_TAG(mess); } } bool inside_tag(int tagkind) { int j; for (j = stack_depth - 1; j >= 0; j--) if (stack[j].tagkind == tagkind) return TRUE; else if ( stack[j].tagkind == T_LIST || stack[j].tagkind == T_DESC) return FALSE; return FALSE; } #define OPEN_TAG(K) (tagkind == K && !closing_tag) #define CLOSE_TAG(K) (tagkind == K && closing_tag) #define LATEX_OPEN(T) \ { latex_open(fout, freport, T, ln, html_fn, tags[T].closing); } #define LATEX_OPEN_C(T,C) \ { latex_open(fout, freport, T, ln, html_fn, C); } #define LATEX_CLOSES(T) \ { latex_closes(fout, freport, T, T_ILL, TRUE, FALSE, ln, html_fn); } #define LATEX_CLOSES_K() \ { latex_closes(fout, freport, tagnr, tagkind, closing_tag, FALSE, ln, \ html_fn); } #define LATEX_CLOSES_T(K) \ { latex_closes(fout, freport, tagnr, K, FALSE, TRUE, ln, html_fn); } void add_href(href_p *r_href, char *dest_file, char *href_name, long line, bool in_header); void skip_spaces(char **r_s, char *html_fn, FILE *fout, FILE *freport, char *r_ch, int *r_lni, bool first); void skip_spaces(char **r_s, char *html_fn, FILE *fout, FILE *freport, char *r_ch, int *r_ln, bool first) { char ch = *r_ch; int ln = *r_ln; /* skip spaces */ while (ch != '\0' && (ch == ' ' || ch == '\n')) if ((ch = *(*r_s)++) == '\n') ln++; /* process comments */ while (ch != '\0' && ch == '-') { char prev_ch = '\0'; char comment[2000]; int i = 0; if ((ch = *(*r_s)++) == '\n') ln++; if (ch != '-') { if (freport) fprintf(freport, "%s (%d) : ill comment start\n", html_fn, ln); break; } while(ch != '\0') { if ((ch = *(*r_s)++) == '\n') ln++; if (ch == '-' && prev_ch == '-') { if ((ch = *(*r_s)++) == '\n') ln++; break; } prev_ch = ch; if (i < 1999) comment[i++] = ch; } comment[i] = '\0'; if (i > 0 && comment[i-1] == '-') comment[i-1] = '\0'; if (streq(comment, "ONEWAY") && cur_section) cur_section->oneway = TRUE; if (!strncmp(comment, "REFBY:", 6)) { int i; char *s; for (i = 0, s = comment+6; *s != '\0' && *s != '#'; i++, s++) dest_file[i] = *s; dest_file[i] = '\0'; if (*s == '#') s++; for (i = 0; *s != '\0'; i++, s++) href_name[i] = *s; href_name[i] = '\0'; if (cur_section) add_href(&cur_section->hrefs, dest_file, href_name, ln, FALSE); } /* skip spaces */ while (ch != '\0' && (ch == ' ' || ch == '\n')) if ((ch = *(*r_s)++) == '\n') ln++; } *r_ch = ch; *r_ln = ln; } bool check_src(FILE *freport, char *html_fn, int ln, char *src, bool first) { if (!norm_URL(html_fn, src)) { if (freport != NULL) fprintf(freport, "%s (%d) : URL `%s' illegal or too long.\n", html_fn, ln, src); return FALSE; } else if (!is_URL(src)) { file_p src_file = find_file(src) ; src_file->read |= R_DOREAD; if (first) src_file->nr_refs++; else if (!src_file->exists) { if ( freport != NULL && ( option_warn || src_file->nr_refs < 2)) fprintf(freport, "%s (%d) : file `%s' does not exist.\n", html_fn, ln, src); return FALSE; } else if (strcmp(src_file->name, src)) { if ( freport ) fprintf(freport, "%s (%d) : change `%s' into '%s'.\n", html_fn, ln, src, src_file->name); return FALSE; } else src_file->read |= R_INCLUDED; } return TRUE; } #define AT_ANY 0 #define AT_ALIGN 1 #define AT_COLOR 2 #define AT_NUM 3 #define AT_IMG_ALIGN 4 #define AT_FILE 5 typedef struct { int tagnr; char *attr_name; int type; } valid_comb_t; valid_comb_t valid_comb[] = { { H_BODY, "alink", AT_COLOR }, { H_BODY, "onload", AT_ANY }, { H_BODY, "bgcolor", AT_COLOR }, { H_BODY, "bgproperties", AT_ANY }, { H_BODY, "background", AT_FILE }, { H_P, "title", AT_ANY }, { H_P, "align", AT_ALIGN }, { H_TABLE, "width", AT_NUM }, { H_TABLE, "border", AT_NUM }, { H_TABLE, "cellpadding", AT_NUM }, { H_TABLE, "cellspacing", AT_NUM }, { H_TABLE, "align", AT_ALIGN }, { H_TABLE, "vspace", AT_NUM }, { H_TABLE, "hspace", AT_NUM }, { H_TABLE, "bgcolor", AT_COLOR }, { H_TABLE, "background", AT_FILE }, { H_TR, "valign", AT_ANY }, { H_TR, "align", AT_ALIGN }, { H_IMG, "border", AT_NUM }, { H_IMG, "width", AT_NUM }, { H_IMG, "height", AT_NUM }, { H_IMG, "align", AT_IMG_ALIGN }, { H_IMG, "space", AT_NUM }, { H_IMG, "hspace", AT_NUM }, { H_IMG, "vspace", AT_NUM }, { H_IMG, "alt", AT_ANY }, { H_IMG, "nosave", AT_ANY }, { H_IMG, "src", AT_FILE }, { H_FONT, "color", AT_COLOR }, { H_FONT, "size", AT_NUM }, { H_FONT, "face", AT_ANY }, { H_BR, "clear", AT_ANY }, { H_HR, "width", AT_NUM }, { H_HR, "align", AT_ALIGN }, { H_H1, "align", AT_ALIGN }, { H_H2, "align", AT_ALIGN }, { H_H3, "align", AT_ALIGN }, { H_H4, "align", AT_ALIGN }, { H_OL, "type", AT_NUM }, { H_UL, "type", AT_NUM }, { H_FORM, "method", AT_ANY }, { H_FORM, "action", AT_ANY }, { H_FORM, "name", AT_ANY }, { H_FORM, "target", AT_ANY }, { H_INPUT, "type", AT_ANY }, { H_INPUT, "name", AT_ANY }, { H_INPUT, "size", AT_NUM }, { H_INPUT, "maxlength", AT_NUM }, { H_INPUT, "value", AT_ANY }, { H_INPUT, "onclick", AT_ANY }, { H_INPUT, "checked", AT_ANY }, { H_TEXTAREA, "name", AT_ANY }, { H_TEXTAREA, "cols", AT_NUM }, { H_TEXTAREA, "rows", AT_NUM }, { H_TD, "align", AT_ALIGN }, { H_TD, "valign", AT_ANY }, { H_TD, "colspan", AT_ANY }, { H_TD, "rowspan", AT_ANY }, { H_TD, "width", AT_NUM }, { H_TD, "bgcolor", AT_COLOR }, { H_TD, "style", AT_ANY }, { H_TD, "title", AT_ANY }, { H_TH, "align", AT_ALIGN }, { H_TH, "valign", AT_ANY }, { H_TH, "colspan", AT_ANY }, { H_TH, "rowspan", AT_ANY }, { H_TH, "width", AT_NUM }, { H_TH, "bgcolor", AT_COLOR }, { H_TH, "style", AT_ANY }, { H_TH, "title", AT_ANY }, { H_A, "target", AT_ANY }, { H_A, "onmouseover", AT_ANY }, { H_A, "onclick", AT_ANY }, { H_A, "title", AT_ANY }, { H_A, "style", AT_ANY }, { H_SCRIPT, "language", AT_ANY }, { H_SCRIPT, "src", AT_FILE }, { H_MARQUEE, "direction", AT_ANY }, { H_MARQUEE, "height", AT_ANY }, { H_MARQUEE, "scrollamount", AT_ANY }, }; char *ch_table[] = { /*160*/ "nbsp", "iexp", "cent", "pound", "curren", "yen", "brvar", "sect", "uml", "copy", /*170*/ "ordf", "laquo", "not", "shy", "reg", "macr", "deg", "plusmin", "sup2", "sup3", /*180*/ "acute", "micro", "para", "middot", "cedol", "sup1", "ordm", "raquo", "frac14", "fraq12", /*190*/ "fraq34", "iquest", "Agrave", "Aacute", "Acirc", "Atilde", "Auml", "Aring", "AElig", "Ccedil", /*200*/ "Egrave", "Eacute", "Ecirc", "Euml", "Igrave", "Iacute", "Icirc", "Iuml", "ETH", "Ntilde", /*210*/ "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml", NULL, "Oslash", "Ugrave", "Uacute", "Ucirc", /*220*/ "Uuml", "Yacute", "THORN", "szlig", "agrave", "aacute", "acirc", "atilde", "auml", "aring", /*230*/ "aelig", "ccedil", "egrave", "eacute", "ecirc", "euml", "igrave", "iacute", "icirc", "iuml", /*240*/ "eth", "ntilde", "ograve", "oacute", "ocirc", "otilde", "ouml", NULL, "oslash", "ugrave", /*250*/ "uacute", "ucirc", "uuml", "yacute", "thorn", "yuml", "aring", "Eth", "icirc", "Thorn", "Yuml", "nbsp", "emsp", "ensp", "shy", "pd", "emdash", "endash", "copy", "reg", "trade", "alpha", "beta", "gamma", "delta", "epsi", "zeta", "eta", "theta", "thetav", "iota", "kappa", "lambda", "mu", "nu", "xi", "omicron", "pi", "rho", "sigma", "tau", "upsi", "phi", "chi", "psi", "omega", "Alpha", "Beta", "Gamma", "Delta", "Epsi", "Zeta", "Eta", "Theta", "Iota", "Kappa", "Lambda", "Mu", "Nu", "Xi", "Pi", "Rho", "Sigma", "Tau", "Upsi", "Phi", "Chi", "Psi", "Omega", "amp", "gt", "lt", "quot", "euro", }; #define NR_CH_TABLE (sizeof(ch_table)/sizeof(char *)) bool valid_attr(FILE *freport, char *html_fn, int ln, char *html_com, int tagnr, char *attr_name, char *attr_value, bool first) { size_t i; for (i = 0; i < sizeof(valid_comb)/sizeof(valid_comb[0]); i++) if ( valid_comb[i].tagnr == tagnr && streq(valid_comb[i].attr_name, attr_name)) { bool correct = TRUE; switch (valid_comb[i].type) { case AT_ALIGN: correct = strieq(attr_value, "RIGHT") || strieq(attr_value, "LEFT") || strieq(attr_value, "CENTER"); break; case AT_IMG_ALIGN: correct = strieq(attr_value, "RIGHT") || strieq(attr_value, "LEFT") || strieq(attr_value, "CENTER") || strieq(attr_value, "TOP"); break; case AT_FILE: return check_src(freport, html_fn, ln, attr_value, first); default: return TRUE; } if (!correct && freport != NULL) fprintf(freport, "%s (%d) : incorrect <%s .. %s=\"%s\">\n" , html_fn, ln, html_com, attr_name, attr_value); return correct; } return FALSE; } void print_sections(section_p section) { int depth = 0; for ( ; section; next_section(§ion, &depth)) { printf("%*.*sSection(%d): '%s' #%ld", depth, depth, "", section->level, section->title, (long)section); { name_p names = section->names; if (names) { printf (" with name:"); for ( ; names; names = names->next) printf(" %s", names->name); } } printf("\n"); { href_p hrefs; for (hrefs = section->hrefs; hrefs; hrefs = hrefs->next) printf("%*.*s to: %s %s\n", depth, depth, "", hrefs->file->name, hrefs->name); } } } void add_name(name_p *r_name, char *name, long line) { while (*r_name != NULL) r_name = &(*r_name)->next; (*r_name) = ALLOC(name_t); (*r_name)->next = NULL; STRCPY((*r_name)->name, name); (*r_name)->line = line; (*r_name)->status = 0; } void add_href(href_p *r_href, char *dest_file, char *href_name, long line, bool in_header) { while (*r_href != NULL) r_href = &(*r_href)->next; (*r_href) = ALLOC(href_t); (*r_href)->next = NULL; (*r_href)->file = find_file(dest_file); if (href_name[0] != '\0') { STRCPY((*r_href)->name, href_name); } else (*r_href)->name = NULL; (*r_href)->line = line; (*r_href)->section = NULL; (*r_href)->in_header = in_header; } char *trim(char *s) { int l; char *r; while (*s == ' ') s++; l = strlen(s); while (l > 0 && s[l-1] == ' ') l--; s[l] = '\0'; STRCPY(r,s); return r; } FILE *f_ext_dest = NULL; FILE *f_broken_ext_dest = NULL; char *get_contents(file_p in_file, FILE *freport) { FILE *fin = NULL; if (in_file->contents != NULL) return in_file->contents; if (in_file->exists) { fin = fopen(in_file->name, "r"); if (fin == NULL) in_file->exists = FALSE; } if (!in_file->exists) { if (freport != NULL) { if (in_file->nr_refs > 1 || option_warn) fprintf(freport, "%s (0) : does not exist\n", in_file->name); } return NULL; } { int fh = fileno(fin); long file_len; file_len = lseek(fh, 0L, SEEK_END); lseek(fh, 0L, SEEK_SET); in_file->contents = (char*)malloc(file_len+2); file_len = read(fh, in_file->contents, file_len); in_file->contents[file_len] = '\0'; fclose(fin); } return in_file->contents; } void scan_a_file(html_fn, fout, freport, depth, first, included) char *html_fn; /* file name of HTML file */ FILE *fout, /* LaTeX output file */ *freport; /* Error report file */ int depth; /* Depth for headings */ bool first; /* First scanning */ bool included; /* Included in output (or only checking) */ { file_p in_file = find_file(html_fn); char html_com[MAX_HC + 1], attr_name[MAX_HC + 1], attr_val[MAX_AV + 1], name[MAX_N + 1], src[MAX_DF + 1], alt[MAX_DF + 1]; char ch; int ln = 1; char *s = NULL; name_t *cur_names = NULL; char section_title[1000]; word i_st; cur_section = NULL; /* add_section(in_file, 0); */ DEBUG_P3("Scan %s %d %d\n", html_fn, first, in_file->nr_refs); in_html = FALSE; in_head = FALSE; in_title = FALSE; in_body = FALSE; in_address = FALSE; in_header = FALSE; active_href = FALSE; s = get_contents(in_file, freport); if (s == NULL) return; in_file->read |= R_READ; if (included) in_file->read |= R_INCLUDED; if ((ch = *s++) == '\n') ln++; while(ch != '\0') { if (ch == '<') { bool is_comment = FALSE, a_name = FALSE, a_href = FALSE; byte tagnr, tagkind = T_ILL; bool closing_tag = FALSE; alt[0] = '\0'; /* SCAN <...> */ /* skip < and following spaces: */ if ((ch = *s++) == '\n') ln++; while (ch != '\0' && (ch == ' ' || ch == '\n')) if ((ch = *s++) == '\n') ln++; html_com[0] = '\0'; if (ch != '!') { int i; bool too_long; /* scan first word in html_com: */ i = 0; too_long = FALSE; while ( ch != '\0' && ch != '>' && ch != ' ' && ch != '\n' && ch != '\t') { if (i < MAX_HC) html_com[i++] = tolower(ch); else too_long = TRUE; if ((ch = *s++) == '\n') ln++; } html_com[i] = '\0'; if (too_long && freport != NULL) fprintf(freport, "%s (%d) : HTML tag name too long --- `%s'\n", html_fn, ln, html_com); for (tagnr = 0; tagnr < NR_TAGS; tagnr++) if (streq(html_com, tags[tagnr].name)) { tagkind = tags[tagnr].kind; DEBUG_P2("found `%s' = %d\n", tags[tagnr].name, tagkind); break; } else if ( html_com[0] == '/' && tags[tagnr].closing != C_NO && streq(html_com + 1, tags[tagnr].name)) { tagkind = tags[tagnr].kind; closing_tag = TRUE; DEBUG_P2("found `/%s' = %d\n", tags[tagnr].name, tagkind); break; } while (ch != '\0' && ch != '>') { bool found_is = FALSE; skip_spaces(&s, html_fn, fout, freport, &ch, &ln, first); if (ch == '\0' || ch == '>') break; /* scan attribute in attr_name */ i = 0; too_long = FALSE; while (ch != '\0' && ch != '>' && ch != '=' && ch != ' ' && ch != '\n' && ch != '\t') { if (i < MAX_HC) attr_name[i++] = tolower(ch); else too_long = TRUE; if ((ch = *s++) == '\n') ln++; } attr_name[i] = '\0'; DEBUG_P1("found attribute: `%s'\n", attr_name); if (too_long && freport != NULL) fprintf(freport, "%s (%d) : HTML attribute '%s' too long.\n", html_fn, ln, attr_name); /* skip = and spaces */ while ( ch != '\0' && ( ch == ' ' || ch == '\n' || ch == '=' || ch == '\t')) { if (ch == '=') found_is = TRUE; if ((ch = *s++) == '\n') ln++; } /* scan string into name */ i = 0; if (found_is) { bool is_quoted = ch == '"'; too_long = FALSE; if (is_quoted) if ((ch = *s++) == '\n') ln++; while( ch != '\0' && ch != '>' && ch != '"' && (is_quoted || ch != ' ')) { if (i < MAX_AV) attr_val[i++] = ch; else too_long = TRUE; if ((ch = *s++) == '\n') ln++; } if (is_quoted != (ch == '\"') && freport != NULL) fprintf(freport, "%s (%d) : incorrectly quoted string\n", html_fn, ln); if (ch == '\"') if ((ch = *s++) == '\n') ln++; if (too_long && freport != NULL) fprintf(freport, "%s (%d) : attr value too long name `%s'\n", html_fn, ln, attr_val); } attr_val[i] = '\0'; DEBUG_P1("found attribute value: `%s'\n", attr_val); if (OPEN_TAG(T_A) && streq(attr_name, "name")) { if (attr_val[0] == '\0') { if (freport != NULL) fprintf(freport, "%s (%d) : found \n", html_fn, ln); } else if (strlen(attr_val) > MAX_N) { if (freport != NULL) fprintf(freport, "%s (%d) : name too long `%s'\n", html_fn, ln, attr_val); } else { strcpy(name, attr_val); a_name = TRUE; } } else if (OPEN_TAG(T_A) && streq(attr_name, "href") && attr_val[0] != '\0') { char *av = attr_val; i = 0; too_long = FALSE; for (; *av != '\0' && *av != '#' && *av != '?'; av++) if (i < MAX_DF) dest_file[i++] = *av; else too_long = TRUE; if (*av == '?') for (; *av != '\0'; av++) { if (i < MAX_DF) dest_file[i++] = *av; else too_long = TRUE; } dest_file[i] = '\0'; DEBUG_P1("found dest file `%s'\n", dest_file); href_name[0] = '\0'; if (attr_val[0] == '\0') { if (freport != NULL) fprintf(freport, "%s (%d) : found \n", html_fn, ln); } else if (too_long) { if (freport != NULL) fprintf(freport, "%s (%d) : URL too long `%s'\n", html_fn, ln, dest_file); } else if (*av == '\0') a_href = TRUE; else { i = 0; too_long = FALSE; av++; for (; *av != '\0'; av++) if (i < MAX_N) href_name[i++] = *av; else too_long = TRUE; href_name[i] = '\0'; DEBUG_P1("found href name `%s'\n", href_name); if (too_long) { if (freport != NULL) fprintf(freport, "%s (%d) : name too long `%s'\n", html_fn, ln, href_name); } else a_href = TRUE; } } else if ( OPEN_TAG(T_IMG) && streq(attr_name, "alt") && attr_val[0] != '\0') { if (strlen(attr_val) > MAX_DF) { if (freport != NULL) fprintf(freport, "%s (%d) : alt too long `%s'\n", html_fn, ln, attr_val); } else strcpy(alt, attr_val); } else if (!valid_attr(freport, html_fn, ln, html_com, tagnr, attr_name, attr_val, first)) ; else if ( option_warn && freport != NULL && tagkind != T_ILL) fprintf(freport, "%s (%d) : ignored <%s .. %s=\"%s\">\n" , html_fn, ln, html_com, attr_name, attr_val); } } else /* ch == '!' */ { is_comment = TRUE; if ((ch = *s++) == '\n') ln++; skip_spaces(&s, html_fn, fout, freport, &ch, &ln, first); if (ch != '>') { if (freport != NULL && option_warn) fprintf(freport, "%s (%d) : using non-standard comments.\n", html_fn, ln); while (ch != '\0' && ch != '>') { if ((ch = *s++) == '\n') ln++; } } } /* skip till > */ while (ch != '\0' && ch != '>') if ((ch = *s++) == '\n') ln++; /* PROCESS references */ if (first) { if (a_name) add_name(&cur_names, name, ln); if (a_href) { href_status = REF_OKAY; if (!norm_URL(html_fn, dest_file)) { if (freport != NULL) fprintf(freport, "%s (%d) : URL `%s' illegal or too long\n", html_fn, ln, dest_file); } else if (!is_URL(dest_file)) { /* add_ref(html_fn, ln, dest_file, href_name[0] == '\0' ? NULL : href_name); */ /* to add dest_file to list of files: */ file_p file = find_file(dest_file); file->read |= R_DOREAD; if (streq(dest_file, "Broken.html") && strcmp(html_fn, "brexrefs.html")) { char *error_code = NULL; char *brfile = url_argument; char *s = strstr(url_argument, "|"); if (s != NULL && !is_URL(url_argument)) { error_code = url_argument; *s = '\0'; brfile = s + 1; } if (f_broken_ext_dest != NULL) { fprintf(f_broken_ext_dest, "
  • %s in file %s", brfile, brfile, html_fn, html_fn, ln); if (error_code != NULL) fprintf(f_broken_ext_dest, " with error code: %s", error_code); fprintf(f_broken_ext_dest, ".\n"); } nr_broken_ext_links++; if (f_ext_dest != NULL) fprintf(f_ext_dest, "%s in %s:%d is Broken
    \n", brfile, brfile, html_fn, ln); } else if (is_html(dest_file)) { /* -- internal reference */ if (in_file->sections == NULL) { add_section(in_file, 0); cur_section->names = cur_names; cur_names = NULL; } add_href( in_address ? &in_file->sections->hrefs : &cur_section->hrefs, dest_file, href_name, ln, in_header||in_address); } } else if (f_ext_dest != NULL && strcmp(html_fn, "brexrefs.html")) fprintf(f_ext_dest, "%s in %s:%d
    \n", dest_file, dest_file, html_fn, ln); } } else if (freport != NULL)/* second */ { if (a_name) { DEBUG_PRINT(("\n")); if (name_repeated(in_file->sections, name)) fprintf(freport, "%s (%d) : not unique.\n", html_fn, ln, name); if (strstr(name, "#") || strstr(name, ".html") || strstr(name, ".jpg")) fprintf(freport, "%s (%d) : looks like link.\n", html_fn, ln, name); } if (a_href) { DEBUG_PRINT(("\n", html_fn, dest_file)); if (!norm_URL(html_fn, dest_file)) { href_status = REF_ILL; fprintf(freport, "%s (%d) : URL `%s' illegal or too long.\n", html_fn, ln, dest_file); } else { href_status = find_ref(dest_file, href_name); if (href_status == REF_FILE_NOT_FOUND) fprintf(freport, "%s (%d) : file `%s' does not exist.\n", html_fn, ln, dest_file); else { if (href_status == REF_NAME_NOT_FOUND) { fprintf(freport, "%s (%d) : no in file `%s'.\n", html_fn, ln, href_name, dest_file); } if (!is_URL(dest_file)) { char *fn = find_file(dest_file)->name; if (strcmp(fn, dest_file)) fprintf(freport, "%s (%d) : change `%s' into `%s'.\n", html_fn, ln, dest_file, fn); } } } } } /* PROCESS <...> */ DEBUG_P4("found %s at %d: in_head %d: %d\n", tags[tagnr].name, ln, in_head, tagkind); if (is_comment) /* skip */; else if (tagkind == T_ILL) { if (freport != NULL) fprintf(freport, "%s (%d) : unknown <%s>.\n", html_fn, ln, html_com); } else if (OPEN_TAG(T_HTML)) if (in_html) REMOVE_TAG(", tag only inside "); else LATEX_OPEN(tagnr) else if (CLOSE_TAG(T_HTML)) if (!in_html) REMOVE_TAG(", not inside "); else LATEX_CLOSES(tagnr) else { if (!in_html) { if (option_pedantic) ADD_TAG(H_HTML, FALSE, ", tag requires "); LATEX_OPEN(H_HTML) } if (OPEN_TAG(T_HEAD)) if (in_head) REMOVE_TAG(", tag only outside "); else LATEX_OPEN(tagnr) else if (CLOSE_TAG(T_HEAD)) if (!in_head) REMOVE_TAG(", not inside "); else LATEX_CLOSES(tagnr) else if (OPEN_TAG(T_TITLE)) if (in_body) REMOVE_TAG(", tag not inside "); else if (in_title) REMOVE_TAG(", nested "); else LATEX_OPEN(tagnr) else if (CLOSE_TAG(T_TITLE)) if (in_body) REMOVE_TAG(", tag not inside <body>"); else if (!in_title) REMOVE_TAG(", not inside <title>"); else LATEX_CLOSES(tagnr) else if (OPEN_TAG(T_BODY)) { if (in_body) REMOVE_TAG(", nested <body>"); else if (in_head) REMOVE_TAG(", still inside <head>"); else LATEX_OPEN(tagnr) /* print file name here ??? */ /* \n\\par{\\footnotesize$(File:\\ )$}\\par\n */ } else if (CLOSE_TAG(T_BODY)) if (!in_body) REMOVE_TAG(", not inside <body>"); else if (in_head) REMOVE_TAG(", still inside <head>"); else LATEX_CLOSES(tagnr) else if (OPEN_TAG(T_LINK) || OPEN_TAG(T_META)) { /* ignore */ } else if (OPEN_TAG(T_SCRIPT)) { LATEX_OPEN(tagnr) } else if (CLOSE_TAG(T_SCRIPT)) LATEX_CLOSES(tagnr) else { if (in_head && !a_name) { char mess[60]; sprintf(mess, ", required by <%s>", tags[tagnr].name); ADD_TAG(H_HEAD, TRUE, mess); LATEX_CLOSES(H_HEAD) } if (!in_body && !a_name) { char mess[60]; sprintf(mess, ", required by <%s>", tags[tagnr].name); ADD_TAG(H_BODY, FALSE, mess); LATEX_OPEN(H_BODY) } if (OPEN_TAG(T_DIR)) LATEX_OPEN(tagnr) else if (CLOSE_TAG(T_DIR)) LATEX_CLOSES(tagnr) else if (OPEN_TAG(T_ADDR)) if (in_address) REMOVE_TAG(", nested <address>"); else LATEX_OPEN(tagnr) else if (CLOSE_TAG(T_ADDR)) if (!in_address) REMOVE_TAG(", not inside <address>"); else LATEX_CLOSES(tagnr) else if (OPEN_TAG(T_H)) if (in_header) { REPLACE_TAG(); LATEX_CLOSES_K() /* lower section level */ section_title[i_st] = '\0'; if (first) { cur_section->title = trim(section_title); cur_section->names = cur_names; cur_names = NULL; } } else { int level = 0; switch (tagnr) { case H_H1: level = 1; break; case H_H2: level = 2; break; case H_H3: level = 3; break; case H_H4: level = 4; break; case H_H5: level = 5; break; case H_H6: level = 6; break; } if (first) add_section(in_file, level); i_st = 0; LATEX_OPEN(tagnr) } else if (CLOSE_TAG(T_H)) if (!in_header) REMOVE_TAG(", not inside <h?>"); else { LATEX_CLOSES_K() section_title[i_st] = '\0'; if (first) { cur_section->title = trim(section_title); cur_section->names = cur_names; cur_names = NULL; } } else if (OPEN_TAG(T_LIST)) LATEX_OPEN(tagnr) else if (CLOSE_TAG(T_LIST)) LATEX_CLOSES_K() else if (OPEN_TAG(T_ITEM)) if (!inside_tag(T_LIST)) REMOVE_TAG(", not inside listing tag"); else { LATEX_CLOSES_T(T_LIST) LATEX_OPEN(tagnr) } else if (CLOSE_TAG(T_ITEM)) LATEX_CLOSES_K() else if (OPEN_TAG(T_DESC)) LATEX_OPEN(tagnr) else if (CLOSE_TAG(T_DESC)) LATEX_CLOSES(tagnr) else if (OPEN_TAG(T_DT)) if (!inside_tag(T_DESC)) REMOVE_TAG(", not inside <dl>"); else { LATEX_CLOSES_T(T_DESC) LATEX_OPEN(tagnr) } else if (CLOSE_TAG(T_DT)) LATEX_CLOSES(tagnr) else if (OPEN_TAG(T_DD)) if (!inside_tag(T_DESC)) REMOVE_TAG(", not inside <dl>"); else { LATEX_CLOSES_T(T_DESC) LATEX_OPEN(tagnr) } else if (CLOSE_TAG(T_DD)) LATEX_CLOSES(tagnr) else if (OPEN_TAG(T_A)) { int j; for (j = stack_depth - 1; j >= 0 && stack[j].closing == C_OPT; j--) if (stack[j].tagkind == T_A) { LATEX_CLOSES(H_A); break; } if (a_href && href_status == REF_OKAY) { if (active_href && freport != NULL) fprintf(freport, "%s (%d) : nested href\n", html_fn, ln); active_href = TRUE; } LATEX_OPEN_C(tagnr, a_href ? C_YES : C_OPT); } else if (CLOSE_TAG(T_A)) { LATEX_CLOSES(tagnr) } else if (OPEN_TAG(T_VERB)) LATEX_OPEN(tagnr) else if (CLOSE_TAG(T_VERB)) LATEX_CLOSES_K() else if (OPEN_TAG(T_P)) { if (stack[stack_depth-1].tagkind == T_P) LATEX_CLOSES(H_P); if (in_header && freport != NULL && option_info) fprintf(freport, "%s (%d) : <p> ignored in header\n", html_fn, ln); LATEX_OPEN(tagnr) } else if (CLOSE_TAG(T_P)) LATEX_CLOSES(tagnr) else if (OPEN_TAG(T_CHAR)) { int j; bool found = FALSE; for (j = stack_depth - 1; j >= 0; j--) if (stack[j].tagnr == tagnr) { found = TRUE; break; } if (found && j == stack_depth - 1) { REPLACE_TAG(); LATEX_CLOSES(tagnr) } else { if (found && freport != NULL) fprintf(freport, "%s (%d) : nested <%s>\n", html_fn, ln, html_com); LATEX_OPEN(tagnr) } } else if (CLOSE_TAG(T_CHAR)) LATEX_CLOSES(tagnr) else if (OPEN_TAG(T_IMG)) ; else { if (freport != NULL) fprintf(freport, "%s (%d) : tag %s not processed\n", html_fn, ln, tags[tagnr].name); } } } /* read > and skip till first non-space */ if (ch != '\0' && ch == '>') { if ((ch = *s++) == '\n') ln++; } } else if (freport == NULL) { if (first && !isspace(ch) && !in_header && !in_head) { if (cur_section == NULL) { add_section(in_file, 0); cur_section->title = "<TOP OF THE FILE>"; cur_section->names = cur_names; cur_names = NULL; } if (cur_names != NULL) { /* Introduce fake section */ add_section(in_file, 10); cur_section->title = "<NO TITLE>"; cur_section->names = cur_names; cur_names = NULL; } cur_section->has_text = TRUE; } if (in_header && ch != '\n' && i_st < 999) section_title[i_st++] = ch; if ((ch = *s++) == '\n') ln++; } else { int ch_val = (ch + 256)%256; bool skip = FALSE; if (ch == '\n') skip = TRUE; else if (ch_val >= 160 && ch_val <= 255 && ch_table[ch-160] != NULL) { fprintf(freport, "%s (%d) : Replace character %d by `&%s;'.\n", html_fn, ln, ch_val, ch_table[ch_val-160]); skip = TRUE; } else if (ch == '&') { int i = 0; bool correct = FALSE; char html_ch[10]; int v; if (in_header && i_st < 999) section_title[i_st++] = ch; if ((ch = *s++) == '\n') ln++; if (isalpha(ch)) { while (isalpha(ch)) { if (i < 9) html_ch[i++] = ch; if (in_header && i_st < 999) section_title[i_st++] = ch; if ((ch = *s++) == '\n') ln++; } html_ch[i] = '\0'; for (v = 0; v < NR_CH_TABLE; v++) if ( ch_table[v] != NULL && !strcmp(html_ch, ch_table[v])) { correct = TRUE; break; } if (ch == ';') { if (i < 9) html_ch[i++] = ch; html_ch[i] = '\0'; if (in_header && i_st < 999) section_title[i_st++] = ch; if ((ch = *s++) == '\n') ln++; } else fprintf(freport, "%s (%d) : Place ';' after sequence `&%s'.\n", html_fn, ln, html_ch); if (!correct) fprintf(freport, "%s (%d) : Unknown sequence `&%s'.\n", html_fn, ln, html_ch); } else if (ch == '#') { int code = 0; if (in_header && i_st < 999) section_title[i_st++] = ch; if ((ch = *s++) == '\n') ln++; html_ch[i++] = '#'; while (isdigit(ch)) { if (i < 9) html_ch[i++] = ch; code = code * 10 + ch - '0'; if (in_header && i_st < 999) section_title[i_st++] = ch; if ((ch = *s++) == '\n') ln++; } html_ch[i] = '\0'; if (ch == ';') { if (i < 9) html_ch[i++] = ch; html_ch[i] = '\0'; if (in_header && i_st < 999) section_title[i_st++] = ch; if ((ch = *s++) == '\n') ln++; } else fprintf(freport, "%s (%d) : Place ';' after sequence `&%s'.\n", html_fn, ln, html_ch); if ((code >= ' ' && code < 127)) { fprintf(freport, "%s (%d) : Replace sequence `&%s' by '%c'.\n", html_fn, ln, html_ch, code); } else if (code >= 160 && code <= 255 && ch_table[code-160] != 0) { fprintf(freport, "%s (%d) : Replace sequence `&%s' by '&%s;'.\n", html_fn, ln, html_ch, ch_table[code-160]); } else fprintf(freport, "%s (%d) : Unknown sequence `&%s;'.\n", html_fn, ln, html_ch); } else { fprintf(freport, "%s (%d) : Replace `&' by `&'.\n", html_fn, ln - (ch == '\n')); } } else if (ch == '>') { fprintf(freport, "%s (%d) : Replace `>' by `>'.\n", html_fn, ln); skip = TRUE; } else if ((ch >= ' ' && ch_val < 127) || ch == '\t') skip = TRUE; else { fprintf(freport, "%s (%d) : Unknown character %d (decimal)\n", html_fn, ln, ch_val); skip = TRUE; } if (skip) if ((ch = *s++) == '\n') ln++; } } /* In case no header occured, dump label */ while (stack_depth > 0) { if ( freport != NULL && (!option_pedantic || stack[stack_depth-1].tagnr != T_HTML)) fprintf(freport, "%s (%d) : add </%s>.\n", html_fn, ln, tags[stack[stack_depth-1].tagnr].name); latex_close(fout, freport, ln, html_fn); } #ifdef DEBUG_REFBY if (first) print_sections(in_file->sections); #endif } void check_exists_file(char *html_fn, FILE *freport) { file_p in_file = find_file(html_fn); if (in_file->exists) in_file->exists = file_exists(html_fn); if (!in_file->exists) { if (freport != NULL) { if (in_file->nr_refs > 1 || option_warn) fprintf(freport, "%s (0) : does not exist\n", html_fn); } } in_file->read |= R_READ; in_file->read |= R_INCLUDED; } void scan_a_js_file(char *js_fn, FILE *freport) { file_p in_file = find_file(js_fn); char *s = NULL, ch; int ln = 1; /*printf("Include JS file %s\n", js_fn);*/ s = get_contents(in_file, freport); if (s == NULL) return; in_file->read |= R_READ; in_file->read |= R_INCLUDED; if ((ch = *s++) == '\n') ln++; while(ch != '\0') { if (ch == '/' && *s == '/') { do { if ((ch = *s++) == '\n') ln++; } while (ch != '\0' && ch != '\n'); } else if (ch == '"' || ch == '\'') { char fn[101]; int i = 0; char quote = ch; if ((ch = *s++) == '\n') ln++; while (ch != quote && ch != '\0' && ch != '\n') { if (ch == '\\') { if ((ch = *s++) == '\n') ln++; if (ch != '\0' && ch != '\n') { if (i < 100) fn[i++] = ch; if ((ch = *s++) == '\n') ln++; } } else { if (i < 100) fn[i++] = ch; if ((ch = *s++) == '\n') ln++; } } if (ch == quote) if ((ch = *s++) == '\n') ln++; fn[i] = '\0'; /*printf("Found string |%s|\n", fn);*/ if ( !strcmp(fn + strlen(fn) - 4, ".jpg") || !strcmp(fn + strlen(fn) - 4, ".gif")) { file_p img_file = find_file(fn); if (img_file->read & R_INDIR) img_file->read |= R_INCLUDED; else if (!img_file->exists) { if (freport) fprintf(freport, "%s (%d) : file '%s' does not exist.\n", js_fn, ln, fn); } } } else { if ((ch = *s++) == '\n') ln++; } } } void accept_root_URL(URL, fout, freport) char *URL; FILE *fout, *freport; { int strlen_URL = strlen(URL); document_URL = NALLOC(char, strlen_URL + 2); strcpy(document_URL, URL); if (document_URL[strlen_URL - 1] != '/') { document_URL[strlen_URL] = '/'; document_URL[strlen_URL + 1] = '\0'; } server_URL = NULL; { int i; for (i = 0; URL[i] != '\0' && URL[i] != ':'; i++); if (URL[i] != '\0' && URL[i+1] == '/' && URL[i+2] == '/') { i += 3; while (URL[i] != '\0' && URL[i] != '/') i++; if (URL[i] == '/') file_URL = document_URL + i; else file_URL = "/"; URL[i] = '\0'; server_URL = SALLOC(URL); strcpy(server_URL, URL); } } if (server_URL == NULL) { if (freport != NULL) fprintf(freport, " : illegal URL `%s'\n", document_URL); document_URL = NULL; } } void scan_not_included_files(freport, first, included) FILE *freport; bool first, included; { if (first) { bool found = TRUE; while (found) { file_p file = the_files; found = FALSE; while (file != NULL && !found) { DEBUG_P4("%s %d %d %d ", file->name, file->exists, !(file->read & R_READ), !is_URL(file->name)); DEBUG_P3("%d %d %d\n", is_html(file->name), file->name[0] != '.', file->name[1] != '.'); if ( file->exists && !(file->read & R_READ) && (file->read & R_DOREAD) && !is_URL(file->name) && (file->name[0] != '.' || file->name[1] != '.')) found = TRUE; else file = file->next; } if (found) { if (is_html(file->name)) scan_a_file(file->name, NULL, freport, 1, first, included); else if (!is_URL(file->name)) check_exists_file(file->name, freport); } } } else { file_p file = the_files; for (file = the_files; file != NULL; file = file->next) if ( (file->read & R_DOREAD) && !is_URL(file->name) && is_html(file->name) && (file->name[0] != '.' || file->name[1] != '.') && strcmp(file->name, "brexrefs.html")) scan_a_file(file->name, NULL, freport, 1, first, FALSE); else if ((file->read & R_INCLUDED) && is_js(file->name)) scan_a_js_file(file->name, freport); } } void read_dir(void) { FILE *f; char fn[400]; /*fprintf(stderr, "Scan directory\n"); system("dir \\www >\\www\\compare\\dir.txt"); fprintf(stderr, "Ready\n");*/ f = fopen("compare\\dir.txt", "r"); if (f==NULL) return; while (fgets(fn, 399, f)) { file_p file; if (fn[strlen(fn)-1]=='\n') fn[strlen(fn)-1] = '\0'; if (fn[16] == ' ' && strlen(fn) >= 45) { char size[30]; file = find_file(fn+44); file->read |= R_INDIR; strncpy(size, fn+16, 6); strncpy(size+6, fn+23, 3); size[9] = '\0'; sscanf(size, "%ld", &file->size_local); { long int year, mon, day, hour, min; char *d = fn+28; char *t = strstr(d, ":") - 2; year = (d[6] - '0')*10 + d[7] - '0'; year += (year < 50) ? 2000 : 1900; if (t[5] == 'p' || t[5] == 'm') { mon = (d[0] - '0')*10 + d[1] - '0'; day = (d[3] - '0')*10 + d[4] - '0'; } else { day = (d[0] - '0')*10 + d[1] - '0'; mon = (d[3] - '0')*10 + d[4] - '0'; } hour = t[1] - '0'; if (t[0] != ' ') hour += (t[0] - '0')*10; min = (t[3] - '0')*10 + t[4] - '0'; if (hour == 12 && t[5] == 'a') hour -= 12; if (t[5] == 'p') hour += 12; file->date_days = year * 10000 + mon * 100 + day; file->date_mins = hour * 60 + min; } } } fclose(f); } int month_of(char *s) { static char *months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; int i; for (i = 0; i < 12; i++) if (strncmp(s, months[i], 3) == 0) return i+1; return 0; } void add_size(long size, char *fn) { int l = strlen(fn); char *ext = fn + strlen(fn); while (ext > fn && *(ext-1) != '.') ext--; if (!strcmp(fn + l - 6, "_c.txt")) { add_other_ext("C",size); } else if (!strcmp(fn + l - 8, "_pas.txt")) { add_other_ext("Pascal",size); } else { add_other_ext(ext, size); } } void compare(bool execute) { FILE *fls = fopen("compare\\ls.txt", "r"); FILE *fout = fopen("cp2ftp.bat", "w"); char buffer[500]; time_t timeinsec; struct tm timem; file_p a_file; long oldsize = 0L; long sizecopied = 0L; long newsize = 0L; long removesize = 0L; int i; time(&timeinsec); memcpy(&timem, localtime(&timeinsec), sizeof(struct tm)); while (fgets(buffer,499, fls)) { if (buffer[0] == '-') { file_p file; long size_ftp; bool notincluded; if (buffer[strlen(buffer)-1] == '\n') buffer[strlen(buffer)-1] = '\0'; sscanf(buffer+21, "%ld", &size_ftp); file = find_file(buffer+46); file->on_ftp = TRUE; notincluded = !(file->read & R_INCLUDED) && (file->read & R_INDIR) && file->exists; if (file->size_local == -1 || notincluded) { printf("remove %s\n", file->name); removesize += size_ftp; /*add_size(-size_ftp, file->name);*/ } else if ( !notincluded ) { long int fyear, fmon, fday, fhour = 23, fmin = 59, fd=0, fm=0; long int ld = file->date_days, lm = file->date_mins; char *d; int newer = 0; int diffsize = 0; int picture = 0; d = buffer + 33; fmon = month_of(d); fday = d[5] - '0'; if (d[4] != ' ') fday += (d[4] - '0')*10; if ( d[9] == ':' ) { fhour = d[8] - '0'; if (d[7] != ' ') fhour += (d[7] - '0')*10; fmin = (d[10] - '0')*10 + d[11] - '0'; fyear = timem.tm_year; if (fmon * 100 + fday > (timem.tm_mon+1) * 100 + timem.tm_mday) fyear = fyear - 1; fyear += 1900; if (fyear < 1980) fyear += 100; } else { sscanf(d+7, "%ld", &fyear); } fd = fyear * 10000 + fmon * 100 + fday; fm = fhour * 60 + fmin; /*printf("%s\n %ld %ld %ld %ld %ld %ld\n", buffer, size_ftp, fyear, fmon, fday, fhour, fmin); printf("%-15.15s %8ld %8ld %ld %ld\n", file->name, ld, fd, lm, fm);*/ /*printf("%-30.30s", file->name);*/ oldsize += size_ftp > 0 ? size_ftp : 0; newer = ld > fd || (ld == fd && lm > (fm + 60)); diffsize = size_ftp != file->size_local; /*printf(" %s %s", newer ? "newer" : " ", diffsize ? "diffsize" : " ");*/ picture = strcmp(file->name + strlen(file->name)-4, ".jpg") == 0; if (!strcmp(file->name, "Fhome.html")) newer = TRUE; if (diffsize || (!picture && newer)) { printf("copy %s", file->name); if (diffsize) printf(" diffsize"); if (!picture && newer) printf(" newer"); printf("\n"); if (execute) fprintf(fout, "copy \"%s\" \"ftp\\%s\"\n", file->name, file->name); sizecopied += file->size_local; newsize += file->size_local; add_size(file->size_local, file->name); } else { newsize += size_ftp > 0 ? size_ftp : 0; add_size(size_ftp > 0 ? size_ftp : 0, file->name); } /*printf("\n");*/ } } } fclose(fls); for (a_file = the_files; a_file != NULL ; a_file = a_file->next) { /*printf("New? %s %ld %d %d %d\n", a_file->name, a_file->size_local, a_file->on_ftp, a_file->read, a_file->exists);*/ if ( a_file->size_local >= 0 && !a_file->on_ftp) { bool notincluded = !(a_file->read & R_INCLUDED) && (a_file->read & R_INDIR) && a_file->exists; if (!notincluded) { printf("copy %s new\n", a_file->name); if (execute) fprintf(fout, "copy \"%s\" \"ftp\\%s\"\n", a_file->name, a_file->name); sizecopied += a_file->size_local; newsize += a_file->size_local; add_size(a_file->size_local, a_file->name); } } } printf("\n\nOld size %ld bytes, New size is %ld bytes\n", oldsize, newsize); printf("Have to copy %ld bytes\n", sizecopied); printf("Could remove %ld bytes\n", removesize); printf("Netto new size: %ld bytes\n", newsize - removesize); printf("\n"); { FILE *f = fopen("Fhome.html", "w"); file_p fhome_file = find_file("Fhome.html"); char *cont = fhome_file->contents; char linebuf[1000]; char *start_stat = "<!--startstat-->"; char *end_stat = "<!--endstat-->"; if (f != NULL && cont != NULL && *cont != '\0') { linebuf[0] = '\0'; for(;*cont != '\0';) { int i; for (i = 0; *cont != '\0' && *cont != '\n'; i++, cont++) linebuf[i] = *cont; linebuf[i] = '\0'; cont++; if (!strcmp(linebuf, start_stat)) break; fprintf(f, "%s\n", linebuf); } fprintf(f, "%s\n", linebuf); fprintf(f, "consists of %d HTML-files", nr_ext_files("html")); fprintf(f, " with a total size of %ld characters,\n", size_ext_files("html")); fprintf(f, "having %ld internal links and %ld external links.\n", nr_int_links, nr_ext_links + nr_broken_ext_links); if (nr_broken_ext_links != 0) fprintf(f, "(About %ld of the external links are broken.)\n", nr_broken_ext_links); fprintf(f, "Furthermore, it contains %d C program files ", nr_ext_files("C")); fprintf(f, "with a total size of %ld characters,\n", size_ext_files("C")); fprintf(f, "and %d PASCAL program files ", nr_ext_files("Pascal")); fprintf(f, "with a total size of %ld characters.\n", size_ext_files("Pascal")); fprintf(f, "There are %d text files with a total size of %ld characters.\n", nr_ext_files("txt"), size_ext_files("txt")); fprintf(f, "With respect to images, this site containts %d GIF images\n" "(total size %ld bytes), and %d JPEG images (total size %ld bytes).\n", nr_ext_files("gif"), size_ext_files("gif"), nr_ext_files("jpg"), size_ext_files("jpg")); fprintf(f, "With respect to sounds, it contains %d WAV files\n" "with a total size of %ld bytes.\n", nr_ext_files("wav"), size_ext_files("wav")); fprintf(f, "It also contains %d PostScript files (total size %ld bytes),\n", nr_ext_files("ps") + nr_ext_files("eps"), size_ext_files("ps") + size_ext_files("eps")); fprintf(f, "%d LaTeX files (total size %ld characters), and\n", nr_ext_files("tex"), size_ext_files("tex")); fprintf(f, "%d gzipped tar files (total size %ld bytes).\n", nr_ext_files("tgz"), size_ext_files("tgz")); fprintf(f, "It also uses %d <A HREF=\"JavaScript.html\">JavaScript</A> files with \n" "code with a total size of %ld bytes.\n", nr_ext_files("js"), size_ext_files("js")); for(;;) { int i; for (i = 0; *cont != '\0' && *cont != '\n'; i++, cont++) linebuf[i] = *cont; linebuf[i] = '\0'; cont++; if (!strcmp(linebuf, end_stat)) break; } fprintf(f, "%s\n", linebuf); for (; *cont != '\0'; cont++) fprintf(f, "%c", *cont); fclose(f); } } for (i = 0; i < nr_other_ext; i++) if (other_ext_nr[i] > 1) printf("%ld bytes in %d \"%s\"-files.\n", other_ext_size[i], other_ext_nr[i], other_ext_name[i]); else printf("%ld bytes in one \"%s\"-file.\n", other_ext_size[i], other_ext_name[i]); printf("\n\n"); fclose(fout); } bool is_oneway(section_p sect) { for (; sect; sect = sect->parent) if (sect->oneway) return TRUE; return FALSE; } void build_refered_by() { file_p file; /* For all files */ for (file = the_files; file; file = file->next) { /* For all sections in these files */ section_p section; for (section = file->sections; section; next_section(§ion, NULL)) if (!is_oneway(section)) { /* For all links in those sections */ href_p href; for (href = section->hrefs; href; href = href->next) { section_p refered_section = NULL; /* Determine the section being linked to: */ if (href->name != NULL) refered_section = section_with_name(href->file->sections, href->name); else refered_section = href->file->sections; /* Make a direct reference in the link to the section: */ href->section = refered_section; /* Add the section of the file to the referenced section in the link: */ if (refered_section) { section_list_p *r_section_list = &refered_section->refered_by; #ifdef DEBUG_REFBY printf("%s.%s -> %ld %s.%s\n", href->file->name, href->name, (long)refered_section, refered_section->file->name, refered_section->names ? refered_section->names->name : "(null)"); #endif while ((*r_section_list) && (*r_section_list)->section != section) r_section_list = &(*r_section_list)->next; if (*r_section_list == NULL) { *r_section_list = ALLOC(section_list_t); (*r_section_list)->next = NULL; (*r_section_list)->section = section; } } } } } } void print_file_section(file_p file, section_p section) { static section_p cur_section = NULL; if (section != cur_section) { if (cur_section != NULL && cur_section->file != file) printf("\n"); printf("In file %s", file->name); if (section->names) printf(", at line %ld", section->names->line); if (section->title[0] != '\0') printf(", in section '%s'", section->title); printf(":\n"); cur_section = section; } } #ifdef DEBUG_REFBY void print_section(section_p section) { if (section == NULL) printf("[NULL]"); else printf("[%ld %s'%s']", (long)section, section->file && section->file->name ? section->file->name : "", section->title); } #endif int cur_level; void set_level_found(int level) { #ifdef DEBUG_REFBY printf("%*.*s #set\n", level*2, level*2, ""); #endif if (level < cur_level) cur_level = level; } void check_section_or_parent(section_p from, section_p to, int level) { #ifdef DEBUG_REFBY printf("%*.*s check_section_or_parent ", level*2, level*2, ""); print_section(from); print_section(to); printf("\n"); #endif if (from == to) { set_level_found(level); return; } if (from->file == to->file) { for (to = to->parent; to; to = to->parent) { if (++level >= cur_level) return; #ifdef DEBUG_REFBY printf("%*.*s = parent ", level*2, level*2, ""); print_section(to); printf("\n"); #endif if (from == to) { set_level_found(level); return; } } } } void search_my_parents(section_p from, section_p to, int level) { section_p parent; #ifdef DEBUG_REFBY printf("%*.*s search_my_parents ", level*2, level*2, ""); print_section(from); print_section(to); printf("\n"); #endif for (parent = from->parent; parent; parent = parent->parent) { if (++level >= cur_level) return; #ifdef DEBUG_REFBY printf("%*.*s = parent ", level*2, level*2, ""); print_section(parent); printf("\n"); #endif if (parent == to) { set_level_found(level); return; } } } void search_subsections(section_p from, section_p to, int level) { #ifdef DEBUG_REFBY printf("%*.*s search_subsections ", level*2, level*2, ""); print_section(from); print_section(to); printf("\n"); #endif if (level >= cur_level) return; { href_p href; for (href = from->hrefs; href; href = href->next) if (href->section) check_section_or_parent(href->section, to, level); } /* check all sub section for the same: */ { section_p nested; for (nested = from->nested; nested; nested = nested->next) search_subsections(nested, to, level+1); } } void search_parents(section_p from, section_p to, int level) { #ifdef DEBUG_REFBY printf("%*.*s search_parents ", level*2, level*2, ""); print_section(from); print_section(to); printf("\n"); #endif for (; from; from = from->parent) { if (++level >= cur_level) return; { href_p href; for (href = from->hrefs; href; href = href->next) { if (level >= cur_level) return; if (href->section) check_section_or_parent(href->section, to, level); } } } } void search_chain(section_p from, section_p to, int level) { #ifdef DEBUG_REFBY printf("%*.*s search_chain ", level*2, level*2, ""); print_section(from); print_section(to); printf("\n"); #endif if (level >= cur_level) return; { href_p href; for (href = from->hrefs; href; href = href->next) if (href->section) { check_section_or_parent(href->section, to, level); search_chain(href->section, to, level+3); } } } int search_for_reference_to(section_p from, section_p to) { cur_level = 10; search_my_parents(from, to, 1); search_subsections(from, to, 1); search_parents(from, to, 1); search_chain(from, to, 1); return cur_level; } void analyze_all_sections() { file_p file; /* For all files */ for (file = the_files; file; file = file->next) { /* For all sections in these files */ section_p section; /* print_sections(file->sections); */ for (section = file->sections; section; next_section(§ion, NULL)) { name_p name; section_list_p refered_by; int c = 0; #ifdef DEBUG_REFBY printf("Analyzing section: "); print_section(section); printf("\n"); #endif if (section->level == 10 && 0) { print_file_section(file, section); printf("- Name %s has no section title\n", section->names->name); } for (name = section->names; name; name = name->next) { if (!(name->status & NAME_REFERENCED)) { print_file_section(file, section); printf("- Name %s is not referenced\n", name->name); } c++; } if (c > 1) { print_file_section(file, section); printf("- Has more than one name\n"); } if (section->hrefs != NULL && option_warn) { section_p sect = section; while (sect->names == NULL && sect->parent != NULL && !sect->parent->has_text) sect = sect->parent; if (sect->names == NULL) { print_file_section(file, section); printf("- Might add name to section\n"); } } /* For all of the sections that link to this section: */ for (refered_by = section->refered_by; refered_by; refered_by = refered_by->next) { section_p ref_by_sect; int level; /* Determine the section that should be referenced */ ref_by_sect = refered_by->section; while (ref_by_sect->parent != NULL && ref_by_sect->names == NULL) ref_by_sect = ref_by_sect->parent; #ifdef DEBUG_REFBY printf(" Section that needs to be referenced: "); print_section(ref_by_sect); printf("\n"); #endif level = search_for_reference_to(section, ref_by_sect); #ifdef DEBUG_REFBY if (level != 10) { print_file_section(file, section); printf(" Has level %d return reference to %s '%s'\n", level, refered_by->section->file->name, refered_by->section->title); } #endif /* if (level == 10 && streq(refered_by->section->file->name, "brexrefs.html")) level = -2; */ if (level == 10) { section_p sect; print_file_section(file, section); printf("<A HREF=\"%s", refered_by->section->file->name); for (sect = refered_by->section; sect; sect = sect->parent) if (sect->names) break; if (sect && sect->names) printf("#%s", sect->names->name); printf("\">"); if (refered_by->section->parent != NULL && !refered_by->section->parent->has_text) printf("%s: ", refered_by->section->parent->title); printf("%s</A> ", refered_by->section->title); printf("<!--REFBY:%s", refered_by->section->file->name); if (sect && sect->names) printf("#%s", sect->names->name); printf("-->\n"); } } } } } int main(argc, argv) int argc; char **argv; { FILE *fin; char *fn = NULL, *html_fn, *outfn = NULL, *reffn; bool option_scan_not_inc = FALSE, option_cross_ref = FALSE, option_compare = FALSE, option_compare_copy = FALSE; /* global options */ option_info = FALSE; option_warn = FALSE; option_pedantic = FALSE; option_bibliography = FALSE; printf("%s: Version %s\nWritten by %s\n\n", "chkhtml", VERSION, WRITTEN_BY); { int i; bool error = FALSE; for (i = 1; i < argc; i++) { if (argv[i][0] == '-') { if (argv[i][1] == 'o') { if (argv[i][2] != '\0') outfn = argv[i] + 2; else if (i + 1 < argc) outfn = argv[++i]; else printf("Argument of -o option missing\n"); } else if (argv[i][1] == 'i' && argv[i][2] == '\0') { option_warn = TRUE; option_info = TRUE; } else if (argv[i][1] == 'w' && argv[i][2] == '\0') option_warn = TRUE; else if (argv[i][1] == 'p' && argv[i][2] == '\0') option_pedantic = TRUE; else if (argv[i][1] == 's' && argv[i][2] == '\0') { option_scan_not_inc = TRUE; option_compare = TRUE; option_compare_copy = TRUE; } else if (argv[i][1] == 'r') { if (argv[i][2] != '\0') accept_root_URL(argv[i] + 2, stdout, NULL); else if (i + 1 < argc) accept_root_URL(argv[++i], stdout, NULL); else printf("Argument of -r option missing\n"); } else if (argv[i][1] == 'b' && argv[i][2] == '\0') option_bibliography = TRUE; #ifdef DYN_DEBUG else if (argv[i][1] == 'd') option_debug = TRUE; #endif else if (streq(argv[i], "-cr")) option_cross_ref = TRUE; else { printf("Unknown option %s\n", argv[i]); error = TRUE; } } else if (fn == NULL) fn = argv[i]; else { printf("Too many input filenames\n"); error = TRUE; } } if (fn == NULL) { printf("No input filename given\n"); error = TRUE; } else { fin = fopen(fn, "r"); if (fin == NULL) { printf("Error: Cannot open file: `%s'.\n", fn); error = TRUE; } } if (error) { printf("Usages: html2tex [options] <file>\n"); printf("\nOptions:\n"); printf(" -o<FN> : specify output file\n"); printf(" -i : print info\n"); printf(" -w : print warnings (and info)\n"); printf(" -r<URL> : root URL of document\n"); printf(" -b : make bibliography\n"); printf(" -cr : generate cross-reference\n"); printf(" -c : check html file\n"); printf(" -s : scan not included files\n"); #ifdef DYN_DEBUG printf(" -d : print (a lot of) debugging information\n"); #endif return 1; } if (streq(fn + strlen(fn) - 5, ".html")) { is_html_fn = TRUE; html_fn = SALLOC(fn); strcpy(html_fn, fn); fn[strlen(fn) - 5] = '\0'; } } reffn = NALLOC(char, strlen(fn) + 5); strcpy(reffn, fn); strcat(reffn, ".ref"); read_dir(); f_ext_dest = fopen("compare\\ext_dest.txt", "w"); f_broken_ext_dest = fopen("compare\\broken_ext_dest.txt", "w"); scan_a_file(html_fn, (FILE *)NULL, (FILE *)NULL, 0, TRUE, TRUE); if (option_scan_not_inc) scan_not_included_files(NULL, TRUE, is_html_fn); { file_p file; for (file = the_files; file != NULL; file = file->next) file->read &= ~R_READ; } fclose(f_ext_dest); fclose(f_broken_ext_dest); rewind(fin); nr_other_ext = 0; nr_int_links = 0; nr_ext_links = 0; nr_broken_ext_links = 0; scan_a_file(html_fn, (FILE *)NULL, stdout, 0, FALSE, TRUE); if (option_scan_not_inc) { file_p file; scan_not_included_files(stdout, FALSE, is_html_fn); for (file = the_files; file != NULL; file = file->next) if (!(file->read & R_INCLUDED) && (file->read & R_INDIR) && file->exists) printf("%s (0) : not included\n", file->name); } DEBUG_PRINT(("ready reading\n")); DEBUG_PRINT(("\n\n\n")); if (option_compare) compare(option_compare_copy); build_refered_by(); analyze_all_sections(); return 0; }