]> git.ipfire.org Git - people/ms/strongswan.git/blob - doc/utils/html2four.c
(no commit message)
[people/ms/strongswan.git] / doc / utils / html2four.c
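/*
	Extract header (<hN> ... </hN>) text from HTML files.
	Each header is written as one tab-separated line
	(file name, most recent <a name=...> label, header level, header text),
	a format suitable for turning into a permuted index.
*/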
5
6 #include <ctype.h>
7 #include <stdlib.h>
8 #include <stdio.h>
9 #include <string.h>
10
/*
	size limits
	MAX_LINE	size handed to fgets() when reading one input line
	MAX_NAME	most characters kept from the value of an <a name=...> tag
*/
#define MAX_NAME	64
#define MAX_LINE	512
16
17 /*
18 functions
19 all return 0 for OK, 1 for errors
20 */
/* scan one open stream; "file" is the name used in output and messages */
int do_file( char *file, FILE *fp ) ;
/* process the tags and header text found in one input line */
int parse_line( char *data ) ;
/* write one tab-separated record for the current file and label */
int print_line( char *tag, char *text ) ;
/* report the header tag counters for "file" on stderr */
int print_header_problem( char *file ) ;
/* declared only; no definition is visible in this file */
int sanity() ;

/* print a message on stderr and exit with status 1 */
void die( char *message ) ;

/* program name taken from argv[0], used as prefix of every message */
char *prog_name ;
/* name of the file being scanned, set by do_file() */
char *current_file ;
/* header level limit; main() sets it to 9 or to the digit of a -N option */
int max_level ;
32
33 int main(int argc, char* argv[])
34 {
35 char *p ;
36 int temp, done, status ;
37 FILE *fp ;
38
39 prog_name = *argv ;
40 argc--,argv++ ;
41
42 max_level = 9 ;
43 if(argc && *argv ) {
44 p = *argv ;
45 if( p[0] == '-' ) {
46 if( isdigit(p[1]) && p[2] == '\0' ) {
47 max_level = p[1] - 0 ;
48 argc-- ;
49 argv++ ;
50 }
51 else die("unknown option") ;
52 } }
53
54 status = done = 0 ;
55 if( argc == 0) {
56 if( (status = do_file("STDIN", stdin)) == 0 )
57 done++ ;
58 }
59 else {
60 /*
61 printf("ARGC = %d\n", argc ) ;
62 */
63 while( argc-- ) {
64 p = *argv++ ;
65 /*
66 printf("ARGV P %s %s\n", *argv, p) ;
67 */
68 if( p == NULL ) {
69 fprintf(stderr, "%s: null filename pointer\n", prog_name) ;
70 status++ ;
71 }
72 else if( (fp = fopen(p,"r")) == NULL ) {
73 fprintf(stderr, "%s: cannot open file %s\n", prog_name, p) ;
74 status++ ;
75 }
76 else {
77 if( (temp = do_file(p, fp)) != 0 )
78 status++ ;
79 done++ ;
80 fclose(fp) ;
81 }
82 fflush(stderr) ;
83 fflush(stdout) ;
84 }
85 }
86 /*
87 printf("%s: %d files processed, %d with errors\n", prog_name, done, status) ;
88 */
89 return( status ? 1 : 0 ) ;
90 }
91
92 void die( char *message )
93 {
94 fflush(stdout) ;
95 fprintf(stderr, "%s: %s\n", prog_name, message) ;
96 exit(1) ;
97 }
98
99 int header_flags[10] ;
100 int in_header ;
101
102 char buffer[MAX_LINE+1] ;
103 char label[MAX_NAME+1] ;
104
105 int do_file( char *file, FILE *fp )
106 {
107 int i, status, x, y ;
108 char *base, *p ;
109
110 status = 0 ;
111 in_header = 0 ;
112 label[0] = '\0' ;
113 for( i = 0 ; i < 10 ; i++ )
114 header_flags[i] = 0 ;
115 current_file = file ;
116
117 while( base = fgets(buffer, MAX_LINE, fp) ) {
118 // count < and > characters in line
119 for( x = y = 0, p = base ; *p ; p++ )
120 switch( *p ) {
121 case '<':
122 x++ ;
123 break ;
124 case '>':
125 y++ ;
126 break ;
127 default:
128 break ;
129 }
130 // skip line if no < or >
131 if( x == 0 && y == 0 )
132 continue ;
133 // report error for unequal count
134 else if( x != y ) {
135 if( strncmp( base, "<!--", 4) && strncmp(base, "-->", 3) ) {
136 fflush(stdout) ;
137 fprintf(stderr, "%s in file %s: unequal < > counts %d %d\n",
138 prog_name, file, x, y ) ;
139 fprintf(stderr, "%s: %s\n", prog_name, base) ;
140 fflush(stderr) ;
141 status = 1 ;
142 }
143 continue ;
144 }
145 // parse lines containing tags
146 else
147 if( parse_line(base) )
148 status = 1 ;
149 // check that header labelling is sane
150 for( i = x = y = 0 ; i < 10 ; i++ ) {
151 // count non-zero entries
152 if( x = header_flags[i] )
153 y++ ;
154 // should be in 0 or 1 headers at a time
155 if( x > 1 || x < 0 )
156 status = 1 ;
157 }
158 if( y > 1 )
159 status = 1 ;
160 }
161 return status ;
162 }
163
164 int parse_line( char *data )
165 {
166 char *p, *q, *end ;
167 int x ;
168
169 // set end pointer
170 for( end = data ; *end ; end++ )
171 ;
172 // trim off trailing returns or newlines
173 for( p = end - 1, q = end ; q > data ; p--,q-- ) {
174 switch( *p ) {
175 case '\012':
176 case '\015':
177 *p = '\0' ;
178 continue ;
179 default:
180 break ; // out of switch()
181 }
182 break ; // out of for()
183 }
184 end = q ;
185 p = data ;
186 while( p < end ) {
187 // find tag delimiters
188 if( *p == '<') {
189 for( q = p + 1 ; *q ; q++ )
190 if( *q == '<' || *q == '>' )
191 break ;
192 // if we find another '<'
193 // restart tag search from it
194 if( *q == '<' ) {
195 p = q ;
196 continue ;
197 }
198 // "<>" is not interesting
199 if( q == p + 1 ) {
200 fflush(stdout) ;
201 fprintf(stderr, "%s: null tag\n", prog_name) ;
202 fprintf(stderr, "%s: line\n", prog_name, data) ;
203 fflush(stderr) ;
204 p = q + 1 ;
205 continue ;
206 }
207 // ignore delimiters once found
208 *q = '\0' ;
209 p++ ;
210 // p points to tag contents, null terminated
211 switch( *p ) {
212 // save contents of <a name= > tags
213 case 'a' :
214 case 'A' :
215 if( p[1] == ' ' &&
216 (p[2] == 'n' || p[2] == 'N') &&
217 (p[3] == 'a' || p[3] == 'A') &&
218 (p[4] == 'm' || p[4] == 'M') &&
219 (p[5] == 'e' || p[5] == 'E') &&
220 p[6] == '=' )
221 strncpy(label, p + 7, MAX_NAME) ;
222 break ;
223 case 'b' :
224 case 'B' :
225 if( in_header && strlen(p) == 2 &&
226 (p[1] == 'r' || p[1] == 'R') )
227 putchar(' ') ;
228 break ;
229 // header tags
230 case 'h' :
231 case 'H' :
232 if( strlen(p) == 2 && isdigit(p[1]) ) {
233 if( in_header )
234 fprintf(stderr, "%s: bad header nesting in %s\n",
235 prog_name, current_file) ;
236 x = p[1] - '0' ;
237 in_header = 1 ;
238 header_flags[x]++ ;
239 printf("%s\t%s\tH%d\t", current_file, label, x) ;
240 }
241 break ;
242 // only care about end-of-header
243 case '/':
244 p++ ;
245 switch( *p ) {
246 case 'h' :
247 case 'H' :
248 if( strlen(p) == 2 && isdigit(p[1]) ) {
249 if( ! in_header )
250 fprintf(stderr, "%s: bad header nesting in %s\n",
251 prog_name, current_file) ;
252 x = p[1] - '0' ;
253 in_header = 0 ;
254 header_flags[x]-- ;
255 printf("\n") ;
256 }
257 break ;
258 }
259 break ;
260 // uninteresting tag, look for next
261 default :
262 break ;
263 }
264 // tag done, point p beyond it
265 p = q + 1 ;
266 }
267 else if( in_header ) {
268 if( isprint(*p) && *p != '\n' )
269 putchar(*p) ;
270 else
271 putchar(' ');
272 p++ ;
273 }
274 else
275 p++ ;
276 }
277 return(0) ;
278 }
279
280 int print_line( char *tag, char *text)
281 {
282 printf("%%s\ts\t%s\t%s\t\n", current_file, label, tag, text) ;
283 return 0 ;
284 }
285
286 int print_header_problem( char *file )
287 {
288 int i ;
289 fflush(stdout) ;
290 fprintf(stderr, "%s: HEADER TAG PROBLEM in file %s\n", prog_name, file) ;
291 fprintf(stderr, "%s: counts", prog_name) ;
292 for ( i = 0 ; i < 10 ; i++ )
293 fprintf(stderr, "\t%d", i) ;
294 fprintf(stderr,"\n") ;
295 fflush(stderr) ;
296 return(0) ;
297 }
298