]>
Commit | Line | Data |
---|---|---|
6dbe3af9 KZ |
1 | /* |
2 | * Copyright (c) 1989 The Regents of the University of California. | |
3 | * All rights reserved. | |
4 | * | |
5 | * This code is derived from software contributed to Berkeley by | |
6 | * Michael Rendell of Memorial University of Newfoundland. | |
7 | * | |
8 | * Redistribution and use in source and binary forms, with or without | |
9 | * modification, are permitted provided that the following conditions | |
10 | * are met: | |
11 | * 1. Redistributions of source code must retain the above copyright | |
12 | * notice, this list of conditions and the following disclaimer. | |
13 | * 2. Redistributions in binary form must reproduce the above copyright | |
14 | * notice, this list of conditions and the following disclaimer in the | |
15 | * documentation and/or other materials provided with the distribution. | |
16 | * 3. All advertising materials mentioning features or use of this software | |
17 | * must display the following acknowledgement: | |
18 | * This product includes software developed by the University of | |
19 | * California, Berkeley and its contributors. | |
20 | * 4. Neither the name of the University nor the names of its contributors | |
21 | * may be used to endorse or promote products derived from this software | |
22 | * without specific prior written permission. | |
23 | * | |
24 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
25 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
28 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
29 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
30 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
32 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
33 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
34 | * SUCH DAMAGE. | |
35 | */ | |
36 | ||
37 | #ifndef lint | |
38 | char copyright[] = | |
39 | "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ | |
40 | All rights reserved.\n"; | |
41 | #endif /* not lint */ | |
42 | ||
43 | #ifndef lint | |
44 | static char sccsid[] = "@(#)tsort.c 5.3 (Berkeley) 6/1/90"; | |
45 | #endif /* not lint */ | |
46 | ||
47 | #include <sys/types.h> | |
48 | #include <errno.h> | |
49 | #include <stdio.h> | |
50 | #include <ctype.h> | |
51 | #include <string.h> | |
52 | ||
53 | /* | |
54 | * Topological sort. Input is a list of pairs of strings separated by | |
55 | * white space (spaces, tabs, and/or newlines); strings are written to | |
56 | * standard output in sorted order, one per line. | |
57 | * | |
58 | * usage: | |
59 | * tsort [inputfile] | |
60 | * If no input file is specified, standard input is read. | |
61 | * | |
62 | * Should be compatible with AT&T tsort HOWEVER the output is not identical | |
63 | * (i.e. for most graphs there is more than one sorted order, and this tsort | |
64 | * usually generates a different one than the AT&T tsort). Also, cycle | |
65 | * reporting seems to be more accurate in this version (the AT&T tsort | |
66 | * sometimes says a node is in a cycle when it isn't). | |
67 | * | |
68 | * Michael Rendell, michael@stretch.cs.mun.ca - Feb 26, '90 | |
69 | */ | |
70 | #define HASHSIZE 53 /* doesn't need to be big */ | |
71 | #define NF_MARK 0x1 /* marker for cycle detection */ | |
72 | #define NF_ACYCLIC 0x2 /* this node is cycle free */ | |
73 | ||
74 | typedef struct node_str NODE; | |
75 | ||
76 | struct node_str { | |
77 | char *n_name; /* name of this node */ | |
78 | NODE **n_prevp; /* pointer to previous node's n_next */ | |
79 | NODE *n_next; /* next node in graph */ | |
80 | NODE *n_hash; /* next node in hash table */ | |
81 | int n_narcs; /* number of arcs in n_arcs[] */ | |
82 | int n_arcsize; /* size of n_arcs[] array */ | |
83 | NODE **n_arcs; /* array of arcs to other nodes */ | |
84 | int n_refcnt; /* # of arcs pointing to this node */ | |
85 | int n_flags; /* NF_* */ | |
86 | }; | |
87 | ||
88 | typedef struct _buf { | |
89 | char *b_buf; | |
90 | int b_bsize; | |
91 | } BUF; | |
92 | ||
93 | NODE *add_node(), *find_node(); | |
94 | void add_arc(), no_memory(), remove_node(), tsort(); | |
95 | char *grow_buf(), *malloc(); | |
fd6b7a7f | 96 | int find_cycle(NODE *, NODE *, int, int); |
6dbe3af9 KZ |
97 | |
98 | extern int errno; | |
99 | NODE *graph; | |
100 | NODE *hashtable[HASHSIZE]; | |
101 | NODE **cycle_buf; | |
102 | NODE **longest_cycle; | |
103 | ||
fd6b7a7f | 104 | int |
6dbe3af9 KZ |
105 | main(argc, argv) |
106 | int argc; | |
107 | char **argv; | |
108 | { | |
109 | register BUF *b; | |
110 | register int c, n; | |
111 | FILE *fp; | |
112 | int bsize, nused; | |
113 | BUF bufs[2]; | |
114 | ||
115 | if (argc < 2) | |
116 | fp = stdin; | |
117 | /* == becomes > in next line per Volker Meyer_zu_Bexten | |
118 | <vmzb@ims.fhg.de> -- faith@cs.unc.edu, Sat Feb 4 21:25:09 1995 */ | |
119 | else if (argc > 2) { | |
120 | (void)fprintf(stderr, "usage: tsort [ inputfile ]\n"); | |
121 | exit(1); | |
122 | } else if (!(fp = fopen(argv[1], "r"))) { | |
123 | (void)fprintf(stderr, "tsort: %s.\n", strerror(errno)); | |
124 | exit(1); | |
125 | } | |
126 | ||
127 | for (b = bufs, n = 2; --n >= 0; b++) | |
128 | b->b_buf = grow_buf((char *)NULL, b->b_bsize = 1024); | |
129 | ||
130 | /* parse input and build the graph */ | |
131 | for (n = 0, c = getc(fp);;) { | |
132 | while (c != EOF && isspace(c)) | |
133 | c = getc(fp); | |
134 | if (c == EOF) | |
135 | break; | |
136 | ||
137 | nused = 0; | |
138 | b = &bufs[n]; | |
139 | bsize = b->b_bsize; | |
140 | do { | |
141 | b->b_buf[nused++] = c; | |
142 | if (nused == bsize) { | |
143 | bsize *= 2; | |
144 | b->b_buf = grow_buf(b->b_buf, bsize); | |
145 | } | |
146 | c = getc(fp); | |
147 | } while (c != EOF && !isspace(c)); | |
148 | ||
149 | b->b_buf[nused] = '\0'; | |
150 | b->b_bsize = bsize; | |
151 | if (n) | |
152 | add_arc(bufs[0].b_buf, bufs[1].b_buf); | |
153 | n = !n; | |
154 | } | |
155 | (void)fclose(fp); | |
156 | if (n) { | |
157 | (void)fprintf(stderr, "tsort: odd data count.\n"); | |
158 | exit(1); | |
159 | } | |
160 | ||
161 | /* do the sort */ | |
162 | tsort(); | |
fd6b7a7f | 163 | return 0; |
6dbe3af9 KZ |
164 | } |
165 | ||
166 | /* double the size of oldbuf and return a pointer to the new buffer. */ | |
167 | char * | |
168 | grow_buf(bp, size) | |
169 | char *bp; | |
170 | int size; | |
171 | { | |
172 | char *realloc(); | |
173 | ||
174 | if (!(bp = realloc(bp, (u_int)size))) | |
175 | no_memory(); | |
176 | return(bp); | |
177 | } | |
178 | ||
179 | /* | |
180 | * add an arc from node s1 to node s2 in the graph. If s1 or s2 are not in | |
181 | * the graph, then add them. | |
182 | */ | |
183 | void | |
184 | add_arc(s1, s2) | |
185 | char *s1, *s2; | |
186 | { | |
187 | register NODE *n1; | |
188 | NODE *n2; | |
189 | int bsize; | |
190 | ||
191 | n1 = find_node(s1); | |
192 | if (!n1) | |
193 | n1 = add_node(s1); | |
194 | ||
195 | if (!strcmp(s1, s2)) | |
196 | return; | |
197 | ||
198 | n2 = find_node(s2); | |
199 | if (!n2) | |
200 | n2 = add_node(s2); | |
201 | ||
202 | /* | |
203 | * could check to see if this arc is here already, but it isn't | |
204 | * worth the bother -- there usually isn't and it doesn't hurt if | |
205 | * there is (I think :-). | |
206 | */ | |
207 | if (n1->n_narcs == n1->n_arcsize) { | |
208 | if (!n1->n_arcsize) | |
209 | n1->n_arcsize = 10; | |
210 | bsize = n1->n_arcsize * sizeof(*n1->n_arcs) * 2; | |
211 | n1->n_arcs = (NODE **)grow_buf((char *)n1->n_arcs, bsize); | |
212 | n1->n_arcsize = bsize / sizeof(*n1->n_arcs); | |
213 | } | |
214 | n1->n_arcs[n1->n_narcs++] = n2; | |
215 | ++n2->n_refcnt; | |
216 | } | |
217 | ||
fd6b7a7f | 218 | int |
6dbe3af9 KZ |
219 | hash_string(s) |
220 | char *s; | |
221 | { | |
222 | register int hash, i; | |
223 | ||
224 | for (hash = 0, i = 1; *s; s++, i++) | |
225 | hash += *s * i; | |
226 | return(hash % HASHSIZE); | |
227 | } | |
228 | ||
229 | /* | |
230 | * find a node in the graph and return a pointer to it - returns null if not | |
231 | * found. | |
232 | */ | |
233 | NODE * | |
234 | find_node(name) | |
235 | char *name; | |
236 | { | |
237 | register NODE *n; | |
238 | ||
239 | for (n = hashtable[hash_string(name)]; n; n = n->n_hash) | |
240 | if (!strcmp(n->n_name, name)) | |
241 | return(n); | |
242 | return((NODE *)NULL); | |
243 | } | |
244 | ||
245 | /* Add a node to the graph and return a pointer to it. */ | |
246 | NODE * | |
247 | add_node(name) | |
248 | char *name; | |
249 | { | |
250 | register NODE *n; | |
251 | int hash; | |
252 | ||
253 | if (!(n = (NODE *)malloc(sizeof(NODE))) || !(n->n_name = strdup(name))) | |
254 | no_memory(); | |
255 | ||
256 | n->n_narcs = 0; | |
257 | n->n_arcsize = 0; | |
258 | n->n_arcs = (NODE **)NULL; | |
259 | n->n_refcnt = 0; | |
260 | n->n_flags = 0; | |
261 | ||
262 | /* add to linked list */ | |
fd6b7a7f | 263 | if ((n->n_next = graph) != NULL) |
6dbe3af9 KZ |
264 | graph->n_prevp = &n->n_next; |
265 | n->n_prevp = &graph; | |
266 | graph = n; | |
267 | ||
268 | /* add to hash table */ | |
269 | hash = hash_string(name); | |
270 | n->n_hash = hashtable[hash]; | |
271 | hashtable[hash] = n; | |
272 | return(n); | |
273 | } | |
274 | ||
275 | /* do topological sort on graph */ | |
276 | void | |
277 | tsort() | |
278 | { | |
279 | register NODE *n, *next; | |
280 | register int cnt; | |
281 | ||
282 | while (graph) { | |
283 | /* | |
284 | * keep getting rid of simple cases until there are none left, | |
285 | * if there are any nodes still in the graph, then there is | |
286 | * a cycle in it. | |
287 | */ | |
288 | do { | |
289 | for (cnt = 0, n = graph; n; n = next) { | |
290 | next = n->n_next; | |
291 | if (n->n_refcnt == 0) { | |
292 | remove_node(n); | |
293 | ++cnt; | |
294 | } | |
295 | } | |
296 | } while (graph && cnt); | |
297 | ||
298 | if (!graph) | |
299 | break; | |
300 | ||
301 | if (!cycle_buf) { | |
302 | /* | |
303 | * allocate space for two cycle logs - one to be used | |
304 | * as scratch space, the other to save the longest | |
305 | * cycle. | |
306 | */ | |
307 | for (cnt = 0, n = graph; n; n = n->n_next) | |
308 | ++cnt; | |
309 | cycle_buf = | |
310 | (NODE **)malloc((u_int)sizeof(NODE *) * cnt); | |
311 | longest_cycle = | |
312 | (NODE **)malloc((u_int)sizeof(NODE *) * cnt); | |
313 | if (!cycle_buf || !longest_cycle) | |
314 | no_memory(); | |
315 | } | |
316 | for (n = graph; n; n = n->n_next) | |
317 | if (!(n->n_flags & NF_ACYCLIC)) { | |
fd6b7a7f | 318 | if ((cnt = find_cycle(n, n, 0, 0)) != 0) { |
6dbe3af9 KZ |
319 | register int i; |
320 | ||
321 | (void)fprintf(stderr, | |
322 | "tsort: cycle in data.\n"); | |
323 | for (i = 0; i < cnt; i++) | |
324 | (void)fprintf(stderr, | |
325 | "tsort: %s.\n", longest_cycle[i]->n_name); | |
326 | remove_node(n); | |
327 | break; | |
328 | } else | |
329 | /* to avoid further checks */ | |
330 | n->n_flags = NF_ACYCLIC; | |
331 | } | |
332 | ||
333 | if (!n) { | |
334 | (void)fprintf(stderr, | |
335 | "tsort: internal error -- could not find cycle.\n"); | |
336 | exit(1); | |
337 | } | |
338 | } | |
339 | } | |
340 | ||
341 | /* print node and remove from graph (does not actually free node) */ | |
342 | void | |
343 | remove_node(n) | |
344 | register NODE *n; | |
345 | { | |
346 | register NODE **np; | |
347 | register int i; | |
348 | ||
349 | (void)printf("%s\n", n->n_name); | |
350 | for (np = n->n_arcs, i = n->n_narcs; --i >= 0; np++) | |
351 | --(*np)->n_refcnt; | |
352 | n->n_narcs = 0; | |
353 | *n->n_prevp = n->n_next; | |
354 | if (n->n_next) | |
355 | n->n_next->n_prevp = n->n_prevp; | |
356 | } | |
357 | ||
358 | /* look for the longest cycle from node from to node to. */ | |
fd6b7a7f | 359 | int |
6dbe3af9 KZ |
360 | find_cycle(from, to, longest_len, depth) |
361 | NODE *from, *to; | |
362 | int depth, longest_len; | |
363 | { | |
364 | register NODE **np; | |
365 | register int i, len; | |
366 | ||
367 | /* | |
368 | * avoid infinite loops and ignore portions of the graph known | |
369 | * to be acyclic | |
370 | */ | |
371 | if (from->n_flags & (NF_MARK|NF_ACYCLIC)) | |
372 | return(0); | |
373 | from->n_flags = NF_MARK; | |
374 | ||
375 | for (np = from->n_arcs, i = from->n_narcs; --i >= 0; np++) { | |
376 | cycle_buf[depth] = *np; | |
377 | if (*np == to) { | |
378 | if (depth + 1 > longest_len) { | |
379 | longest_len = depth + 1; | |
380 | (void)memcpy((char *)longest_cycle, | |
381 | (char *)cycle_buf, | |
382 | longest_len * sizeof(NODE *)); | |
383 | } | |
384 | } else { | |
385 | len = find_cycle(*np, to, longest_len, depth + 1); | |
386 | if (len > longest_len) | |
387 | longest_len = len; | |
388 | } | |
389 | } | |
390 | from->n_flags &= ~NF_MARK; | |
391 | return(longest_len); | |
392 | } | |
393 | ||
/*
 * Report an out-of-memory condition on standard error, using the system
 * message for ENOMEM, and exit with status 1.  Does not return.
 */
void
no_memory(void)
{
	const char *reason = strerror(ENOMEM);

	(void)fprintf(stderr, "tsort: %s.\n", reason);
	exit(1);
}
399 | } |