]>
Commit | Line | Data |
---|---|---|
d0bfd026 JH |
1 | #include "cache.h" |
2 | #include "attr.h" | |
3 | ||
a5e92abd JH |
/*
 * Marker objects for attribute states that are not ordinary string
 * values.  Code in this file recognizes them by comparing pointers
 * against these arrays, not by inspecting the text they hold.
 */
const char git_attr__true[] = "(builtin)true";
const char git_attr__false[] = "\0(builtin)false";	/* first byte is NUL */
static const char git_attr__unknown[] = "(builtin)unknown";

/* Short spellings of the markers used throughout this file. */
#define ATTR__TRUE git_attr__true
#define ATTR__FALSE git_attr__false
#define ATTR__UNSET NULL	/* "unset" is a null pointer */
#define ATTR__UNKNOWN git_attr__unknown	/* not decided yet during a check */
515106fa | 11 | |
d0bfd026 JH |
/*
 * The basic design decision here is that we are not going to have
 * an insanely large number of attributes, so a small fixed-size
 * table is enough.
 *
 * The size is a randomly chosen prime.
 */
#define HASHSIZE 257

/* Debug tracing stays compiled out unless the build defines DEBUG_ATTR. */
#ifndef DEBUG_ATTR
#define DEBUG_ATTR 0
#endif
23 | ||
24 | struct git_attr { | |
25 | struct git_attr *next; | |
26 | unsigned h; | |
f48fd688 | 27 | int attr_nr; |
d0bfd026 JH |
28 | char name[FLEX_ARRAY]; |
29 | }; | |
f48fd688 | 30 | static int attr_nr; |
d0bfd026 | 31 | |
f48fd688 | 32 | static struct git_attr_check *check_all_attr; |
d0bfd026 JH |
33 | static struct git_attr *(git_attr_hash[HASHSIZE]); |
34 | ||
/*
 * Hash the first namelen bytes of name.
 *
 * Each byte, taken as an unsigned char, is XOR-ed into an accumulator
 * that is first mixed by combining it shifted left by 7 with itself
 * shifted right by 22.
 *
 * A zero or negative namelen hashes nothing and yields 0; the loop is
 * bounded by an index so a negative length cannot make it run away
 * past the end of the buffer.
 */
static unsigned hash_name(const char *name, int namelen)
{
	unsigned val = 0;
	int i;

	for (i = 0; i < namelen; i++) {
		unsigned char c = name[i];

		val = ((val << 7) | (val >> 22)) ^ c;
	}
	return val;
}
46 | ||
e4aee10a JH |
/*
 * Check whether the namelen bytes at name form an acceptable
 * attribute name.
 *
 * A name must be non-empty, must not begin with '-', and must consist
 * only of characters from [-A-Za-z0-9_.].  '=' is deliberately
 * excluded because it separates an attribute from its value, e.g.
 * "*.svg merge=special-merge-program-for-svg".
 *
 * Returns 0 when the name is valid and -1 when it is not.
 */
static int invalid_attr_name(const char *name, int namelen)
{
	int i;

	/* An empty name would otherwise pass the per-character loop. */
	if (namelen <= 0 || *name == '-')
		return -1;
	for (i = 0; i < namelen; i++) {
		char ch = name[i];

		if (!(ch == '-' || ch == '.' || ch == '_' ||
		      ('0' <= ch && ch <= '9') ||
		      ('a' <= ch && ch <= 'z') ||
		      ('A' <= ch && ch <= 'Z')))
			return -1;
	}
	return 0;
}
67 | ||
d0bfd026 JH |
68 | struct git_attr *git_attr(const char *name, int len) |
69 | { | |
70 | unsigned hval = hash_name(name, len); | |
71 | unsigned pos = hval % HASHSIZE; | |
72 | struct git_attr *a; | |
73 | ||
74 | for (a = git_attr_hash[pos]; a; a = a->next) { | |
75 | if (a->h == hval && | |
76 | !memcmp(a->name, name, len) && !a->name[len]) | |
77 | return a; | |
78 | } | |
79 | ||
e4aee10a JH |
80 | if (invalid_attr_name(name, len)) |
81 | return NULL; | |
82 | ||
d0bfd026 JH |
83 | a = xmalloc(sizeof(*a) + len + 1); |
84 | memcpy(a->name, name, len); | |
85 | a->name[len] = 0; | |
86 | a->h = hval; | |
87 | a->next = git_attr_hash[pos]; | |
f48fd688 | 88 | a->attr_nr = attr_nr++; |
d0bfd026 | 89 | git_attr_hash[pos] = a; |
f48fd688 JH |
90 | |
91 | check_all_attr = xrealloc(check_all_attr, | |
92 | sizeof(*check_all_attr) * attr_nr); | |
93 | check_all_attr[a->attr_nr].attr = a; | |
515106fa | 94 | check_all_attr[a->attr_nr].value = ATTR__UNKNOWN; |
d0bfd026 JH |
95 | return a; |
96 | } | |
97 | ||
98 | /* | |
99 | * .gitattributes file is one line per record, each of which is | |
100 | * | |
101 | * (1) glob pattern. | |
102 | * (2) whitespace | |
103 | * (3) whitespace separated list of attribute names, each of which | |
515106fa JH |
104 | * could be prefixed with '-' to mean "set to false", '!' to mean |
105 | * "unset". | |
d0bfd026 JH |
106 | */ |
107 | ||
515106fa | 108 | /* What does a matched pattern decide? */ |
/* The state that one matched line assigns to one attribute. */
struct attr_state {
	struct git_attr *attr;	/* attribute being assigned */
	const char *setto;	/* ATTR__TRUE, ATTR__FALSE, ATTR__UNSET or an allocated value string */
};
113 | ||
114 | struct match_attr { | |
f48fd688 JH |
115 | union { |
116 | char *pattern; | |
117 | struct git_attr *attr; | |
118 | } u; | |
119 | char is_macro; | |
d0bfd026 JH |
120 | unsigned num_attr; |
121 | struct attr_state state[FLEX_ARRAY]; | |
122 | }; | |
123 | ||
/* Characters that separate the fields of an attributes line. */
static const char blank[] = " \t\r\n";
125 | ||
515106fa JH |
126 | static const char *parse_attr(const char *src, int lineno, const char *cp, |
127 | int *num_attr, struct match_attr *res) | |
128 | { | |
129 | const char *ep, *equals; | |
130 | int len; | |
131 | ||
132 | ep = cp + strcspn(cp, blank); | |
133 | equals = strchr(cp, '='); | |
134 | if (equals && ep < equals) | |
135 | equals = NULL; | |
136 | if (equals) | |
137 | len = equals - cp; | |
138 | else | |
139 | len = ep - cp; | |
140 | if (!res) { | |
141 | if (*cp == '-' || *cp == '!') { | |
142 | cp++; | |
143 | len--; | |
144 | } | |
145 | if (invalid_attr_name(cp, len)) { | |
146 | fprintf(stderr, | |
147 | "%.*s is not a valid attribute name: %s:%d\n", | |
148 | len, cp, src, lineno); | |
149 | return NULL; | |
150 | } | |
151 | } else { | |
152 | struct attr_state *e; | |
153 | ||
154 | e = &(res->state[*num_attr]); | |
155 | if (*cp == '-' || *cp == '!') { | |
156 | e->setto = (*cp == '-') ? ATTR__FALSE : ATTR__UNSET; | |
157 | cp++; | |
158 | len--; | |
159 | } | |
160 | else if (!equals) | |
161 | e->setto = ATTR__TRUE; | |
162 | else { | |
163 | char *value; | |
164 | int vallen = ep - equals; | |
165 | value = xmalloc(vallen); | |
166 | memcpy(value, equals+1, vallen-1); | |
167 | value[vallen-1] = 0; | |
168 | e->setto = value; | |
169 | } | |
170 | e->attr = git_attr(cp, len); | |
171 | } | |
172 | (*num_attr)++; | |
173 | return ep + strspn(ep, blank); | |
174 | } | |
175 | ||
f48fd688 JH |
176 | static struct match_attr *parse_attr_line(const char *line, const char *src, |
177 | int lineno, int macro_ok) | |
d0bfd026 JH |
178 | { |
179 | int namelen; | |
180 | int num_attr; | |
181 | const char *cp, *name; | |
515106fa | 182 | struct match_attr *res = NULL; |
d0bfd026 | 183 | int pass; |
f48fd688 | 184 | int is_macro; |
d0bfd026 JH |
185 | |
186 | cp = line + strspn(line, blank); | |
187 | if (!*cp || *cp == '#') | |
188 | return NULL; | |
189 | name = cp; | |
190 | namelen = strcspn(name, blank); | |
f48fd688 JH |
191 | if (strlen(ATTRIBUTE_MACRO_PREFIX) < namelen && |
192 | !prefixcmp(name, ATTRIBUTE_MACRO_PREFIX)) { | |
193 | if (!macro_ok) { | |
194 | fprintf(stderr, "%s not allowed: %s:%d\n", | |
195 | name, src, lineno); | |
196 | return NULL; | |
197 | } | |
198 | is_macro = 1; | |
199 | name += strlen(ATTRIBUTE_MACRO_PREFIX); | |
200 | name += strspn(name, blank); | |
201 | namelen = strcspn(name, blank); | |
e4aee10a JH |
202 | if (invalid_attr_name(name, namelen)) { |
203 | fprintf(stderr, | |
204 | "%.*s is not a valid attribute name: %s:%d\n", | |
205 | namelen, name, src, lineno); | |
206 | return NULL; | |
207 | } | |
f48fd688 JH |
208 | } |
209 | else | |
210 | is_macro = 0; | |
d0bfd026 JH |
211 | |
212 | for (pass = 0; pass < 2; pass++) { | |
213 | /* pass 0 counts and allocates, pass 1 fills */ | |
214 | num_attr = 0; | |
215 | cp = name + namelen; | |
216 | cp = cp + strspn(cp, blank); | |
515106fa JH |
217 | while (*cp) |
218 | cp = parse_attr(src, lineno, cp, &num_attr, res); | |
d0bfd026 JH |
219 | if (pass) |
220 | break; | |
221 | res = xcalloc(1, | |
222 | sizeof(*res) + | |
223 | sizeof(struct attr_state) * num_attr + | |
f48fd688 | 224 | (is_macro ? 0 : namelen + 1)); |
515106fa | 225 | if (is_macro) |
f48fd688 JH |
226 | res->u.attr = git_attr(name, namelen); |
227 | else { | |
228 | res->u.pattern = (char*)&(res->state[num_attr]); | |
229 | memcpy(res->u.pattern, name, namelen); | |
230 | res->u.pattern[namelen] = 0; | |
231 | } | |
232 | res->is_macro = is_macro; | |
d0bfd026 JH |
233 | res->num_attr = num_attr; |
234 | } | |
235 | return res; | |
236 | } | |
237 | ||
238 | /* | |
239 | * Like info/exclude and .gitignore, the attribute information can | |
240 | * come from many places. | |
241 | * | |
242 | * (1) .gitattributes file of the same directory; | |
515106fa JH |
243 | * (2) .gitattributes file of the parent directory if (1) does not have |
244 | * any match; this goes recursively upwards, just like .gitignore. | |
245 | * (3) $GIT_DIR/info/attributes, which overrides both of the above. | |
d0bfd026 JH |
246 | * |
247 | * In the same file, later entries override the earlier match, so in the | |
248 | * global list, we would have entries from info/attributes the earliest | |
249 | * (reading the file from top to bottom), .gitattributes of the root | |
250 | * directory (again, reading the file from top to bottom) down to the | |
251 | * current directory, and then scan the list backwards to find the first match. | |
252 | * This is exactly the same as what excluded() does in dir.c to deal with | |
253 | * .gitignore | |
254 | */ | |
255 | ||
/*
 * One element of the attribute stack: the parsed lines of a single
 * attribute source.  Elements are linked through prev, and the
 * file-scope pointer attr_stack designates the top of the stack.
 */
static struct attr_stack {
	struct attr_stack *prev;	/* element below this one, or NULL */
	char *origin;			/* directory the lines came from; NULL for the built-in and info elements */
	unsigned num_matches;		/* number of entries in attrs[] */
	struct match_attr **attrs;	/* parsed lines, in the order they were read */
} *attr_stack;
262 | ||
263 | static void free_attr_elem(struct attr_stack *e) | |
264 | { | |
265 | int i; | |
266 | free(e->origin); | |
515106fa JH |
267 | for (i = 0; i < e->num_matches; i++) { |
268 | struct match_attr *a = e->attrs[i]; | |
269 | int j; | |
270 | for (j = 0; j < a->num_attr; j++) { | |
a5e92abd | 271 | const char *setto = a->state[j].setto; |
515106fa JH |
272 | if (setto == ATTR__TRUE || |
273 | setto == ATTR__FALSE || | |
274 | setto == ATTR__UNSET || | |
275 | setto == ATTR__UNKNOWN) | |
276 | ; | |
277 | else | |
a5e92abd | 278 | free((char*) setto); |
515106fa JH |
279 | } |
280 | free(a); | |
281 | } | |
d0bfd026 JH |
282 | free(e); |
283 | } | |
284 | ||
/* Attribute lines that are always present, NULL-terminated. */
static const char *builtin_attr[] = {
	"[attr]binary -diff -crlf",
	NULL,
};
289 | ||
290 | static struct attr_stack *read_attr_from_array(const char **list) | |
291 | { | |
292 | struct attr_stack *res; | |
293 | const char *line; | |
f48fd688 | 294 | int lineno = 0; |
d0bfd026 JH |
295 | |
296 | res = xcalloc(1, sizeof(*res)); | |
297 | while ((line = *(list++)) != NULL) { | |
f48fd688 JH |
298 | struct match_attr *a; |
299 | ||
300 | a = parse_attr_line(line, "[builtin]", ++lineno, 1); | |
d0bfd026 JH |
301 | if (!a) |
302 | continue; | |
46297958 AR |
303 | res->attrs = xrealloc(res->attrs, |
304 | sizeof(struct match_attr *) * (res->num_matches + 1)); | |
d0bfd026 JH |
305 | res->attrs[res->num_matches++] = a; |
306 | } | |
307 | return res; | |
308 | } | |
309 | ||
f48fd688 | 310 | static struct attr_stack *read_attr_from_file(const char *path, int macro_ok) |
d0bfd026 JH |
311 | { |
312 | FILE *fp; | |
313 | struct attr_stack *res; | |
314 | char buf[2048]; | |
f48fd688 | 315 | int lineno = 0; |
d0bfd026 JH |
316 | |
317 | res = xcalloc(1, sizeof(*res)); | |
318 | fp = fopen(path, "r"); | |
319 | if (!fp) | |
320 | return res; | |
321 | ||
322 | while (fgets(buf, sizeof(buf), fp)) { | |
f48fd688 JH |
323 | struct match_attr *a; |
324 | ||
325 | a = parse_attr_line(buf, path, ++lineno, macro_ok); | |
d0bfd026 JH |
326 | if (!a) |
327 | continue; | |
46297958 AR |
328 | res->attrs = xrealloc(res->attrs, |
329 | sizeof(struct match_attr *) * (res->num_matches + 1)); | |
d0bfd026 JH |
330 | res->attrs[res->num_matches++] = a; |
331 | } | |
332 | fclose(fp); | |
333 | return res; | |
334 | } | |
335 | ||
/*
 * Tracing helpers.  With DEBUG_ATTR enabled they print stack pushes,
 * pops and attribute assignments to stderr; otherwise they expand to
 * empty statements.
 */
#if DEBUG_ATTR
static void debug_info(const char *what, struct attr_stack *elem)
{
	const char *origin = elem->origin ? elem->origin : "()";

	fprintf(stderr, "%s: %s\n", what, origin);
}
static void debug_set(const char *what, const char *match, struct git_attr *attr, void *v)
{
	const char *raw = v;
	const char *shown = raw;

	/* Replace the marker values by a readable word. */
	if (ATTR_TRUE(raw))
		shown = "set";
	else if (ATTR_FALSE(raw))
		shown = "unset";
	else if (ATTR_UNSET(raw))
		shown = "unspecified";

	fprintf(stderr, "%s: %s => %s (%s)\n",
		what, attr->name, (char *) shown, match);
}
#define debug_push(a) debug_info("push", (a))
#define debug_pop(a) debug_info("pop", (a))
#else
#define debug_push(a) do { ; } while (0)
#define debug_pop(a) do { ; } while (0)
#define debug_set(a,b,c,d) do { ; } while (0)
#endif
362 | ||
f48fd688 JH |
363 | static void bootstrap_attr_stack(void) |
364 | { | |
365 | if (!attr_stack) { | |
366 | struct attr_stack *elem; | |
367 | ||
368 | elem = read_attr_from_array(builtin_attr); | |
369 | elem->origin = NULL; | |
370 | elem->prev = attr_stack; | |
371 | attr_stack = elem; | |
372 | ||
373 | elem = read_attr_from_file(GITATTRIBUTES_FILE, 1); | |
374 | elem->origin = strdup(""); | |
375 | elem->prev = attr_stack; | |
376 | attr_stack = elem; | |
377 | debug_push(elem); | |
378 | ||
379 | elem = read_attr_from_file(git_path(INFOATTRIBUTES_FILE), 1); | |
380 | elem->origin = NULL; | |
381 | elem->prev = attr_stack; | |
382 | attr_stack = elem; | |
383 | } | |
384 | } | |
385 | ||
d0bfd026 JH |
386 | static void prepare_attr_stack(const char *path, int dirlen) |
387 | { | |
388 | struct attr_stack *elem, *info; | |
389 | int len; | |
390 | char pathbuf[PATH_MAX]; | |
391 | ||
392 | /* | |
393 | * At the bottom of the attribute stack is the built-in | |
394 | * set of attribute definitions. Then, contents from | |
395 | * .gitattribute files from directories closer to the | |
396 | * root to the ones in deeper directories are pushed | |
397 | * to the stack. Finally, at the very top of the stack | |
398 | * we always keep the contents of $GIT_DIR/info/attributes. | |
399 | * | |
400 | * When checking, we use entries from near the top of the | |
401 | * stack, preferring $GIT_DIR/info/attributes, then | |
402 | * .gitattributes in deeper directories to shallower ones, | |
403 | * and finally use the built-in set as the default. | |
404 | */ | |
f48fd688 JH |
405 | if (!attr_stack) |
406 | bootstrap_attr_stack(); | |
d0bfd026 JH |
407 | |
408 | /* | |
409 | * Pop the "info" one that is always at the top of the stack. | |
410 | */ | |
411 | info = attr_stack; | |
412 | attr_stack = info->prev; | |
413 | ||
414 | /* | |
415 | * Pop the ones from directories that are not the prefix of | |
416 | * the path we are checking. | |
417 | */ | |
418 | while (attr_stack && attr_stack->origin) { | |
419 | int namelen = strlen(attr_stack->origin); | |
420 | ||
421 | elem = attr_stack; | |
422 | if (namelen <= dirlen && | |
423 | !strncmp(elem->origin, path, namelen)) | |
424 | break; | |
425 | ||
426 | debug_pop(elem); | |
427 | attr_stack = elem->prev; | |
428 | free_attr_elem(elem); | |
429 | } | |
430 | ||
431 | /* | |
432 | * Read from parent directories and push them down | |
433 | */ | |
434 | while (1) { | |
435 | char *cp; | |
436 | ||
437 | len = strlen(attr_stack->origin); | |
438 | if (dirlen <= len) | |
439 | break; | |
440 | memcpy(pathbuf, path, dirlen); | |
441 | memcpy(pathbuf + dirlen, "/", 2); | |
442 | cp = strchr(pathbuf + len + 1, '/'); | |
443 | strcpy(cp + 1, GITATTRIBUTES_FILE); | |
f48fd688 | 444 | elem = read_attr_from_file(pathbuf, 0); |
d0bfd026 JH |
445 | *cp = '\0'; |
446 | elem->origin = strdup(pathbuf); | |
447 | elem->prev = attr_stack; | |
448 | attr_stack = elem; | |
449 | debug_push(elem); | |
450 | } | |
451 | ||
452 | /* | |
453 | * Finally push the "info" one at the top of the stack. | |
454 | */ | |
455 | info->prev = attr_stack; | |
456 | attr_stack = info; | |
457 | } | |
458 | ||
/*
 * Decide whether pathname (pathlen bytes long) matches pattern, where
 * the pattern was read from the directory base (baselen bytes long;
 * "" for the top level).
 *
 * A pattern without '/' is matched against the last path component
 * only.  A pattern with '/' is matched with FNM_PATHNAME against the
 * part of pathname below base; a leading '/' in the pattern is
 * ignored.  base may be given with or without a trailing '/'.
 *
 * Returns 1 on a match and 0 otherwise.
 */
static int path_matches(const char *pathname, int pathlen,
			const char *pattern,
			const char *base, int baselen)
{
	int skip = baselen;

	if (!strchr(pattern, '/')) {
		/* match basename */
		const char *leaf = strrchr(pathname, '/');

		leaf = leaf ? leaf + 1 : pathname;
		return (fnmatch(pattern, leaf, 0) == 0);
	}
	/*
	 * match with FNM_PATHNAME; the pattern has base implicitly
	 * in front of it.
	 */
	if (*pattern == '/')
		pattern++;
	if (pathlen < baselen || strncmp(pathname, base, baselen))
		return 0;
	if (baselen && base[baselen - 1] != '/') {
		/*
		 * base names a directory without its trailing slash, so
		 * the separator is the next byte of pathname; it has to
		 * be there, and it is not part of what the pattern sees.
		 */
		if (pathname[baselen] != '/')
			return 0;
		skip++;
	}
	return fnmatch(pattern, pathname + skip, FNM_PATHNAME) == 0;
}
481 | ||
515106fa JH |
482 | static int fill_one(const char *what, struct match_attr *a, int rem) |
483 | { | |
484 | struct git_attr_check *check = check_all_attr; | |
485 | int i; | |
486 | ||
487 | for (i = 0; 0 < rem && i < a->num_attr; i++) { | |
488 | struct git_attr *attr = a->state[i].attr; | |
a5e92abd JH |
489 | const char **n = &(check[attr->attr_nr].value); |
490 | const char *v = a->state[i].setto; | |
515106fa JH |
491 | |
492 | if (*n == ATTR__UNKNOWN) { | |
493 | debug_set(what, a->u.pattern, attr, v); | |
494 | *n = v; | |
495 | rem--; | |
496 | } | |
497 | } | |
498 | return rem; | |
499 | } | |
500 | ||
f48fd688 | 501 | static int fill(const char *path, int pathlen, struct attr_stack *stk, int rem) |
d0bfd026 | 502 | { |
515106fa | 503 | int i; |
d0bfd026 JH |
504 | const char *base = stk->origin ? stk->origin : ""; |
505 | ||
506 | for (i = stk->num_matches - 1; 0 < rem && 0 <= i; i--) { | |
507 | struct match_attr *a = stk->attrs[i]; | |
f48fd688 JH |
508 | if (a->is_macro) |
509 | continue; | |
d0bfd026 | 510 | if (path_matches(path, pathlen, |
515106fa JH |
511 | a->u.pattern, base, strlen(base))) |
512 | rem = fill_one("fill", a, rem); | |
d0bfd026 JH |
513 | } |
514 | return rem; | |
515 | } | |
516 | ||
f48fd688 JH |
517 | static int macroexpand(struct attr_stack *stk, int rem) |
518 | { | |
515106fa | 519 | int i; |
f48fd688 JH |
520 | struct git_attr_check *check = check_all_attr; |
521 | ||
522 | for (i = stk->num_matches - 1; 0 < rem && 0 <= i; i--) { | |
523 | struct match_attr *a = stk->attrs[i]; | |
524 | if (!a->is_macro) | |
525 | continue; | |
515106fa | 526 | if (check[a->u.attr->attr_nr].value != ATTR__TRUE) |
f48fd688 | 527 | continue; |
515106fa | 528 | rem = fill_one("expand", a, rem); |
f48fd688 JH |
529 | } |
530 | return rem; | |
531 | } | |
532 | ||
d0bfd026 JH |
533 | int git_checkattr(const char *path, int num, struct git_attr_check *check) |
534 | { | |
535 | struct attr_stack *stk; | |
536 | const char *cp; | |
537 | int dirlen, pathlen, i, rem; | |
538 | ||
f48fd688 JH |
539 | bootstrap_attr_stack(); |
540 | for (i = 0; i < attr_nr; i++) | |
515106fa | 541 | check_all_attr[i].value = ATTR__UNKNOWN; |
d0bfd026 JH |
542 | |
543 | pathlen = strlen(path); | |
544 | cp = strrchr(path, '/'); | |
545 | if (!cp) | |
546 | dirlen = 0; | |
547 | else | |
548 | dirlen = cp - path; | |
549 | prepare_attr_stack(path, dirlen); | |
f48fd688 JH |
550 | rem = attr_nr; |
551 | for (stk = attr_stack; 0 < rem && stk; stk = stk->prev) | |
552 | rem = fill(path, pathlen, stk, rem); | |
553 | ||
d0bfd026 | 554 | for (stk = attr_stack; 0 < rem && stk; stk = stk->prev) |
f48fd688 JH |
555 | rem = macroexpand(stk, rem); |
556 | ||
515106fa | 557 | for (i = 0; i < num; i++) { |
a5e92abd | 558 | const char *value = check_all_attr[check[i].attr->attr_nr].value; |
515106fa JH |
559 | if (value == ATTR__UNKNOWN) |
560 | value = ATTR__UNSET; | |
561 | check[i].value = value; | |
562 | } | |
f48fd688 | 563 | |
d0bfd026 JH |
564 | return 0; |
565 | } |