]>
Commit | Line | Data |
---|---|---|
d0bfd026 JH |
1 | #include "cache.h" |
2 | #include "attr.h" | |
3 | ||
/*
 * Attribute names are interned in a small chained hash table.  The
 * basic design decision is that we are never going to have an insanely
 * large number of attributes, so the bucket count is fixed.
 *
 * The value is a randomly chosen prime.
 */
#define HASHSIZE 257

/* DEBUG_ATTR may be predefined by the build; it defaults to off. */
#if !defined(DEBUG_ATTR)
#define DEBUG_ATTR 0
#endif
15 | ||
16 | struct git_attr { | |
17 | struct git_attr *next; | |
18 | unsigned h; | |
19 | char name[FLEX_ARRAY]; | |
20 | }; | |
21 | ||
22 | static struct git_attr *(git_attr_hash[HASHSIZE]); | |
23 | ||
/*
 * Hash the first namelen bytes of name.
 *
 * Each byte is treated as an unsigned char and folded into the running
 * value with a shift-and-xor step.  The result is the full hash; the
 * caller reduces it to a bucket index.
 */
static unsigned hash_name(const char *name, int namelen)
{
	unsigned hash = 0;

	for (; namelen != 0; namelen--) {
		unsigned char byte = *name++;

		hash = ((hash << 7) | (hash >> 22)) ^ byte;
	}
	return hash;
}
35 | ||
36 | struct git_attr *git_attr(const char *name, int len) | |
37 | { | |
38 | unsigned hval = hash_name(name, len); | |
39 | unsigned pos = hval % HASHSIZE; | |
40 | struct git_attr *a; | |
41 | ||
42 | for (a = git_attr_hash[pos]; a; a = a->next) { | |
43 | if (a->h == hval && | |
44 | !memcmp(a->name, name, len) && !a->name[len]) | |
45 | return a; | |
46 | } | |
47 | ||
48 | a = xmalloc(sizeof(*a) + len + 1); | |
49 | memcpy(a->name, name, len); | |
50 | a->name[len] = 0; | |
51 | a->h = hval; | |
52 | a->next = git_attr_hash[pos]; | |
53 | git_attr_hash[pos] = a; | |
54 | return a; | |
55 | } | |
56 | ||
57 | /* | |
58 | * .gitattributes file is one line per record, each of which is | |
59 | * | |
60 | * (1) glob pattern. | |
61 | * (2) whitespace | |
62 | * (3) whitespace separated list of attribute names, each of which | |
63 | * could be prefixed with '!' to mean "not set". | |
64 | */ | |
65 | ||
/* What one token on an attribute line says about one attribute. */
struct attr_state {
	int unset;		/* non-zero when the token was written as "!name" */
	struct git_attr *attr;	/* the interned attribute being set or unset */
};
70 | ||
71 | struct match_attr { | |
72 | char *pattern; | |
73 | unsigned num_attr; | |
74 | struct attr_state state[FLEX_ARRAY]; | |
75 | }; | |
76 | ||
/* Characters that separate the fields of an attribute line. */
static const char blank[] = " \t\r\n";
78 | ||
79 | static struct match_attr *parse_attr_line(const char *line) | |
80 | { | |
81 | int namelen; | |
82 | int num_attr; | |
83 | const char *cp, *name; | |
84 | struct match_attr *res = res; | |
85 | int pass; | |
86 | ||
87 | cp = line + strspn(line, blank); | |
88 | if (!*cp || *cp == '#') | |
89 | return NULL; | |
90 | name = cp; | |
91 | namelen = strcspn(name, blank); | |
92 | ||
93 | for (pass = 0; pass < 2; pass++) { | |
94 | /* pass 0 counts and allocates, pass 1 fills */ | |
95 | num_attr = 0; | |
96 | cp = name + namelen; | |
97 | cp = cp + strspn(cp, blank); | |
98 | while (*cp) { | |
99 | const char *ep; | |
100 | ep = cp + strcspn(cp, blank); | |
101 | if (pass) { | |
102 | struct attr_state *e; | |
103 | ||
104 | e = &(res->state[num_attr]); | |
105 | if (*cp == '!') { | |
106 | e->unset = 1; | |
107 | cp++; | |
108 | } | |
109 | e->attr = git_attr(cp, ep - cp); | |
110 | } | |
111 | num_attr++; | |
112 | cp = ep + strspn(ep, blank); | |
113 | } | |
114 | if (pass) | |
115 | break; | |
116 | res = xcalloc(1, | |
117 | sizeof(*res) + | |
118 | sizeof(struct attr_state) * num_attr + | |
119 | namelen + 1); | |
120 | res->pattern = (char*)&(res->state[num_attr]); | |
121 | memcpy(res->pattern, name, namelen); | |
122 | res->pattern[namelen] = 0; | |
123 | res->num_attr = num_attr; | |
124 | } | |
125 | return res; | |
126 | } | |
127 | ||
128 | /* | |
129 | * Like info/exclude and .gitignore, the attribute information can | |
130 | * come from many places. | |
131 | * | |
132 | * (1) .gitattributes file of the same directory; | |
133 | * (2) .gitattributes file of the parent directory if (1) does not have any match; | |
134 | * this goes recursively upwards, just like .gitignore; | |
135 | * (3) perhaps $GIT_DIR/info/attributes, as the final fallback. | |
136 | * | |
137 | * In the same file, later entries override the earlier match, so in the | |
138 | * global list, we would have entries from info/attributes the earliest | |
139 | * (reading the file from top to bottom), .gitattributes of the root | |
140 | * directory (again, reading the file from top to bottom) down to the | |
141 | * current directory, and then scan the list backwards to find the first match. | |
142 | * This is exactly the same as what excluded() does in dir.c to deal with | |
143 | * .gitignore. | |
144 | */ | |
145 | ||
/*
 * One level of the attribute stack: the parsed contents of a single
 * attribute source.
 */
struct attr_stack {
	struct attr_stack *prev;	/* the element below this one */
	char *origin;			/* directory the entries came from, or NULL */
	unsigned num_matches;		/* number of entries in attrs[] */
	struct match_attr **attrs;	/* parsed lines, in file order */
};

/* Top of the attribute stack; NULL until it is first populated. */
static struct attr_stack *attr_stack;
152 | ||
153 | static void free_attr_elem(struct attr_stack *e) | |
154 | { | |
155 | int i; | |
156 | free(e->origin); | |
157 | for (i = 0; i < e->num_matches; i++) | |
158 | free(e->attrs[i]); | |
159 | free(e); | |
160 | } | |
161 | ||
/*
 * Built-in attribute lines, in the same syntax as an attributes file.
 * The list is terminated by NULL and currently has no entries.
 */
static const char *builtin_attr[] = {
	NULL,
};
165 | ||
166 | static struct attr_stack *read_attr_from_array(const char **list) | |
167 | { | |
168 | struct attr_stack *res; | |
169 | const char *line; | |
170 | ||
171 | res = xcalloc(1, sizeof(*res)); | |
172 | while ((line = *(list++)) != NULL) { | |
173 | struct match_attr *a = parse_attr_line(line); | |
174 | if (!a) | |
175 | continue; | |
176 | res->attrs = xrealloc(res->attrs, res->num_matches + 1); | |
177 | res->attrs[res->num_matches++] = a; | |
178 | } | |
179 | return res; | |
180 | } | |
181 | ||
182 | static struct attr_stack *read_attr_from_file(const char *path) | |
183 | { | |
184 | FILE *fp; | |
185 | struct attr_stack *res; | |
186 | char buf[2048]; | |
187 | ||
188 | res = xcalloc(1, sizeof(*res)); | |
189 | fp = fopen(path, "r"); | |
190 | if (!fp) | |
191 | return res; | |
192 | ||
193 | while (fgets(buf, sizeof(buf), fp)) { | |
194 | struct match_attr *a = parse_attr_line(buf); | |
195 | if (!a) | |
196 | continue; | |
197 | res->attrs = xrealloc(res->attrs, res->num_matches + 1); | |
198 | res->attrs[res->num_matches++] = a; | |
199 | } | |
200 | fclose(fp); | |
201 | return res; | |
202 | } | |
203 | ||
/*
 * Tracing of attribute stack pushes and pops.  With DEBUG_ATTR enabled
 * each event is reported on stderr along with the element's origin
 * ("()" when it has none); otherwise the hooks expand to empty
 * statements.
 */
#if DEBUG_ATTR
static void debug_info(const char *what, struct attr_stack *elem)
{
	const char *origin = elem->origin;

	if (!origin)
		origin = "()";
	fprintf(stderr, "%s: %s\n", what, origin);
}
#define debug_push(a) debug_info("push", (a))
#define debug_pop(a) debug_info("pop", (a))
#else
#define debug_push(a) do { ; } while (0)
#define debug_pop(a) do { ; } while (0)
#endif
215 | ||
216 | static void prepare_attr_stack(const char *path, int dirlen) | |
217 | { | |
218 | struct attr_stack *elem, *info; | |
219 | int len; | |
220 | char pathbuf[PATH_MAX]; | |
221 | ||
222 | /* | |
223 | * At the bottom of the attribute stack is the built-in | |
224 | * set of attribute definitions. Then, contents from | |
225 | * .gitattribute files from directories closer to the | |
226 | * root to the ones in deeper directories are pushed | |
227 | * to the stack. Finally, at the very top of the stack | |
228 | * we always keep the contents of $GIT_DIR/info/attributes. | |
229 | * | |
230 | * When checking, we use entries from near the top of the | |
231 | * stack, preferring $GIT_DIR/info/attributes, then | |
232 | * .gitattributes in deeper directories to shallower ones, | |
233 | * and finally use the built-in set as the default. | |
234 | */ | |
235 | if (!attr_stack) { | |
236 | elem = read_attr_from_array(builtin_attr); | |
237 | elem->origin = NULL; | |
238 | elem->prev = attr_stack; | |
239 | attr_stack = elem; | |
240 | ||
241 | elem = read_attr_from_file(GITATTRIBUTES_FILE); | |
242 | elem->origin = strdup(""); | |
243 | elem->prev = attr_stack; | |
244 | attr_stack = elem; | |
245 | debug_push(elem); | |
246 | ||
247 | elem = read_attr_from_file(git_path(INFOATTRIBUTES_FILE)); | |
248 | elem->origin = NULL; | |
249 | elem->prev = attr_stack; | |
250 | attr_stack = elem; | |
251 | } | |
252 | ||
253 | /* | |
254 | * Pop the "info" one that is always at the top of the stack. | |
255 | */ | |
256 | info = attr_stack; | |
257 | attr_stack = info->prev; | |
258 | ||
259 | /* | |
260 | * Pop the ones from directories that are not the prefix of | |
261 | * the path we are checking. | |
262 | */ | |
263 | while (attr_stack && attr_stack->origin) { | |
264 | int namelen = strlen(attr_stack->origin); | |
265 | ||
266 | elem = attr_stack; | |
267 | if (namelen <= dirlen && | |
268 | !strncmp(elem->origin, path, namelen)) | |
269 | break; | |
270 | ||
271 | debug_pop(elem); | |
272 | attr_stack = elem->prev; | |
273 | free_attr_elem(elem); | |
274 | } | |
275 | ||
276 | /* | |
277 | * Read from parent directories and push them down | |
278 | */ | |
279 | while (1) { | |
280 | char *cp; | |
281 | ||
282 | len = strlen(attr_stack->origin); | |
283 | if (dirlen <= len) | |
284 | break; | |
285 | memcpy(pathbuf, path, dirlen); | |
286 | memcpy(pathbuf + dirlen, "/", 2); | |
287 | cp = strchr(pathbuf + len + 1, '/'); | |
288 | strcpy(cp + 1, GITATTRIBUTES_FILE); | |
289 | elem = read_attr_from_file(pathbuf); | |
290 | *cp = '\0'; | |
291 | elem->origin = strdup(pathbuf); | |
292 | elem->prev = attr_stack; | |
293 | attr_stack = elem; | |
294 | debug_push(elem); | |
295 | } | |
296 | ||
297 | /* | |
298 | * Finally push the "info" one at the top of the stack. | |
299 | */ | |
300 | info->prev = attr_stack; | |
301 | attr_stack = info; | |
302 | } | |
303 | ||
/*
 * Decide whether "pathname" (of length pathlen) matches "pattern",
 * where the pattern comes from an attribute source rooted at the
 * directory "base" (of length baselen; empty for the top level).
 *
 * A pattern without '/' is matched against the last path component
 * only.  A pattern with '/' is matched, with FNM_PATHNAME, against the
 * part of the path below base; a leading '/' in the pattern is
 * ignored.  base may be given with or without a trailing '/'.
 *
 * Returns 1 on a match and 0 otherwise.
 */
static int path_matches(const char *pathname, int pathlen,
			const char *pattern,
			const char *base, int baselen)
{
	if (!strchr(pattern, '/')) {
		/* match basename */
		const char *basename = strrchr(pathname, '/');
		basename = basename ? basename + 1 : pathname;
		return (fnmatch(pattern, basename, 0) == 0);
	}
	/*
	 * match with FNM_PATHNAME; the pattern has base implicitly
	 * in front of it.
	 */
	if (*pattern == '/')
		pattern++;

	/*
	 * Stack origins are stored without a trailing '/', so normalize
	 * to that form and look for the separator in the path itself.
	 * The old test of pathname[baselen - 1] only worked for a base
	 * ending in '/', which made every such pattern from a
	 * subdirectory fail.
	 */
	if (baselen && base[baselen - 1] == '/')
		baselen--;
	if (pathlen < baselen ||
	    (baselen && pathname[baselen] != '/') ||
	    strncmp(pathname, base, baselen))
		return 0;
	/* step over the '/' that separates base from the rest */
	if (baselen)
		baselen++;
	return fnmatch(pattern, pathname + baselen, FNM_PATHNAME) == 0;
}
326 | ||
327 | /* | |
328 | * I do not like this at all. Only because we allow individual | |
329 | * attribute to be set or unset incrementally by individual | |
330 | * lines in .gitattribute files, we need to do this triple | |
331 | * loop which looks quite wasteful. | |
332 | */ | |
333 | static int fill(const char *path, int pathlen, | |
334 | struct attr_stack *stk, struct git_attr_check *check, | |
335 | int num, int rem) | |
336 | { | |
337 | int i, j, k; | |
338 | const char *base = stk->origin ? stk->origin : ""; | |
339 | ||
340 | for (i = stk->num_matches - 1; 0 < rem && 0 <= i; i--) { | |
341 | struct match_attr *a = stk->attrs[i]; | |
342 | if (path_matches(path, pathlen, | |
343 | a->pattern, base, strlen(base))) { | |
344 | for (j = 0; j < a->num_attr; j++) { | |
345 | struct git_attr *attr = a->state[j].attr; | |
346 | int set = !a->state[j].unset; | |
347 | for (k = 0; k < num; k++) { | |
348 | if (0 <= check[k].isset || | |
349 | check[k].attr != attr) | |
350 | continue; | |
351 | check[k].isset = set; | |
352 | rem--; | |
353 | } | |
354 | } | |
355 | } | |
356 | } | |
357 | return rem; | |
358 | } | |
359 | ||
360 | int git_checkattr(const char *path, int num, struct git_attr_check *check) | |
361 | { | |
362 | struct attr_stack *stk; | |
363 | const char *cp; | |
364 | int dirlen, pathlen, i, rem; | |
365 | ||
366 | for (i = 0; i < num; i++) | |
367 | check[i].isset = -1; | |
368 | ||
369 | pathlen = strlen(path); | |
370 | cp = strrchr(path, '/'); | |
371 | if (!cp) | |
372 | dirlen = 0; | |
373 | else | |
374 | dirlen = cp - path; | |
375 | prepare_attr_stack(path, dirlen); | |
376 | rem = num; | |
377 | for (stk = attr_stack; 0 < rem && stk; stk = stk->prev) | |
378 | rem = fill(path, pathlen, stk, check, num, rem); | |
379 | return 0; | |
380 | } | |
35ebfd6a JH |
381 | |
382 | static void setup_binary_check(struct git_attr_check *check) | |
383 | { | |
384 | static struct git_attr *attr_binary; | |
385 | ||
386 | if (!attr_binary) | |
387 | attr_binary = git_attr("binary", 6); | |
388 | check->attr = attr_binary; | |
389 | } | |
390 | ||
391 | int git_path_is_binary(const char *path) | |
392 | { | |
393 | struct git_attr_check attr_binary_check; | |
394 | ||
395 | setup_binary_check(&attr_binary_check); | |
396 | return (!git_checkattr(path, 1, &attr_binary_check) && | |
397 | (0 < attr_binary_check.isset)); | |
398 | } |