From 034612cd515f249453a435f3d94b04f65744a54d Mon Sep 17 00:00:00 2001 From: Daniel Stenberg Date: Tue, 5 Aug 2025 16:00:20 +0200 Subject: [PATCH] tool_urlglob: polish, cleanups, improvements - assert instead of printing "internal error" for unlikely events - avoid allocating the main struct - convert globerror() from macro to function - renames to shorter and clearer names - malloc + copy => memdup0 - change buffer handling to dynbuf - realloc to handle more globs, but use less memory for few Closes #18198 --- src/tool_cfgable.h | 4 +- src/tool_operate.c | 25 +-- src/tool_urlglob.c | 383 +++++++++++++++++++++------------------------ src/tool_urlglob.h | 51 +++--- 4 files changed, 216 insertions(+), 247 deletions(-) diff --git a/src/tool_cfgable.h b/src/tool_cfgable.h index 28f90d264c..e4c969ebf1 100644 --- a/src/tool_cfgable.h +++ b/src/tool_cfgable.h @@ -66,8 +66,8 @@ struct GlobalConfig; struct State { struct getout *urlnode; - struct URLGlob *inglob; - struct URLGlob *urls; + struct URLGlob inglob; + struct URLGlob urlglob; char *outfiles; char *httpgetfields; char *uploadfile; diff --git a/src/tool_operate.c b/src/tool_operate.c index e785bc321b..de5caaea99 100644 --- a/src/tool_operate.c +++ b/src/tool_operate.c @@ -348,7 +348,7 @@ void single_transfer_cleanup(struct OperationConfig *config) state = &config->state; /* Free list of remaining URLs */ - glob_cleanup(&state->urls); + glob_cleanup(&state->urlglob); state->outfiles = NULL; tool_safefree(state->uploadfile); /* Free list of globbed upload files */ @@ -862,7 +862,7 @@ static CURLcode etag_store(struct OperationConfig *config, warnf(config->global, "Failed creating file for saving etags: \"%s\". " "Skip this transfer", config->etag_save_file); state->outfiles = NULL; - glob_cleanup(&state->urls); + glob_cleanup(&state->urlglob); *skip = TRUE; return CURLE_OK; } @@ -955,10 +955,11 @@ static CURLcode setup_outfile(struct OperationConfig *config, return result; } } - else if(state->urls) { + else if(glob_inuse(&state->urlglob)) { /* fill '#1' ... '#9' terms from URL pattern */ char *storefile = per->outfile; - CURLcode result = glob_match_url(&per->outfile, storefile, state->urls); + CURLcode result = + glob_match_url(&per->outfile, storefile, &state->urlglob); tool_safefree(storefile); if(result) { /* bad globbing */ @@ -1153,7 +1154,7 @@ static CURLcode single_transfer(struct OperationConfig *config, if(u->outfile && !state->outfiles) state->outfiles = u->outfile; - if(!config->globoff && u->infile && !state->inglob) { + if(!config->globoff && u->infile && !glob_inuse(&state->inglob)) { /* Unless explicitly shut off */ result = glob_url(&state->inglob, u->infile, &state->infilenum, (!global->silent || global->showerror) ? @@ -1165,8 +1166,8 @@ static CURLcode single_transfer(struct OperationConfig *config, if(state->up || u->infile) { if(!state->uploadfile) { - if(state->inglob) { - result = glob_next_url(&state->uploadfile, state->inglob); + if(glob_inuse(&state->inglob)) { + result = glob_next_url(&state->uploadfile, &state->inglob); if(result == CURLE_OUT_OF_MEMORY) errorf(global, "out of memory"); } @@ -1184,7 +1185,7 @@ static CURLcode single_transfer(struct OperationConfig *config, if(!config->globoff && !u->noglob) { /* Unless explicitly shut off, we expand '{...}' and '[...]' expressions and return total number of URLs in pattern set */ - result = glob_url(&state->urls, u->url, &state->urlnum, + result = glob_url(&state->urlglob, u->url, &state->urlnum, (!global->silent || global->showerror) ? tool_stderr : NULL); if(result) @@ -1268,8 +1269,8 @@ static CURLcode single_transfer(struct OperationConfig *config, /* default output stream is stdout */ outs->stream = stdout; - if(state->urls) { - result = glob_next_url(&per->url, state->urls); + if(glob_inuse(&state->urlglob)) { + result = glob_next_url(&per->url, &state->urlglob); if(result) return result; } @@ -1371,7 +1372,7 @@ static CURLcode single_transfer(struct OperationConfig *config, if(state->li >= state->urlnum) { state->li = 0; state->urlnum = 0; /* forced reglob of URLs */ - glob_cleanup(&state->urls); + glob_cleanup(&state->urlglob); state->up++; tool_safefree(state->uploadfile); /* clear it to get the next */ } @@ -1383,7 +1384,7 @@ static CURLcode single_transfer(struct OperationConfig *config, node itself nor modifying next pointer. */ u->outset = u->urlset = u->useremote = u->uploadset = u->noupload = u->noglob = FALSE; - glob_cleanup(&state->urls); + glob_cleanup(&state->urlglob); state->urlnum = 0; state->outfiles = NULL; diff --git a/src/tool_urlglob.c b/src/tool_urlglob.c index d79aa2990b..63d3a7c72a 100644 --- a/src/tool_urlglob.c +++ b/src/tool_urlglob.c @@ -30,25 +30,30 @@ #include "tool_strdup.h" #include "memdebug.h" /* keep this as LAST include */ -#define GLOBERROR(string, column, code) \ - glob->error = string, glob->pos = column, code +static CURLcode globerror(struct URLGlob *glob, const char *err, + size_t pos, CURLcode error) +{ + glob->error = err; + glob->pos = pos; + return error; +} static CURLcode glob_fixed(struct URLGlob *glob, char *fixed, size_t len) { struct URLPattern *pat = &glob->pattern[glob->size]; - pat->type = UPTSet; - pat->content.Set.size = 1; - pat->content.Set.ptr_s = 0; + pat->type = GLOB_SET; + pat->c.set.size = 1; + pat->c.set.idx = 0; pat->globindex = -1; - pat->content.Set.elements = malloc(sizeof(char *)); + pat->c.set.elem = malloc(sizeof(char *)); - if(!pat->content.Set.elements) - return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY); + if(!pat->c.set.elem) + return globerror(glob, NULL, 0, CURLE_OUT_OF_MEMORY); - pat->content.Set.elements[0] = memdup0(fixed, len); - if(!pat->content.Set.elements[0]) - return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY); + pat->c.set.elem[0] = memdup0(fixed, len); + if(!pat->c.set.elem[0]) + return globerror(glob, NULL, 0, CURLE_OUT_OF_MEMORY); return CURLE_OK; } @@ -90,61 +95,58 @@ static CURLcode glob_set(struct URLGlob *glob, const char **patternp, */ struct URLPattern *pat; bool done = FALSE; - char *buf = glob->glob_buffer; const char *pattern = *patternp; const char *opattern = pattern; size_t opos = *posp-1; pat = &glob->pattern[glob->size]; /* patterns 0,1,2,... correspond to size=1,3,5,... */ - pat->type = UPTSet; - pat->content.Set.size = 0; - pat->content.Set.ptr_s = 0; - pat->content.Set.elements = NULL; + pat->type = GLOB_SET; + pat->c.set.size = 0; + pat->c.set.idx = 0; + pat->c.set.elem = NULL; pat->globindex = globindex; while(!done) { switch(*pattern) { case '\0': /* URL ended while set was still open */ - return GLOBERROR("unmatched brace", opos, CURLE_URL_MALFORMAT); + return globerror(glob, "unmatched brace", opos, CURLE_URL_MALFORMAT); case '{': case '[': /* no nested expressions at this time */ - return GLOBERROR("nested brace", *posp, CURLE_URL_MALFORMAT); + return globerror(glob, "nested brace", *posp, CURLE_URL_MALFORMAT); case '}': /* set element completed */ if(opattern == pattern) - return GLOBERROR("empty string within braces", *posp, + return globerror(glob, "empty string within braces", *posp, CURLE_URL_MALFORMAT); /* add 1 to size since it will be incremented below */ - if(multiply(amount, pat->content.Set.size + 1)) - return GLOBERROR("range overflow", 0, CURLE_URL_MALFORMAT); + if(multiply(amount, pat->c.set.size + 1)) + return globerror(glob, "range overflow", 0, CURLE_URL_MALFORMAT); FALLTHROUGH(); case ',': - - *buf = '\0'; - if(pat->content.Set.elements) { - char **new_arr = realloc(pat->content.Set.elements, - (size_t)(pat->content.Set.size + 1) * + if(pat->c.set.elem) { + char **new_arr = realloc(pat->c.set.elem, + (size_t)(pat->c.set.size + 1) * sizeof(char *)); if(!new_arr) - return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY); + return globerror(glob, NULL, 0, CURLE_OUT_OF_MEMORY); - pat->content.Set.elements = new_arr; + pat->c.set.elem = new_arr; } else - pat->content.Set.elements = malloc(sizeof(char *)); + pat->c.set.elem = malloc(sizeof(char *)); - if(!pat->content.Set.elements) - return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY); + if(!pat->c.set.elem) + return globerror(glob, NULL, 0, CURLE_OUT_OF_MEMORY); - pat->content.Set.elements[pat->content.Set.size] = - strdup(glob->glob_buffer); - if(!pat->content.Set.elements[pat->content.Set.size]) - return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY); - ++pat->content.Set.size; + pat->c.set.elem[pat->c.set.size] = strdup(curlx_dyn_ptr(&glob->buf)); + if(!pat->c.set.elem[pat->c.set.size]) + return globerror(glob, NULL, 0, CURLE_OUT_OF_MEMORY); + ++pat->c.set.size; + curlx_dyn_reset(&glob->buf); if(*pattern == '}') { pattern++; /* pass the closing brace */ @@ -152,13 +154,13 @@ static CURLcode glob_set(struct URLGlob *glob, const char **patternp, continue; } - buf = glob->glob_buffer; ++pattern; ++(*posp); break; case ']': /* illegal closing bracket */ - return GLOBERROR("unexpected close bracket", *posp, CURLE_URL_MALFORMAT); + return globerror(glob, "unexpected close bracket", *posp, + CURLE_URL_MALFORMAT); case '\\': /* escaped character, skip '\' */ if(pattern[1]) { @@ -167,7 +169,9 @@ static CURLcode glob_set(struct URLGlob *glob, const char **patternp, } FALLTHROUGH(); default: - *buf++ = *pattern++; /* copy character to set element */ + /* copy character to set element */ + if(curlx_dyn_addn(&glob->buf, pattern++, 1)) + return CURLE_OUT_OF_MEMORY; ++(*posp); } } @@ -199,9 +203,9 @@ static CURLcode glob_range(struct URLGlob *glob, const char **patternp, char min_c = 0; char max_c = 0; char end_c = 0; - unsigned long step = 1; + unsigned char step = 1; - pat->type = UPTCharRange; + pat->type = GLOB_ASCII; if((pattern[1] == '-') && pattern[2] && pattern[3]) { min_c = pattern[0]; @@ -215,7 +219,7 @@ static CURLcode glob_range(struct URLGlob *glob, const char **patternp, if(curlx_str_number(&p, &num, 256) || curlx_str_single(&p, ']')) step = 0; else - step = (unsigned long)num; + step = (unsigned char)num; pattern = p; } else if(end_c != ']') @@ -233,50 +237,49 @@ static CURLcode glob_range(struct URLGlob *glob, const char **patternp, (min_c != max_c && (min_c > max_c || step > (unsigned)(max_c - min_c) || (max_c - min_c) > ('z' - 'a')))) /* the pattern is not well-formed */ - return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT); + return globerror(glob, "bad range", *posp, CURLE_URL_MALFORMAT); /* if there was a ":[num]" thing, use that as step or else use 1 */ - pat->content.CharRange.step = (int)step; - pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c; - pat->content.CharRange.max_c = max_c; - - if(multiply(amount, ((pat->content.CharRange.max_c - - pat->content.CharRange.min_c) / - pat->content.CharRange.step + 1))) - return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT); + pat->c.ascii.step = step; + pat->c.ascii.letter = pat->c.ascii.min = min_c; + pat->c.ascii.max = max_c; + + if(multiply(amount, ((pat->c.ascii.max - pat->c.ascii.min) / + pat->c.ascii.step + 1))) + return globerror(glob, "range overflow", *posp, CURLE_URL_MALFORMAT); } else if(ISDIGIT(*pattern)) { /* numeric range detected */ - unsigned long min_n = 0; - unsigned long max_n = 0; - unsigned long step_n = 0; + curl_off_t min_n = 0; + curl_off_t max_n = 0; + curl_off_t step_n = 0; curl_off_t num; - pat->type = UPTNumRange; - pat->content.NumRange.padlength = 0; + pat->type = GLOB_NUM; + pat->c.num.npad = 0; if(*pattern == '0') { /* leading zero specified, count them! */ c = pattern; while(ISDIGIT(*c)) { c++; - ++pat->content.NumRange.padlength; /* padding length is set for all - instances of this pattern */ + ++pat->c.num.npad; /* padding length is set for all instances of this + pattern */ } } if(!curlx_str_number(&pattern, &num, CURL_OFF_T_MAX)) { - min_n = (unsigned long)num; + min_n = num; if(!curlx_str_single(&pattern, '-')) { curlx_str_passblanks(&pattern); if(!curlx_str_number(&pattern, &num, CURL_OFF_T_MAX)) { - max_n = (unsigned long)num; + max_n = num; if(!curlx_str_single(&pattern, ']')) step_n = 1; else if(!curlx_str_single(&pattern, ':') && !curlx_str_number(&pattern, &num, CURL_OFF_T_MAX) && !curlx_str_single(&pattern, ']')) { - step_n = (unsigned long)num; + step_n = num; } /* else bad syntax */ } @@ -289,21 +292,21 @@ static CURLcode glob_range(struct URLGlob *glob, const char **patternp, (min_n == max_n && step_n != 1) || (min_n != max_n && (min_n > max_n || step_n > (max_n - min_n)))) /* the pattern is not well-formed */ - return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT); + return globerror(glob, "bad range", *posp, CURLE_URL_MALFORMAT); /* typecasting to ints are fine here since we make sure above that we are within 31 bits */ - pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n; - pat->content.NumRange.max_n = max_n; - pat->content.NumRange.step = step_n; - - if(multiply(amount, ((pat->content.NumRange.max_n - - pat->content.NumRange.min_n) / - pat->content.NumRange.step + 1))) - return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT); + pat->c.num.idx = pat->c.num.min = min_n; + pat->c.num.max = max_n; + pat->c.num.step = step_n; + + if(multiply(amount, ((pat->c.num.max - pat->c.num.min) / + pat->c.num.step + 1))) + return globerror(glob, "range overflow", *posp, CURLE_URL_MALFORMAT); } else - return GLOBERROR("bad range specification", *posp, CURLE_URL_MALFORMAT); + return globerror(glob, "bad range specification", *posp, + CURLE_URL_MALFORMAT); *patternp = pattern; return CURLE_OK; @@ -358,8 +361,6 @@ static CURLcode glob_parse(struct URLGlob *glob, const char *pattern, *amount = 1; while(*pattern && !res) { - char *buf = glob->glob_buffer; - size_t sublen = 0; while(*pattern && *pattern != '{') { if(*pattern == '[') { /* skip over IPv6 literals and [] */ @@ -367,16 +368,15 @@ static CURLcode glob_parse(struct URLGlob *glob, const char *pattern, if(!peek_ipv6(pattern, &skip) && (pattern[1] == ']')) skip = 2; if(skip) { - memcpy(buf, pattern, skip); - buf += skip; + if(curlx_dyn_addn(&glob->buf, pattern, skip)) + return CURLE_OUT_OF_MEMORY; pattern += skip; - sublen += skip; continue; } break; } if(*pattern == '}' || *pattern == ']') - return GLOBERROR("unmatched close brace/bracket", pos, + return globerror(glob, "unmatched close brace/bracket", pos, CURLE_URL_MALFORMAT); /* only allow \ to escape known "special letters" */ @@ -388,14 +388,16 @@ static CURLcode glob_parse(struct URLGlob *glob, const char *pattern, ++pattern; ++pos; } - *buf++ = *pattern++; /* copy character to literal */ + /* copy character to literal */ + if(curlx_dyn_addn(&glob->buf, pattern++, 1)) + return CURLE_OUT_OF_MEMORY; ++pos; - sublen++; } - if(sublen) { + if(curlx_dyn_len(&glob->buf)) { /* we got a literal string, add it as a single-item list */ - *buf = '\0'; - res = glob_fixed(glob, glob->glob_buffer, sublen); + res = glob_fixed(glob, curlx_dyn_ptr(&glob->buf), + curlx_dyn_len(&glob->buf)); + curlx_dyn_reset(&glob->buf); } else { switch(*pattern) { @@ -418,102 +420,96 @@ static CURLcode glob_parse(struct URLGlob *glob, const char *pattern, } } - if(++glob->size >= GLOB_PATTERN_NUM) - return GLOBERROR("too many globs", pos, CURLE_URL_MALFORMAT); + if(++glob->size >= glob->palloc) { + struct URLPattern *np = NULL; + glob->palloc *= 2; + if(glob->size < 10000) /* avoid ridiculous amounts */ + np = realloc(glob->pattern, glob->palloc * sizeof(struct URLPattern)); + if(!np) + return globerror(glob, NULL, pos, CURLE_OUT_OF_MEMORY); + glob->pattern = np; + } } return res; } -CURLcode glob_url(struct URLGlob **glob, char *url, curl_off_t *urlnum, +bool glob_inuse(struct URLGlob *glob) +{ + return glob->palloc ? TRUE : FALSE; +} + +CURLcode glob_url(struct URLGlob *glob, char *url, curl_off_t *urlnum, FILE *error) { /* * We can deal with any-size, just make a buffer with the same length * as the specified URL! */ - struct URLGlob *glob_expand; curl_off_t amount = 0; - char *glob_buffer; CURLcode res; - *glob = NULL; - - glob_buffer = malloc(strlen(url) + 1); - if(!glob_buffer) - return CURLE_OUT_OF_MEMORY; - glob_buffer[0] = 0; - - glob_expand = calloc(1, sizeof(struct URLGlob)); - if(!glob_expand) { - tool_safefree(glob_buffer); + memset(glob, 0, sizeof(struct URLGlob)); + curlx_dyn_init(&glob->buf, 1024*1024); + glob->pattern = malloc(2 * sizeof(struct URLPattern)); + if(!glob->pattern) return CURLE_OUT_OF_MEMORY; - } - glob_expand->urllen = strlen(url); - glob_expand->glob_buffer = glob_buffer; + glob->palloc = 2; - res = glob_parse(glob_expand, url, 1, &amount); + res = glob_parse(glob, url, 1, &amount); if(!res) *urlnum = amount; else { - if(error && glob_expand->error) { + if(error && glob->error) { char text[512]; const char *t; - if(glob_expand->pos) { + if(glob->pos) { msnprintf(text, sizeof(text), "%s in URL position %zu:\n%s\n%*s^", - glob_expand->error, - glob_expand->pos, url, (int)glob_expand->pos - 1, " "); + glob->error, + glob->pos, url, (int)glob->pos - 1, " "); t = text; } else - t = glob_expand->error; + t = glob->error; /* send error description to the error-stream */ fprintf(error, "curl: (%d) %s\n", res, t); } /* it failed, we cleanup */ - glob_cleanup(&glob_expand); + glob_cleanup(glob); *urlnum = 1; return res; } - *glob = glob_expand; return CURLE_OK; } -void glob_cleanup(struct URLGlob **globp) +void glob_cleanup(struct URLGlob *glob) { size_t i; curl_off_t elem; - struct URLGlob *glob = *globp; - - if(!glob) - return; - - for(i = 0; i < glob->size; i++) { - if((glob->pattern[i].type == UPTSet) && - (glob->pattern[i].content.Set.elements)) { - for(elem = glob->pattern[i].content.Set.size - 1; - elem >= 0; - --elem) { - tool_safefree(glob->pattern[i].content.Set.elements[elem]); + + if(glob->pattern) { + for(i = 0; i < glob->size; i++) { + if((glob->pattern[i].type == GLOB_SET) && + (glob->pattern[i].c.set.elem)) { + for(elem = glob->pattern[i].c.set.size - 1; elem >= 0; --elem) + tool_safefree(glob->pattern[i].c.set.elem[elem]); + tool_safefree(glob->pattern[i].c.set.elem); } - tool_safefree(glob->pattern[i].content.Set.elements); } + tool_safefree(glob->pattern); + glob->palloc = 0; + curlx_dyn_free(&glob->buf); } - tool_safefree(glob->glob_buffer); - tool_safefree(glob); - *globp = NULL; } CURLcode glob_next_url(char **globbed, struct URLGlob *glob) { struct URLPattern *pat; size_t i; - size_t len; - size_t buflen = glob->urllen + 1; - char *buf = glob->glob_buffer; *globbed = NULL; + curlx_dyn_reset(&glob->buf); if(!glob->beenhere) glob->beenhere = 1; @@ -526,31 +522,28 @@ CURLcode glob_next_url(char **globbed, struct URLGlob *glob) carry = FALSE; pat = &glob->pattern[glob->size - 1 - i]; switch(pat->type) { - case UPTSet: - if((pat->content.Set.elements) && - (++pat->content.Set.ptr_s == pat->content.Set.size)) { - pat->content.Set.ptr_s = 0; + case GLOB_SET: + if((pat->c.set.elem) && (++pat->c.set.idx == pat->c.set.size)) { + pat->c.set.idx = 0; carry = TRUE; } break; - case UPTCharRange: - pat->content.CharRange.ptr_c = - (char)(pat->content.CharRange.step + - (int)((unsigned char)pat->content.CharRange.ptr_c)); - if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) { - pat->content.CharRange.ptr_c = pat->content.CharRange.min_c; + case GLOB_ASCII: + pat->c.ascii.letter += pat->c.ascii.step; + if(pat->c.ascii.letter > pat->c.ascii.max) { + pat->c.ascii.letter = pat->c.ascii.min; carry = TRUE; } break; - case UPTNumRange: - pat->content.NumRange.ptr_n += pat->content.NumRange.step; - if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) { - pat->content.NumRange.ptr_n = pat->content.NumRange.min_n; + case GLOB_NUM: + pat->c.num.idx += pat->c.num.step; + if(pat->c.num.idx > pat->c.num.max) { + pat->c.num.idx = pat->c.num.min; carry = TRUE; } break; default: - printf("internal error: invalid pattern type (%d)\n", (int)pat->type); + DEBUGASSERT(0); return CURLE_FAILED_INIT; } } @@ -562,68 +555,55 @@ CURLcode glob_next_url(char **globbed, struct URLGlob *glob) for(i = 0; i < glob->size; ++i) { pat = &glob->pattern[i]; switch(pat->type) { - case UPTSet: - if(pat->content.Set.elements) { - msnprintf(buf, buflen, "%s", - pat->content.Set.elements[pat->content.Set.ptr_s]); - len = strlen(buf); - buf += len; - buflen -= len; + case GLOB_SET: + if(pat->c.set.elem) { + if(curlx_dyn_add(&glob->buf, pat->c.set.elem[pat->c.set.idx])) + return CURLE_OUT_OF_MEMORY; } break; - case UPTCharRange: - if(buflen) { - *buf++ = pat->content.CharRange.ptr_c; - *buf = '\0'; - buflen--; - } + case GLOB_ASCII: { + char letter = (char)pat->c.ascii.letter; + if(curlx_dyn_addn(&glob->buf, &letter, 1)) + return CURLE_OUT_OF_MEMORY; break; - case UPTNumRange: - msnprintf(buf, buflen, "%0*" CURL_FORMAT_CURL_OFF_T, - pat->content.NumRange.padlength, - pat->content.NumRange.ptr_n); - len = strlen(buf); - buf += len; - buflen -= len; + } + case GLOB_NUM: + if(curlx_dyn_addf(&glob->buf, "%0*" CURL_FORMAT_CURL_OFF_T, + pat->c.num.npad, pat->c.num.idx)) + return CURLE_OUT_OF_MEMORY; break; default: - printf("internal error: invalid pattern type (%d)\n", (int)pat->type); + DEBUGASSERT(0); return CURLE_FAILED_INIT; } } - *globbed = strdup(glob->glob_buffer); + *globbed = strdup(curlx_dyn_ptr(&glob->buf)); if(!*globbed) return CURLE_OUT_OF_MEMORY; return CURLE_OK; } -#define MAX_OUTPUT_GLOB_LENGTH (10*1024) +#define MAX_OUTPUT_GLOB_LENGTH (1024*1024) -CURLcode glob_match_url(char **result, const char *filename, +CURLcode glob_match_url(char **output, const char *filename, struct URLGlob *glob) { - char numbuf[18]; - const char *appendthis = ""; - size_t appendlen = 0; struct dynbuf dyn; + *output = NULL; - *result = NULL; - - /* We cannot use the glob_buffer for storage since the filename may be - * longer than the URL we use. - */ curlx_dyn_init(&dyn, MAX_OUTPUT_GLOB_LENGTH); while(*filename) { + CURLcode result = CURLE_OK; if(*filename == '#' && ISDIGIT(filename[1])) { const char *ptr = filename; curl_off_t num; struct URLPattern *pat = NULL; filename++; if(!curlx_str_number(&filename, &num, glob->size) && num) { - unsigned long i; + size_t i; num--; /* make it zero based */ /* find the correct glob entry */ for(i = 0; i < glob->size; i++) { @@ -636,46 +616,33 @@ CURLcode glob_match_url(char **result, const char *filename, if(pat) { switch(pat->type) { - case UPTSet: - if(pat->content.Set.elements) { - appendthis = pat->content.Set.elements[pat->content.Set.ptr_s]; - appendlen = - strlen(pat->content.Set.elements[pat->content.Set.ptr_s]); - } + case GLOB_SET: + if(pat->c.set.elem) + result = curlx_dyn_add(&dyn, pat->c.set.elem[pat->c.set.idx]); break; - case UPTCharRange: - numbuf[0] = pat->content.CharRange.ptr_c; - numbuf[1] = 0; - appendthis = numbuf; - appendlen = 1; + case GLOB_ASCII: { + char letter = (char)pat->c.ascii.letter; + result = curlx_dyn_addn(&dyn, &letter, 1); break; - case UPTNumRange: - msnprintf(numbuf, sizeof(numbuf), "%0*" CURL_FORMAT_CURL_OFF_T, - pat->content.NumRange.padlength, - pat->content.NumRange.ptr_n); - appendthis = numbuf; - appendlen = strlen(numbuf); + } + case GLOB_NUM: + result = curlx_dyn_addf(&dyn, "%0*" CURL_FORMAT_CURL_OFF_T, + pat->c.num.npad, pat->c.num.idx); break; default: - fprintf(tool_stderr, "internal error: invalid pattern type (%d)\n", - (int)pat->type); + DEBUGASSERT(0); curlx_dyn_free(&dyn); return CURLE_FAILED_INIT; } } - else { + else /* #[num] out of range, use the #[num] in the output */ - filename = ptr; - appendthis = filename++; - appendlen = 1; - } - } - else { - appendthis = filename++; - appendlen = 1; + result = curlx_dyn_addn(&dyn, ptr, filename - ptr); } - if(curlx_dyn_addn(&dyn, appendthis, appendlen)) - return CURLE_OUT_OF_MEMORY; + else + result = curlx_dyn_addn(&dyn, filename++, 1); + if(result) + return result; } if(curlx_dyn_addn(&dyn, "", 0)) @@ -690,11 +657,11 @@ CURLcode glob_match_url(char **result, const char *filename, curlx_dyn_free(&dyn); if(sc) return CURLE_URL_MALFORMAT; - *result = sanitized; + *output = sanitized; return CURLE_OK; } #else - *result = curlx_dyn_ptr(&dyn); + *output = curlx_dyn_ptr(&dyn); return CURLE_OK; #endif /* _WIN32 || MSDOS */ } diff --git a/src/tool_urlglob.h b/src/tool_urlglob.h index 6fcd0db498..4bcf229ecc 100644 --- a/src/tool_urlglob.h +++ b/src/tool_urlglob.h @@ -26,53 +26,54 @@ #include "tool_setup.h" typedef enum { - UPTSet = 1, - UPTCharRange, - UPTNumRange -} URLPatternType; + GLOB_SET = 1, + GLOB_ASCII, + GLOB_NUM +} globtype; struct URLPattern { - URLPatternType type; + globtype type; int globindex; /* the number of this particular glob or -1 if not used within {} or [] */ union { struct { - char **elements; + char **elem; curl_off_t size; - int ptr_s; - } Set; + curl_off_t idx; + } set; struct { - char min_c; - char max_c; - char ptr_c; - int step; - } CharRange; + int min; + int max; + int letter; + unsigned char step; + } ascii; struct { - curl_off_t min_n; - curl_off_t max_n; - int padlength; - curl_off_t ptr_n; + curl_off_t min; + curl_off_t max; + curl_off_t idx; curl_off_t step; - } NumRange; - } content; + int npad; + } num; + } c; }; /* the total number of globs supported */ -#define GLOB_PATTERN_NUM 100 +#define GLOB_PATTERN_NUM 30 struct URLGlob { - struct URLPattern pattern[GLOB_PATTERN_NUM]; + struct dynbuf buf; + struct URLPattern *pattern; + size_t palloc; /* number of pattern entries allocated */ size_t size; - size_t urllen; - char *glob_buffer; char beenhere; const char *error; /* error message */ size_t pos; /* column position of error or 0 */ }; -CURLcode glob_url(struct URLGlob**, char *, curl_off_t *, FILE *); +CURLcode glob_url(struct URLGlob *, char *, curl_off_t *, FILE *); CURLcode glob_next_url(char **, struct URLGlob *); CURLcode glob_match_url(char **, const char *, struct URLGlob *); -void glob_cleanup(struct URLGlob **glob); +void glob_cleanup(struct URLGlob *glob); +bool glob_inuse(struct URLGlob *glob); #endif /* HEADER_CURL_TOOL_URLGLOB_H */ -- 2.47.3