From: Tom Lane Date: Mon, 11 May 2026 12:13:46 +0000 (-0700) Subject: Harden our regex engine against integer overflow in size calculations. X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0dc1fdc75ebbad9419ac5e313064be0fcf092543;p=thirdparty%2Fpostgresql.git Harden our regex engine against integer overflow in size calculations. The number of NFA states, number of NFA arcs, and number of colors are all bounded to reasonably small values. However, there are places where we try to allocate arrays sized by products of those quantities, and those calculations could overflow, enabling buffer-overrun attacks. In practice there's no problem on 64-bit machines, but there are some live scenarios on 32-bit machines. A related problem is that citerdissect() and creviterdissect() allocate arrays based on the length of the input string, which potentially could overflow. To fix, invent MALLOC_ARRAY and REALLOC_ARRAY macros that rely on palloc_array_extended and repalloc_array_extended with the NO_OOM option, similarly to the existing MALLOC and REALLOC macros. (Like those, they'll throw an error not return a NULL result for oversize requests. This doesn't really fit into the regex code's view of error handling, but it'll do for now. We can consider whether to change that behavior in a non-security follow-up patch.) I installed similar defenses in the colormap construction code. It's not entirely clear whether integer overflow is possible there, but analyzing the behavior in detail seems not worth the trouble, as the risky spots are not in hot code paths. I left a bunch of calls as-is after verifying that they can't overflow given reasonable limits on nstates and narcs. Those limits were enforced already via REG_MAX_COMPILE_SPACE, but add commentary to document the interactions. In passing, also fix a related edge case, which is that the special color numbers used in LACON carcs could overflow the "color" data type, if ncolors is close to MAX_COLOR. In v14 and v15, the regex engine calls malloc() directly instead of using palloc(), so MALLOC_ARRAY and REALLOC_ARRAY do likewise. Reported-by: Xint Code Author: Tom Lane Reviewed-by: Masahiko Sawada Backpatch-through: 14 Security: CVE-2026-6473 --- diff --git a/src/backend/regex/regc_color.c b/src/backend/regex/regc_color.c index 8ae788f5195..1587f452ea3 100644 --- a/src/backend/regex/regc_color.c +++ b/src/backend/regex/regc_color.c @@ -218,6 +218,7 @@ newcolor(struct colormap *cm) n = cm->ncds * 2; if (n > MAX_COLOR + 1) n = MAX_COLOR + 1; + /* the MAX_COLOR+1 limit ensures these alloc sizes can't overflow: */ if (cm->cd == cm->cdspace) { newCd = (struct colordesc *) MALLOC(n * sizeof(struct colordesc)); @@ -434,9 +435,8 @@ newhicolorrow(struct colormap *cm, CERR(REG_ESPACE); return 0; } - newarray = (color *) REALLOC(cm->hicolormap, - cm->maxarrayrows * 2 * - cm->hiarraycols * sizeof(color)); + newarray = REALLOC_ARRAY(cm->hicolormap, color, + cm->maxarrayrows * 2 * cm->hiarraycols); if (newarray == NULL) { CERR(REG_ESPACE); @@ -477,9 +477,8 @@ newhicolorcols(struct colormap *cm) CERR(REG_ESPACE); return; } - newarray = (color *) REALLOC(cm->hicolormap, - cm->maxarrayrows * - cm->hiarraycols * 2 * sizeof(color)); + newarray = REALLOC_ARRAY(cm->hicolormap, color, + cm->maxarrayrows * cm->hiarraycols * 2); if (newarray == NULL) { CERR(REG_ESPACE); @@ -652,8 +651,7 @@ subcoloronechr(struct vars *v, * Potentially, we could need two more colormapranges than we have now, if * the given chr is in the middle of some existing range. */ - newranges = (colormaprange *) - MALLOC((cm->numcmranges + 2) * sizeof(colormaprange)); + newranges = MALLOC_ARRAY(colormaprange, cm->numcmranges + 2); if (newranges == NULL) { CERR(REG_ESPACE); @@ -766,8 +764,7 @@ subcoloronerange(struct vars *v, * Potentially, if we have N non-adjacent ranges, we could need as many as * 2N+1 result ranges (consider case where new range spans 'em all). */ - newranges = (colormaprange *) - MALLOC((cm->numcmranges * 2 + 1) * sizeof(colormaprange)); + newranges = MALLOC_ARRAY(colormaprange, cm->numcmranges * 2 + 1); if (newranges == NULL) { CERR(REG_ESPACE); diff --git a/src/backend/regex/regc_cvec.c b/src/backend/regex/regc_cvec.c index 10306215596..8dbcf3c55e3 100644 --- a/src/backend/regex/regc_cvec.c +++ b/src/backend/regex/regc_cvec.c @@ -40,6 +40,9 @@ /* * newcvec - allocate a new cvec + * + * Note: in current usage, nchrs and nranges are never so large that we risk + * integer overflow in these size calculations, even with 32-bit size_t. */ static struct cvec * newcvec(int nchrs, /* to hold this many chrs... */ diff --git a/src/backend/regex/regc_nfa.c b/src/backend/regex/regc_nfa.c index acd2286defd..92b11116194 100644 --- a/src/backend/regex/regc_nfa.c +++ b/src/backend/regex/regc_nfa.c @@ -3523,6 +3523,10 @@ compact(struct nfa *nfa, assert(!NISERR()); + /* + * The REG_MAX_COMPILE_SPACE restriction ensures that integer overflow + * can't occur in this loop nor in the allocation requests below. + */ nstates = 0; narcs = 0; for (s = nfa->states; s != NULL; s = s->next) @@ -3575,6 +3579,12 @@ compact(struct nfa *nfa, case LACON: assert(s->no != cnfa->pre); assert(a->co >= 0); + /* make sure the modified color number will fit */ + if (a->co > MAX_COLOR - cnfa->ncolors) + { + NERR(REG_ECOLORS); + return; + } ca->co = (color) (cnfa->ncolors + a->co); ca->to = a->to->no; ca++; diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index 820995332ba..f50acc08577 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -561,6 +561,7 @@ moresubs(struct vars *v, assert(wanted > 0 && (size_t) wanted >= v->nsubs); n = (size_t) wanted * 3 / 2 + 1; + /* n is bounded by the number of states, so no chance of overflow here */ if (v->subs == v->sub10) { p = (struct subre **) MALLOC(n * sizeof(struct subre *)); @@ -2405,8 +2406,8 @@ newlacon(struct vars *v, else { n = v->nlacons; - newlacons = (struct subre *) REALLOC(v->lacons, - (n + 1) * sizeof(struct subre)); + /* better use REALLOC_ARRAY here, as struct subre is big */ + newlacons = REALLOC_ARRAY(v->lacons, struct subre, n + 1); } if (newlacons == NULL) { diff --git a/src/backend/regex/rege_dfa.c b/src/backend/regex/rege_dfa.c index 1f8f2ab1441..5b57fed60a9 100644 --- a/src/backend/regex/rege_dfa.c +++ b/src/backend/regex/rege_dfa.c @@ -640,20 +640,29 @@ newdfa(struct vars *v, } else { + /* + * Restrict the ranges of nstates and ncolors enough that the arrays + * we allocate here have no more than INT_MAX members. This protects + * not only the allocation calculations just below, but later indexing + * into these arrays. + */ + if (wordsper >= INT_MAX / (nss + WORK) || + cnfa->ncolors >= INT_MAX / nss) + { + ERR(REG_ETOOBIG); + return NULL; + } d = (struct dfa *) MALLOC(sizeof(struct dfa)); if (d == NULL) { ERR(REG_ESPACE); return NULL; } - d->ssets = (struct sset *) MALLOC(nss * sizeof(struct sset)); - d->statesarea = (unsigned *) MALLOC((nss + WORK) * wordsper * - sizeof(unsigned)); + d->ssets = MALLOC_ARRAY(struct sset, nss); + d->statesarea = MALLOC_ARRAY(unsigned, (nss + WORK) * wordsper); d->work = &d->statesarea[nss * wordsper]; - d->outsarea = (struct sset **) MALLOC(nss * cnfa->ncolors * - sizeof(struct sset *)); - d->incarea = (struct arcp *) MALLOC(nss * cnfa->ncolors * - sizeof(struct arcp)); + d->outsarea = MALLOC_ARRAY(struct sset *, nss * cnfa->ncolors); + d->incarea = MALLOC_ARRAY(struct arcp, nss * cnfa->ncolors); d->ismalloced = true; d->arraysmalloced = true; /* now freedfa() will behave sanely */ diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c index 2a1d5bebda3..665aa31bd03 100644 --- a/src/backend/regex/regexec.c +++ b/src/backend/regex/regexec.c @@ -231,7 +231,7 @@ pg_regexec(regex_t *re, if (v->nmatch <= LOCALMAT) v->pmatch = mat; else - v->pmatch = (regmatch_t *) MALLOC(v->nmatch * sizeof(regmatch_t)); + v->pmatch = MALLOC_ARRAY(regmatch_t, v->nmatch); if (v->pmatch == NULL) return REG_ESPACE; zapallsubs(v->pmatch, v->nmatch); @@ -265,6 +265,7 @@ pg_regexec(regex_t *re, v->subdfas = subdfas; else { + /* ntree is surely less than the number of states, so this is safe: */ v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *)); if (v->subdfas == NULL) { @@ -279,6 +280,7 @@ pg_regexec(regex_t *re, n = (size_t) v->g->nlacons; if (n > 0) { + /* nlacons is surely less than the number of arcs, so this is safe: */ v->ladfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *)); if (v->ladfas == NULL) { @@ -1163,7 +1165,7 @@ citerdissect(struct vars *v, max_matches = t->max; if (max_matches < min_matches) max_matches = min_matches; - endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *)); + endpts = MALLOC_ARRAY(chr *, max_matches + 1); if (endpts == NULL) return REG_ESPACE; endpts[0] = begin; @@ -1370,7 +1372,7 @@ creviterdissect(struct vars *v, max_matches = t->max; if (max_matches < min_matches) max_matches = min_matches; - endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *)); + endpts = MALLOC_ARRAY(chr *, max_matches + 1); if (endpts == NULL) return REG_ESPACE; endpts[0] = begin; diff --git a/src/include/regex/regcustom.h b/src/include/regex/regcustom.h index 4557e7a62c0..8c4d5d8cc7c 100644 --- a/src/include/regex/regcustom.h +++ b/src/include/regex/regcustom.h @@ -52,6 +52,8 @@ #define MALLOC(n) palloc_extended((n), MCXT_ALLOC_NO_OOM) #define FREE(p) pfree(VS(p)) #define REALLOC(p,n) repalloc_extended(VS(p),(n), MCXT_ALLOC_NO_OOM) +#define MALLOC_ARRAY(type, n) palloc_array_extended(type, n, MCXT_ALLOC_NO_OOM) +#define REALLOC_ARRAY(p, type, n) repalloc_array_extended(p, type, n, MCXT_ALLOC_NO_OOM) #define INTERRUPT(re) CHECK_FOR_INTERRUPTS() #undef assert #define assert(x) Assert(x) diff --git a/src/include/regex/regguts.h b/src/include/regex/regguts.h index fd69299a16d..6fb9551721d 100644 --- a/src/include/regex/regguts.h +++ b/src/include/regex/regguts.h @@ -76,6 +76,14 @@ #ifndef FREE #define FREE(p) free(VS(p)) #endif +#ifndef MALLOC_ARRAY +/* we don't depend on calloc's zeroing behavior, we do need overflow check */ +#define MALLOC_ARRAY(type, n) ((type *) calloc(sizeof(type), n)) +#endif +#ifndef REALLOC_ARRAY +/* XXX this definition does not provide the desired overflow check */ +#define REALLOC_ARRAY(p, type, n) ((type *) REALLOC(p, sizeof(type) * (n))) +#endif /* interruption */ #ifndef INTERRUPT @@ -446,6 +454,11 @@ struct cnfa * (the compacted NFA and the colormap). * The scaling here is based on an empirical measurement that very large * NFAs tend to have about 4 arcs/state. + * + * Do not raise this so high as to allow more than INT_MAX/8 states or arcs, + * or you risk integer overflows in various space allocation requests. + * (We could be more defensive in those places, but that's so far beyond the + * practical range of NFA sizes that it doesn't seem worth additional code.) */ #ifndef REG_MAX_COMPILE_SPACE #define REG_MAX_COMPILE_SPACE \