/* Look up `code` in a composition reindex table.
 *
 * nfc:  table sorted by .start ascending, holding disjoint inclusive
 *       ranges [start, start + count].  It must end with a sentinel entry
 *       whose .start is greater than every Unicode code point (the table
 *       generator emits 0x7fffffff); the scan below relies on that
 *       sentinel and has no other terminator.
 * code: code point to look up.
 *
 * Returns the entry's .index plus the offset of `code` inside its range,
 * or -1 when no range contains `code`.
 */
static int
find_nfc_index(const struct reindex* nfc, Py_UCS4 code)
{
    unsigned int i;
    unsigned int start;

    /* Py_UCS4 can hold values at or above the sentinel's .start; for such
       a value the scan would never stop and would read past the table.
       Nothing above U+10FFFF can be in any range, so reject it here. */
    if (code > 0x10ffff) {
        return -1;
    }

    /* Advance to the first entry that starts after `code`.  Because
       code <= 0x10ffff < sentinel .start, this always terminates at or
       before the sentinel with one comparison per entry. */
    for (i = 0; (Py_UCS4)nfc[i].start <= code; i++) {
    }

    /* Every range starts after `code`: no candidate. */
    if (i == 0) {
        return -1;
    }

    /* The only range that can contain `code` is the one just before the
       stop position; check its inclusive upper bound. */
    start = (unsigned int)nfc[i - 1].start;
    if (code <= start + nfc[i - 1].count) {
        return nfc[i - 1].index + (code - start);
    }
    return -1;
}
fprint("#define TOTAL_FIRST",total_first)
fprint("#define TOTAL_LAST",total_last)
fprint("struct reindex{int start;short count,index;};")
+ # The reindex tables are read only by find_nfc_index(), which scans
+ # forward while .start <= code. The trailing sentinel's .start must
+ # exceed every codepoint (so the scan stops with a single comparison)
+ # and fit the signed int .start field.
+ nfc_sentinel = 0x7fffffff
+ assert sys.maxunicode < nfc_sentinel <= 0x7fffffff
fprint("static struct reindex nfc_first[] = {")
for start,end in comp_first_ranges:
fprint(" { %d, %d, %d}," % (start,end-start,comp_first[start]))
- fprint(" {0,0,0}")
+ fprint(" {0x%x, 0, 0}" % nfc_sentinel)
fprint("};\n")
fprint("static struct reindex nfc_last[] = {")
for start,end in comp_last_ranges:
fprint(" { %d, %d, %d}," % (start,end-start,comp_last[start]))
- fprint(" {0,0,0}")
+ fprint(" {0x%x, 0, 0}" % nfc_sentinel)
fprint("};\n")
# FIXME: <fl> the following tables could be made static, and