unsigned char from = (u8)range_start;
if (from > to) {
throw LocatedParseError("Range out of order in character class");
- } else {
- in_cand_range = false;
- cr.setRange(from, to);
- range_start = INVALID_UNICODE;
}
+
+ in_cand_range = false;
+ CharReach ncr(from, to);
+ if (mode.caseless) {
+ make_caseless(&ncr);
+ }
+ cr |= ncr;
+ range_start = INVALID_UNICODE;
}
void AsciiComponentClass::notePositions(GlushkovBuildState &bs) {
c = translateForUcpMode(c, mode);
}
+ // Note: caselessness is handled by getPredefinedCharReach.
CharReach pcr = getPredefinedCharReach(c, mode);
if (negative) {
pcr.flip();
}
- if (isUcp(c)) {
- cr_ucp |= pcr;
- } else {
- cr |= pcr;
- }
+ cr |= pcr;
range_start = INVALID_UNICODE;
in_cand_range = false;
}
return;
}
- cr.set(c);
+ CharReach ncr(c, c);
+ if (mode.caseless) {
+ make_caseless(&ncr);
+ }
+
+ cr |= ncr;
range_start = c;
}
in_cand_range = false;
}
- if (mode.caseless) {
- make_caseless(&cr);
- }
-
- cr |= cr_ucp; /* characters from ucp props don't participate in caseless */
-
if (m_negate) {
cr.flip();
}
private:
Position position;
CharReach cr;
- CharReach cr_ucp;
// Private copy ctor. Use clone instead.
AsciiComponentClass(const AsciiComponentClass &other)
- : ComponentClass(other), position(other.position), cr(other.cr),
- cr_ucp(other.cr_ucp) {}
+ : ComponentClass(other), position(other.position), cr(other.cr) {}
};
} // namespace ue2
unichar from = range_start;
if (from > to) {
throw LocatedParseError("Range out of order in character class");
- } else {
- in_cand_range = false;
- CodePointSet ncps;
- ncps.setRange(from, to);
- if (mode.caseless) {
- make_caseless(&ncps);
- }
- cps |= ncps;
- range_start = INVALID_UNICODE;
}
+
+ in_cand_range = false;
+ CodePointSet ncps;
+ ncps.setRange(from, to);
+ if (mode.caseless) {
+ make_caseless(&ncps);
+ }
+ cps |= ncps;
+ range_start = INVALID_UNICODE;
}
void UTF8ComponentClass::add(PredefinedClass c, bool negative) {
pcps.flip();
}
- if (isUcp(c)) {
- cps_ucp |= pcps;
- } else {
- cps |= pcps;
- }
+ cps |= pcps;
range_start = INVALID_UNICODE;
in_cand_range = false;
in_cand_range = false;
}
- cps |= cps_ucp; /* characters from ucp props always case sensitive */
-
if (m_negate) {
cps.flip();
}
finalized = true;
}
-bool isUcp(PredefinedClass c) {
- switch (c) {
- case CLASS_ALNUM:
- case CLASS_ALPHA:
- case CLASS_ANY:
- case CLASS_ASCII:
- case CLASS_BLANK:
- case CLASS_CNTRL:
- case CLASS_DIGIT:
- case CLASS_GRAPH:
- case CLASS_HORZ:
- case CLASS_LOWER:
- case CLASS_PRINT:
- case CLASS_PUNCT:
- case CLASS_SPACE:
- case CLASS_UPPER:
- case CLASS_VERT:
- case CLASS_WORD:
- case CLASS_XDIGIT:
- return false;
- default:
- return true;
- }
-}
-
Position UTF8ComponentClass::getHead(NFABuilder &builder, u8 first_byte) {
map<u8, Position>::const_iterator it = heads.find(first_byte);
if (it != heads.end()) {
void buildFourByte(GlushkovBuildState &bs);
CodePointSet cps;
- CodePointSet cps_ucp;
std::map<u8, Position> heads;
Position single_pos;
};
PredefinedClass translateForUcpMode(PredefinedClass in, const ParseMode &mode);
-bool isUcp(PredefinedClass c);
CodePointSet getPredefinedCodePointSet(PredefinedClass c,
const ParseMode &mode);