include/cctype.h

   1 /* Character handling in C locale.
   2
   3    These functions work like the corresponding functions in <ctype.h>,
   4    except that they have the C (POSIX) locale hardwired, whereas the
   5    <ctype.h> functions' behaviour depends on the current locale set via
   6    setlocale.
   7
   8    Copyright (C) 2000-2003, 2006, 2008-2023 Free Software Foundation, Inc.
   9
  10    This file is free software: you can redistribute it and/or modify
  11    it under the terms of the GNU Lesser General Public License as
  12    published by the Free Software Foundation; either version 2.1 of the
  13    License, or (at your option) any later version.
  14
  15    This file is distributed in the hope that it will be useful,
  16    but WITHOUT ANY WARRANTY; without even the implied warranty of
  17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18    GNU Lesser General Public License for more details.
  19
  20    You should have received a copy of the GNU Lesser General Public License
  21    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  22
  23 #ifndef UTIL_LINUX_CCTYPE_H
  24 #define UTIL_LINUX_CCTYPE_H
  25
  26 /**
  27  * The functions defined in this file assume the "C" locale and a character
  28  * set without diacritics (ASCII-US or EBCDIC-US or something like that).
  29  * Even if the "C" locale on a particular system is an extension of the ASCII
  30  * character set (like on BeOS, where it is UTF-8, or on AmigaOS, where it
  31  * is ISO-8859-1), the functions in this file recognize only the ASCII
  32  * characters.
  33  */
  34
/*
 * Compile-time detection of the execution character set.
 *
 * The first test compares printable characters against their ASCII code
 * points ('$', '@' and '`' are not part of the test).  If every comparison
 * holds, C_CTYPE_ASCII is defined to 1.  Otherwise a handful of characters
 * are compared against their EBCDIC code points and compilation is aborted
 * if they do not match either.
 *
 * On EBCDIC, C_CTYPE_ASCII is left undefined, so a later "#if C_CTYPE_ASCII"
 * evaluates to 0 there.
 */
#if (' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
        && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
        && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
        && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
        && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
        && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
        && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
        && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
        && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
        && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
        && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
        && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
        && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
        && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
        && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
        && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
        && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
        && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
        && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
        && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
        && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
        && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
        && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)

/*
 * The character set is ASCII or one of its variants or extensions, not EBCDIC.
 * Testing the value of '\n' and '\r' is not relevant.
 */
# define C_CTYPE_ASCII 1
#elif ! (' ' == '\x40' && '0' == '\xf0'                     \
         && 'A' == '\xc1' && 'J' == '\xd1' && 'S' == '\xe2' \
         && 'a' == '\x81' && 'j' == '\x91' && 's' == '\xa2')
# error "Only ASCII and EBCDIC are supported"
#endif

/*
 * ASCII 'A' is 65 and therefore always positive; a negative 'A' can only
 * come from the EBCDIC code '\xc1' stored in a signed char.  The case labels
 * below rely on character constants being non-negative, so reject that
 * combination.
 */
#if 'A' < 0
# error "EBCDIC and char is signed -- not supported"
#endif
  73
/*
 * Cases for control characters.
 *
 * Like every _C_CTYPE_* case macro in this file, the expansion is a list of
 * "case X:" labels whose last label has no trailing colon; the colon is
 * written at the point of use ("_C_CTYPE_CNTRL:") inside a switch.
 */
#define _C_CTYPE_CNTRL \
        case '\a': case '\b': case '\f': case '\n': \
        case '\r': case '\t': case '\v': \
        _C_CTYPE_OTHER_CNTRL

/*
 * ASCII control characters other than those with \-letter escapes.
 *
 * In the ASCII variant the list covers 0x00-0x06, 0x0e-0x1f and 0x7f; the
 * codes 0x07-0x0d are absent because they are the \a \b \t \n \v \f \r
 * escapes already listed in _C_CTYPE_CNTRL (a duplicate case label would
 * not compile).
 */
#if C_CTYPE_ASCII
# define _C_CTYPE_OTHER_CNTRL \
        case '\x00': case '\x01': case '\x02': case '\x03': \
        case '\x04': case '\x05': case '\x06': case '\x0e': \
        case '\x0f': case '\x10': case '\x11': case '\x12': \
        case '\x13': case '\x14': case '\x15': case '\x16': \
        case '\x17': case '\x18': case '\x19': case '\x1a': \
        case '\x1b': case '\x1c': case '\x1d': case '\x1e': \
        case '\x1f': case '\x7f'
#else

/*
 * Use EBCDIC code page 1047's assignments for ASCII control chars;
 * assume all EBCDIC code pages agree about these assignments.
 */
# define _C_CTYPE_OTHER_CNTRL \
        case '\x00': case '\x01': case '\x02': case '\x03': \
        case '\x07': case '\x0e': case '\x0f': case '\x10': \
        case '\x11': case '\x12': case '\x13': case '\x18': \
        case '\x19': case '\x1c': case '\x1d': case '\x1e': \
        case '\x1f': case '\x26': case '\x27': case '\x2d': \
        case '\x2e': case '\x32': case '\x37': case '\x3c': \
        case '\x3d': case '\x3f'
#endif
 105
/*
 * Cases for lowercase hex letters, and lowercase letters, all offset by N.
 *
 * N is added to every lowercase character constant, so N == 0 yields the
 * lowercase letters themselves and N == 'A' - 'a' yields the corresponding
 * uppercase letters.  Each letter is listed explicitly instead of using a
 * range, so the macros do not depend on the letters being contiguous.
 */
#define _C_CTYPE_LOWER_A_THRU_F_N(N) \
        case 'a' + (N): case 'b' + (N): case 'c' + (N): case 'd' + (N): \
        case 'e' + (N): case 'f' + (N)
#define _C_CTYPE_LOWER_N(N) \
        _C_CTYPE_LOWER_A_THRU_F_N(N): \
        case 'g' + (N): case 'h' + (N): case 'i' + (N): case 'j' + (N): \
        case 'k' + (N): case 'l' + (N): case 'm' + (N): case 'n' + (N): \
        case 'o' + (N): case 'p' + (N): case 'q' + (N): case 'r' + (N): \
        case 's' + (N): case 't' + (N): case 'u' + (N): case 'v' + (N): \
        case 'w' + (N): case 'x' + (N): case 'y' + (N): case 'z' + (N)

/*
 * Cases for hex letters, digits, lower, punct, and upper.
 *
 * As with the macros above, each expansion ends without a colon; write
 * e.g. "_C_CTYPE_DIGIT:" inside a switch statement.
 */
#define _C_CTYPE_A_THRU_F \
        _C_CTYPE_LOWER_A_THRU_F_N (0): \
        _C_CTYPE_LOWER_A_THRU_F_N ('A' - 'a')
#define _C_CTYPE_DIGIT                     \
        case '0': case '1': case '2': case '3': \
        case '4': case '5': case '6': case '7': \
        case '8': case '9'
#define _C_CTYPE_LOWER _C_CTYPE_LOWER_N (0)
#define _C_CTYPE_PUNCT \
        case '!': case '"': case '#': case '$':  \
        case '%': case '&': case '\'': case '(': \
        case ')': case '*': case '+': case ',':  \
        case '-': case '.': case '/': case ':':  \
        case ';': case '<': case '=': case '>':  \
        case '?': case '@': case '[': case '\\': \
        case ']': case '^': case '_': case '`':  \
        case '{': case '|': case '}': case '~'
#define _C_CTYPE_UPPER _C_CTYPE_LOWER_N ('A' - 'a')
 137
 138 /**
 139  * Function definitions.
 140  *
 141  * Unlike the functions in <ctype.h>, which require an argument in the range
 142  * of the 'unsigned char' type, the functions here operate on values that are
 143  * in the 'unsigned char' range or in the 'char' range.  In other words,
 144  * when you have a 'char' value, you need to cast it before using it as
 145  * argument to a <ctype.h> function:
 146  *
 147  *      const char *s = ...;
 148  *      if (isalpha ((unsigned char) *s)) ...
 149  *
 150  * but you don't need to cast it for the functions defined in this file:
 151  *
 152  *      const char *s = ...;
 153  *      if (c_isalpha (*s)) ...
 154  */
 155
 156 static inline int c_isalnum (int c)
 157 {
 158         switch (c) {
 159         _C_CTYPE_DIGIT:
 160         _C_CTYPE_LOWER:
 161         _C_CTYPE_UPPER:
 162                 return 1;
 163         default:
 164                 return 0;
 165         }
 166 }
 167
 168 static inline int c_isalpha (int c)
 169 {
 170         switch (c) {
 171         _C_CTYPE_LOWER:
 172         _C_CTYPE_UPPER:
 173                 return 1;
 174         default:
 175                 return 0;
 176         }
 177 }
 178
 179 /* The function isascii is not locale dependent.
 180  * Its use in EBCDIC is questionable.
 181  */
 182 static inline int c_isascii (int c)
 183 {
 184         switch (c) {
 185         case ' ':
 186         _C_CTYPE_CNTRL:
 187         _C_CTYPE_DIGIT:
 188         _C_CTYPE_LOWER:
 189         _C_CTYPE_PUNCT:
 190         _C_CTYPE_UPPER:
 191                 return 1;
 192         default:
 193                 return 0;
 194         }
 195 }
 196
 197 static inline int c_isblank (int c)
 198 {
 199         return c == ' ' || c == '\t';
 200 }
 201
 202 static inline int c_iscntrl (int c)
 203 {
 204         switch (c) {
 205         _C_CTYPE_CNTRL:
 206                 return 1;
 207         default:
 208                 return 0;
 209         }
 210 }
 211
 212 static inline int c_isdigit (int c)
 213 {
 214         switch (c) {
 215         _C_CTYPE_DIGIT:
 216                 return 1;
 217         default:
 218                 return 0;
 219         }
 220 }
 221
 222 static inline int c_isgraph (int c)
 223 {
 224         switch (c) {
 225         _C_CTYPE_DIGIT:
 226         _C_CTYPE_LOWER:
 227         _C_CTYPE_PUNCT:
 228         _C_CTYPE_UPPER:
 229                 return 1;
 230         default:
 231                 return 0;
 232         }
 233 }
 234
 235 static inline int c_islower (int c)
 236 {
 237         switch (c) {
 238         _C_CTYPE_LOWER:
 239                 return 1;
 240         default:
 241                 return 0;
 242         }
 243 }
 244
 245 static inline int c_isprint (int c)
 246 {
 247         switch (c) {
 248         case ' ':
 249         _C_CTYPE_DIGIT:
 250         _C_CTYPE_LOWER:
 251         _C_CTYPE_PUNCT:
 252         _C_CTYPE_UPPER:
 253                 return 1;
 254         default:
 255                 return 0;
 256         }
 257 }
 258
 259 static inline int c_ispunct (int c)
 260 {
 261         switch (c) {
 262         _C_CTYPE_PUNCT:
 263                 return 1;
 264         default:
 265                 return 0;
 266         }
 267 }
 268
 269 static inline int c_isspace (int c)
 270 {
 271         switch (c) {
 272         case ' ': case '\t': case '\n': case '\v': case '\f': case '\r':
 273                 return 1;
 274         default:
 275                 return 0;
 276         }
 277 }
 278
 279 static inline int c_isupper (int c)
 280 {
 281         switch (c) {
 282         _C_CTYPE_UPPER:
 283                 return 1;
 284         default:
 285                 return 0;
 286         }
 287 }
 288
 289 static inline int c_isxdigit (int c)
 290 {
 291         switch (c) {
 292         _C_CTYPE_DIGIT:
 293         _C_CTYPE_A_THRU_F:
 294                 return 1;
 295         default:
 296                 return 0;
 297         }
 298 }
 299
 300 static inline int c_tolower (int c)
 301 {
 302         switch (c) {
 303         _C_CTYPE_UPPER:
 304                 return c - 'A' + 'a';
 305         default:
 306                 return c;
 307         }
 308 }
 309
 310 static inline int c_toupper (int c)
 311 {
 312         switch (c) {
 313         _C_CTYPE_LOWER:
 314                 return c - 'a' + 'A';
 315         default:
 316                 return c;
 317         }
 318 }
 319
 320 static inline int c_strncasecmp(const char *a, const char *b, size_t n)
 321 {
 322         int res = 0;
 323
 324         for (; n > 0; a++, b++, n--) {
 325                 unsigned int x = (unsigned int) *a;
 326                 unsigned int y = (unsigned int) *b;
 327
 328                 res = c_tolower(x) - c_tolower(y);
 329                 if (res)
 330                         break;
 331         }
 332         return res;
 333 }
 334
 335 static inline int c_strcasecmp(const char *a, const char *b)
 336 {
 337         int res = 0;
 338
 339         if (a == b)
 340                 return 0;
 341
 342         for (; *a != '\0'; a++, b++) {
 343                 unsigned int x = (unsigned int) *a;
 344                 unsigned int y = (unsigned int) *b;
 345
 346                 res = c_tolower(x) - c_tolower(y);
 347                 if (res)
 348                         break;
 349         }
 350
 351         return res;
 352 }
 353
 354 #endif /* UTIL_LINUX_CCTYPE_H */