]>
Commit | Line | Data |
---|---|---|
fea681da MK |
1 | .\" Copyright (C), 1995, Graeme W. Wilford. (Wilf.) |
2 | .\" | |
93015253 | 3 | .\" %%%LICENSE_START(VERBATIM) |
fea681da MK |
4 | .\" Permission is granted to make and distribute verbatim copies of this |
5 | .\" manual provided the copyright notice and this permission notice are | |
6 | .\" preserved on all copies. | |
7 | .\" | |
8 | .\" Permission is granted to copy and distribute modified versions of this | |
9 | .\" manual under the conditions for verbatim copying, provided that the | |
10 | .\" entire resulting derived work is distributed under the terms of a | |
11 | .\" permission notice identical to this one. | |
c13182ef | 12 | .\" |
fea681da MK |
13 | .\" Since the Linux kernel and libraries are constantly changing, this |
14 | .\" manual page may be incorrect or out-of-date. The author(s) assume no | |
15 | .\" responsibility for errors or omissions, or for damages resulting from | |
16 | .\" the use of the information contained herein. The author(s) may not | |
17 | .\" have taken the same level of care in the production of this manual, | |
18 | .\" which is licensed free of charge, as they might when working | |
19 | .\" professionally. | |
c13182ef | 20 | .\" |
fea681da MK |
21 | .\" Formatted or processed versions of this manual, if unaccompanied by |
22 | .\" the source, must acknowledge the copyright and authors of this work. | |
4b72fb64 | 23 | .\" %%%LICENSE_END |
fea681da MK |
24 | .\" |
25 | .\" Wed Jun 14 16:10:28 BST 1995 Wilf. (G.Wilford@ee.surrey.ac.uk) | |
26 | .\" Tiny change in formatting - aeb, 950812 | |
27 | .\" Modified 8 May 1998 by Joseph S. Myers (jsm28@cam.ac.uk) | |
28 | .\" | |
29 | .\" show the synopsis section nicely | |
867c9b34 | 30 | .TH REGEX 3 2019-10-10 "GNU" "Linux Programmer's Manual" |
fea681da MK |
31 | .SH NAME |
32 | regcomp, regexec, regerror, regfree \- POSIX regex functions | |
33 | .SH SYNOPSIS | |
62218dc0 | 34 | .nf |
fea681da | 35 | .B #include <sys/types.h> |
fea681da | 36 | .B #include <regex.h> |
f90f031e | 37 | .PP |
62218dc0 | 38 | .BI "int regcomp(regex_t *" preg ", const char *" regex ", int " cflags ); |
f90f031e | 39 | .PP |
62218dc0 MK |
40 | .BI "int regexec(const regex_t *" preg ", const char *" string \ |
41 | ", size_t " nmatch , | |
42 | .BI " regmatch_t " pmatch[] ", int " eflags ); | |
f90f031e | 43 | .PP |
62218dc0 MK |
44 | .BI "size_t regerror(int " errcode ", const regex_t *" preg ", char *" errbuf , |
45 | .BI " size_t " errbuf_size ); | |
f90f031e | 46 | .PP |
62218dc0 MK |
47 | .BI "void regfree(regex_t *" preg ); |
48 | .fi | |
8af1ba10 | 49 | .SH DESCRIPTION |
73d8cece | 50 | .SS POSIX regex compiling |
e511ffb6 | 51 | .BR regcomp () |
fea681da | 52 | is used to compile a regular expression into a form that is suitable |
c13182ef | 53 | for subsequent |
e511ffb6 | 54 | .BR regexec () |
fea681da | 55 | searches. |
847e0d88 | 56 | .PP |
e511ffb6 | 57 | .BR regcomp () |
c13182ef MK |
58 | is supplied with |
59 | .IR preg , | |
60 | a pointer to a pattern buffer storage area; | |
fea681da MK |
61 | .IR regex , |
62 | a pointer to the null-terminated regular expression string; and | |
63 | .IR cflags , | |
64 | flags used to determine the type of compilation. | |
847e0d88 | 65 | .PP |
fea681da | 66 | All regular expression searching must be done via a compiled pattern |
c13182ef | 67 | buffer, thus |
e511ffb6 | 68 | .BR regexec () |
c13182ef | 69 | must always be supplied with the address of a |
e511ffb6 | 70 | .BR regcomp () |
fea681da | 71 | initialized pattern buffer. |
847e0d88 | 72 | .PP |
c13182ef MK |
73 | .I cflags |
74 | may be the | |
fea681da | 75 | .RB bitwise- or |
d86c357d | 76 | of zero or more of the following: |
c13182ef | 77 | .TP |
fea681da | 78 | .B REG_EXTENDED |
c13182ef | 79 | Use |
fea681da | 80 | .B POSIX |
c13182ef | 81 | Extended Regular Expression syntax when interpreting |
fea681da MK |
82 | .IR regex . |
83 | If not set, | |
84 | .B POSIX | |
85 | Basic Regular Expression syntax is used. | |
c13182ef | 86 | .TP |
fea681da | 87 | .B REG_ICASE |
c13182ef MK |
88 | Do not differentiate case. |
89 | Subsequent | |
e511ffb6 | 90 | .BR regexec () |
fea681da | 91 | searches using this pattern buffer will be case insensitive. |
c13182ef | 92 | .TP |
fea681da | 93 | .B REG_NOSUB |
ea72c0cc | 94 | Do not report position of matches. |
fea681da MK |
95 | The |
96 | .I nmatch | |
97 | and | |
98 | .I pmatch | |
c4bb193f | 99 | arguments to |
e511ffb6 | 100 | .BR regexec () |
fea681da | 101 | are ignored if the pattern buffer supplied was compiled with this flag set. |
c13182ef | 102 | .TP |
fea681da MK |
103 | .B REG_NEWLINE |
104 | Match-any-character operators don't match a newline. | |
847e0d88 | 105 | .IP |
24b74457 | 106 | A nonmatching list |
fea681da MK |
107 | .RB ( [^...] ) |
108 | not containing a newline does not match a newline. | |
847e0d88 | 109 | .IP |
fea681da MK |
110 | Match-beginning-of-line operator |
111 | .RB ( ^ ) | |
112 | matches the empty string immediately after a newline, regardless of | |
113 | whether | |
114 | .IR eflags , | |
115 | the execution flags of | |
e511ffb6 | 116 | .BR regexec (), |
c13182ef | 117 | contains |
fea681da | 118 | .BR REG_NOTBOL . |
847e0d88 | 119 | .IP |
c13182ef | 120 | Match-end-of-line operator |
fea681da MK |
121 | .RB ( $ ) |
122 | matches the empty string immediately before a newline, regardless of | |
c13182ef | 123 | whether |
0daa9e92 | 124 | .I eflags |
fea681da MK |
125 | contains |
126 | .BR REG_NOTEOL . | |
73d8cece | 127 | .SS POSIX regex matching |
e511ffb6 | 128 | .BR regexec () |
fea681da | 129 | is used to match a null-terminated string |
c13182ef | 130 | against the precompiled pattern buffer, |
fea681da MK |
131 | .IR preg . |
132 | .I nmatch | |
133 | and | |
134 | .I pmatch | |
c13182ef | 135 | are used to provide information regarding the location of any matches. |
fea681da | 136 | .I eflags |
c13182ef | 137 | may be the |
fea681da | 138 | .RB bitwise- or |
c13182ef | 139 | of zero or more of |
fea681da MK |
140 | .B REG_NOTBOL |
141 | and | |
c13182ef | 142 | .B REG_NOTEOL |
d9bfdb9c | 143 | which cause changes in matching behavior described below. |
fea681da MK |
144 | .TP |
145 | .B REG_NOTBOL | |
146 | The match-beginning-of-line operator always fails to match (but see the | |
147 | compilation flag | |
c13182ef | 148 | .B REG_NEWLINE |
75cd78ea | 149 | above). |
c13182ef | 150 | This flag may be used when different portions of a string are passed to |
e511ffb6 | 151 | .BR regexec () |
fea681da MK |
152 | and the beginning of the string should not be interpreted as the |
153 | beginning of the line. | |
154 | .TP | |
155 | .B REG_NOTEOL | |
156 | The match-end-of-line operator always fails to match (but see the | |
157 | compilation flag | |
158 | .B REG_NEWLINE | |
7020f14d | 159 | above). |
f8c3a927 RL |
160 | .TP |
161 | .B REG_STARTEND | |
ce0fd56b MK |
162 | Match only the portion of the input string delimited by |
163 | .IR pmatch[0] , | |
164 | starting at byte | |
165 | .I pmatch[0].rm_so | |
166 | and ending before byte | |
167 | .IR pmatch[0].rm_eo . | |
e7a1bf89 | 168 | This allows matching embedded NUL bytes |
ce0fd56b MK |
169 | and avoids a |
170 | .BR strlen (3) | |
171 | on large strings. | |
172 | It does not use | |
173 | .I nmatch | |
174 | on input, and does not change | |
f8c3a927 RL |
175 | .B REG_NOTBOL |
176 | or | |
177 | .B REG_NEWLINE | |
178 | processing. | |
2896c552 | 179 | This flag is a BSD extension, not present in POSIX. |
73d8cece | 180 | .SS Byte offsets |
c13182ef | 181 | Unless |
fea681da MK |
182 | .B REG_NOSUB |
183 | was set for the compilation of the pattern buffer, it is possible to | |
ea72c0cc | 184 | obtain match addressing information. |
fea681da MK |
185 | .I pmatch |
186 | must be dimensioned to have at least | |
187 | .I nmatch | |
188 | elements. | |
189 | These are filled in by | |
e511ffb6 | 190 | .BR regexec () |
e4b382b9 MK |
191 | with substring match addresses. |
192 | The offsets of the subexpression starting at the | |
ea72c0cc RT |
193 | .IR i th |
194 | open parenthesis are stored in | |
195 | .IR pmatch[i] . | |
196 | The entire regular expression's match addresses are stored in | |
197 | .IR pmatch[0] . | |
198 | (Note that to return the offsets of | |
199 | .I N | |
200 | subexpression matches, | |
201 | .I nmatch | |
202 | must be at least | |
203 | .IR N+1 .) | |
c13182ef | 204 | Any unused structure elements will contain the value \-1. |
847e0d88 | 205 | .PP |
c13182ef | 206 | The |
f19a0f03 | 207 | .I regmatch_t |
fea681da MK |
208 | structure which is the type of |
209 | .I pmatch | |
210 | is defined in | |
a9a13a50 | 211 | .IR <regex.h> . |
847e0d88 | 212 | .PP |
bd191423 | 213 | .in +4n |
b8302363 | 214 | .EX |
f19a0f03 MK |
215 | typedef struct { |
216 | regoff_t rm_so; | |
217 | regoff_t rm_eo; | |
218 | } regmatch_t; | |
b8302363 | 219 | .EE |
bd191423 | 220 | .in |
847e0d88 | 221 | .PP |
c13182ef | 222 | Each |
fea681da | 223 | .I rm_so |
8729177b | 224 | element that is not \-1 indicates the start offset of the next largest |
c13182ef MK |
225 | substring match within the string. |
226 | The corresponding | |
227 | .I rm_eo | |
7fb9948d MK |
228 | element indicates the end offset of the match, |
229 | which is the offset of the first character after the matching text. | |
73d8cece | 230 | .SS POSIX error reporting |
e511ffb6 | 231 | .BR regerror () |
c13182ef | 232 | is used to turn the error codes that can be returned by both |
e511ffb6 | 233 | .BR regcomp () |
fea681da | 234 | and |
e511ffb6 | 235 | .BR regexec () |
fea681da | 236 | into error message strings. |
847e0d88 | 237 | .PP |
e511ffb6 | 238 | .BR regerror () |
fea681da MK |
239 | is passed the error code, |
240 | .IR errcode , | |
241 | the pattern buffer, | |
242 | .IR preg , | |
243 | a pointer to a character string buffer, | |
244 | .IR errbuf , | |
245 | and the size of the string buffer, | |
246 | .IR errbuf_size . | |
247 | It returns the size of the | |
248 | .I errbuf | |
c13182ef MK |
249 | required to contain the null-terminated error message string. |
250 | If both | |
fea681da MK |
251 | .I errbuf |
252 | and | |
253 | .I errbuf_size | |
c7094399 | 254 | are nonzero, |
fea681da | 255 | .I errbuf |
c13182ef | 256 | is filled in with the first |
c65433e6 | 257 | .I "errbuf_size \- 1" |
d1a71985 | 258 | characters of the error message and a terminating null byte (\(aq\e0\(aq). |
73d8cece | 259 | .SS POSIX pattern buffer freeing |
c13182ef | 260 | Supplying |
e511ffb6 | 261 | .BR regfree () |
fea681da MK |
262 | with a precompiled pattern buffer, |
263 | .IR preg , | |
264 | will free the memory allocated to the pattern buffer by the compiling | |
265 | process, | |
e511ffb6 | 266 | .BR regcomp (). |
47297adb | 267 | .SH RETURN VALUE |
e511ffb6 | 268 | .BR regcomp () |
fea681da | 269 | returns zero for a successful compilation or an error code for failure. |
847e0d88 | 270 | .PP |
e511ffb6 | 271 | .BR regexec () |
c13182ef | 272 | returns zero for a successful match or |
fea681da MK |
273 | .B REG_NOMATCH |
274 | for failure. | |
275 | .SH ERRORS | |
c13182ef | 276 | The following errors can be returned by |
e511ffb6 | 277 | .BR regcomp (): |
fea681da MK |
278 | .TP |
279 | .B REG_BADBR | |
280 | Invalid use of back reference operator. | |
281 | .TP | |
282 | .B REG_BADPAT | |
283 | Invalid use of pattern operators such as group or list. | |
284 | .TP | |
285 | .B REG_BADRPT | |
f81fb444 | 286 | Invalid use of repetition operators such as using \(aq*\(aq |
fea681da MK |
287 | as the first character. |
288 | .TP | |
289 | .B REG_EBRACE | |
290 | Unmatched brace interval operators. | |
291 | .TP | |
292 | .B REG_EBRACK | |
293 | Unmatched bracket list operators. | |
294 | .TP | |
295 | .B REG_ECOLLATE | |
296 | Invalid collating element. | |
297 | .TP | |
298 | .B REG_ECTYPE | |
299 | Unknown character class name. | |
300 | .TP | |
301 | .B REG_EEND | |
b2653a99 | 302 | Nonspecific error. |
c13182ef | 303 | This is not defined by POSIX.2. |
fea681da MK |
304 | .TP |
305 | .B REG_EESCAPE | |
306 | Trailing backslash. | |
307 | .TP | |
308 | .B REG_EPAREN | |
309 | Unmatched parenthesis group operators. | |
310 | .TP | |
311 | .B REG_ERANGE | |
10850212 | 312 | Invalid use of the range operator; for example, the ending point of the range |
fea681da MK |
313 | occurs prior to the starting point. |
314 | .TP | |
315 | .B REG_ESIZE | |
ee8655b5 | 316 | Compiled regular expression requires a pattern buffer larger than 64\ kB. |
fea681da MK |
317 | This is not defined by POSIX.2. |
318 | .TP | |
319 | .B REG_ESPACE | |
320 | The regex routines ran out of memory. | |
321 | .TP | |
322 | .B REG_ESUBREG | |
323 | Invalid back reference to a subexpression. | |
285ff586 PH |
324 | .SH ATTRIBUTES |
325 | For an explanation of the terms used in this section, see | |
326 | .BR attributes (7). | |
327 | .TS | |
328 | allbox; | |
329 | lbw20 lb lb | |
330 | l l l. | |
331 | Interface Attribute Value | |
332 | T{ | |
333 | .BR regcomp (), | |
334 | .BR regexec () | |
335 | T} Thread safety MT-Safe locale | |
336 | T{ | |
337 | .BR regerror () | |
338 | T} Thread safety MT-Safe env | |
339 | T{ | |
340 | .BR regfree () | |
341 | T} Thread safety MT-Safe | |
342 | .TE | |
47297adb | 343 | .SH CONFORMING TO |
3528e8e5 | 344 | POSIX.1-2001, POSIX.1-2008. |
47297adb | 345 | .SH SEE ALSO |
79e7547f | 346 | .BR grep (1), |
8720e679 | 347 | .BR regex (7) |
4a2b2c2c | 348 | .PP |
1bb65947 | 349 | The glibc manual section, |
6de48cdf | 350 | .I "Regular Expressions" |