.\"
.\" Copyright (c) 1992, 1993, 1994 Henry Spencer.
 | 
						|
.\" Copyright (c) 1992, 1993, 1994
 | 
						|
.\"	The Regents of the University of California.  All rights reserved.
 | 
						|
.\"
 | 
						|
.\" This code is derived from software contributed to Berkeley by
 | 
						|
.\" Henry Spencer.
 | 
						|
.\"
 | 
						|
.\" Redistribution and use in source and binary forms, with or without
 | 
						|
.\" modification, are permitted provided that the following conditions
 | 
						|
.\" are met:
 | 
						|
.\" 1. Redistributions of source code must retain the above copyright
 | 
						|
.\"    notice, this list of conditions and the following disclaimer.
 | 
						|
.\" 2. Redistributions in binary form must reproduce the above copyright
 | 
						|
.\"    notice, this list of conditions and the following disclaimer in the
 | 
						|
.\"    documentation and/or other materials provided with the distribution.
 | 
						|
.\" 3. All advertising materials mentioning features or use of this software
 | 
						|
.\"    must display the following acknowledgement:
 | 
						|
.\"	This product includes software developed by the University of
 | 
						|
.\"	California, Berkeley and its contributors.
 | 
						|
.\" 4. Neither the name of the University nor the names of its contributors
 | 
						|
.\"    may be used to endorse or promote products derived from this software
 | 
						|
.\"    without specific prior written permission.
 | 
						|
.\"
 | 
						|
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 | 
						|
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
						|
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 | 
						|
.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 | 
						|
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 | 
						|
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 | 
						|
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 | 
						|
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 | 
						|
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 | 
						|
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 | 
						|
.\" SUCH DAMAGE.
 | 
						|
.\"
 | 
						|
.\"	@(#)regex.3	8.4 (Berkeley) 3/20/94
 | 
						|
.\"
 | 
						|
.TH REGEX 3 "March 20, 1994"
 | 
						|
.de ZR
 | 
						|
.\" one other place knows this name:  the SEE ALSO section
 | 
						|
.BR re_format (7) \\$1
 | 
						|
..
 | 
						|
.SH NAME
 | 
						|
regex, regcomp, regexec, regerror, regfree \- regular-expression library
 | 
						|
.SH SYNOPSIS
 | 
						|
.ft B
 | 
						|
.\".na
 | 
						|
#include <sys/types.h>
 | 
						|
.br
 | 
						|
#include <regex.h>
 | 
						|
.sp
 | 
						|
.in +.5i
 | 
						|
.ti -.5i
 | 
						|
int regcomp(regex_t *\fIpreg\fP, const char *\fIpattern\fP, int \fIcflags\fP);
 | 
						|
.ti -.5i
 | 
						|
int regexec(const regex_t *\fIpreg\fP, const char *\fIstring\fP,
 | 
						|
size_t \fInmatch\fP, regmatch_t \fIpmatch\fP[], int \fIeflags\fP);
 | 
						|
.ti -.5i
 | 
						|
size_t regerror(int \fIerrcode\fP, const regex_t *\fIpreg\fP,
 | 
						|
char *\fIerrbuf\fP, size_t \fIerrbuf_size\fP);
 | 
						|
.ti -.5i
 | 
						|
void regfree(regex_t *\fIpreg\fP);
 | 
						|
.in -.5i
 | 
						|
.ft R
 | 
						|
.SH DESCRIPTION
 | 
						|
These routines implement POSIX 1003.2 regular expressions (``RE''s);
 | 
						|
see
 | 
						|
.ZR .
 | 
						|
.B Regcomp
 | 
						|
compiles an RE written as a string into an internal form,
 | 
						|
.B regexec
 | 
						|
matches that internal form against a string and reports results,
 | 
						|
.B regerror
 | 
						|
transforms error codes from either into human-readable messages,
 | 
						|
and
 | 
						|
.B regfree
 | 
						|
frees any dynamically-allocated storage used by the internal form
 | 
						|
of an RE.
 | 
						|
.PP
 | 
						|
The header
 | 
						|
.I <regex.h>
 | 
						|
declares two structure types,
 | 
						|
.B regex_t
 | 
						|
and
 | 
						|
.BR regmatch_t ,
 | 
						|
the former for compiled internal forms and the latter for match reporting.
 | 
						|
It also declares the four functions,
 | 
						|
a type
 | 
						|
.BR regoff_t ,
 | 
						|
and a number of constants with names starting with ``REG_''.
 | 
						|
.PP
 | 
						|
.B Regcomp
 | 
						|
compiles the regular expression contained in the
 | 
						|
.I pattern
 | 
						|
string,
 | 
						|
subject to the flags in
 | 
						|
.IR cflags ,
 | 
						|
and places the results in the
 | 
						|
.B regex_t
 | 
						|
structure pointed to by
 | 
						|
.IR preg .
 | 
						|
.I Cflags
 | 
						|
is the bitwise OR of zero or more of the following flags:
 | 
						|
.IP REG_EXTENDED \w'REG_EXTENDED'u+2n
 | 
						|
Compile modern (``extended'') REs,
 | 
						|
rather than the obsolete (``basic'') REs that
 | 
						|
are the default.
 | 
						|
.IP REG_BASIC
 | 
						|
This is a synonym for 0,
 | 
						|
provided as a counterpart to REG_EXTENDED to improve readability.
 | 
						|
.IP REG_NOSPEC
 | 
						|
Compile with recognition of all special characters turned off.
 | 
						|
All characters are thus considered ordinary,
 | 
						|
so the ``RE'' is a literal string.
 | 
						|
This is an extension,
 | 
						|
compatible with but not specified by POSIX 1003.2,
 | 
						|
and should be used with
 | 
						|
caution in software intended to be portable to other systems.
 | 
						|
REG_EXTENDED and REG_NOSPEC may not be used
 | 
						|
in the same call to
 | 
						|
.BR regcomp .
 | 
						|
.IP REG_ICASE
 | 
						|
Compile for matching that ignores upper/lower case distinctions.
 | 
						|
See
 | 
						|
.ZR .
 | 
						|
.IP REG_NOSUB
 | 
						|
Compile for matching that need only report success or failure,
 | 
						|
not what was matched.
 | 
						|
.IP REG_NEWLINE
 | 
						|
Compile for newline-sensitive matching.
 | 
						|
By default, newline is a completely ordinary character with no special
 | 
						|
meaning in either REs or strings.
 | 
						|
With this flag,
 | 
						|
`[^' bracket expressions and `.' never match newline,
 | 
						|
a `^' anchor matches the null string after any newline in the string
 | 
						|
in addition to its normal function,
 | 
						|
and the `$' anchor matches the null string before any newline in the
 | 
						|
string in addition to its normal function.
 | 
						|
.IP REG_PEND
 | 
						|
The regular expression ends,
 | 
						|
not at the first NUL,
 | 
						|
but just before the character pointed to by the
 | 
						|
.B re_endp
 | 
						|
member of the structure pointed to by
 | 
						|
.IR preg .
 | 
						|
The
 | 
						|
.B re_endp
 | 
						|
member is of type
 | 
						|
.BR "const\ char\ *" .
 | 
						|
This flag permits inclusion of NULs in the RE;
 | 
						|
they are considered ordinary characters.
 | 
						|
This is an extension,
 | 
						|
compatible with but not specified by POSIX 1003.2,
 | 
						|
and should be used with
 | 
						|
caution in software intended to be portable to other systems.
 | 
						|
.PP
 | 
						|
When successful,
 | 
						|
.B regcomp
 | 
						|
returns 0 and fills in the structure pointed to by
 | 
						|
.IR preg .
 | 
						|
One member of that structure
 | 
						|
(other than
 | 
						|
.BR re_endp )
 | 
						|
is publicized:
 | 
						|
.BR re_nsub ,
 | 
						|
of type
 | 
						|
.BR size_t ,
 | 
						|
contains the number of parenthesized subexpressions within the RE
 | 
						|
(except that the value of this member is undefined if the
 | 
						|
REG_NOSUB flag was used).
 | 
						|
If
 | 
						|
.B regcomp
 | 
						|
fails, it returns a non-zero error code;
 | 
						|
see DIAGNOSTICS.
 | 
						|
.PP
 | 
						|
.B Regexec
 | 
						|
matches the compiled RE pointed to by
 | 
						|
.I preg
 | 
						|
against the
 | 
						|
.IR string ,
 | 
						|
subject to the flags in
 | 
						|
.IR eflags ,
 | 
						|
and reports results using
 | 
						|
.IR nmatch ,
 | 
						|
.IR pmatch ,
 | 
						|
and the returned value.
 | 
						|
The RE must have been compiled by a previous invocation of
 | 
						|
.BR regcomp .
 | 
						|
The compiled form is not altered during execution of
 | 
						|
.BR regexec ,
 | 
						|
so a single compiled RE can be used simultaneously by multiple threads.
 | 
						|
.PP
 | 
						|
By default,
 | 
						|
the NUL-terminated string pointed to by
 | 
						|
.I string
 | 
						|
is considered to be the text of an entire line, minus any terminating
 | 
						|
newline.
 | 
						|
The
 | 
						|
.I eflags
 | 
						|
argument is the bitwise OR of zero or more of the following flags:
 | 
						|
.IP REG_NOTBOL \w'REG_STARTEND'u+2n
 | 
						|
The first character of
 | 
						|
the string
 | 
						|
is not the beginning of a line, so the `^' anchor should not match before it.
 | 
						|
This does not affect the behavior of newlines under REG_NEWLINE.
 | 
						|
.IP REG_NOTEOL
 | 
						|
The NUL terminating
 | 
						|
the string
 | 
						|
does not end a line, so the `$' anchor should not match before it.
 | 
						|
This does not affect the behavior of newlines under REG_NEWLINE.
 | 
						|
.IP REG_STARTEND
 | 
						|
The string is considered to start at
 | 
						|
\fIstring\fR\ + \fIpmatch\fR[0].\fBrm_so\fR
 | 
						|
and to have a terminating NUL located at
 | 
						|
\fIstring\fR\ + \fIpmatch\fR[0].\fBrm_eo\fR
 | 
						|
(there need not actually be a NUL at that location),
 | 
						|
regardless of the value of
 | 
						|
.IR nmatch .
 | 
						|
See below for the definition of
 | 
						|
.I pmatch
 | 
						|
and
 | 
						|
.IR nmatch .
 | 
						|
This is an extension,
 | 
						|
compatible with but not specified by POSIX 1003.2,
 | 
						|
and should be used with
 | 
						|
caution in software intended to be portable to other systems.
 | 
						|
Note that a non-zero \fBrm_so\fR does not imply REG_NOTBOL;
 | 
						|
REG_STARTEND affects only the location of the string,
 | 
						|
not how it is matched.
 | 
						|
.PP
 | 
						|
See
 | 
						|
.ZR
 | 
						|
for a discussion of what is matched in situations where an RE or a
 | 
						|
portion thereof could match any of several substrings of
 | 
						|
.IR string .
 | 
						|
.PP
 | 
						|
Normally,
 | 
						|
.B regexec
 | 
						|
returns 0 for success and the non-zero code REG_NOMATCH for failure.
 | 
						|
Other non-zero error codes may be returned in exceptional situations;
 | 
						|
see DIAGNOSTICS.
 | 
						|
.PP
 | 
						|
If REG_NOSUB was specified in the compilation of the RE,
 | 
						|
or if
 | 
						|
.I nmatch
 | 
						|
is 0,
 | 
						|
.B regexec
 | 
						|
ignores the
 | 
						|
.I pmatch
 | 
						|
argument (but see below for the case where REG_STARTEND is specified).
 | 
						|
Otherwise,
 | 
						|
.I pmatch
 | 
						|
points to an array of
 | 
						|
.I nmatch
 | 
						|
structures of type
 | 
						|
.BR regmatch_t .
 | 
						|
Such a structure has at least the members
 | 
						|
.B rm_so
 | 
						|
and
 | 
						|
.BR rm_eo ,
 | 
						|
both of type
 | 
						|
.B regoff_t
 | 
						|
(a signed arithmetic type at least as large as an
 | 
						|
.B off_t
 | 
						|
and a
 | 
						|
.BR ssize_t ),
 | 
						|
containing respectively the offset of the first character of a substring
 | 
						|
and the offset of the first character after the end of the substring.
 | 
						|
Offsets are measured from the beginning of the
 | 
						|
.I string
 | 
						|
argument given to
 | 
						|
.BR regexec .
 | 
						|
An empty substring is denoted by equal offsets,
 | 
						|
both indicating the character following the empty substring.
 | 
						|
.PP
 | 
						|
The 0th member of the
 | 
						|
.I pmatch
 | 
						|
array is filled in to indicate what substring of
 | 
						|
.I string
 | 
						|
was matched by the entire RE.
 | 
						|
Remaining members report what substring was matched by parenthesized
 | 
						|
subexpressions within the RE;
 | 
						|
member
 | 
						|
.I i
 | 
						|
reports subexpression
 | 
						|
.IR i ,
 | 
						|
with subexpressions counted (starting at 1) by the order of their opening
 | 
						|
parentheses in the RE, left to right.
 | 
						|
Unused entries in the array\(emcorresponding either to subexpressions that
 | 
						|
did not participate in the match at all, or to subexpressions that do not
 | 
						|
exist in the RE (that is, \fIi\fR\ > \fIpreg\fR\->\fBre_nsub\fR)\(emhave both
 | 
						|
.B rm_so
 | 
						|
and
 | 
						|
.B rm_eo
 | 
						|
set to \-1.
 | 
						|
If a subexpression participated in the match several times,
 | 
						|
the reported substring is the last one it matched.
 | 
						|
(Note, as an example in particular, that when the RE `(b*)+' matches `bbb',
 | 
						|
the parenthesized subexpression matches each of the three `b's and then
 | 
						|
an infinite number of empty strings following the last `b',
 | 
						|
so the reported substring is one of the empties.)
 | 
						|
.PP
 | 
						|
If REG_STARTEND is specified,
 | 
						|
.I pmatch
 | 
						|
must point to at least one
 | 
						|
.B regmatch_t
 | 
						|
(even if
 | 
						|
.I nmatch
 | 
						|
is 0 or REG_NOSUB was specified),
 | 
						|
to hold the input offsets for REG_STARTEND.
 | 
						|
Use for output is still entirely controlled by
 | 
						|
.IR nmatch ;
 | 
						|
if
 | 
						|
.I nmatch
 | 
						|
is 0 or REG_NOSUB was specified,
 | 
						|
the value of
 | 
						|
.IR pmatch [0]
 | 
						|
will not be changed by a successful
 | 
						|
.BR regexec .
 | 
						|
.PP
 | 
						|
.B Regerror
 | 
						|
maps a non-zero
 | 
						|
.I errcode
 | 
						|
from either
 | 
						|
.B regcomp
 | 
						|
or
 | 
						|
.B regexec
 | 
						|
to a human-readable, printable message.
 | 
						|
If
 | 
						|
.I preg
 | 
						|
is non-NULL,
 | 
						|
the error code should have arisen from use of
 | 
						|
the
 | 
						|
.B regex_t
 | 
						|
pointed to by
 | 
						|
.IR preg ,
 | 
						|
and if the error code came from
 | 
						|
.BR regcomp ,
 | 
						|
it should have been the result from the most recent
 | 
						|
.B regcomp
 | 
						|
using that
 | 
						|
.BR regex_t .
 | 
						|
.RB ( Regerror
 | 
						|
may be able to supply a more detailed message using information
 | 
						|
from the
 | 
						|
.BR regex_t .)
 | 
						|
.B Regerror
 | 
						|
places the NUL-terminated message into the buffer pointed to by
 | 
						|
.IR errbuf ,
 | 
						|
limiting the length (including the NUL) to at most
 | 
						|
.I errbuf_size
 | 
						|
bytes.
 | 
						|
If the whole message won't fit,
 | 
						|
as much of it as will fit before the terminating NUL is supplied.
 | 
						|
In any case,
 | 
						|
the returned value is the size of buffer needed to hold the whole
 | 
						|
message (including terminating NUL).
 | 
						|
If
 | 
						|
.I errbuf_size
 | 
						|
is 0,
 | 
						|
.I errbuf
 | 
						|
is ignored but the return value is still correct.
 | 
						|
.PP
 | 
						|
If the
 | 
						|
.I errcode
 | 
						|
given to
 | 
						|
.B regerror
 | 
						|
is first ORed with REG_ITOA,
 | 
						|
the ``message'' that results is the printable name of the error code,
 | 
						|
e.g. ``REG_NOMATCH'',
 | 
						|
rather than an explanation thereof.
 | 
						|
If
 | 
						|
.I errcode
 | 
						|
is REG_ATOI,
 | 
						|
then
 | 
						|
.I preg
 | 
						|
shall be non-NULL and the
 | 
						|
.B re_endp
 | 
						|
member of the structure it points to
 | 
						|
must point to the printable name of an error code;
 | 
						|
in this case, the result in
 | 
						|
.I errbuf
 | 
						|
is the decimal digits of
 | 
						|
the numeric value of the error code
 | 
						|
(0 if the name is not recognized).
 | 
						|
REG_ITOA and REG_ATOI are intended primarily as debugging facilities;
 | 
						|
they are extensions,
 | 
						|
compatible with but not specified by POSIX 1003.2,
 | 
						|
and should be used with
 | 
						|
caution in software intended to be portable to other systems.
 | 
						|
Be warned also that they are considered experimental and changes are possible.
 | 
						|
.PP
 | 
						|
.B Regfree
 | 
						|
frees any dynamically-allocated storage associated with the compiled RE
 | 
						|
pointed to by
 | 
						|
.IR preg .
 | 
						|
The remaining
 | 
						|
.B regex_t
 | 
						|
is no longer a valid compiled RE
 | 
						|
and the effect of supplying it to
 | 
						|
.B regexec
 | 
						|
or
 | 
						|
.B regerror
 | 
						|
is undefined.
 | 
						|
.PP
 | 
						|
None of these functions references global variables except for tables
 | 
						|
of constants;
 | 
						|
all are safe for use from multiple threads if the arguments are safe.
 | 
						|
.SH IMPLEMENTATION CHOICES
 | 
						|
There are a number of decisions that 1003.2 leaves up to the implementor,
 | 
						|
either by explicitly saying ``undefined'' or by virtue of them being
 | 
						|
forbidden by the RE grammar.
 | 
						|
This implementation treats them as follows.
 | 
						|
.PP
 | 
						|
See
 | 
						|
.ZR
 | 
						|
for a discussion of the definition of case-independent matching.
 | 
						|
.PP
 | 
						|
There is no particular limit on the length of REs,
 | 
						|
except insofar as memory is limited.
 | 
						|
Memory usage is approximately linear in RE size, and largely insensitive
 | 
						|
to RE complexity, except for bounded repetitions.
 | 
						|
See BUGS for one short RE using them
 | 
						|
that will run almost any system out of memory.
 | 
						|
.PP
 | 
						|
A backslashed character other than one specifically given a magic meaning
 | 
						|
by 1003.2 (such magic meanings occur only in obsolete [``basic''] REs)
 | 
						|
is taken as an ordinary character.
 | 
						|
.PP
 | 
						|
Any unmatched [ is a REG_EBRACK error.
 | 
						|
.PP
 | 
						|
Equivalence classes cannot begin or end bracket-expression ranges.
 | 
						|
The endpoint of one range cannot begin another.
 | 
						|
.PP
 | 
						|
RE_DUP_MAX, the limit on repetition counts in bounded repetitions, is 255.
 | 
						|
.PP
 | 
						|
A repetition operator (?, *, +, or bounds) cannot follow another
 | 
						|
repetition operator.
 | 
						|
A repetition operator cannot begin an expression or subexpression
 | 
						|
or follow `^' or `|'.
 | 
						|
.PP
 | 
						|
`|' cannot appear first or last in a (sub)expression or after another `|',
 | 
						|
i.e. an operand of `|' cannot be an empty subexpression.
 | 
						|
An empty parenthesized subexpression, `()', is legal and matches an
 | 
						|
empty (sub)string.
 | 
						|
An empty string is not a legal RE.
 | 
						|
.PP
 | 
						|
A `{' followed by a digit is considered the beginning of bounds for a
 | 
						|
bounded repetition, which must then follow the syntax for bounds.
 | 
						|
A `{' \fInot\fR followed by a digit is considered an ordinary character.
 | 
						|
.PP
 | 
						|
`^' and `$' beginning and ending subexpressions in obsolete (``basic'')
 | 
						|
REs are anchors, not ordinary characters.
 | 
						|
.SH SEE ALSO
 | 
						|
.BR grep (1),
 | 
						|
.BR re_format (7).
 | 
						|
.PP
 | 
						|
POSIX 1003.2, sections 2.8 (Regular Expression Notation)
 | 
						|
and
 | 
						|
B.5 (C Binding for Regular Expression Matching).
 | 
						|
.SH DIAGNOSTICS
 | 
						|
Non-zero error codes from
 | 
						|
.B regcomp
 | 
						|
and
 | 
						|
.B regexec
 | 
						|
include the following:
 | 
						|
.PP
 | 
						|
.nf
 | 
						|
.ta \w'REG_ECOLLATE'u+3n
 | 
						|
REG_NOMATCH	regexec() failed to match
 | 
						|
REG_BADPAT	invalid regular expression
 | 
						|
REG_ECOLLATE	invalid collating element
 | 
						|
REG_ECTYPE	invalid character class
 | 
						|
REG_EESCAPE	\e applied to unescapable character
 | 
						|
REG_ESUBREG	invalid backreference number
 | 
						|
REG_EBRACK	brackets [ ] not balanced
 | 
						|
REG_EPAREN	parentheses ( ) not balanced
 | 
						|
REG_EBRACE	braces { } not balanced
 | 
						|
REG_BADBR	invalid repetition count(s) in { }
 | 
						|
REG_ERANGE	invalid character range in [ ]
 | 
						|
REG_ESPACE	ran out of memory
 | 
						|
REG_BADRPT	?, *, or + operand invalid
 | 
						|
REG_EMPTY	empty (sub)expression
 | 
						|
REG_ASSERT	``can't happen''\(emyou found a bug
 | 
						|
REG_INVARG	invalid argument, e.g. negative-length string
 | 
						|
.fi
 | 
						|
.SH HISTORY
 | 
						|
Originally written by Henry Spencer.
 | 
						|
Altered for inclusion in the 4.4BSD distribution.
 | 
						|
.SH BUGS
 | 
						|
This is an alpha release with known defects.
 | 
						|
Please report problems.
 | 
						|
.PP
 | 
						|
There is one known functionality bug.
 | 
						|
The implementation of internationalization is incomplete:
 | 
						|
the locale is always assumed to be the default one of 1003.2,
 | 
						|
and only the collating elements etc. of that locale are available.
 | 
						|
.PP
 | 
						|
The back-reference code is subtle and doubts linger about its correctness
 | 
						|
in complex cases.
 | 
						|
.PP
 | 
						|
.B Regexec
 | 
						|
performance is poor.
 | 
						|
This will improve with later releases.
 | 
						|
.I Nmatch
 | 
						|
exceeding 0 is expensive;
 | 
						|
.I nmatch
 | 
						|
exceeding 1 is worse.
 | 
						|
.B Regexec
 | 
						|
is largely insensitive to RE complexity \fIexcept\fR that back
 | 
						|
references are massively expensive.
 | 
						|
RE length does matter; in particular, there is a strong speed bonus
 | 
						|
for keeping RE length under about 30 characters,
 | 
						|
with most special characters counting roughly double.
 | 
						|
.PP
 | 
						|
.B Regcomp
 | 
						|
implements bounded repetitions by macro expansion,
 | 
						|
which is costly in time and space if counts are large
 | 
						|
or bounded repetitions are nested.
 | 
						|
An RE like, say,
 | 
						|
`((((a{1,100}){1,100}){1,100}){1,100}){1,100}'
 | 
						|
will (eventually) run almost any existing machine out of swap space.
 | 
						|
.PP
 | 
						|
There are suspected problems with response to obscure error conditions.
 | 
						|
Notably,
 | 
						|
certain kinds of internal overflow,
 | 
						|
produced only by truly enormous REs or by multiply nested bounded repetitions,
 | 
						|
are probably not handled well.
 | 
						|
.PP
 | 
						|
Due to a mistake in 1003.2, things like `a)b' are legal REs because `)' is
 | 
						|
a special character only in the presence of a previous unmatched `('.
 | 
						|
This can't be fixed until the spec is fixed.
 | 
						|
.PP
 | 
						|
The standard's definition of back references is vague.
 | 
						|
For example, does
 | 
						|
`a\e(\e(b\e)*\e2\e)*d' match `abbbd'?
 | 
						|
Until the standard is clarified,
 | 
						|
behavior in such cases should not be relied on.
 | 
						|
.PP
 | 
						|
The implementation of word-boundary matching is a bit of a kludge,
 | 
						|
and bugs may lurk in combinations of word-boundary matching and anchoring.
 |