Switch to the current NetBSD regex implementation.

Change-Id: If32b28dd85d6a7ab8957ab81d19fa4c0de9499d5
This commit is contained in:
Elliott Hughes 2012-08-14 15:32:42 -07:00
parent 53493a9b26
commit cc213f871b
18 changed files with 1832 additions and 717 deletions

View file

@ -9,7 +9,6 @@ libc_common_src_files := \
unistd/abort.c \
unistd/alarm.c \
unistd/brk.c \
unistd/creat.c \
unistd/daemon.c \
unistd/eventfd.c \
unistd/exec.c \
@ -328,13 +327,14 @@ libc_common_src_files := \
netbsd/nameser/ns_netint.c \
netbsd/nameser/ns_print.c \
netbsd/nameser/ns_samedomain.c \
regex/regcomp.c \
regex/regerror.c \
regex/regexec.c \
regex/regfree.c \
libc_upstream_netbsd_src_files := \
upstream-netbsd/libc/compat-43/creat.c \
upstream-netbsd/libc/gen/nice.c \
upstream-netbsd/libc/regex/regcomp.c \
upstream-netbsd/libc/regex/regerror.c \
upstream-netbsd/libc/regex/regexec.c \
upstream-netbsd/libc/regex/regfree.c \
upstream-netbsd/libc/stdlib/tdelete.c \
upstream-netbsd/libc/stdlib/tfind.c \
upstream-netbsd/libc/stdlib/tsearch.c \

View file

@ -2211,7 +2211,6 @@ SUCH DAMAGE.
-------------------------------------------------------------------
Copyright (c) 1992, 1993, 1994 Henry Spencer.
Copyright (c) 1992, 1993, 1994
The Regents of the University of California. All rights reserved.
@ -2244,6 +2243,41 @@ SUCH DAMAGE.
-------------------------------------------------------------------
Copyright (c) 1992, 1993, 1994 Henry Spencer.
This code is derived from software contributed to Berkeley by
Henry Spencer.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. All advertising materials mentioning features or use of this software
must display the following acknowledgement:
This product includes software developed by the University of
California, Berkeley and its contributors.
4. Neither the name of the University nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
-------------------------------------------------------------------
Copyright (c) 1993
The Regents of the University of California. All rights reserved.

View file

@ -1,130 +0,0 @@
/* $OpenBSD: regerror.c,v 1.13 2005/08/05 13:03:00 espie Exp $ */
/*-
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Henry Spencer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)regerror.c 8.4 (Berkeley) 3/20/94
*/
#include <sys/types.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>
#include <stdlib.h>
#include <regex.h>
#include "utils.h"
static char *regatoi(const regex_t *, char *, int);
/*
 * Error table consulted by regerror() and regatoi().  Each row pairs a
 * REG_* error value with the spelling of that symbol and with the message
 * text for it.  The last row (code 0) ends the table; a lookup that finds
 * no match stops on it and so picks up its fallback message.
 */
#define	RERR_ROW(sym, text)	{ sym, #sym, text }
static const struct rerr {
	int code;		/* REG_* error value; 0 marks the end */
	char *name;		/* the REG_* symbol, as a string */
	char *explain;		/* message text for the error */
} rerrs[] = {
	RERR_ROW(REG_NOMATCH,	"regexec() failed to match"),
	RERR_ROW(REG_BADPAT,	"invalid regular expression"),
	RERR_ROW(REG_ECOLLATE,	"invalid collating element"),
	RERR_ROW(REG_ECTYPE,	"invalid character class"),
	RERR_ROW(REG_EESCAPE,	"trailing backslash (\\)"),
	RERR_ROW(REG_ESUBREG,	"invalid backreference number"),
	RERR_ROW(REG_EBRACK,	"brackets ([ ]) not balanced"),
	RERR_ROW(REG_EPAREN,	"parentheses not balanced"),
	RERR_ROW(REG_EBRACE,	"braces not balanced"),
	RERR_ROW(REG_BADBR,	"invalid repetition count(s)"),
	RERR_ROW(REG_ERANGE,	"invalid character range"),
	RERR_ROW(REG_ESPACE,	"out of memory"),
	RERR_ROW(REG_BADRPT,	"repetition-operator operand invalid"),
	RERR_ROW(REG_EMPTY,	"empty (sub)expression"),
	RERR_ROW(REG_ASSERT,	"\"can't happen\" -- you found a bug"),
	RERR_ROW(REG_INVARG,	"invalid argument to regex routine"),
	{ 0,	"",	"*** unknown regexp error code ***" }
};
#undef RERR_ROW
/*
 - regerror - the interface to error numbers
 = extern size_t regerror(int, const regex_t *, char *, size_t);
 *
 * Produces the text for errcode and, when errbuf_size is non-zero, copies
 * it (truncated if necessary) into errbuf.  The return value is the size
 * needed for the complete text, terminating NUL included.
 *
 * Three kinds of request are handled:
 *   - errcode == REG_ATOI: the text is the decimal value of the error
 *     whose name is found in preg->re_endp (see regatoi());
 *   - REG_ITOA set in errcode: the text is the symbolic name of the
 *     error, or "REG_0x<hex>" when the code is not in the table;
 *   - otherwise: the text is the explanation from the table.
 */
/* ARGSUSED */
size_t
regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
{
	const struct rerr *entry;
	const int code = errcode &~ REG_ITOA;
	char numbuf[50];
	char *msg;
	size_t needed;

	if (errcode == REG_ATOI) {
		msg = regatoi(preg, numbuf, sizeof numbuf);
	} else {
		/* Stop on the matching row, or on the code-0 terminator. */
		entry = rerrs;
		while (entry->code != 0 && entry->code != code)
			entry++;

		if ((errcode & REG_ITOA) == 0) {
			msg = entry->explain;
		} else {
			if (entry->code == 0) {
				(void)snprintf(numbuf, sizeof numbuf,
				    "REG_0x%x", code);
			} else {
				assert(strlen(entry->name) < sizeof(numbuf));
				(void) strlcpy(numbuf, entry->name,
				    sizeof numbuf);
			}
			msg = numbuf;
		}
	}

	needed = strlen(msg) + 1;
	if (errbuf_size > 0)
		strlcpy(errbuf, msg, errbuf_size);

	return(needed);
}
/*
 - regatoi - internal routine to implement REG_ATOI
 *
 * Looks up the error name held in preg->re_endp in the rerrs table.  On a
 * match the numeric code is formatted in decimal into localbuf (capacity
 * localbufsize) and localbuf is returned; otherwise the constant string
 * "0" is returned and localbuf is left untouched.
 */
static char *
regatoi(const regex_t *preg, char *localbuf, int localbufsize)
{
	const struct rerr *entry = rerrs;

	while (entry->code != 0) {
		if (strcmp(entry->name, preg->re_endp) == 0) {
			(void)snprintf(localbuf, localbufsize, "%d",
			    entry->code);
			return(localbuf);
		}
		entry++;
	}

	/* ran into the terminator: no such name */
	return("0");
}

View file

@ -1,71 +0,0 @@
/* $OpenBSD: regfree.c,v 1.7 2005/08/05 13:03:00 espie Exp $ */
/*-
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Henry Spencer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)regfree.c 8.3 (Berkeley) 3/20/94
*/
#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#include <regex.h>
#include "utils.h"
#include "regex2.h"
/*
 - regfree - free everything
 *
 * Releases the internal state hanging off preg.  Nothing is done unless
 * both preg->re_magic and the internal structure's magic carry the
 * expected values; in that case both are zeroed first, so the structures
 * no longer look valid, and then the internal arrays and the structure
 * itself are freed.
 */
void
regfree(regex_t *preg)
{
	struct re_guts *guts;

	/* It would be nice to complain about a bad handle, but it is hard. */
	if (preg->re_magic != MAGIC1)
		return;

	guts = preg->re_g;
	if (guts == NULL || guts->magic != MAGIC2)
		return;

	/* Invalidate both levels before releasing any memory. */
	preg->re_magic = 0;
	guts->magic = 0;

	/* free(NULL) is a no-op, so unset members need no special casing. */
	free(guts->strip);
	free(guts->sets);
	free(guts->setbits);
	free(guts->must);
	free(guts);
}

View file

@ -1,34 +0,0 @@
/*
* Copyright (C) 2008 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <unistd.h>
#include <fcntl.h>
/*
 * creat - open pathname for writing, creating it with permissions `mode`
 * if it does not exist and truncating it otherwise.  This is exactly
 * open() with O_WRONLY|O_TRUNC|O_CREAT; the result of open() is returned
 * unchanged.
 */
int creat(const char* pathname, mode_t mode)
{
    const int flags = O_CREAT | O_TRUNC | O_WRONLY;

    return open(pathname, flags, mode);
}

View file

@ -1,13 +1,9 @@
/* $OpenBSD: utils.h,v 1.4 2003/06/02 20:18:36 millert Exp $ */
/* $NetBSD: creat.c,v 1.10 2003/08/07 16:42:39 agc Exp $ */
/*-
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
/*
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Henry Spencer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@ -31,25 +27,26 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)utils.h 8.3 (Berkeley) 3/20/94
*/
/* utility definitions */
#define DUPMAX 255
#define INFINITY (DUPMAX + 1)
#define NC (CHAR_MAX - CHAR_MIN + 1)
typedef unsigned char uch;
#include <sys/cdefs.h>
#if defined(LIBC_SCCS) && !defined(lint)
#if 0
static char sccsid[] = "@(#)creat.c 8.1 (Berkeley) 6/2/93";
#else
__RCSID("$NetBSD: creat.c,v 1.10 2003/08/07 16:42:39 agc Exp $");
#endif
#endif /* LIBC_SCCS and not lint */
/* switch off assertions (if not already off) if no REDEBUG */
#ifndef REDEBUG
#ifndef NDEBUG
#define NDEBUG /* no assertions please */
#endif
#endif
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
/* for old systems with bcopy() but no memmove() */
#ifdef USEBCOPY
#define memmove(d, s, c) bcopy(s, d, c)
#endif
/*
 * creat - create (or truncate) the file at path and open it write-only.
 * Implemented as open() with O_WRONLY|O_CREAT|O_TRUNC and the given mode;
 * whatever open() returns is passed back to the caller.
 */
int
creat(const char *path, mode_t mode)
{
	const int flags = O_CREAT | O_TRUNC | O_WRONLY;

	_DIAGASSERT(path != NULL);

	return(open(path, flags, mode));
}

View file

@ -1,7 +1,6 @@
/* $OpenBSD: cclass.h,v 1.5 2003/06/02 20:18:36 millert Exp $ */
/* $NetBSD: cclass.h,v 1.7 2003/08/07 16:43:19 agc Exp $ */
/*-
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
@ -35,34 +34,71 @@
* @(#)cclass.h 8.3 (Berkeley) 3/20/94
*/
/*-
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
*
* This code is derived from software contributed to Berkeley by
* Henry Spencer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)cclass.h 8.3 (Berkeley) 3/20/94
*/
/* character-class table */
static const struct cclass {
char *name;
char *chars;
char *multis;
const char *name;
const char *chars;
const char *multis;
} cclasses[] = {
{ "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
0123456789", ""} ,
0123456789", "" },
{ "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
""} ,
{ "blank", " \t", ""} ,
"" },
{ "blank", " \t", "" },
{ "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
\25\26\27\30\31\32\33\34\35\36\37\177", ""} ,
{ "digit", "0123456789", ""} ,
\25\26\27\30\31\32\33\34\35\36\37\177", "" },
{ "digit", "0123456789", "" },
{ "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
""} ,
"" },
{ "lower", "abcdefghijklmnopqrstuvwxyz",
""} ,
"" },
{ "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
""} ,
"" },
{ "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
""} ,
{ "space", "\t\n\v\f\r ", ""} ,
"" },
{ "space", "\t\n\v\f\r ", "" },
{ "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
""} ,
"" },
{ "xdigit", "0123456789ABCDEFabcdef",
""} ,
"" },
{ NULL, 0, "" }
};

View file

@ -1,7 +1,6 @@
/* $OpenBSD: cname.h,v 1.5 2003/06/02 20:18:36 millert Exp $ */
/* $NetBSD: cname.h,v 1.7 2003/08/07 16:43:19 agc Exp $ */
/*-
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
@ -35,9 +34,46 @@
* @(#)cname.h 8.3 (Berkeley) 3/20/94
*/
/*-
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
*
* This code is derived from software contributed to Berkeley by
* Henry Spencer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)cname.h 8.3 (Berkeley) 3/20/94
*/
/* character-name table */
static const struct cname {
char *name;
const char *name;
char code;
} cnames[] = {
{ "NUL", '\0' },
@ -135,5 +171,5 @@ static const struct cname {
{ "right-curly-bracket", '}' },
{ "tilde", '~' },
{ "DEL", '\177' },
{ NULL, 0 }
{ NULL, 0 },
};

View file

@ -1,7 +1,6 @@
/* $OpenBSD: engine.c,v 1.15 2005/08/05 13:03:00 espie Exp $ */
/* $NetBSD: engine.c,v 1.24 2012/03/13 21:13:42 christos Exp $ */
/*-
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
@ -35,6 +34,43 @@
* @(#)engine.c 8.5 (Berkeley) 3/20/94
*/
/*-
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
*
* This code is derived from software contributed to Berkeley by
* Henry Spencer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)engine.c 8.5 (Berkeley) 3/20/94
*/
/*
* The matching engine and friends. This file is #included by regexec.c
* after suitable #defines of a variety of macros used herein, so that
@ -72,11 +108,11 @@ struct match {
struct re_guts *g;
int eflags;
regmatch_t *pmatch; /* [nsub+1] (0 element unused) */
char *offp; /* offsets work from here */
char *beginp; /* start of string -- virtual NUL precedes */
char *endp; /* end of string -- virtual NUL here */
char *coldp; /* can be no match starting before here */
char **lastpos; /* [nplus+1] */
const char *offp; /* offsets work from here */
const char *beginp; /* start of string -- virtual NUL precedes */
const char *endp; /* end of string -- virtual NUL here */
const char *coldp; /* can be no match starting before here */
const char **lastpos; /* [nplus+1] */
STATEVARS;
states st; /* current states */
states fresh; /* states for a fresh start */
@ -84,13 +120,18 @@ struct match {
states empty; /* empty set of states */
};
static int matcher(struct re_guts *, char *, size_t, regmatch_t[], int);
static char *dissect(struct match *, char *, char *, sopno, sopno);
static char *backref(struct match *, char *, char *, sopno, sopno, sopno, int);
static char *fast(struct match *, char *, char *, sopno, sopno);
static char *slow(struct match *, char *, char *, sopno, sopno);
static states step(struct re_guts *, sopno, sopno, states, int, states);
#define MAX_RECURSION 100
/* ========= begin header generated by ./mkh ========= */
#ifdef __cplusplus
extern "C" {
#endif
/* === engine.c === */
static int matcher(struct re_guts *g, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
static const char *dissect(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
static const char *backref(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, sopno lev);
static const char *fast(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
static const char *slow(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
static states step(struct re_guts *g, sopno start, sopno stop, states bef, int ch, states aft);
#define BOL (OUT+1)
#define EOL (BOL+1)
#define BOLEOL (BOL+2)
@ -101,19 +142,24 @@ static states step(struct re_guts *, sopno, sopno, states, int, states);
#define NONCHAR(c) ((c) > CHAR_MAX)
#define NNONCHAR (CODEMAX-CHAR_MAX)
#ifdef REDEBUG
static void print(struct match *, char *, states, int, FILE *);
static void print(struct match *m, char *caption, states st, int ch, FILE *d);
#endif
#ifdef REDEBUG
static void at(struct match *, char *, char *, char *, sopno, sopno);
static void at(struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst);
#endif
#ifdef REDEBUG
static char *pchar(int);
static char *pchar(int ch);
#endif
#ifdef __cplusplus
}
#endif
/* ========= end header generated by ./mkh ========= */
#ifdef REDEBUG
#define SP(t, s, c) print(m, t, s, c, stdout)
#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2)
#define NOTE(str) { if (m->eflags&REG_TRACE) (void)printf("=%s\n", (str)); }
#define NOTE(str) { if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
static int nope = 0;
#else
#define SP(t, s, c) /* nothing */
@ -123,27 +169,39 @@ static int nope = 0;
/*
- matcher - the actual matching engine
== static int matcher(struct re_guts *g, char *string, \
== size_t nmatch, regmatch_t pmatch[], int eflags);
*/
static int /* 0 success, REG_NOMATCH failure */
matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[],
matcher(
struct re_guts *g,
const char *string,
size_t nmatch,
regmatch_t pmatch[],
int eflags)
{
char *endp;
int i;
const char *endp;
size_t i;
struct match mv;
struct match *m = &mv;
char *dp;
const char *dp;
const sopno gf = g->firststate+1; /* +1 for OEND */
const sopno gl = g->laststate;
char *start;
char *stop;
const char *start;
const char *stop;
int error = 0;
_DIAGASSERT(g != NULL);
_DIAGASSERT(string != NULL);
/* pmatch checked below */
/* simplify the situation where possible */
if (g->cflags&REG_NOSUB)
nmatch = 0;
if (eflags&REG_STARTEND) {
start = string + pmatch[0].rm_so;
stop = string + pmatch[0].rm_eo;
_DIAGASSERT(pmatch != NULL);
start = string + (size_t)pmatch[0].rm_so;
stop = string + (size_t)pmatch[0].rm_eo;
} else {
start = string;
stop = start + strlen(start);
@ -154,8 +212,8 @@ matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[],
/* prescreening; this does wonders for this rather slow code */
if (g->must != NULL) {
for (dp = start; dp < stop; dp++)
if (*dp == g->must[0] && stop - dp >= g->mlen &&
memcmp(dp, g->must, (size_t)g->mlen) == 0)
if (*dp == g->must[0] && (size_t)(stop - dp) >= g->mlen &&
memcmp(dp, g->must, g->mlen) == 0)
break;
if (dp == stop) /* we didn't find g->must */
return(REG_NOMATCH);
@ -180,10 +238,8 @@ matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[],
for (;;) {
endp = fast(m, start, stop, gf, gl);
if (endp == NULL) { /* a miss */
free(m->pmatch);
free(m->lastpos);
STATETEARDOWN(m);
return(REG_NOMATCH);
error = REG_NOMATCH;
goto done;
}
if (nmatch == 0 && !g->backrefs)
break; /* no further info needed */
@ -206,25 +262,24 @@ matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[],
m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
sizeof(regmatch_t));
if (m->pmatch == NULL) {
STATETEARDOWN(m);
return(REG_ESPACE);
error = REG_ESPACE;
goto done;
}
for (i = 1; i <= (int)m->g->nsub; i++)
m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
for (i = 1; i <= m->g->nsub; i++)
m->pmatch[i].rm_so = m->pmatch[i].rm_eo = (regoff_t)-1;
if (!g->backrefs && !(m->eflags&REG_BACKR)) {
NOTE("dissecting");
dp = dissect(m, m->coldp, endp, gf, gl);
} else {
if (g->nplus > 0 && m->lastpos == NULL)
m->lastpos = (char **)malloc((g->nplus+1) *
sizeof(char *));
m->lastpos = malloc((g->nplus+1) *
sizeof(const char *));
if (g->nplus > 0 && m->lastpos == NULL) {
free(m->pmatch);
STATETEARDOWN(m);
return(REG_ESPACE);
error = REG_ESPACE;
goto done;
}
NOTE("backref dissect");
dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
}
if (dp != NULL)
break;
@ -242,12 +297,12 @@ matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[],
/* try it on a shorter possibility */
#ifndef NDEBUG
for (i = 1; i <= m->g->nsub; i++) {
assert(m->pmatch[i].rm_so == -1);
assert(m->pmatch[i].rm_eo == -1);
assert(m->pmatch[i].rm_so == (regoff_t)-1);
assert(m->pmatch[i].rm_eo == (regoff_t)-1);
}
#endif
NOTE("backoff dissect");
dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
}
assert(dp == NULL || dp == endp);
if (dp != NULL) /* found a shorter one */
@ -255,54 +310,72 @@ matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[],
/* despite initial appearances, there is no match here */
NOTE("false alarm");
if (m->coldp == stop)
break;
start = m->coldp + 1; /* recycle starting later */
assert(start <= stop);
}
/* fill in the details if requested */
if (nmatch > 0) {
_DIAGASSERT(pmatch != NULL);
pmatch[0].rm_so = m->coldp - m->offp;
pmatch[0].rm_eo = endp - m->offp;
}
if (nmatch > 1) {
assert(m->pmatch != NULL);
for (i = 1; i < (ssize_t)nmatch; i++)
if (i <= (int)m->g->nsub)
for (i = 1; i < nmatch; i++)
if (i <= m->g->nsub)
pmatch[i] = m->pmatch[i];
else {
pmatch[i].rm_so = -1;
pmatch[i].rm_eo = -1;
pmatch[i].rm_so = (regoff_t)-1;
pmatch[i].rm_eo = (regoff_t)-1;
}
}
if (m->pmatch != NULL)
free((char *)m->pmatch);
if (m->lastpos != NULL)
free((char *)m->lastpos);
done:
if (m->pmatch != NULL) {
free(m->pmatch);
m->pmatch = NULL;
}
if (m->lastpos != NULL) {
free(m->lastpos);
m->lastpos = NULL;
}
STATETEARDOWN(m);
return(0);
return error;
}
/*
- dissect - figure out what matched what, no back references
== static const char *dissect(struct match *m, const char *start, \
== const char *stop, sopno startst, sopno stopst);
*/
static char * /* == stop (success) always */
dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
static const char * /* == stop (success) always */
dissect(
struct match *m,
const char *start,
const char *stop,
sopno startst,
sopno stopst)
{
int i;
sopno ss; /* start sop of current subRE */
sopno es; /* end sop of current subRE */
char *sp; /* start of string matched by it */
char *stp; /* string matched by it cannot pass here */
char *rest; /* start of rest of string */
char *tail; /* string unmatched by rest of RE */
const char *sp; /* start of string matched by it */
const char *stp; /* string matched by it cannot pass here */
const char *rest; /* start of rest of string */
const char *tail; /* string unmatched by rest of RE */
sopno ssub; /* start sop of subsubRE */
sopno esub; /* end sop of subsubRE */
char *ssp; /* start of string matched by subsubRE */
char *sep; /* end of string matched by subsubRE */
char *oldssp; /* previous ssp */
char *dp;
const char *ssp; /* start of string matched by subsubRE */
const char *sep; /* end of string matched by subsubRE */
const char *oldssp; /* previous ssp */
#ifndef NDEBUG
const char *dp;
#endif
_DIAGASSERT(m != NULL);
_DIAGASSERT(start != NULL);
_DIAGASSERT(stop != NULL);
AT("diss", start, stop, startst, stopst);
sp = start;
@ -361,7 +434,12 @@ dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
esub = es - 1;
/* did innards match? */
if (slow(m, sp, rest, ssub, esub) != NULL) {
dp = dissect(m, sp, rest, ssub, esub);
#ifdef NDEBUG
(void)
#else
dp =
#endif
dissect(m, sp, rest, ssub, esub);
assert(dp == rest);
} else /* no */
assert(sp == rest);
@ -399,7 +477,12 @@ dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
}
assert(sep == rest); /* must exhaust substring */
assert(slow(m, ssp, sep, ssub, esub) == rest);
dp = dissect(m, ssp, sep, ssub, esub);
#ifdef NDEBUG
(void)
#else
dp =
#endif
dissect(m, ssp, sep, ssub, esub);
assert(dp == sep);
sp = rest;
break;
@ -434,7 +517,12 @@ dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
else
assert(OP(m->g->strip[esub]) == O_CH);
}
dp = dissect(m, sp, rest, ssub, esub);
#ifdef NDEBUG
(void)
#else
dp =
#endif
dissect(m, sp, rest, ssub, esub);
assert(dp == rest);
sp = rest;
break;
@ -467,24 +555,35 @@ dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
/*
- backref - figure out what matched what, figuring in back references
== static const char *backref(struct match *m, const char *start, \
== const char *stop, sopno startst, sopno stopst, sopno lev);
*/
static char * /* == stop (success) or NULL (failure) */
backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst,
sopno lev, int rec) /* PLUS nesting level */
static const char * /* == stop (success) or NULL (failure) */
backref(
struct match *m,
const char *start,
const char *stop,
sopno startst,
sopno stopst,
sopno lev) /* PLUS nesting level */
{
int i;
sopno ss; /* start sop of current subRE */
char *sp; /* start of string matched by it */
const char *sp; /* start of string matched by it */
sopno ssub; /* start sop of subsubRE */
sopno esub; /* end sop of subsubRE */
char *ssp; /* start of string matched by subsubRE */
char *dp;
const char *ssp; /* start of string matched by subsubRE */
const char *dp;
size_t len;
int hard;
sop s;
regoff_t offsave;
cset *cs;
_DIAGASSERT(m != NULL);
_DIAGASSERT(start != NULL);
_DIAGASSERT(stop != NULL);
AT("back", start, stop, startst, stopst);
sp = start;
@ -572,51 +671,50 @@ backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst,
case OBACK_: /* the vilest depths */
i = OPND(s);
assert(0 < i && i <= m->g->nsub);
if (m->pmatch[i].rm_eo == -1)
if (m->pmatch[i].rm_eo == (regoff_t)-1)
return(NULL);
assert(m->pmatch[i].rm_so != -1);
len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
if (len == 0 && rec++ > MAX_RECURSION)
assert(m->pmatch[i].rm_so != (regoff_t)-1);
len = (size_t)(m->pmatch[i].rm_eo - m->pmatch[i].rm_so);
if (len == 0)
return(NULL);
assert(stop - m->beginp >= len);
if (sp > stop - len)
return(NULL); /* not enough left to match */
ssp = m->offp + m->pmatch[i].rm_so;
ssp = m->offp + (size_t)m->pmatch[i].rm_so;
if (memcmp(sp, ssp, len) != 0)
return(NULL);
while (m->g->strip[ss] != SOP(O_BACK, i))
ss++;
return(backref(m, sp+len, stop, ss+1, stopst, lev, rec));
break;
return(backref(m, sp+len, stop, ss+1, stopst, lev));
case OQUEST_: /* to null or not */
dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
dp = backref(m, sp, stop, ss+1, stopst, lev);
if (dp != NULL)
return(dp); /* not */
return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec));
break;
return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
case OPLUS_:
assert(m->lastpos != NULL);
assert(lev+1 <= m->g->nplus);
m->lastpos[lev+1] = sp;
return(backref(m, sp, stop, ss+1, stopst, lev+1, rec));
break;
return(backref(m, sp, stop, ss+1, stopst, lev+1));
case O_PLUS:
if (sp == m->lastpos[lev]) /* last pass matched null */
return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
return(backref(m, sp, stop, ss+1, stopst, lev-1));
/* try another pass */
m->lastpos[lev] = sp;
dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec);
dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
if (dp == NULL)
return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
else
return(dp);
break;
dp = backref(m, sp, stop, ss+1, stopst, lev-1);
return(dp);
case OCH_: /* find the right one, if any */
ssub = ss + 1;
esub = ss + OPND(s) - 1;
assert(OP(m->g->strip[esub]) == OOR1);
for (;;) { /* find first matching branch */
dp = backref(m, sp, stop, ssub, esub, lev, rec);
dp = backref(m, sp, stop, ssub, esub, lev);
if (dp != NULL)
return(dp);
/* that one missed, try next one */
@ -631,29 +729,29 @@ backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst,
else
assert(OP(m->g->strip[esub]) == O_CH);
}
break;
case OLPAREN: /* must undo assignment if rest fails */
i = OPND(s);
assert(0 < i && i <= m->g->nsub);
offsave = m->pmatch[i].rm_so;
m->pmatch[i].rm_so = sp - m->offp;
dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
dp = backref(m, sp, stop, ss+1, stopst, lev);
if (dp != NULL)
return(dp);
m->pmatch[i].rm_so = offsave;
return(NULL);
break;
case ORPAREN: /* must undo assignment if rest fails */
i = OPND(s);
assert(0 < i && i <= m->g->nsub);
offsave = m->pmatch[i].rm_eo;
m->pmatch[i].rm_eo = sp - m->offp;
dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
dp = backref(m, sp, stop, ss+1, stopst, lev);
if (dp != NULL)
return(dp);
m->pmatch[i].rm_eo = offsave;
return(NULL);
break;
default: /* uh oh */
assert(nope);
break;
@ -662,24 +760,35 @@ backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst,
/* "can't happen" */
assert(nope);
/* NOTREACHED */
return 0;
return NULL;
}
/*
- fast - step through the string at top speed
== static const char *fast(struct match *m, const char *start, \
== const char *stop, sopno startst, sopno stopst);
*/
static char * /* where tentative match ended, or NULL */
fast(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
static const char * /* where tentative match ended, or NULL */
fast(
struct match *m,
const char *start,
const char *stop,
sopno startst,
sopno stopst)
{
states st = m->st;
states fresh = m->fresh;
states tmp = m->tmp;
char *p = start;
const char *p = start;
int c = (start == m->beginp) ? OUT : *(start-1);
int lastc; /* previous c */
int flagch;
int i;
char *coldp; /* last p after which no match was underway */
size_t i;
const char *coldp; /* last p after which no match was underway */
_DIAGASSERT(m != NULL);
_DIAGASSERT(start != NULL);
_DIAGASSERT(stop != NULL);
CLEAR(st);
SET1(st, startst);
@ -751,19 +860,30 @@ fast(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
/*
- slow - step through the string more deliberately
== static const char *slow(struct match *m, const char *start, \
== const char *stop, sopno startst, sopno stopst);
*/
static char * /* where it ended */
slow(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
static const char * /* where it ended */
slow(
struct match *m,
const char *start,
const char *stop,
sopno startst,
sopno stopst)
{
states st = m->st;
states empty = m->empty;
states tmp = m->tmp;
char *p = start;
const char *p = start;
int c = (start == m->beginp) ? OUT : *(start-1);
int lastc; /* previous c */
int flagch;
int i;
char *matchp; /* last p at which a match ended */
size_t i;
const char *matchp; /* last p at which a match ended */
_DIAGASSERT(m != NULL);
_DIAGASSERT(start != NULL);
_DIAGASSERT(stop != NULL);
AT("slow", start, stop, startst, stopst);
CLEAR(st);
@ -831,9 +951,21 @@ slow(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
/*
- step - map set of states reachable before char to set reachable after
== static states step(struct re_guts *g, sopno start, sopno stop, \
== states bef, int ch, states aft);
== #define BOL (OUT+1)
== #define EOL (BOL+1)
== #define BOLEOL (BOL+2)
== #define NOTHING (BOL+3)
== #define BOW (BOL+4)
== #define EOW (BOL+5)
== #define CODEMAX (BOL+5) // highest code used
== #define NONCHAR(c) ((c) > CHAR_MAX)
== #define NNONCHAR (CODEMAX-CHAR_MAX)
*/
static states
step(struct re_guts *g,
step(
struct re_guts *g,
sopno start, /* start state within strip */
sopno stop, /* state after stop state within strip */
states bef, /* states reachable before */
@ -847,6 +979,8 @@ step(struct re_guts *g,
sopno look;
int i;
_DIAGASSERT(g != NULL);
for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
s = g->strip[pc];
switch (OP(s)) {
@ -948,47 +1082,79 @@ step(struct re_guts *g,
#ifdef REDEBUG
/*
- print - print a set of states
== #ifdef REDEBUG
== static void print(struct match *m, char *caption, states st, \
== int ch, FILE *d);
== #endif
*/
static void
print(struct match *m, char *caption, states st, int ch, FILE *d)
print(
struct match *m,
char *caption,
states st,
int ch,
FILE *d)
{
struct re_guts *g = m->g;
int i;
int first = 1;
_DIAGASSERT(m != NULL);
_DIAGASSERT(caption != NULL);
if (!(m->eflags&REG_TRACE))
return;
(void)fprintf(d, "%s", caption);
_DIAGASSERT(d != NULL);
fprintf(d, "%s", caption);
if (ch != '\0')
(void)fprintf(d, " %s", pchar(ch));
fprintf(d, " %s", pchar(ch));
for (i = 0; i < g->nstates; i++)
if (ISSET(st, i)) {
(void)fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
first = 0;
}
(void)fprintf(d, "\n");
fprintf(d, "\n");
}
/*
- at - print current situation
== #ifdef REDEBUG
== static void at(struct match *m, char *title, char *start, char *stop, \
== sopno startst, sopno stopst);
== #endif
*/
static void
at(struct match *m, char *title, char *start, char *stop, sopno startst,
at(
struct match *m,
char *title,
char *start,
char *stop,
sopno startst,
sopno stopst)
{
_DIAGASSERT(m != NULL);
_DIAGASSERT(title != NULL);
_DIAGASSERT(start != NULL);
_DIAGASSERT(stop != NULL);
if (!(m->eflags&REG_TRACE))
return;
(void)printf("%s %s-", title, pchar(*start));
(void)printf("%s ", pchar(*stop));
(void)printf("%ld-%ld\n", (long)startst, (long)stopst);
printf("%s %s-", title, pchar(*start));
printf("%s ", pchar(*stop));
printf("%ld-%ld\n", (long)startst, (long)stopst);
}
#ifndef PCHARDONE
#define PCHARDONE /* never again */
/*
- pchar - make a character printable
== #ifdef REDEBUG
== static char *pchar(int ch);
== #endif
*
* Is this identical to regchar() over in debug.c? Well, yes. But a
* duplicate here avoids having a debugging-capable regexec.o tied to
@ -996,7 +1162,8 @@ at(struct match *m, char *title, char *start, char *stop, sopno startst,
* the non-debug compilation anyway, so it doesn't matter much.
*/
static char * /* -> representation */
pchar(int ch)
pchar(
int ch)
{
static char pbuf[10];

View file

@ -0,0 +1,223 @@
/* $NetBSD: regerror.c,v 1.23 2007/02/09 23:44:18 junyoung Exp $ */
/*-
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Henry Spencer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)regerror.c 8.4 (Berkeley) 3/20/94
*/
/*-
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
*
* This code is derived from software contributed to Berkeley by
* Henry Spencer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)regerror.c 8.4 (Berkeley) 3/20/94
*/
#include <sys/cdefs.h>
#if defined(LIBC_SCCS) && !defined(lint)
#if 0
static char sccsid[] = "@(#)regerror.c 8.4 (Berkeley) 3/20/94";
#else
__RCSID("$NetBSD: regerror.c,v 1.23 2007/02/09 23:44:18 junyoung Exp $");
#endif
#endif /* LIBC_SCCS and not lint */
#include "namespace.h"
#include <sys/types.h>
#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <regex.h>
#ifdef __weak_alias
__weak_alias(regerror,_regerror)
#endif
#include "utils.h"
/* ========= begin header generated by ./mkh ========= */
#ifdef __cplusplus
extern "C" {
#endif
/* === regerror.c === */
static const char *regatoi(const regex_t *preg, char *localbuf, size_t buflen);
#ifdef __cplusplus
}
#endif
/* ========= end header generated by ./mkh ========= */
/*
= #define REG_NOMATCH 1
= #define REG_BADPAT 2
= #define REG_ECOLLATE 3
= #define REG_ECTYPE 4
= #define REG_EESCAPE 5
= #define REG_ESUBREG 6
= #define REG_EBRACK 7
= #define REG_EPAREN 8
= #define REG_EBRACE 9
= #define REG_BADBR 10
= #define REG_ERANGE 11
= #define REG_ESPACE 12
= #define REG_BADRPT 13
= #define REG_EMPTY 14
= #define REG_ASSERT 15
= #define REG_INVARG 16
= #define REG_ATOI 255 // convert name to number (!)
= #define REG_ITOA 0400 // convert number to name (!)
*/
/*
 * One row of the error table: the numeric REG_* value, its symbolic
 * name as a string, and the human-readable explanation.
 */
struct rerr {
	int code;
	const char *name;
	const char *explain;
};

/*
 * Table of all known error codes.  The final row (code 0) terminates
 * the table; its explanation is what lookups fall back to when no
 * other row matches.
 */
static const struct rerr rerrs[] = {
	{ REG_NOMATCH,	"REG_NOMATCH",	"regexec() failed to match" },
	{ REG_BADPAT,	"REG_BADPAT",	"invalid regular expression" },
	{ REG_ECOLLATE,	"REG_ECOLLATE",	"invalid collating element" },
	{ REG_ECTYPE,	"REG_ECTYPE",	"invalid character class" },
	{ REG_EESCAPE,	"REG_EESCAPE",	"trailing backslash (\\)" },
	{ REG_ESUBREG,	"REG_ESUBREG",	"invalid backreference number" },
	{ REG_EBRACK,	"REG_EBRACK",	"brackets ([ ]) not balanced" },
	{ REG_EPAREN,	"REG_EPAREN",	"parentheses not balanced" },
	{ REG_EBRACE,	"REG_EBRACE",	"braces not balanced" },
	{ REG_BADBR,	"REG_BADBR",	"invalid repetition count(s)" },
	{ REG_ERANGE,	"REG_ERANGE",	"invalid character range" },
	{ REG_ESPACE,	"REG_ESPACE",	"out of memory" },
	{ REG_BADRPT,	"REG_BADRPT",	"repetition-operator operand invalid" },
	{ REG_EMPTY,	"REG_EMPTY",	"empty (sub)expression" },
	{ REG_ASSERT,	"REG_ASSERT",	"\"can't happen\" -- you found a bug" },
	{ REG_INVARG,	"REG_INVARG",	"invalid argument to regex routine" },
	{ 0,		"",		"*** unknown regexp error code ***" }
};
/*
 * regerror - the interface to error numbers
 * extern size_t regerror(int, const regex_t *, char *, size_t);
 *
 * Produces the text for errcode and copies it into errbuf:
 *  - errcode == REG_ATOI: preg->re_endp is taken as an error name and
 *    the result is that error's number in decimal ("0" if unknown);
 *  - errcode has REG_ITOA set: the result is the symbolic name of the
 *    remaining code, or "REG_0x<hex>" if the code is not in the table;
 *  - otherwise: the result is the explanatory message, falling back to
 *    the table terminator's message for unknown codes.
 *
 * At most errbuf_size bytes are written via strlcpy(); when errbuf_size
 * is 0 errbuf is not touched at all, so callers may pass NULL to query
 * the required size.  Returns the size needed for the complete string,
 * including its terminating NUL.
 */
/* ARGSUSED */
size_t
regerror(
    int errcode,
    const regex_t *preg,
    char *errbuf,
    size_t errbuf_size)
{
	const struct rerr *r;
	size_t len;
	int target = errcode &~ REG_ITOA;
	const char *s;
	char convbuf[50];

	_DIAGASSERT(errcode != REG_ATOI || preg != NULL);
	/* errbuf is only dereferenced when errbuf_size > 0 (see below). */
	_DIAGASSERT(errbuf != NULL || errbuf_size == 0);

	if (errcode == REG_ATOI)
		s = regatoi(preg, convbuf, sizeof convbuf);
	else {
		/* Stop on a match or on the code-0 terminator row. */
		for (r = rerrs; r->code != 0; r++)
			if (r->code == target)
				break;

		if (errcode & REG_ITOA) {
			if (r->code != 0) {
				(void)strlcpy(convbuf, r->name, sizeof convbuf);
			} else
				(void)snprintf(convbuf, sizeof convbuf,
				    "REG_0x%x", target);
			s = convbuf;
		} else
			s = r->explain;
	}

	len = strlen(s) + 1;
	if (errbuf_size > 0)
		(void)strlcpy(errbuf, s, errbuf_size);

	return(len);
}
/*
 * regatoi - internal routine to implement REG_ATOI
 * static const char *regatoi(const regex_t *preg, char *localbuf,
 * size_t buflen);
 *
 * Looks up the error name held in preg->re_endp.  On a match the
 * error's numeric code is formatted in decimal into localbuf (at most
 * buflen bytes) and localbuf is returned; if no table entry carries
 * that name, the constant string "0" is returned instead.
 */
static const char *
regatoi(
    const regex_t *preg,
    char *localbuf,
    size_t buflen)
{
	const struct rerr *entry = rerrs;

	/* The code-0 terminator row is never compared against. */
	while (entry->code != 0) {
		if (strcmp(entry->name, preg->re_endp) == 0) {
			(void)snprintf(localbuf, buflen, "%d", entry->code);
			return localbuf;
		}
		entry++;
	}

	return "0";
}

View file

@ -1,7 +1,6 @@
/* $OpenBSD: regex2.h,v 1.7 2004/11/30 17:04:23 otto Exp $ */
/* $NetBSD: regex2.h,v 1.13 2011/10/09 18:23:00 christos Exp $ */
/*-
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
@ -35,6 +34,57 @@
* @(#)regex2.h 8.4 (Berkeley) 3/20/94
*/
/*-
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
*
* This code is derived from software contributed to Berkeley by
* Henry Spencer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)regex2.h 8.4 (Berkeley) 3/20/94
*/
/*
* First, the stuff that ends up in the outside-world include file
= typedef off_t regoff_t;
= typedef struct {
= int re_magic;
= size_t re_nsub; // number of parenthesized subexpressions
= const char *re_endp; // end pointer for REG_PEND
= struct re_guts *re_g; // none of your business :-)
= } regex_t;
= typedef struct {
= regoff_t rm_so; // start of match
= regoff_t rm_eo; // end of match
= } regmatch_t;
*/
/*
* internals of regex_t
*/
@ -59,36 +109,38 @@
* In state representations, an operator's bit is on to signify a state
* immediately *preceding* "execution" of that operator.
*/
typedef unsigned long sop; /* strip operator */
typedef long sopno;
#define OPRMASK 0xf8000000LU
#define OPDMASK 0x07ffffffLU
typedef u_int32_t sop; /* strip operator */
typedef size_t sopno;
#define OPRMASK ((u_int32_t)0xf8000000UL)
#define OPDMASK ((u_int32_t)0x07ffffffUL)
#define OPSHIFT ((unsigned)27)
#define OP(n) ((n)&OPRMASK)
#define OPND(n) ((n)&OPDMASK)
#define OPND(n) ((int)((n)&OPDMASK))
#define SOP(op, opnd) ((op)|(opnd))
/* operators meaning operand */
/* (back, fwd are offsets) */
#define OEND (1LU<<OPSHIFT) /* endmarker - */
#define OCHAR (2LU<<OPSHIFT) /* character unsigned char */
#define OBOL (3LU<<OPSHIFT) /* left anchor - */
#define OEOL (4LU<<OPSHIFT) /* right anchor - */
#define OANY (5LU<<OPSHIFT) /* . - */
#define OANYOF (6LU<<OPSHIFT) /* [...] set number */
#define OBACK_ (7LU<<OPSHIFT) /* begin \d paren number */
#define O_BACK (8LU<<OPSHIFT) /* end \d paren number */
#define OPLUS_ (9LU<<OPSHIFT) /* + prefix fwd to suffix */
#define O_PLUS (10LU<<OPSHIFT) /* + suffix back to prefix */
#define OQUEST_ (11LU<<OPSHIFT) /* ? prefix fwd to suffix */
#define O_QUEST (12LU<<OPSHIFT) /* ? suffix back to prefix */
#define OLPAREN (13LU<<OPSHIFT) /* ( fwd to ) */
#define ORPAREN (14LU<<OPSHIFT) /* ) back to ( */
#define OCH_ (15LU<<OPSHIFT) /* begin choice fwd to OOR2 */
#define OOR1 (16LU<<OPSHIFT) /* | pt. 1 back to OOR1 or OCH_ */
#define OOR2 (17LU<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */
#define O_CH (18LU<<OPSHIFT) /* end choice back to OOR1 */
#define OBOW (19LU<<OPSHIFT) /* begin word - */
#define OEOW (20LU<<OPSHIFT) /* end word - */
#define OPC(n) (((u_int32_t)(n))<<OPSHIFT)
/* operators meaning operand */
/* (back, fwd are offsets) */
#define OEND OPC(1) /* endmarker - */
#define OCHAR OPC(2) /* character unsigned char */
#define OBOL OPC(3) /* left anchor - */
#define OEOL OPC(4) /* right anchor - */
#define OANY OPC(5) /* . - */
#define OANYOF OPC(6) /* [...] set number */
#define OBACK_ OPC(7) /* begin \d paren number */
#define O_BACK OPC(8) /* end \d paren number */
#define OPLUS_ OPC(9) /* + prefix fwd to suffix */
#define O_PLUS OPC(10) /* + suffix back to prefix */
#define OQUEST_ OPC(11) /* ? prefix fwd to suffix */
#define O_QUEST OPC(12) /* ? suffix back to prefix */
#define OLPAREN OPC(13) /* ( fwd to ) */
#define ORPAREN OPC(14) /* ) back to ( */
#define OCH_ OPC(15) /* begin choice fwd to OOR2 */
#define OOR1 OPC(16) /* | pt. 1 back to OOR1 or OCH_ */
#define OOR2 OPC(17) /* | pt. 2 fwd to OOR2 or O_CH */
#define O_CH OPC(18) /* end choice back to OOR1 */
#define OBOW OPC(19) /* begin word - */
#define OEOW OPC(20) /* end word - */
/*
* Structure for [] character-set representation. Character sets are
@ -127,8 +179,8 @@ struct re_guts {
int magic;
# define MAGIC2 ((('R'^0200)<<8)|'E')
sop *strip; /* malloced area for strip */
int csetsize; /* number of bits in a cset vector */
int ncsets; /* number of csets in use */
size_t csetsize; /* number of bits in a cset vector */
size_t ncsets; /* number of csets in use */
cset *sets; /* -> cset [ncsets] */
uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */
int cflags; /* copy of regcomp() cflags argument */
@ -139,12 +191,12 @@ struct re_guts {
# define USEBOL 01 /* used ^ */
# define USEEOL 02 /* used $ */
# define BAD 04 /* something wrong */
int nbol; /* number of ^ used */
int neol; /* number of $ used */
int ncategories; /* how many character categories */
size_t nbol; /* number of ^ used */
size_t neol; /* number of $ used */
size_t ncategories; /* how many character categories */
cat_t *categories; /* ->catspace[-CHAR_MIN] */
char *must; /* match must contain this string */
int mlen; /* length of must */
size_t mlen; /* length of must */
size_t nsub; /* copy of re_nsub */
int backrefs; /* does it use back references? */
sopno nplus; /* how deep does it nest +s? */
@ -154,4 +206,4 @@ struct re_guts {
/* misc utilities */
#define OUT (CHAR_MAX+1) /* a non-character value */
#define ISWORD(c) (isalnum(c) || (c) == '_')
/*
 * ISWORD - true if c is a "word" character: alphanumeric or '_'.
 * The argument is parenthesized inside the cast so that an expression
 * argument is converted to unsigned char as a whole before it reaches
 * isalnum().  Note that c is evaluated twice.
 */
#define ISWORD(c) (isalnum((unsigned char)(c)) || (c) == '_')

View file

@ -1,6 +1,6 @@
/* $OpenBSD: regexec.c,v 1.11 2005/08/05 13:03:00 espie Exp $ */
/* $NetBSD: regexec.c,v 1.22 2012/03/13 21:13:43 christos Exp $ */
/*-
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
@ -34,6 +34,52 @@
* @(#)regexec.c 8.3 (Berkeley) 3/20/94
*/
/*-
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
*
* This code is derived from software contributed to Berkeley by
* Henry Spencer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)regexec.c 8.3 (Berkeley) 3/20/94
*/
#include <sys/cdefs.h>
#if defined(LIBC_SCCS) && !defined(lint)
#if 0
static char sccsid[] = "@(#)regexec.c 8.3 (Berkeley) 3/20/94";
#else
__RCSID("$NetBSD: regexec.c,v 1.22 2012/03/13 21:13:43 christos Exp $");
#endif
#endif /* LIBC_SCCS and not lint */
/*
* the outer shell of regexec()
*
@ -41,39 +87,46 @@
* macros that code uses. This lets the same code operate on two different
* representations for state sets.
*/
#include "namespace.h"
#include <sys/types.h>
#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <ctype.h>
#include <regex.h>
#ifdef __weak_alias
__weak_alias(regexec,_regexec)
#endif
#include "utils.h"
#include "regex2.h"
/* macros for manipulating states, small version */
#define states long
#define states1 states /* for later use in regexec() decision */
#define states unsigned long
#define states1 unsigned long /* for later use in regexec() decision */
#define CLEAR(v) ((v) = 0)
#define SET0(v, n) ((v) &= ~((unsigned long)1 << (n)))
#define SET1(v, n) ((v) |= (unsigned long)1 << (n))
#define ISSET(v, n) (((v) & ((unsigned long)1 << (n))) != 0)
#define ASSIGN(d, s) ((d) = (s))
#define EQ(a, b) ((a) == (b))
#define STATEVARS long dummy /* dummy version */
#define STATEVARS int dummy /* dummy version */
#define STATESETUP(m, n) /* nothing */
#define STATETEARDOWN(m) /* nothing */
#define SETUP(v) ((v) = 0)
#define onestate long
#define onestate unsigned long
#define INIT(o, n) ((o) = (unsigned long)1 << (n))
#define INC(o) ((o) <<= 1)
#define INC(o) ((o) <<= 1)
#define ISSTATEIN(v, o) (((v) & (o)) != 0)
/* some abbreviations; note that some of these know variable names! */
/* do "if I'm here, I can also be there" etc without branches */
#define FWD(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) << (n))
#define BACK(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) >> (n))
#define ISSETBACK(v, n) (((v) & ((unsigned long)here >> (n))) != 0)
#define ISSETBACK(v, n) (((v) & ((unsigned long)here >> (n))) != 0)
/* function names */
#define SNAMES /* engine.c looks after details */
@ -102,20 +155,23 @@
/* macros for manipulating states, large version */
#define states char *
#define CLEAR(v) memset(v, 0, m->g->nstates)
#define CLEAR(v) memset(v, 0, (size_t)m->g->nstates)
#define SET0(v, n) ((v)[n] = 0)
#define SET1(v, n) ((v)[n] = 1)
#define ISSET(v, n) ((v)[n])
#define ASSIGN(d, s) memcpy(d, s, m->g->nstates)
#define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0)
#define STATEVARS long vn; char *space
#define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \
if ((m)->space == NULL) return(REG_ESPACE); \
(m)->vn = 0; }
#define STATETEARDOWN(m) { free((m)->space); }
#define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates])
#define onestate long
#define INIT(o, n) ((o) = (n))
#define ASSIGN(d, s) memcpy(d, s, (size_t)m->g->nstates)
#define EQ(a, b) (memcmp(a, b, (size_t)m->g->nstates) == 0)
#define STATEVARS int vn; char *space
#define STATESETUP(m, nv) \
if (((m)->space = malloc((size_t)((nv)*(m)->g->nstates))) == NULL) \
return(REG_ESPACE); \
else \
(m)->vn = 0
#define STATETEARDOWN(m) { free((m)->space); m->space = NULL; }
#define SETUP(v) ((v) = &m->space[(size_t)(m->vn++ * m->g->nstates)])
#define onestate int
#define INIT(o, n) ((o) = (int)(n))
#define INC(o) ((o)++)
#define ISSTATEIN(v, o) ((v)[o])
/* some abbreviations; note that some of these know variable names! */
@ -130,22 +186,38 @@
/*
- regexec - interface for matching
= extern int regexec(const regex_t *, const char *, size_t, \
= regmatch_t [], int);
= #define REG_NOTBOL 00001
= #define REG_NOTEOL 00002
= #define REG_STARTEND 00004
= #define REG_TRACE 00400 // tracing of execution
= #define REG_LARGE 01000 // force large representation
= #define REG_BACKR 02000 // force use of backref code
*
* We put this here so we can exploit knowledge of the state representation
* when choosing which matcher to call. Also, by this point the matchers
* have been prototyped.
*/
int /* 0 success, REG_NOMATCH failure */
regexec(const regex_t *preg, const char *string, size_t nmatch,
regmatch_t pmatch[], int eflags)
regexec(
const regex_t *preg,
const char *string,
size_t nmatch,
regmatch_t pmatch[],
int eflags)
{
struct re_guts *g = preg->re_g;
char *s;
#ifdef REDEBUG
# define GOODFLAGS(f) (f)
#else
# define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))
#endif
_DIAGASSERT(preg != NULL);
_DIAGASSERT(string != NULL);
if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
return(REG_BADPAT);
assert(!(g->iflags&BAD));
@ -153,8 +225,10 @@ regexec(const regex_t *preg, const char *string, size_t nmatch,
return(REG_BADPAT);
eflags = GOODFLAGS(eflags);
if (g->nstates <= (int)(CHAR_BIT*sizeof(states1)) && !(eflags&REG_LARGE))
return(smatcher(g, (char *)string, nmatch, pmatch, eflags));
s = __UNCONST(string);
if (g->nstates <= (sopno)(CHAR_BIT*sizeof(states1)) && !(eflags&REG_LARGE))
return(smatcher(g, s, nmatch, pmatch, eflags));
else
return(lmatcher(g, (char *)string, nmatch, pmatch, eflags));
return(lmatcher(g, s, nmatch, pmatch, eflags));
}

View file

@ -0,0 +1,129 @@
/* $NetBSD: regfree.c,v 1.15 2007/02/09 23:44:18 junyoung Exp $ */
/*-
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Henry Spencer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)regfree.c 8.3 (Berkeley) 3/20/94
*/
/*-
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
*
* This code is derived from software contributed to Berkeley by
* Henry Spencer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)regfree.c 8.3 (Berkeley) 3/20/94
*/
#include <sys/cdefs.h>
#if defined(LIBC_SCCS) && !defined(lint)
#if 0
static char sccsid[] = "@(#)regfree.c 8.3 (Berkeley) 3/20/94";
#else
__RCSID("$NetBSD: regfree.c,v 1.15 2007/02/09 23:44:18 junyoung Exp $");
#endif
#endif /* LIBC_SCCS and not lint */
#include "namespace.h"
#include <sys/types.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <regex.h>
#ifdef __weak_alias
__weak_alias(regfree,_regfree)
#endif
#include "utils.h"
#include "regex2.h"
/*
 - regfree - free everything
 = extern void regfree(regex_t *);
 *
 * Releases the internal structure attached to preg together with the
 * strip, sets, setbits and must buffers it owns.  If either magic number
 * does not match, the function returns without touching anything.  On
 * success both magic numbers are zeroed and preg->re_g is cleared, so the
 * regex_t no longer refers to freed memory.
 */
void
regfree(
    regex_t *preg)
{
	struct re_guts *g;

	_DIAGASSERT(preg != NULL);
	_DIAGASSERT(preg->re_magic == MAGIC1);
	if (preg->re_magic != MAGIC1)	/* oops */
		return;			/* nice to complain, but hard */

	g = preg->re_g;
	if (g == NULL || g->magic != MAGIC2)	/* oops again */
		return;

	/* invalidate both magic numbers before tearing anything down */
	preg->re_magic = 0;
	g->magic = 0;
	/* drop the pointer as well, so *preg holds no dangling address */
	preg->re_g = NULL;

	/* free(NULL) is a no-op, so the members need no individual guards */
	free(g->strip);
	free(g->sets);
	free(g->setbits);
	free(g->must);
	free(g);
}

View file

@ -0,0 +1,91 @@
/* $NetBSD: utils.h,v 1.6 2003/08/07 16:43:21 agc Exp $ */
/*-
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Henry Spencer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)utils.h 8.3 (Berkeley) 3/20/94
*/
/*-
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
*
* This code is derived from software contributed to Berkeley by
* Henry Spencer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)utils.h 8.3 (Berkeley) 3/20/94
*/
/* utility definitions */
/* repetition-count limit, taken directly from _POSIX2_RE_DUP_MAX */
#define DUPMAX _POSIX2_RE_DUP_MAX /* xxx is this right? */
/*
 * One more than DUPMAX, i.e. a value no count within the limit can equal;
 * presumably the marker for "no upper bound" -- confirm against its users.
 * NOTE(review): <math.h> defines a macro with this same name.
 */
#define INFINITY (DUPMAX + 1)
/* number of distinct values in the range CHAR_MIN..CHAR_MAX */
#define NC (CHAR_MAX - CHAR_MIN + 1)
/* shorthand for unsigned char */
typedef unsigned char uch;
/*
 * switch off assertions (if not already off) if no REDEBUG;
 * NDEBUG has to be defined before <assert.h> is included to take effect,
 * which is why the include follows this block
 */
#ifndef REDEBUG
#ifndef NDEBUG
#define NDEBUG /* no assertions please */
#endif
#endif
#include <assert.h>
/*
 * for old systems with bcopy() but no memmove();
 * bcopy() takes the source first, so the first two arguments are swapped
 */
#ifdef USEBCOPY
#define memmove(d, s, c) bcopy(s, d, c)
#endif

View file

@ -21,4 +21,7 @@
#include <assert.h>
// Diagnostic assertion: when e is false, pass the file, line, function name
// and the text of the failed expression to __assert2(); otherwise do nothing.
#define _DIAGASSERT(e) ((e) ? (void) 0 : __assert2(__FILE__, __LINE__, __func__, #e))
// TODO: update our <sys/cdefs.h> to support this properly.
// Until then this is a stub: both arguments are ignored and the result is
// always true.
#define __type_fit(t, a) (0 == 0)
#endif

43
tests/Android.mk Normal file
View file

@ -0,0 +1,43 @@
#
# Copyright (C) 2012 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright The Android Open Source Project
# Directory containing this makefile, as reported by the build system's my-dir.
LOCAL_PATH := $(call my-dir)
# Settings shared by the two test modules defined below: the base module name
# (the host variant appends "-glibc"), the module tags, and the source list.
test_module = bionic-unit-tests
test_tags = eng tests
# NOTE(review): the last entry ends in a backslash, so the line that follows
# the list is joined onto it; keep that line blank or a comment.
test_src_files = \
regex_test.cpp \
# Build for the device (with bionic). Run with:
# adb shell /data/nativetest/bionic-unit-tests/bionic-unit-tests
include $(CLEAR_VARS)
LOCAL_MODULE := $(test_module)
# Lists this makefile as an additional dependency of the module; presumably
# this forces a rebuild when the makefile changes -- confirm against the
# build system documentation.
LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
LOCAL_MODULE_TAGS := $(test_tags)
LOCAL_SRC_FILES := $(test_src_files)
include $(BUILD_NATIVE_TEST)
# Build for the host (with glibc).
# Note that this will build against glibc, so it's not useful for testing
# bionic's implementation, but it does let you use glibc as a reference
# implementation for testing the tests themselves.
include $(CLEAR_VARS)
# Same sources and tags as the device module, under a distinct module name.
LOCAL_MODULE := $(test_module)-glibc
LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
LOCAL_MODULE_TAGS := $(test_tags)
LOCAL_SRC_FILES := $(test_src_files)
include $(BUILD_HOST_NATIVE_TEST)

38
tests/regex_test.cpp Normal file
View file

@ -0,0 +1,38 @@
/*
* Copyright (C) 2012 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <gtest/gtest.h>
#include <sys/types.h>
#include <regex.h>
// Smoke test: drives regcomp, regexec, regerror and regfree once each.
TEST(regex, smoke) {
  // Compile the pattern with no cflags set.
  regex_t re;
  ASSERT_EQ(0, regcomp(&re, "ab*c", 0));

  // One subject that matches and one that does not; no match offsets are
  // requested (nmatch is 0 and pmatch is NULL).
  ASSERT_EQ(0, regexec(&re, "abbbc", 0, NULL, 0));
  ASSERT_EQ(REG_NOMATCH, regexec(&re, "foo", 0, NULL, 0));

  // The text for REG_NOMATCH differs between the two C libraries this test
  // is built against, so the expected string is selected at compile time.
  char buf[80];
  regerror(REG_NOMATCH, &re, buf, sizeof(buf));
  // Test for the macro's presence with defined(): a bare "#if __BIONIC__"
  // relies on an undefined identifier silently evaluating to 0.
#if defined(__BIONIC__)
  ASSERT_STREQ("regexec() failed to match", buf);
#else
  ASSERT_STREQ("No match", buf);
#endif

  regfree(&re);
}