Revert "Switch to PCRE grep."

This reverts commit 90a018a87a.

Not a clean revert because things have changed slightly, and I'm sticking
with the egrep/fgrep shell scripts for now.

Bug: http://b/111849261
Bug: https://bugs.exim.org/show_bug.cgi?id=2294
Test: manual
Change-Id: I72ae637c84f0eb1c2b5291db73ebff1628d54110
This commit is contained in:
Elliott Hughes 2018-07-26 14:43:31 -07:00
parent 46f281edf5
commit f9408d5cdf
8 changed files with 2109 additions and 3 deletions

View file

@ -193,14 +193,12 @@ which whoami xargs xxd yes zcat
Android Q
---------
BSD: fsck\_msdos newfs\_msdos
BSD: grep fsck\_msdos newfs\_msdos
bzip2: bzcat bzip2 bunzip2
one-true-awk: awk
PCRE: egrep fgrep grep
toolbox: getevent getprop
toybox: acpi base64 basename blockdev cal cat chcon chgrp chmod chown

View file

@ -60,3 +60,20 @@ cc_binary {
defaults: ["toolbox_defaults"],
srcs: ["r.c"],
}
// We build BSD grep separately (but see http://b/111849261).
cc_binary {
name: "grep",
defaults: ["toolbox_defaults"],
srcs: [
"upstream-netbsd/usr.bin/grep/fastgrep.c",
"upstream-netbsd/usr.bin/grep/file.c",
"upstream-netbsd/usr.bin/grep/grep.c",
"upstream-netbsd/usr.bin/grep/queue.c",
"upstream-netbsd/usr.bin/grep/util.c",
],
sanitize: {
integer_overflow: false,
},
}

View file

@ -0,0 +1,336 @@
/* $OpenBSD: util.c,v 1.36 2007/10/02 17:59:18 otto Exp $ */
/* $FreeBSD: head/usr.bin/grep/fastgrep.c 211496 2010-08-19 09:28:59Z des $ */
/*-
* Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
* Copyright (C) 2008 Gabor Kovesdan <gabor@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* XXX: This file is a speed up for grep to cover the defects of the
* regex library. These optimizations should practically be implemented
* there keeping this code clean. This is a future TODO, but for the
* meantime, we need to use this workaround.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
__RCSID("$NetBSD: fastgrep.c,v 1.5 2011/04/18 03:27:40 joerg Exp $");
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include <wctype.h>
#include "grep.h"
static inline int grep_cmp(const unsigned char *, const unsigned char *, size_t);
static inline void grep_revstr(unsigned char *, int);
void
fgrepcomp(fastgrep_t *fg, const char *pat)
{
unsigned int i;
/* Initialize. */
fg->len = strlen(pat);
fg->bol = false;
fg->eol = false;
fg->reversed = false;
fg->pattern = (unsigned char *)grep_strdup(pat);
/* Preprocess pattern. */
for (i = 0; i <= UCHAR_MAX; i++)
fg->qsBc[i] = fg->len;
for (i = 1; i < fg->len; i++)
fg->qsBc[fg->pattern[i]] = fg->len - i;
}
/*
* Returns: -1 on failure, 0 on success
*/
int
fastcomp(fastgrep_t *fg, const char *pat)
{
unsigned int i;
int firstHalfDot = -1;
int firstLastHalfDot = -1;
int hasDot = 0;
int lastHalfDot = 0;
int shiftPatternLen;
/* Initialize. */
fg->len = strlen(pat);
fg->bol = false;
fg->eol = false;
fg->reversed = false;
fg->word = wflag;
/* Remove end-of-line character ('$'). */
if (fg->len > 0 && pat[fg->len - 1] == '$') {
fg->eol = true;
fg->len--;
}
/* Remove beginning-of-line character ('^'). */
if (pat[0] == '^') {
fg->bol = true;
fg->len--;
pat++;
}
if (fg->len >= 14 &&
memcmp(pat, "[[:<:]]", 7) == 0 &&
memcmp(pat + fg->len - 7, "[[:>:]]", 7) == 0) {
fg->len -= 14;
pat += 7;
/* Word boundary is handled separately in util.c */
fg->word = true;
}
/*
* pat has been adjusted earlier to not include '^', '$' or
* the word match character classes at the beginning and ending
* of the string respectively.
*/
fg->pattern = grep_malloc(fg->len + 1);
memcpy(fg->pattern, pat, fg->len);
fg->pattern[fg->len] = '\0';
/* Look for ways to cheat...er...avoid the full regex engine. */
for (i = 0; i < fg->len; i++) {
/* Can still cheat? */
if (fg->pattern[i] == '.') {
hasDot = i;
if (i < fg->len / 2) {
if (firstHalfDot < 0)
/* Closest dot to the beginning */
firstHalfDot = i;
} else {
/* Closest dot to the end of the pattern. */
lastHalfDot = i;
if (firstLastHalfDot < 0)
firstLastHalfDot = i;
}
} else {
/* Free memory and let others know this is empty. */
free(fg->pattern);
fg->pattern = NULL;
return (-1);
}
}
/*
* Determine if a reverse search would be faster based on the placement
* of the dots.
*/
if ((!(lflag || cflag)) && ((!(fg->bol || fg->eol)) &&
((lastHalfDot) && ((firstHalfDot < 0) ||
((fg->len - (lastHalfDot + 1)) < (size_t)firstHalfDot)))) &&
!oflag && !color) {
fg->reversed = true;
hasDot = fg->len - (firstHalfDot < 0 ?
firstLastHalfDot : firstHalfDot) - 1;
grep_revstr(fg->pattern, fg->len);
}
/*
* Normal Quick Search would require a shift based on the position the
* next character after the comparison is within the pattern. With
* wildcards, the position of the last dot effects the maximum shift
* distance.
* The closer to the end the wild card is the slower the search. A
* reverse version of this algorithm would be useful for wildcards near
* the end of the string.
*
* Examples:
* Pattern Max shift
* ------- ---------
* this 5
* .his 4
* t.is 3
* th.s 2
* thi. 1
*/
/* Adjust the shift based on location of the last dot ('.'). */
shiftPatternLen = fg->len - hasDot;
/* Preprocess pattern. */
for (i = 0; i <= (signed)UCHAR_MAX; i++)
fg->qsBc[i] = shiftPatternLen;
for (i = hasDot + 1; i < fg->len; i++) {
fg->qsBc[fg->pattern[i]] = fg->len - i;
}
/*
* Put pattern back to normal after pre-processing to allow for easy
* comparisons later.
*/
if (fg->reversed)
grep_revstr(fg->pattern, fg->len);
return (0);
}
int
grep_search(fastgrep_t *fg, const unsigned char *data, size_t len, regmatch_t *pmatch)
{
unsigned int j;
int ret = REG_NOMATCH;
if (pmatch->rm_so == (ssize_t)len)
return (ret);
if (fg->bol && pmatch->rm_so != 0) {
pmatch->rm_so = len;
pmatch->rm_eo = len;
return (ret);
}
/* No point in going farther if we do not have enough data. */
if (len < fg->len)
return (ret);
/* Only try once at the beginning or ending of the line. */
if (fg->bol || fg->eol) {
/* Simple text comparison. */
/* Verify data is >= pattern length before searching on it. */
if (len >= fg->len) {
/* Determine where in data to start search at. */
j = fg->eol ? len - fg->len : 0;
if (!((fg->bol && fg->eol) && (len != fg->len)))
if (grep_cmp(fg->pattern, data + j,
fg->len) == -1) {
pmatch->rm_so = j;
pmatch->rm_eo = j + fg->len;
ret = 0;
}
}
} else if (fg->reversed) {
/* Quick Search algorithm. */
j = len;
do {
if (grep_cmp(fg->pattern, data + j - fg->len,
fg->len) == -1) {
pmatch->rm_so = j - fg->len;
pmatch->rm_eo = j;
ret = 0;
break;
}
/* Shift if within bounds, otherwise, we are done. */
if (j == fg->len)
break;
j -= fg->qsBc[data[j - fg->len - 1]];
} while (j >= fg->len);
} else {
/* Quick Search algorithm. */
j = pmatch->rm_so;
do {
if (grep_cmp(fg->pattern, data + j, fg->len) == -1) {
pmatch->rm_so = j;
pmatch->rm_eo = j + fg->len;
ret = 0;
break;
}
/* Shift if within bounds, otherwise, we are done. */
if (j + fg->len == len)
break;
else
j += fg->qsBc[data[j + fg->len]];
} while (j <= (len - fg->len));
}
return (ret);
}
/*
* Returns: i >= 0 on failure (position that it failed)
* -1 on success
*/
static inline int
grep_cmp(const unsigned char *pat, const unsigned char *data, size_t len)
{
size_t size;
wchar_t *wdata, *wpat;
unsigned int i;
if (iflag) {
if ((size = mbstowcs(NULL, (const char *)data, 0)) ==
((size_t) - 1))
return (-1);
wdata = grep_malloc(size * sizeof(wint_t));
if (mbstowcs(wdata, (const char *)data, size) ==
((size_t) - 1))
return (-1);
if ((size = mbstowcs(NULL, (const char *)pat, 0)) ==
((size_t) - 1))
return (-1);
wpat = grep_malloc(size * sizeof(wint_t));
if (mbstowcs(wpat, (const char *)pat, size) == ((size_t) - 1))
return (-1);
for (i = 0; i < len; i++) {
if ((towlower(wpat[i]) == towlower(wdata[i])) ||
((grepbehave != GREP_FIXED) && wpat[i] == L'.'))
continue;
free(wpat);
free(wdata);
return (i);
}
} else {
for (i = 0; i < len; i++) {
if ((pat[i] == data[i]) || ((grepbehave != GREP_FIXED) &&
pat[i] == '.'))
continue;
return (i);
}
}
return (-1);
}
static inline void
grep_revstr(unsigned char *str, int len)
{
int i;
char c;
for (i = 0; i < len / 2; i++) {
c = str[i];
str[i] = str[len - i - 1];
str[len - i - 1] = c;
}
}

View file

@ -0,0 +1,271 @@
/* $NetBSD: file.c,v 1.7 2011/04/18 22:46:48 joerg Exp $ */
/* $FreeBSD: head/usr.bin/grep/file.c 211496 2010-08-19 09:28:59Z des $ */
/* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */
/*-
* Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
* Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
* Copyright (C) 2010 Dimitry Andric <dimitry@andric.com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
__RCSID("$NetBSD: file.c,v 1.7 2011/04/18 22:46:48 joerg Exp $");
#include <sys/param.h>
#include <sys/types.h>
#include <sys/stat.h>
#ifndef __ANDROID__
#include <bzlib.h>
#endif
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>
#include <wctype.h>
#ifndef __ANDROID__
#include <zlib.h>
#endif
#include "grep.h"
#define MAXBUFSIZ (32 * 1024)
#define LNBUFBUMP 80
#ifndef __ANDROID__
static gzFile gzbufdesc;
static BZFILE* bzbufdesc;
#endif
static unsigned char buffer[MAXBUFSIZ];
static unsigned char *bufpos;
static size_t bufrem;
static unsigned char *lnbuf;
static size_t lnbuflen;
static inline int
grep_refill(struct file *f)
{
ssize_t nr;
#ifndef __ANDROID__
int bzerr;
#endif
bufpos = buffer;
bufrem = 0;
#ifndef __ANDROID__
if (filebehave == FILE_GZIP)
nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
else if (filebehave == FILE_BZIP && bzbufdesc != NULL) {
nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
switch (bzerr) {
case BZ_OK:
case BZ_STREAM_END:
/* No problem, nr will be okay */
break;
case BZ_DATA_ERROR_MAGIC:
/*
* As opposed to gzread(), which simply returns the
* plain file data, if it is not in the correct
* compressed format, BZ2_bzRead() instead aborts.
*
* So, just restart at the beginning of the file again,
* and use plain reads from now on.
*/
BZ2_bzReadClose(&bzerr, bzbufdesc);
bzbufdesc = NULL;
if (lseek(f->fd, 0, SEEK_SET) == -1)
return (-1);
nr = read(f->fd, buffer, MAXBUFSIZ);
break;
default:
/* Make sure we exit with an error */
nr = -1;
}
} else
#endif
nr = read(f->fd, buffer, MAXBUFSIZ);
if (nr < 0)
return (-1);
bufrem = nr;
return (0);
}
static inline int
grep_lnbufgrow(size_t newlen)
{
if (lnbuflen < newlen) {
lnbuf = grep_realloc(lnbuf, newlen);
lnbuflen = newlen;
}
return (0);
}
char *
grep_fgetln(struct file *f, size_t *lenp)
{
unsigned char *p;
char *ret;
size_t len;
size_t off;
ptrdiff_t diff;
/* Fill the buffer, if necessary */
if (bufrem == 0 && grep_refill(f) != 0)
goto error;
if (bufrem == 0) {
/* Return zero length to indicate EOF */
*lenp = 0;
return ((char *)bufpos);
}
/* Look for a newline in the remaining part of the buffer */
if ((p = memchr(bufpos, line_sep, bufrem)) != NULL) {
++p; /* advance over newline */
ret = (char *)bufpos;
len = p - bufpos;
bufrem -= len;
bufpos = p;
*lenp = len;
return (ret);
}
/* We have to copy the current buffered data to the line buffer */
for (len = bufrem, off = 0; ; len += bufrem) {
/* Make sure there is room for more data */
if (grep_lnbufgrow(len + LNBUFBUMP))
goto error;
memcpy(lnbuf + off, bufpos, len - off);
off = len;
if (grep_refill(f) != 0)
goto error;
if (bufrem == 0)
/* EOF: return partial line */
break;
if ((p = memchr(bufpos, line_sep, bufrem)) == NULL)
continue;
/* got it: finish up the line (like code above) */
++p;
diff = p - bufpos;
len += diff;
if (grep_lnbufgrow(len))
goto error;
memcpy(lnbuf + off, bufpos, diff);
bufrem -= diff;
bufpos = p;
break;
}
*lenp = len;
return ((char *)lnbuf);
error:
*lenp = 0;
return (NULL);
}
static inline struct file *
grep_file_init(struct file *f)
{
#ifndef __ANDROID__
if (filebehave == FILE_GZIP &&
(gzbufdesc = gzdopen(f->fd, "r")) == NULL)
goto error;
if (filebehave == FILE_BZIP &&
(bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
goto error;
#endif
/* Fill read buffer, also catches errors early */
if (grep_refill(f) != 0)
goto error;
/* Check for binary stuff, if necessary */
if (!nulldataflag && binbehave != BINFILE_TEXT &&
memchr(bufpos, '\0', bufrem) != NULL)
f->binary = true;
return (f);
error:
close(f->fd);
free(f);
return (NULL);
}
/*
* Opens a file for processing.
*/
struct file *
grep_open(const char *path)
{
struct file *f;
f = grep_malloc(sizeof *f);
memset(f, 0, sizeof *f);
if (path == NULL) {
/* Processing stdin implies --line-buffered. */
lbflag = true;
f->fd = STDIN_FILENO;
} else if ((f->fd = open(path, O_RDONLY)) == -1) {
free(f);
return (NULL);
}
return (grep_file_init(f));
}
/*
* Closes a file.
*/
void
grep_close(struct file *f)
{
close(f->fd);
/* Reset read buffer and line buffer */
bufpos = buffer;
bufrem = 0;
free(lnbuf);
lnbuf = NULL;
lnbuflen = 0;
}

View file

@ -0,0 +1,707 @@
/* $NetBSD: grep.c,v 1.12 2014/07/11 16:30:45 christos Exp $ */
/* $FreeBSD: head/usr.bin/grep/grep.c 211519 2010-08-19 22:55:17Z delphij $ */
/* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */
/*-
* Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
* Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
__RCSID("$NetBSD: grep.c,v 1.12 2014/07/11 16:30:45 christos Exp $");
#include <sys/stat.h>
#include <sys/types.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <getopt.h>
#include <limits.h>
#include <libgen.h>
#include <locale.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "grep.h"
#ifndef WITHOUT_NLS
#include <nl_types.h>
nl_catd catalog;
#endif
/*
* Default messags to use when NLS is disabled or no catalogue
* is found.
*/
const char *errstr[] = {
"",
/* 1*/ "(standard input)",
/* 2*/ "cannot read bzip2 compressed file",
/* 3*/ "unknown %s option",
/* 4*/ "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZz] [-A num] [-B num] [-C[num]]\n",
/* 5*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
/* 6*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n",
/* 7*/ "\t[pattern] [file ...]\n",
/* 8*/ "Binary file %s matches\n",
/* 9*/ "%s (BSD grep) %s\n",
};
/* Flags passed to regcomp() and regexec() */
int cflags = 0;
int eflags = REG_STARTEND;
/* Searching patterns */
unsigned int patterns, pattern_sz;
char **pattern;
regex_t *r_pattern;
fastgrep_t *fg_pattern;
/* Filename exclusion/inclusion patterns */
unsigned int fpatterns, fpattern_sz;
unsigned int dpatterns, dpattern_sz;
struct epat *dpattern, *fpattern;
/* For regex errors */
char re_error[RE_ERROR_BUF + 1];
/* Command-line flags */
unsigned long long Aflag; /* -A x: print x lines trailing each match */
unsigned long long Bflag; /* -B x: print x lines leading each match */
bool Hflag; /* -H: always print file name */
bool Lflag; /* -L: only show names of files with no matches */
bool bflag; /* -b: show block numbers for each match */
bool cflag; /* -c: only show a count of matching lines */
bool hflag; /* -h: don't print filename headers */
bool iflag; /* -i: ignore case */
bool lflag; /* -l: only show names of files with matches */
bool mflag; /* -m x: stop reading the files after x matches */
unsigned long long mcount; /* count for -m */
bool nflag; /* -n: show line numbers in front of matching lines */
bool oflag; /* -o: print only matching part */
bool qflag; /* -q: quiet mode (don't output anything) */
bool sflag; /* -s: silent mode (ignore errors) */
bool vflag; /* -v: only show non-matching lines */
bool wflag; /* -w: pattern must start and end on word boundaries */
bool xflag; /* -x: pattern must match entire line */
bool lbflag; /* --line-buffered */
bool nullflag; /* --null */
bool nulldataflag; /* --null-data */
unsigned char line_sep = '\n'; /* 0 for --null-data */
char *label; /* --label */
const char *color; /* --color */
int grepbehave = GREP_BASIC; /* -EFGP: type of the regex */
int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */
int filebehave = FILE_STDIO; /* -JZ: normal, gzip or bzip2 file */
int devbehave = DEV_READ; /* -D: handling of devices */
int dirbehave = DIR_READ; /* -dRr: handling of directories */
int linkbehave = LINK_READ; /* -OpS: handling of symlinks */
bool dexclude, dinclude; /* --exclude-dir and --include-dir */
bool fexclude, finclude; /* --exclude and --include */
enum {
BIN_OPT = CHAR_MAX + 1,
COLOR_OPT,
DECOMPRESS_OPT,
HELP_OPT,
MMAP_OPT,
LINEBUF_OPT,
LABEL_OPT,
R_EXCLUDE_OPT,
R_INCLUDE_OPT,
R_DEXCLUDE_OPT,
R_DINCLUDE_OPT
};
static inline const char *init_color(const char *);
/* Housekeeping */
int tail; /* lines left to print */
bool notfound; /* file not found */
extern char *__progname;
/*
* Prints usage information and returns 2.
*/
__dead static void
usage(void)
{
fprintf(stderr, getstr(4), __progname);
fprintf(stderr, "%s", getstr(5));
fprintf(stderr, "%s", getstr(6));
fprintf(stderr, "%s", getstr(7));
exit(2);
}
static const char optstr[] =
"0123456789A:B:C:D:EFGHIJLOPSRUVZabcd:e:f:hilm:nopqrsuvwxyz";
struct option long_options[] =
{
{"binary-files", required_argument, NULL, BIN_OPT},
{"decompress", no_argument, NULL, DECOMPRESS_OPT},
{"help", no_argument, NULL, HELP_OPT},
{"mmap", no_argument, NULL, MMAP_OPT},
{"line-buffered", no_argument, NULL, LINEBUF_OPT},
{"label", required_argument, NULL, LABEL_OPT},
{"color", optional_argument, NULL, COLOR_OPT},
{"colour", optional_argument, NULL, COLOR_OPT},
{"exclude", required_argument, NULL, R_EXCLUDE_OPT},
{"include", required_argument, NULL, R_INCLUDE_OPT},
{"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT},
{"include-dir", required_argument, NULL, R_DINCLUDE_OPT},
{"after-context", required_argument, NULL, 'A'},
{"text", no_argument, NULL, 'a'},
{"before-context", required_argument, NULL, 'B'},
{"byte-offset", no_argument, NULL, 'b'},
{"context", optional_argument, NULL, 'C'},
{"count", no_argument, NULL, 'c'},
{"devices", required_argument, NULL, 'D'},
{"directories", required_argument, NULL, 'd'},
{"extended-regexp", no_argument, NULL, 'E'},
{"regexp", required_argument, NULL, 'e'},
{"fixed-strings", no_argument, NULL, 'F'},
{"file", required_argument, NULL, 'f'},
{"basic-regexp", no_argument, NULL, 'G'},
{"no-filename", no_argument, NULL, 'h'},
{"with-filename", no_argument, NULL, 'H'},
{"ignore-case", no_argument, NULL, 'i'},
{"bz2decompress", no_argument, NULL, 'J'},
{"files-with-matches", no_argument, NULL, 'l'},
{"files-without-match", no_argument, NULL, 'L'},
{"max-count", required_argument, NULL, 'm'},
{"line-number", no_argument, NULL, 'n'},
{"only-matching", no_argument, NULL, 'o'},
{"quiet", no_argument, NULL, 'q'},
{"silent", no_argument, NULL, 'q'},
{"recursive", no_argument, NULL, 'r'},
{"no-messages", no_argument, NULL, 's'},
{"binary", no_argument, NULL, 'U'},
{"unix-byte-offsets", no_argument, NULL, 'u'},
{"invert-match", no_argument, NULL, 'v'},
{"version", no_argument, NULL, 'V'},
{"word-regexp", no_argument, NULL, 'w'},
{"line-regexp", no_argument, NULL, 'x'},
{"null", no_argument, NULL, 'Z'},
{"null-data", no_argument, NULL, 'z'},
{NULL, no_argument, NULL, 0}
};
/*
* Adds a searching pattern to the internal array.
*/
static void
add_pattern(char *pat, size_t len)
{
/* TODO: Check for empty patterns and shortcut */
/* Increase size if necessary */
if (patterns == pattern_sz) {
pattern_sz *= 2;
pattern = grep_realloc(pattern, ++pattern_sz *
sizeof(*pattern));
}
if (len > 0 && pat[len - 1] == '\n')
--len;
/* pat may not be NUL-terminated */
pattern[patterns] = grep_malloc(len + 1);
memcpy(pattern[patterns], pat, len);
pattern[patterns][len] = '\0';
++patterns;
}
/*
* Adds a file include/exclude pattern to the internal array.
*/
static void
add_fpattern(const char *pat, int mode)
{
/* Increase size if necessary */
if (fpatterns == fpattern_sz) {
fpattern_sz *= 2;
fpattern = grep_realloc(fpattern, ++fpattern_sz *
sizeof(struct epat));
}
fpattern[fpatterns].pat = grep_strdup(pat);
fpattern[fpatterns].mode = mode;
++fpatterns;
}
/*
* Adds a directory include/exclude pattern to the internal array.
*/
static void
add_dpattern(const char *pat, int mode)
{
/* Increase size if necessary */
if (dpatterns == dpattern_sz) {
dpattern_sz *= 2;
dpattern = grep_realloc(dpattern, ++dpattern_sz *
sizeof(struct epat));
}
dpattern[dpatterns].pat = grep_strdup(pat);
dpattern[dpatterns].mode = mode;
++dpatterns;
}
/*
* Reads searching patterns from a file and adds them with add_pattern().
*/
static void
read_patterns(const char *fn)
{
FILE *f;
char *line;
size_t len;
ssize_t rlen;
if ((f = fopen(fn, "r")) == NULL)
err(2, "%s", fn);
line = NULL;
len = 0;
while ((rlen = getline(&line, &len, f)) != -1)
add_pattern(line, *line == '\n' ? 0 : (size_t)rlen);
free(line);
if (ferror(f))
err(2, "%s", fn);
fclose(f);
}
static inline const char *
init_color(const char *d)
{
char *c;
c = getenv("GREP_COLOR");
return (c != NULL ? c : d);
}
int
main(int argc, char *argv[])
{
char **aargv, **eargv, *eopts;
char *ep;
unsigned long long l;
unsigned int aargc, eargc, i, j;
int c, lastc, needpattern, newarg, prevoptind;
setlocale(LC_ALL, "");
#ifndef WITHOUT_NLS
catalog = catopen("grep", NL_CAT_LOCALE);
#endif
/* Check what is the program name of the binary. In this
way we can have all the funcionalities in one binary
without the need of scripting and using ugly hacks. */
switch (__progname[0]) {
case 'e':
grepbehave = GREP_EXTENDED;
break;
case 'f':
grepbehave = GREP_FIXED;
break;
case 'g':
grepbehave = GREP_BASIC;
break;
case 'z':
filebehave = FILE_GZIP;
switch(__progname[1]) {
case 'e':
grepbehave = GREP_EXTENDED;
break;
case 'f':
grepbehave = GREP_FIXED;
break;
case 'g':
grepbehave = GREP_BASIC;
break;
}
break;
}
lastc = '\0';
newarg = 1;
prevoptind = 1;
needpattern = 1;
eopts = getenv("GREP_OPTIONS");
/* support for extra arguments in GREP_OPTIONS */
eargc = 0;
if (eopts != NULL) {
char *str;
/* make an estimation of how many extra arguments we have */
for (j = 0; j < strlen(eopts); j++)
if (eopts[j] == ' ')
eargc++;
eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1));
eargc = 0;
/* parse extra arguments */
while ((str = strsep(&eopts, " ")) != NULL)
eargv[eargc++] = grep_strdup(str);
aargv = (char **)grep_calloc(eargc + argc + 1,
sizeof(char *));
aargv[0] = argv[0];
for (i = 0; i < eargc; i++)
aargv[i + 1] = eargv[i];
for (j = 1; j < (unsigned int)argc; j++, i++)
aargv[i + 1] = argv[j];
aargc = eargc + argc;
} else {
aargv = argv;
aargc = argc;
}
while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) !=
-1)) {
switch (c) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
if (newarg || !isdigit(lastc))
Aflag = 0;
else if (Aflag > LLONG_MAX / 10) {
errno = ERANGE;
err(2, NULL);
}
Aflag = Bflag = (Aflag * 10) + (c - '0');
break;
case 'C':
if (optarg == NULL) {
Aflag = Bflag = 2;
break;
}
/* FALLTHROUGH */
case 'A':
/* FALLTHROUGH */
case 'B':
errno = 0;
l = strtoull(optarg, &ep, 10);
if (((errno == ERANGE) && (l == ULLONG_MAX)) ||
((errno == EINVAL) && (l == 0)))
err(2, NULL);
else if (ep[0] != '\0') {
errno = EINVAL;
err(2, NULL);
}
if (c == 'A')
Aflag = l;
else if (c == 'B')
Bflag = l;
else
Aflag = Bflag = l;
break;
case 'a':
binbehave = BINFILE_TEXT;
break;
case 'b':
bflag = true;
break;
case 'c':
cflag = true;
break;
case 'D':
if (strcasecmp(optarg, "skip") == 0)
devbehave = DEV_SKIP;
else if (strcasecmp(optarg, "read") == 0)
devbehave = DEV_READ;
else
errx(2, getstr(3), "--devices");
break;
case 'd':
if (strcasecmp("recurse", optarg) == 0) {
Hflag = true;
dirbehave = DIR_RECURSE;
} else if (strcasecmp("skip", optarg) == 0)
dirbehave = DIR_SKIP;
else if (strcasecmp("read", optarg) == 0)
dirbehave = DIR_READ;
else
errx(2, getstr(3), "--directories");
break;
case 'E':
grepbehave = GREP_EXTENDED;
break;
case 'e':
add_pattern(optarg, strlen(optarg));
needpattern = 0;
break;
case 'F':
grepbehave = GREP_FIXED;
break;
case 'f':
read_patterns(optarg);
needpattern = 0;
break;
case 'G':
grepbehave = GREP_BASIC;
break;
case 'H':
Hflag = true;
break;
case 'h':
Hflag = false;
hflag = true;
break;
case 'I':
binbehave = BINFILE_SKIP;
break;
case 'i':
case 'y':
iflag = true;
cflags |= REG_ICASE;
break;
case 'J':
filebehave = FILE_BZIP;
break;
case 'L':
lflag = false;
Lflag = true;
break;
case 'l':
Lflag = false;
lflag = true;
break;
case 'm':
mflag = true;
errno = 0;
mcount = strtoull(optarg, &ep, 10);
if (((errno == ERANGE) && (mcount == ULLONG_MAX)) ||
((errno == EINVAL) && (mcount == 0)))
err(2, NULL);
else if (ep[0] != '\0') {
errno = EINVAL;
err(2, NULL);
}
break;
case 'n':
nflag = true;
break;
case 'O':
linkbehave = LINK_EXPLICIT;
break;
case 'o':
oflag = true;
break;
case 'p':
linkbehave = LINK_SKIP;
break;
case 'q':
qflag = true;
break;
case 'S':
linkbehave = LINK_READ;
break;
case 'R':
case 'r':
dirbehave = DIR_RECURSE;
Hflag = true;
break;
case 's':
sflag = true;
break;
case 'U':
binbehave = BINFILE_BIN;
break;
case 'u':
case MMAP_OPT:
/* noop, compatibility */
break;
case 'V':
printf(getstr(9), __progname, VERSION);
exit(0);
case 'v':
vflag = true;
break;
case 'w':
wflag = true;
break;
case 'x':
xflag = true;
break;
case 'Z':
nullflag = true;
break;
case 'z':
nulldataflag = true;
line_sep = '\0';
break;
case BIN_OPT:
if (strcasecmp("binary", optarg) == 0)
binbehave = BINFILE_BIN;
else if (strcasecmp("without-match", optarg) == 0)
binbehave = BINFILE_SKIP;
else if (strcasecmp("text", optarg) == 0)
binbehave = BINFILE_TEXT;
else
errx(2, getstr(3), "--binary-files");
break;
case COLOR_OPT:
color = NULL;
if (optarg == NULL || strcasecmp("auto", optarg) == 0 ||
strcasecmp("tty", optarg) == 0 ||
strcasecmp("if-tty", optarg) == 0) {
char *term;
term = getenv("TERM");
if (isatty(STDOUT_FILENO) && term != NULL &&
strcasecmp(term, "dumb") != 0)
color = init_color("01;31");
} else if (strcasecmp("always", optarg) == 0 ||
strcasecmp("yes", optarg) == 0 ||
strcasecmp("force", optarg) == 0) {
color = init_color("01;31");
} else if (strcasecmp("never", optarg) != 0 &&
strcasecmp("none", optarg) != 0 &&
strcasecmp("no", optarg) != 0)
errx(2, getstr(3), "--color");
break;
case DECOMPRESS_OPT:
filebehave = FILE_GZIP;
break;
case LABEL_OPT:
label = optarg;
break;
case LINEBUF_OPT:
lbflag = true;
break;
case R_INCLUDE_OPT:
finclude = true;
add_fpattern(optarg, INCL_PAT);
break;
case R_EXCLUDE_OPT:
fexclude = true;
add_fpattern(optarg, EXCL_PAT);
break;
case R_DINCLUDE_OPT:
dinclude = true;
add_dpattern(optarg, INCL_PAT);
break;
case R_DEXCLUDE_OPT:
dexclude = true;
add_dpattern(optarg, EXCL_PAT);
break;
case HELP_OPT:
default:
usage();
}
lastc = c;
newarg = optind != prevoptind;
prevoptind = optind;
}
aargc -= optind;
aargv += optind;
/* Fail if we don't have any pattern */
if (aargc == 0 && needpattern)
usage();
/* Process patterns from command line */
if (aargc != 0 && needpattern) {
add_pattern(*aargv, strlen(*aargv));
--aargc;
++aargv;
}
switch (grepbehave) {
case GREP_FIXED:
case GREP_BASIC:
break;
case GREP_EXTENDED:
cflags |= REG_EXTENDED;
break;
default:
/* NOTREACHED */
usage();
}
fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
/*
* XXX: fgrepcomp() and fastcomp() are workarounds for regexec() performance.
* Optimizations should be done there.
*/
/* Check if cheating is allowed (always is for fgrep). */
if (grepbehave == GREP_FIXED) {
for (i = 0; i < patterns; ++i)
fgrepcomp(&fg_pattern[i], pattern[i]);
} else {
for (i = 0; i < patterns; ++i) {
if (fastcomp(&fg_pattern[i], pattern[i])) {
/* Fall back to full regex library */
c = regcomp(&r_pattern[i], pattern[i], cflags);
if (c != 0) {
regerror(c, &r_pattern[i], re_error,
RE_ERROR_BUF);
errx(2, "%s", re_error);
}
}
}
}
if (lbflag)
setlinebuf(stdout);
if ((aargc == 0 || aargc == 1) && !Hflag)
hflag = true;
if (aargc == 0)
exit(!procfile("-"));
if (dirbehave == DIR_RECURSE)
c = grep_tree(aargv);
else
for (c = 0; aargc--; ++aargv) {
if ((finclude || fexclude) && !file_matching(*aargv))
continue;
c+= procfile(*aargv);
}
#ifndef WITHOUT_NLS
catclose(catalog);
#endif
/* Find out the correct return value according to the
results and the command line option. */
exit(c ? (notfound ? (qflag ? 0 : 2) : 0) : (notfound ? 2 : 1));
}

View file

@ -0,0 +1,162 @@
/* $NetBSD: grep.h,v 1.8 2012/05/06 22:27:00 joerg Exp $ */
/* $OpenBSD: grep.h,v 1.15 2010/04/05 03:03:55 tedu Exp $ */
/* $FreeBSD: head/usr.bin/grep/grep.h 211496 2010-08-19 09:28:59Z des $ */
/*-
* Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
* Copyright (c) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef __ANDROID__
#include <bzlib.h>
#endif
#include <limits.h>
#include <regex.h>
#include <stdbool.h>
#include <stdio.h>
#ifndef __ANDROID__
#include <zlib.h>
#endif
#ifdef WITHOUT_NLS
#define getstr(n) errstr[n]
#else
#include <nl_types.h>
extern nl_catd catalog;
#define getstr(n) catgets(catalog, 1, n, errstr[n])
#endif
extern const char *errstr[];
#define VERSION "2.5.1-FreeBSD"
#define GREP_FIXED 0
#define GREP_BASIC 1
#define GREP_EXTENDED 2
#define BINFILE_BIN 0
#define BINFILE_SKIP 1
#define BINFILE_TEXT 2
#define FILE_STDIO 0
#define FILE_GZIP 1
#define FILE_BZIP 2
#define DIR_READ 0
#define DIR_SKIP 1
#define DIR_RECURSE 2
#define DEV_READ 0
#define DEV_SKIP 1
#define LINK_READ 0
#define LINK_EXPLICIT 1
#define LINK_SKIP 2
#define EXCL_PAT 0
#define INCL_PAT 1
#define MAX_LINE_MATCHES 32
struct file {
int fd;
bool binary;
};
struct str {
off_t off;
size_t len;
char *dat;
char *file;
int line_no;
};
struct epat {
char *pat;
int mode;
};
typedef struct {
size_t len;
unsigned char *pattern;
int qsBc[UCHAR_MAX + 1];
/* flags */
bool bol;
bool eol;
bool reversed;
bool word;
} fastgrep_t;
/* Flags passed to regcomp() and regexec() */
extern int cflags, eflags;
/* Command line flags */
extern bool Eflag, Fflag, Gflag, Hflag, Lflag,
bflag, cflag, hflag, iflag, lflag, mflag, nflag, oflag,
qflag, sflag, vflag, wflag, xflag;
extern bool dexclude, dinclude, fexclude, finclude, lbflag, nullflag, nulldataflag;
extern unsigned char line_sep;
extern unsigned long long Aflag, Bflag, mcount;
extern char *label;
extern const char *color;
extern int binbehave, devbehave, dirbehave, filebehave, grepbehave, linkbehave;
extern bool notfound;
extern int tail;
extern unsigned int dpatterns, fpatterns, patterns;
extern char **pattern;
extern struct epat *dpattern, *fpattern;
extern regex_t *er_pattern, *r_pattern;
extern fastgrep_t *fg_pattern;
/* For regex errors */
#define RE_ERROR_BUF 512
extern char re_error[RE_ERROR_BUF + 1]; /* Seems big enough */
/* util.c */
bool file_matching(const char *fname);
int procfile(const char *fn);
int grep_tree(char **argv);
void *grep_malloc(size_t size);
void *grep_calloc(size_t nmemb, size_t size);
void *grep_realloc(void *ptr, size_t size);
char *grep_strdup(const char *str);
void printline(struct str *line, int sep, regmatch_t *matches, int m);
/* queue.c */
void enqueue(struct str *x);
void printqueue(void);
void clearqueue(void);
/* file.c */
void grep_close(struct file *f);
struct file *grep_open(const char *path);
char *grep_fgetln(struct file *f, size_t *len);
/* fastgrep.c */
int fastcomp(fastgrep_t *, const char *);
void fgrepcomp(fastgrep_t *, const char *);
int grep_search(fastgrep_t *, const unsigned char *, size_t, regmatch_t *);

View file

@ -0,0 +1,116 @@
/* $NetBSD: queue.c,v 1.5 2011/08/31 16:24:57 plunky Exp $ */
/* $FreeBSD: head/usr.bin/grep/queue.c 211496 2010-08-19 09:28:59Z des $ */
/*-
* Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* A really poor man's queue. It does only what it has to and gets out of
* Dodge. It is used in place of <sys/queue.h> to get a better performance.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
__RCSID("$NetBSD: queue.c,v 1.5 2011/08/31 16:24:57 plunky Exp $");
#include <sys/param.h>
#include <sys/queue.h>
#include <stdlib.h>
#include <string.h>
#include "grep.h"
struct qentry {
STAILQ_ENTRY(qentry) list;
struct str data;
};
static STAILQ_HEAD(, qentry) queue = STAILQ_HEAD_INITIALIZER(queue);
static unsigned long long count;
static struct qentry *dequeue(void);
void
enqueue(struct str *x)
{
struct qentry *item;
item = grep_malloc(sizeof(struct qentry));
item->data.dat = grep_malloc(sizeof(char) * x->len);
item->data.len = x->len;
item->data.line_no = x->line_no;
item->data.off = x->off;
memcpy(item->data.dat, x->dat, x->len);
item->data.file = x->file;
STAILQ_INSERT_TAIL(&queue, item, list);
if (++count > Bflag) {
item = dequeue();
free(item->data.dat);
free(item);
}
}
static struct qentry *
dequeue(void)
{
struct qentry *item;
item = STAILQ_FIRST(&queue);
if (item == NULL)
return (NULL);
STAILQ_REMOVE_HEAD(&queue, list);
--count;
return (item);
}
void
printqueue(void)
{
struct qentry *item;
while ((item = dequeue()) != NULL) {
printline(&item->data, '-', NULL, 0);
free(item->data.dat);
free(item);
}
}
void
clearqueue(void)
{
struct qentry *item;
while ((item = dequeue()) != NULL) {
free(item->data.dat);
free(item);
}
}

View file

@ -0,0 +1,499 @@
/* $NetBSD: util.c,v 1.17 2013/01/21 03:24:43 msaitoh Exp $ */
/* $FreeBSD: head/usr.bin/grep/util.c 211496 2010-08-19 09:28:59Z des $ */
/* $OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $ */
/*-
* Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
* Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
__RCSID("$NetBSD: util.c,v 1.17 2013/01/21 03:24:43 msaitoh Exp $");
#include <sys/stat.h>
#include <sys/types.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fnmatch.h>
#include <fts.h>
#include <libgen.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>
#include <wctype.h>
#include "grep.h"
static bool first, first_global = true;
static unsigned long long since_printed;
static int procline(struct str *l, int);
bool
file_matching(const char *fname)
{
char *fname_base, *fname_copy;
unsigned int i;
bool ret;
ret = finclude ? false : true;
fname_copy = grep_strdup(fname);
fname_base = basename(fname_copy);
for (i = 0; i < fpatterns; ++i) {
if (fnmatch(fpattern[i].pat, fname, 0) == 0 ||
fnmatch(fpattern[i].pat, fname_base, 0) == 0) {
if (fpattern[i].mode == EXCL_PAT) {
free(fname_copy);
return (false);
} else
ret = true;
}
}
free(fname_copy);
return (ret);
}
static inline bool
dir_matching(const char *dname)
{
unsigned int i;
bool ret;
ret = dinclude ? false : true;
for (i = 0; i < dpatterns; ++i) {
if (dname != NULL &&
fnmatch(dname, dpattern[i].pat, 0) == 0) {
if (dpattern[i].mode == EXCL_PAT)
return (false);
else
ret = true;
}
}
return (ret);
}
/*
* Processes a directory when a recursive search is performed with
* the -R option. Each appropriate file is passed to procfile().
*/
int
grep_tree(char **argv)
{
FTS *fts;
FTSENT *p;
char *d, *dir = NULL;
int c, fts_flags;
bool ok;
c = fts_flags = 0;
switch(linkbehave) {
case LINK_EXPLICIT:
fts_flags = FTS_COMFOLLOW;
break;
case LINK_SKIP:
fts_flags = FTS_PHYSICAL;
break;
default:
fts_flags = FTS_LOGICAL;
}
fts_flags |= FTS_NOSTAT | FTS_NOCHDIR;
if (!(fts = fts_open(argv, fts_flags, NULL)))
err(2, "fts_open");
while ((p = fts_read(fts)) != NULL) {
switch (p->fts_info) {
case FTS_DNR:
/* FALLTHROUGH */
case FTS_ERR:
errx(2, "%s: %s", p->fts_path, strerror(p->fts_errno));
break;
case FTS_D:
/* FALLTHROUGH */
case FTS_DP:
break;
case FTS_DC:
/* Print a warning for recursive directory loop */
warnx("warning: %s: recursive directory loop",
p->fts_path);
break;
default:
/* Check for file exclusion/inclusion */
ok = true;
if (dexclude || dinclude) {
if ((d = strrchr(p->fts_path, '/')) != NULL) {
dir = grep_malloc(sizeof(char) *
(d - p->fts_path + 1));
memcpy(dir, p->fts_path,
d - p->fts_path);
dir[d - p->fts_path] = '\0';
}
ok = dir_matching(dir);
free(dir);
dir = NULL;
}
if (fexclude || finclude)
ok &= file_matching(p->fts_path);
if (ok)
c += procfile(p->fts_path);
break;
}
}
fts_close(fts);
return (c);
}
/*
* Opens a file and processes it. Each file is processed line-by-line
* passing the lines to procline().
*/
int
procfile(const char *fn)
{
struct file *f;
struct stat sb;
struct str ln;
mode_t s;
int c, t;
if (mflag && (mcount <= 0))
return (0);
if (strcmp(fn, "-") == 0) {
fn = label != NULL ? label : getstr(1);
f = grep_open(NULL);
} else {
if (!stat(fn, &sb)) {
/* Check if we need to process the file */
s = sb.st_mode & S_IFMT;
if (s == S_IFDIR && dirbehave == DIR_SKIP)
return (0);
if ((s == S_IFIFO || s == S_IFCHR || s == S_IFBLK
|| s == S_IFSOCK) && devbehave == DEV_SKIP)
return (0);
}
f = grep_open(fn);
}
if (f == NULL) {
if (!sflag)
warn("%s", fn);
if (errno == ENOENT)
notfound = true;
return (0);
}
ln.file = grep_malloc(strlen(fn) + 1);
strcpy(ln.file, fn);
ln.line_no = 0;
ln.len = 0;
tail = 0;
ln.off = -1;
for (first = true, c = 0; c == 0 || !(lflag || qflag); ) {
ln.off += ln.len + 1;
if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL || ln.len == 0)
break;
if (ln.len > 0 && ln.dat[ln.len - 1] == line_sep)
--ln.len;
ln.line_no++;
/* Return if we need to skip a binary file */
if (f->binary && binbehave == BINFILE_SKIP) {
grep_close(f);
free(ln.file);
free(f);
return (0);
}
/* Process the file line-by-line */
t = procline(&ln, f->binary);
c += t;
/* Count the matches if we have a match limit */
if (mflag) {
mcount -= t;
if (mcount <= 0)
break;
}
}
if (Bflag > 0)
clearqueue();
grep_close(f);
if (cflag) {
if (!hflag)
printf("%s:", ln.file);
printf("%u%c", c, line_sep);
}
if (lflag && !qflag && c != 0)
printf("%s%c", fn, line_sep);
if (Lflag && !qflag && c == 0)
printf("%s%c", fn, line_sep);
if (c && !cflag && !lflag && !Lflag &&
binbehave == BINFILE_BIN && f->binary && !qflag)
printf(getstr(8), fn);
free(ln.file);
free(f);
return (c);
}
#define iswword(x) (iswalnum((x)) || (x) == L'_')
/*
* Processes a line comparing it with the specified patterns. Each pattern
* is looped to be compared along with the full string, saving each and every
* match, which is necessary to colorize the output and to count the
* matches. The matching lines are passed to printline() to display the
* appropriate output.
*/
static int
procline(struct str *l, int nottext)
{
regmatch_t matches[MAX_LINE_MATCHES];
regmatch_t pmatch;
size_t st = 0;
unsigned int i;
int c = 0, m = 0, r = 0;
/* Loop to process the whole line */
while (st <= l->len) {
pmatch.rm_so = st;
pmatch.rm_eo = l->len;
/* Loop to compare with all the patterns */
for (i = 0; i < patterns; i++) {
/*
* XXX: grep_search() is a workaround for speed up and should be
* removed in the future. See fastgrep.c.
*/
if (fg_pattern[i].pattern) {
r = grep_search(&fg_pattern[i],
(unsigned char *)l->dat,
l->len, &pmatch);
r = (r == 0) ? 0 : REG_NOMATCH;
st = pmatch.rm_eo;
} else {
r = regexec(&r_pattern[i], l->dat, 1,
&pmatch, eflags);
r = (r == 0) ? 0 : REG_NOMATCH;
st = pmatch.rm_eo;
}
if (r == REG_NOMATCH)
continue;
/* Check for full match */
if (xflag &&
(pmatch.rm_so != 0 ||
(size_t)pmatch.rm_eo != l->len))
continue;
/* Check for whole word match */
if (fg_pattern[i].word && pmatch.rm_so != 0) {
wchar_t wbegin, wend;
wbegin = wend = L' ';
if (pmatch.rm_so != 0 &&
sscanf(&l->dat[pmatch.rm_so - 1],
"%lc", &wbegin) != 1)
continue;
if ((size_t)pmatch.rm_eo != l->len &&
sscanf(&l->dat[pmatch.rm_eo],
"%lc", &wend) != 1)
continue;
if (iswword(wbegin) || iswword(wend))
continue;
}
c = 1;
if (m < MAX_LINE_MATCHES)
matches[m++] = pmatch;
/* matches - skip further patterns */
if ((color != NULL && !oflag) || qflag || lflag)
break;
}
if (vflag) {
c = !c;
break;
}
/* One pass if we are not recording matches */
if ((color != NULL && !oflag) || qflag || lflag)
break;
if (st == (size_t)pmatch.rm_so)
break; /* No matches */
}
if (c && binbehave == BINFILE_BIN && nottext)
return (c); /* Binary file */
/* Dealing with the context */
if ((tail || c) && !cflag && !qflag && !lflag && !Lflag) {
if (c) {
if ((Aflag || Bflag) && !first_global &&
(first || since_printed > Bflag))
printf("--\n");
tail = Aflag;
if (Bflag > 0)
printqueue();
printline(l, ':', matches, m);
} else {
printline(l, '-', matches, m);
tail--;
}
first = false;
first_global = false;
since_printed = 0;
} else {
if (Bflag)
enqueue(l);
since_printed++;
}
return (c);
}
/*
* Safe malloc() for internal use.
*/
void *
grep_malloc(size_t size)
{
void *ptr;
if ((ptr = malloc(size)) == NULL)
err(2, "malloc");
return (ptr);
}
/*
* Safe calloc() for internal use.
*/
void *
grep_calloc(size_t nmemb, size_t size)
{
void *ptr;
if ((ptr = calloc(nmemb, size)) == NULL)
err(2, "calloc");
return (ptr);
}
/*
* Safe realloc() for internal use.
*/
void *
grep_realloc(void *ptr, size_t size)
{
if ((ptr = realloc(ptr, size)) == NULL)
err(2, "realloc");
return (ptr);
}
/*
* Safe strdup() for internal use.
*/
char *
grep_strdup(const char *str)
{
char *ret;
if ((ret = strdup(str)) == NULL)
err(2, "strdup");
return (ret);
}
/*
* Prints a matching line according to the command line options.
*/
void
printline(struct str *line, int sep, regmatch_t *matches, int m)
{
size_t a = 0;
int i, n = 0;
if (!hflag) {
if (nullflag == 0)
fputs(line->file, stdout);
else {
printf("%s", line->file);
putchar(0);
}
++n;
}
if (nflag) {
if (n > 0)
putchar(sep);
printf("%d", line->line_no);
++n;
}
if (bflag) {
if (n > 0)
putchar(sep);
printf("%lld", (long long)line->off);
++n;
}
if (n)
putchar(sep);
/* --color and -o */
if ((oflag || color) && m > 0) {
for (i = 0; i < m; i++) {
if (!oflag)
fwrite(line->dat + a, matches[i].rm_so - a, 1,
stdout);
if (color)
fprintf(stdout, "\33[%sm\33[K", color);
fwrite(line->dat + matches[i].rm_so,
matches[i].rm_eo - matches[i].rm_so, 1,
stdout);
if (color)
fprintf(stdout, "\33[m\33[K");
a = matches[i].rm_eo;
if (oflag)
putchar('\n');
}
if (!oflag) {
if (line->len - a > 0)
fwrite(line->dat + a, line->len - a, 1, stdout);
putchar(line_sep);
}
} else {
fwrite(line->dat, line->len, 1, stdout);
putchar(line_sep);
}
}