Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9aca0190 authored by Elliott Hughes's avatar Elliott Hughes Committed by Gerrit Code Review
Browse files

Merge "Switch to PCRE grep."

parents ca07de42 90a018a8
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -195,12 +195,12 @@ vmstat wc which whoami xargs xxd yes zcat
Android Q
---------

BSD: grep

bzip2: bzcat bzip2 bunzip2

one-true-awk: awk

PCRE: egrep fgrep grep

toolbox: getevent getprop newfs\_msdos

toybox: acpi base64 basename blockdev cal cat chcon chgrp chmod chown
+0 −36
Original line number Diff line number Diff line
@@ -61,39 +61,3 @@ cc_binary {
    defaults: ["toolbox_defaults"],
    srcs: ["r.c"],
}

// We build BSD grep separately, so it can provide egrep and fgrep too.
// Shared settings for every grep binary built from the NetBSD sources below.
cc_defaults {
    name: "grep_common",
    // Inherit the common toolbox build settings.
    defaults: ["toolbox_defaults"],
    // The NetBSD grep sources compiled into each binary using these defaults.
    srcs: [
        "upstream-netbsd/usr.bin/grep/fastgrep.c",
        "upstream-netbsd/usr.bin/grep/file.c",
        "upstream-netbsd/usr.bin/grep/grep.c",
        "upstream-netbsd/usr.bin/grep/queue.c",
        "upstream-netbsd/usr.bin/grep/util.c",
    ],
    // Extra names under which the same binary is exposed.
    symlinks: [
        "egrep",
        "fgrep",
    ],

    // Integer-overflow sanitization is switched off for these sources.
    // NOTE(review): presumably because the upstream code relies on unsigned
    // wrap-around -- confirm before re-enabling.
    sanitize: {
        integer_overflow: false,
    },
}

// The grep binary itself; everything is inherited from "grep_common".
cc_binary {
    name: "grep",
    defaults: ["grep_common"],
}

// Build vendor grep.
// TODO: Add vendor_available to "grep" module and remove "grep_vendor" module
//       when vendor_available is fully supported.
cc_binary {
    name: "grep_vendor",
    // The module name must be unique, but the output file is still named "grep".
    stem: "grep",
    // Build this copy as a vendor module.
    // NOTE(review): presumably installed on the vendor partition -- confirm.
    vendor: true,
    defaults: ["grep_common"],
}
+0 −336
Original line number Diff line number Diff line
/*	$OpenBSD: util.c,v 1.36 2007/10/02 17:59:18 otto Exp $	*/
/*	$FreeBSD: head/usr.bin/grep/fastgrep.c 211496 2010-08-19 09:28:59Z des $ */

/*-
 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
 * Copyright (C) 2008 Gabor Kovesdan <gabor@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * XXX: This file is a speed up for grep to cover the defects of the
 * regex library.  These optimizations should practically be implemented
 * there keeping this code clean.  This is a future TODO, but for the
 * meantime, we need to use this workaround.
 */

#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif

#include <sys/cdefs.h>
__RCSID("$NetBSD: fastgrep.c,v 1.5 2011/04/18 03:27:40 joerg Exp $");

#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include <wctype.h>

#include "grep.h"

static inline int	grep_cmp(const unsigned char *, const unsigned char *, size_t);
static inline void	grep_revstr(unsigned char *, int);

/*
 * Prepares `fg` for matching the literal string `pat`.
 *
 * The pattern is duplicated with grep_strdup(), all anchoring/reversal
 * flags are cleared, and the qsBc shift table is filled in: every byte
 * value defaults to a shift of the full pattern length, and each byte that
 * occurs in the pattern at index >= 1 shifts by its distance from the end.
 */
void
fgrepcomp(fastgrep_t *fg, const char *pat)
{
	unsigned int byte;
	unsigned int pos;

	/* A fixed string is never anchored and never searched backwards. */
	fg->reversed = false;
	fg->eol = false;
	fg->bol = false;

	fg->len = strlen(pat);
	fg->pattern = (unsigned char *)grep_strdup(pat);

	/* Default shift: the whole pattern length. */
	byte = 0;
	while (byte <= UCHAR_MAX) {
		fg->qsBc[byte] = fg->len;
		byte++;
	}

	/*
	 * Bytes present in the pattern shift by their distance to its end.
	 * Index 0 is skipped: its shift would equal the default anyway.
	 */
	pos = 1;
	while (pos < fg->len) {
		fg->qsBc[fg->pattern[pos]] = fg->len - pos;
		pos++;
	}
}

/*
 * Tries to compile the regular expression `pat` into the fast matcher `fg`
 * so that the full regex engine can be skipped.
 *
 * A leading '^', a trailing '$' and a surrounding "[[:<:]]" ... "[[:>:]]"
 * pair are stripped and recorded in fg->bol, fg->eol and fg->word.  The
 * remaining text is copied into fg->pattern and the qsBc shift table is
 * built from it.
 *
 * Returns: -1 on failure (fg->pattern is freed and set to NULL),
 *           0 on success
 */
int
fastcomp(fastgrep_t *fg, const char *pat)
{
	unsigned int i;
	int firstHalfDot = -1;		/* first '.' in the first half, or -1 */
	int firstLastHalfDot = -1;	/* first '.' in the second half, or -1 */
	int hasDot = 0;			/* index of the last '.' seen (0 if none) */
	int lastHalfDot = 0;		/* last '.' in the second half (0 if none) */
	int shiftPatternLen;

	/* Initialize. */
	fg->len = strlen(pat);
	fg->bol = false;
	fg->eol = false;
	fg->reversed = false;
	fg->word = wflag;

	/* Remove end-of-line character ('$'). */
	if (fg->len > 0 && pat[fg->len - 1] == '$') {
		fg->eol = true;
		fg->len--;
	}

	/* Remove beginning-of-line character ('^'). */
	if (pat[0] == '^') {
		fg->bol = true;
		fg->len--;
		pat++;
	}

	/* Strip a surrounding word-boundary pair (7 bytes on each side). */
	if (fg->len >= 14 &&
	    memcmp(pat, "[[:<:]]", 7) == 0 &&
	    memcmp(pat + fg->len - 7, "[[:>:]]", 7) == 0) {
		fg->len -= 14;
		pat += 7;
		/* Word boundary is handled separately in util.c */
		fg->word = true;
	}

	/*
	 * pat has been adjusted earlier to not include '^', '$' or
	 * the word match character classes at the beginning and ending
	 * of the string respectively.
	 */
	fg->pattern = grep_malloc(fg->len + 1);
	memcpy(fg->pattern, pat, fg->len);
	fg->pattern[fg->len] = '\0';

	/*
	 * Look for ways to cheat...er...avoid the full regex engine.
	 *
	 * NOTE(review): as written, any byte other than '.' takes the else
	 * branch and rejects the pattern, so only patterns made up entirely
	 * of dots (after stripping) are accepted here -- confirm this is the
	 * intended restriction.
	 */
	for (i = 0; i < fg->len; i++) {
		/* Can still cheat? */
		if (fg->pattern[i] == '.') {
			hasDot = i;
			if (i < fg->len / 2) {
				if (firstHalfDot < 0)
					/* Closest dot to the beginning */
					firstHalfDot = i;
			} else {
				/* Closest dot to the end of the pattern. */
				lastHalfDot = i;
				if (firstLastHalfDot < 0)
					firstLastHalfDot = i;
			}
		} else {
			/* Free memory and let others know this is empty. */
			free(fg->pattern);
			fg->pattern = NULL;
			return (-1);
		}
	}

	/*
	 * Determine if a reverse search would be faster based on the placement
	 * of the dots.  Reverse search is never chosen when lflag, cflag,
	 * oflag or color is set, or when the pattern is anchored.
	 */
	if ((!(lflag || cflag)) && ((!(fg->bol || fg->eol)) &&
	    ((lastHalfDot) && ((firstHalfDot < 0) ||
	    ((fg->len - (lastHalfDot + 1)) < (size_t)firstHalfDot)))) &&
	    !oflag && !color) {
		fg->reversed = true;
		/* Position of the relevant dot once the pattern is reversed. */
		hasDot = fg->len - (firstHalfDot < 0 ?
		    firstLastHalfDot : firstHalfDot) - 1;
		grep_revstr(fg->pattern, fg->len);
	}

	/*
	 * Normal Quick Search would require a shift based on the position the
	 * next character after the comparison is within the pattern.  With
	 * wildcards, the position of the last dot affects the maximum shift
	 * distance.
	 * The closer to the end the wild card is the slower the search.  A
	 * reverse version of this algorithm would be useful for wildcards near
	 * the end of the string.
	 *
	 * Examples:
	 * Pattern	Max shift
	 * -------	---------
	 * this		5
	 * .his		4
	 * t.is		3
	 * th.s		2
	 * thi.		1
	 */

	/* Adjust the shift based on location of the last dot ('.'). */
	shiftPatternLen = fg->len - hasDot;

	/* Preprocess pattern: default shift first, then per-byte shifts. */
	for (i = 0; i <= (signed)UCHAR_MAX; i++)
		fg->qsBc[i] = shiftPatternLen;
	for (i = hasDot + 1; i < fg->len; i++) {
		fg->qsBc[fg->pattern[i]] = fg->len - i;
	}

	/*
	 * Put pattern back to normal after pre-processing to allow for easy
	 * comparisons later.
	 */
	if (fg->reversed)
		grep_revstr(fg->pattern, fg->len);

	return (0);
}

/*
 * Searches the `len` bytes at `data` for the precompiled fast pattern `fg`.
 *
 * On entry pmatch->rm_so is the offset at which the search should start
 * (used by the forward search and by the '^' check).  On a match,
 * pmatch->rm_so / pmatch->rm_eo are set to the matched byte range.
 *
 * Returns: 0 on a match, REG_NOMATCH otherwise.
 */
int
grep_search(fastgrep_t *fg, const unsigned char *data, size_t len, regmatch_t *pmatch)
{
	unsigned int j;
	int ret = REG_NOMATCH;

	/* The start offset is already at the end of the data. */
	if (pmatch->rm_so == (ssize_t)len)
		return (ret);

	/* A '^'-anchored pattern can only ever match at offset 0. */
	if (fg->bol && pmatch->rm_so != 0) {
		pmatch->rm_so = len;
		pmatch->rm_eo = len;
		return (ret);
	}

	/* No point in going farther if we do not have enough data. */
	if (len < fg->len)
		return (ret);

	if (fg->bol || fg->eol) {
		/*
		 * Anchored pattern: there is exactly one candidate position,
		 * so a single plain comparison is enough.  len >= fg->len is
		 * guaranteed by the check above.
		 */
		j = fg->eol ? len - fg->len : 0;
		/* With both anchors, the data must be exactly pattern-sized. */
		if (!((fg->bol && fg->eol) && (len != fg->len)) &&
		    grep_cmp(fg->pattern, data + j, fg->len) == -1) {
			pmatch->rm_so = j;
			pmatch->rm_eo = j + fg->len;
			ret = 0;
		}
	} else if (fg->reversed) {
		/* Quick Search algorithm, scanning from the end backwards. */
		j = len;
		do {
			if (grep_cmp(fg->pattern, data + j - fg->len,
			    fg->len) == -1) {
				pmatch->rm_so = j - fg->len;
				pmatch->rm_eo = j;
				ret = 0;
				break;
			}
			/* Shift if within bounds, otherwise, we are done. */
			if (j == fg->len)
				break;
			j -= fg->qsBc[data[j - fg->len - 1]];
		} while (j >= fg->len);
	} else {
		/* Quick Search algorithm, scanning forwards from rm_so. */
		j = pmatch->rm_so;

		/*
		 * Fewer than fg->len bytes remain after the start offset:
		 * comparing here would read past data + len and could report
		 * a match that extends beyond the data.
		 */
		if (j > len - fg->len)
			return (ret);

		do {
			if (grep_cmp(fg->pattern, data + j, fg->len) == -1) {
				pmatch->rm_so = j;
				pmatch->rm_eo = j + fg->len;
				ret = 0;
				break;
			}

			/* Shift if within bounds, otherwise, we are done. */
			if (j + fg->len == len)
				break;
			else
				j += fg->qsBc[data[j + fg->len]];
		} while (j <= (len - fg->len));
	}

	return (ret);
}

/*
 * Compares the first `len` positions of `pat` against `data`.  Unless
 * grepbehave is GREP_FIXED, a '.' in the pattern matches any character.
 * When iflag is set, both strings are converted to wide characters and
 * compared case-insensitively.
 *
 * Returns:	i >= 0 on failure (position that it failed)
 *		-1 on success
 *
 * NOTE(review): a failed multibyte conversion also returns -1, i.e. it is
 * reported like a match; that behavior is kept as-is -- confirm intent.
 * NOTE(review): the wide-character loop still runs to `len` (a byte count),
 * which can exceed the number of converted wide characters for multibyte
 * input -- not changed here, verify against callers.
 */
static inline int
grep_cmp(const unsigned char *pat, const unsigned char *data, size_t len)
{
	size_t size;
	wchar_t *wdata = NULL, *wpat = NULL;
	unsigned int i;
	int ret = -1;

	if (!iflag) {
		/* Plain byte-wise comparison. */
		for (i = 0; i < len; i++) {
			if ((pat[i] == data[i]) || ((grepbehave != GREP_FIXED) &&
			    pat[i] == '.'))
				continue;
			return (i);
		}
		return (-1);
	}

	/* Case-insensitive: widen both strings, then compare lower-cased. */
	if ((size = mbstowcs(NULL, (const char *)data, 0)) ==
	    ((size_t) - 1))
		goto done;

	wdata = grep_malloc(size * sizeof(wint_t));

	if (mbstowcs(wdata, (const char *)data, size) ==
	    ((size_t) - 1))
		goto done;

	if ((size = mbstowcs(NULL, (const char *)pat, 0)) ==
	    ((size_t) - 1))
		goto done;

	wpat = grep_malloc(size * sizeof(wint_t));

	if (mbstowcs(wpat, (const char *)pat, size) == ((size_t) - 1))
		goto done;

	for (i = 0; i < len; i++) {
		if ((towlower(wpat[i]) == towlower(wdata[i])) ||
		    ((grepbehave != GREP_FIXED) && wpat[i] == L'.'))
			continue;
		ret = i;
		break;
	}

done:
	/*
	 * Single exit for the wide path so the temporary buffers are released
	 * on a match, on a mismatch and on conversion failure alike;
	 * free(NULL) is a no-op for buffers that were never allocated.
	 */
	free(wpat);
	free(wdata);
	return (ret);
}

/*
 * Reverses the first `len` bytes of `str` in place.  A `len` of 0 or 1
 * (or a negative value) leaves the buffer untouched.
 */
static inline void
grep_revstr(unsigned char *str, int len)
{
	int i;
	unsigned char tmp;	/* same type as the data: no sign conversion */

	for (i = 0; i < len / 2; i++) {
		tmp = str[i];
		str[i] = str[len - i - 1];
		str[len - i - 1] = tmp;
	}
}
+0 −271
Original line number Diff line number Diff line
/*	$NetBSD: file.c,v 1.7 2011/04/18 22:46:48 joerg Exp $	*/
/*	$FreeBSD: head/usr.bin/grep/file.c 211496 2010-08-19 09:28:59Z des $	*/
/*	$OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $	*/

/*-
 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
 * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif

#include <sys/cdefs.h>
__RCSID("$NetBSD: file.c,v 1.7 2011/04/18 22:46:48 joerg Exp $");

#include <sys/param.h>
#include <sys/types.h>
#include <sys/stat.h>

#ifndef __ANDROID__
#include <bzlib.h>
#endif
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>
#include <wctype.h>
#ifndef __ANDROID__
#include <zlib.h>
#endif

#include "grep.h"

#define	MAXBUFSIZ	(32 * 1024)
#define	LNBUFBUMP	80

#ifndef __ANDROID__
static gzFile gzbufdesc;
static BZFILE* bzbufdesc;
#endif

static unsigned char buffer[MAXBUFSIZ];
static unsigned char *bufpos;
static size_t bufrem;

static unsigned char *lnbuf;
static size_t lnbuflen;

/*
 * Refills the static read buffer from file `f`.
 *
 * bufpos is reset to the start of `buffer` and up to MAXBUFSIZ bytes are
 * read into it.  Outside Android builds, the data comes from the gzip or
 * bzip2 reader when filebehave selects one; otherwise from a plain read()
 * on f->fd.
 *
 * Returns 0 on success with bufrem set to the number of bytes read (0 means
 * nothing more was read), or -1 on a read/seek error with bufrem left at 0.
 */
static inline int
grep_refill(struct file *f)
{
	ssize_t nr;
#ifndef __ANDROID__
	int bzerr;
#endif

	/* Start from an empty buffer; bufrem is only updated on success. */
	bufpos = buffer;
	bufrem = 0;

#ifndef __ANDROID__
	if (filebehave == FILE_GZIP)
		nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
	else if (filebehave == FILE_BZIP && bzbufdesc != NULL) {
		nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
		switch (bzerr) {
		case BZ_OK:
		case BZ_STREAM_END:
			/* No problem, nr will be okay */
			break;
		case BZ_DATA_ERROR_MAGIC:
			/*
			 * As opposed to gzread(), which simply returns the
			 * plain file data, if it is not in the correct
			 * compressed format, BZ2_bzRead() instead aborts.
			 *
			 * So, just restart at the beginning of the file again,
			 * and use plain reads from now on.
			 */
			BZ2_bzReadClose(&bzerr, bzbufdesc);
			/* With bzbufdesc NULL, later calls take the read() path. */
			bzbufdesc = NULL;
			if (lseek(f->fd, 0, SEEK_SET) == -1)
				return (-1);
			nr = read(f->fd, buffer, MAXBUFSIZ);
			break;
		default:
			/* Make sure we exit with an error */
			nr = -1;
		}
	} else
#endif
		/* Plain file (and the only path on Android builds). */
		nr = read(f->fd, buffer, MAXBUFSIZ);

	if (nr < 0)
		return (-1);

	bufrem = nr;
	return (0);
}

/*
 * Ensures the line buffer `lnbuf` can hold at least `newlen` bytes,
 * enlarging it through grep_realloc() when it is currently smaller.
 * The buffer is never shrunk.  Always returns 0.
 */
static inline int
grep_lnbufgrow(size_t newlen)
{
	/* Already large enough: leave the buffer alone. */
	if (newlen <= lnbuflen)
		return (0);

	lnbuf = grep_realloc(lnbuf, newlen);
	lnbuflen = newlen;
	return (0);
}

/*
 * Returns the next line of file `f`, terminated by `line_sep`.
 *
 * On success the line's length in bytes (including the separator, when one
 * was found) is stored in *lenp.  The returned pointer refers either to the
 * static read buffer (the line was fully contained in it) or to the
 * line buffer `lnbuf` (the line spanned more than one refill); the data is
 * not NUL-terminated by this function.
 *
 * At end of input *lenp is set to 0 and a non-NULL pointer is returned.
 * On error *lenp is set to 0 and NULL is returned.
 */
char *
grep_fgetln(struct file *f, size_t *lenp)
{
	unsigned char *p;
	char *ret;
	size_t len;
	size_t off;
	ptrdiff_t diff;

	/* Fill the buffer, if necessary */
	if (bufrem == 0 && grep_refill(f) != 0)
		goto error;

	if (bufrem == 0) {
		/* Return zero length to indicate EOF */
		*lenp = 0;
		return ((char *)bufpos);
	}

	/* Look for a newline in the remaining part of the buffer */
	if ((p = memchr(bufpos, line_sep, bufrem)) != NULL) {
		++p; /* advance over newline */
		/* Fast path: hand out the line straight from the read buffer. */
		ret = (char *)bufpos;
		len = p - bufpos;
		bufrem -= len;
		bufpos = p;
		*lenp = len;
		return (ret);
	}

	/*
	 * We have to copy the current buffered data to the line buffer.
	 * `len` is the total line length gathered so far and `off` is how
	 * much of it has already been copied into lnbuf.
	 */
	for (len = bufrem, off = 0; ; len += bufrem) {
		/* Make sure there is room for more data */
		if (grep_lnbufgrow(len + LNBUFBUMP))
			goto error;
		/* Append the whole remaining read buffer to the line. */
		memcpy(lnbuf + off, bufpos, len - off);
		off = len;
		if (grep_refill(f) != 0)
			goto error;
		if (bufrem == 0)
			/* EOF: return partial line */
			break;
		if ((p = memchr(bufpos, line_sep, bufrem)) == NULL)
			/* Still no separator: loop to append this buffer too. */
			continue;
		/* got it: finish up the line (like code above) */
		++p;
		diff = p - bufpos;
		len += diff;
		if (grep_lnbufgrow(len))
		    goto error;
		memcpy(lnbuf + off, bufpos, diff);
		bufrem -= diff;
		bufpos = p;
		break;
	}
	*lenp = len;
	return ((char *)lnbuf);

error:
	*lenp = 0;
	return (NULL);
}

/*
 * Finishes setting up a freshly opened `f`: attaches the gzip/bzip2 reader
 * when one is selected (non-Android builds only), performs the first buffer
 * fill, and flags the file as binary when that first buffer contains a NUL
 * byte (unless nulldataflag is set or binbehave is BINFILE_TEXT).
 *
 * Returns `f` on success.  On any failure the descriptor is closed, `f` is
 * freed and NULL is returned.
 */
static inline struct file *
grep_file_init(struct file *f)
{
	int failed = 0;

#ifndef __ANDROID__
	if (filebehave == FILE_GZIP) {
		gzbufdesc = gzdopen(f->fd, "r");
		failed = (gzbufdesc == NULL);
	}

	if (!failed && filebehave == FILE_BZIP) {
		bzbufdesc = BZ2_bzdopen(f->fd, "r");
		failed = (bzbufdesc == NULL);
	}
#endif

	/* The first refill doubles as an early read-error check. */
	if (!failed)
		failed = (grep_refill(f) != 0);

	if (failed) {
		close(f->fd);
		free(f);
		return (NULL);
	}

	/* Binary detection is skipped in NUL-data mode or when forced to text. */
	if (nulldataflag || binbehave == BINFILE_TEXT)
		return (f);

	if (memchr(bufpos, '\0', bufrem) != NULL)
		f->binary = true;

	return (f);
}

/*
 * Opens `path` read-only for processing, or uses standard input when
 * `path` is NULL (which also turns on lbflag).
 *
 * Returns the initialized file object, or NULL when the file cannot be
 * opened or its initialization fails.
 */
struct file *
grep_open(const char *path)
{
	struct file *fp;

	fp = grep_malloc(sizeof *fp);
	memset(fp, 0, sizeof *fp);

	if (path != NULL) {
		fp->fd = open(path, O_RDONLY);
		if (fp->fd == -1) {
			free(fp);
			return (NULL);
		}
	} else {
		/* Processing stdin implies --line-buffered. */
		lbflag = true;
		fp->fd = STDIN_FILENO;
	}

	return (grep_file_init(fp));
}

/*
 * Closes the descriptor of `f` and returns the shared read state to its
 * initial condition: the line buffer is released and the read buffer is
 * marked empty.  `f` itself is not freed here.
 */
void
grep_close(struct file *f)
{

	close(f->fd);

	/* Drop the line buffer. */
	free(lnbuf);
	lnbuflen = 0;
	lnbuf = NULL;

	/* Mark the read buffer as empty. */
	bufrem = 0;
	bufpos = buffer;
}
+0 −707

File deleted.

Preview size limit exceeded, changes collapsed.

Loading