302 lines
		
	
	
		
			5.0 KiB
		
	
	
	
		
			C
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			302 lines
		
	
	
		
			5.0 KiB
		
	
	
	
		
			C
		
	
	
		
			Executable File
		
	
	
	
	
/*
 * Regular expression matching for expr(1).  Bugs:  The upper bound of
 * a range specified by the \{ feature cannot be zero.
 *
 * Copyright (C) 1989 by Kenneth Almquist.  All rights reserved.
 * This file is part of ash, which is distributed under the terms specified
 * by the Ash General Public License.  See the file named LICENSE.
 */
 | 
						|
 | 
						|
#include "bltin.h"
 | 
						|
#include "myregexp.h"
 | 
						|
 | 
						|
#include <stdlib.h>
 | 
						|
 | 
						|
/*
 * Opcodes of the compiled regular expression.  re_compile() emits them
 * and match() interprets them; RE_END must stay 0 because the compiled
 * program is terminated by a zero byte.
 */
#define RE_END 0		/* end of regular expression */
#define RE_LITERAL 1		/* normal character follows */
#define RE_DOT 2		/* "." */
#define RE_CCL 3		/* "[...]" */
#define RE_NCCL 4		/* "[^...]" */
#define RE_LP 5			/* "\(" */
#define RE_RP 6			/* "\)" */
#define RE_MATCHED 7		/* "\digit" */
#define RE_EOS 8		/* "$" matches end of string */
#define RE_STAR 9		/* "*" */
#define RE_RANGE 10		/* "\{num,num\}" */
 | 
						|
 | 
						|
 | 
						|
 | 
						|
/*
 * Results of the most recent match.  Entry 0 describes the whole match;
 * entries 1-9 describe the text matched by the corresponding \( \) group.
 * number_parens is the number of groups in the last compiled pattern.
 */
char *match_begin[10];
short match_length[10];
short number_parens;
static int match(char *pattern, char *string);
 | 
						|
 | 
						|
 | 
						|
 | 
						|
char *
 | 
						|
re_compile(pattern)
 | 
						|
	char *pattern;
 | 
						|
	{
 | 
						|
	register char *p;
 | 
						|
	register char c;
 | 
						|
	char *comp;
 | 
						|
	register char *q;
 | 
						|
	char *begin;
 | 
						|
	char *endp;
 | 
						|
	register int len;
 | 
						|
	int first;
 | 
						|
	int type;
 | 
						|
	char *stackp;
 | 
						|
	char stack[10];
 | 
						|
	int paren_num;
 | 
						|
	int i;
 | 
						|
 | 
						|
	p = pattern;
 | 
						|
	if (*p == '^')
 | 
						|
		p++;
 | 
						|
	comp = q = malloc(2 * strlen(p) + 1);
 | 
						|
	begin = q;
 | 
						|
	stackp = stack;
 | 
						|
	paren_num = 0;
 | 
						|
	for (;;) {
 | 
						|
		switch (c = *p++) {
 | 
						|
		case '\0':
 | 
						|
			*q = '\0';
 | 
						|
			goto out;
 | 
						|
		case '.':
 | 
						|
			*q++ = RE_DOT;
 | 
						|
			len = 1;
 | 
						|
			break;
 | 
						|
		case '[':
 | 
						|
			begin = q;
 | 
						|
			*q = RE_CCL;
 | 
						|
			if (*p == '^') {
 | 
						|
				*q = RE_NCCL;
 | 
						|
				p++;
 | 
						|
			}
 | 
						|
			q++;
 | 
						|
			first = 1;
 | 
						|
			while (*p != ']' || first == 1) {
 | 
						|
				if (p[1] == '-' && p[2] != ']') {
 | 
						|
					*q++ = '-';
 | 
						|
					*q++ = p[0];
 | 
						|
					*q++ = p[2];
 | 
						|
					p += 3;
 | 
						|
				} else if (*p == '-') {
 | 
						|
					*q++ = '-';
 | 
						|
					*q++ = '-';
 | 
						|
					*q++ = '-';
 | 
						|
					p++;
 | 
						|
				} else {
 | 
						|
					*q++ = *p++;
 | 
						|
				}
 | 
						|
				first = 0;
 | 
						|
			}
 | 
						|
			p++;
 | 
						|
			*q++ = '\0';
 | 
						|
			len = q - begin;
 | 
						|
			break;
 | 
						|
		case '$':
 | 
						|
			if (*p != '\0')
 | 
						|
				goto dft;
 | 
						|
			*q++ = RE_EOS;
 | 
						|
			break;
 | 
						|
		case '*':
 | 
						|
			if (len == 0)
 | 
						|
				goto dft;
 | 
						|
			type = RE_STAR;
 | 
						|
range:
 | 
						|
			i = (type == RE_RANGE)? 3 : 1;
 | 
						|
			endp = q + i;
 | 
						|
			begin = q - len;
 | 
						|
			do {
 | 
						|
				--q;
 | 
						|
				*(q + i) = *q;
 | 
						|
			} while (--len > 0);
 | 
						|
			q = begin;
 | 
						|
			*q++ = type;
 | 
						|
			if (type == RE_RANGE) {
 | 
						|
				i = 0;
 | 
						|
				while ((unsigned)(*p - '0') <= 9)
 | 
						|
					i = 10 * i + (*p++ - '0');
 | 
						|
				*q++ = i;
 | 
						|
				if (*p != ',') {
 | 
						|
					*q++ = i;
 | 
						|
				} else {
 | 
						|
					p++;
 | 
						|
					i = 0;
 | 
						|
					while ((unsigned)(*p - '0') <= 9)
 | 
						|
						i = 10 * i + (*p++ - '0');
 | 
						|
					*q++ = i;
 | 
						|
				}
 | 
						|
				if (*p != '\\' || *++p != '}')
 | 
						|
					error("RE error");
 | 
						|
				p++;
 | 
						|
			}
 | 
						|
			q = endp;
 | 
						|
			break;
 | 
						|
		case '\\':
 | 
						|
			if ((c = *p++) == '(') {
 | 
						|
				if (++paren_num > 9)
 | 
						|
					error("RE error");
 | 
						|
				*q++ = RE_LP;
 | 
						|
				*q++ = paren_num;
 | 
						|
				*stackp++ = paren_num;
 | 
						|
				len = 0;
 | 
						|
			} else if (c == ')') {
 | 
						|
				if (stackp == stack)
 | 
						|
					error("RE error");
 | 
						|
				*q++ = RE_RP;
 | 
						|
				*q++ = *--stackp;
 | 
						|
				len = 0;
 | 
						|
			} else if (c == '{') {
 | 
						|
				type = RE_RANGE;
 | 
						|
				goto range;
 | 
						|
			} else if ((unsigned)(c - '1') < 9) {
 | 
						|
				/* should check validity here */
 | 
						|
				*q++ = RE_MATCHED;
 | 
						|
				*q++ = c - '0';
 | 
						|
				len = 2;
 | 
						|
			} else {
 | 
						|
				goto dft;
 | 
						|
			}
 | 
						|
			break;
 | 
						|
		default:
 | 
						|
dft:			*q++ = RE_LITERAL;
 | 
						|
			*q++ = c;
 | 
						|
			len = 2;
 | 
						|
			break;
 | 
						|
		}
 | 
						|
	}
 | 
						|
out:
 | 
						|
	if (stackp != stack)
 | 
						|
		error("RE error");
 | 
						|
	number_parens = paren_num;
 | 
						|
	return comp;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
int
 | 
						|
re_match(pattern, string)
 | 
						|
	char *pattern;
 | 
						|
	char *string;
 | 
						|
	{
 | 
						|
	char **pp;
 | 
						|
 | 
						|
	match_begin[0] = string;
 | 
						|
	for (pp = &match_begin[1] ; pp <= &match_begin[9] ; pp++)
 | 
						|
		*pp = 0;
 | 
						|
	return match(pattern, string);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
static
 | 
						|
match(pattern, string)
 | 
						|
	char *pattern;
 | 
						|
	char *string;
 | 
						|
	{
 | 
						|
	register char *p, *q;
 | 
						|
	int counting;
 | 
						|
	int low, high, count;
 | 
						|
	char *curpat;
 | 
						|
	char *start_count;
 | 
						|
	int negate;
 | 
						|
	int found;
 | 
						|
	char *r;
 | 
						|
	int len;
 | 
						|
	char c;
 | 
						|
 | 
						|
	p = pattern;
 | 
						|
	q = string;
 | 
						|
	counting = 0;
 | 
						|
	for (;;) {
 | 
						|
		if (counting) {
 | 
						|
			if (++count > high)
 | 
						|
				goto bad;
 | 
						|
			p = curpat;
 | 
						|
		}
 | 
						|
		switch (*p++) {
 | 
						|
		case RE_END:
 | 
						|
			match_length[0] = q - match_begin[0];
 | 
						|
			return 1;
 | 
						|
		case RE_LITERAL:
 | 
						|
			if (*q++ != *p++)
 | 
						|
				goto bad;
 | 
						|
			break;
 | 
						|
		case RE_DOT:
 | 
						|
			if (*q++ == '\0')
 | 
						|
				goto bad;
 | 
						|
			break;
 | 
						|
		case RE_CCL:
 | 
						|
			negate = 0;
 | 
						|
			goto ccl;
 | 
						|
		case RE_NCCL:
 | 
						|
			negate = 1;
 | 
						|
ccl:
 | 
						|
			found = 0;
 | 
						|
			c = *q++;
 | 
						|
			while (*p) {
 | 
						|
				if (*p == '-') {
 | 
						|
					if (c >= *++p && c <= *++p)
 | 
						|
						found = 1;
 | 
						|
				} else {
 | 
						|
					if (c == *p)
 | 
						|
						found = 1;
 | 
						|
				}
 | 
						|
				p++;
 | 
						|
			}
 | 
						|
			p++;
 | 
						|
			if (found == negate)
 | 
						|
				goto bad;
 | 
						|
			break;
 | 
						|
		case RE_LP:
 | 
						|
			match_begin[*p++] = q;
 | 
						|
			break;
 | 
						|
		case RE_RP:
 | 
						|
			match_length[*p] = q - match_begin[*p];
 | 
						|
			p++;
 | 
						|
			break;
 | 
						|
		case RE_MATCHED:
 | 
						|
			r = match_begin[*p];
 | 
						|
			len = match_length[*p++];
 | 
						|
			while (--len >= 0) {
 | 
						|
				if (*q++ != *r++)
 | 
						|
					goto bad;
 | 
						|
			}
 | 
						|
			break;
 | 
						|
		case RE_EOS:
 | 
						|
			if (*q != '\0')
 | 
						|
				goto bad;
 | 
						|
			break;
 | 
						|
		case RE_STAR:
 | 
						|
			low = 0;
 | 
						|
			high = 32767;
 | 
						|
			goto range;
 | 
						|
		case RE_RANGE:
 | 
						|
			low = *p++;
 | 
						|
			high = *p++;
 | 
						|
			if (high == 0)
 | 
						|
				high = 32767;
 | 
						|
range:
 | 
						|
			curpat = p;
 | 
						|
			start_count = q;
 | 
						|
			count = 0;
 | 
						|
			counting++;
 | 
						|
			break;
 | 
						|
		}
 | 
						|
	}
 | 
						|
bad:
 | 
						|
	if (! counting)
 | 
						|
		return 0;
 | 
						|
	len = 1;
 | 
						|
	if (*curpat == RE_MATCHED)
 | 
						|
		len = match_length[curpat[1]];
 | 
						|
	while (--count >= low) {
 | 
						|
		if (match(p, start_count + count * len))
 | 
						|
			return 1;
 | 
						|
	}
 | 
						|
	return 0;
 | 
						|
}
 |