302 lines
		
	
	
		
			5.0 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			302 lines
		
	
	
		
			5.0 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * Regular expression matching for expr(1).  Bugs:  The upper bound of
 | |
|  * a range specified by the \{ feature cannot be zero.
 | |
|  *
 | |
|  * Copyright (C) 1989 by Kenneth Almquist.  All rights reserved.
 | |
|  * This file is part of ash, which is distributed under the terms specified
 | |
|  * by the Ash General Public License.  See the file named LICENSE.
 | |
|  */
 | |
| 
 | |
| #include "bltin.h"
 | |
| #include "myregexp.h"
 | |
| 
 | |
| #include <stdlib.h>
 | |
| 
 | |
| #define RE_END 0		/* end of regular expression */
 | |
| #define RE_LITERAL 1		/* normal character follows */
 | |
| #define RE_DOT 2		/* "." */
 | |
| #define RE_CCL 3		/* "[...]" */
 | |
| #define RE_NCCL 4		/* "[^...]" */
 | |
| #define RE_LP 5			/* "\(" */
 | |
| #define RE_RP 6			/* "\)" */
 | |
| #define RE_MATCHED 7		/* "\digit" */
 | |
| #define RE_EOS 8		/* "$" matches end of string */
 | |
| #define RE_STAR 9		/* "*" */
 | |
| #define RE_RANGE 10		/* "\{num,num\}" */
 | |
| 
 | |
| 
 | |
| 
 | |
| char *match_begin[10];
 | |
| short match_length[10];
 | |
| short number_parens;
 | |
| static int match(char *pattern, char *string);
 | |
| 
 | |
| 
 | |
| 
 | |
| char *
 | |
| re_compile(pattern)
 | |
| 	char *pattern;
 | |
| 	{
 | |
| 	register char *p;
 | |
| 	register char c;
 | |
| 	char *comp;
 | |
| 	register char *q;
 | |
| 	char *begin;
 | |
| 	char *endp;
 | |
| 	register int len;
 | |
| 	int first;
 | |
| 	int type;
 | |
| 	char *stackp;
 | |
| 	char stack[10];
 | |
| 	int paren_num;
 | |
| 	int i;
 | |
| 
 | |
| 	p = pattern;
 | |
| 	if (*p == '^')
 | |
| 		p++;
 | |
| 	comp = q = malloc(2 * strlen(p) + 1);
 | |
| 	begin = q;
 | |
| 	stackp = stack;
 | |
| 	paren_num = 0;
 | |
| 	for (;;) {
 | |
| 		switch (c = *p++) {
 | |
| 		case '\0':
 | |
| 			*q = '\0';
 | |
| 			goto out;
 | |
| 		case '.':
 | |
| 			*q++ = RE_DOT;
 | |
| 			len = 1;
 | |
| 			break;
 | |
| 		case '[':
 | |
| 			begin = q;
 | |
| 			*q = RE_CCL;
 | |
| 			if (*p == '^') {
 | |
| 				*q = RE_NCCL;
 | |
| 				p++;
 | |
| 			}
 | |
| 			q++;
 | |
| 			first = 1;
 | |
| 			while (*p != ']' || first == 1) {
 | |
| 				if (p[1] == '-' && p[2] != ']') {
 | |
| 					*q++ = '-';
 | |
| 					*q++ = p[0];
 | |
| 					*q++ = p[2];
 | |
| 					p += 3;
 | |
| 				} else if (*p == '-') {
 | |
| 					*q++ = '-';
 | |
| 					*q++ = '-';
 | |
| 					*q++ = '-';
 | |
| 					p++;
 | |
| 				} else {
 | |
| 					*q++ = *p++;
 | |
| 				}
 | |
| 				first = 0;
 | |
| 			}
 | |
| 			p++;
 | |
| 			*q++ = '\0';
 | |
| 			len = q - begin;
 | |
| 			break;
 | |
| 		case '$':
 | |
| 			if (*p != '\0')
 | |
| 				goto dft;
 | |
| 			*q++ = RE_EOS;
 | |
| 			break;
 | |
| 		case '*':
 | |
| 			if (len == 0)
 | |
| 				goto dft;
 | |
| 			type = RE_STAR;
 | |
| range:
 | |
| 			i = (type == RE_RANGE)? 3 : 1;
 | |
| 			endp = q + i;
 | |
| 			begin = q - len;
 | |
| 			do {
 | |
| 				--q;
 | |
| 				*(q + i) = *q;
 | |
| 			} while (--len > 0);
 | |
| 			q = begin;
 | |
| 			*q++ = type;
 | |
| 			if (type == RE_RANGE) {
 | |
| 				i = 0;
 | |
| 				while ((unsigned)(*p - '0') <= 9)
 | |
| 					i = 10 * i + (*p++ - '0');
 | |
| 				*q++ = i;
 | |
| 				if (*p != ',') {
 | |
| 					*q++ = i;
 | |
| 				} else {
 | |
| 					p++;
 | |
| 					i = 0;
 | |
| 					while ((unsigned)(*p - '0') <= 9)
 | |
| 						i = 10 * i + (*p++ - '0');
 | |
| 					*q++ = i;
 | |
| 				}
 | |
| 				if (*p != '\\' || *++p != '}')
 | |
| 					error("RE error");
 | |
| 				p++;
 | |
| 			}
 | |
| 			q = endp;
 | |
| 			break;
 | |
| 		case '\\':
 | |
| 			if ((c = *p++) == '(') {
 | |
| 				if (++paren_num > 9)
 | |
| 					error("RE error");
 | |
| 				*q++ = RE_LP;
 | |
| 				*q++ = paren_num;
 | |
| 				*stackp++ = paren_num;
 | |
| 				len = 0;
 | |
| 			} else if (c == ')') {
 | |
| 				if (stackp == stack)
 | |
| 					error("RE error");
 | |
| 				*q++ = RE_RP;
 | |
| 				*q++ = *--stackp;
 | |
| 				len = 0;
 | |
| 			} else if (c == '{') {
 | |
| 				type = RE_RANGE;
 | |
| 				goto range;
 | |
| 			} else if ((unsigned)(c - '1') < 9) {
 | |
| 				/* should check validity here */
 | |
| 				*q++ = RE_MATCHED;
 | |
| 				*q++ = c - '0';
 | |
| 				len = 2;
 | |
| 			} else {
 | |
| 				goto dft;
 | |
| 			}
 | |
| 			break;
 | |
| 		default:
 | |
| dft:			*q++ = RE_LITERAL;
 | |
| 			*q++ = c;
 | |
| 			len = 2;
 | |
| 			break;
 | |
| 		}
 | |
| 	}
 | |
| out:
 | |
| 	if (stackp != stack)
 | |
| 		error("RE error");
 | |
| 	number_parens = paren_num;
 | |
| 	return comp;
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| int
 | |
| re_match(pattern, string)
 | |
| 	char *pattern;
 | |
| 	char *string;
 | |
| 	{
 | |
| 	char **pp;
 | |
| 
 | |
| 	match_begin[0] = string;
 | |
| 	for (pp = &match_begin[1] ; pp <= &match_begin[9] ; pp++)
 | |
| 		*pp = 0;
 | |
| 	return match(pattern, string);
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| static
 | |
| match(pattern, string)
 | |
| 	char *pattern;
 | |
| 	char *string;
 | |
| 	{
 | |
| 	register char *p, *q;
 | |
| 	int counting;
 | |
| 	int low, high, count;
 | |
| 	char *curpat;
 | |
| 	char *start_count;
 | |
| 	int negate;
 | |
| 	int found;
 | |
| 	char *r;
 | |
| 	int len;
 | |
| 	char c;
 | |
| 
 | |
| 	p = pattern;
 | |
| 	q = string;
 | |
| 	counting = 0;
 | |
| 	for (;;) {
 | |
| 		if (counting) {
 | |
| 			if (++count > high)
 | |
| 				goto bad;
 | |
| 			p = curpat;
 | |
| 		}
 | |
| 		switch (*p++) {
 | |
| 		case RE_END:
 | |
| 			match_length[0] = q - match_begin[0];
 | |
| 			return 1;
 | |
| 		case RE_LITERAL:
 | |
| 			if (*q++ != *p++)
 | |
| 				goto bad;
 | |
| 			break;
 | |
| 		case RE_DOT:
 | |
| 			if (*q++ == '\0')
 | |
| 				goto bad;
 | |
| 			break;
 | |
| 		case RE_CCL:
 | |
| 			negate = 0;
 | |
| 			goto ccl;
 | |
| 		case RE_NCCL:
 | |
| 			negate = 1;
 | |
| ccl:
 | |
| 			found = 0;
 | |
| 			c = *q++;
 | |
| 			while (*p) {
 | |
| 				if (*p == '-') {
 | |
| 					if (c >= *++p && c <= *++p)
 | |
| 						found = 1;
 | |
| 				} else {
 | |
| 					if (c == *p)
 | |
| 						found = 1;
 | |
| 				}
 | |
| 				p++;
 | |
| 			}
 | |
| 			p++;
 | |
| 			if (found == negate)
 | |
| 				goto bad;
 | |
| 			break;
 | |
| 		case RE_LP:
 | |
| 			match_begin[*p++] = q;
 | |
| 			break;
 | |
| 		case RE_RP:
 | |
| 			match_length[*p] = q - match_begin[*p];
 | |
| 			p++;
 | |
| 			break;
 | |
| 		case RE_MATCHED:
 | |
| 			r = match_begin[*p];
 | |
| 			len = match_length[*p++];
 | |
| 			while (--len >= 0) {
 | |
| 				if (*q++ != *r++)
 | |
| 					goto bad;
 | |
| 			}
 | |
| 			break;
 | |
| 		case RE_EOS:
 | |
| 			if (*q != '\0')
 | |
| 				goto bad;
 | |
| 			break;
 | |
| 		case RE_STAR:
 | |
| 			low = 0;
 | |
| 			high = 32767;
 | |
| 			goto range;
 | |
| 		case RE_RANGE:
 | |
| 			low = *p++;
 | |
| 			high = *p++;
 | |
| 			if (high == 0)
 | |
| 				high = 32767;
 | |
| range:
 | |
| 			curpat = p;
 | |
| 			start_count = q;
 | |
| 			count = 0;
 | |
| 			counting++;
 | |
| 			break;
 | |
| 		}
 | |
| 	}
 | |
| bad:
 | |
| 	if (! counting)
 | |
| 		return 0;
 | |
| 	len = 1;
 | |
| 	if (*curpat == RE_MATCHED)
 | |
| 		len = match_length[curpat[1]];
 | |
| 	while (--count >= low) {
 | |
| 		if (match(p, start_count + count * len))
 | |
| 			return 1;
 | |
| 	}
 | |
| 	return 0;
 | |
| }
 | 
