336 lines
		
	
	
		
			6.9 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			336 lines
		
	
	
		
			6.9 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*	$NetBSD: str.c,v 1.12 2009/04/13 23:50:49 lukem Exp $	*/
 | |
| 
 | |
| /*-
 | |
|  * Copyright (c) 1991, 1993
 | |
|  *	The Regents of the University of California.  All rights reserved.
 | |
|  *
 | |
|  * Redistribution and use in source and binary forms, with or without
 | |
|  * modification, are permitted provided that the following conditions
 | |
|  * are met:
 | |
|  * 1. Redistributions of source code must retain the above copyright
 | |
|  *    notice, this list of conditions and the following disclaimer.
 | |
|  * 2. Redistributions in binary form must reproduce the above copyright
 | |
|  *    notice, this list of conditions and the following disclaimer in the
 | |
|  *    documentation and/or other materials provided with the distribution.
 | |
|  * 3. Neither the name of the University nor the names of its contributors
 | |
|  *    may be used to endorse or promote products derived from this software
 | |
|  *    without specific prior written permission.
 | |
|  *
 | |
|  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 | |
|  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | |
|  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 | |
|  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 | |
|  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 | |
|  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 | |
|  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 | |
|  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 | |
|  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 | |
|  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 | |
|  * SUCH DAMAGE.
 | |
|  */
 | |
| 
 | |
| #include <sys/types.h>
 | |
| 
 | |
| #include <err.h>
 | |
| #include <errno.h>
 | |
| #include <stddef.h>
 | |
| #include <stdio.h>
 | |
| #include <stdlib.h>
 | |
| #include <string.h>
 | |
| #include <ctype.h>
 | |
| 
 | |
| #include "tr.h"
 | |
| 
 | |
| static int	backslash (STR *);
 | |
| static int	bracket (STR *);
 | |
| static int	c_class (const void *, const void *);
 | |
| static void	genclass (STR *);
 | |
| static void	genequiv (STR *);
 | |
| static int	genrange (STR *);
 | |
| static void	genseq (STR *);
 | |
| 
 | |
| int
 | |
| next(s)
 | |
| 	STR *s;
 | |
| {
 | |
| 	int ch;
 | |
| 
 | |
| 	switch (s->state) {
 | |
| 	case EOS:
 | |
| 		return (0);
 | |
| 	case INFINITE:
 | |
| 		return (1);
 | |
| 	case NORMAL:
 | |
| 		switch (ch = *s->str) {
 | |
| 		case '\0':
 | |
| 			s->state = EOS;
 | |
| 			return (0);
 | |
| 		case '\\':
 | |
| 			s->lastch = backslash(s);
 | |
| 			break;
 | |
| 		case '[':
 | |
| 			if (bracket(s))
 | |
| 				return (next(s));
 | |
| 			/* FALLTHROUGH */
 | |
| 		default:
 | |
| 			++s->str;
 | |
| 			s->lastch = ch;
 | |
| 			break;
 | |
| 		}
 | |
| 
 | |
| 		/* We can start a range at any time. */
 | |
| 		if (s->str[0] == '-' && genrange(s))
 | |
| 			return (next(s));
 | |
| 		return (1);
 | |
| 	case RANGE:
 | |
| 		if (s->cnt-- == 0) {
 | |
| 			s->state = NORMAL;
 | |
| 			return (next(s));
 | |
| 		}
 | |
| 		++s->lastch;
 | |
| 		return (1);
 | |
| 	case SEQUENCE:
 | |
| 		if (s->cnt-- == 0) {
 | |
| 			s->state = NORMAL;
 | |
| 			return (next(s));
 | |
| 		}
 | |
| 		return (1);
 | |
| 	case SET:
 | |
| 		if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
 | |
| 			s->state = NORMAL;
 | |
| 			return (next(s));
 | |
| 		}
 | |
| 		return (1);
 | |
| 	}
 | |
| 	/* NOTREACHED */
 | |
| 	return (0);
 | |
| }
 | |
| 
 | |
| static int
 | |
| bracket(s)
 | |
| 	STR *s;
 | |
| {
 | |
| 	char *p;
 | |
| 
 | |
| 	switch (s->str[1]) {
 | |
| 	case ':':				/* "[:class:]" */
 | |
| 		if ((p = strstr((char *) s->str + 2, ":]")) == NULL)
 | |
| 			return (0);
 | |
| 		*p = '\0';
 | |
| 		s->str += 2;
 | |
| 		genclass(s);
 | |
| 		s->str = (unsigned char *) p + 2;
 | |
| 		return (1);
 | |
| 	case '=':				/* "[=equiv=]" */
 | |
| 		if ((p = strstr((char *) s->str + 2, "=]")) == NULL)
 | |
| 			return (0);
 | |
| 		s->str += 2;
 | |
| 		genequiv(s);
 | |
| 		return (1);
 | |
| 	default:				/* "[\###*n]" or "[#*n]" */
 | |
| 		if ((p = strpbrk((char *) s->str + 2, "*]")) == NULL)
 | |
| 			return (0);
 | |
| 		if (p[0] != '*' || strchr(p, ']') == NULL)
 | |
| 			return (0);
 | |
| 		s->str += 1;
 | |
| 		genseq(s);
 | |
| 		return (1);
 | |
| 	}
 | |
| 	/* NOTREACHED */
 | |
| }
 | |
| 
 | |
/*
 * One named character class: the name as written between "[:" and ":]",
 * the <ctype.h> predicate that decides membership, and a slot for the
 * generated member list (filled in by genclass()).
 */
typedef struct {
	const char *name;
	int (*func)(int);
	int *set;
} CLASS;

/* Looked up with bsearch(), so the entries must stay sorted by name. */
static CLASS classes[] = {
	{ "alnum",	isalnum,	NULL },
	{ "alpha",	isalpha,	NULL },
	{ "blank",	isblank,	NULL },
	{ "cntrl",	iscntrl,	NULL },
	{ "digit",	isdigit,	NULL },
	{ "graph",	isgraph,	NULL },
	{ "lower",	islower,	NULL },
	{ "print",	isprint,	NULL },
	{ "punct",	ispunct,	NULL },
	{ "space",	isspace,	NULL },
	{ "upper",	isupper,	NULL },
	{ "xdigit",	isxdigit,	NULL },
};
 | |
| 
 | |
| static void
 | |
| genclass(s)
 | |
| 	STR *s;
 | |
| {
 | |
| 	int cnt, (*func) (int);
 | |
| 	CLASS *cp, tmp;
 | |
| 	int *p;
 | |
| 
 | |
| 	tmp.name = (char *) s->str;
 | |
| 	if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) /
 | |
| 	    sizeof(CLASS), sizeof(CLASS), c_class)) == NULL) {
 | |
| 		fprintf(stderr, "tr: unknown class %s\n", s->str);
 | |
| 		exit(1);
 | |
| 	}
 | |
| 
 | |
| 	if ((cp->set = p = malloc((NCHARS + 1) * sizeof(int))) == NULL) {
 | |
| 		perror("malloc");
 | |
| 		exit(1);
 | |
| 	}
 | |
| 	memset(p, 0, (NCHARS + 1) * sizeof(int));
 | |
| 	for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt)
 | |
| 		if ((func)(cnt))
 | |
| 			*p++ = cnt;
 | |
| 	*p = OOBCH;
 | |
| 
 | |
| 	s->cnt = 0;
 | |
| 	s->state = SET;
 | |
| 	s->set = cp->set;
 | |
| }
 | |
| 
 | |
| static int
 | |
| c_class(a, b)
 | |
| 	const void *a, *b;
 | |
| {
 | |
| 	return (strcmp(((const CLASS *)a)->name, ((const CLASS *)b)->name));
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * English doesn't have any equivalence classes, so for now
 | |
|  * we just syntax check and grab the character.
 | |
|  */
 | |
| static void
 | |
| genequiv(s)
 | |
| 	STR *s;
 | |
| {
 | |
| 	if (*s->str == '\\') {
 | |
| 		s->equiv[0] = backslash(s);
 | |
| 		if (*s->str != '=') {
 | |
| 			fprintf(stderr, "tr: misplaced equivalence equals sign\n");
 | |
| 			exit(1);
 | |
| 		}
 | |
| 	} else {
 | |
| 		s->equiv[0] = s->str[0];
 | |
| 		if (s->str[1] != '=') {
 | |
| 			fprintf(stderr, "tr: misplaced equivalence equals sign\n");
 | |
| 			exit(1);
 | |
| 		}
 | |
| 	}
 | |
| 	s->str += 2;
 | |
| 	s->cnt = 0;
 | |
| 	s->state = SET;
 | |
| 	s->set = s->equiv;
 | |
| }
 | |
| 
 | |
| static int
 | |
| genrange(s)
 | |
| 	STR *s;
 | |
| {
 | |
| 	int stopval;
 | |
| 	unsigned char *savestart;
 | |
| 
 | |
| 	savestart = s->str;
 | |
| 	stopval = *++s->str == '\\' ? backslash(s) : *s->str++;
 | |
| 	if (stopval < (u_char)s->lastch) {
 | |
| 		s->str = savestart;
 | |
| 		return (0);
 | |
| 	}
 | |
| 	s->cnt = stopval - s->lastch + 1;
 | |
| 	s->state = RANGE;
 | |
| 	--s->lastch;
 | |
| 	return (1);
 | |
| }
 | |
| 
 | |
| static void
 | |
| genseq(s)
 | |
| 	STR *s;
 | |
| {
 | |
| 	char *ep;
 | |
| 
 | |
| 	if (s->which == STRING1) {
 | |
| 		fprintf(stderr, "tr: sequences only valid in string2\n");
 | |
| 		exit(1);
 | |
| 	}
 | |
| 
 | |
| 	if (*s->str == '\\')
 | |
| 		s->lastch = backslash(s);
 | |
| 	else
 | |
| 		s->lastch = *s->str++;
 | |
| 	if (*s->str != '*') {
 | |
| 		fprintf(stderr, "tr: misplaced sequence asterisk\n");
 | |
| 		exit(1);
 | |
| 	}
 | |
| 
 | |
| 	switch (*++s->str) {
 | |
| 	case '\\':
 | |
| 		s->cnt = backslash(s);
 | |
| 		break;
 | |
| 	case ']':
 | |
| 		s->cnt = 0;
 | |
| 		++s->str;
 | |
| 		break;
 | |
| 	default:
 | |
| 		if (isdigit(*s->str)) {
 | |
| 			s->cnt = strtol((char *) s->str, &ep, 0);
 | |
| 			if (*ep == ']') {
 | |
| 				s->str = (unsigned char *) ep + 1;
 | |
| 				break;
 | |
| 			}
 | |
| 		}
 | |
| 		fprintf(stderr, "tr: illegal sequence count\n");
 | |
| 		exit(1);
 | |
| 		/* NOTREACHED */
 | |
| 	}
 | |
| 
 | |
| 	s->state = s->cnt ? SEQUENCE : INFINITE;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Translate \??? into a character.  Up to 3 octal digits, if no digits either
 | |
|  * an escape code or a literal character.
 | |
|  */
 | |
| static int
 | |
| backslash(s)
 | |
| 	STR *s;
 | |
| {
 | |
| 	int ch, cnt, val;
 | |
| 
 | |
| 	for (cnt = val = 0;;) {
 | |
| 		ch = *++s->str;
 | |
| 		if (!isascii(ch) || !isdigit(ch))
 | |
| 			break;
 | |
| 		val = val * 8 + ch - '0';
 | |
| 		if (++cnt == 3) {
 | |
| 			++s->str;
 | |
| 			break;
 | |
| 		}
 | |
| 	}
 | |
| 	if (cnt)
 | |
| 		return (val);
 | |
| 	if (ch != '\0')
 | |
| 		++s->str;
 | |
| 	switch (ch) {
 | |
| 		case 'a':			/* escape characters */
 | |
| 			return ('\7');
 | |
| 		case 'b':
 | |
| 			return ('\b');
 | |
| 		case 'f':
 | |
| 			return ('\f');
 | |
| 		case 'n':
 | |
| 			return ('\n');
 | |
| 		case 'r':
 | |
| 			return ('\r');
 | |
| 		case 't':
 | |
| 			return ('\t');
 | |
| 		case 'v':
 | |
| 			return ('\13');
 | |
| 		case '\0':			/*  \" -> \ */
 | |
| 			s->state = EOS;
 | |
| 			return ('\\');
 | |
| 		default:			/* \x" -> x */
 | |
| 			return (ch);
 | |
| 	}
 | |
| }
 | 
