354 lines
		
	
	
		
			6.6 KiB
		
	
	
	
		
			C
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			354 lines
		
	
	
		
			6.6 KiB
		
	
	
	
		
			C
		
	
	
		
			Executable File
		
	
	
	
	
| /*
 | |
|  * a small awk clone
 | |
|  *
 | |
|  * (C) 1989 Saeko Hirabauashi & Kouichi Hirabayashi
 | |
|  *
 | |
|  * Absolutely no warranty. Use this software with your own risk.
 | |
|  *
 | |
|  * Permission to use, copy, modify and distribute this software for any
 | |
|  * purpose and without fee is hereby granted, provided that the above
 | |
|  * copyright and disclaimer notice.
 | |
|  *
 | |
|  * This program was written to fit into 64K+64K memory of the Minix 1.2.
 | |
|  */
 | |
| 
 | |
| 
 | |
| #include <stdio.h>
 | |
| #include <ctype.h>
 | |
| #include "awk.h"
 | |
| 
 | |
| extern char *srcprg;	/* inline program */
 | |
| extern FILE *pfp;	/* program file */
 | |
| 
 | |
| int sym;	/* lexical token */
 | |
| int sym1;	/* auxiliary lexical token */
 | |
| int regexflg;	/* set by parser (y.c) to indicate parsing REGEXPR */
 | |
| int funflg;	/* set by parser (y.c) to indicate parsing FUNCTION */
 | |
| int printflg;	/* set by parser (y.c) to indicate parsing PRINT */
 | |
| int getlineflg;	/* set by parser (y.c) to indicate parsing GETLINE */
 | |
| char text[BUFSIZ];	/* lexical word */
 | |
| char line[BUFSIZ];	/* program line for error message (ring buffer) */
 | |
| char *linep = line;	/* line pointer */
 | |
| char funnam[128];	/* function name for error message */
 | |
| int lineno = 1;
 | |
| 
 | |
| lex()
 | |
| {
 | |
|   int c, d;
 | |
|   char *s;
 | |
| 
 | |
|   if (regexflg)
 | |
| 	return sym = scanreg();
 | |
| next:
 | |
|   while ((c = Getc()) == ' ' || c == '\t')
 | |
| 	;
 | |
|   while (c == '#')
 | |
| 	for (c = Getc(); c != '\n'; c = Getc())
 | |
| 		;
 | |
|   switch (c) {
 | |
|   case '\\':
 | |
| 	if ((c = Getc()) == '\n') {
 | |
| 		lineno++;
 | |
| 		goto next;
 | |
| 	}
 | |
| 	break;
 | |
|   case '\n':
 | |
| 	lineno++;
 | |
| 	break;
 | |
|   }
 | |
|   switch (c) {
 | |
|   case EOF:	return sym = 0;
 | |
|   case '+':	return sym = follow2('=', '+', ADDEQ, INC, ADD);
 | |
|   case '-':	return sym = follow2('=', '-', SUBEQ, DEC, SUB);
 | |
|   case '*':	return sym = follow('=', MULTEQ, MULT);
 | |
|   case '/':	return sym = follow('=', DIVEQ, DIV);
 | |
|   case '%':	return sym = follow('=', MODEQ, MOD);
 | |
|   case '^':	return sym = follow('=', POWEQ, POWER);
 | |
|   case '=':	return sym = follow('=', EQ, ASSIGN);
 | |
|   case '!':	return sym = follow2('=', '~', NE, NOMATCH, NOT);
 | |
|   case '&':	return sym = follow('&', AND, BINAND);
 | |
|   case '|':	sym = follow('|', OR, BINOR);
 | |
| 		if (printflg && sym == BINOR)
 | |
| 			sym = R_POUT;
 | |
| 		return sym;
 | |
|   case '<':	sym = follow2('=', '<', LE, SHIFTL, LT);
 | |
| 		if (getlineflg && sym == LT)
 | |
| 			sym = R_IN;
 | |
| 		return sym;
 | |
|   case '>':	sym = follow2('=', '>', GE, SHIFTR, GT);
 | |
| 		if (printflg) {
 | |
| 			switch (sym) {
 | |
| 			case GT: sym = R_OUT; break;
 | |
| 			case SHIFTR: sym = R_APD; break;
 | |
| 			}
 | |
| 		}
 | |
| 		return sym;
 | |
|   case '~':	return sym = MATCH; break;
 | |
|   case ';': case '\n':	return sym = EOL;
 | |
|   }
 | |
|   if (isalpha(c) || c == '_') {
 | |
| 	for (s = text; isalnum(c) || c == '_'; ) {
 | |
| 		*s++ = c; c = Getc();
 | |
| 	}
 | |
| 	Ungetc(c);
 | |
| 	*s = '\0';
 | |
| 	if ((d = iskeywd(text)) == 0 &&
 | |
| 		(d = isbuiltin(text, &sym1)) == 0) {
 | |
| 			if (c == '(')
 | |
| 				return sym = CALL;
 | |
| 			else if (funflg) {
 | |
| 				if ((sym1 = isarg(text)) != -1)
 | |
| 					return sym = ARG;
 | |
| 			}
 | |
| 	}
 | |
| 	return sym = d ? d : IDENT;
 | |
|   }
 | |
|   else if (c == '.' || (isdigit(c))) {
 | |
| 	Ungetc(c);
 | |
| 	return sym = scannum(text);	/* NUMBER */
 | |
|   }
 | |
|   else if (c == '"')
 | |
| 	return sym = scanstr(text);	/* STRING */
 | |
|   return sym = c;
 | |
| }
 | |
| 
 | |
| static
 | |
| follow(c1, r1, r2)
 | |
| {
 | |
|   register int c;
 | |
| 
 | |
|   if ((c = Getc()) == c1)
 | |
| 	return r1;
 | |
|   else {
 | |
| 	Ungetc(c);
 | |
| 	return r2;
 | |
|   }
 | |
| }
 | |
| 
 | |
| static
 | |
| follow2(c1, c2, r1, r2, r3)
 | |
| {
 | |
|   register int c;
 | |
| 
 | |
|   if ((c = Getc()) == c1)
 | |
| 	return r1;
 | |
|   else if (c == c2)
 | |
| 	return r2;
 | |
|   else {
 | |
| 	Ungetc(c);
 | |
| 	return r3;
 | |
|   }
 | |
| }
 | |
| 
 | |
| static
 | |
| iskeywd(s) char *s;
 | |
| {
 | |
|   static struct { char *kw; int token; } tab[] = {
 | |
| 	"BEGIN", BEGIN,
 | |
| 	"END", END,
 | |
| 	"break", BREAK,
 | |
| 	"continue", CONTIN,
 | |
| 	"delete", DELETE,
 | |
| 	"do", DO,
 | |
| 	"else", ELSE,
 | |
| 	"exit", EXIT,
 | |
| 	"for", FOR,
 | |
| 	"func", FUNC,
 | |
| 	"function", FUNC,
 | |
| 	"getline", GETLINE,
 | |
| 	"if", IF,
 | |
| 	"in", IN,
 | |
| 	"next", NEXT,
 | |
| 	"print", PRINT,
 | |
| 	"printf", PRINTF,
 | |
| 	"return", RETURN,
 | |
| 	"sprint", SPRINT,
 | |
| 	"sprintf", SPRINTF,
 | |
| 	"while", WHILE,
 | |
| 	"", 0, 0
 | |
|   };
 | |
|   register int i;
 | |
| 
 | |
|   for (i = 0; tab[i].token; i++)
 | |
| 	if (strcmp(tab[i].kw, s) == 0)
 | |
| 		break;
 | |
|   return tab[i].token;
 | |
| }
 | |
| 
 | |
| static
 | |
| isbuiltin(s, p) char *s; int *p;
 | |
| {
 | |
|   static struct { char *kw; int type; int token; } tab[] = {
 | |
| 	"atan2", MATHFUN, ATAN2,
 | |
| 	"close", STRFUN, CLOSE,
 | |
| 	"cos", MATHFUN, COS,
 | |
| 	"exp", MATHFUN, EXP,
 | |
| 	"gsub", SUBST, RGSUB,
 | |
| 	"index", STRFUN, INDEX,
 | |
| 	"int", MATHFUN, INT,
 | |
| 	"length", STRFUN, LENGTH,
 | |
| 	"log", MATHFUN, LOG,
 | |
| 	"match", STRFUN, RMATCH,
 | |
| 	"sin", MATHFUN, SIN,
 | |
| 	"sqrt", MATHFUN, SQRT,
 | |
| 	"rand", MATHFUN, RAND,
 | |
| 	"srand", MATHFUN, SRAND,
 | |
| 	"split", STRFUN, SPLIT,
 | |
| 	"sub", SUBST, RSUB,
 | |
| 	"substr", STRFUN, SUBSTR,
 | |
| 	"system", STRFUN, SYSTEM,
 | |
| 	"", 0, 0
 | |
|   };
 | |
|   register int i;
 | |
| 
 | |
|   for (i = 0; tab[i].token; i++)
 | |
| 	if (strcmp(tab[i].kw, s) == 0)
 | |
| 		break;
 | |
|   *p = tab[i].token;
 | |
|   return tab[i].type;
 | |
| }
 | |
| 
 | |
| static
 | |
| scannum(s) char *s;
 | |
| {
 | |
|   register int c;
 | |
|   char *strchr();
 | |
| 
 | |
|   if ((c = Getc()) && strchr("+-", c) != NULL) {
 | |
| 	*s++ = c; c = Getc();
 | |
|   }
 | |
|   while (isdigit(c)) {
 | |
| 	*s++ = c; c = Getc();
 | |
|   }
 | |
|   if (c == '.') {
 | |
| 	*s++ = c; c = Getc();
 | |
| 	while (isdigit(c)) {
 | |
| 		*s++ = c; c = Getc();
 | |
| 	}
 | |
|   }
 | |
|   if (c && strchr("eE", c) != NULL) {
 | |
| 	*s++ = c; c = Getc();
 | |
| 	if (c && strchr("+-", c) != NULL) {
 | |
| 		*s++ = c; c = Getc();
 | |
| 	}
 | |
| 	while (isdigit(c)) {
 | |
| 		*s++ = c; c = Getc();
 | |
| 	}
 | |
|   }
 | |
|   *s = '\0';
 | |
|   Ungetc(c);
 | |
|   return NUMBER;
 | |
| }
 | |
| 
 | |
| static
 | |
| scanstr(s) char *s;
 | |
| {
 | |
|   register int c, i, j;
 | |
| 
 | |
|   for (c = Getc(); c != EOF & c != '"'; ) {
 | |
| 	if (c == '\\') {
 | |
| 		switch (c = Getc()) {
 | |
| 		case 'b': c = '\b'; break;
 | |
| 		case 'f': c = '\f'; break;
 | |
| 		case 'n': c = '\n'; break;
 | |
| 		case 'r': c = '\r'; break;
 | |
| 		case 't': c = '\t'; break;
 | |
| 		default:
 | |
| 		if (isdigit(c)) {
 | |
| 			for (i = j = 0; i < 3 && isdigit(c); c = Getc(), i++)
 | |
| 				j = j * 8 + c - '0';
 | |
| 			Ungetc(c);
 | |
| 			c = j;
 | |
| 		}
 | |
| 		break;
 | |
| 		}
 | |
| 	}
 | |
| 	*s++ = c;
 | |
| 	if (isKanji(c))
 | |
| 		*s++ = Getc();
 | |
| 	c = Getc();
 | |
|   }
 | |
|   *s = '\0';
 | |
|   return STRING;
 | |
| }
 | |
| 
 | |
| static
 | |
| scanreg()
 | |
| {
 | |
|   register int c;
 | |
|   register char *s;
 | |
| 
 | |
|   for (s = text; (c = Getc()) != '/'; )
 | |
| 	if (c == '\n')
 | |
| 		error("newline in regular expression");
 | |
| 	else {
 | |
| 		if (isKanji(c) || c == '\\') {
 | |
| 			*s++ = c; c = Getc();
 | |
| 		}
 | |
| 		*s++ = c;
 | |
| 	}
 | |
|   *s = '\0';
 | |
|   return REGEXP;
 | |
| }
 | |
| 
 | |
| isarrayindex()
 | |
| {
 | |
|   int c, c2;
 | |
| 
 | |
| next:
 | |
|   while ((c = Getc()) == ' ' || c == '\t')
 | |
| 	;
 | |
|   if (c == '\\') {
 | |
| 	if ((c2 = Getc()) == '\n') {
 | |
| 		lineno++;
 | |
| 		goto next;
 | |
| 	}
 | |
| 	Ungetc(c2);
 | |
|   }
 | |
|   if (c != '[') Ungetc(c);
 | |
| 
 | |
|   return (c == '[');
 | |
| }
 | |
| 
 | |
| #define UNGET_DEPTH 2
 | |
| static int unget[UNGET_DEPTH], unget_depth;
 | |
| 
 | |
| Ungetc(c)
 | |
| {
 | |
|   if (unget_depth == UNGET_DEPTH) error("unget buffer overflow");
 | |
|   unget[unget_depth++] = c;
 | |
| 
 | |
|   if (linep > line) {
 | |
| 	if (--linep < line)
 | |
| 		linep == line + BUFSIZ - 1;
 | |
|   }
 | |
| }
 | |
| 
 | |
| Getc()
 | |
| {
 | |
|   register int c;
 | |
|   char *s, *t;
 | |
| 
 | |
|   if (unget_depth > 0)
 | |
| 	c = unget[--unget_depth];
 | |
|   else if (srcprg)
 | |
| 	c = *srcprg ? *srcprg++ : EOF;
 | |
|   else
 | |
| 	c = fgetc(pfp);
 | |
| 
 | |
| #if 0
 | |
|   if (linep - line == BUFSIZ) {
 | |
| printf("!!!\n");
 | |
| 	for (s = line; *s != '\n' && ((s - line) <BUFSIZ); s++)
 | |
| 		;
 | |
| printf("***(%d)***\n", *s);
 | |
| 	for (t = line; s < linep; )
 | |
| 		*t++ = *++s;
 | |
|   }
 | |
| #endif
 | |
|   *linep++ = c;
 | |
|   if ((linep - line) == BUFSIZ)
 | |
| 	linep = line;
 | |
|   return c;
 | |
| }
 | 
