added a simple handmade tokenizer

This commit is contained in:
hneemann 2016-07-03 11:54:22 +02:00
parent 8232f31829
commit afa24f6979
3 changed files with 196 additions and 47 deletions

View File

@ -8,11 +8,9 @@ import de.neemann.digital.lang.Lang;
import java.io.IOException;
import java.io.Reader;
import java.io.StreamTokenizer;
import java.io.StringReader;
import static java.io.StreamTokenizer.TT_EOF;
import static java.io.StreamTokenizer.TT_WORD;
import static de.neemann.digital.analyse.parser.Tokenizer.Token.*;
/**
* Class to parse a string to an expression
@ -21,10 +19,11 @@ import static java.io.StreamTokenizer.TT_WORD;
*/
public class Parser {
private final StreamTokenizer tokenizer;
private final Tokenizer tokenizer;
/**
* Creates a new instance
*
* @param expression the string to parse
*/
public Parser(String expression) {
@ -33,52 +32,31 @@ public class Parser {
/**
* Creates a new instance which takes the expression from the given reader.
* The reader is only wrapped by a tokenizer here; nothing is read in this constructor.
*
* @param reader the reader to read the expression
*/
public Parser(Reader reader) {
// StreamTokenizer setup: '_', '^' and the digits '0'..'9' are registered as word characters
tokenizer = new StreamTokenizer(reader);
tokenizer.wordChars('_', '_');
tokenizer.wordChars('^', '^');
tokenizer.wordChars('0', '9');
// tokenizer.ordinaryChar('∧'); StreamTokenizer can not handle ordinary chars > 255
// tokenizer.ordinaryChar('∨');
// Tokenizer setup: the handmade tokenizer needs no further configuration
tokenizer = new Tokenizer(reader);
}
/**
* Checks whether the next token is a word which equals the given string, ignoring case.
* A matching token is consumed, any other token is pushed back to the tokenizer.
*
* @param str the expected word
* @return true if the next token was the expected word
* @throws IOException IOException
*/
private boolean isNext(String str) throws IOException {
int t = tokenizer.nextToken();
if (t == TT_WORD && tokenizer.sval.equalsIgnoreCase(str))
return true;
// no match: the token has to stay available for the next check
tokenizer.pushBack();
return false;
}
/**
* Checks whether the next token type equals the given value.
* A matching token is consumed, any other token is pushed back to the tokenizer.
*
* @param c the expected token type as returned by nextToken()
* @return true if the next token was of the expected type
* @throws IOException IOException
*/
private boolean isNext(int c) throws IOException {
int t = tokenizer.nextToken();
if (t == c)
return true;
// no match: the token has to stay available for the next check
tokenizer.pushBack();
return false;
}
/**
* Parses the string expression and returns an expression instance.
* Parsing starts at the OR level; after the expression the end of the input is expected.
*
* @return the expression instance
* @throws IOException IOException
* @throws ParseException thrown if the expression is not followed by the end of the input
*/
public Expression parse() throws IOException, ParseException {
Expression expr = parseOr();
// anything but the end of the input after the expression is reported as an error
if (!isNext(TT_EOF))
if (!(tokenizer.next() == EOF))
throw new ParseException(Lang.get("err_parserUnexpectedEndOfExpression"));
return expr;
}
private Expression parseOr() throws IOException, ParseException {
Expression ex = parseAnd();
while (isNext('+') || isNext("") || isNext('|')) {
while (tokenizer.peek() == OR) {
tokenizer.next();
ex = Operation.or(ex, parseAnd());
}
return ex;
@ -86,24 +64,26 @@ public class Parser {
/**
* Parses a sequence of simple expressions which are joined by AND operators.
* The operands are combined from left to right using Operation.and.
*
* @return the parsed expression
* @throws IOException IOException
* @throws ParseException ParseException
*/
private Expression parseAnd() throws IOException, ParseException {
Expression ex = parseSimpleExp();
// isNext based loop head: accepts '*', '&' and a word token.
// NOTE(review): the string literal passed to isNext is empty here; presumably a
// non-ASCII operator character got lost - confirm against the repository.
while (isNext('*') || isNext("") || isNext('&')) {
// Tokenizer based loop head: peek keeps the token, so it is consumed by next() only
// if it really is an AND
while (tokenizer.peek() == AND) {
tokenizer.next();
ex = Operation.and(ex, parseSimpleExp());
}
return ex;
}
/**
* Parses a simple expression: a negation, an expression in parentheses or a variable.
*
* @return the parsed expression
* @throws IOException IOException
* @throws ParseException thrown if a closing parenthesis is missing or the token is not expected
*/
private Expression parseSimpleExp() throws IOException, ParseException {
// dispatch using the isNext helpers
if (isNext('!')) {
// a negation applies to the following simple expression
return Not.not(parseSimpleExp());
} else if (isNext('(')) {
// inside of the parentheses parsing restarts at the OR level
Expression exp = parseOr();
if (!isNext(')'))
throw new ParseException(Lang.get("err_parserMissingClosedParenthesis"));
return exp;
} else if (isNext(TT_WORD)) {
return new Variable(tokenizer.sval);
} else
throw new ParseException(Lang.get("err_parserUnexpectedToken_N", tokenizer.sval));
// dispatch on the token delivered by the Tokenizer
switch (tokenizer.next()) {
case NOT:
// a negation applies to the following simple expression
return Not.not(parseSimpleExp());
case OPEN:
// inside of the parentheses parsing restarts at the OR level
Expression exp = parseOr();
if (!(tokenizer.next() == CLOSE))
throw new ParseException(Lang.get("err_parserMissingClosedParenthesis"));
return exp;
case IDENT:
// the identifier text is the name of the variable
return new Variable(tokenizer.getIdent());
default:
// toString of the tokenizer describes the token which was just read
throw new ParseException(Lang.get("err_parserUnexpectedToken_N", tokenizer.toString()));
}
}
}

View File

@ -0,0 +1,157 @@
package de.neemann.digital.analyse.parser;
import java.io.IOException;
import java.io.Reader;
/**
 * A simple handmade tokenizer which splits a boolean expression into tokens.
 * <p>
 * Recognized are identifiers, parentheses and several spellings of the operators:
 * AND is written as {@code &}, {@code &&}, {@code *} or the logical-and sign (U+2227),
 * OR is written as {@code |}, {@code ||}, {@code +} or the logical-or sign (U+2228) and
 * NOT is written as {@code !} or the not sign (U+00AC).
 * Blanks, tabs and line breaks between the tokens are skipped.
 * Every other character is delivered as an UNKNOWN token.
 *
 * @author hneemann
 */
public class Tokenizer {

    /**
     * The kinds of token this tokenizer delivers
     */
    enum Token {UNKNOWN, IDENT, AND, OR, NOT, OPEN, CLOSE, EOF}

    // The non-ASCII operator characters are written as unicode escapes, so the
    // class does not depend on the encoding which is used to compile it.
    private static final char LOGICAL_AND = '\u2227';
    private static final char LOGICAL_OR = '\u2228';
    private static final char NOT_SIGN = '\u00AC';

    private final Reader in;
    // holds the text of the last IDENT or UNKNOWN token
    private final StringBuilder builder;
    private Token token;
    // true if token holds a token which was peeked but not yet consumed
    private boolean isToken;
    // one character look ahead buffer
    private boolean isUnreadChar = false;
    private int unreadChar;

    /**
     * Creates a new instance
     *
     * @param in the reader
     */
    public Tokenizer(Reader in) {
        this.in = in;
        token = Token.UNKNOWN;
        isToken = false;
        builder = new StringBuilder();
    }

    /**
     * Reads the next token and consumes it.
     *
     * @return the token
     * @throws IOException IOException
     */
    public Token next() throws IOException {
        peek();
        isToken = false;
        return token;
    }

    /**
     * Peeks the next token.
     * The token is kept in the stream, so next will return this token again!
     *
     * @return the token
     * @throws IOException IOException
     */
    public Token peek() throws IOException {
        if (isToken) {
            return token;
        }

        int c;
        do {
            c = readChar();
        } while (isWhiteSpace(c));

        switch (c) {
            case -1:
                token = Token.EOF;
                break;
            case '(':
                token = Token.OPEN;
                break;
            case ')':
                token = Token.CLOSE;
                break;
            case '&':
                // "&&" is the same operator as "&"
                skipIfNext('&');
                token = Token.AND;
                break;
            case '*':
            case LOGICAL_AND:
                token = Token.AND;
                break;
            case '|':
                // "||" is the same operator as "|"
                skipIfNext('|');
                token = Token.OR;
                break;
            case '+':
            case LOGICAL_OR:
                token = Token.OR;
                break;
            case NOT_SIGN:
            case '!':
                token = Token.NOT;
                break;
            default:
                if (isIdentChar(c)) {
                    token = Token.IDENT;
                    readIdent(c);
                } else {
                    // keep the offending character, so toString is able to report it
                    token = Token.UNKNOWN;
                    builder.setLength(0);
                    builder.append((char) c);
                }
        }

        isToken = true;
        return token;
    }

    /**
     * Returns the text of the last token if it was an IDENT or an UNKNOWN token.
     *
     * @return the identifier
     */
    public String getIdent() {
        return builder.toString();
    }

    /**
     * Consumes the next character if it is the expected one.
     * Every other character, including the end of the stream, stays available.
     */
    private void skipIfNext(char expected) throws IOException {
        int c = readChar();
        if (c != expected) {
            unreadChar(c);
        }
    }

    /**
     * Collects an identifier in the builder.
     * After the first character also digits are allowed.
     * The character which terminates the identifier is pushed back.
     */
    private void readIdent(int first) throws IOException {
        builder.setLength(0);
        builder.append((char) first);
        int c = readChar();
        while (isIdentChar(c) || isNumberChar(c)) {
            builder.append((char) c);
            c = readChar();
        }
        unreadChar(c);
    }

    /**
     * Delivers the pushed back character if there is one, the next character of the reader otherwise.
     */
    private int readChar() throws IOException {
        if (isUnreadChar) {
            isUnreadChar = false;
            return unreadChar;
        }
        return in.read();
    }

    /**
     * Pushes back a single character, the next call of readChar will return it.
     */
    private void unreadChar(int c) {
        unreadChar = c;
        isUnreadChar = true;
    }

    private boolean isIdentChar(int c) {
        return (c >= 'a' && c <= 'z')
                || (c >= 'A' && c <= 'Z')
                || (c == '_')
                || (c == '^');
    }

    private boolean isNumberChar(int c) {
        return (c >= '0' && c <= '9');
    }

    private boolean isWhiteSpace(int c) {
        return c == ' ' || c == '\n' || c == '\r' || c == '\t';
    }

    /**
     * Describes the last token: the text of IDENT and UNKNOWN tokens, the name of the token otherwise.
     *
     * @return a description of the last token
     */
    @Override
    public String toString() {
        if (token == Token.IDENT || token == Token.UNKNOWN) {
            return getIdent();
        }
        return token.name();
    }
}

View File

@ -22,14 +22,16 @@ public class ParserTest extends TestCase {
/**
 * Every supported spelling of the OR operator has to create an Operation.Or.
 */
public void testParseOr() throws Exception {
    assertTrue(new Parser("a+b").parse() instanceof Operation.Or);
    // logical-or sign (U+2228) with and without surrounding blanks; written as an
    // escape to keep the test independent of the source file encoding
    assertTrue(new Parser("a \u2228 b").parse() instanceof Operation.Or);
    assertTrue(new Parser("a\u2228b").parse() instanceof Operation.Or);
    assertTrue(new Parser("a|b").parse() instanceof Operation.Or);
    assertTrue(new Parser("a||b").parse() instanceof Operation.Or);
}
/**
 * Every supported spelling of the AND operator has to create an Operation.And.
 */
public void testParseAnd() throws Exception {
    assertTrue(new Parser("a*b").parse() instanceof Operation.And);
    // logical-and sign (U+2227) with and without surrounding blanks; written as an
    // escape to keep the test independent of the source file encoding
    assertTrue(new Parser("a \u2227 b").parse() instanceof Operation.And);
    assertTrue(new Parser("a\u2227b").parse() instanceof Operation.And);
    assertTrue(new Parser("a&b").parse() instanceof Operation.And);
    assertTrue(new Parser("a&&b").parse() instanceof Operation.And);
}
public void testParseParenthesis() throws Exception {
@ -111,5 +113,15 @@ public class ParserTest extends TestCase {
}
}
/**
 * A character which is neither an operator nor part of an identifier
 * has to be rejected with a ParseException.
 */
public void testParseException5() throws Exception {
    final Parser parser = new Parser("ö");
    boolean rejected = false;
    try {
        parser.parse();
    } catch (ParseException expected) {
        rejected = true;
    }
    assertTrue(rejected);
}
}