# Patch summary: replace java.io.StreamTokenizer in the boolean-expression Parser with a
# hand-written Tokenizer.
#
# Parser.java     - removes the StreamTokenizer setup and both isNext(...) helpers. The
#                   recursive-descent methods (parse -> parseOr -> parseAnd -> parseSimpleExp)
#                   now call Tokenizer.peek()/next() and compare Token enum constants, so AND
#                   still binds tighter than OR.
# Tokenizer.java  - new file. Token mapping implemented in peek():
#                     AND   : '&', '&&', '*', '∧'
#                     OR    : '|', '||', '+', '∨'
#                     NOT   : '!', '¬'
#                     OPEN  : '('        CLOSE : ')'
#                     IDENT : starts with a-z, A-Z, '_' or '^'; continues with those or 0-9
#                     EOF   : the reader returned -1
#                     UNKNOWN: any other character (the parser turns it into a ParseException)
#                   Only ' ', '\n', '\r' and '\t' are skipped as white space.
#                   peek() caches the token until next() consumes it; one character of
#                   look-ahead is kept via unreadChar()/readChar().
# ParserTest.java - adds cases for '||' and '&&' and a test that the input "ö" raises
#                   ParseException.
#
# NOTE(review): the line breaks inside the hunks appear to have been collapsed before this
# text reached review. The hunks are kept in that form instead of being re-wrapped, because
# blank lines and indentation cannot be reconstructed reliably against the @@ line counts.
diff --git a/src/main/java/de/neemann/digital/analyse/parser/Parser.java b/src/main/java/de/neemann/digital/analyse/parser/Parser.java index 0c2097236..f735aff56 100644 --- a/src/main/java/de/neemann/digital/analyse/parser/Parser.java +++ b/src/main/java/de/neemann/digital/analyse/parser/Parser.java @@ -8,11 +8,9 @@ import de.neemann.digital.lang.Lang; import java.io.IOException; import java.io.Reader; -import java.io.StreamTokenizer; import java.io.StringReader; -import static java.io.StreamTokenizer.TT_EOF; -import static java.io.StreamTokenizer.TT_WORD; +import static de.neemann.digital.analyse.parser.Tokenizer.Token.*; /** * Class to parse a string to an expression @@ -21,10 +19,11 @@ import static java.io.StreamTokenizer.TT_WORD; */ public class Parser { - private final StreamTokenizer tokenizer; + private final Tokenizer tokenizer; /** * Creates a new instance + * * @param expression the string to parse */ public Parser(String expression) { @@ -33,52 +32,31 @@ public class Parser { /** * Creates a new instance + * * @param reader the reader to read the expression */ public Parser(Reader reader) { - tokenizer = new StreamTokenizer(reader); - tokenizer.wordChars('_', '_'); - tokenizer.wordChars('^', '^'); - tokenizer.wordChars('0', '9'); -// tokenizer.ordinaryChar('∧'); StreamTokenizer can not handle ordinary chars > 255 -// tokenizer.ordinaryChar('∨'); + tokenizer = new Tokenizer(reader); } - private boolean isNext(String str) throws IOException { - int t = tokenizer.nextToken(); - if (t == TT_WORD && tokenizer.sval.equalsIgnoreCase(str)) - return true; - - tokenizer.pushBack(); - return false; - } - - private boolean isNext(int c) throws IOException { - int t = tokenizer.nextToken(); - if (t == c) - return true; - - tokenizer.pushBack(); - return false; - } - - /** * Parses the the string expression and returns a expression instance + * * @return the expresion instance - * @throws IOException IOException + * @throws IOException IOException * @throws 
ParseException ParseException */ public Expression parse() throws IOException, ParseException { Expression expr = parseOr(); - if (!isNext(TT_EOF)) + if (tokenizer.next() != EOF) throw new ParseException(Lang.get("err_parserUnexpectedEndOfExpression")); return expr; } private Expression parseOr() throws IOException, ParseException { Expression ex = parseAnd(); - while (isNext('+') || isNext("∨") || isNext('|')) { + while (tokenizer.peek() == OR) { + tokenizer.next(); ex = Operation.or(ex, parseAnd()); } return ex; @@ -86,24 +64,26 @@ public class Parser { private Expression parseAnd() throws IOException, ParseException { Expression ex = parseSimpleExp(); - while (isNext('*') || isNext("∧") || isNext('&')) { + while (tokenizer.peek() == AND) { + tokenizer.next(); ex = Operation.and(ex, parseSimpleExp()); } return ex; } private Expression parseSimpleExp() throws IOException, ParseException { - if (isNext('!')) { - return Not.not(parseSimpleExp()); - } else if (isNext('(')) { - Expression exp = parseOr(); - if (!isNext(')')) - throw new ParseException(Lang.get("err_parserMissingClosedParenthesis")); - return exp; - } else if (isNext(TT_WORD)) { - return new Variable(tokenizer.sval); - } else - throw new ParseException(Lang.get("err_parserUnexpectedToken_N", tokenizer.sval)); + switch (tokenizer.next()) { + case NOT: + return Not.not(parseSimpleExp()); + case OPEN: + Expression exp = parseOr(); + if (tokenizer.next() != CLOSE) + throw new ParseException(Lang.get("err_parserMissingClosedParenthesis")); + return exp; + case IDENT: + return new Variable(tokenizer.getIdent()); + default: + throw new ParseException(Lang.get("err_parserUnexpectedToken_N", tokenizer.toString())); + } } - } diff --git a/src/main/java/de/neemann/digital/analyse/parser/Tokenizer.java b/src/main/java/de/neemann/digital/analyse/parser/Tokenizer.java new file mode 100644 index 000000000..132f6d9a2 --- /dev/null +++ b/src/main/java/de/neemann/digital/analyse/parser/Tokenizer.java @@ -0,0 
+1,157 @@ +package de.neemann.digital.analyse.parser; + +import java.io.IOException; +import java.io.Reader; + +/** + * @author hneemann + */ +public class Tokenizer { + + + enum Token {UNKNOWN, IDENT, AND, OR, NOT, OPEN, CLOSE, EOF} + + private final Reader in; + private Token token; + private boolean isToken; + private final StringBuilder builder; + private boolean isUnreadChar = false; + private int unreadChar; + + /** + * Creates a new instance + * + * @param in the reader + */ + public Tokenizer(Reader in) { + this.in = in; + token = Token.UNKNOWN; + isToken = false; + builder = new StringBuilder(); + } + + /** + * Reads the next token + * + * @return the token + * @throws IOException IOException + */ + public Token next() throws IOException { + peek(); + isToken = false; + return token; + } + + /** + * peeks the next token. + * The token is kept in the stream, so next will return this token again! + * + * @return the token + * @throws IOException IOException + */ + public Token peek() throws IOException { + if (isToken) + return token; + + int c; + do { + c = readChar(); + } while (isWhiteSpace(c)); + + switch (c) { + case -1: + token = Token.EOF; + break; + case '(': + token = Token.OPEN; + break; + case ')': + token = Token.CLOSE; + break; + case '&': + c = readChar(); + if (c != '&') unreadChar(c); /* '&' and '&&' both fall through to AND */ + case '*': + case '∧': + token = Token.AND; + break; + case '|': + c = readChar(); + if (c != '|') unreadChar(c); /* '|' and '||' both fall through to OR */ + case '+': + case '∨': + token = Token.OR; + break; + case '¬': + case '!': + token = Token.NOT; + break; + default: + if (isIdentChar(c)) { + token = Token.IDENT; + builder.setLength(0); + builder.append((char) c); + boolean wasChar = true; + do { + c = readChar(); + if (isIdentChar(c) || isNumberChar(c)) { + builder.append((char) c); + } else { + unreadChar(c); + wasChar = false; + } + } while (wasChar); + } else { + token = Token.UNKNOWN; + builder.setLength(0); + builder.append((char) c); + } + } + + isToken = true; + return token; + } + + /** + * 
@return the identifier + */ + public String getIdent() { + return builder.toString(); + } + + private int readChar() throws IOException { + if (isUnreadChar) { + isUnreadChar = false; + return unreadChar; + } else + return in.read(); + } + + private void unreadChar(int c) { + unreadChar = c; + isUnreadChar = true; + } + + private boolean isIdentChar(int c) { + return (c >= 'a' && c <= 'z') + || (c >= 'A' && c <= 'Z') + || (c == '_') + || (c == '^'); + } + + private boolean isNumberChar(int c) { + return (c >= '0' && c <= '9'); + } + + private boolean isWhiteSpace(int c) { + return c == ' ' || c == '\n' || c == '\r' || c == '\t'; + } + + @Override + public String toString() { + if (token == Token.IDENT || token == Token.UNKNOWN) + return getIdent(); + else + return token.name(); + } +} diff --git a/src/test/java/de/neemann/digital/analyse/parser/ParserTest.java b/src/test/java/de/neemann/digital/analyse/parser/ParserTest.java index 651b2ea31..8090af171 100644 --- a/src/test/java/de/neemann/digital/analyse/parser/ParserTest.java +++ b/src/test/java/de/neemann/digital/analyse/parser/ParserTest.java @@ -22,14 +22,16 @@ public class ParserTest extends TestCase { public void testParseOr() throws Exception { assertTrue(new Parser("a+b").parse() instanceof Operation.Or); - assertTrue(new Parser("a ∨ b").parse() instanceof Operation.Or); + assertTrue(new Parser("a∨b").parse() instanceof Operation.Or); assertTrue(new Parser("a|b").parse() instanceof Operation.Or); + assertTrue(new Parser("a||b").parse() instanceof Operation.Or); } public void testParseAnd() throws Exception { assertTrue(new Parser("a*b").parse() instanceof Operation.And); - assertTrue(new Parser("a ∧ b").parse() instanceof Operation.And); + assertTrue(new Parser("a∧b").parse() instanceof Operation.And); assertTrue(new Parser("a&b").parse() instanceof Operation.And); + assertTrue(new Parser("a&&b").parse() instanceof Operation.And); } public void testParseParenthesis() throws Exception { @@ -111,5 +113,15 @@ 
public class ParserTest extends TestCase { } } + public void testParseException5() throws Exception { + Parser p = new Parser("ö"); + try { + p.parse(); + fail("ParseException expected"); + } catch (ParseException e) { + assertTrue(true); + } + } + } \ No newline at end of file