From 0d406c3e36bc6f42d935703f16a8c3403d9fa69c Mon Sep 17 00:00:00 2001 From: Mike Cifelli Date: Sat, 10 Dec 2016 15:25:46 -0500 Subject: [PATCH] More refactoring of the scanner package --- src/scanner/LispScanner.java | 94 +++++++++++------------------------- src/util/Characters.java | 34 ++++++------- 2 files changed, 46 insertions(+), 82 deletions(-) diff --git a/src/scanner/LispScanner.java b/src/scanner/LispScanner.java index 1908ec4..5501dfa 100644 --- a/src/scanner/LispScanner.java +++ b/src/scanner/LispScanner.java @@ -9,6 +9,7 @@ import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; import java.text.MessageFormat; +import java.util.function.Function; import constructs.Token; import constructs.TokenFactory; @@ -34,26 +35,27 @@ public class LispScanner { } public Token getNextToken() throws IOException { - int c; - - while ((c = inputStream.read()) != EOF) { - char nextChar = (char) c; + for (int c = inputStream.read(); c != EOF; c = inputStream.read()) { + char currentCharacter = (char) c; positionTracker.incrementColumn(); - if (Character.isWhitespace(nextChar)) { - if (nextChar == NEWLINE) + if (Character.isWhitespace(currentCharacter)) { + if (currentCharacter == NEWLINE) positionTracker.incrementLine(); - } else { - FilePosition currentPosition = positionTracker.getCurrentPosition(); - String tokenText = retrieveTokenText(nextChar); - - return tokenFactory.createToken(tokenText, currentPosition); - } + } else + return createTokenFromCharacter(currentCharacter); } return tokenFactory.createEOFToken(positionTracker.getCurrentPosition()); } + private Token createTokenFromCharacter(char c) throws IOException { + FilePosition currentPosition = positionTracker.getCurrentPosition(); + String tokenText = retrieveTokenText(c); + + return tokenFactory.createToken(tokenText, currentPosition); + } + private String retrieveTokenText(char firstCharacter) throws IOException { String tokenText = "" + firstCharacter; @@ -74,27 +76,16 
@@ public class LispScanner { text.append(firstDoubleQuote); - int c; - - while ((c = inputStream.read()) != EOF) { + for (int c = inputStream.read(); c != EOF; c = inputStream.read()) { char nextChar = (char) c; positionTracker.incrementColumn(); text.append(nextChar); - switch (nextChar) { - case NEWLINE: + if (nextChar == NEWLINE) positionTracker.incrementLine(); - break; - case DOUBLE_QUOTE: - if (prevChar != BACKSLASH) { - // we have found the terminating double quote - - return text.toString(); - } - - // this is an escaped double quote - } + else if ((nextChar == DOUBLE_QUOTE) && (prevChar != BACKSLASH)) + return text.toString(); prevChar = nextChar; } @@ -103,55 +94,28 @@ public class LispScanner { } private String retrieveNumber(char firstDigit) throws IOException { - StringBuilder text = new StringBuilder(); - - text.append(firstDigit); - inputStream.mark(1); - - int c; - - while ((c = inputStream.read()) != EOF) { - char nextChar = (char) c; - - if (Character.isDigit(nextChar)) { - // 'nextChar' is a digit in this number - - text.append(nextChar); - positionTracker.incrementColumn(); - } else { - // we have reached the end of the number - - inputStream.reset(); // unread the last character - - return text.toString(); - } - - inputStream.mark(1); - } - - return text.toString(); + return retrieveNumberOrIdentifier(firstDigit, Character::isDigit); } - private String retrieveIdentifier(char firstChar) throws IOException { + private String retrieveIdentifier(char firstCharacter) throws IOException { + return retrieveNumberOrIdentifier(firstCharacter, Characters::isLegalIdentifierCharacter); + } + + private String retrieveNumberOrIdentifier(char firstCharacter, Function<Character, Boolean> isPartOfToken) + throws IOException { StringBuilder text = new StringBuilder(); - text.append(firstChar); + text.append(firstCharacter); inputStream.mark(1); - int c; - - while ((c = inputStream.read()) != EOF) { + for (int c = inputStream.read(); c != EOF; c = inputStream.read()) { char 
nextChar = (char) c; - if (Characters.isLegalIdentifierCharacter(nextChar)) { - // 'nextChar' is part of the identifier - + if (isPartOfToken.apply(nextChar)) { text.append(nextChar); positionTracker.incrementColumn(); } else { - // we have reached the end of this identifier - - inputStream.reset(); // unread the last character + inputStream.reset(); return text.toString(); } diff --git a/src/util/Characters.java b/src/util/Characters.java index 6e87a44..b6fac14 100644 --- a/src/util/Characters.java +++ b/src/util/Characters.java @@ -1,7 +1,7 @@ package util; -import java.util.HashMap; -import java.util.Map; +import java.util.HashSet; +import java.util.Set; public class Characters { @@ -19,25 +19,25 @@ public class Characters { public static final char SEMICOLON = ';'; public static final char SINGLE_QUOTE = '\''; public static final char TICK_MARK = '`'; - - public static final Map<Character, Boolean> illegalIdentifierCharacters = new HashMap<>(); + + public static final Set<Character> illegalIdentifierCharacters = new HashSet<>(); static { - illegalIdentifierCharacters.put(DOUBLE_QUOTE, true); - illegalIdentifierCharacters.put(SINGLE_QUOTE, true); - illegalIdentifierCharacters.put(BACKSLASH, true); - illegalIdentifierCharacters.put(TICK_MARK, true); - illegalIdentifierCharacters.put(LEFT_PARENTHESIS, true); - illegalIdentifierCharacters.put(RIGHT_PARENTHESIS, true); - illegalIdentifierCharacters.put(LEFT_SQUARE_BRACKET, true); - illegalIdentifierCharacters.put(RIGHT_SQUARE_BRACKET, true); - illegalIdentifierCharacters.put(HASH, true); - illegalIdentifierCharacters.put(PERIOD, true); - illegalIdentifierCharacters.put(SEMICOLON, true); + illegalIdentifierCharacters.add(DOUBLE_QUOTE); + illegalIdentifierCharacters.add(SINGLE_QUOTE); + illegalIdentifierCharacters.add(BACKSLASH); + illegalIdentifierCharacters.add(TICK_MARK); + illegalIdentifierCharacters.add(LEFT_PARENTHESIS); + illegalIdentifierCharacters.add(RIGHT_PARENTHESIS); + illegalIdentifierCharacters.add(LEFT_SQUARE_BRACKET); + 
illegalIdentifierCharacters.add(RIGHT_SQUARE_BRACKET); + illegalIdentifierCharacters.add(HASH); + illegalIdentifierCharacters.add(PERIOD); + illegalIdentifierCharacters.add(SEMICOLON); } - + public static boolean isLegalIdentifierCharacter(char c) { - return (! Character.isWhitespace(c)) && (! illegalIdentifierCharacters.containsKey(c)); + return (!Character.isWhitespace(c)) && (!illegalIdentifierCharacters.contains(c)); } }