More refactoring of the scanner package

This commit is contained in:
Mike Cifelli 2016-12-10 15:25:46 -05:00
parent a3a49a2084
commit 0d406c3e36
2 changed files with 46 additions and 82 deletions

View File

@ -9,6 +9,7 @@ import java.io.BufferedInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.text.MessageFormat; import java.text.MessageFormat;
import java.util.function.Function;
import constructs.Token; import constructs.Token;
import constructs.TokenFactory; import constructs.TokenFactory;
@ -34,26 +35,27 @@ public class LispScanner {
} }
public Token getNextToken() throws IOException { public Token getNextToken() throws IOException {
int c; for (int c = inputStream.read(); c != EOF; c = inputStream.read()) {
char currentCharacter = (char) c;
while ((c = inputStream.read()) != EOF) {
char nextChar = (char) c;
positionTracker.incrementColumn(); positionTracker.incrementColumn();
if (Character.isWhitespace(nextChar)) { if (Character.isWhitespace(currentCharacter)) {
if (nextChar == NEWLINE) if (currentCharacter == NEWLINE)
positionTracker.incrementLine(); positionTracker.incrementLine();
} else { } else
FilePosition currentPosition = positionTracker.getCurrentPosition(); return createTokenFromCharacter(currentCharacter);
String tokenText = retrieveTokenText(nextChar);
return tokenFactory.createToken(tokenText, currentPosition);
}
} }
return tokenFactory.createEOFToken(positionTracker.getCurrentPosition()); return tokenFactory.createEOFToken(positionTracker.getCurrentPosition());
} }
private Token createTokenFromCharacter(char c) throws IOException {
FilePosition currentPosition = positionTracker.getCurrentPosition();
String tokenText = retrieveTokenText(c);
return tokenFactory.createToken(tokenText, currentPosition);
}
private String retrieveTokenText(char firstCharacter) throws IOException { private String retrieveTokenText(char firstCharacter) throws IOException {
String tokenText = "" + firstCharacter; String tokenText = "" + firstCharacter;
@ -74,27 +76,16 @@ public class LispScanner {
text.append(firstDoubleQuote); text.append(firstDoubleQuote);
int c; for (int c = inputStream.read(); c != EOF; c = inputStream.read()) {
while ((c = inputStream.read()) != EOF) {
char nextChar = (char) c; char nextChar = (char) c;
positionTracker.incrementColumn(); positionTracker.incrementColumn();
text.append(nextChar); text.append(nextChar);
switch (nextChar) { if (nextChar == NEWLINE)
case NEWLINE:
positionTracker.incrementLine(); positionTracker.incrementLine();
break; else if ((nextChar == DOUBLE_QUOTE) && (prevChar != BACKSLASH))
case DOUBLE_QUOTE: return text.toString();
if (prevChar != BACKSLASH) {
// we have found the terminating double quote
return text.toString();
}
// this is an escaped double quote
}
prevChar = nextChar; prevChar = nextChar;
} }
@ -103,55 +94,28 @@ public class LispScanner {
} }
private String retrieveNumber(char firstDigit) throws IOException { private String retrieveNumber(char firstDigit) throws IOException {
StringBuilder text = new StringBuilder(); return retrieveNumberOrIdentifier(firstDigit, Character::isDigit);
text.append(firstDigit);
inputStream.mark(1);
int c;
while ((c = inputStream.read()) != EOF) {
char nextChar = (char) c;
if (Character.isDigit(nextChar)) {
// 'nextChar' is a digit in this number
text.append(nextChar);
positionTracker.incrementColumn();
} else {
// we have reached the end of the number
inputStream.reset(); // unread the last character
return text.toString();
}
inputStream.mark(1);
}
return text.toString();
} }
private String retrieveIdentifier(char firstChar) throws IOException { private String retrieveIdentifier(char firstCharacter) throws IOException {
return retrieveNumberOrIdentifier(firstCharacter, Characters::isLegalIdentifierCharacter);
}
private String retrieveNumberOrIdentifier(char firstCharacter, Function<Character, Boolean> isPartOfToken)
throws IOException {
StringBuilder text = new StringBuilder(); StringBuilder text = new StringBuilder();
text.append(firstChar); text.append(firstCharacter);
inputStream.mark(1); inputStream.mark(1);
int c; for (int c = inputStream.read(); c != EOF; c = inputStream.read()) {
while ((c = inputStream.read()) != EOF) {
char nextChar = (char) c; char nextChar = (char) c;
if (Characters.isLegalIdentifierCharacter(nextChar)) { if (isPartOfToken.apply(nextChar)) {
// 'nextChar' is part of the identifier
text.append(nextChar); text.append(nextChar);
positionTracker.incrementColumn(); positionTracker.incrementColumn();
} else { } else {
// we have reached the end of this identifier inputStream.reset();
inputStream.reset(); // unread the last character
return text.toString(); return text.toString();
} }

View File

@ -1,7 +1,7 @@
package util; package util;
import java.util.HashMap; import java.util.HashSet;
import java.util.Map; import java.util.Set;
public class Characters { public class Characters {
@ -20,24 +20,24 @@ public class Characters {
public static final char SINGLE_QUOTE = '\''; public static final char SINGLE_QUOTE = '\'';
public static final char TICK_MARK = '`'; public static final char TICK_MARK = '`';
public static final Map<Character, Boolean> illegalIdentifierCharacters = new HashMap<>(); public static final Set<Character> illegalIdentifierCharacters = new HashSet<>();
static { static {
illegalIdentifierCharacters.put(DOUBLE_QUOTE, true); illegalIdentifierCharacters.add(DOUBLE_QUOTE);
illegalIdentifierCharacters.put(SINGLE_QUOTE, true); illegalIdentifierCharacters.add(SINGLE_QUOTE);
illegalIdentifierCharacters.put(BACKSLASH, true); illegalIdentifierCharacters.add(BACKSLASH);
illegalIdentifierCharacters.put(TICK_MARK, true); illegalIdentifierCharacters.add(TICK_MARK);
illegalIdentifierCharacters.put(LEFT_PARENTHESIS, true); illegalIdentifierCharacters.add(LEFT_PARENTHESIS);
illegalIdentifierCharacters.put(RIGHT_PARENTHESIS, true); illegalIdentifierCharacters.add(RIGHT_PARENTHESIS);
illegalIdentifierCharacters.put(LEFT_SQUARE_BRACKET, true); illegalIdentifierCharacters.add(LEFT_SQUARE_BRACKET);
illegalIdentifierCharacters.put(RIGHT_SQUARE_BRACKET, true); illegalIdentifierCharacters.add(RIGHT_SQUARE_BRACKET);
illegalIdentifierCharacters.put(HASH, true); illegalIdentifierCharacters.add(HASH);
illegalIdentifierCharacters.put(PERIOD, true); illegalIdentifierCharacters.add(PERIOD);
illegalIdentifierCharacters.put(SEMICOLON, true); illegalIdentifierCharacters.add(SEMICOLON);
} }
public static boolean isLegalIdentifierCharacter(char c) { public static boolean isLegalIdentifierCharacter(char c) {
return (! Character.isWhitespace(c)) && (! illegalIdentifierCharacters.containsKey(c)); return (!Character.isWhitespace(c)) && (!illegalIdentifierCharacters.contains(c));
} }
} }