More refactoring of the scanner package

This commit is contained in:
Mike Cifelli 2016-12-10 15:25:46 -05:00
parent a3a49a2084
commit 0d406c3e36
2 changed files with 46 additions and 82 deletions

View File

@ -9,6 +9,7 @@ import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.text.MessageFormat;
import java.util.function.Function;
import constructs.Token;
import constructs.TokenFactory;
@ -34,26 +35,27 @@ public class LispScanner {
}
/**
 * Scans forward to the next token in the input stream.
 *
 * Every character read advances the column counter. Whitespace is skipped,
 * and a NEWLINE additionally advances the line counter. The first
 * non-whitespace character starts a token. When the stream is exhausted,
 * an EOF token stamped with the current position is returned.
 *
 * NOTE(review): this listing is a diff view whose +/- markers were lost, so
 * the removed lines (while loop, nextChar, inline token creation) appear next
 * to the added ones (for loop, currentCharacter, createTokenFromCharacter).
 *
 * @return the next token, or an EOF token when no characters remain
 * @throws IOException if reading from the input stream fails
 */
public Token getNextToken() throws IOException {
int c;
while ((c = inputStream.read()) != EOF) {
char nextChar = (char) c;
for (int c = inputStream.read(); c != EOF; c = inputStream.read()) {
char currentCharacter = (char) c;
// the column is advanced for every character, whitespace included
positionTracker.incrementColumn();
if (Character.isWhitespace(nextChar)) {
if (nextChar == NEWLINE)
if (Character.isWhitespace(currentCharacter)) {
if (currentCharacter == NEWLINE)
positionTracker.incrementLine();
} else {
// the position is read before the token text is retrieved
FilePosition currentPosition = positionTracker.getCurrentPosition();
String tokenText = retrieveTokenText(nextChar);
return tokenFactory.createToken(tokenText, currentPosition);
}
} else
return createTokenFromCharacter(currentCharacter);
}
// end of input: no further token could be started
return tokenFactory.createEOFToken(positionTracker.getCurrentPosition());
}
/**
 * Builds the token that begins with the given character.
 *
 * The position is read from the tracker before the token text is retrieved;
 * keep that order (retrieval presumably advances the tracker - confirm
 * against retrieveTokenText).
 *
 * @param c the first character of the token, already read from the stream
 * @return the token created from the retrieved text and the recorded position
 * @throws IOException if retrieving the token text fails
 */
private Token createTokenFromCharacter(char c) throws IOException {
    final FilePosition tokenStart = positionTracker.getCurrentPosition();
    return tokenFactory.createToken(retrieveTokenText(c), tokenStart);
}
private String retrieveTokenText(char firstCharacter) throws IOException {
String tokenText = "" + firstCharacter;
@ -74,27 +76,16 @@ public class LispScanner {
text.append(firstDoubleQuote);
int c;
while ((c = inputStream.read()) != EOF) {
for (int c = inputStream.read(); c != EOF; c = inputStream.read()) {
char nextChar = (char) c;
positionTracker.incrementColumn();
text.append(nextChar);
switch (nextChar) {
case NEWLINE:
if (nextChar == NEWLINE)
positionTracker.incrementLine();
break;
case DOUBLE_QUOTE:
if (prevChar != BACKSLASH) {
// we have found the terminating double quote
return text.toString();
}
// this is an escaped double quote
}
else if ((nextChar == DOUBLE_QUOTE) && (prevChar != BACKSLASH))
return text.toString();
prevChar = nextChar;
}
@ -103,55 +94,28 @@ public class LispScanner {
}
/**
 * Returns the text of a number whose first digit has already been read.
 *
 * NOTE(review): this listing is a diff view whose +/- markers were lost. The
 * hand-written mark/read/reset loop is the removed version; the final
 * statement delegating to retrieveNumberOrIdentifier with Character::isDigit
 * is the added one.
 *
 * @param firstDigit the digit that was already consumed from the stream
 * @return the text of the number, starting with firstDigit
 * @throws IOException if reading from the input stream fails
 */
private String retrieveNumber(char firstDigit) throws IOException {
StringBuilder text = new StringBuilder();
text.append(firstDigit);
// mark before each read so a non-digit can be pushed back with reset()
inputStream.mark(1);
int c;
while ((c = inputStream.read()) != EOF) {
char nextChar = (char) c;
if (Character.isDigit(nextChar)) {
// 'nextChar' is a digit in this number
text.append(nextChar);
positionTracker.incrementColumn();
} else {
// we have reached the end of the number
inputStream.reset(); // unread the last character
return text.toString();
}
inputStream.mark(1);
}
// the stream ended while still inside the number
return text.toString();
return retrieveNumberOrIdentifier(firstDigit, Character::isDigit);
}
/**
 * Returns the text of an identifier whose first character has already been
 * read, by delegating to retrieveNumberOrIdentifier with
 * Characters::isLegalIdentifierCharacter as the membership test.
 *
 * NOTE(review): this listing is a diff view whose +/- markers were lost. The
 * first signature line (parameter firstChar) is the removed version and the
 * second (parameter firstCharacter) is the added one.
 *
 * @param firstCharacter the character that was already consumed from the stream
 * @return the text of the identifier
 * @throws IOException if reading from the input stream fails
 */
private String retrieveIdentifier(char firstChar) throws IOException {
private String retrieveIdentifier(char firstCharacter) throws IOException {
return retrieveNumberOrIdentifier(firstCharacter, Characters::isLegalIdentifierCharacter);
}
private String retrieveNumberOrIdentifier(char firstCharacter, Function<Character, Boolean> isPartOfToken)
throws IOException {
StringBuilder text = new StringBuilder();
text.append(firstChar);
text.append(firstCharacter);
inputStream.mark(1);
int c;
while ((c = inputStream.read()) != EOF) {
for (int c = inputStream.read(); c != EOF; c = inputStream.read()) {
char nextChar = (char) c;
if (Characters.isLegalIdentifierCharacter(nextChar)) {
// 'nextChar' is part of the identifier
if (isPartOfToken.apply(nextChar)) {
text.append(nextChar);
positionTracker.incrementColumn();
} else {
// we have reached the end of this identifier
inputStream.reset(); // unread the last character
inputStream.reset();
return text.toString();
}

View File

@ -1,7 +1,7 @@
package util;
import java.util.HashMap;
import java.util.Map;
import java.util.HashSet;
import java.util.Set;
public class Characters {
@ -19,25 +19,25 @@ public class Characters {
public static final char SEMICOLON = ';';
public static final char SINGLE_QUOTE = '\'';
public static final char TICK_MARK = '`';
// Characters that isLegalIdentifierCharacter rejects as part of an identifier.
// NOTE(review): this listing is a diff view whose +/- markers were lost. The
// Map declaration and its put(..., true) calls are the removed version; the
// Set declaration and its add(...) calls are the added one.
public static final Map<Character, Boolean> illegalIdentifierCharacters = new HashMap<>();
public static final Set<Character> illegalIdentifierCharacters = new HashSet<>();
// populated once, when the class is initialized
static {
illegalIdentifierCharacters.put(DOUBLE_QUOTE, true);
illegalIdentifierCharacters.put(SINGLE_QUOTE, true);
illegalIdentifierCharacters.put(BACKSLASH, true);
illegalIdentifierCharacters.put(TICK_MARK, true);
illegalIdentifierCharacters.put(LEFT_PARENTHESIS, true);
illegalIdentifierCharacters.put(RIGHT_PARENTHESIS, true);
illegalIdentifierCharacters.put(LEFT_SQUARE_BRACKET, true);
illegalIdentifierCharacters.put(RIGHT_SQUARE_BRACKET, true);
illegalIdentifierCharacters.put(HASH, true);
illegalIdentifierCharacters.put(PERIOD, true);
illegalIdentifierCharacters.put(SEMICOLON, true);
illegalIdentifierCharacters.add(DOUBLE_QUOTE);
illegalIdentifierCharacters.add(SINGLE_QUOTE);
illegalIdentifierCharacters.add(BACKSLASH);
illegalIdentifierCharacters.add(TICK_MARK);
illegalIdentifierCharacters.add(LEFT_PARENTHESIS);
illegalIdentifierCharacters.add(RIGHT_PARENTHESIS);
illegalIdentifierCharacters.add(LEFT_SQUARE_BRACKET);
illegalIdentifierCharacters.add(RIGHT_SQUARE_BRACKET);
illegalIdentifierCharacters.add(HASH);
illegalIdentifierCharacters.add(PERIOD);
illegalIdentifierCharacters.add(SEMICOLON);
}
/**
 * Tells whether a character may appear in an identifier: it must not be
 * whitespace and must not be listed in illegalIdentifierCharacters.
 *
 * NOTE(review): this listing is a diff view whose +/- markers were lost. The
 * containsKey(...) return is the removed (Map-based) version and the
 * contains(...) return is the added (Set-based) one.
 *
 * @param c the character to test
 * @return true if c is neither whitespace nor an illegal identifier character
 */
public static boolean isLegalIdentifierCharacter(char c) {
return (! Character.isWhitespace(c)) && (! illegalIdentifierCharacters.containsKey(c));
return (!Character.isWhitespace(c)) && (!illegalIdentifierCharacters.contains(c));
}
}