More refactoring of the scanner package
This commit is contained in:
parent
a3a49a2084
commit
0d406c3e36
|
@ -9,6 +9,7 @@ import java.io.BufferedInputStream;
|
|||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.text.MessageFormat;
|
||||
import java.util.function.Function;
|
||||
|
||||
import constructs.Token;
|
||||
import constructs.TokenFactory;
|
||||
|
@ -34,26 +35,27 @@ public class LispScanner {
|
|||
}
|
||||
|
||||
/**
 * Reads characters from {@code inputStream} until a token can be produced.
 *
 * Every character read advances the column in {@code positionTracker};
 * whitespace is skipped, and a NEWLINE additionally advances the line. The
 * first non-whitespace character starts a token. If the stream reaches EOF
 * first, an EOF token carrying the current position is returned.
 *
 * NOTE(review): this listing comes from a rendered diff, so the removed
 * while-loop lines (using nextChar) and the added for-loop lines (using
 * currentCharacter) both appear below.
 *
 * @return the next token, or the EOF token when the input is exhausted
 * @throws IOException if reading from the input stream fails
 */
public Token getNextToken() throws IOException {
|
||||
int c;
|
||||
|
||||
while ((c = inputStream.read()) != EOF) {
|
||||
char nextChar = (char) c;
|
||||
for (int c = inputStream.read(); c != EOF; c = inputStream.read()) {
|
||||
char currentCharacter = (char) c;
|
||||
// the column is advanced for every character read, whitespace included
positionTracker.incrementColumn();
|
||||
|
||||
if (Character.isWhitespace(nextChar)) {
|
||||
if (nextChar == NEWLINE)
|
||||
if (Character.isWhitespace(currentCharacter)) {
|
||||
if (currentCharacter == NEWLINE)
|
||||
positionTracker.incrementLine();
|
||||
} else {
|
||||
FilePosition currentPosition = positionTracker.getCurrentPosition();
|
||||
String tokenText = retrieveTokenText(nextChar);
|
||||
|
||||
return tokenFactory.createToken(tokenText, currentPosition);
|
||||
}
|
||||
} else
|
||||
return createTokenFromCharacter(currentCharacter);
|
||||
}
|
||||
|
||||
// input exhausted without finding a non-whitespace character
return tokenFactory.createEOFToken(positionTracker.getCurrentPosition());
|
||||
}
|
||||
|
||||
/**
 * Builds the token that begins with the given character.
 *
 * @param c the first character of the token, already read from the stream
 * @return the token created by {@code tokenFactory} from the retrieved text
 *         and the position recorded before the text was retrieved
 * @throws IOException if retrieving the token text fails
 */
private Token createTokenFromCharacter(char c) throws IOException {
|
||||
// position is read before retrieveTokenText runs; presumably that call
// advances the tracker while consuming the rest of the token - confirm
FilePosition currentPosition = positionTracker.getCurrentPosition();
|
||||
String tokenText = retrieveTokenText(c);
|
||||
|
||||
return tokenFactory.createToken(tokenText, currentPosition);
|
||||
}
|
||||
|
||||
private String retrieveTokenText(char firstCharacter) throws IOException {
|
||||
String tokenText = "" + firstCharacter;
|
||||
|
||||
|
@ -74,27 +76,16 @@ public class LispScanner {
|
|||
|
||||
text.append(firstDoubleQuote);
|
||||
|
||||
int c;
|
||||
|
||||
while ((c = inputStream.read()) != EOF) {
|
||||
for (int c = inputStream.read(); c != EOF; c = inputStream.read()) {
|
||||
char nextChar = (char) c;
|
||||
|
||||
positionTracker.incrementColumn();
|
||||
text.append(nextChar);
|
||||
|
||||
switch (nextChar) {
|
||||
case NEWLINE:
|
||||
if (nextChar == NEWLINE)
|
||||
positionTracker.incrementLine();
|
||||
break;
|
||||
case DOUBLE_QUOTE:
|
||||
if (prevChar != BACKSLASH) {
|
||||
// we have found the terminating double quote
|
||||
|
||||
return text.toString();
|
||||
}
|
||||
|
||||
// this is an escaped double quote
|
||||
}
|
||||
else if ((nextChar == DOUBLE_QUOTE) && (prevChar != BACKSLASH))
|
||||
return text.toString();
|
||||
|
||||
prevChar = nextChar;
|
||||
}
|
||||
|
@ -103,55 +94,28 @@ public class LispScanner {
|
|||
}
|
||||
|
||||
/**
 * Returns the text of a number whose first digit has already been read.
 *
 * NOTE(review): this listing comes from a rendered diff. The hand-written
 * loop below is the removed implementation; the final return statement,
 * which delegates to retrieveNumberOrIdentifier with Character::isDigit,
 * is the added replacement.
 *
 * @param firstDigit the first digit of the number
 * @return the digits of the number as a string
 * @throws IOException if reading from the input stream fails
 */
private String retrieveNumber(char firstDigit) throws IOException {
|
||||
StringBuilder text = new StringBuilder();
|
||||
|
||||
text.append(firstDigit);
|
||||
// mark before each read so a non-digit can be pushed back with reset()
inputStream.mark(1);
|
||||
|
||||
int c;
|
||||
|
||||
while ((c = inputStream.read()) != EOF) {
|
||||
char nextChar = (char) c;
|
||||
|
||||
if (Character.isDigit(nextChar)) {
|
||||
// 'nextChar' is a digit in this number
|
||||
|
||||
text.append(nextChar);
|
||||
positionTracker.incrementColumn();
|
||||
} else {
|
||||
// we have reached the end of the number
|
||||
|
||||
inputStream.reset(); // unread the last character
|
||||
|
||||
return text.toString();
|
||||
}
|
||||
|
||||
// move the mark forward past the digit that was just accepted
inputStream.mark(1);
|
||||
}
|
||||
|
||||
return text.toString();
|
||||
return retrieveNumberOrIdentifier(firstDigit, Character::isDigit);
|
||||
}
|
||||
|
||||
/**
 * Returns the text of an identifier whose first character has already been
 * read, by delegating to retrieveNumberOrIdentifier with
 * Characters::isLegalIdentifierCharacter as the membership test.
 *
 * NOTE(review): this listing comes from a rendered diff; the first signature
 * line (parameter firstChar) is the removed one, the second (parameter
 * firstCharacter) is the added one.
 *
 * @return the identifier text
 * @throws IOException if reading from the input stream fails
 */
private String retrieveIdentifier(char firstChar) throws IOException {
|
||||
private String retrieveIdentifier(char firstCharacter) throws IOException {
|
||||
return retrieveNumberOrIdentifier(firstCharacter, Characters::isLegalIdentifierCharacter);
|
||||
}
|
||||
|
||||
private String retrieveNumberOrIdentifier(char firstCharacter, Function<Character, Boolean> isPartOfToken)
|
||||
throws IOException {
|
||||
StringBuilder text = new StringBuilder();
|
||||
|
||||
text.append(firstChar);
|
||||
text.append(firstCharacter);
|
||||
inputStream.mark(1);
|
||||
|
||||
int c;
|
||||
|
||||
while ((c = inputStream.read()) != EOF) {
|
||||
for (int c = inputStream.read(); c != EOF; c = inputStream.read()) {
|
||||
char nextChar = (char) c;
|
||||
|
||||
if (Characters.isLegalIdentifierCharacter(nextChar)) {
|
||||
// 'nextChar' is part of the identifier
|
||||
|
||||
if (isPartOfToken.apply(nextChar)) {
|
||||
text.append(nextChar);
|
||||
positionTracker.incrementColumn();
|
||||
} else {
|
||||
// we have reached the end of this identifier
|
||||
|
||||
inputStream.reset(); // unread the last character
|
||||
inputStream.reset();
|
||||
|
||||
return text.toString();
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
package util;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
public class Characters {
|
||||
|
||||
|
@ -19,25 +19,25 @@ public class Characters {
|
|||
public static final char SEMICOLON = ';';
|
||||
public static final char SINGLE_QUOTE = '\'';
|
||||
public static final char TICK_MARK = '`';
|
||||
|
||||
public static final Map<Character, Boolean> illegalIdentifierCharacters = new HashMap<>();
|
||||
|
||||
public static final Set<Character> illegalIdentifierCharacters = new HashSet<>();
|
||||
|
||||
// Registers the characters that isLegalIdentifierCharacter rejects.
// NOTE(review): this listing comes from a rendered diff; the put(..., true)
// lines belong to the removed Map-based version and the add(...) lines to
// the added Set-based version. Both register the same eleven characters.
static {
|
||||
illegalIdentifierCharacters.put(DOUBLE_QUOTE, true);
|
||||
illegalIdentifierCharacters.put(SINGLE_QUOTE, true);
|
||||
illegalIdentifierCharacters.put(BACKSLASH, true);
|
||||
illegalIdentifierCharacters.put(TICK_MARK, true);
|
||||
illegalIdentifierCharacters.put(LEFT_PARENTHESIS, true);
|
||||
illegalIdentifierCharacters.put(RIGHT_PARENTHESIS, true);
|
||||
illegalIdentifierCharacters.put(LEFT_SQUARE_BRACKET, true);
|
||||
illegalIdentifierCharacters.put(RIGHT_SQUARE_BRACKET, true);
|
||||
illegalIdentifierCharacters.put(HASH, true);
|
||||
illegalIdentifierCharacters.put(PERIOD, true);
|
||||
illegalIdentifierCharacters.put(SEMICOLON, true);
|
||||
illegalIdentifierCharacters.add(DOUBLE_QUOTE);
|
||||
illegalIdentifierCharacters.add(SINGLE_QUOTE);
|
||||
illegalIdentifierCharacters.add(BACKSLASH);
|
||||
illegalIdentifierCharacters.add(TICK_MARK);
|
||||
illegalIdentifierCharacters.add(LEFT_PARENTHESIS);
|
||||
illegalIdentifierCharacters.add(RIGHT_PARENTHESIS);
|
||||
illegalIdentifierCharacters.add(LEFT_SQUARE_BRACKET);
|
||||
illegalIdentifierCharacters.add(RIGHT_SQUARE_BRACKET);
|
||||
illegalIdentifierCharacters.add(HASH);
|
||||
illegalIdentifierCharacters.add(PERIOD);
|
||||
illegalIdentifierCharacters.add(SEMICOLON);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Tells whether a character may appear in an identifier.
 *
 * NOTE(review): this listing comes from a rendered diff; the containsKey
 * return is the removed Map-based line and the contains return is the added
 * Set-based line.
 *
 * @param c the character to test
 * @return true when {@code c} is not whitespace and is not registered in
 *         {@code illegalIdentifierCharacters}
 */
public static boolean isLegalIdentifierCharacter(char c) {
|
||||
return (! Character.isWhitespace(c)) && (! illegalIdentifierCharacters.containsKey(c));
|
||||
return (!Character.isWhitespace(c)) && (!illegalIdentifierCharacters.contains(c));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue