More refactoring of the scanner package
This commit is contained in:
parent
a3a49a2084
commit
0d406c3e36
|
@ -9,6 +9,7 @@ import java.io.BufferedInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.text.MessageFormat;
|
import java.text.MessageFormat;
|
||||||
|
import java.util.function.Function;
|
||||||
|
|
||||||
import constructs.Token;
|
import constructs.Token;
|
||||||
import constructs.TokenFactory;
|
import constructs.TokenFactory;
|
||||||
|
@ -34,26 +35,27 @@ public class LispScanner {
|
||||||
}
|
}
|
||||||
|
|
||||||
public Token getNextToken() throws IOException {
|
public Token getNextToken() throws IOException {
|
||||||
int c;
|
for (int c = inputStream.read(); c != EOF; c = inputStream.read()) {
|
||||||
|
char currentCharacter = (char) c;
|
||||||
while ((c = inputStream.read()) != EOF) {
|
|
||||||
char nextChar = (char) c;
|
|
||||||
positionTracker.incrementColumn();
|
positionTracker.incrementColumn();
|
||||||
|
|
||||||
if (Character.isWhitespace(nextChar)) {
|
if (Character.isWhitespace(currentCharacter)) {
|
||||||
if (nextChar == NEWLINE)
|
if (currentCharacter == NEWLINE)
|
||||||
positionTracker.incrementLine();
|
positionTracker.incrementLine();
|
||||||
} else {
|
} else
|
||||||
FilePosition currentPosition = positionTracker.getCurrentPosition();
|
return createTokenFromCharacter(currentCharacter);
|
||||||
String tokenText = retrieveTokenText(nextChar);
|
|
||||||
|
|
||||||
return tokenFactory.createToken(tokenText, currentPosition);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return tokenFactory.createEOFToken(positionTracker.getCurrentPosition());
|
return tokenFactory.createEOFToken(positionTracker.getCurrentPosition());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private Token createTokenFromCharacter(char c) throws IOException {
|
||||||
|
FilePosition currentPosition = positionTracker.getCurrentPosition();
|
||||||
|
String tokenText = retrieveTokenText(c);
|
||||||
|
|
||||||
|
return tokenFactory.createToken(tokenText, currentPosition);
|
||||||
|
}
|
||||||
|
|
||||||
private String retrieveTokenText(char firstCharacter) throws IOException {
|
private String retrieveTokenText(char firstCharacter) throws IOException {
|
||||||
String tokenText = "" + firstCharacter;
|
String tokenText = "" + firstCharacter;
|
||||||
|
|
||||||
|
@ -74,27 +76,16 @@ public class LispScanner {
|
||||||
|
|
||||||
text.append(firstDoubleQuote);
|
text.append(firstDoubleQuote);
|
||||||
|
|
||||||
int c;
|
for (int c = inputStream.read(); c != EOF; c = inputStream.read()) {
|
||||||
|
|
||||||
while ((c = inputStream.read()) != EOF) {
|
|
||||||
char nextChar = (char) c;
|
char nextChar = (char) c;
|
||||||
|
|
||||||
positionTracker.incrementColumn();
|
positionTracker.incrementColumn();
|
||||||
text.append(nextChar);
|
text.append(nextChar);
|
||||||
|
|
||||||
switch (nextChar) {
|
if (nextChar == NEWLINE)
|
||||||
case NEWLINE:
|
|
||||||
positionTracker.incrementLine();
|
positionTracker.incrementLine();
|
||||||
break;
|
else if ((nextChar == DOUBLE_QUOTE) && (prevChar != BACKSLASH))
|
||||||
case DOUBLE_QUOTE:
|
|
||||||
if (prevChar != BACKSLASH) {
|
|
||||||
// we have found the terminating double quote
|
|
||||||
|
|
||||||
return text.toString();
|
return text.toString();
|
||||||
}
|
|
||||||
|
|
||||||
// this is an escaped double quote
|
|
||||||
}
|
|
||||||
|
|
||||||
prevChar = nextChar;
|
prevChar = nextChar;
|
||||||
}
|
}
|
||||||
|
@ -103,55 +94,28 @@ public class LispScanner {
|
||||||
}
|
}
|
||||||
|
|
||||||
private String retrieveNumber(char firstDigit) throws IOException {
|
private String retrieveNumber(char firstDigit) throws IOException {
|
||||||
|
return retrieveNumberOrIdentifier(firstDigit, Character::isDigit);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String retrieveIdentifier(char firstCharacter) throws IOException {
|
||||||
|
return retrieveNumberOrIdentifier(firstCharacter, Characters::isLegalIdentifierCharacter);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String retrieveNumberOrIdentifier(char firstCharacter, Function<Character, Boolean> isPartOfToken)
|
||||||
|
throws IOException {
|
||||||
StringBuilder text = new StringBuilder();
|
StringBuilder text = new StringBuilder();
|
||||||
|
|
||||||
text.append(firstDigit);
|
text.append(firstCharacter);
|
||||||
inputStream.mark(1);
|
inputStream.mark(1);
|
||||||
|
|
||||||
int c;
|
for (int c = inputStream.read(); c != EOF; c = inputStream.read()) {
|
||||||
|
|
||||||
while ((c = inputStream.read()) != EOF) {
|
|
||||||
char nextChar = (char) c;
|
char nextChar = (char) c;
|
||||||
|
|
||||||
if (Character.isDigit(nextChar)) {
|
if (isPartOfToken.apply(nextChar)) {
|
||||||
// 'nextChar' is a digit in this number
|
|
||||||
|
|
||||||
text.append(nextChar);
|
text.append(nextChar);
|
||||||
positionTracker.incrementColumn();
|
positionTracker.incrementColumn();
|
||||||
} else {
|
} else {
|
||||||
// we have reached the end of the number
|
inputStream.reset();
|
||||||
|
|
||||||
inputStream.reset(); // unread the last character
|
|
||||||
|
|
||||||
return text.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
inputStream.mark(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
return text.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
private String retrieveIdentifier(char firstChar) throws IOException {
|
|
||||||
StringBuilder text = new StringBuilder();
|
|
||||||
|
|
||||||
text.append(firstChar);
|
|
||||||
inputStream.mark(1);
|
|
||||||
|
|
||||||
int c;
|
|
||||||
|
|
||||||
while ((c = inputStream.read()) != EOF) {
|
|
||||||
char nextChar = (char) c;
|
|
||||||
|
|
||||||
if (Characters.isLegalIdentifierCharacter(nextChar)) {
|
|
||||||
// 'nextChar' is part of the identifier
|
|
||||||
|
|
||||||
text.append(nextChar);
|
|
||||||
positionTracker.incrementColumn();
|
|
||||||
} else {
|
|
||||||
// we have reached the end of this identifier
|
|
||||||
|
|
||||||
inputStream.reset(); // unread the last character
|
|
||||||
|
|
||||||
return text.toString();
|
return text.toString();
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
package util;
|
package util;
|
||||||
|
|
||||||
import java.util.HashMap;
|
import java.util.HashSet;
|
||||||
import java.util.Map;
|
import java.util.Set;
|
||||||
|
|
||||||
public class Characters {
|
public class Characters {
|
||||||
|
|
||||||
|
@ -20,24 +20,24 @@ public class Characters {
|
||||||
public static final char SINGLE_QUOTE = '\'';
|
public static final char SINGLE_QUOTE = '\'';
|
||||||
public static final char TICK_MARK = '`';
|
public static final char TICK_MARK = '`';
|
||||||
|
|
||||||
public static final Map<Character, Boolean> illegalIdentifierCharacters = new HashMap<>();
|
public static final Set<Character> illegalIdentifierCharacters = new HashSet<>();
|
||||||
|
|
||||||
static {
|
static {
|
||||||
illegalIdentifierCharacters.put(DOUBLE_QUOTE, true);
|
illegalIdentifierCharacters.add(DOUBLE_QUOTE);
|
||||||
illegalIdentifierCharacters.put(SINGLE_QUOTE, true);
|
illegalIdentifierCharacters.add(SINGLE_QUOTE);
|
||||||
illegalIdentifierCharacters.put(BACKSLASH, true);
|
illegalIdentifierCharacters.add(BACKSLASH);
|
||||||
illegalIdentifierCharacters.put(TICK_MARK, true);
|
illegalIdentifierCharacters.add(TICK_MARK);
|
||||||
illegalIdentifierCharacters.put(LEFT_PARENTHESIS, true);
|
illegalIdentifierCharacters.add(LEFT_PARENTHESIS);
|
||||||
illegalIdentifierCharacters.put(RIGHT_PARENTHESIS, true);
|
illegalIdentifierCharacters.add(RIGHT_PARENTHESIS);
|
||||||
illegalIdentifierCharacters.put(LEFT_SQUARE_BRACKET, true);
|
illegalIdentifierCharacters.add(LEFT_SQUARE_BRACKET);
|
||||||
illegalIdentifierCharacters.put(RIGHT_SQUARE_BRACKET, true);
|
illegalIdentifierCharacters.add(RIGHT_SQUARE_BRACKET);
|
||||||
illegalIdentifierCharacters.put(HASH, true);
|
illegalIdentifierCharacters.add(HASH);
|
||||||
illegalIdentifierCharacters.put(PERIOD, true);
|
illegalIdentifierCharacters.add(PERIOD);
|
||||||
illegalIdentifierCharacters.put(SEMICOLON, true);
|
illegalIdentifierCharacters.add(SEMICOLON);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean isLegalIdentifierCharacter(char c) {
|
public static boolean isLegalIdentifierCharacter(char c) {
|
||||||
return (! Character.isWhitespace(c)) && (! illegalIdentifierCharacters.containsKey(c));
|
return (!Character.isWhitespace(c)) && (!illegalIdentifierCharacters.contains(c));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue