182 lines
6.0 KiB
Java
182 lines
6.0 KiB
Java
package scanner;
|
|
|
|
import error.LineColumnException;
|
|
import file.FilePosition;
|
|
import file.FilePositionTracker;
|
|
import token.Token;
|
|
import token.TokenFactory;
|
|
import token.TokenFactoryImpl;
|
|
import util.Characters;
|
|
|
|
import java.io.InputStream;
|
|
import java.util.function.Function;
|
|
|
|
import static java.lang.Character.isDigit;
|
|
import static java.lang.Character.isWhitespace;
|
|
|
|
/**
|
|
* Converts a stream of bytes into a stream of Lisp tokens.
|
|
*/
|
|
public class LispScanner {
|
|
|
|
private LispInputStream inputStream;
|
|
private FilePositionTracker positionTracker;
|
|
private TokenFactory tokenFactory;
|
|
|
|
public LispScanner(InputStream inputStream, String fileName) {
|
|
this.inputStream = new LispCommentRemovingInputStream(inputStream);
|
|
this.positionTracker = new FilePositionTracker(fileName);
|
|
this.tokenFactory = new TokenFactoryImpl();
|
|
}
|
|
|
|
public Token getNextToken() {
|
|
for (int c = inputStream.read(); c != Characters.EOF; c = inputStream.read()) {
|
|
char currentCharacter = (char) c;
|
|
positionTracker.incrementColumn();
|
|
|
|
if (!isWhitespace(currentCharacter))
|
|
return createTokenFromCharacter(currentCharacter);
|
|
else if (currentCharacter == Characters.NEWLINE)
|
|
positionTracker.incrementLine();
|
|
}
|
|
|
|
return tokenFactory.createEofToken(positionTracker.currentPosition());
|
|
}
|
|
|
|
private Token createTokenFromCharacter(char c) {
|
|
FilePosition currentPosition = positionTracker.currentPosition();
|
|
String tokenText = retrieveTokenText(c);
|
|
|
|
return tokenFactory.createToken(tokenText, currentPosition);
|
|
}
|
|
|
|
private String retrieveTokenText(char firstCharacter) {
|
|
String tokenText = "" + firstCharacter;
|
|
|
|
if (firstCharacter == Characters.DOUBLE_QUOTE)
|
|
tokenText = retrieveStringTokenText(firstCharacter);
|
|
else if (Characters.INSTANCE.isNumberPrefix(firstCharacter))
|
|
tokenText = retrieveNumberOrIdentifierTokenText(firstCharacter);
|
|
else if (isDigit(firstCharacter))
|
|
tokenText = retrieveNumberTokenText(firstCharacter);
|
|
else if (Characters.INSTANCE.isLegalIdentifierCharacter(firstCharacter))
|
|
tokenText = retrieveIdentifierTokenText(firstCharacter);
|
|
|
|
return tokenText;
|
|
}
|
|
|
|
private String retrieveStringTokenText(char firstDoubleQuote) {
|
|
ComplexTokenTextRetriever retriever = new ComplexTokenTextRetriever(firstDoubleQuote,
|
|
Characters.INSTANCE::isLegalStringCharacter);
|
|
|
|
return retriever.retrieveToken();
|
|
}
|
|
|
|
private String retrieveNumberOrIdentifierTokenText(char firstCharacter) {
|
|
char nextCharacter = (char) inputStream.read();
|
|
inputStream.unreadLastCharacter();
|
|
|
|
if (isDigit(nextCharacter))
|
|
return retrieveNumberTokenText(firstCharacter);
|
|
|
|
return retrieveIdentifierTokenText(firstCharacter);
|
|
}
|
|
|
|
private String retrieveNumberTokenText(char firstCharacter) {
|
|
ComplexTokenTextRetriever retriever = new ComplexTokenTextRetriever(firstCharacter, Character::isDigit);
|
|
|
|
return retriever.retrieveToken();
|
|
}
|
|
|
|
private String retrieveIdentifierTokenText(char firstCharacter) {
|
|
ComplexTokenTextRetriever retriever = new ComplexTokenTextRetriever(firstCharacter,
|
|
Characters.INSTANCE::isLegalIdentifierCharacter);
|
|
|
|
return retriever.retrieveToken();
|
|
}
|
|
|
|
private class ComplexTokenTextRetriever {
|
|
|
|
Function<Character, Boolean> isPartOfToken;
|
|
StringBuilder text;
|
|
FilePosition position;
|
|
char firstCharacter;
|
|
char currentCharacter;
|
|
char previousCharacter;
|
|
|
|
public ComplexTokenTextRetriever(char firstCharacter, Function<Character, Boolean> isPartOfToken) {
|
|
this.isPartOfToken = isPartOfToken;
|
|
this.text = new StringBuilder();
|
|
this.position = positionTracker.currentPosition();
|
|
this.firstCharacter = firstCharacter;
|
|
this.currentCharacter = firstCharacter;
|
|
this.previousCharacter = firstCharacter;
|
|
}
|
|
|
|
public String retrieveToken() {
|
|
text.append(firstCharacter);
|
|
|
|
for (int c = inputStream.read(); c != Characters.EOF; c = inputStream.read()) {
|
|
currentCharacter = (char) c;
|
|
|
|
if (!isPartOfToken.apply(currentCharacter)) {
|
|
inputStream.unreadLastCharacter();
|
|
|
|
return text.toString();
|
|
}
|
|
|
|
addCharacterToToken();
|
|
|
|
if (isTerminatingCharacter())
|
|
return text.toString();
|
|
|
|
previousCharacter = currentCharacter;
|
|
}
|
|
|
|
return terminateTokenWithEof();
|
|
}
|
|
|
|
private void addCharacterToToken() {
|
|
text.append(currentCharacter);
|
|
positionTracker.incrementColumn();
|
|
|
|
if (currentCharacter == Characters.NEWLINE)
|
|
positionTracker.incrementLine();
|
|
}
|
|
|
|
private boolean isTerminatingCharacter() {
|
|
return isStringToken() && isTerminatingDoubleQuote();
|
|
}
|
|
|
|
private boolean isStringToken() {
|
|
return firstCharacter == Characters.DOUBLE_QUOTE;
|
|
}
|
|
|
|
private boolean isTerminatingDoubleQuote() {
|
|
return (currentCharacter == Characters.DOUBLE_QUOTE) &&
|
|
(previousCharacter != Characters.BACKSLASH);
|
|
}
|
|
|
|
private String terminateTokenWithEof() {
|
|
if (isStringToken())
|
|
throw new UnterminatedStringException(position);
|
|
|
|
return text.toString();
|
|
}
|
|
}
|
|
|
|
public static class UnterminatedStringException extends LineColumnException {
|
|
|
|
private static final long serialVersionUID = 1L;
|
|
|
|
public UnterminatedStringException(FilePosition position) {
|
|
super(position);
|
|
}
|
|
|
|
@Override
|
|
public String getMessagePrefix() {
|
|
return "unterminated quoted string";
|
|
}
|
|
}
|
|
}
|