Added unit tests and started refactoring LispScanner
This commit is contained in:
parent
4a5f169076
commit
9b3261f575
|
@ -1,5 +1,11 @@
|
|||
package scanner;
|
||||
|
||||
import static util.Characters.BACKSLASH;
|
||||
import static util.Characters.DOUBLE_QUOTE;
|
||||
import static util.Characters.EOF;
|
||||
import static util.Characters.NEWLINE;
|
||||
import static util.Characters.SEMICOLON;
|
||||
|
||||
import java.io.FilterInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
@ -44,11 +50,11 @@ public class LispFilterInputStream extends FilterInputStream {
|
|||
}
|
||||
|
||||
private boolean haveEncounteredStringBoundary() {
|
||||
return (previousCharacter != '\\') && (nextCharacter == '\"');
|
||||
return (previousCharacter != BACKSLASH) && (nextCharacter == DOUBLE_QUOTE);
|
||||
}
|
||||
|
||||
private boolean haveEnteredComment() {
|
||||
return (nextCharacter == ';') && (!isInQuotedString);
|
||||
return (nextCharacter == SEMICOLON) && (!isInQuotedString);
|
||||
}
|
||||
|
||||
private void consumeAllBytesInComment() throws IOException {
|
||||
|
@ -57,7 +63,7 @@ public class LispFilterInputStream extends FilterInputStream {
|
|||
}
|
||||
|
||||
private boolean stillInComment() {
|
||||
return (nextCharacter != '\n') && (nextCharacter != -1);
|
||||
return (nextCharacter != NEWLINE) && (nextCharacter != EOF);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,171 +1,120 @@
|
|||
/*
|
||||
* Name: Mike Cifelli
|
||||
* Course: CIS 443 - Programming Languages
|
||||
* Assignment: Lisp Interpreter Phase 1 - Lexical Analysis
|
||||
*/
|
||||
|
||||
package scanner;
|
||||
|
||||
import static util.Characters.BACKSLASH;
|
||||
import static util.Characters.DOUBLE_QUOTE;
|
||||
import static util.Characters.EOF;
|
||||
import static util.Characters.HASH;
|
||||
import static util.Characters.LEFT_PARENTHESIS;
|
||||
import static util.Characters.LEFT_SQUARE_BRACKET;
|
||||
import static util.Characters.NEWLINE;
|
||||
import static util.Characters.PERIOD;
|
||||
import static util.Characters.RIGHT_PARENTHESIS;
|
||||
import static util.Characters.RIGHT_SQUARE_BRACKET;
|
||||
import static util.Characters.SEMICOLON;
|
||||
import static util.Characters.SINGLE_QUOTE;
|
||||
import static util.Characters.TICK_MARK;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* A <code>LispScanner</code> converts a stream of bytes into a stream of Lisp
|
||||
* tokens. When the end of stream has been reached a token with a type of
|
||||
* <code>Token.Type.EOF</code> is returned from the <code>nextToken</code>
|
||||
* method of this scanner.
|
||||
* Converts a stream of bytes into a stream of Lisp tokens.
|
||||
*/
|
||||
public class LispScanner {
|
||||
|
||||
private LispFilterInputStream inStream;
|
||||
private Token currToken;
|
||||
private String fileName;
|
||||
private int line;
|
||||
private int column;
|
||||
private static Map<Character, Boolean> illegalIdentifierCharacters = new HashMap<>();
|
||||
|
||||
static {
|
||||
illegalIdentifierCharacters.put(DOUBLE_QUOTE, true);
|
||||
illegalIdentifierCharacters.put(SINGLE_QUOTE, true);
|
||||
illegalIdentifierCharacters.put(BACKSLASH, true);
|
||||
illegalIdentifierCharacters.put(TICK_MARK, true);
|
||||
illegalIdentifierCharacters.put(LEFT_PARENTHESIS, true);
|
||||
illegalIdentifierCharacters.put(RIGHT_PARENTHESIS, true);
|
||||
illegalIdentifierCharacters.put(LEFT_SQUARE_BRACKET, true);
|
||||
illegalIdentifierCharacters.put(RIGHT_SQUARE_BRACKET, true);
|
||||
illegalIdentifierCharacters.put(HASH, true);
|
||||
illegalIdentifierCharacters.put(PERIOD, true);
|
||||
illegalIdentifierCharacters.put(SEMICOLON, true);
|
||||
}
|
||||
|
||||
private InputStream inputStream;
|
||||
private String inputStreamName;
|
||||
private int lineNumber;
|
||||
private int columnNumber;
|
||||
|
||||
/**
|
||||
* Create a new <code>LispScanner</code> that produces Lisp tokens from the
|
||||
* specified input stream.
|
||||
*
|
||||
* @param in
|
||||
* the input stream to obtain Lisp tokens from (must not be
|
||||
* <code>null</code>)
|
||||
* @param fileName
|
||||
* the name of the file that <code>in</code> is reading from
|
||||
*/
|
||||
public LispScanner(InputStream in, String fileName) {
|
||||
this.inStream = new LispFilterInputStream(new BufferedInputStream(in));
|
||||
this.currToken = null;
|
||||
this.fileName = fileName;
|
||||
this.line = 1;
|
||||
this.column = 0;
|
||||
this.inputStream = new LispFilterInputStream(new BufferedInputStream(in));
|
||||
this.inputStreamName = fileName;
|
||||
this.lineNumber = 1;
|
||||
this.columnNumber = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the same Lisp token returned from the last call to the
|
||||
* <code>nextToken</code> method of this scanner. In the case that no calls
|
||||
* to <code>nextToken</code> have been made yet, this method returns
|
||||
* <code>null</code>.
|
||||
*
|
||||
* @return the last Lisp token returned from this scanner or
|
||||
* <code>null</code> (if no tokens have been returned from this
|
||||
* scanner yet)
|
||||
*/
|
||||
public Token getCurrToken() {
|
||||
return currToken;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next Lisp token from this scanner.
|
||||
*
|
||||
* @return the next Lisp token from this scanner.
|
||||
* @throws RuntimeException
|
||||
* Indicates that an illegal character or an unterminated quoted
|
||||
* string was encountered in the input stream (not counting
|
||||
* comments).
|
||||
* @throws IOException
|
||||
* Indicates that an I/O error has occurred.
|
||||
*/
|
||||
public Token nextToken() throws IOException {
|
||||
currToken = retrieveNextToken();
|
||||
|
||||
return currToken;
|
||||
}
|
||||
|
||||
// Retrieve the next Lisp token from 'inStream'.
|
||||
//
|
||||
// Returns: the next Lisp token found in 'inStream'
|
||||
// Precondition: 'inStream' must not be null.
|
||||
// Throws: RuntimeException - Indicates that an illegal character or an
|
||||
// unterminated quoted string was encountered in
|
||||
// 'inStream'.
|
||||
// Throws: IOException - Indicates that an I/O error has occurred.
|
||||
private Token retrieveNextToken() throws IOException {
|
||||
int c;
|
||||
|
||||
while ((c = inStream.read()) != -1) {
|
||||
while ((c = inputStream.read()) != EOF) {
|
||||
char nextChar = (char) c;
|
||||
|
||||
++column;
|
||||
++columnNumber;
|
||||
|
||||
// determine the type of the Lisp token from the character obtained
|
||||
// from 'inStream'
|
||||
switch (nextChar) {
|
||||
case '\n':
|
||||
// we have hit a new line so increment 'line' and reset
|
||||
// 'column'
|
||||
++line;
|
||||
column = 0;
|
||||
|
||||
case NEWLINE:
|
||||
moveToNewLine();
|
||||
break;
|
||||
case '(':
|
||||
return new Token(Token.Type.LEFT_PAREN, "(", fileName, line, column);
|
||||
case ')':
|
||||
return new Token(Token.Type.RIGHT_PAREN, ")", fileName, line, column);
|
||||
case '\'':
|
||||
return new Token(Token.Type.QUOTE_MARK, "\'", fileName, line, column);
|
||||
case '\"':
|
||||
case LEFT_PARENTHESIS:
|
||||
return new Token(Token.Type.LEFT_PAREN, "(", inputStreamName, lineNumber, columnNumber);
|
||||
case RIGHT_PARENTHESIS:
|
||||
return new Token(Token.Type.RIGHT_PAREN, ")", inputStreamName, lineNumber, columnNumber);
|
||||
case SINGLE_QUOTE:
|
||||
return new Token(Token.Type.QUOTE_MARK, "\'", inputStreamName, lineNumber, columnNumber);
|
||||
case DOUBLE_QUOTE:
|
||||
return retrieveString(nextChar);
|
||||
default:
|
||||
if (Character.isWhitespace(nextChar)) { // skip whitespace
|
||||
if (Character.isWhitespace(nextChar)) {
|
||||
continue;
|
||||
} else if (Character.isDigit(nextChar)) { // number
|
||||
} else if (Character.isDigit(nextChar)) {
|
||||
return retrieveNumber(nextChar);
|
||||
} else if (isLegalIdChar(nextChar)) { // identifier
|
||||
} else if (isLegalIdentifierCharacter(nextChar)) {
|
||||
return retrieveIdentifier(nextChar);
|
||||
} else {
|
||||
// 'nextChar' can not start any Lisp token
|
||||
|
||||
throw new RuntimeException(
|
||||
"illegal character " + "\'" + nextChar + "\'" + " - line " + line + " column " + column);
|
||||
throw new RuntimeException("illegal character " + "\'" + nextChar + "\'" + " - line " + lineNumber
|
||||
+ " column " + columnNumber);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// we have reached the end of 'inStream' so we return an end-of-file
|
||||
// token
|
||||
return new Token(Token.Type.EOF, "EOF", fileName, line, column);
|
||||
return new Token(Token.Type.EOF, "EOF", inputStreamName, lineNumber, columnNumber);
|
||||
}
|
||||
|
||||
// Retrieve a quoted string token from 'inStream'.
|
||||
//
|
||||
// Parameters: firstDoubleQuote - the opening double quote of this quoted
|
||||
// string
|
||||
// Returns: a quoted string token obtained from 'inStream'
|
||||
// Throws: RuntimeException - Indicates that this quoted string was
|
||||
// missing its terminating double quote.
|
||||
// Throws: IOException - Indicates that an I/O error has occurred.
|
||||
// Precondition: 'firstDoubleQuote' must be the leading double quote
|
||||
// character of this quoted string and 'inStream' must not
|
||||
// be null.
|
||||
private Token retrieveString(char firstDoubleQuote) throws IOException {
|
||||
StringBuffer text = new StringBuffer();
|
||||
int startLine = line;
|
||||
int startColumn = column;
|
||||
int startLine = lineNumber;
|
||||
int startColumn = columnNumber;
|
||||
char prevChar = firstDoubleQuote;
|
||||
|
||||
text.append(firstDoubleQuote);
|
||||
|
||||
int c;
|
||||
|
||||
while ((c = inStream.read()) != -1) {
|
||||
while ((c = inputStream.read()) != EOF) {
|
||||
char nextChar = (char) c;
|
||||
|
||||
++column;
|
||||
++columnNumber;
|
||||
text.append(nextChar);
|
||||
|
||||
switch (nextChar) {
|
||||
case '\n':
|
||||
++line;
|
||||
column = 0;
|
||||
|
||||
case NEWLINE:
|
||||
moveToNewLine();
|
||||
break;
|
||||
case '\"':
|
||||
if (prevChar != '\\') {
|
||||
case DOUBLE_QUOTE:
|
||||
if (prevChar != BACKSLASH) {
|
||||
// we have found the terminating double quote
|
||||
|
||||
return new Token(Token.Type.STRING, text.toString(), fileName, startLine, startColumn);
|
||||
return new Token(Token.Type.STRING, text.toString(), inputStreamName, startLine, startColumn);
|
||||
}
|
||||
|
||||
// this is an escaped double quote
|
||||
|
@ -180,96 +129,81 @@ public class LispScanner {
|
|||
throw new RuntimeException("unterminated quoted string" + " - line " + startLine + " column " + startColumn);
|
||||
}
|
||||
|
||||
// Retrieve a number token from 'inStream'.
|
||||
//
|
||||
// Parameters: firstDigit - the first digit of this number
|
||||
// Returns: a number token obtained from 'inStream'
|
||||
// Throws: IOException - Indicates that an I/O error has occurred.
|
||||
// Precondition: 'firstDigit' must be the first digit of this number and
|
||||
// 'inStream' must not be null.
|
||||
private Token retrieveNumber(char firstDigit) throws IOException {
|
||||
StringBuffer text = new StringBuffer();
|
||||
int startColumn = column;
|
||||
int startColumn = columnNumber;
|
||||
|
||||
text.append(firstDigit);
|
||||
inStream.mark(1);
|
||||
inputStream.mark(1);
|
||||
|
||||
int c;
|
||||
|
||||
while ((c = inStream.read()) != -1) {
|
||||
while ((c = inputStream.read()) != EOF) {
|
||||
char nextChar = (char) c;
|
||||
|
||||
if (Character.isDigit(nextChar)) {
|
||||
// 'nextChar' is a digit in this number
|
||||
|
||||
text.append(nextChar);
|
||||
++column;
|
||||
++columnNumber;
|
||||
} else {
|
||||
// we have reached the end of the number
|
||||
|
||||
inStream.reset(); // unread the last character
|
||||
inputStream.reset(); // unread the last character
|
||||
|
||||
return new Token(Token.Type.NUMBER, text.toString(), fileName, line, startColumn);
|
||||
return new Token(Token.Type.NUMBER, text.toString(), inputStreamName, lineNumber, startColumn);
|
||||
}
|
||||
|
||||
inStream.mark(1);
|
||||
inputStream.mark(1);
|
||||
}
|
||||
|
||||
// there are no more bytes to be read from 'inStream' after this number
|
||||
// token
|
||||
|
||||
return new Token(Token.Type.NUMBER, text.toString(), fileName, line, startColumn);
|
||||
return new Token(Token.Type.NUMBER, text.toString(), inputStreamName, lineNumber, startColumn);
|
||||
}
|
||||
|
||||
// Retrieve an identifier token from 'inStream'.
|
||||
//
|
||||
// Parameters: firstChar - the first character of this identifier
|
||||
// Returns: an identifier token obtained from 'inStream'
|
||||
// Throws: IOException - Indicates that an I/O error has occurred.
|
||||
// Precondition: 'firstChar' must be the first character of this identifier
|
||||
// and 'inStream' must not be null.
|
||||
private Token retrieveIdentifier(char firstChar) throws IOException {
|
||||
StringBuffer text = new StringBuffer();
|
||||
int startColumn = column;
|
||||
int startColumn = columnNumber;
|
||||
|
||||
text.append(firstChar);
|
||||
inStream.mark(1);
|
||||
inputStream.mark(1);
|
||||
|
||||
int c;
|
||||
|
||||
while ((c = inStream.read()) != -1) {
|
||||
while ((c = inputStream.read()) != EOF) {
|
||||
char nextChar = (char) c;
|
||||
|
||||
if (isLegalIdChar(nextChar)) {
|
||||
if (isLegalIdentifierCharacter(nextChar)) {
|
||||
// 'nextChar' is part of the identifier
|
||||
|
||||
text.append(nextChar);
|
||||
++column;
|
||||
++columnNumber;
|
||||
} else {
|
||||
// we have reached the end of this identifier
|
||||
|
||||
inStream.reset(); // unread the last character
|
||||
inputStream.reset(); // unread the last character
|
||||
|
||||
return new Token(Token.Type.IDENTIFIER, text.toString(), fileName, line, startColumn);
|
||||
return new Token(Token.Type.IDENTIFIER, text.toString(), inputStreamName, lineNumber, startColumn);
|
||||
}
|
||||
|
||||
inStream.mark(1);
|
||||
inputStream.mark(1);
|
||||
}
|
||||
|
||||
// there are no more bytes to be read from 'inStream' after this
|
||||
// identifier token
|
||||
|
||||
return new Token(Token.Type.IDENTIFIER, text.toString(), fileName, line, startColumn);
|
||||
return new Token(Token.Type.IDENTIFIER, text.toString(), inputStreamName, lineNumber, startColumn);
|
||||
}
|
||||
|
||||
// Test if a character is legal to be contained within an identifier in
|
||||
// Lisp.
|
||||
//
|
||||
// Returns: 'true' if the character can be found within an identifier in
|
||||
// Lisp; 'false' otherwise
|
||||
private boolean isLegalIdChar(char c) {
|
||||
return ((!Character.isWhitespace(c)) && (c != '\"') && (c != '\'') && (c != '\\') && (c != '`') && (c != '(')
|
||||
&& (c != ')') && (c != '[') && (c != ']') && (c != '#') && (c != '.') && (c != ';'));
|
||||
private void moveToNewLine() {
|
||||
lineNumber++;
|
||||
columnNumber = 0;
|
||||
}
|
||||
|
||||
private boolean isLegalIdentifierCharacter(char c) {
|
||||
return (!Character.isWhitespace(c)) && (illegalIdentifierCharacters.get(c) == null);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
package util;
|
||||
|
||||
/**
 * Named constants for the individual characters that the scanner code
 * compares against, plus the end-of-stream marker.
 *
 * <p>
 * This class only holds constants; it is final and cannot be instantiated.
 */
public final class Characters {

    public static final char BACKSLASH = '\\';
    public static final char DOUBLE_QUOTE = '\"';
    public static final char HASH = '#';
    public static final char LEFT_PARENTHESIS = '(';
    public static final char LEFT_SQUARE_BRACKET = '[';
    public static final char NEWLINE = '\n';
    public static final char PERIOD = '.';
    public static final char RIGHT_PARENTHESIS = ')';
    public static final char RIGHT_SQUARE_BRACKET = ']';
    public static final char SEMICOLON = ';';
    public static final char SINGLE_QUOTE = '\'';
    public static final char TICK_MARK = '`';

    /**
     * Value the scanner compares against the result of a stream
     * {@code read()} call to detect that no more input is available.
     */
    public static final int EOF = -1;

    /** Not instantiable: this class is a holder for constants only. */
    private Characters() {
    }

}
|
|
@ -0,0 +1,87 @@
|
|||
package scanner;
|
||||
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import testutils.TestUtilities;
|
||||
|
||||
public class LispScannerLineColumnTester {
|
||||
|
||||
@Before
|
||||
public void setUp() throws Exception {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void givenSimpleString_RecordsCorrectLocation() throws IOException {
|
||||
String input = "\"string\"";
|
||||
LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 1) };
|
||||
|
||||
assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void givenMultipleStrings_RecordsCorrectLocations() throws IOException {
|
||||
String input = "\"string1\" \n \"string2 \n with newline\" \n \"string3\"";
|
||||
LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 1), LineColumn.create(2, 2),
|
||||
LineColumn.create(4, 3) };
|
||||
|
||||
assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void givenQuotedList_RecordsCorrectLocations() throws IOException {
|
||||
String input = "'(1 2 3 4 5)";
|
||||
LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 1), LineColumn.create(1, 2),
|
||||
LineColumn.create(1, 3), LineColumn.create(1, 5),
|
||||
LineColumn.create(1, 7), LineColumn.create(1, 9),
|
||||
LineColumn.create(1, 11), LineColumn.create(1, 12) };
|
||||
|
||||
assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void givenListSpanningMultipleLines_RecordsCorrectLocations() throws IOException {
|
||||
String input = " ( 1 2 \n 3 4 \n5 ) ";
|
||||
LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 2), LineColumn.create(1, 4),
|
||||
LineColumn.create(1, 6), LineColumn.create(2, 2),
|
||||
LineColumn.create(2, 4), LineColumn.create(3, 1),
|
||||
LineColumn.create(3, 3) };
|
||||
|
||||
assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns);
|
||||
}
|
||||
|
||||
private void assertTokenLineAndColumnsMatch(String input, LineColumn[] expectedLineColumnList) throws IOException {
|
||||
InputStream stringInputStream = TestUtilities.createInputStreamFromString(input);
|
||||
LispScanner lispScanner = new LispScanner(stringInputStream, "stringInputStream");
|
||||
|
||||
for (LineColumn lineColumn : expectedLineColumnList) {
|
||||
Token nextToken = lispScanner.nextToken();
|
||||
assertTrue(lineColumn.isEqual(nextToken));
|
||||
}
|
||||
}
|
||||
|
||||
private static class LineColumn {
|
||||
|
||||
private int line;
|
||||
private int column;
|
||||
|
||||
public static LineColumn create(int line, int column) {
|
||||
LineColumn lineColumn = new LineColumn();
|
||||
lineColumn.line = line;
|
||||
lineColumn.column = column;
|
||||
|
||||
return lineColumn;
|
||||
}
|
||||
|
||||
public boolean isEqual(Token token) {
|
||||
return (this.line == token.getLine()) && (this.column == token.getColumn());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -10,7 +10,7 @@ import org.junit.Test;
|
|||
import scanner.Token.Type;
|
||||
import testutils.TestUtilities;
|
||||
|
||||
public class LispScannerTester {
|
||||
public class LispScannerTypeTester {
|
||||
|
||||
@Test
|
||||
public void givenEmptyFile_returnsCorrectTokenTypes() throws IOException {
|
Loading…
Reference in New Issue