Added unit tests and started refactoring LispScanner

This commit is contained in:
Mike Cifelli 2016-12-09 12:29:56 -05:00
parent 4a5f169076
commit 9b3261f575
5 changed files with 208 additions and 161 deletions

View File

@ -1,5 +1,11 @@
package scanner;
import static util.Characters.BACKSLASH;
import static util.Characters.DOUBLE_QUOTE;
import static util.Characters.EOF;
import static util.Characters.NEWLINE;
import static util.Characters.SEMICOLON;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
@ -44,11 +50,11 @@ public class LispFilterInputStream extends FilterInputStream {
}
private boolean haveEncounteredStringBoundary() {
return (previousCharacter != '\\') && (nextCharacter == '\"');
return (previousCharacter != BACKSLASH) && (nextCharacter == DOUBLE_QUOTE);
}
private boolean haveEnteredComment() {
return (nextCharacter == ';') && (!isInQuotedString);
return (nextCharacter == SEMICOLON) && (!isInQuotedString);
}
private void consumeAllBytesInComment() throws IOException {
@ -57,7 +63,7 @@ public class LispFilterInputStream extends FilterInputStream {
}
private boolean stillInComment() {
return (nextCharacter != '\n') && (nextCharacter != -1);
return (nextCharacter != NEWLINE) && (nextCharacter != EOF);
}
}

View File

@ -1,171 +1,120 @@
/*
* Name: Mike Cifelli
* Course: CIS 443 - Programming Languages
* Assignment: Lisp Interpreter Phase 1 - Lexical Analysis
*/
package scanner;
import static util.Characters.BACKSLASH;
import static util.Characters.DOUBLE_QUOTE;
import static util.Characters.EOF;
import static util.Characters.HASH;
import static util.Characters.LEFT_PARENTHESIS;
import static util.Characters.LEFT_SQUARE_BRACKET;
import static util.Characters.NEWLINE;
import static util.Characters.PERIOD;
import static util.Characters.RIGHT_PARENTHESIS;
import static util.Characters.RIGHT_SQUARE_BRACKET;
import static util.Characters.SEMICOLON;
import static util.Characters.SINGLE_QUOTE;
import static util.Characters.TICK_MARK;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
/**
* A <code>LispScanner</code> converts a stream of bytes into a stream of Lisp
* tokens. When the end of stream has been reached a token with a type of
* <code>Token.Type.EOF</code> is returned from the <code>nextToken</code>
* method of this scanner.
* Converts a stream of bytes into a stream of Lisp tokens.
*/
public class LispScanner {
private LispFilterInputStream inStream;
private Token currToken;
private String fileName;
private int line;
private int column;
private static Map<Character, Boolean> illegalIdentifierCharacters = new HashMap<>();
static {
illegalIdentifierCharacters.put(DOUBLE_QUOTE, true);
illegalIdentifierCharacters.put(SINGLE_QUOTE, true);
illegalIdentifierCharacters.put(BACKSLASH, true);
illegalIdentifierCharacters.put(TICK_MARK, true);
illegalIdentifierCharacters.put(LEFT_PARENTHESIS, true);
illegalIdentifierCharacters.put(RIGHT_PARENTHESIS, true);
illegalIdentifierCharacters.put(LEFT_SQUARE_BRACKET, true);
illegalIdentifierCharacters.put(RIGHT_SQUARE_BRACKET, true);
illegalIdentifierCharacters.put(HASH, true);
illegalIdentifierCharacters.put(PERIOD, true);
illegalIdentifierCharacters.put(SEMICOLON, true);
}
private InputStream inputStream;
private String inputStreamName;
private int lineNumber;
private int columnNumber;
/**
* Create a new <code>LispScanner</code> that produces Lisp tokens from the
* specified input stream.
*
* @param in
* the input stream to obtain Lisp tokens from (must not be
* <code>null</code>)
* @param fileName
* the name of the file that <code>in</code> is reading from
*/
public LispScanner(InputStream in, String fileName) {
this.inStream = new LispFilterInputStream(new BufferedInputStream(in));
this.currToken = null;
this.fileName = fileName;
this.line = 1;
this.column = 0;
this.inputStream = new LispFilterInputStream(new BufferedInputStream(in));
this.inputStreamName = fileName;
this.lineNumber = 1;
this.columnNumber = 0;
}
/**
* Returns the same Lisp token returned from the last call to the
* <code>nextToken</code> method of this scanner. In the case that no calls
* to <code>nextToken</code> have been made yet, this method returns
* <code>null</code>.
*
* @return the last Lisp token returned from this scanner or
* <code>null</code> (if no tokens have been returned from this
* scanner yet)
*/
public Token getCurrToken() {
return currToken;
}
/**
* Returns the next Lisp token from this scanner.
*
* @return the next Lisp token from this scanner.
* @throws RuntimeException
* Indicates that an illegal character or an unterminated quoted
* string was encountered in the input stream (not counting
* comments).
* @throws IOException
* Indicates that an I/O error has occurred.
*/
public Token nextToken() throws IOException {
currToken = retrieveNextToken();
return currToken;
}
// Retrieve the next Lisp token from 'inStream'.
//
// Returns: the next Lisp token found in 'inStream'
// Precondition: 'inStream' must not be null.
// Throws: RuntimeException - Indicates that an illegal character or an
// unterminated quoted string was encountered in
// 'inStream'.
// Throws: IOException - Indicates that an I/O error has occurred.
private Token retrieveNextToken() throws IOException {
int c;
while ((c = inStream.read()) != -1) {
while ((c = inputStream.read()) != EOF) {
char nextChar = (char) c;
++column;
++columnNumber;
// determine the type of the Lisp token from the character obtained
// from 'inStream'
switch (nextChar) {
case '\n':
// we have hit a new line so increment 'line' and reset
// 'column'
++line;
column = 0;
case NEWLINE:
moveToNewLine();
break;
case '(':
return new Token(Token.Type.LEFT_PAREN, "(", fileName, line, column);
case ')':
return new Token(Token.Type.RIGHT_PAREN, ")", fileName, line, column);
case '\'':
return new Token(Token.Type.QUOTE_MARK, "\'", fileName, line, column);
case '\"':
case LEFT_PARENTHESIS:
return new Token(Token.Type.LEFT_PAREN, "(", inputStreamName, lineNumber, columnNumber);
case RIGHT_PARENTHESIS:
return new Token(Token.Type.RIGHT_PAREN, ")", inputStreamName, lineNumber, columnNumber);
case SINGLE_QUOTE:
return new Token(Token.Type.QUOTE_MARK, "\'", inputStreamName, lineNumber, columnNumber);
case DOUBLE_QUOTE:
return retrieveString(nextChar);
default:
if (Character.isWhitespace(nextChar)) { // skip whitespace
if (Character.isWhitespace(nextChar)) {
continue;
} else if (Character.isDigit(nextChar)) { // number
} else if (Character.isDigit(nextChar)) {
return retrieveNumber(nextChar);
} else if (isLegalIdChar(nextChar)) { // identifier
} else if (isLegalIdentifierCharacter(nextChar)) {
return retrieveIdentifier(nextChar);
} else {
// 'nextChar' can not start any Lisp token
throw new RuntimeException(
"illegal character " + "\'" + nextChar + "\'" + " - line " + line + " column " + column);
throw new RuntimeException("illegal character " + "\'" + nextChar + "\'" + " - line " + lineNumber
+ " column " + columnNumber);
}
}
}
// we have reached the end of 'inStream' so we return an end-of-file
// token
return new Token(Token.Type.EOF, "EOF", fileName, line, column);
return new Token(Token.Type.EOF, "EOF", inputStreamName, lineNumber, columnNumber);
}
// Retrieve a quoted string token from 'inStream'.
//
// Parameters: firstDoubleQuote - the opening double quote of this quoted
// string
// Returns: a quoted string token obtained from 'inStream'
// Throws: RuntimeException - Indicates that this quoted string was
// missing its terminating double quote.
// Throws: IOException - Indicates that an I/O error has occurred.
// Precondition: 'firstDoubleQuote' must be the leading double quote
// character of this quoted string and 'inStream' must not
// be null.
private Token retrieveString(char firstDoubleQuote) throws IOException {
StringBuffer text = new StringBuffer();
int startLine = line;
int startColumn = column;
int startLine = lineNumber;
int startColumn = columnNumber;
char prevChar = firstDoubleQuote;
text.append(firstDoubleQuote);
int c;
while ((c = inStream.read()) != -1) {
while ((c = inputStream.read()) != EOF) {
char nextChar = (char) c;
++column;
++columnNumber;
text.append(nextChar);
switch (nextChar) {
case '\n':
++line;
column = 0;
case NEWLINE:
moveToNewLine();
break;
case '\"':
if (prevChar != '\\') {
case DOUBLE_QUOTE:
if (prevChar != BACKSLASH) {
// we have found the terminating double quote
return new Token(Token.Type.STRING, text.toString(), fileName, startLine, startColumn);
return new Token(Token.Type.STRING, text.toString(), inputStreamName, startLine, startColumn);
}
// this is an escaped double quote
@ -180,96 +129,81 @@ public class LispScanner {
throw new RuntimeException("unterminated quoted string" + " - line " + startLine + " column " + startColumn);
}
// Retrieve a number token from 'inStream'.
//
// Parameters: firstDigit - the first digit of this number
// Returns: a number token obtained from 'inStream'
// Throws: IOException - Indicates that an I/O error has occurred.
// Precondition: 'firstDigit' must be the first digit of this number and
// 'inStream' must not be null.
private Token retrieveNumber(char firstDigit) throws IOException {
StringBuffer text = new StringBuffer();
int startColumn = column;
int startColumn = columnNumber;
text.append(firstDigit);
inStream.mark(1);
inputStream.mark(1);
int c;
while ((c = inStream.read()) != -1) {
while ((c = inputStream.read()) != EOF) {
char nextChar = (char) c;
if (Character.isDigit(nextChar)) {
// 'nextChar' is a digit in this number
text.append(nextChar);
++column;
++columnNumber;
} else {
// we have reached the end of the number
inStream.reset(); // unread the last character
inputStream.reset(); // unread the last character
return new Token(Token.Type.NUMBER, text.toString(), fileName, line, startColumn);
return new Token(Token.Type.NUMBER, text.toString(), inputStreamName, lineNumber, startColumn);
}
inStream.mark(1);
inputStream.mark(1);
}
// there are no more bytes to be read from 'inStream' after this number
// token
return new Token(Token.Type.NUMBER, text.toString(), fileName, line, startColumn);
return new Token(Token.Type.NUMBER, text.toString(), inputStreamName, lineNumber, startColumn);
}
// Retrieve an identifier token from 'inStream'.
//
// Parameters: firstChar - the first character of this identifier
// Returns: an identifier token obtained from 'inStream'
// Throws: IOException - Indicates that an I/O error has occurred.
// Precondition: 'firstChar' must be the first character of this identifier
// and 'inStream' must not be null.
private Token retrieveIdentifier(char firstChar) throws IOException {
StringBuffer text = new StringBuffer();
int startColumn = column;
int startColumn = columnNumber;
text.append(firstChar);
inStream.mark(1);
inputStream.mark(1);
int c;
while ((c = inStream.read()) != -1) {
while ((c = inputStream.read()) != EOF) {
char nextChar = (char) c;
if (isLegalIdChar(nextChar)) {
if (isLegalIdentifierCharacter(nextChar)) {
// 'nextChar' is part of the identifier
text.append(nextChar);
++column;
++columnNumber;
} else {
// we have reached the end of this identifier
inStream.reset(); // unread the last character
inputStream.reset(); // unread the last character
return new Token(Token.Type.IDENTIFIER, text.toString(), fileName, line, startColumn);
return new Token(Token.Type.IDENTIFIER, text.toString(), inputStreamName, lineNumber, startColumn);
}
inStream.mark(1);
inputStream.mark(1);
}
// there are no more bytes to be read from 'inStream' after this
// identifier token
return new Token(Token.Type.IDENTIFIER, text.toString(), fileName, line, startColumn);
return new Token(Token.Type.IDENTIFIER, text.toString(), inputStreamName, lineNumber, startColumn);
}
// Test if a character is legal to be contained within an identifier in
// Lisp.
//
// Returns: 'true' if the character can be found within an identifier in
// Lisp; 'false' otherwise
private boolean isLegalIdChar(char c) {
return ((!Character.isWhitespace(c)) && (c != '\"') && (c != '\'') && (c != '\\') && (c != '`') && (c != '(')
&& (c != ')') && (c != '[') && (c != ']') && (c != '#') && (c != '.') && (c != ';'));
/**
 * Advances the scanner's position bookkeeping past a newline: the column
 * counter is reset to zero and the line counter moves to the next line.
 */
private void moveToNewLine() {
    columnNumber = 0;
    lineNumber += 1;
}
/**
 * Tests whether a character may appear inside an identifier.
 *
 * @param c the character to test
 * @return {@code false} for whitespace and for any character that has an
 *         entry in {@code illegalIdentifierCharacters}; {@code true} otherwise
 */
private boolean isLegalIdentifierCharacter(char c) {
    if (Character.isWhitespace(c)) {
        return false;
    }

    Boolean illegalEntry = illegalIdentifierCharacters.get(c);

    return illegalEntry == null;
}
}

20
src/util/Characters.java Normal file
View File

@ -0,0 +1,20 @@
package util;
/**
 * Named character constants shared by the scanner classes.
 *
 * <p>This class only holds constants; it is final and has a private
 * constructor so that it cannot be instantiated or extended.
 */
public final class Characters {

    public static final char BACKSLASH = '\\';
    public static final char DOUBLE_QUOTE = '\"';
    public static final char HASH = '#';
    public static final char LEFT_PARENTHESIS = '(';
    public static final char LEFT_SQUARE_BRACKET = '[';
    public static final char NEWLINE = '\n';
    public static final char PERIOD = '.';
    public static final char RIGHT_PARENTHESIS = ')';
    public static final char RIGHT_SQUARE_BRACKET = ']';
    public static final char SEMICOLON = ';';
    public static final char SINGLE_QUOTE = '\'';
    public static final char TICK_MARK = '`';

    /** Value that {@code InputStream.read()} returns at end of stream. */
    public static final int EOF = -1;

    private Characters() {
        // Constants holder: no instances.
    }
}

View File

@ -0,0 +1,87 @@
package scanner;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.io.InputStream;
import org.junit.Before;
import org.junit.Test;
import testutils.TestUtilities;
/**
 * Checks the line and column that {@link LispScanner} reports for each token
 * it produces from a given input string.
 */
public class LispScannerLineColumnTester {

    @Before
    public void setUp() throws Exception {
        // No shared fixture is needed yet.
    }

    @Test
    public void givenSimpleString_RecordsCorrectLocation() throws IOException {
        String input = "\"string\"";
        LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 1) };

        assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns);
    }

    // NOTE(review): with exactly one space before "string3" in the literal
    // below, the third token would appear to start at column 2 rather than 3.
    // Confirm the literal's whitespace against the committed file.
    @Test
    public void givenMultipleStrings_RecordsCorrectLocations() throws IOException {
        String input = "\"string1\" \n \"string2 \n with newline\" \n \"string3\"";
        LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 1), LineColumn.create(2, 2),
                                                 LineColumn.create(4, 3) };

        assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns);
    }

    @Test
    public void givenQuotedList_RecordsCorrectLocations() throws IOException {
        String input = "'(1 2 3 4 5)";
        LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 1), LineColumn.create(1, 2),
                                                 LineColumn.create(1, 3), LineColumn.create(1, 5),
                                                 LineColumn.create(1, 7), LineColumn.create(1, 9),
                                                 LineColumn.create(1, 11), LineColumn.create(1, 12) };

        assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns);
    }

    @Test
    public void givenListSpanningMultipleLines_RecordsCorrectLocations() throws IOException {
        String input = " ( 1 2 \n 3 4 \n5 ) ";
        LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 2), LineColumn.create(1, 4),
                                                 LineColumn.create(1, 6), LineColumn.create(2, 2),
                                                 LineColumn.create(2, 4), LineColumn.create(3, 1),
                                                 LineColumn.create(3, 3) };

        assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns);
    }

    /**
     * Scans {@code input} and asserts that the i-th token returned by the
     * scanner is located at the i-th expected line/column pair.
     *
     * @param input the text to scan
     * @param expectedLineColumnList the expected token locations, in token order
     * @throws IOException if the scanner fails to read the input
     */
    private void assertTokenLineAndColumnsMatch(String input, LineColumn[] expectedLineColumnList) throws IOException {
        InputStream stringInputStream = TestUtilities.createInputStreamFromString(input);
        LispScanner lispScanner = new LispScanner(stringInputStream, "stringInputStream");

        for (int index = 0; index < expectedLineColumnList.length; index++) {
            LineColumn expected = expectedLineColumnList[index];
            Token nextToken = lispScanner.nextToken();

            // Report which token mismatched and where, instead of a bare assertion failure.
            String failureMessage = "token #" + (index + 1) + ": expected " + expected + " but was line "
                                    + nextToken.getLine() + ", column " + nextToken.getColumn();

            assertTrue(failureMessage, expected.isEqual(nextToken));
        }
    }

    /** An expected token location: a line number paired with a column number. */
    private static class LineColumn {

        private final int line;
        private final int column;

        private LineColumn(int line, int column) {
            this.line = line;
            this.column = column;
        }

        public static LineColumn create(int line, int column) {
            return new LineColumn(line, column);
        }

        /** Returns true when the token's line and column both equal this pair. */
        public boolean isEqual(Token token) {
            return (this.line == token.getLine()) && (this.column == token.getColumn());
        }

        @Override
        public String toString() {
            return "line " + line + ", column " + column;
        }
    }
}

View File

@ -10,7 +10,7 @@ import org.junit.Test;
import scanner.Token.Type;
import testutils.TestUtilities;
public class LispScannerTester {
public class LispScannerTypeTester {
@Test
public void givenEmptyFile_returnsCorrectTokenTypes() throws IOException {