Added unit tests and started refactoring LispScanner

This commit is contained in:
Mike Cifelli 2016-12-09 12:29:56 -05:00
parent 4a5f169076
commit 9b3261f575
5 changed files with 208 additions and 161 deletions

View File

@ -1,5 +1,11 @@
package scanner; package scanner;
import static util.Characters.BACKSLASH;
import static util.Characters.DOUBLE_QUOTE;
import static util.Characters.EOF;
import static util.Characters.NEWLINE;
import static util.Characters.SEMICOLON;
import java.io.FilterInputStream; import java.io.FilterInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
@ -44,11 +50,11 @@ public class LispFilterInputStream extends FilterInputStream {
} }
private boolean haveEncounteredStringBoundary() { private boolean haveEncounteredStringBoundary() {
return (previousCharacter != '\\') && (nextCharacter == '\"'); return (previousCharacter != BACKSLASH) && (nextCharacter == DOUBLE_QUOTE);
} }
private boolean haveEnteredComment() { private boolean haveEnteredComment() {
return (nextCharacter == ';') && (!isInQuotedString); return (nextCharacter == SEMICOLON) && (!isInQuotedString);
} }
private void consumeAllBytesInComment() throws IOException { private void consumeAllBytesInComment() throws IOException {
@ -57,7 +63,7 @@ public class LispFilterInputStream extends FilterInputStream {
} }
private boolean stillInComment() { private boolean stillInComment() {
return (nextCharacter != '\n') && (nextCharacter != -1); return (nextCharacter != NEWLINE) && (nextCharacter != EOF);
} }
} }

View File

@ -1,171 +1,120 @@
/*
* Name: Mike Cifelli
* Course: CIS 443 - Programming Languages
* Assignment: Lisp Interpreter Phase 1 - Lexical Analysis
*/
package scanner; package scanner;
import static util.Characters.BACKSLASH;
import static util.Characters.DOUBLE_QUOTE;
import static util.Characters.EOF;
import static util.Characters.HASH;
import static util.Characters.LEFT_PARENTHESIS;
import static util.Characters.LEFT_SQUARE_BRACKET;
import static util.Characters.NEWLINE;
import static util.Characters.PERIOD;
import static util.Characters.RIGHT_PARENTHESIS;
import static util.Characters.RIGHT_SQUARE_BRACKET;
import static util.Characters.SEMICOLON;
import static util.Characters.SINGLE_QUOTE;
import static util.Characters.TICK_MARK;
import java.io.BufferedInputStream; import java.io.BufferedInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
/** /**
* A <code>LispScanner</code> converts a stream of bytes into a stream of Lisp * Converts a stream of bytes into a stream of Lisp tokens.
* tokens. When the end of stream has been reached a token with a type of
* <code>Token.Type.EOF</code> is returned from the <code>nextToken</code>
* method of this scanner.
*/ */
public class LispScanner { public class LispScanner {
private LispFilterInputStream inStream; private static Map<Character, Boolean> illegalIdentifierCharacters = new HashMap<>();
private Token currToken;
private String fileName; static {
private int line; illegalIdentifierCharacters.put(DOUBLE_QUOTE, true);
private int column; illegalIdentifierCharacters.put(SINGLE_QUOTE, true);
illegalIdentifierCharacters.put(BACKSLASH, true);
illegalIdentifierCharacters.put(TICK_MARK, true);
illegalIdentifierCharacters.put(LEFT_PARENTHESIS, true);
illegalIdentifierCharacters.put(RIGHT_PARENTHESIS, true);
illegalIdentifierCharacters.put(LEFT_SQUARE_BRACKET, true);
illegalIdentifierCharacters.put(RIGHT_SQUARE_BRACKET, true);
illegalIdentifierCharacters.put(HASH, true);
illegalIdentifierCharacters.put(PERIOD, true);
illegalIdentifierCharacters.put(SEMICOLON, true);
}
private InputStream inputStream;
private String inputStreamName;
private int lineNumber;
private int columnNumber;
/**
* Create a new <code>LispScanner</code> that produces Lisp tokens from the
* specified input stream.
*
* @param in
* the input stream to obtain Lisp tokens from (must not be
* <code>null</code>)
* @param fileName
* the name of the file that <code>in</code> is reading from
*/
public LispScanner(InputStream in, String fileName) { public LispScanner(InputStream in, String fileName) {
this.inStream = new LispFilterInputStream(new BufferedInputStream(in)); this.inputStream = new LispFilterInputStream(new BufferedInputStream(in));
this.currToken = null; this.inputStreamName = fileName;
this.fileName = fileName; this.lineNumber = 1;
this.line = 1; this.columnNumber = 0;
this.column = 0;
} }
/**
* Returns the same Lisp token returned from the last call to the
* <code>nextToken</code> method of this scanner. In the case that no calls
* to <code>nextToken</code> have been made yet, this method returns
* <code>null</code>.
*
* @return the last Lisp token returned from this scanner or
* <code>null</code> (if no tokens have been returned from this
* scanner yet)
*/
public Token getCurrToken() {
return currToken;
}
/**
* Returns the next Lisp token from this scanner.
*
* @return the next Lisp token from this scanner.
* @throws RuntimeException
* Indicates that an illegal character or an unterminated quoted
* string was encountered in the input stream (not counting
* comments).
* @throws IOException
* Indicates that an I/O error has occurred.
*/
public Token nextToken() throws IOException { public Token nextToken() throws IOException {
currToken = retrieveNextToken();
return currToken;
}
// Retrieve the next Lisp token from 'inStream'.
//
// Returns: the next Lisp token found in 'inStream'
// Precondition: 'inStream' must not be null.
// Throws: RuntimeException - Indicates that an illegal character or an
// unterminated quoted string was encountered in
// 'inStream'.
// Throws: IOException - Indicates that an I/O error has occurred.
private Token retrieveNextToken() throws IOException {
int c; int c;
while ((c = inStream.read()) != -1) { while ((c = inputStream.read()) != EOF) {
char nextChar = (char) c; char nextChar = (char) c;
++column; ++columnNumber;
// determine the type of the Lisp token from the character obtained
// from 'inStream'
switch (nextChar) { switch (nextChar) {
case '\n': case NEWLINE:
// we have hit a new line so increment 'line' and reset moveToNewLine();
// 'column'
++line;
column = 0;
break; break;
case '(': case LEFT_PARENTHESIS:
return new Token(Token.Type.LEFT_PAREN, "(", fileName, line, column); return new Token(Token.Type.LEFT_PAREN, "(", inputStreamName, lineNumber, columnNumber);
case ')': case RIGHT_PARENTHESIS:
return new Token(Token.Type.RIGHT_PAREN, ")", fileName, line, column); return new Token(Token.Type.RIGHT_PAREN, ")", inputStreamName, lineNumber, columnNumber);
case '\'': case SINGLE_QUOTE:
return new Token(Token.Type.QUOTE_MARK, "\'", fileName, line, column); return new Token(Token.Type.QUOTE_MARK, "\'", inputStreamName, lineNumber, columnNumber);
case '\"': case DOUBLE_QUOTE:
return retrieveString(nextChar); return retrieveString(nextChar);
default: default:
if (Character.isWhitespace(nextChar)) { // skip whitespace if (Character.isWhitespace(nextChar)) {
continue; continue;
} else if (Character.isDigit(nextChar)) { // number } else if (Character.isDigit(nextChar)) {
return retrieveNumber(nextChar); return retrieveNumber(nextChar);
} else if (isLegalIdChar(nextChar)) { // identifier } else if (isLegalIdentifierCharacter(nextChar)) {
return retrieveIdentifier(nextChar); return retrieveIdentifier(nextChar);
} else { } else {
// 'nextChar' can not start any Lisp token throw new RuntimeException("illegal character " + "\'" + nextChar + "\'" + " - line " + lineNumber
+ " column " + columnNumber);
throw new RuntimeException(
"illegal character " + "\'" + nextChar + "\'" + " - line " + line + " column " + column);
} }
} }
} }
// we have reached the end of 'inStream' so we return an end-of-file return new Token(Token.Type.EOF, "EOF", inputStreamName, lineNumber, columnNumber);
// token
return new Token(Token.Type.EOF, "EOF", fileName, line, column);
} }
// Retrieve a quoted string token from 'inStream'.
//
// Parameters: firstDoubleQuote - the opening double quote of this quoted
// string
// Returns: a quoted string token obtained from 'instream'
// Throws: RuntimeException - Indicates that this quoted string was
// missing its terminating double quote.
// Throws: IOException - Indicates that an I/O error has occurred.
// Precondition: 'firstDoubleQuote' must be the leading double quote
// character of this quoted string and 'inStream' must not
// be null.
private Token retrieveString(char firstDoubleQuote) throws IOException { private Token retrieveString(char firstDoubleQuote) throws IOException {
StringBuffer text = new StringBuffer(); StringBuffer text = new StringBuffer();
int startLine = line; int startLine = lineNumber;
int startColumn = column; int startColumn = columnNumber;
char prevChar = firstDoubleQuote; char prevChar = firstDoubleQuote;
text.append(firstDoubleQuote); text.append(firstDoubleQuote);
int c; int c;
while ((c = inStream.read()) != -1) { while ((c = inputStream.read()) != EOF) {
char nextChar = (char) c; char nextChar = (char) c;
++column; ++columnNumber;
text.append(nextChar); text.append(nextChar);
switch (nextChar) { switch (nextChar) {
case '\n': case NEWLINE:
++line; moveToNewLine();
column = 0;
break; break;
case '\"': case DOUBLE_QUOTE:
if (prevChar != '\\') { if (prevChar != BACKSLASH) {
// we have found the terminating double quote // we have found the terminating double quote
return new Token(Token.Type.STRING, text.toString(), fileName, startLine, startColumn); return new Token(Token.Type.STRING, text.toString(), inputStreamName, startLine, startColumn);
} }
// this is an escaped double quote // this is an escaped double quote
@ -180,96 +129,81 @@ public class LispScanner {
throw new RuntimeException("unterminated quoted string" + " - line " + startLine + " column " + startColumn); throw new RuntimeException("unterminated quoted string" + " - line " + startLine + " column " + startColumn);
} }
// Retrieve a number token from 'inStream'.
//
// Parameters: firstDigit - the first digit of this number
// Returns: a number token obtained from 'inStream'
// Throws: IOException - Indicates that an I/O error has occurred.
// Precondition: 'firstDigit' must be the first digit of this number and
// 'inStream' must not be null.
private Token retrieveNumber(char firstDigit) throws IOException { private Token retrieveNumber(char firstDigit) throws IOException {
StringBuffer text = new StringBuffer(); StringBuffer text = new StringBuffer();
int startColumn = column; int startColumn = columnNumber;
text.append(firstDigit); text.append(firstDigit);
inStream.mark(1); inputStream.mark(1);
int c; int c;
while ((c = inStream.read()) != -1) { while ((c = inputStream.read()) != EOF) {
char nextChar = (char) c; char nextChar = (char) c;
if (Character.isDigit(nextChar)) { if (Character.isDigit(nextChar)) {
// 'nextChar' is a digit in this number // 'nextChar' is a digit in this number
text.append(nextChar); text.append(nextChar);
++column; ++columnNumber;
} else { } else {
// we have reached the end of the number // we have reached the end of the number
inStream.reset(); // unread the last character inputStream.reset(); // unread the last character
return new Token(Token.Type.NUMBER, text.toString(), fileName, line, startColumn); return new Token(Token.Type.NUMBER, text.toString(), inputStreamName, lineNumber, startColumn);
} }
inStream.mark(1); inputStream.mark(1);
} }
// there are no more bytes to be read from 'inStream' after this number // there are no more bytes to be read from 'inStream' after this number
// token // token
return new Token(Token.Type.NUMBER, text.toString(), fileName, line, startColumn); return new Token(Token.Type.NUMBER, text.toString(), inputStreamName, lineNumber, startColumn);
} }
// Retrieve an identifier token from 'inStream'.
//
// Parameters: firstChar - the first character of this identifier
// Returns: an identifier token obtained from 'inStream'
// Throws: IOException - Indicates that an I/O error has occurred.
// Precondition: 'firsChar' must be the first character of this identifier
// and 'inStream' must not be null.
private Token retrieveIdentifier(char firstChar) throws IOException { private Token retrieveIdentifier(char firstChar) throws IOException {
StringBuffer text = new StringBuffer(); StringBuffer text = new StringBuffer();
int startColumn = column; int startColumn = columnNumber;
text.append(firstChar); text.append(firstChar);
inStream.mark(1); inputStream.mark(1);
int c; int c;
while ((c = inStream.read()) != -1) { while ((c = inputStream.read()) != EOF) {
char nextChar = (char) c; char nextChar = (char) c;
if (isLegalIdChar(nextChar)) { if (isLegalIdentifierCharacter(nextChar)) {
// 'nextChar' is part of the identifier // 'nextChar' is part of the identifier
text.append(nextChar); text.append(nextChar);
++column; ++columnNumber;
} else { } else {
// we have reached the end of this identifier // we have reached the end of this identifier
inStream.reset(); // unread the last character inputStream.reset(); // unread the last character
return new Token(Token.Type.IDENTIFIER, text.toString(), fileName, line, startColumn); return new Token(Token.Type.IDENTIFIER, text.toString(), inputStreamName, lineNumber, startColumn);
} }
inStream.mark(1); inputStream.mark(1);
} }
// there are no more bytes to be read from 'inStream' after this // there are no more bytes to be read from 'inStream' after this
// identifier token // identifier token
return new Token(Token.Type.IDENTIFIER, text.toString(), fileName, line, startColumn); return new Token(Token.Type.IDENTIFIER, text.toString(), inputStreamName, lineNumber, startColumn);
} }
// Test if a character is legal to be contained within an identifier in private void moveToNewLine() {
// Lisp. lineNumber++;
// columnNumber = 0;
// Returns: 'true' if the character can be found within an identifier in }
// Lisp; 'false' otherwise
private boolean isLegalIdChar(char c) { private boolean isLegalIdentifierCharacter(char c) {
return ((!Character.isWhitespace(c)) && (c != '\"') && (c != '\'') && (c != '\\') && (c != '`') && (c != '(') return (!Character.isWhitespace(c)) && (illegalIdentifierCharacters.get(c) == null);
&& (c != ')') && (c != '[') && (c != ']') && (c != '#') && (c != '.') && (c != ';'));
} }
} }

20
src/util/Characters.java Normal file
View File

@ -0,0 +1,20 @@
package util;
/**
 * Named constants for the individual characters that the scanner package
 * compares input against, plus the end-of-stream sentinel.
 *
 * <p>This class only holds constants; it is final and cannot be instantiated.
 */
public final class Characters {

    public static final char BACKSLASH = '\\';
    public static final char DOUBLE_QUOTE = '\"';
    public static final char HASH = '#';
    public static final char LEFT_PARENTHESIS = '(';
    public static final char LEFT_SQUARE_BRACKET = '[';
    public static final char NEWLINE = '\n';
    public static final char PERIOD = '.';
    public static final char RIGHT_PARENTHESIS = ')';
    public static final char RIGHT_SQUARE_BRACKET = ']';
    public static final char SEMICOLON = ';';
    public static final char SINGLE_QUOTE = '\'';
    public static final char TICK_MARK = '`';

    /**
     * End-of-stream marker. Declared as an {@code int} (not a {@code char})
     * so it can be compared directly with the {@code int} result of a stream
     * read, which is -1 when no more data is available.
     */
    public static final int EOF = -1;

    private Characters() {
        // Constants holder: there is nothing to instantiate.
    }
}

View File

@ -0,0 +1,87 @@
package scanner;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.io.InputStream;
import org.junit.Before;
import org.junit.Test;
import testutils.TestUtilities;
/**
 * Checks the line and column that {@link LispScanner} records on each token
 * it returns, for single-line and multi-line inputs.
 */
public class LispScannerLineColumnTester {

    @Before
    public void setUp() throws Exception {
        // No shared fixture: every test builds its own scanner from its input string.
    }

    @Test
    public void givenSimpleString_RecordsCorrectLocation() throws IOException {
        String input = "\"string\"";
        LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 1) };

        assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns);
    }

    @Test
    public void givenMultipleStrings_RecordsCorrectLocations() throws IOException {
        String input = "\"string1\" \n \"string2 \n with newline\" \n \"string3\"";
        LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 1), LineColumn.create(2, 2),
                                                 LineColumn.create(4, 3) };

        assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns);
    }

    @Test
    public void givenQuotedList_RecordsCorrectLocations() throws IOException {
        String input = "'(1 2 3 4 5)";
        LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 1), LineColumn.create(1, 2),
                                                 LineColumn.create(1, 3), LineColumn.create(1, 5),
                                                 LineColumn.create(1, 7), LineColumn.create(1, 9),
                                                 LineColumn.create(1, 11), LineColumn.create(1, 12) };

        assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns);
    }

    @Test
    public void givenListSpanningMultipleLines_RecordsCorrectLocations() throws IOException {
        String input = " ( 1 2 \n 3 4 \n5 ) ";
        LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 2), LineColumn.create(1, 4),
                                                 LineColumn.create(1, 6), LineColumn.create(2, 2),
                                                 LineColumn.create(2, 4), LineColumn.create(3, 1),
                                                 LineColumn.create(3, 3) };

        assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns);
    }

    /**
     * Scans {@code input} and asserts that the i-th token returned by the
     * scanner carries the i-th expected line and column.
     *
     * @param input
     *            the text handed to the scanner
     * @param expectedLineColumnList
     *            expected positions, in the order the tokens are returned
     * @throws IOException
     *             if the scanner fails to read from the underlying stream
     */
    private void assertTokenLineAndColumnsMatch(String input, LineColumn[] expectedLineColumnList) throws IOException {
        InputStream stringInputStream = TestUtilities.createInputStreamFromString(input);
        LispScanner lispScanner = new LispScanner(stringInputStream, "stringInputStream");

        for (int tokenIndex = 0; tokenIndex < expectedLineColumnList.length; tokenIndex++) {
            LineColumn expected = expectedLineColumnList[tokenIndex];
            Token nextToken = lispScanner.nextToken();

            // Include the position details so a failure identifies the offending token.
            assertTrue(expected.describeMismatch(tokenIndex, nextToken), expected.isEqual(nextToken));
        }
    }

    /** An expected (line, column) pair for a single token. */
    private static class LineColumn {

        private final int line;
        private final int column;

        private LineColumn(int line, int column) {
            this.line = line;
            this.column = column;
        }

        public static LineColumn create(int line, int column) {
            return new LineColumn(line, column);
        }

        public boolean isEqual(Token token) {
            return (this.line == token.getLine()) && (this.column == token.getColumn());
        }

        /** Builds the failure message shown when {@code token} is not at this position. */
        String describeMismatch(int tokenIndex, Token token) {
            return "token #" + tokenIndex + ": expected line " + line + " column " + column + " but was line "
                   + token.getLine() + " column " + token.getColumn();
        }
    }
}

View File

@ -10,7 +10,7 @@ import org.junit.Test;
import scanner.Token.Type; import scanner.Token.Type;
import testutils.TestUtilities; import testutils.TestUtilities;
public class LispScannerTester { public class LispScannerTypeTester {
@Test @Test
public void givenEmptyFile_returnsCorrectTokenTypes() throws IOException { public void givenEmptyFile_returnsCorrectTokenTypes() throws IOException {