diff --git a/src/scanner/LispFilterInputStream.java b/src/scanner/LispFilterInputStream.java
index 8e550b0..e60a01b 100644
--- a/src/scanner/LispFilterInputStream.java
+++ b/src/scanner/LispFilterInputStream.java
@@ -1,5 +1,11 @@
package scanner;
+import static util.Characters.BACKSLASH;
+import static util.Characters.DOUBLE_QUOTE;
+import static util.Characters.EOF;
+import static util.Characters.NEWLINE;
+import static util.Characters.SEMICOLON;
+
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
@@ -44,11 +50,11 @@ public class LispFilterInputStream extends FilterInputStream {
}
private boolean haveEncounteredStringBoundary() {
- return (previousCharacter != '\\') && (nextCharacter == '\"');
+ return (previousCharacter != BACKSLASH) && (nextCharacter == DOUBLE_QUOTE);
}
private boolean haveEnteredComment() {
- return (nextCharacter == ';') && (!isInQuotedString);
+ return (nextCharacter == SEMICOLON) && (!isInQuotedString);
}
private void consumeAllBytesInComment() throws IOException {
@@ -57,7 +63,7 @@ public class LispFilterInputStream extends FilterInputStream {
}
private boolean stillInComment() {
- return (nextCharacter != '\n') && (nextCharacter != -1);
+ return (nextCharacter != NEWLINE) && (nextCharacter != EOF);
}
}
diff --git a/src/scanner/LispScanner.java b/src/scanner/LispScanner.java
index 1788d85..d1232ec 100644
--- a/src/scanner/LispScanner.java
+++ b/src/scanner/LispScanner.java
@@ -1,171 +1,120 @@
-/*
- * Name: Mike Cifelli
- * Course: CIS 443 - Programming Languages
- * Assignment: Lisp Interpreter Phase 1 - Lexical Analysis
- */
-
package scanner;
+import static util.Characters.BACKSLASH;
+import static util.Characters.DOUBLE_QUOTE;
+import static util.Characters.EOF;
+import static util.Characters.HASH;
+import static util.Characters.LEFT_PARENTHESIS;
+import static util.Characters.LEFT_SQUARE_BRACKET;
+import static util.Characters.NEWLINE;
+import static util.Characters.PERIOD;
+import static util.Characters.RIGHT_PARENTHESIS;
+import static util.Characters.RIGHT_SQUARE_BRACKET;
+import static util.Characters.SEMICOLON;
+import static util.Characters.SINGLE_QUOTE;
+import static util.Characters.TICK_MARK;
+
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.util.HashMap;
+import java.util.Map;
/**
- * A <code>LispScanner</code> converts a stream of bytes into a stream of Lisp
- * tokens. When the end of stream has been reached a token with a type of
- * <code>Token.Type.EOF</code> is returned from the <code>nextToken</code>
- * method of this scanner.
+ * Converts a stream of bytes into a stream of Lisp tokens.
*/
public class LispScanner {
- private LispFilterInputStream inStream;
- private Token currToken;
- private String fileName;
- private int line;
- private int column;
+ private static Map<Character, Boolean> illegalIdentifierCharacters = new HashMap<>();
+
+ static {
+ illegalIdentifierCharacters.put(DOUBLE_QUOTE, true);
+ illegalIdentifierCharacters.put(SINGLE_QUOTE, true);
+ illegalIdentifierCharacters.put(BACKSLASH, true);
+ illegalIdentifierCharacters.put(TICK_MARK, true);
+ illegalIdentifierCharacters.put(LEFT_PARENTHESIS, true);
+ illegalIdentifierCharacters.put(RIGHT_PARENTHESIS, true);
+ illegalIdentifierCharacters.put(LEFT_SQUARE_BRACKET, true);
+ illegalIdentifierCharacters.put(RIGHT_SQUARE_BRACKET, true);
+ illegalIdentifierCharacters.put(HASH, true);
+ illegalIdentifierCharacters.put(PERIOD, true);
+ illegalIdentifierCharacters.put(SEMICOLON, true);
+ }
+
+ private InputStream inputStream;
+ private String inputStreamName;
+ private int lineNumber;
+ private int columnNumber;
- /**
- * Create a new <code>LispScanner</code> that produces Lisp tokens from the
- * specified input stream.
- *
- * @param in
- * the input stream to obtain Lisp tokens from (must not be
- * <code>null</code>)
- * @param fileName
- * the name of the file that <code>in</code> is reading from
- */
public LispScanner(InputStream in, String fileName) {
- this.inStream = new LispFilterInputStream(new BufferedInputStream(in));
- this.currToken = null;
- this.fileName = fileName;
- this.line = 1;
- this.column = 0;
+ this.inputStream = new LispFilterInputStream(new BufferedInputStream(in));
+ this.inputStreamName = fileName;
+ this.lineNumber = 1;
+ this.columnNumber = 0;
}
- /**
- * Returns the same Lisp token returned from the last call to the
- * <code>nextToken</code> method of this scanner. In the case that no calls
- * to <code>nextToken</code> have been made yet, this method returns
- * <code>null</code>.
- *
- * @return the last Lisp token returned from this scanner or
- * <code>null</code> (if no tokens have been returned from this
- * scanner yet)
- */
- public Token getCurrToken() {
- return currToken;
- }
-
- /**
- * Returns the next Lisp token from this scanner.
- *
- * @return the next Lisp token from this scanner.
- * @throws RuntimeException
- * Indicates that an illegal character or an unterminated quoted
- * string was encountered in the input stream (not counting
- * comments).
- * @throws IOException
- * Indicates that an I/O error has occurred.
- */
public Token nextToken() throws IOException {
- currToken = retrieveNextToken();
-
- return currToken;
- }
-
- // Retrieve the next Lisp token from 'inStream'.
- //
- // Returns: the next Lisp token found in 'inStream'
- // Precondition: 'inStream' must not be null.
- // Throws: RuntimeException - Indicates that an illegal character or an
- // unterminated quoted string was encountered in
- // 'inStream'.
- // Throws: IOException - Indicates that an I/O error has occurred.
- private Token retrieveNextToken() throws IOException {
int c;
- while ((c = inStream.read()) != -1) {
+ while ((c = inputStream.read()) != EOF) {
char nextChar = (char) c;
- ++column;
+ ++columnNumber;
- // determine the type of the Lisp token from the character obtained
- // from 'inStream'
switch (nextChar) {
- case '\n':
- // we have hit a new line so increment 'line' and reset
- // 'column'
- ++line;
- column = 0;
-
+ case NEWLINE:
+ moveToNewLine();
break;
- case '(':
- return new Token(Token.Type.LEFT_PAREN, "(", fileName, line, column);
- case ')':
- return new Token(Token.Type.RIGHT_PAREN, ")", fileName, line, column);
- case '\'':
- return new Token(Token.Type.QUOTE_MARK, "\'", fileName, line, column);
- case '\"':
+ case LEFT_PARENTHESIS:
+ return new Token(Token.Type.LEFT_PAREN, "(", inputStreamName, lineNumber, columnNumber);
+ case RIGHT_PARENTHESIS:
+ return new Token(Token.Type.RIGHT_PAREN, ")", inputStreamName, lineNumber, columnNumber);
+ case SINGLE_QUOTE:
+ return new Token(Token.Type.QUOTE_MARK, "\'", inputStreamName, lineNumber, columnNumber);
+ case DOUBLE_QUOTE:
return retrieveString(nextChar);
default:
- if (Character.isWhitespace(nextChar)) { // skip whitespace
+ if (Character.isWhitespace(nextChar)) {
continue;
- } else if (Character.isDigit(nextChar)) { // number
+ } else if (Character.isDigit(nextChar)) {
return retrieveNumber(nextChar);
- } else if (isLegalIdChar(nextChar)) { // identifier
+ } else if (isLegalIdentifierCharacter(nextChar)) {
return retrieveIdentifier(nextChar);
} else {
- // 'nextChar' can not start any Lisp token
-
- throw new RuntimeException(
- "illegal character " + "\'" + nextChar + "\'" + " - line " + line + " column " + column);
+ throw new RuntimeException("illegal character " + "\'" + nextChar + "\'" + " - line " + lineNumber
+ + " column " + columnNumber);
}
}
}
- // we have reached the end of 'inStream' so we return an end-of-file
- // token
- return new Token(Token.Type.EOF, "EOF", fileName, line, column);
+ return new Token(Token.Type.EOF, "EOF", inputStreamName, lineNumber, columnNumber);
}
- // Retrieve a quoted string token from 'inStream'.
- //
- // Parameters: firstDoubleQuote - the opening double quote of this quoted
- // string
- // Returns: a quoted string token obtained from 'instream'
- // Throws: RuntimeException - Indicates that this quoted string was
- // missing its terminating double quote.
- // Throws: IOException - Indicates that an I/O error has occurred.
- // Precondition: 'firstDoubleQuote' must be the leading double quote
- // character of this quoted string and 'inStream' must not
- // be null.
private Token retrieveString(char firstDoubleQuote) throws IOException {
StringBuffer text = new StringBuffer();
- int startLine = line;
- int startColumn = column;
+ int startLine = lineNumber;
+ int startColumn = columnNumber;
char prevChar = firstDoubleQuote;
text.append(firstDoubleQuote);
int c;
- while ((c = inStream.read()) != -1) {
+ while ((c = inputStream.read()) != EOF) {
char nextChar = (char) c;
- ++column;
+ ++columnNumber;
text.append(nextChar);
switch (nextChar) {
- case '\n':
- ++line;
- column = 0;
-
+ case NEWLINE:
+ moveToNewLine();
break;
- case '\"':
- if (prevChar != '\\') {
+ case DOUBLE_QUOTE:
+ if (prevChar != BACKSLASH) {
// we have found the terminating double quote
- return new Token(Token.Type.STRING, text.toString(), fileName, startLine, startColumn);
+ return new Token(Token.Type.STRING, text.toString(), inputStreamName, startLine, startColumn);
}
// this is an escaped double quote
@@ -180,96 +129,81 @@ public class LispScanner {
throw new RuntimeException("unterminated quoted string" + " - line " + startLine + " column " + startColumn);
}
- // Retrieve a number token from 'inStream'.
- //
- // Parameters: firstDigit - the first digit of this number
- // Returns: a number token obtained from 'inStream'
- // Throws: IOException - Indicates that an I/O error has occurred.
- // Precondition: 'firstDigit' must be the first digit of this number and
- // 'inStream' must not be null.
private Token retrieveNumber(char firstDigit) throws IOException {
StringBuffer text = new StringBuffer();
- int startColumn = column;
+ int startColumn = columnNumber;
text.append(firstDigit);
- inStream.mark(1);
+ inputStream.mark(1);
int c;
- while ((c = inStream.read()) != -1) {
+ while ((c = inputStream.read()) != EOF) {
char nextChar = (char) c;
if (Character.isDigit(nextChar)) {
// 'nextChar' is a digit in this number
text.append(nextChar);
- ++column;
+ ++columnNumber;
} else {
// we have reached the end of the number
- inStream.reset(); // unread the last character
+ inputStream.reset(); // unread the last character
- return new Token(Token.Type.NUMBER, text.toString(), fileName, line, startColumn);
+ return new Token(Token.Type.NUMBER, text.toString(), inputStreamName, lineNumber, startColumn);
}
- inStream.mark(1);
+ inputStream.mark(1);
}
// there are no more bytes to be read from 'inStream' after this number
// token
- return new Token(Token.Type.NUMBER, text.toString(), fileName, line, startColumn);
+ return new Token(Token.Type.NUMBER, text.toString(), inputStreamName, lineNumber, startColumn);
}
- // Retrieve an identifier token from 'inStream'.
- //
- // Parameters: firstChar - the first character of this identifier
- // Returns: an identifier token obtained from 'inStream'
- // Throws: IOException - Indicates that an I/O error has occurred.
- // Precondition: 'firsChar' must be the first character of this identifier
- // and 'inStream' must not be null.
private Token retrieveIdentifier(char firstChar) throws IOException {
StringBuffer text = new StringBuffer();
- int startColumn = column;
+ int startColumn = columnNumber;
text.append(firstChar);
- inStream.mark(1);
+ inputStream.mark(1);
int c;
- while ((c = inStream.read()) != -1) {
+ while ((c = inputStream.read()) != EOF) {
char nextChar = (char) c;
- if (isLegalIdChar(nextChar)) {
+ if (isLegalIdentifierCharacter(nextChar)) {
// 'nextChar' is part of the identifier
text.append(nextChar);
- ++column;
+ ++columnNumber;
} else {
// we have reached the end of this identifier
- inStream.reset(); // unread the last character
+ inputStream.reset(); // unread the last character
- return new Token(Token.Type.IDENTIFIER, text.toString(), fileName, line, startColumn);
+ return new Token(Token.Type.IDENTIFIER, text.toString(), inputStreamName, lineNumber, startColumn);
}
- inStream.mark(1);
+ inputStream.mark(1);
}
// there are no more bytes to be read from 'inStream' after this
// identifier token
- return new Token(Token.Type.IDENTIFIER, text.toString(), fileName, line, startColumn);
+ return new Token(Token.Type.IDENTIFIER, text.toString(), inputStreamName, lineNumber, startColumn);
}
- // Test if a character is legal to be contained within an identifier in
- // Lisp.
- //
- // Returns: 'true' if the character can be found within an identifier in
- // Lisp; 'false' otherwise
- private boolean isLegalIdChar(char c) {
- return ((!Character.isWhitespace(c)) && (c != '\"') && (c != '\'') && (c != '\\') && (c != '`') && (c != '(')
- && (c != ')') && (c != '[') && (c != ']') && (c != '#') && (c != '.') && (c != ';'));
+ private void moveToNewLine() {
+ lineNumber++;
+ columnNumber = 0;
+ }
+
+ private boolean isLegalIdentifierCharacter(char c) {
+ return (!Character.isWhitespace(c)) && (illegalIdentifierCharacters.get(c) == null);
}
}
diff --git a/src/util/Characters.java b/src/util/Characters.java
new file mode 100644
index 0000000..9c6c731
--- /dev/null
+++ b/src/util/Characters.java
@@ -0,0 +1,20 @@
+package util;
+
+public class Characters {
+
+ public static final char BACKSLASH = '\\';
+ public static final char DOUBLE_QUOTE = '\"';
+ public static final char HASH = '#';
+ public static final char LEFT_PARENTHESIS = '(';
+ public static final char LEFT_SQUARE_BRACKET = '[';
+ public static final char NEWLINE = '\n';
+ public static final char PERIOD = '.';
+ public static final char RIGHT_PARENTHESIS = ')';
+ public static final char RIGHT_SQUARE_BRACKET = ']';
+ public static final char SEMICOLON = ';';
+ public static final char SINGLE_QUOTE = '\'';
+ public static final char TICK_MARK = '`';
+
+ public static final int EOF = -1;
+
+}
diff --git a/test/scanner/LispScannerLineColumnTester.java b/test/scanner/LispScannerLineColumnTester.java
new file mode 100644
index 0000000..3a6f148
--- /dev/null
+++ b/test/scanner/LispScannerLineColumnTester.java
@@ -0,0 +1,87 @@
+package scanner;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.junit.Before;
+import org.junit.Test;
+
+import testutils.TestUtilities;
+
+public class LispScannerLineColumnTester {
+
+ @Before
+ public void setUp() throws Exception {
+ }
+
+ @Test
+ public void givenSimpleString_RecordsCorrectLocation() throws IOException {
+ String input = "\"string\"";
+ LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 1) };
+
+ assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns);
+ }
+
+ @Test
+ public void givenMultipleStrings_RecordsCorrectLocations() throws IOException {
+ String input = "\"string1\" \n \"string2 \n with newline\" \n \"string3\"";
+ LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 1), LineColumn.create(2, 2),
+ LineColumn.create(4, 3) };
+
+ assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns);
+ }
+
+ @Test
+ public void givenQuotedList_RecordsCorrectLocations() throws IOException {
+ String input = "'(1 2 3 4 5)";
+ LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 1), LineColumn.create(1, 2),
+ LineColumn.create(1, 3), LineColumn.create(1, 5),
+ LineColumn.create(1, 7), LineColumn.create(1, 9),
+ LineColumn.create(1, 11), LineColumn.create(1, 12) };
+
+ assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns);
+ }
+
+ @Test
+ public void givenListSpanningMultipleLines_RecordsCorrectLocations() throws IOException {
+ String input = " ( 1 2 \n 3 4 \n5 ) ";
+ LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 2), LineColumn.create(1, 4),
+ LineColumn.create(1, 6), LineColumn.create(2, 2),
+ LineColumn.create(2, 4), LineColumn.create(3, 1),
+ LineColumn.create(3, 3) };
+
+ assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns);
+ }
+
+ private void assertTokenLineAndColumnsMatch(String input, LineColumn[] expectedLineColumnList) throws IOException {
+ InputStream stringInputStream = TestUtilities.createInputStreamFromString(input);
+ LispScanner lispScanner = new LispScanner(stringInputStream, "stringInputStream");
+
+ for (LineColumn lineColumn : expectedLineColumnList) {
+ Token nextToken = lispScanner.nextToken();
+ assertTrue(lineColumn.isEqual(nextToken));
+ }
+ }
+
+ private static class LineColumn {
+
+ private int line;
+ private int column;
+
+ public static LineColumn create(int line, int column) {
+ LineColumn lineColumn = new LineColumn();
+ lineColumn.line = line;
+ lineColumn.column = column;
+
+ return lineColumn;
+ }
+
+ public boolean isEqual(Token token) {
+ return (this.line == token.getLine()) && (this.column == token.getColumn());
+ }
+
+ }
+
+}
diff --git a/test/scanner/LispScannerTester.java b/test/scanner/LispScannerTypeTester.java
similarity index 98%
rename from test/scanner/LispScannerTester.java
rename to test/scanner/LispScannerTypeTester.java
index a0da5df..cc920e7 100644
--- a/test/scanner/LispScannerTester.java
+++ b/test/scanner/LispScannerTypeTester.java
@@ -10,7 +10,7 @@ import org.junit.Test;
import scanner.Token.Type;
import testutils.TestUtilities;
-public class LispScannerTester {
+public class LispScannerTypeTester {
@Test
public void givenEmptyFile_returnsCorrectTokenTypes() throws IOException {