From 9b3261f5756f8a5f672645e959d8d4ea188d2488 Mon Sep 17 00:00:00 2001 From: Mike Cifelli Date: Fri, 9 Dec 2016 12:29:56 -0500 Subject: [PATCH] Added unit tests and started refactoring LispScanner --- src/scanner/LispFilterInputStream.java | 12 +- src/scanner/LispScanner.java | 248 +++++++----------- src/util/Characters.java | 20 ++ test/scanner/LispScannerLineColumnTester.java | 87 ++++++ ...Tester.java => LispScannerTypeTester.java} | 2 +- 5 files changed, 208 insertions(+), 161 deletions(-) create mode 100644 src/util/Characters.java create mode 100644 test/scanner/LispScannerLineColumnTester.java rename test/scanner/{LispScannerTester.java => LispScannerTypeTester.java} (98%) diff --git a/src/scanner/LispFilterInputStream.java b/src/scanner/LispFilterInputStream.java index 8e550b0..e60a01b 100644 --- a/src/scanner/LispFilterInputStream.java +++ b/src/scanner/LispFilterInputStream.java @@ -1,5 +1,11 @@ package scanner; +import static util.Characters.BACKSLASH; +import static util.Characters.DOUBLE_QUOTE; +import static util.Characters.EOF; +import static util.Characters.NEWLINE; +import static util.Characters.SEMICOLON; + import java.io.FilterInputStream; import java.io.IOException; import java.io.InputStream; @@ -44,11 +50,11 @@ public class LispFilterInputStream extends FilterInputStream { } private boolean haveEncounteredStringBoundary() { - return (previousCharacter != '\\') && (nextCharacter == '\"'); + return (previousCharacter != BACKSLASH) && (nextCharacter == DOUBLE_QUOTE); } private boolean haveEnteredComment() { - return (nextCharacter == ';') && (!isInQuotedString); + return (nextCharacter == SEMICOLON) && (!isInQuotedString); } private void consumeAllBytesInComment() throws IOException { @@ -57,7 +63,7 @@ public class LispFilterInputStream extends FilterInputStream { } private boolean stillInComment() { - return (nextCharacter != '\n') && (nextCharacter != -1); + return (nextCharacter != NEWLINE) && (nextCharacter != EOF); } } diff --git a/src/scanner/LispScanner.java b/src/scanner/LispScanner.java index 1788d85..d1232ec 100644 --- a/src/scanner/LispScanner.java +++ b/src/scanner/LispScanner.java @@ -1,171 +1,120 @@ -/* - * Name: Mike Cifelli - * Course: CIS 443 - Programming Languages - * Assignment: Lisp Interpreter Phase 1 - Lexical Analysis - */ - package scanner; +import static util.Characters.BACKSLASH; +import static util.Characters.DOUBLE_QUOTE; +import static util.Characters.EOF; +import static util.Characters.HASH; +import static util.Characters.LEFT_PARENTHESIS; +import static util.Characters.LEFT_SQUARE_BRACKET; +import static util.Characters.NEWLINE; +import static util.Characters.PERIOD; +import static util.Characters.RIGHT_PARENTHESIS; +import static util.Characters.RIGHT_SQUARE_BRACKET; +import static util.Characters.SEMICOLON; +import static util.Characters.SINGLE_QUOTE; +import static util.Characters.TICK_MARK; + import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; +import java.util.HashMap; +import java.util.Map; /** - * A LispScanner converts a stream of bytes into a stream of Lisp - * tokens. When the end of stream has been reached a token with a type of - * Token.Type.EOF is returned from the nextToken - * method of this scanner. + * Converts a stream of bytes into a stream of Lisp tokens. */ public class LispScanner { - private LispFilterInputStream inStream; - private Token currToken; - private String fileName; - private int line; - private int column; + private static Map illegalIdentifierCharacters = new HashMap<>(); + + static { + illegalIdentifierCharacters.put(DOUBLE_QUOTE, true); + illegalIdentifierCharacters.put(SINGLE_QUOTE, true); + illegalIdentifierCharacters.put(BACKSLASH, true); + illegalIdentifierCharacters.put(TICK_MARK, true); + illegalIdentifierCharacters.put(LEFT_PARENTHESIS, true); + illegalIdentifierCharacters.put(RIGHT_PARENTHESIS, true); + illegalIdentifierCharacters.put(LEFT_SQUARE_BRACKET, true); + illegalIdentifierCharacters.put(RIGHT_SQUARE_BRACKET, true); + illegalIdentifierCharacters.put(HASH, true); + illegalIdentifierCharacters.put(PERIOD, true); + illegalIdentifierCharacters.put(SEMICOLON, true); + } + + private InputStream inputStream; + private String inputStreamName; + private int lineNumber; + private int columnNumber; - /** - * Create a new LispScanner that produces Lisp tokens from the - * specified input stream. - * - * @param in - * the input stream to obtain Lisp tokens from (must not be - * null) - * @param fileName - * the name of the file that in is reading from - */ public LispScanner(InputStream in, String fileName) { - this.inStream = new LispFilterInputStream(new BufferedInputStream(in)); - this.currToken = null; - this.fileName = fileName; - this.line = 1; - this.column = 0; + this.inputStream = new LispFilterInputStream(new BufferedInputStream(in)); + this.inputStreamName = fileName; + this.lineNumber = 1; + this.columnNumber = 0; } - /** - * Returns the same Lisp token returned from the last call to the - * nextToken method of this scanner. In the case that no calls - * to nextToken have been made yet, this method returns - * null. - * - * @return the last Lisp token returned from this scanner or - * null (if no tokens have been returned from this - * scanner yet) - */ - public Token getCurrToken() { - return currToken; - } - - /** - * Returns the next Lisp token from this scanner. - * - * @return the next Lisp token from this scanner. - * @throws RuntimeException - * Indicates that an illegal character or an unterminated quoted - * string was encountered in the input stream (not counting - * comments). - * @throws IOException - * Indicates that an I/O error has occurred. - */ public Token nextToken() throws IOException { - currToken = retrieveNextToken(); - - return currToken; - } - - // Retrieve the next Lisp token from 'inStream'. - // - // Returns: the next Lisp token found in 'inStream' - // Precondition: 'inStream' must not be null. - // Throws: RuntimeException - Indicates that an illegal character or an - // unterminated quoted string was encountered in - // 'inStream'. - // Throws: IOException - Indicates that an I/O error has occurred. - private Token retrieveNextToken() throws IOException { int c; - while ((c = inStream.read()) != -1) { + while ((c = inputStream.read()) != EOF) { char nextChar = (char) c; - ++column; + ++columnNumber; - // determine the type of the Lisp token from the character obtained - // from 'inStream' switch (nextChar) { - case '\n': - // we have hit a new line so increment 'line' and reset - // 'column' - ++line; - column = 0; - + case NEWLINE: + moveToNewLine(); break; - case '(': - return new Token(Token.Type.LEFT_PAREN, "(", fileName, line, column); - case ')': - return new Token(Token.Type.RIGHT_PAREN, ")", fileName, line, column); - case '\'': - return new Token(Token.Type.QUOTE_MARK, "\'", fileName, line, column); - case '\"': + case LEFT_PARENTHESIS: + return new Token(Token.Type.LEFT_PAREN, "(", inputStreamName, lineNumber, columnNumber); + case RIGHT_PARENTHESIS: + return new Token(Token.Type.RIGHT_PAREN, ")", inputStreamName, lineNumber, columnNumber); + case SINGLE_QUOTE: + return new Token(Token.Type.QUOTE_MARK, "\'", inputStreamName, lineNumber, columnNumber); + case DOUBLE_QUOTE: return retrieveString(nextChar); default: - if (Character.isWhitespace(nextChar)) { // skip whitespace + if (Character.isWhitespace(nextChar)) { continue; - } else if (Character.isDigit(nextChar)) { // number + } else if (Character.isDigit(nextChar)) { return retrieveNumber(nextChar); - } else if (isLegalIdChar(nextChar)) { // identifier + } else if (isLegalIdentifierCharacter(nextChar)) { return retrieveIdentifier(nextChar); } else { - // 'nextChar' can not start any Lisp token - - throw new RuntimeException( - "illegal character " + "\'" + nextChar + "\'" + " - line " + line + " column " + column); + throw new RuntimeException("illegal character " + "\'" + nextChar + "\'" + " - line " + lineNumber + + " column " + columnNumber); } } } - // we have reached the end of 'inStream' so we return an end-of-file - // token - return new Token(Token.Type.EOF, "EOF", fileName, line, column); + return new Token(Token.Type.EOF, "EOF", inputStreamName, lineNumber, columnNumber); } - // Retrieve a quoted string token from 'inStream'. - // - // Parameters: firstDoubleQuote - the opening double quote of this quoted - // string - // Returns: a quoted string token obtained from 'instream' - // Throws: RuntimeException - Indicates that this quoted string was - // missing its terminating double quote. - // Throws: IOException - Indicates that an I/O error has occurred. - // Precondition: 'firstDoubleQuote' must be the leading double quote - // character of this quoted string and 'inStream' must not - // be null. private Token retrieveString(char firstDoubleQuote) throws IOException { StringBuffer text = new StringBuffer(); - int startLine = line; - int startColumn = column; + int startLine = lineNumber; + int startColumn = columnNumber; char prevChar = firstDoubleQuote; text.append(firstDoubleQuote); int c; - while ((c = inStream.read()) != -1) { + while ((c = inputStream.read()) != EOF) { char nextChar = (char) c; - ++column; + ++columnNumber; text.append(nextChar); switch (nextChar) { - case '\n': - ++line; - column = 0; - + case NEWLINE: + moveToNewLine(); break; - case '\"': - if (prevChar != '\\') { + case DOUBLE_QUOTE: + if (prevChar != BACKSLASH) { // we have found the terminating double quote - return new Token(Token.Type.STRING, text.toString(), fileName, startLine, startColumn); + return new Token(Token.Type.STRING, text.toString(), inputStreamName, startLine, startColumn); } // this is an escaped double quote @@ -180,96 +129,81 @@ public class LispScanner { throw new RuntimeException("unterminated quoted string" + " - line " + startLine + " column " + startColumn); } - // Retrieve a number token from 'inStream'. - // - // Parameters: firstDigit - the first digit of this number - // Returns: a number token obtained from 'inStream' - // Throws: IOException - Indicates that an I/O error has occurred. - // Precondition: 'firstDigit' must be the first digit of this number and - // 'inStream' must not be null. private Token retrieveNumber(char firstDigit) throws IOException { StringBuffer text = new StringBuffer(); - int startColumn = column; + int startColumn = columnNumber; text.append(firstDigit); - inStream.mark(1); + inputStream.mark(1); int c; - while ((c = inStream.read()) != -1) { + while ((c = inputStream.read()) != EOF) { char nextChar = (char) c; if (Character.isDigit(nextChar)) { // 'nextChar' is a digit in this number text.append(nextChar); - ++column; + ++columnNumber; } else { // we have reached the end of the number - inStream.reset(); // unread the last character + inputStream.reset(); // unread the last character - return new Token(Token.Type.NUMBER, text.toString(), fileName, line, startColumn); + return new Token(Token.Type.NUMBER, text.toString(), inputStreamName, lineNumber, startColumn); } - inStream.mark(1); + inputStream.mark(1); } // there are no more bytes to be read from 'inStream' after this number // token - return new Token(Token.Type.NUMBER, text.toString(), fileName, line, startColumn); + return new Token(Token.Type.NUMBER, text.toString(), inputStreamName, lineNumber, startColumn); } - // Retrieve an identifier token from 'inStream'. - // - // Parameters: firstChar - the first character of this identifier - // Returns: an identifier token obtained from 'inStream' - // Throws: IOException - Indicates that an I/O error has occurred. - // Precondition: 'firsChar' must be the first character of this identifier - // and 'inStream' must not be null. private Token retrieveIdentifier(char firstChar) throws IOException { StringBuffer text = new StringBuffer(); - int startColumn = column; + int startColumn = columnNumber; text.append(firstChar); - inStream.mark(1); + inputStream.mark(1); int c; - while ((c = inStream.read()) != -1) { + while ((c = inputStream.read()) != EOF) { char nextChar = (char) c; - if (isLegalIdChar(nextChar)) { + if (isLegalIdentifierCharacter(nextChar)) { // 'nextChar' is part of the identifier text.append(nextChar); - ++column; + ++columnNumber; } else { // we have reached the end of this identifier - inStream.reset(); // unread the last character + inputStream.reset(); // unread the last character - return new Token(Token.Type.IDENTIFIER, text.toString(), fileName, line, startColumn); + return new Token(Token.Type.IDENTIFIER, text.toString(), inputStreamName, lineNumber, startColumn); } - inStream.mark(1); + inputStream.mark(1); } // there are no more bytes to be read from 'inStream' after this // identifier token - return new Token(Token.Type.IDENTIFIER, text.toString(), fileName, line, startColumn); + return new Token(Token.Type.IDENTIFIER, text.toString(), inputStreamName, lineNumber, startColumn); } - // Test if a character is legal to be contained within an identifier in - // Lisp. - // - // Returns: 'true' if the character can be found within an identifier in - // Lisp; 'false' otherwise - private boolean isLegalIdChar(char c) { - return ((!Character.isWhitespace(c)) && (c != '\"') && (c != '\'') && (c != '\\') && (c != '`') && (c != '(') - && (c != ')') && (c != '[') && (c != ']') && (c != '#') && (c != '.') && (c != ';')); + private void moveToNewLine() { + lineNumber++; + columnNumber = 0; + } + + private boolean isLegalIdentifierCharacter(char c) { + return (!Character.isWhitespace(c)) && (illegalIdentifierCharacters.get(c) == null); } } diff --git a/src/util/Characters.java b/src/util/Characters.java new file mode 100644 index 0000000..9c6c731 --- /dev/null +++ b/src/util/Characters.java @@ -0,0 +1,20 @@ +package util; + +public class Characters { + + public static final char BACKSLASH = '\\'; + public static final char DOUBLE_QUOTE = '\"'; + public static final char HASH = '#'; + public static final char LEFT_PARENTHESIS = '('; + public static final char LEFT_SQUARE_BRACKET = '['; + public static final char NEWLINE = '\n'; + public static final char PERIOD = '.'; + public static final char RIGHT_PARENTHESIS = ')'; + public static final char RIGHT_SQUARE_BRACKET = ']'; + public static final char SEMICOLON = ';'; + public static final char SINGLE_QUOTE = '\''; + public static final char TICK_MARK = '`'; + + public static final int EOF = -1; + +} diff --git a/test/scanner/LispScannerLineColumnTester.java b/test/scanner/LispScannerLineColumnTester.java new file mode 100644 index 0000000..3a6f148 --- /dev/null +++ b/test/scanner/LispScannerLineColumnTester.java @@ -0,0 +1,87 @@ +package scanner; + +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.io.InputStream; + +import org.junit.Before; +import org.junit.Test; + +import testutils.TestUtilities; + +public class LispScannerLineColumnTester { + + @Before + public void setUp() throws Exception { + } + + @Test + public void givenSimpleString_RecordsCorrectLocation() throws IOException { + String input = "\"string\""; + LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 1) }; + + assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns); + } + + @Test + public void givenMultipleStrings_RecordsCorrectLocations() throws IOException { + String input = "\"string1\" \n \"string2 \n with newline\" \n \"string3\""; + LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 1), LineColumn.create(2, 2), + LineColumn.create(4, 3) }; + + assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns); + } + + @Test + public void givenQuotedList_RecordsCorrectLocations() throws IOException { + String input = "'(1 2 3 4 5)"; + LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 1), LineColumn.create(1, 2), + LineColumn.create(1, 3), LineColumn.create(1, 5), + LineColumn.create(1, 7), LineColumn.create(1, 9), + LineColumn.create(1, 11), LineColumn.create(1, 12) }; + + assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns); + } + + @Test + public void givenListSpanningMultipleLines_RecordsCorrectLocations() throws IOException { + String input = " ( 1 2 \n 3 4 \n5 ) "; + LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 2), LineColumn.create(1, 4), + LineColumn.create(1, 6), LineColumn.create(2, 2), + LineColumn.create(2, 4), LineColumn.create(3, 1), + LineColumn.create(3, 3) }; + + assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns); + } + + private void assertTokenLineAndColumnsMatch(String input, LineColumn[] expectedLineColumnList) throws IOException { + InputStream stringInputStream = TestUtilities.createInputStreamFromString(input); + LispScanner lispScanner = new LispScanner(stringInputStream, "stringInputStream"); + + for (LineColumn lineColumn : expectedLineColumnList) { + Token nextToken = lispScanner.nextToken(); + assertTrue(lineColumn.isEqual(nextToken)); + } + } + + private static class LineColumn { + + private int line; + private int column; + + public static LineColumn create(int line, int column) { + LineColumn lineColumn = new LineColumn(); + lineColumn.line = line; + lineColumn.column = column; + + return lineColumn; + } + + public boolean isEqual(Token token) { + return (this.line == token.getLine()) && (this.column == token.getColumn()); + } + + } + +} diff --git a/test/scanner/LispScannerTester.java b/test/scanner/LispScannerTypeTester.java similarity index 98% rename from test/scanner/LispScannerTester.java rename to test/scanner/LispScannerTypeTester.java index a0da5df..cc920e7 100644 --- a/test/scanner/LispScannerTester.java +++ b/test/scanner/LispScannerTypeTester.java @@ -10,7 +10,7 @@ import org.junit.Test; import scanner.Token.Type; import testutils.TestUtilities; -public class LispScannerTester { +public class LispScannerTypeTester { @Test public void givenEmptyFile_returnsCorrectTokenTypes() throws IOException {