Added unit tests and started refactoring LispScanner

2016-12-09 12:29:56 -05:00 · 2016-12-09 12:29:56 -05:00 · 9b3261f575
commit 9b3261f575
parent 4a5f169076
5 changed files with 208 additions and 161 deletions
--- a/src/scanner/LispFilterInputStream.java
+++ b/src/scanner/LispFilterInputStream.java
@ -1,5 +1,11 @@
 package scanner;

+import static util.Characters.BACKSLASH;
+import static util.Characters.DOUBLE_QUOTE;
+import static util.Characters.EOF;
+import static util.Characters.NEWLINE;
+import static util.Characters.SEMICOLON;
+
 import java.io.FilterInputStream;
 import java.io.IOException;
 import java.io.InputStream;
@ -44,11 +50,11 @@ public class LispFilterInputStream extends FilterInputStream {
    }

    private boolean haveEncounteredStringBoundary() {
-        return (previousCharacter != '\\') && (nextCharacter == '\"');
+        return (previousCharacter != BACKSLASH) && (nextCharacter == DOUBLE_QUOTE);
    }

    private boolean haveEnteredComment() {
-        return (nextCharacter == ';') && (!isInQuotedString);
+        return (nextCharacter == SEMICOLON) && (!isInQuotedString);
    }

    private void consumeAllBytesInComment() throws IOException {
@ -57,7 +63,7 @@ public class LispFilterInputStream extends FilterInputStream {
    }

    private boolean stillInComment() {
-        return (nextCharacter != '\n') && (nextCharacter != -1);
+        return (nextCharacter != NEWLINE) && (nextCharacter != EOF);
    }

 }
--- a/src/scanner/LispScanner.java
+++ b/src/scanner/LispScanner.java
@ -1,171 +1,120 @@
-/*
- * Name: Mike Cifelli
- * Course: CIS 443 - Programming Languages
- * Assignment: Lisp Interpreter Phase 1 - Lexical Analysis
- */
-
 package scanner;

+import static util.Characters.BACKSLASH;
+import static util.Characters.DOUBLE_QUOTE;
+import static util.Characters.EOF;
+import static util.Characters.HASH;
+import static util.Characters.LEFT_PARENTHESIS;
+import static util.Characters.LEFT_SQUARE_BRACKET;
+import static util.Characters.NEWLINE;
+import static util.Characters.PERIOD;
+import static util.Characters.RIGHT_PARENTHESIS;
+import static util.Characters.RIGHT_SQUARE_BRACKET;
+import static util.Characters.SEMICOLON;
+import static util.Characters.SINGLE_QUOTE;
+import static util.Characters.TICK_MARK;
+
 import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.HashMap;
+import java.util.Map;

 /**
- * A <code>LispScanner</code> converts a stream of bytes into a stream of Lisp
- * tokens. When the end of stream has been reached a token with a type of
- * <code>Token.Type.EOF</code> is returned from the <code>nextToken</code>
- * method of this scanner.
+ * Converts a stream of bytes into a stream of Lisp tokens.
 */
 public class LispScanner {

-    private LispFilterInputStream inStream;
-    private Token currToken;
-    private String fileName;
-    private int line;
-    private int column;
+    // Characters that may never appear inside an identifier. Lookups only test
+    // whether a character has an entry (see isLegalIdentifierCharacter), so every
+    // value stored here is simply 'true'.
+    // NOTE(review): a Set<Character> held in a final field would express this
+    // more directly - confirm and change together with the lookup.
+    private static Map<Character, Boolean> illegalIdentifierCharacters = new HashMap<>();
+
+    // Populate the table once, when the class is loaded.
+    static {
+        illegalIdentifierCharacters.put(DOUBLE_QUOTE, true);
+        illegalIdentifierCharacters.put(SINGLE_QUOTE, true);
+        illegalIdentifierCharacters.put(BACKSLASH, true);
+        illegalIdentifierCharacters.put(TICK_MARK, true);
+        illegalIdentifierCharacters.put(LEFT_PARENTHESIS, true);
+        illegalIdentifierCharacters.put(RIGHT_PARENTHESIS, true);
+        illegalIdentifierCharacters.put(LEFT_SQUARE_BRACKET, true);
+        illegalIdentifierCharacters.put(RIGHT_SQUARE_BRACKET, true);
+        illegalIdentifierCharacters.put(HASH, true);
+        illegalIdentifierCharacters.put(PERIOD, true);
+        illegalIdentifierCharacters.put(SEMICOLON, true);
+    }
+
+    private InputStream inputStream;
+    private String inputStreamName;
+    private int lineNumber;
+    private int columnNumber;

-    /**
-     * Create a new <code>LispScanner</code> that produces Lisp tokens from the
-     * specified input stream.
-     *
-     * @param in
-     *            the input stream to obtain Lisp tokens from (must not be
-     *            <code>null</code>)
-     * @param fileName
-     *            the name of the file that <code>in</code> is reading from
-     */
    public LispScanner(InputStream in, String fileName) {
-        this.inStream = new LispFilterInputStream(new BufferedInputStream(in));
-        this.currToken = null;
-        this.fileName = fileName;
-        this.line = 1;
-        this.column = 0;
+        this.inputStream = new LispFilterInputStream(new BufferedInputStream(in));
+        this.inputStreamName = fileName;
+        this.lineNumber = 1;
+        this.columnNumber = 0;
    }

-    /**
-     * Returns the same Lisp token returned from the last call to the
-     * <code>nextToken</code> method of this scanner. In the case that no calls
-     * to <code>nextToken</code> have been made yet, this method returns
-     * <code>null</code>.
-     *
-     * @return the last Lisp token returned from this scanner or
-     *         <code>null</code> (if no tokens have been returned from this
-     *         scanner yet)
-     */
-    public Token getCurrToken() {
-        return currToken;
-    }
-
-    /**
-     * Returns the next Lisp token from this scanner.
-     *
-     * @return the next Lisp token from this scanner.
-     * @throws RuntimeException
-     *             Indicates that an illegal character or an unterminated quoted
-     *             string was encountered in the input stream (not counting
-     *             comments).
-     * @throws IOException
-     *             Indicates that an I/O error has occurred.
-     */
    public Token nextToken() throws IOException {
-        currToken = retrieveNextToken();
-
-        return currToken;
-    }
-
-    // Retrieve the next Lisp token from 'inStream'.
-    //
-    // Returns: the next Lisp token found in 'inStream'
-    // Precondition: 'inStream' must not be null.
-    // Throws: RuntimeException - Indicates that an illegal character or an
-    // unterminated quoted string was encountered in
-    // 'inStream'.
-    // Throws: IOException - Indicates that an I/O error has occurred.
-    private Token retrieveNextToken() throws IOException {
        int c;

-        while ((c = inStream.read()) != -1) {
+        while ((c = inputStream.read()) != EOF) {
            char nextChar = (char) c;

-            ++column;
+            ++columnNumber;

-            // determine the type of the Lisp token from the character obtained
-            // from 'inStream'
            switch (nextChar) {
-            case '\n':
-                // we have hit a new line so increment 'line' and reset
-                // 'column'
-                ++line;
-                column = 0;
-
+            case NEWLINE:
+                moveToNewLine();
                break;
-            case '(':
-                return new Token(Token.Type.LEFT_PAREN, "(", fileName, line, column);
-            case ')':
-                return new Token(Token.Type.RIGHT_PAREN, ")", fileName, line, column);
-            case '\'':
-                return new Token(Token.Type.QUOTE_MARK, "\'", fileName, line, column);
-            case '\"':
+            case LEFT_PARENTHESIS:
+                return new Token(Token.Type.LEFT_PAREN, "(", inputStreamName, lineNumber, columnNumber);
+            case RIGHT_PARENTHESIS:
+                return new Token(Token.Type.RIGHT_PAREN, ")", inputStreamName, lineNumber, columnNumber);
+            case SINGLE_QUOTE:
+                return new Token(Token.Type.QUOTE_MARK, "\'", inputStreamName, lineNumber, columnNumber);
+            case DOUBLE_QUOTE:
                return retrieveString(nextChar);
            default:
-                if (Character.isWhitespace(nextChar)) { // skip whitespace
+                if (Character.isWhitespace(nextChar)) {
                    continue;
-                } else if (Character.isDigit(nextChar)) { // number
+                } else if (Character.isDigit(nextChar)) {
                    return retrieveNumber(nextChar);
-                } else if (isLegalIdChar(nextChar)) { // identifier
+                } else if (isLegalIdentifierCharacter(nextChar)) {
                    return retrieveIdentifier(nextChar);
                } else {
-                    // 'nextChar' can not start any Lisp token
-
-                    throw new RuntimeException(
-                            "illegal character " + "\'" + nextChar + "\'" + " - line " + line + " column " + column);
+                    throw new RuntimeException("illegal character " + "\'" + nextChar + "\'" + " - line " + lineNumber
+                            + " column " + columnNumber);
                }
            }
        }

-        // we have reached the end of 'inStream' so we return an end-of-file
-        // token
-        return new Token(Token.Type.EOF, "EOF", fileName, line, column);
+        return new Token(Token.Type.EOF, "EOF", inputStreamName, lineNumber, columnNumber);
    }

-    // Retrieve a quoted string token from 'inStream'.
-    //
-    // Parameters: firstDoubleQuote - the opening double quote of this quoted
-    // string
-    // Returns: a quoted string token obtained from 'instream'
-    // Throws: RuntimeException - Indicates that this quoted string was
-    // missing its terminating double quote.
-    // Throws: IOException - Indicates that an I/O error has occurred.
-    // Precondition: 'firstDoubleQuote' must be the leading double quote
-    // character of this quoted string and 'inStream' must not
-    // be null.
    private Token retrieveString(char firstDoubleQuote) throws IOException {
        StringBuffer text = new StringBuffer();
-        int startLine = line;
-        int startColumn = column;
+        int startLine = lineNumber;
+        int startColumn = columnNumber;
        char prevChar = firstDoubleQuote;

        text.append(firstDoubleQuote);

        int c;

-        while ((c = inStream.read()) != -1) {
+        while ((c = inputStream.read()) != EOF) {
            char nextChar = (char) c;

-            ++column;
+            ++columnNumber;
            text.append(nextChar);

            switch (nextChar) {
-            case '\n':
-                ++line;
-                column = 0;
-
+            case NEWLINE:
+                moveToNewLine();
                break;
-            case '\"':
-                if (prevChar != '\\') {
+            case DOUBLE_QUOTE:
+                if (prevChar != BACKSLASH) {
                    // we have found the terminating double quote

-                    return new Token(Token.Type.STRING, text.toString(), fileName, startLine, startColumn);
+                    return new Token(Token.Type.STRING, text.toString(), inputStreamName, startLine, startColumn);
                }

                // this is an escaped double quote
@ -180,96 +129,81 @@ public class LispScanner {
        throw new RuntimeException("unterminated quoted string" + " - line " + startLine + " column " + startColumn);
    }

-    // Retrieve a number token from 'inStream'.
-    //
-    // Parameters: firstDigit - the first digit of this number
-    // Returns: a number token obtained from 'inStream'
-    // Throws: IOException - Indicates that an I/O error has occurred.
-    // Precondition: 'firstDigit' must be the first digit of this number and
-    // 'inStream' must not be null.
    private Token retrieveNumber(char firstDigit) throws IOException {
        StringBuffer text = new StringBuffer();
-        int startColumn = column;
+        int startColumn = columnNumber;

        text.append(firstDigit);
-        inStream.mark(1);
+        inputStream.mark(1);

        int c;

-        while ((c = inStream.read()) != -1) {
+        while ((c = inputStream.read()) != EOF) {
            char nextChar = (char) c;

            if (Character.isDigit(nextChar)) {
                // 'nextChar' is a digit in this number

                text.append(nextChar);
-                ++column;
+                ++columnNumber;
            } else {
                // we have reached the end of the number

-                inStream.reset(); // unread the last character
+                inputStream.reset(); // unread the last character

-                return new Token(Token.Type.NUMBER, text.toString(), fileName, line, startColumn);
+                return new Token(Token.Type.NUMBER, text.toString(), inputStreamName, lineNumber, startColumn);
            }

-            inStream.mark(1);
+            inputStream.mark(1);
        }

-        // there are no more bytes to be read from 'inStream' after this number
+        // there are no more bytes to be read from 'inputStream' after this number
        // token

-        return new Token(Token.Type.NUMBER, text.toString(), fileName, line, startColumn);
+        return new Token(Token.Type.NUMBER, text.toString(), inputStreamName, lineNumber, startColumn);
    }

-    // Retrieve an identifier token from 'inStream'.
-    //
-    // Parameters: firstChar - the first character of this identifier
-    // Returns: an identifier token obtained from 'inStream'
-    // Throws: IOException - Indicates that an I/O error has occurred.
-    // Precondition: 'firsChar' must be the first character of this identifier
-    // and 'inStream' must not be null.
    private Token retrieveIdentifier(char firstChar) throws IOException {
        StringBuffer text = new StringBuffer();
-        int startColumn = column;
+        int startColumn = columnNumber;

        text.append(firstChar);
-        inStream.mark(1);
+        inputStream.mark(1);

        int c;

-        while ((c = inStream.read()) != -1) {
+        while ((c = inputStream.read()) != EOF) {
            char nextChar = (char) c;

-            if (isLegalIdChar(nextChar)) {
+            if (isLegalIdentifierCharacter(nextChar)) {
                // 'nextChar' is part of the identifier

                text.append(nextChar);
-                ++column;
+                ++columnNumber;
            } else {
                // we have reached the end of this identifier

-                inStream.reset(); // unread the last character
+                inputStream.reset(); // unread the last character

-                return new Token(Token.Type.IDENTIFIER, text.toString(), fileName, line, startColumn);
+                return new Token(Token.Type.IDENTIFIER, text.toString(), inputStreamName, lineNumber, startColumn);
            }

-            inStream.mark(1);
+            inputStream.mark(1);
        }

-        // there are no more bytes to be read from 'inStream' after this
+        // there are no more bytes to be read from 'inputStream' after this
        // identifier token

-        return new Token(Token.Type.IDENTIFIER, text.toString(), fileName, line, startColumn);
+        return new Token(Token.Type.IDENTIFIER, text.toString(), inputStreamName, lineNumber, startColumn);
    }

-    // Test if a character is legal to be contained within an identifier in
-    // Lisp.
-    //
-    // Returns: 'true' if the character can be found within an identifier in
-    // Lisp; 'false' otherwise
-    private boolean isLegalIdChar(char c) {
-        return ((!Character.isWhitespace(c)) && (c != '\"') && (c != '\'') && (c != '\\') && (c != '`') && (c != '(')
-                && (c != ')') && (c != '[') && (c != ']') && (c != '#') && (c != '.') && (c != ';'));
+    /**
+     * Records that a newline was consumed: the column counter restarts at zero
+     * and the line counter advances by one.
+     */
+    private void moveToNewLine() {
+        columnNumber = 0;
+        lineNumber += 1;
+    }
+
+    /**
+     * Tells whether {@code c} may appear in an identifier.
+     *
+     * @param c
+     *            the character to test
+     * @return {@code false} for whitespace and for any character that has an
+     *         entry in {@code illegalIdentifierCharacters}; {@code true}
+     *         otherwise
+     */
+    private boolean isLegalIdentifierCharacter(char c) {
+        if (Character.isWhitespace(c)) {
+            return false;
+        }
+
+        // every stored value is non-null, so key presence alone decides
+        return !illegalIdentifierCharacters.containsKey(c);
    }

 }
--- a/src/util/Characters.java
+++ b/src/util/Characters.java
@ -0,0 +1,20 @@
+package util;
+
+/**
+ * Named character constants shared by the scanner classes, so that the
+ * scanning code can refer to {@code BACKSLASH} or {@code NEWLINE} instead of
+ * repeating escaped character literals.
+ *
+ * <p>
+ * This class only holds constants; it cannot be instantiated or subclassed.
+ */
+public final class Characters {
+
+    public static final char BACKSLASH = '\\';
+    public static final char DOUBLE_QUOTE = '\"';
+    public static final char HASH = '#';
+    public static final char LEFT_PARENTHESIS = '(';
+    public static final char LEFT_SQUARE_BRACKET = '[';
+    public static final char NEWLINE = '\n';
+    public static final char PERIOD = '.';
+    public static final char RIGHT_PARENTHESIS = ')';
+    public static final char RIGHT_SQUARE_BRACKET = ']';
+    public static final char SEMICOLON = ';';
+    public static final char SINGLE_QUOTE = '\'';
+    public static final char TICK_MARK = '`';
+
+    /**
+     * Sentinel compared against the result of {@code InputStream.read()} to
+     * detect the end of the stream; an {@code int} because it lies outside the
+     * {@code char} range.
+     */
+    public static final int EOF = -1;
+
+    private Characters() {
+        // constants holder - never instantiated
+    }
+
+}
--- a/test/scanner/LispScannerLineColumnTester.java
+++ b/test/scanner/LispScannerLineColumnTester.java
@ -0,0 +1,87 @@
+package scanner;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.junit.Before;
+import org.junit.Test;
+
+import testutils.TestUtilities;
+
+/**
+ * Checks that {@link LispScanner} reports the expected line and column for each
+ * token it produces.
+ */
+public class LispScannerLineColumnTester {
+
+    @Before
+    public void setUp() throws Exception {
+    }
+
+    @Test
+    public void givenSimpleString_RecordsCorrectLocation() throws IOException {
+        String input = "\"string\"";
+        LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 1) };
+
+        assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns);
+    }
+
+    @Test
+    public void givenMultipleStrings_RecordsCorrectLocations() throws IOException {
+        String input = "\"string1\" \n \"string2 \n with newline\" \n  \"string3\"";
+        LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 1), LineColumn.create(2, 2),
+                                                 LineColumn.create(4, 3) };
+
+        assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns);
+    }
+
+    @Test
+    public void givenQuotedList_RecordsCorrectLocations() throws IOException {
+        String input = "'(1 2 3 4 5)";
+        LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 1), LineColumn.create(1, 2),
+                                                 LineColumn.create(1, 3), LineColumn.create(1, 5),
+                                                 LineColumn.create(1, 7), LineColumn.create(1, 9),
+                                                 LineColumn.create(1, 11), LineColumn.create(1, 12) };
+
+        assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns);
+    }
+
+    @Test
+    public void givenListSpanningMultipleLines_RecordsCorrectLocations() throws IOException {
+        String input = " ( 1 2 \n 3 4 \n5 ) ";
+        LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 2), LineColumn.create(1, 4),
+                                                 LineColumn.create(1, 6), LineColumn.create(2, 2),
+                                                 LineColumn.create(2, 4), LineColumn.create(3, 1),
+                                                 LineColumn.create(3, 3) };
+
+        assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns);
+    }
+
+    /**
+     * Scans {@code input} and asserts that the tokens, in order, are reported
+     * at the expected positions. The failure message names the offending token
+     * and both positions so a mismatch can be diagnosed without a debugger.
+     */
+    private void assertTokenLineAndColumnsMatch(String input, LineColumn[] expectedLineColumnList) throws IOException {
+        InputStream stringInputStream = TestUtilities.createInputStreamFromString(input);
+        LispScanner lispScanner = new LispScanner(stringInputStream, "stringInputStream");
+
+        for (int i = 0; i < expectedLineColumnList.length; i++) {
+            LineColumn expected = expectedLineColumnList[i];
+            Token nextToken = lispScanner.nextToken();
+            String failureMessage = "token " + i + ": expected " + expected + " but was line " + nextToken.getLine()
+                    + ", column " + nextToken.getColumn();
+
+            assertTrue(failureMessage, expected.isEqual(nextToken));
+        }
+    }
+
+    /** An immutable expected (line, column) position for a token. */
+    private static final class LineColumn {
+
+        private final int line;
+        private final int column;
+
+        private LineColumn(int line, int column) {
+            this.line = line;
+            this.column = column;
+        }
+
+        public static LineColumn create(int line, int column) {
+            return new LineColumn(line, column);
+        }
+
+        /** Returns whether {@code token} reports exactly this line and column. */
+        public boolean isEqual(Token token) {
+            return (this.line == token.getLine()) && (this.column == token.getColumn());
+        }
+
+        @Override
+        public String toString() {
+            return "line " + line + ", column " + column;
+        }
+
+    }
+
+}
--- a/test/scanner/LispScannerTypeTester.java
+++ b/test/scanner/LispScannerTypeTester.java
@ -10,7 +10,7 @@ import org.junit.Test;
 import scanner.Token.Type;
 import testutils.TestUtilities;

-public class LispScannerTester {
+public class LispScannerTypeTester {

    @Test
    public void givenEmptyFile_returnsCorrectTokenTypes() throws IOException {