Added unit tests and started refactoring LispScanner

2016-12-09 12:29:56 -05:00 · 2016-12-09 12:29:56 -05:00 · 9b3261f575
commit 9b3261f575
parent 4a5f169076
5 changed files with 208 additions and 161 deletions
--- a/src/scanner/LispFilterInputStream.java
+++ b/src/scanner/LispFilterInputStream.java
@ -1,5 +1,11 @@
 package scanner;
 import static util.Characters.BACKSLASH;
 import static util.Characters.DOUBLE_QUOTE;
 import static util.Characters.EOF;
 import static util.Characters.NEWLINE;
 import static util.Characters.SEMICOLON;
 import java.io.FilterInputStream;
 import java.io.IOException;
 import java.io.InputStream;
@ -44,11 +50,11 @@ public class LispFilterInputStream extends FilterInputStream {
    }
    private boolean haveEncounteredStringBoundary() {
-        return (previousCharacter != '\\') && (nextCharacter == '\"');
+        return (previousCharacter != BACKSLASH) && (nextCharacter == DOUBLE_QUOTE);
    }
    private boolean haveEnteredComment() {
-        return (nextCharacter == ';') && (!isInQuotedString);
+        return (nextCharacter == SEMICOLON) && (!isInQuotedString);
    }
    private void consumeAllBytesInComment() throws IOException {
@ -57,7 +63,7 @@ public class LispFilterInputStream extends FilterInputStream {
    }
    private boolean stillInComment() {
-        return (nextCharacter != '\n') && (nextCharacter != -1);
+        return (nextCharacter != NEWLINE) && (nextCharacter != EOF);
    }
 }
--- a/src/scanner/LispScanner.java
+++ b/src/scanner/LispScanner.java
@ -1,171 +1,120 @@
 /*
 * Name: Mike Cifelli
 * Course: CIS 443 - Programming Languages
 * Assignment: Lisp Interpreter Phase 1 - Lexical Analysis
 */
 package scanner;
 import static util.Characters.BACKSLASH;
 import static util.Characters.DOUBLE_QUOTE;
 import static util.Characters.EOF;
 import static util.Characters.HASH;
 import static util.Characters.LEFT_PARENTHESIS;
 import static util.Characters.LEFT_SQUARE_BRACKET;
 import static util.Characters.NEWLINE;
 import static util.Characters.PERIOD;
 import static util.Characters.RIGHT_PARENTHESIS;
 import static util.Characters.RIGHT_SQUARE_BRACKET;
 import static util.Characters.SEMICOLON;
 import static util.Characters.SINGLE_QUOTE;
 import static util.Characters.TICK_MARK;
 import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.HashMap;
 import java.util.Map;
 /**
- * A <code>LispScanner</code> converts a stream of bytes into a stream of Lisp
+ * Converts a stream of bytes into a stream of Lisp tokens.
 * tokens. When the end of stream has been reached a token with a type of
 * <code>Token.Type.EOF</code> is returned from the <code>nextToken</code>
 * method of this scanner.
 */
 public class LispScanner {
-    private LispFilterInputStream inStream;
+    private static Map<Character, Boolean> illegalIdentifierCharacters = new HashMap<>();
-    private Token currToken;
+
-    private String fileName;
+    static {
-    private int line;
+        illegalIdentifierCharacters.put(DOUBLE_QUOTE, true);
-    private int column;
+        illegalIdentifierCharacters.put(SINGLE_QUOTE, true);
        illegalIdentifierCharacters.put(BACKSLASH, true);
        illegalIdentifierCharacters.put(TICK_MARK, true);
        illegalIdentifierCharacters.put(LEFT_PARENTHESIS, true);
        illegalIdentifierCharacters.put(RIGHT_PARENTHESIS, true);
        illegalIdentifierCharacters.put(LEFT_SQUARE_BRACKET, true);
        illegalIdentifierCharacters.put(RIGHT_SQUARE_BRACKET, true);
        illegalIdentifierCharacters.put(HASH, true);
        illegalIdentifierCharacters.put(PERIOD, true);
        illegalIdentifierCharacters.put(SEMICOLON, true);
    }
    private InputStream inputStream;
    private String inputStreamName;
    private int lineNumber;
    private int columnNumber;
    /**
     * Create a new <code>LispScanner</code> that produces Lisp tokens from the
     * specified input stream.
     *
     * @param in
     *            the input stream to obtain Lisp tokens from (must not be
     *            <code>null</code>)
     * @param fileName
     *            the name of the file that <code>in</code> is reading from
     */
    public LispScanner(InputStream in, String fileName) {
-        this.inStream = new LispFilterInputStream(new BufferedInputStream(in));
+        this.inputStream = new LispFilterInputStream(new BufferedInputStream(in));
-        this.currToken = null;
+        this.inputStreamName = fileName;
-        this.fileName = fileName;
+        this.lineNumber = 1;
-        this.line = 1;
+        this.columnNumber = 0;
        this.column = 0;
    }
    /**
     * Returns the same Lisp token returned from the last call to the
     * <code>nextToken</code> method of this scanner. In the case that no calls
     * to <code>nextToken</code> have been made yet, this method returns
     * <code>null</code>.
     *
     * @return the last Lisp token returned from this scanner or
     *         <code>null</code> (if no tokens have been returned from this
     *         scanner yet)
     */
    public Token getCurrToken() {
        return currToken;
    }
    /**
     * Returns the next Lisp token from this scanner.
     *
     * @return the next Lisp token from this scanner.
     * @throws RuntimeException
     *             Indicates that an illegal character or an unterminated quoted
     *             string was encountered in the input stream (not counting
     *             comments).
     * @throws IOException
     *             Indicates that an I/O error has occurred.
     */
    public Token nextToken() throws IOException {
        currToken = retrieveNextToken();
        return currToken;
    }
    // Retrieve the next Lisp token from 'inStream'.
    //
    // Returns: the next Lisp token found in 'inStream'
    // Precondition: 'inStream' must not be null.
    // Throws: RuntimeException - Indicates that an illegal character or an
    // unterminated quoted string was encountered in
    // 'inStream'.
    // Throws: IOException - Indicates that an I/O error has occurred.
    private Token retrieveNextToken() throws IOException {
        int c;
-        while ((c = inStream.read()) != -1) {
+        while ((c = inputStream.read()) != EOF) {
            char nextChar = (char) c;
-            ++column;
+            ++columnNumber;
            // determine the type of the Lisp token from the character obtained
            // from 'inStream'
            switch (nextChar) {
-            case '\n':
+            case NEWLINE:
-                // we have hit a new line so increment 'line' and reset
+                moveToNewLine();
                // 'column'
                ++line;
                column = 0;
                break;
-            case '(':
+            case LEFT_PARENTHESIS:
-                return new Token(Token.Type.LEFT_PAREN, "(", fileName, line, column);
+                return new Token(Token.Type.LEFT_PAREN, "(", inputStreamName, lineNumber, columnNumber);
-            case ')':
+            case RIGHT_PARENTHESIS:
-                return new Token(Token.Type.RIGHT_PAREN, ")", fileName, line, column);
+                return new Token(Token.Type.RIGHT_PAREN, ")", inputStreamName, lineNumber, columnNumber);
-            case '\'':
+            case SINGLE_QUOTE:
-                return new Token(Token.Type.QUOTE_MARK, "\'", fileName, line, column);
+                return new Token(Token.Type.QUOTE_MARK, "\'", inputStreamName, lineNumber, columnNumber);
-            case '\"':
+            case DOUBLE_QUOTE:
                return retrieveString(nextChar);
            default:
-                if (Character.isWhitespace(nextChar)) { // skip whitespace
+                if (Character.isWhitespace(nextChar)) {
                    continue;
-                } else if (Character.isDigit(nextChar)) { // number
+                } else if (Character.isDigit(nextChar)) {
                    return retrieveNumber(nextChar);
-                } else if (isLegalIdChar(nextChar)) { // identifier
+                } else if (isLegalIdentifierCharacter(nextChar)) {
                    return retrieveIdentifier(nextChar);
                } else {
-                    // 'nextChar' can not start any Lisp token
+                    throw new RuntimeException("illegal character " + "\'" + nextChar + "\'" + " - line " + lineNumber
-
+                            + " column " + columnNumber);
                    throw new RuntimeException(
                            "illegal character " + "\'" + nextChar + "\'" + " - line " + line + " column " + column);
                }
            }
        }
-        // we have reached the end of 'inStream' so we return an end-of-file
+        return new Token(Token.Type.EOF, "EOF", inputStreamName, lineNumber, columnNumber);
        // token
        return new Token(Token.Type.EOF, "EOF", fileName, line, column);
    }
    // Retrieve a quoted string token from 'inStream'.
    //
    // Parameters: firstDoubleQuote - the opening double quote of this quoted
    // string
    // Returns: a quoted string token obtained from 'inStream'
    // Throws: RuntimeException - Indicates that this quoted string was
    // missing its terminating double quote.
    // Throws: IOException - Indicates that an I/O error has occurred.
    // Precondition: 'firstDoubleQuote' must be the leading double quote
    // character of this quoted string and 'inStream' must not
    // be null.
    private Token retrieveString(char firstDoubleQuote) throws IOException {
        StringBuffer text = new StringBuffer();
-        int startLine = line;
+        int startLine = lineNumber;
-        int startColumn = column;
+        int startColumn = columnNumber;
        char prevChar = firstDoubleQuote;
        text.append(firstDoubleQuote);
        int c;
-        while ((c = inStream.read()) != -1) {
+        while ((c = inputStream.read()) != EOF) {
            char nextChar = (char) c;
-            ++column;
+            ++columnNumber;
            text.append(nextChar);
            switch (nextChar) {
-            case '\n':
+            case NEWLINE:
-                ++line;
+                moveToNewLine();
                column = 0;
                break;
-            case '\"':
+            case DOUBLE_QUOTE:
-                if (prevChar != '\\') {
+                if (prevChar != BACKSLASH) {
                    // we have found the terminating double quote
-                    return new Token(Token.Type.STRING, text.toString(), fileName, startLine, startColumn);
+                    return new Token(Token.Type.STRING, text.toString(), inputStreamName, startLine, startColumn);
                }
                // this is an escaped double quote
@ -180,96 +129,81 @@ public class LispScanner {
        throw new RuntimeException("unterminated quoted string" + " - line " + startLine + " column " + startColumn);
    }
    // Retrieve a number token from 'inStream'.
    //
    // Parameters: firstDigit - the first digit of this number
    // Returns: a number token obtained from 'inStream'
    // Throws: IOException - Indicates that an I/O error has occurred.
    // Precondition: 'firstDigit' must be the first digit of this number and
    // 'inStream' must not be null.
    private Token retrieveNumber(char firstDigit) throws IOException {
        StringBuffer text = new StringBuffer();
-        int startColumn = column;
+        int startColumn = columnNumber;
        text.append(firstDigit);
-        inStream.mark(1);
+        inputStream.mark(1);
        int c;
-        while ((c = inStream.read()) != -1) {
+        while ((c = inputStream.read()) != EOF) {
            char nextChar = (char) c;
            if (Character.isDigit(nextChar)) {
                // 'nextChar' is a digit in this number
                text.append(nextChar);
-                ++column;
+                ++columnNumber;
            } else {
                // we have reached the end of the number
-                inStream.reset(); // unread the last character
+                inputStream.reset(); // unread the last character
-                return new Token(Token.Type.NUMBER, text.toString(), fileName, line, startColumn);
+                return new Token(Token.Type.NUMBER, text.toString(), inputStreamName, lineNumber, startColumn);
            }
-            inStream.mark(1);
+            inputStream.mark(1);
        }
        // there are no more bytes to be read from 'inStream' after this number
        // token
-        return new Token(Token.Type.NUMBER, text.toString(), fileName, line, startColumn);
+        return new Token(Token.Type.NUMBER, text.toString(), inputStreamName, lineNumber, startColumn);
    }
    // Retrieve an identifier token from 'inStream'.
    //
    // Parameters: firstChar - the first character of this identifier
    // Returns: an identifier token obtained from 'inStream'
    // Throws: IOException - Indicates that an I/O error has occurred.
    // Precondition: 'firstChar' must be the first character of this identifier
    // and 'inStream' must not be null.
    private Token retrieveIdentifier(char firstChar) throws IOException {
        StringBuffer text = new StringBuffer();
-        int startColumn = column;
+        int startColumn = columnNumber;
        text.append(firstChar);
-        inStream.mark(1);
+        inputStream.mark(1);
        int c;
-        while ((c = inStream.read()) != -1) {
+        while ((c = inputStream.read()) != EOF) {
            char nextChar = (char) c;
-            if (isLegalIdChar(nextChar)) {
+            if (isLegalIdentifierCharacter(nextChar)) {
                // 'nextChar' is part of the identifier
                text.append(nextChar);
-                ++column;
+                ++columnNumber;
            } else {
                // we have reached the end of this identifier
-                inStream.reset(); // unread the last character
+                inputStream.reset(); // unread the last character
-                return new Token(Token.Type.IDENTIFIER, text.toString(), fileName, line, startColumn);
+                return new Token(Token.Type.IDENTIFIER, text.toString(), inputStreamName, lineNumber, startColumn);
            }
-            inStream.mark(1);
+            inputStream.mark(1);
        }
        // there are no more bytes to be read from 'inStream' after this
        // identifier token
-        return new Token(Token.Type.IDENTIFIER, text.toString(), fileName, line, startColumn);
+        return new Token(Token.Type.IDENTIFIER, text.toString(), inputStreamName, lineNumber, startColumn);
    }
-    // Test if a character is legal to be contained within an identifier in
+    private void moveToNewLine() {
-    // Lisp.
+        lineNumber++;
-    //
+        columnNumber = 0;
-    // Returns: 'true' if the character can be found within an identifier in
+    }
-    // Lisp; 'false' otherwise
+
-    private boolean isLegalIdChar(char c) {
+    private boolean isLegalIdentifierCharacter(char c) {
-        return ((!Character.isWhitespace(c)) && (c != '\"') && (c != '\'') && (c != '\\') && (c != '`') && (c != '(')
+        return (!Character.isWhitespace(c)) && (illegalIdentifierCharacters.get(c) == null);
                && (c != ')') && (c != '[') && (c != ']') && (c != '#') && (c != '.') && (c != ';'));
    }
 }
--- a/src/util/Characters.java
+++ b/src/util/Characters.java
@ -0,0 +1,20 @@
 package util;
 /**
  * Named character constants used by the scanner classes in place of raw
  * character literals.
  *
  * <p>This class holds constants only; it is final and has a private
  * constructor so that it can be neither instantiated nor subclassed.
  */
 public final class Characters {

    public static final char BACKSLASH = '\\';
    public static final char DOUBLE_QUOTE = '\"';
    public static final char HASH = '#';
    public static final char LEFT_PARENTHESIS = '(';
    public static final char LEFT_SQUARE_BRACKET = '[';
    public static final char NEWLINE = '\n';
    public static final char PERIOD = '.';
    public static final char RIGHT_PARENTHESIS = ')';
    public static final char RIGHT_SQUARE_BRACKET = ']';
    public static final char SEMICOLON = ';';
    public static final char SINGLE_QUOTE = '\'';
    public static final char TICK_MARK = '`';

    /**
     * End-of-stream marker. Declared as an <code>int</code> rather than a
     * <code>char</code> because it is compared against the <code>int</code>
     * result of <code>InputStream.read()</code>, and -1 is not a valid
     * <code>char</code> value.
     */
    public static final int EOF = -1;

    // Prevents instantiation: every member of this class is static.
    private Characters() {
    }
 }
--- a/test/scanner/LispScannerLineColumnTester.java
+++ b/test/scanner/LispScannerLineColumnTester.java
@ -0,0 +1,87 @@
 package scanner;
 import static org.junit.Assert.assertTrue;
 import java.io.IOException;
 import java.io.InputStream;
 import org.junit.Before;
 import org.junit.Test;
 import testutils.TestUtilities;
 /**
  * Checks that each token produced by <code>LispScanner</code> carries the
  * line and column at which the token starts in the input.
  *
  * <p>The expectations below use 1-based lines and columns, and a string token
  * that spans several lines is expected to report the position of its opening
  * double quote.
  */
 public class LispScannerLineColumnTester {

    @Before
    public void setUp() throws Exception {
        // No shared fixture: every test builds its own scanner from a string.
    }

    @Test
    public void givenSimpleString_RecordsCorrectLocation() throws IOException {
        String input = "\"string\"";
        LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 1) };

        assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns);
    }

    @Test
    public void givenMultipleStrings_RecordsCorrectLocations() throws IOException {
        String input = "\"string1\" \n \"string2 \n with newline\" \n  \"string3\"";
        LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 1), LineColumn.create(2, 2),
                                                 LineColumn.create(4, 3) };

        assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns);
    }

    @Test
    public void givenQuotedList_RecordsCorrectLocations() throws IOException {
        String input = "'(1 2 3 4 5)";
        LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 1), LineColumn.create(1, 2),
                                                 LineColumn.create(1, 3), LineColumn.create(1, 5),
                                                 LineColumn.create(1, 7), LineColumn.create(1, 9),
                                                 LineColumn.create(1, 11), LineColumn.create(1, 12) };

        assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns);
    }

    @Test
    public void givenListSpanningMultipleLines_RecordsCorrectLocations() throws IOException {
        String input = " ( 1 2 \n 3 4 \n5 ) ";
        LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 2), LineColumn.create(1, 4),
                                                 LineColumn.create(1, 6), LineColumn.create(2, 2),
                                                 LineColumn.create(2, 4), LineColumn.create(3, 1),
                                                 LineColumn.create(3, 3) };

        assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns);
    }

    /**
     * Scans <code>input</code> and checks that the first
     * <code>expectedLineColumnList.length</code> tokens start at the expected
     * positions, in order. Tokens after the last expected position are not
     * examined.
     *
     * @param input
     *            the Lisp source text to scan
     * @param expectedLineColumnList
     *            the expected start position of each token, in scan order
     * @throws IOException
     *             if the scanner fails to read the input
     */
    private void assertTokenLineAndColumnsMatch(String input, LineColumn[] expectedLineColumnList) throws IOException {
        InputStream stringInputStream = TestUtilities.createInputStreamFromString(input);
        LispScanner lispScanner = new LispScanner(stringInputStream, "stringInputStream");

        for (LineColumn lineColumn : expectedLineColumnList) {
            Token nextToken = lispScanner.nextToken();

            // The message makes a failure show both positions instead of a bare AssertionError.
            assertTrue(lineColumn.describeMismatch(nextToken), lineColumn.isEqual(nextToken));
        }
    }

    /** An expected (line, column) position for a token. */
    private static class LineColumn {

        private final int line;
        private final int column;

        private LineColumn(int line, int column) {
            this.line = line;
            this.column = column;
        }

        public static LineColumn create(int line, int column) {
            return new LineColumn(line, column);
        }

        /** Returns true when the token's recorded line and column both match this position. */
        public boolean isEqual(Token token) {
            return (this.line == token.getLine()) && (this.column == token.getColumn());
        }

        /** Builds the failure message that reports the expected and actual positions. */
        public String describeMismatch(Token token) {
            return "expected token at line " + line + " column " + column + " but was line " + token.getLine()
                    + " column " + token.getColumn();
        }
    }
 }
--- a/test/scanner/LispScannerTypeTester.java
+++ b/test/scanner/LispScannerTypeTester.java
@ -10,7 +10,7 @@ import org.junit.Test;
 import scanner.Token.Type;
 import testutils.TestUtilities;
-public class LispScannerTester {
+public class LispScannerTypeTester {
    @Test
    public void givenEmptyFile_returnsCorrectTokenTypes() throws IOException {