From 9b3261f5756f8a5f672645e959d8d4ea188d2488 Mon Sep 17 00:00:00 2001
From: Mike Cifelli <macifell@us.ibm.com>
Date: Fri, 9 Dec 2016 12:29:56 -0500
Subject: [PATCH] Added unit tests and started refactoring LispScanner

---
 src/scanner/LispFilterInputStream.java        |  12 +-
 src/scanner/LispScanner.java                  | 248 +++++++-----------
 src/util/Characters.java                      |  20 ++
 test/scanner/LispScannerLineColumnTester.java |  87 ++++++
 ...Tester.java => LispScannerTypeTester.java} |   2 +-
 5 files changed, 208 insertions(+), 161 deletions(-)
 create mode 100644 src/util/Characters.java
 create mode 100644 test/scanner/LispScannerLineColumnTester.java
 rename test/scanner/{LispScannerTester.java => LispScannerTypeTester.java} (98%)
diff --git a/src/scanner/LispFilterInputStream.java b/src/scanner/LispFilterInputStream.java
index 8e550b0..e60a01b 100644
--- a/src/scanner/LispFilterInputStream.java
+++ b/src/scanner/LispFilterInputStream.java
@@ -1,5 +1,11 @@
 package scanner;
 
+import static util.Characters.BACKSLASH;
+import static util.Characters.DOUBLE_QUOTE;
+import static util.Characters.EOF;
+import static util.Characters.NEWLINE;
+import static util.Characters.SEMICOLON;
+
 import java.io.FilterInputStream;
 import java.io.IOException;
 import java.io.InputStream;
@@ -44,11 +50,11 @@ public class LispFilterInputStream extends FilterInputStream {
     }
 
     private boolean haveEncounteredStringBoundary() {
-        return (previousCharacter != '\\') && (nextCharacter == '\"');
+        return (previousCharacter != BACKSLASH) && (nextCharacter == DOUBLE_QUOTE);
     }
 
     private boolean haveEnteredComment() {
-        return (nextCharacter == ';') && (!isInQuotedString);
+        return (nextCharacter == SEMICOLON) && (!isInQuotedString);
     }
 
     private void consumeAllBytesInComment() throws IOException {
@@ -57,7 +63,7 @@ public class LispFilterInputStream extends FilterInputStream {
     }
 
     private boolean stillInComment() {
-        return (nextCharacter != '\n') && (nextCharacter != -1);
+        return (nextCharacter != NEWLINE) && (nextCharacter != EOF);
     }
 
 }
diff --git a/src/scanner/LispScanner.java b/src/scanner/LispScanner.java
index 1788d85..d1232ec 100644
--- a/src/scanner/LispScanner.java
+++ b/src/scanner/LispScanner.java
@@ -1,171 +1,120 @@
-/*
- * Name: Mike Cifelli
- * Course: CIS 443 - Programming Languages
- * Assignment: Lisp Interpreter Phase 1 - Lexical Analysis
- */
-
 package scanner;
 
+import static util.Characters.BACKSLASH;
+import static util.Characters.DOUBLE_QUOTE;
+import static util.Characters.EOF;
+import static util.Characters.HASH;
+import static util.Characters.LEFT_PARENTHESIS;
+import static util.Characters.LEFT_SQUARE_BRACKET;
+import static util.Characters.NEWLINE;
+import static util.Characters.PERIOD;
+import static util.Characters.RIGHT_PARENTHESIS;
+import static util.Characters.RIGHT_SQUARE_BRACKET;
+import static util.Characters.SEMICOLON;
+import static util.Characters.SINGLE_QUOTE;
+import static util.Characters.TICK_MARK;
+
 import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.HashMap;
+import java.util.Map;
 
 /**
- * A <code>LispScanner</code> converts a stream of bytes into a stream of Lisp
- * tokens. When the end of stream has been reached a token with a type of
- * <code>Token.Type.EOF</code> is returned from the <code>nextToken</code>
- * method of this scanner.
+ * Converts a stream of bytes into a stream of Lisp tokens.
  */
 public class LispScanner {
 
-    private LispFilterInputStream inStream;
-    private Token currToken;
-    private String fileName;
-    private int line;
-    private int column;
+    /**
+     * Characters that can never be part of an identifier. Used as a set: only key
+     * presence matters. Whitespace is rejected separately by isLegalIdentifierCharacter.
+     */
+    private static final Map<Character, Boolean> illegalIdentifierCharacters = new HashMap<>();
+
+    static {
+        char[] illegalCharacters = { DOUBLE_QUOTE, SINGLE_QUOTE, BACKSLASH, TICK_MARK, LEFT_PARENTHESIS,
+                                     RIGHT_PARENTHESIS, LEFT_SQUARE_BRACKET, RIGHT_SQUARE_BRACKET, HASH, PERIOD,
+                                     SEMICOLON };
+
+        for (char illegalCharacter : illegalCharacters) {
+            illegalIdentifierCharacters.put(illegalCharacter, true);
+        }
+    }
+
+    private InputStream inputStream;
+    private String inputStreamName;
+    private int lineNumber;
+    private int columnNumber;
 
-    /**
-     * Create a new <code>LispScanner</code> that produces Lisp tokens from the
-     * specified input stream.
-     *
-     * @param in
-     *            the input stream to obtain Lisp tokens from (must not be
-     *            <code>null</code>)
-     * @param fileName
-     *            the name of the file that <code>in</code> is reading from
-     */
     public LispScanner(InputStream in, String fileName) {
-        this.inStream = new LispFilterInputStream(new BufferedInputStream(in));
-        this.currToken = null;
-        this.fileName = fileName;
-        this.line = 1;
-        this.column = 0;
+        this.inputStream = new LispFilterInputStream(new BufferedInputStream(in));
+        this.inputStreamName = fileName;
+        this.lineNumber = 1;
+        this.columnNumber = 0;
     }
 
-    /**
-     * Returns the same Lisp token returned from the last call to the
-     * <code>nextToken</code> method of this scanner. In the case that no calls
-     * to <code>nextToken</code> have been made yet, this method returns
-     * <code>null</code>.
-     *
-     * @return the last Lisp token returned from this scanner or
-     *         <code>null</code> (if no tokens have been returned from this
-     *         scanner yet)
-     */
-    public Token getCurrToken() {
-        return currToken;
-    }
-
-    /**
-     * Returns the next Lisp token from this scanner.
-     *
-     * @return the next Lisp token from this scanner.
-     * @throws RuntimeException
-     *             Indicates that an illegal character or an unterminated quoted
-     *             string was encountered in the input stream (not counting
-     *             comments).
-     * @throws IOException
-     *             Indicates that an I/O error has occurred.
-     */
     public Token nextToken() throws IOException {
-        currToken = retrieveNextToken();
-
-        return currToken;
-    }
-
-    // Retrieve the next Lisp token from 'inStream'.
-    //
-    // Returns: the next Lisp token found in 'inStream'
-    // Precondition: 'inStream' must not be null.
-    // Throws: RuntimeException - Indicates that an illegal character or an
-    // unterminated quoted string was encountered in
-    // 'inStream'.
-    // Throws: IOException - Indicates that an I/O error has occurred.
-    private Token retrieveNextToken() throws IOException {
         int c;
 
-        while ((c = inStream.read()) != -1) {
+        while ((c = inputStream.read()) != EOF) {
             char nextChar = (char) c;
 
-            ++column;
+            ++columnNumber;
 
-            // determine the type of the Lisp token from the character obtained
-            // from 'inStream'
             switch (nextChar) {
-            case '\n':
-                // we have hit a new line so increment 'line' and reset
-                // 'column'
-                ++line;
-                column = 0;
-
+            case NEWLINE:
+                moveToNewLine();
                 break;
-            case '(':
-                return new Token(Token.Type.LEFT_PAREN, "(", fileName, line, column);
-            case ')':
-                return new Token(Token.Type.RIGHT_PAREN, ")", fileName, line, column);
-            case '\'':
-                return new Token(Token.Type.QUOTE_MARK, "\'", fileName, line, column);
-            case '\"':
+            case LEFT_PARENTHESIS:
+                return new Token(Token.Type.LEFT_PAREN, "(", inputStreamName, lineNumber, columnNumber);
+            case RIGHT_PARENTHESIS:
+                return new Token(Token.Type.RIGHT_PAREN, ")", inputStreamName, lineNumber, columnNumber);
+            case SINGLE_QUOTE:
+                return new Token(Token.Type.QUOTE_MARK, "\'", inputStreamName, lineNumber, columnNumber);
+            case DOUBLE_QUOTE:
                 return retrieveString(nextChar);
             default:
-                if (Character.isWhitespace(nextChar)) { // skip whitespace
+                if (Character.isWhitespace(nextChar)) {
                     continue;
-                } else if (Character.isDigit(nextChar)) { // number
+                } else if (Character.isDigit(nextChar)) {
                     return retrieveNumber(nextChar);
-                } else if (isLegalIdChar(nextChar)) { // identifier
+                } else if (isLegalIdentifierCharacter(nextChar)) {
                     return retrieveIdentifier(nextChar);
                 } else {
-                    // 'nextChar' can not start any Lisp token
-
-                    throw new RuntimeException(
-                            "illegal character " + "\'" + nextChar + "\'" + " - line " + line + " column " + column);
+                    throw new RuntimeException("illegal character " + "\'" + nextChar + "\'" + " - line " + lineNumber
+                            + " column " + columnNumber);
                 }
             }
         }
 
-        // we have reached the end of 'inStream' so we return an end-of-file
-        // token
-        return new Token(Token.Type.EOF, "EOF", fileName, line, column);
+        return new Token(Token.Type.EOF, "EOF", inputStreamName, lineNumber, columnNumber);
     }
 
-    // Retrieve a quoted string token from 'inStream'.
-    //
-    // Parameters: firstDoubleQuote - the opening double quote of this quoted
-    // string
-    // Returns: a quoted string token obtained from 'instream'
-    // Throws: RuntimeException - Indicates that this quoted string was
-    // missing its terminating double quote.
-    // Throws: IOException - Indicates that an I/O error has occurred.
-    // Precondition: 'firstDoubleQuote' must be the leading double quote
-    // character of this quoted string and 'inStream' must not
-    // be null.
     private Token retrieveString(char firstDoubleQuote) throws IOException {
         StringBuffer text = new StringBuffer();
-        int startLine = line;
-        int startColumn = column;
+        int startLine = lineNumber;
+        int startColumn = columnNumber;
         char prevChar = firstDoubleQuote;
 
         text.append(firstDoubleQuote);
 
         int c;
 
-        while ((c = inStream.read()) != -1) {
+        while ((c = inputStream.read()) != EOF) {
             char nextChar = (char) c;
 
-            ++column;
+            ++columnNumber;
             text.append(nextChar);
 
             switch (nextChar) {
-            case '\n':
-                ++line;
-                column = 0;
-
+            case NEWLINE:
+                moveToNewLine();
                 break;
-            case '\"':
-                if (prevChar != '\\') {
+            case DOUBLE_QUOTE:
+                if (prevChar != BACKSLASH) {
                     // we have found the terminating double quote
 
-                    return new Token(Token.Type.STRING, text.toString(), fileName, startLine, startColumn);
+                    return new Token(Token.Type.STRING, text.toString(), inputStreamName, startLine, startColumn);
                 }
 
                 // this is an escaped double quote
@@ -180,96 +129,81 @@ public class LispScanner {
         throw new RuntimeException("unterminated quoted string" + " - line " + startLine + " column " + startColumn);
     }
 
-    // Retrieve a number token from 'inStream'.
-    //
-    // Parameters: firstDigit - the first digit of this number
-    // Returns: a number token obtained from 'inStream'
-    // Throws: IOException - Indicates that an I/O error has occurred.
-    // Precondition: 'firstDigit' must be the first digit of this number and
-    // 'inStream' must not be null.
     private Token retrieveNumber(char firstDigit) throws IOException {
         StringBuffer text = new StringBuffer();
-        int startColumn = column;
+        int startColumn = columnNumber;
 
         text.append(firstDigit);
-        inStream.mark(1);
+        inputStream.mark(1);
 
         int c;
 
-        while ((c = inStream.read()) != -1) {
+        while ((c = inputStream.read()) != EOF) {
             char nextChar = (char) c;
 
             if (Character.isDigit(nextChar)) {
                 // 'nextChar' is a digit in this number
 
                 text.append(nextChar);
-                ++column;
+                ++columnNumber;
             } else {
                 // we have reached the end of the number
 
-                inStream.reset(); // unread the last character
+                inputStream.reset(); // unread the last character
 
-                return new Token(Token.Type.NUMBER, text.toString(), fileName, line, startColumn);
+                return new Token(Token.Type.NUMBER, text.toString(), inputStreamName, lineNumber, startColumn);
             }
 
-            inStream.mark(1);
+            inputStream.mark(1);
         }
 
         // there are no more bytes to be read from 'inStream' after this number
         // token
 
-        return new Token(Token.Type.NUMBER, text.toString(), fileName, line, startColumn);
+        return new Token(Token.Type.NUMBER, text.toString(), inputStreamName, lineNumber, startColumn);
     }
 
-    // Retrieve an identifier token from 'inStream'.
-    //
-    // Parameters: firstChar - the first character of this identifier
-    // Returns: an identifier token obtained from 'inStream'
-    // Throws: IOException - Indicates that an I/O error has occurred.
-    // Precondition: 'firsChar' must be the first character of this identifier
-    // and 'inStream' must not be null.
     private Token retrieveIdentifier(char firstChar) throws IOException {
         StringBuffer text = new StringBuffer();
-        int startColumn = column;
+        int startColumn = columnNumber;
 
         text.append(firstChar);
-        inStream.mark(1);
+        inputStream.mark(1);
 
         int c;
 
-        while ((c = inStream.read()) != -1) {
+        while ((c = inputStream.read()) != EOF) {
             char nextChar = (char) c;
 
-            if (isLegalIdChar(nextChar)) {
+            if (isLegalIdentifierCharacter(nextChar)) {
                 // 'nextChar' is part of the identifier
 
                 text.append(nextChar);
-                ++column;
+                ++columnNumber;
             } else {
                 // we have reached the end of this identifier
 
-                inStream.reset(); // unread the last character
+                inputStream.reset(); // unread the last character
 
-                return new Token(Token.Type.IDENTIFIER, text.toString(), fileName, line, startColumn);
+                return new Token(Token.Type.IDENTIFIER, text.toString(), inputStreamName, lineNumber, startColumn);
             }
 
-            inStream.mark(1);
+            inputStream.mark(1);
         }
 
         // there are no more bytes to be read from 'inStream' after this
         // identifier token
 
-        return new Token(Token.Type.IDENTIFIER, text.toString(), fileName, line, startColumn);
+        return new Token(Token.Type.IDENTIFIER, text.toString(), inputStreamName, lineNumber, startColumn);
     }
 
-    // Test if a character is legal to be contained within an identifier in
-    // Lisp.
-    //
-    // Returns: 'true' if the character can be found within an identifier in
-    // Lisp; 'false' otherwise
-    private boolean isLegalIdChar(char c) {
-        return ((!Character.isWhitespace(c)) && (c != '\"') && (c != '\'') && (c != '\\') && (c != '`') && (c != '(')
-                && (c != ')') && (c != '[') && (c != ']') && (c != '#') && (c != '.') && (c != ';'));
+    private void moveToNewLine() { // called once a newline character has been consumed
+        columnNumber = 0; // column counting restarts on the new line
+        ++lineNumber;
+    }
+
+    private boolean isLegalIdentifierCharacter(char c) { // legal: neither whitespace nor in the illegal table
+        return !(Character.isWhitespace(c) || illegalIdentifierCharacters.containsKey(c));
+    }
 
 }
diff --git a/src/util/Characters.java b/src/util/Characters.java
new file mode 100644
index 0000000..9c6c731
--- /dev/null
+++ b/src/util/Characters.java
@@ -0,0 +1,20 @@
+package util;
+
+/** Character constants shared by the scanner classes; a static holder that is never instantiated. */
+public final class Characters {
+    public static final char BACKSLASH = '\\';
+    public static final char DOUBLE_QUOTE = '\"';
+    public static final char HASH = '#';
+    public static final char LEFT_PARENTHESIS = '(';
+    public static final char LEFT_SQUARE_BRACKET = '[';
+    public static final char NEWLINE = '\n';
+    public static final char PERIOD = '.';
+    public static final char RIGHT_PARENTHESIS = ')';
+    public static final char RIGHT_SQUARE_BRACKET = ']';
+    public static final char SEMICOLON = ';';
+    public static final char SINGLE_QUOTE = '\'';
+    public static final char TICK_MARK = '`';
+    public static final int EOF = -1; // compared against the int result of InputStream.read()
+
+    private Characters() {} // constants only: prevent instantiation
+}
diff --git a/test/scanner/LispScannerLineColumnTester.java b/test/scanner/LispScannerLineColumnTester.java
new file mode 100644
index 0000000..3a6f148
--- /dev/null
+++ b/test/scanner/LispScannerLineColumnTester.java
@@ -0,0 +1,87 @@
+package scanner;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.junit.Test;
+
+import testutils.TestUtilities;
+
+/** Verifies the line and column that LispScanner records for each token it returns. */
+public class LispScannerLineColumnTester {
+
+    @Test
+    public void givenSimpleString_RecordsCorrectLocation() throws IOException {
+        String input = "\"string\"";
+        LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 1) };
+
+        assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns);
+    }
+
+    @Test
+    public void givenMultipleStrings_RecordsCorrectLocations() throws IOException {
+        String input = "\"string1\" \n \"string2 \n with newline\" \n  \"string3\"";
+        LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 1), LineColumn.create(2, 2),
+                                                 LineColumn.create(4, 3) };
+
+        assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns);
+    }
+
+    @Test
+    public void givenQuotedList_RecordsCorrectLocations() throws IOException {
+        String input = "'(1 2 3 4 5)";
+        LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 1), LineColumn.create(1, 2),
+                                                 LineColumn.create(1, 3), LineColumn.create(1, 5),
+                                                 LineColumn.create(1, 7), LineColumn.create(1, 9),
+                                                 LineColumn.create(1, 11), LineColumn.create(1, 12) };
+
+        assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns);
+    }
+
+    @Test
+    public void givenListSpanningMultipleLines_RecordsCorrectLocations() throws IOException {
+        String input = " ( 1 2 \n 3 4 \n5 ) ";
+        LineColumn[] expectedLinesAndColumns = { LineColumn.create(1, 2), LineColumn.create(1, 4),
+                                                 LineColumn.create(1, 6), LineColumn.create(2, 2),
+                                                 LineColumn.create(2, 4), LineColumn.create(3, 1),
+                                                 LineColumn.create(3, 3) };
+
+        assertTokenLineAndColumnsMatch(input, expectedLinesAndColumns);
+    }
+
+    // Scans the input and checks each returned token, in order, against the expected (line, column) pairs.
+    private void assertTokenLineAndColumnsMatch(String input, LineColumn[] expectedLineColumnList) throws IOException {
+        InputStream stringInputStream = TestUtilities.createInputStreamFromString(input);
+        LispScanner lispScanner = new LispScanner(stringInputStream, "stringInputStream");
+
+        for (LineColumn lineColumn : expectedLineColumnList) {
+            Token nextToken = lispScanner.nextToken();
+            assertTrue("expected line " + lineColumn.line + " column " + lineColumn.column + " but was line "
+                       + nextToken.getLine() + " column " + nextToken.getColumn(), lineColumn.isEqual(nextToken));
+        }
+    }
+
+    // The (line, column) position expected for a single token.
+    private static class LineColumn {
+
+        private int line;
+        private int column;
+
+        public static LineColumn create(int line, int column) {
+            LineColumn lineColumn = new LineColumn();
+            lineColumn.line = line;
+            lineColumn.column = column;
+
+            return lineColumn;
+        }
+
+        // True when the token reports exactly this line and this column.
+        public boolean isEqual(Token token) {
+            return (this.line == token.getLine()) && (this.column == token.getColumn());
+        }
+
+    }
+
+}
diff --git a/test/scanner/LispScannerTester.java b/test/scanner/LispScannerTypeTester.java
similarity index 98%
rename from test/scanner/LispScannerTester.java
rename to test/scanner/LispScannerTypeTester.java
index a0da5df..cc920e7 100644
--- a/test/scanner/LispScannerTester.java
+++ b/test/scanner/LispScannerTypeTester.java
@@ -10,7 +10,7 @@ import org.junit.Test;
 import scanner.Token.Type;
 import testutils.TestUtilities;
 
-public class LispScannerTester {
+public class LispScannerTypeTester {
 
     @Test
     public void givenEmptyFile_returnsCorrectTokenTypes() throws IOException {