transcendental-lisp/scanner/LispScanner.java

/*
 * Name: Mike Cifelli
 * Course: CIS 443 - Programming Languages
 * Assignment: Lisp Interpreter Phase 1 - Lexical Analysis
 */

package scanner;

import java.io.InputStream;
import java.io.BufferedInputStream;
import java.io.IOException;

/**
 * A <code>LispScanner</code> converts a stream of bytes into a stream of Lisp
 * tokens. When the end of stream has been reached a token with a type of
 * <code>Token.Type.EOF</code> is returned from the <code>nextToken</code>
 * method of this scanner.
 *
 * <p>Positions recorded in tokens use the line and column counters kept by
 * this scanner: lines start at 1 and the first character of a line is column
 * 1. A scanner keeps mutable position state and performs no synchronization,
 * so a single instance should not be shared between threads.
 */
public class LispScanner {

    // Characters (besides whitespace) that may never appear in an identifier.
    private static final String ILLEGAL_ID_CHARS = "\"'\\`()[]#.;";

    private final LispFilterStream inStream;
    private final String fileName;
    private Token currToken;
    private int line;
    private int column;

    /**
     * Create a new <code>LispScanner</code> that produces Lisp tokens from the
     * specified input stream.
     *
     * @param in
     *  the input stream to obtain Lisp tokens from (must not be
     *  <code>null</code>)
     * @param fileName
     *  the name of the file that <code>in</code> is reading from; it is
     *  stored in every token produced by this scanner
     */
    public LispScanner(InputStream in, String fileName) {
        this.inStream = new LispFilterStream(new BufferedInputStream(in));
        this.currToken = null;
        this.fileName = fileName;
        this.line = 1;
        this.column = 0;
    }

    /**
     * Returns the same Lisp token returned from the last call to the
     * <code>nextToken</code> method of this scanner. In the case that no calls
     * to <code>nextToken</code> have been made yet, this method returns
     * <code>null</code>.
     *
     * @return
     *  the last Lisp token returned from this scanner or <code>null</code> (if
     *  no tokens have been returned from this scanner yet)
     */
    public Token getCurrToken() {
        return currToken;
    }

    /**
     * Returns the next Lisp token from this scanner and remembers it so that
     * it can be retrieved again through <code>getCurrToken</code>.
     *
     * @return
     *  the next Lisp token from this scanner.
     * @throws RuntimeException
     *  Indicates that an illegal character or an unterminated quoted string
     *  was encountered in the input stream (not counting comments).
     * @throws IOException
     *  Indicates that an I/O error has occurred.
     */
    public Token nextToken() throws IOException {
        currToken = retrieveNextToken();

        return currToken;
    }

    // Retrieve the next Lisp token from 'inStream', skipping any whitespace
    // that precedes it and keeping 'line' and 'column' up to date.
    //
    // Returns: the next Lisp token found in 'inStream', or an EOF token when
    //          'inStream' has no more characters
    // Throws: RuntimeException - Indicates that an illegal character or an
    //                            unterminated quoted string was encountered in
    //                            'inStream'.
    // Throws: IOException - Indicates that an I/O error has occurred.
    private Token retrieveNextToken() throws IOException {
        int c;

        while ((c = inStream.read()) != -1) {
            char nextChar = (char) c;

            ++column;

            // the first character decides which kind of token follows
            switch (nextChar) {
            case '\n':
                // start of a new line: the next character read will be
                // column 1 of the following line
                ++line;
                column = 0;

                break;
            case '(':
                return new Token(Token.Type.LEFT_PAREN,
                                 "(",
                                 fileName,
                                 line,
                                 column);
            case ')':
                return new Token(Token.Type.RIGHT_PAREN,
                                 ")",
                                 fileName,
                                 line,
                                 column);
            case '\'':
                return new Token(Token.Type.QUOTE_MARK,
                                 "'",
                                 fileName,
                                 line,
                                 column);
            case '"':
                return retrieveString(nextChar);
            default:
                if (Character.isWhitespace(nextChar)) {
                    // whitespace only separates tokens
                    break;
                }

                // digits are tested first because they are also legal
                // identifier characters
                if (Character.isDigit(nextChar)) {
                    return retrieveNumber(nextChar);
                }

                if (isLegalIdChar(nextChar)) {
                    return retrieveIdentifier(nextChar);
                }

                // 'nextChar' can not start any Lisp token
                throw new RuntimeException("illegal character " +
                                           "\'" + nextChar + "\'" +
                                           " - line " + line +
                                           " column " + column);
            }
        }

        // we have reached the end of 'inStream' so we return an end-of-file
        // token
        return new Token(Token.Type.EOF, "EOF", fileName, line, column);
    }

    // Retrieve a quoted string token from 'inStream'. The token text includes
    // both double quotes and every character between them exactly as read
    // (escape sequences are not translated here).
    //
    // A backslash escapes the single character that follows it, so '\"' does
    // not end the string while the quote in '\\"' does (the backslash there is
    // itself escaped).
    //
    // Parameters: firstDoubleQuote - the opening double quote of this quoted
    //                                string
    // Returns: a quoted string token positioned at the opening double quote
    // Throws: RuntimeException - Indicates that this quoted string was
    //                            missing its terminating double quote.
    // Throws: IOException - Indicates that an I/O error has occurred.
    // Precondition: 'firstDoubleQuote' must be the leading double quote
    //               character of this quoted string.
    private Token retrieveString(char firstDoubleQuote) throws IOException {
        StringBuilder text = new StringBuilder();
        int startLine = line;
        int startColumn = column;
        boolean escaped = false;  // true when the previous char was an
                                  // unescaped backslash

        text.append(firstDoubleQuote);

        int c;

        while ((c = inStream.read()) != -1) {
            char nextChar = (char) c;

            ++column;
            text.append(nextChar);

            if (nextChar == '\n') {
                // strings may span lines; keep the position counters correct
                ++line;
                column = 0;
            }

            if (escaped) {
                // this character was escaped by the preceding backslash, so
                // it can neither end the string nor start a new escape
                escaped = false;
            } else if (nextChar == '\\') {
                escaped = true;
            } else if (nextChar == '"') {
                // we have found the terminating double quote
                return new Token(Token.Type.STRING,
                                 text.toString(),
                                 fileName,
                                 startLine,
                                 startColumn);
            }
        }

        // the end of 'inStream' was reached before the terminating double
        // quote
        throw new RuntimeException("unterminated quoted string" +
                                   " - line " + startLine +
                                   " column " + startColumn);
    }

    // Retrieve a number token (a run of digits) from 'inStream'.
    //
    // Parameters: firstDigit - the first digit of this number
    // Returns: a number token obtained from 'inStream'
    // Throws: IOException - Indicates that an I/O error has occurred.
    // Precondition: 'firstDigit' must be the first digit of this number.
    private Token retrieveNumber(char firstDigit) throws IOException {
        return retrieveRun(Token.Type.NUMBER, firstDigit);
    }

    // Retrieve an identifier token (a run of legal identifier characters)
    // from 'inStream'.
    //
    // Parameters: firstChar - the first character of this identifier
    // Returns: an identifier token obtained from 'inStream'
    // Throws: IOException - Indicates that an I/O error has occurred.
    // Precondition: 'firstChar' must be the first character of this
    //               identifier.
    private Token retrieveIdentifier(char firstChar) throws IOException {
        return retrieveRun(Token.Type.IDENTIFIER, firstChar);
    }

    // Collect the longest run of characters that belong to a token of the
    // given type, starting with 'firstChar' (which has already been read and
    // counted in 'column').
    //
    // Parameters: type - Token.Type.NUMBER to accept only digits; any other
    //                    type accepts legal identifier characters
    //             firstChar - the first character of the token
    // Returns: a token of the given type positioned at 'firstChar'
    // Throws: IOException - Indicates that an I/O error has occurred.
    private Token retrieveRun(Token.Type type, char firstChar)
            throws IOException {
        StringBuilder text = new StringBuilder();
        int startColumn = column;

        text.append(firstChar);

        while (true) {
            // remember the position so one character of lookahead can be
            // pushed back when it turns out not to belong to this token
            inStream.mark(1);

            int c = inStream.read();

            if (c == -1) {
                // there are no more bytes to be read after this token
                break;
            }

            char nextChar = (char) c;
            boolean partOfToken = (type == Token.Type.NUMBER)
                                  ? Character.isDigit(nextChar)
                                  : isLegalIdChar(nextChar);

            if (! partOfToken) {
                inStream.reset();  // unread the last character

                break;
            }

            text.append(nextChar);
            ++column;
        }

        return new Token(type, text.toString(), fileName, line, startColumn);
    }

    // Test if a character is legal to be contained within an identifier in
    // Lisp: anything that is neither whitespace nor one of the characters in
    // 'ILLEGAL_ID_CHARS'.
    //
    // Returns: 'true' if the character can be found within an identifier in
    //          Lisp; 'false' otherwise
    private static boolean isLegalIdChar(char c) {
        return (! Character.isWhitespace(c))
               && (ILLEGAL_ID_CHARS.indexOf(c) < 0);
    }

}
Initial commit 2016-12-07 14:16:45 -05:00			`/*`
			`* Name: Mike Cifelli`
			`* Course: CIS 443 - Programming Languages`
			`* Assignment: Lisp Interpreter Phase 1 - Lexical Analysis`
			`*/`

			`package scanner;`

			`import java.io.InputStream;`
			`import java.io.BufferedInputStream;`
			`import java.io.IOException;`

			`/**`
			`* A <code>LispScanner</code> converts a stream of bytes into a stream of Lisp`
			`* tokens. When the end of stream has been reached a token with a type of`
			`* <code>Token.Type.EOF</code> is returned from the <code>nextToken</code>`
			`* method of this scanner.`
			`*/`
			`public class LispScanner {`

			`private LispFilterStream inStream;`
			`private Token currToken;`
			`private String fileName;`
			`private int line;`
			`private int column;`

			`/**`
			`* Create a new <code>LispScanner</code> that produces Lisp tokens from the`
			`* specified input stream.`
			`*`
			`* @param in`
			`* the input stream to obtain Lisp tokens from (must not be`
			`* <code>null</code>)`
			`* @param fileName`
			`* the name of the file that <code>in</code> is reading from`
			`*/`
			`public LispScanner(InputStream in, String fileName) {`
			`this.inStream = new LispFilterStream(new BufferedInputStream(in));`
			`this.currToken = null;`
			`this.fileName = fileName;`
			`this.line = 1;`
			`this.column = 0;`
			`}`

			`/**`
			`* Returns the same Lisp token returned from the last call to the`
			`* <code>nextToken</code> method of this scanner. In the case that no calls`
			`* to <code>nextToken</code> have been made yet, this method returns`
			`* <code>null</code>.`
			`*`
			`* @return`
			`* the last Lisp token returned from this scanner or <code>null</code> (if`
			`* no tokens have been returned from this scanner yet)`
			`*/`
			`public Token getCurrToken() {`
			`return currToken;`
			`}`

			`/**`
			`* Returns the next Lisp token from this scanner.`
			`*`
			`* @return`
			`* the next Lisp token from this scanner.`
			`* @throws RuntimeException`
			`* Indicates that an illegal character or an unterminated quoted string`
			`* was encountered in the input stream (not counting comments).`
			`* @throws IOException`
			`* Indicates that an I/O error has occurred.`
			`*/`
			`public Token nextToken() throws IOException {`
			`currToken = retrieveNextToken();`

			`return currToken;`
			`}`

			`// Retrieve the next Lisp token from 'inStream'.`
			`//`
			`// Returns: the next Lisp token found in 'inStream'`
			`// Precondition: 'inStream' must not be null.`
			`// Throws: RuntimeException - Indicates that an illegal character or an`
			`// unterminated quoted string was encountered in`
			`// 'inStream'.`
			`// Throws: IOException - Indicates that an I/O error has occurred.`
			`private Token retrieveNextToken() throws IOException {`
			`int c;`

			`while ((c = inStream.read()) != -1) {`
			`char nextChar = (char) c;`

			`++column;`

			`// determine the type of the Lisp token from the character obtained`
			`// from 'inStream'`
			`switch (nextChar) {`
			`case '\n':`
			`// we have hit a new line so increment 'line' and reset`
			`// 'column'`
			`++line;`
			`column = 0;`

			`break;`
			`case '(':`
			`return new Token(Token.Type.LEFT_PAREN,`
			`"(",`
			`fileName,`
			`line,`
			`column);`
			`case ')':`
			`return new Token(Token.Type.RIGHT_PAREN,`
			`")",`
			`fileName,`
			`line,`
			`column);`
			`case '\'':`
			`return new Token(Token.Type.QUOTE_MARK,`
			`"\'",`
			`fileName,`
			`line,`
			`column);`
			`case '\"':`
			`return retrieveString(nextChar);`
			`default:`
			`if (Character.isWhitespace(nextChar)) { // skip whitespace`
			`continue;`
			`} else if (Character.isDigit(nextChar)) { // number`
			`return retrieveNumber(nextChar);`
			`} else if (isLegalIdChar(nextChar)) { // identifier`
			`return retrieveIdentifier(nextChar);`
			`} else {`
			`// 'nextChar' can not start any Lisp token`

			`throw new RuntimeException("illegal character " +`
			`"\'" + nextChar + "\'" +`
			`" - line " + line +`
			`" column " + column);`
			`}`
			`}`
			`}`

			`// we have reached the end of 'inStream' so we return an end-of-file`
			`// token`
			`return new Token(Token.Type.EOF, "EOF", fileName, line, column);`
			`}`

			`// Retrieve a quoted string token from 'inStream'.`
			`//`
			`// Parameters: firstDoubleQuote - the opening double quote of this quoted`
			`// string`
			`// Returns: a quoted string token obtained from 'instream'`
			`// Throws: RuntimeException - Indicates that this quoted string was`
			`// missing its terminating double quote.`
			`// Throws: IOException - Indicates that an I/O error has occurred.`
			`// Precondition: 'firstDoubleQuote' must be the leading double quote`
			`// character of this quoted string and 'inStream' must not`
			`// be null.`
			`private Token retrieveString(char firstDoubleQuote) throws IOException {`
			`StringBuffer text = new StringBuffer();`
			`int startLine = line;`
			`int startColumn = column;`
			`char prevChar = firstDoubleQuote;`

			`text.append(firstDoubleQuote);`

			`int c;`

			`while ((c = inStream.read()) != -1) {`
			`char nextChar = (char) c;`

			`++column;`
			`text.append(nextChar);`

			`switch(nextChar) {`
			`case '\n':`
			`++line;`
			`column = 0;`

			`break;`
			`case '\"':`
			`if (prevChar != '\\') {`
			`// we have found the terminating double quote`

			`return new Token(Token.Type.STRING,`
			`text.toString(),`
			`fileName,`
			`startLine,`
			`startColumn);`
			`}`

			`// this is an escaped double quote`
			`}`

			`prevChar = nextChar;`
			`}`

			`// the end of 'inStream' was reached before the terminating double`
			`// quote`

			`throw new RuntimeException("unterminated quoted string" +`
			`" - line " + startLine +`
			`" column " + startColumn);`
			`}`

			`// Retrieve a number token from 'inStream'.`
			`//`
			`// Parameters: firstDigit - the first digit of this number`
			`// Returns: a number token obtained from 'inStream'`
			`// Throws: IOException - Indicates that an I/O error has occurred.`
			`// Precondition: 'firstDigit' must be the first digit of this number and`
			`// 'inStream' must not be null.`
			`private Token retrieveNumber(char firstDigit) throws IOException {`
			`StringBuffer text = new StringBuffer();`
			`int startColumn = column;`

			`text.append(firstDigit);`
			`inStream.mark(1);`

			`int c;`

			`while ((c = inStream.read()) != -1) {`
			`char nextChar = (char) c;`

			`if (Character.isDigit(nextChar)) {`
			`// 'nextChar' is a digit in this number`

			`text.append(nextChar);`
			`++column;`
			`} else {`
			`// we have reached the end of the number`

			`inStream.reset(); // unread the last character`

			`return new Token(Token.Type.NUMBER,`
			`text.toString(),`
			`fileName,`
			`line,`
			`startColumn);`
			`}`

			`inStream.mark(1);`
			`}`

			`// there are no more bytes to be read from 'inStream' after this number`
			`// token`

			`return new Token(Token.Type.NUMBER,`
			`text.toString(),`
			`fileName,`
			`line,`
			`startColumn);`
			`}`

			`// Retrieve an identifier token from 'inStream'.`
			`//`
			`// Parameters: firstChar - the first character of this identifier`
			`// Returns: an identifier token obtained from 'inStream'`
			`// Throws: IOException - Indicates that an I/O error has occurred.`
			`// Precondition: 'firsChar' must be the first character of this identifier`
			`// and 'inStream' must not be null.`
			`private Token retrieveIdentifier(char firstChar) throws IOException {`
			`StringBuffer text = new StringBuffer();`
			`int startColumn = column;`

			`text.append(firstChar);`
			`inStream.mark(1);`

			`int c;`

			`while ((c = inStream.read()) != -1) {`
			`char nextChar = (char) c;`

			`if (isLegalIdChar(nextChar)) {`
			`// 'nextChar' is part of the identifier`

			`text.append(nextChar);`
			`++column;`
			`} else {`
			`// we have reached the end of this identifier`

			`inStream.reset(); // unread the last character`

			`return new Token(Token.Type.IDENTIFIER,`
			`text.toString(),`
			`fileName,`
			`line,`
			`startColumn);`
			`}`

			`inStream.mark(1);`
			`}`

			`// there are no more bytes to be read from 'inStream' after this`
			`// identifier token`

			`return new Token(Token.Type.IDENTIFIER,`
			`text.toString(),`
			`fileName,`
			`line,`
			`startColumn);`
			`}`

			`// Test if a character is legal to be contained within an identifier in`
			`// Lisp.`
			`//`
			`// Returns: 'true' if the character can be found within an identifier in`
			`// Lisp; 'false' otherwise`
			`private boolean isLegalIdChar(char c) {`
			`return ((! Character.isWhitespace(c)) && (c != '\"')`
			`&& (c != '\'')`
			`&& (c != '\\')`
			&& (c != '`')
			`&& (c != '(')`
			`&& (c != ')')`
			`&& (c != '[')`
			`&& (c != ']')`
			`&& (c != '#')`
			`&& (c != '.')`
			`&& (c != ';'));`
			`}`

			`}`