/*
 * Name: Mike Cifelli
 * Course: CIS 443 - Programming Languages
 * Assignment: Lisp Interpreter Phase 1 - Lexical Analysis
 */

package scanner;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;

/**
 * A <code>LispScanner</code> converts a stream of bytes into a stream of Lisp
 * tokens. When the end of stream has been reached a token with a type of
 * <code>Token.Type.EOF</code> is returned from the <code>nextToken</code>
 * method of this scanner.
 *
 * <p>
 * Every token carries the file name, line and column at which it starts. Lines
 * and columns are both 1-based: the first character of the input is at line 1,
 * column 1, and the column restarts after every newline character.
 */
public class LispScanner {

    // NOTE(review): the Javadoc of nextToken() implies that comments never
    // reach this scanner, so this stream presumably strips them - confirm
    // against LispFilterInputStream.
    private final LispFilterInputStream inStream;
    private Token currToken;
    private final String fileName;
    private int line; // line of the character most recently consumed (1-based)
    private int column; // column of the character most recently consumed (0 = none yet on this line)

    /**
     * Create a new <code>LispScanner</code> that produces Lisp tokens from the
     * specified input stream.
     *
     * @param in
     *            the input stream to obtain Lisp tokens from (must not be
     *            <code>null</code>)
     * @param fileName
     *            the name of the file that <code>in</code> is reading from; it
     *            is only recorded in the tokens that are produced
     */
    public LispScanner(InputStream in, String fileName) {
        // the stream is buffered so that single characters can be cheaply
        // read and "unread" through mark/reset
        this.inStream = new LispFilterInputStream(new BufferedInputStream(in));
        this.currToken = null;
        this.fileName = fileName;
        this.line = 1;
        this.column = 0;
    }

    /**
     * Returns the same Lisp token returned from the last call to the
     * <code>nextToken</code> method of this scanner. In the case that no calls
     * to <code>nextToken</code> have been made yet, this method returns
     * <code>null</code>.
     *
     * @return the last Lisp token returned from this scanner or
     *         <code>null</code> (if no tokens have been returned from this
     *         scanner yet)
     */
    public Token getCurrToken() {
        return currToken;
    }

    /**
     * Returns the next Lisp token from this scanner and remembers it so that
     * it can be obtained again through <code>getCurrToken</code>.
     *
     * @return the next Lisp token from this scanner.
     * @throws RuntimeException
     *             Indicates that an illegal character or an unterminated quoted
     *             string was encountered in the input stream (not counting
     *             comments).
     * @throws IOException
     *             Indicates that an I/O error has occurred.
     */
    public Token nextToken() throws IOException {
        currToken = retrieveNextToken();

        return currToken;
    }

    // Retrieve the next Lisp token from 'inStream', skipping any whitespace
    // that precedes it.
    //
    // Returns: the next Lisp token found in 'inStream', or an end-of-file
    // token if 'inStream' has been exhausted
    // Precondition: 'inStream' must not be null.
    // Throws: RuntimeException - Indicates that an illegal character or an
    // unterminated quoted string was encountered in
    // 'inStream'.
    // Throws: IOException - Indicates that an I/O error has occurred.
    private Token retrieveNextToken() throws IOException {
        int c;

        while ((c = inStream.read()) != -1) {
            char nextChar = (char) c;

            ++column;

            // determine the type of the Lisp token from the character obtained
            // from 'inStream'
            switch (nextChar) {
            case '\n':
                // we have hit a new line so increment 'line' and reset
                // 'column'
                ++line;
                column = 0;

                break;
            case '(':
                return new Token(Token.Type.LEFT_PAREN, "(", fileName, line, column);
            case ')':
                return new Token(Token.Type.RIGHT_PAREN, ")", fileName, line, column);
            case '\'':
                return new Token(Token.Type.QUOTE_MARK, "'", fileName, line, column);
            case '"':
                return retrieveString(nextChar);
            default:
                if (Character.isWhitespace(nextChar)) { // skip whitespace
                    continue;
                } else if (Character.isDigit(nextChar)) { // number
                    return retrieveNumber(nextChar);
                } else if (isLegalIdChar(nextChar)) { // identifier
                    return retrieveIdentifier(nextChar);
                } else {
                    // 'nextChar' can not start any Lisp token

                    throw new RuntimeException(
                            "illegal character '" + nextChar + "' - line " + line + " column " + column);
                }
            }
        }

        // we have reached the end of 'inStream' so we return an end-of-file
        // token positioned at the last character that was consumed
        return new Token(Token.Type.EOF, "EOF", fileName, line, column);
    }

    // Retrieve a quoted string token from 'inStream'.
    //
    // The text of the token is the raw source text of the string, including
    // both double quotes and any backslashes. A backslash escapes the
    // character that follows it, so an escaped double quote does not end the
    // string and an escaped backslash does not escape the character after it.
    //
    // Parameters: firstDoubleQuote - the opening double quote of this quoted
    // string
    // Returns: a quoted string token obtained from 'inStream', positioned at
    // its opening double quote
    // Throws: RuntimeException - Indicates that this quoted string was
    // missing its terminating double quote.
    // Throws: IOException - Indicates that an I/O error has occurred.
    // Precondition: 'firstDoubleQuote' must be the leading double quote
    // character of this quoted string and 'inStream' must not
    // be null.
    private Token retrieveString(char firstDoubleQuote) throws IOException {
        StringBuilder text = new StringBuilder();
        int startLine = line;
        int startColumn = column;

        // 'true' when the previous character was a backslash that was not
        // itself escaped, i.e. the current character is escaped
        boolean escaped = false;

        text.append(firstDoubleQuote);

        int c;

        while ((c = inStream.read()) != -1) {
            char nextChar = (char) c;

            ++column;
            text.append(nextChar);

            if (nextChar == '\n') {
                // strings may span lines; keep the position bookkeeping in
                // step for the tokens that follow this string
                ++line;
                column = 0;
            }

            if (escaped) {
                // whatever this character is, it is consumed by the escape
                escaped = false;
            } else if (nextChar == '\\') {
                escaped = true;
            } else if (nextChar == '"') {
                // we have found the terminating double quote

                return new Token(Token.Type.STRING, text.toString(), fileName, startLine, startColumn);
            }
        }

        // the end of 'inStream' was reached before the terminating double
        // quote

        throw new RuntimeException("unterminated quoted string - line " + startLine + " column " + startColumn);
    }

    // Retrieve a number token (a run of consecutive digits) from 'inStream'.
    //
    // Parameters: firstDigit - the first digit of this number
    // Returns: a number token obtained from 'inStream'
    // Throws: IOException - Indicates that an I/O error has occurred.
    // Precondition: 'firstDigit' must be the first digit of this number and
    // 'inStream' must not be null.
    private Token retrieveNumber(char firstDigit) throws IOException {
        StringBuilder text = new StringBuilder();
        int startColumn = column;

        text.append(firstDigit);
        inStream.mark(1); // remember this position so one character can be unread

        int c;

        while ((c = inStream.read()) != -1) {
            char nextChar = (char) c;

            if (Character.isDigit(nextChar)) {
                // 'nextChar' is a digit in this number

                text.append(nextChar);
                ++column;
            } else {
                // we have reached the end of the number

                inStream.reset(); // unread the last character

                return new Token(Token.Type.NUMBER, text.toString(), fileName, line, startColumn);
            }

            inStream.mark(1);
        }

        // there are no more bytes to be read from 'inStream' after this number
        // token

        return new Token(Token.Type.NUMBER, text.toString(), fileName, line, startColumn);
    }

    // Retrieve an identifier token (a run of consecutive legal identifier
    // characters) from 'inStream'.
    //
    // Parameters: firstChar - the first character of this identifier
    // Returns: an identifier token obtained from 'inStream'
    // Throws: IOException - Indicates that an I/O error has occurred.
    // Precondition: 'firstChar' must be the first character of this identifier
    // and 'inStream' must not be null.
    private Token retrieveIdentifier(char firstChar) throws IOException {
        StringBuilder text = new StringBuilder();
        int startColumn = column;

        text.append(firstChar);
        inStream.mark(1); // remember this position so one character can be unread

        int c;

        while ((c = inStream.read()) != -1) {
            char nextChar = (char) c;

            if (isLegalIdChar(nextChar)) {
                // 'nextChar' is part of the identifier

                text.append(nextChar);
                ++column;
            } else {
                // we have reached the end of this identifier

                inStream.reset(); // unread the last character

                return new Token(Token.Type.IDENTIFIER, text.toString(), fileName, line, startColumn);
            }

            inStream.mark(1);
        }

        // there are no more bytes to be read from 'inStream' after this
        // identifier token

        return new Token(Token.Type.IDENTIFIER, text.toString(), fileName, line, startColumn);
    }

    // Test if a character is legal to be contained within an identifier in
    // Lisp. Whitespace and the characters " ' \ ` ( ) [ ] # . ; are not
    // legal; every other character is.
    //
    // Returns: 'true' if the character can be found within an identifier in
    // Lisp; 'false' otherwise
    private boolean isLegalIdChar(char c) {
        return ((!Character.isWhitespace(c)) && (c != '\"') && (c != '\'') && (c != '\\') && (c != '`') && (c != '(')
                && (c != ')') && (c != '[') && (c != ']') && (c != '#') && (c != '.') && (c != ';'));
    }

}