/*
 * Name: Mike Cifelli
 * Course: CIS 443 - Programming Languages
 * Assignment: Lisp Interpreter Phase 1 - Lexical Analysis
 */

package scanner;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;

/**
 * A {@code LispScanner} converts a stream of bytes into a stream of Lisp
 * tokens. When the end of stream has been reached a token with a type of
 * {@code Token.Type.EOF} is returned from the {@code nextToken} method of
 * this scanner.
 */
public class LispScanner {

    private final LispFilterInputStream inStream;
    private final String fileName;
    private Token currToken;
    private int line;
    private int column;

    /**
     * Create a new {@code LispScanner} that produces Lisp tokens from the
     * specified input stream.
     *
     * @param in
     *            the input stream to obtain Lisp tokens from (must not be
     *            {@code null})
     * @param fileName
     *            the name of the file that {@code in} is reading from
     */
    public LispScanner(InputStream in, String fileName) {
        this.inStream = new LispFilterInputStream(new BufferedInputStream(in));
        this.currToken = null;
        this.fileName = fileName;
        this.line = 1;
        this.column = 0;
    }

    /**
     * Returns the same Lisp token returned from the last call to the
     * {@code nextToken} method of this scanner. In the case that no calls
     * to {@code nextToken} have been made yet, this method returns
     * {@code null}.
     *
     * @return the last Lisp token returned from this scanner or
     *         {@code null} (if no tokens have been returned from this
     *         scanner yet)
     */
    public Token getCurrToken() {
        return currToken;
    }

    /**
     * Returns the next Lisp token from this scanner and remembers it so that
     * {@code getCurrToken} can return it again.
     *
     * @return the next Lisp token from this scanner.
     * @throws RuntimeException
     *             Indicates that an illegal character or an unterminated
     *             quoted string was encountered in the input stream (not
     *             counting comments).
     * @throws IOException
     *             Indicates that an I/O error has occurred.
     */
    public Token nextToken() throws IOException {
        currToken = retrieveNextToken();

        return currToken;
    }

    // Retrieve the next Lisp token from 'inStream'.
    //
    // Returns: the next Lisp token found in 'inStream'; an EOF token once
    //          'inStream' is exhausted
    // Precondition: 'inStream' must not be null.
    // Throws: RuntimeException - Indicates that an illegal character or an
    //                            unterminated quoted string was encountered in
    //                            'inStream'.
    // Throws: IOException - Indicates that an I/O error has occurred.
    private Token retrieveNextToken() throws IOException {
        int c;

        while ((c = inStream.read()) != -1) {
            char nextChar = (char) c;

            ++column;

            // determine the type of the Lisp token from the character obtained
            // from 'inStream'
            switch (nextChar) {
            case '\n':
                // we have hit a new line so increment 'line' and reset
                // 'column'
                ++line;
                column = 0;
                break;
            case '(':
                return new Token(Token.Type.LEFT_PAREN, "(", fileName, line,
                        column);
            case ')':
                return new Token(Token.Type.RIGHT_PAREN, ")", fileName, line,
                        column);
            case '\'':
                return new Token(Token.Type.QUOTE_MARK, "'", fileName, line,
                        column);
            case '"':
                return retrieveString(nextChar);
            default:
                if (Character.isWhitespace(nextChar)) {
                    // skip whitespace
                    continue;
                } else if (Character.isDigit(nextChar)) {
                    // number
                    return retrieveNumber(nextChar);
                } else if (isLegalIdChar(nextChar)) {
                    // identifier
                    return retrieveIdentifier(nextChar);
                } else {
                    // 'nextChar' can not start any Lisp token
                    throw new RuntimeException("illegal character '" + nextChar
                            + "' - line " + line + " column " + column);
                }
            }
        }

        // we have reached the end of 'inStream' so we return an end-of-file
        // token
        return new Token(Token.Type.EOF, "EOF", fileName, line, column);
    }

    // Retrieve a quoted string token from 'inStream'.
    //
    // The token text includes both the opening and the closing double quote.
    // A backslash escapes the character that follows it, so an escaped double
    // quote does not terminate the string, while a double quote that follows
    // an escaped backslash (as in "a\\") does.
    //
    // Parameters: firstDoubleQuote - the opening double quote of this quoted
    //                                string
    // Returns: a quoted string token obtained from 'inStream', positioned at
    //          the line and column of its opening double quote
    // Throws: RuntimeException - Indicates that this quoted string was
    //                            missing its terminating double quote.
    // Throws: IOException - Indicates that an I/O error has occurred.
    // Precondition: 'firstDoubleQuote' must be the leading double quote
    //               character of this quoted string and 'inStream' must not
    //               be null.
    private Token retrieveString(char firstDoubleQuote) throws IOException {
        StringBuilder text = new StringBuilder();
        int startLine = line;
        int startColumn = column;

        // 'true' when the previous character was an unescaped backslash, in
        // which case the current character is taken literally
        boolean escaped = false;

        text.append(firstDoubleQuote);

        int c;

        while ((c = inStream.read()) != -1) {
            char nextChar = (char) c;

            ++column;
            text.append(nextChar);

            if (nextChar == '\n') {
                // strings may span lines, so keep the position in sync
                ++line;
                column = 0;
            }

            if (escaped) {
                // this character was escaped by the preceding backslash
                escaped = false;
            } else if (nextChar == '\\') {
                escaped = true;
            } else if (nextChar == '"') {
                // we have found the terminating double quote
                return new Token(Token.Type.STRING, text.toString(), fileName,
                        startLine, startColumn);
            }
        }

        // the end of 'inStream' was reached before the terminating double
        // quote
        throw new RuntimeException("unterminated quoted string" + " - line "
                + startLine + " column " + startColumn);
    }

    // Retrieve a number token from 'inStream'.
    //
    // Parameters: firstDigit - the first digit of this number
    // Returns: a number token obtained from 'inStream'
    // Throws: IOException - Indicates that an I/O error has occurred.
    // Precondition: 'firstDigit' must be the first digit of this number and
    //               'inStream' must not be null.
    private Token retrieveNumber(char firstDigit) throws IOException {
        int startColumn = column;
        String text = retrieveRun(firstDigit, true);

        return new Token(Token.Type.NUMBER, text, fileName, line, startColumn);
    }

    // Retrieve an identifier token from 'inStream'.
    //
    // Parameters: firstChar - the first character of this identifier
    // Returns: an identifier token obtained from 'inStream'
    // Throws: IOException - Indicates that an I/O error has occurred.
    // Precondition: 'firstChar' must be the first character of this identifier
    //               and 'inStream' must not be null.
    private Token retrieveIdentifier(char firstChar) throws IOException {
        int startColumn = column;
        String text = retrieveRun(firstChar, false);

        return new Token(Token.Type.IDENTIFIER, text, fileName, line,
                startColumn);
    }

    // Read the longest run of characters that continues the token started by
    // 'firstChar', advancing 'column' once for every character consumed.
    //
    // The stream is marked before each read so that the first character that
    // does not belong to the run can be pushed back with reset() and seen
    // again by the next call to retrieveNextToken().
    //
    // Parameters: firstChar - the already-consumed first character of the run
    //             digitsOnly - 'true' to accept only digits (numbers);
    //                          'false' to accept any legal identifier
    //                          character
    // Returns: the text of the run, starting with 'firstChar'
    // Throws: IOException - Indicates that an I/O error has occurred.
    // Precondition: 'inStream' must not be null.
    private String retrieveRun(char firstChar, boolean digitsOnly)
            throws IOException {
        StringBuilder text = new StringBuilder();

        text.append(firstChar);
        inStream.mark(1);

        int c;

        while ((c = inStream.read()) != -1) {
            char nextChar = (char) c;
            boolean partOfRun = digitsOnly ? Character.isDigit(nextChar)
                    : isLegalIdChar(nextChar);

            if (!partOfRun) {
                // we have reached the end of the run
                inStream.reset(); // unread the last character
                break;
            }

            text.append(nextChar);
            ++column;
            inStream.mark(1);
        }

        return text.toString();
    }

    // Test if a character is legal to be contained within an identifier in
    // Lisp.
    //
    // Returns: 'true' if the character can be found within an identifier in
    //          Lisp; 'false' otherwise
    private boolean isLegalIdChar(char c) {
        if (Character.isWhitespace(c)) {
            return false;
        }

        switch (c) {
        case '"':
        case '\'':
        case '\\':
        case '`':
        case '(':
        case ')':
        case '[':
        case ']':
        case '#':
        case '.':
        case ';':
            return false;
        default:
            return true;
        }
    }

}