transcendental-lisp/scanner/LispScanner.java

321 lines
11 KiB
Java
Raw Normal View History

2016-12-07 14:16:45 -05:00
/*
* Name: Mike Cifelli
* Course: CIS 443 - Programming Languages
* Assignment: Lisp Interpreter Phase 1 - Lexical Analysis
*/
package scanner;
import java.io.InputStream;
import java.io.BufferedInputStream;
import java.io.IOException;
/**
 * A <code>LispScanner</code> converts a stream of bytes into a stream of Lisp
 * tokens. Each call to <code>nextToken</code> consumes just enough input to
 * produce one token; once the end of the stream has been reached, every
 * further call returns a token with a type of <code>Token.Type.EOF</code>.
 *
 * <p>Positions reported in tokens are 1-based: the first character of the
 * input is at line 1, column 1. Every byte read is treated as one character.
 */
public class LispScanner {

    private final LispFilterStream inStream;
    private final String fileName;
    private Token currToken;
    private int line;
    private int column;

    /**
     * Create a new <code>LispScanner</code> that produces Lisp tokens from the
     * specified input stream. The stream is wrapped in a
     * <code>BufferedInputStream</code> and then in a
     * <code>LispFilterStream</code> before any bytes are read.
     *
     * @param in
     *            the input stream to obtain Lisp tokens from (must not be
     *            <code>null</code>)
     * @param fileName
     *            the name of the file that <code>in</code> is reading from;
     *            it is only recorded in the tokens produced
     */
    public LispScanner(InputStream in, String fileName) {
        this.inStream = new LispFilterStream(new BufferedInputStream(in));
        this.fileName = fileName;
        this.currToken = null;
        this.line = 1;
        this.column = 0; // incremented before each character is examined
    }

    /**
     * Returns the same Lisp token returned from the last call to the
     * <code>nextToken</code> method of this scanner. In the case that no calls
     * to <code>nextToken</code> have been made yet, this method returns
     * <code>null</code>.
     *
     * @return
     *         the last Lisp token returned from this scanner or
     *         <code>null</code> (if no tokens have been returned from this
     *         scanner yet)
     */
    public Token getCurrToken() {
        return currToken;
    }

    /**
     * Returns the next Lisp token from this scanner and remembers it so that
     * it can be retrieved again through <code>getCurrToken</code>.
     *
     * @return
     *         the next Lisp token from this scanner.
     * @throws RuntimeException
     *             Indicates that an illegal character or an unterminated
     *             quoted string was encountered in the input stream (not
     *             counting comments, which are presumably removed by
     *             <code>LispFilterStream</code>).
     * @throws IOException
     *             Indicates that an I/O error has occurred.
     */
    public Token nextToken() throws IOException {
        currToken = retrieveNextToken();
        return currToken;
    }

    // Retrieve the next Lisp token from 'inStream', skipping any whitespace
    // that precedes it and keeping 'line' and 'column' up to date.
    //
    // Returns: the next Lisp token found in 'inStream', or an end-of-file
    //          token when 'inStream' is exhausted
    // Precondition: 'inStream' must not be null.
    // Throws: RuntimeException - Indicates that an illegal character or an
    //                            unterminated quoted string was encountered in
    //                            'inStream'.
    // Throws: IOException - Indicates that an I/O error has occurred.
    private Token retrieveNextToken() throws IOException {
        int c;

        while ((c = inStream.read()) != -1) {
            char nextChar = (char) c;
            ++column;

            // the first character decides which kind of token follows
            switch (nextChar) {
                case '\n':
                    // a new line starts: advance 'line' and restart 'column'
                    ++line;
                    column = 0;
                    break;
                case '(':
                    return new Token(Token.Type.LEFT_PAREN, "(", fileName, line, column);
                case ')':
                    return new Token(Token.Type.RIGHT_PAREN, ")", fileName, line, column);
                case '\'':
                    return new Token(Token.Type.QUOTE_MARK, "'", fileName, line, column);
                case '\"':
                    return retrieveString(nextChar);
                default:
                    if (Character.isWhitespace(nextChar)) {
                        break; // skip whitespace
                    }

                    // digits are tested first, so an identifier never starts
                    // with a digit even though digits are legal inside one
                    if (Character.isDigit(nextChar)) {
                        return retrieveNumber(nextChar);
                    }

                    if (isLegalIdChar(nextChar)) {
                        return retrieveIdentifier(nextChar);
                    }

                    // 'nextChar' can not start any Lisp token
                    throw new RuntimeException("illegal character '" + nextChar + "'"
                                               + " - line " + line
                                               + " column " + column);
            }
        }

        // the end of 'inStream' has been reached
        return new Token(Token.Type.EOF, "EOF", fileName, line, column);
    }

    // Retrieve a quoted string token from 'inStream'. The token text contains
    // the raw characters of the string, including both double quotes and any
    // backslashes. The token is positioned at the opening double quote.
    //
    // A backslash escapes the single character that follows it, so '\"' does
    // not terminate the string, while the double quote after '\\' (an escaped
    // backslash) does.
    //
    // Parameters: firstDoubleQuote - the opening double quote of this quoted
    //                                string
    // Returns: a quoted string token obtained from 'inStream'
    // Throws: RuntimeException - Indicates that this quoted string was
    //                            missing its terminating double quote.
    // Throws: IOException - Indicates that an I/O error has occurred.
    // Precondition: 'firstDoubleQuote' must be the leading double quote
    //               character of this quoted string and 'inStream' must not
    //               be null.
    private Token retrieveString(char firstDoubleQuote) throws IOException {
        StringBuilder text = new StringBuilder();
        int startLine = line;
        int startColumn = column;
        boolean escaped = false; // true when the previous character was an unescaped backslash

        text.append(firstDoubleQuote);

        int c;

        while ((c = inStream.read()) != -1) {
            char nextChar = (char) c;
            ++column;
            text.append(nextChar);

            if (nextChar == '\n') {
                // strings may span lines; keep the position bookkeeping right
                ++line;
                column = 0;
            }

            if (escaped) {
                // this character was escaped, so it has no special meaning
                escaped = false;
            } else if (nextChar == '\\') {
                escaped = true;
            } else if (nextChar == '\"') {
                // we have found the terminating double quote
                return new Token(Token.Type.STRING,
                                 text.toString(),
                                 fileName,
                                 startLine,
                                 startColumn);
            }
        }

        // the end of 'inStream' was reached before the terminating double
        // quote
        throw new RuntimeException("unterminated quoted string"
                                   + " - line " + startLine
                                   + " column " + startColumn);
    }

    // Retrieve a number token (a run of digits) from 'inStream'.
    //
    // Parameters: firstDigit - the first digit of this number
    // Returns: a number token obtained from 'inStream'
    // Throws: IOException - Indicates that an I/O error has occurred.
    // Precondition: 'firstDigit' must be the first digit of this number and
    //               'inStream' must not be null.
    private Token retrieveNumber(char firstDigit) throws IOException {
        int startColumn = column; // captured before the run advances 'column'
        String text = readRun(firstDigit, true);

        return new Token(Token.Type.NUMBER, text, fileName, line, startColumn);
    }

    // Retrieve an identifier token (a run of legal identifier characters)
    // from 'inStream'.
    //
    // Parameters: firstChar - the first character of this identifier
    // Returns: an identifier token obtained from 'inStream'
    // Throws: IOException - Indicates that an I/O error has occurred.
    // Precondition: 'firstChar' must be the first character of this identifier
    //               and 'inStream' must not be null.
    private Token retrieveIdentifier(char firstChar) throws IOException {
        int startColumn = column; // captured before the run advances 'column'
        String text = readRun(firstChar, false);

        return new Token(Token.Type.IDENTIFIER, text, fileName, line, startColumn);
    }

    // Read the longest run of characters that continue the token started by
    // 'firstChar', advancing 'column' once per character consumed. The first
    // character that does not belong to the run is pushed back with
    // mark/reset so that the next token starts with it.
    //
    // NOTE(review): this relies on 'inStream' honouring mark(1)/reset();
    // LispFilterStream is not visible here - confirm it supports them.
    //
    // Parameters: firstChar - the already-consumed first character of the run
    //             digitsOnly - 'true' to accept only digits; 'false' to accept
    //                          any legal identifier character
    // Returns: the text of the run, starting with 'firstChar'
    // Throws: IOException - Indicates that an I/O error has occurred.
    private String readRun(char firstChar, boolean digitsOnly) throws IOException {
        StringBuilder text = new StringBuilder();
        text.append(firstChar);

        while (true) {
            inStream.mark(1); // remember this spot in case the next char must be unread

            int c = inStream.read();

            if (c == -1) {
                break; // end of input ends the run; there is nothing to unread
            }

            char nextChar = (char) c;
            boolean inRun = digitsOnly ? Character.isDigit(nextChar) : isLegalIdChar(nextChar);

            if (!inRun) {
                inStream.reset(); // unread the last character
                break;
            }

            text.append(nextChar);
            ++column;
        }

        return text.toString();
    }

    // Test if a character is legal to be contained within an identifier in
    // Lisp: anything that is not whitespace and not one of the reserved
    // characters " ' \ ` ( ) [ ] # . ;
    //
    // Returns: 'true' if the character can be found within an identifier in
    //          Lisp; 'false' otherwise
    private boolean isLegalIdChar(char c) {
        if (Character.isWhitespace(c)) {
            return false;
        }

        switch (c) {
            case '\"':
            case '\'':
            case '\\':
            case '`':
            case '(':
            case ')':
            case '[':
            case ']':
            case '#':
            case '.':
            case ';':
                return false;
            default:
                return true;
        }
    }
}