/*
 * Name: Mike Cifelli
 * Course: CIS 443 - Programming Languages
 * Assignment: Lisp Interpreter Phase 1 - Lexical Analysis
 */

package scanner;
|
||
|
|
||
|
import java.io.InputStream;
|
||
|
import java.io.BufferedInputStream;
|
||
|
import java.io.IOException;
|
||
|
|
||
|
/**
|
||
|
* A <code>LispScanner</code> converts a stream of bytes into a stream of Lisp
|
||
|
* tokens. When the end of stream has been reached a token with a type of
|
||
|
* <code>Token.Type.EOF</code> is returned from the <code>nextToken</code>
|
||
|
* method of this scanner.
|
||
|
*/
|
||
|
public class LispScanner {
|
||
|
|
||
|
private LispFilterStream inStream;
|
||
|
private Token currToken;
|
||
|
private String fileName;
|
||
|
private int line;
|
||
|
private int column;
|
||
|
|
||
|
/**
|
||
|
* Create a new <code>LispScanner</code> that produces Lisp tokens from the
|
||
|
* specified input stream.
|
||
|
*
|
||
|
* @param in
|
||
|
* the input stream to obtain Lisp tokens from (must not be
|
||
|
* <code>null</code>)
|
||
|
* @param fileName
|
||
|
* the name of the file that <code>in</code> is reading from
|
||
|
*/
|
||
|
public LispScanner(InputStream in, String fileName) {
|
||
|
this.inStream = new LispFilterStream(new BufferedInputStream(in));
|
||
|
this.currToken = null;
|
||
|
this.fileName = fileName;
|
||
|
this.line = 1;
|
||
|
this.column = 0;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Returns the same Lisp token returned from the last call to the
|
||
|
* <code>nextToken</code> method of this scanner. In the case that no calls
|
||
|
* to <code>nextToken</code> have been made yet, this method returns
|
||
|
* <code>null</code>.
|
||
|
*
|
||
|
* @return
|
||
|
* the last Lisp token returned from this scanner or <code>null</code> (if
|
||
|
* no tokens have been returned from this scanner yet)
|
||
|
*/
|
||
|
public Token getCurrToken() {
|
||
|
return currToken;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Returns the next Lisp token from this scanner.
|
||
|
*
|
||
|
* @return
|
||
|
* the next Lisp token from this scanner.
|
||
|
* @throws RuntimeException
|
||
|
* Indicates that an illegal character or an unterminated quoted string
|
||
|
* was encountered in the input stream (not counting comments).
|
||
|
* @throws IOException
|
||
|
* Indicates that an I/O error has occurred.
|
||
|
*/
|
||
|
public Token nextToken() throws IOException {
|
||
|
currToken = retrieveNextToken();
|
||
|
|
||
|
return currToken;
|
||
|
}
|
||
|
|
||
|
// Retrieve the next Lisp token from 'inStream'.
|
||
|
//
|
||
|
// Returns: the next Lisp token found in 'inStream'
|
||
|
// Precondition: 'inStream' must not be null.
|
||
|
// Throws: RuntimeException - Indicates that an illegal character or an
|
||
|
// unterminated quoted string was encountered in
|
||
|
// 'inStream'.
|
||
|
// Throws: IOException - Indicates that an I/O error has occurred.
|
||
|
private Token retrieveNextToken() throws IOException {
|
||
|
int c;
|
||
|
|
||
|
while ((c = inStream.read()) != -1) {
|
||
|
char nextChar = (char) c;
|
||
|
|
||
|
++column;
|
||
|
|
||
|
// determine the type of the Lisp token from the character obtained
|
||
|
// from 'inStream'
|
||
|
switch (nextChar) {
|
||
|
case '\n':
|
||
|
// we have hit a new line so increment 'line' and reset
|
||
|
// 'column'
|
||
|
++line;
|
||
|
column = 0;
|
||
|
|
||
|
break;
|
||
|
case '(':
|
||
|
return new Token(Token.Type.LEFT_PAREN,
|
||
|
"(",
|
||
|
fileName,
|
||
|
line,
|
||
|
column);
|
||
|
case ')':
|
||
|
return new Token(Token.Type.RIGHT_PAREN,
|
||
|
")",
|
||
|
fileName,
|
||
|
line,
|
||
|
column);
|
||
|
case '\'':
|
||
|
return new Token(Token.Type.QUOTE_MARK,
|
||
|
"\'",
|
||
|
fileName,
|
||
|
line,
|
||
|
column);
|
||
|
case '\"':
|
||
|
return retrieveString(nextChar);
|
||
|
default:
|
||
|
if (Character.isWhitespace(nextChar)) { // skip whitespace
|
||
|
continue;
|
||
|
} else if (Character.isDigit(nextChar)) { // number
|
||
|
return retrieveNumber(nextChar);
|
||
|
} else if (isLegalIdChar(nextChar)) { // identifier
|
||
|
return retrieveIdentifier(nextChar);
|
||
|
} else {
|
||
|
// 'nextChar' can not start any Lisp token
|
||
|
|
||
|
throw new RuntimeException("illegal character " +
|
||
|
"\'" + nextChar + "\'" +
|
||
|
" - line " + line +
|
||
|
" column " + column);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// we have reached the end of 'inStream' so we return an end-of-file
|
||
|
// token
|
||
|
return new Token(Token.Type.EOF, "EOF", fileName, line, column);
|
||
|
}
|
||
|
|
||
|
// Retrieve a quoted string token from 'inStream'.
|
||
|
//
|
||
|
// Parameters: firstDoubleQuote - the opening double quote of this quoted
|
||
|
// string
|
||
|
// Returns: a quoted string token obtained from 'instream'
|
||
|
// Throws: RuntimeException - Indicates that this quoted string was
|
||
|
// missing its terminating double quote.
|
||
|
// Throws: IOException - Indicates that an I/O error has occurred.
|
||
|
// Precondition: 'firstDoubleQuote' must be the leading double quote
|
||
|
// character of this quoted string and 'inStream' must not
|
||
|
// be null.
|
||
|
private Token retrieveString(char firstDoubleQuote) throws IOException {
|
||
|
StringBuffer text = new StringBuffer();
|
||
|
int startLine = line;
|
||
|
int startColumn = column;
|
||
|
char prevChar = firstDoubleQuote;
|
||
|
|
||
|
text.append(firstDoubleQuote);
|
||
|
|
||
|
int c;
|
||
|
|
||
|
while ((c = inStream.read()) != -1) {
|
||
|
char nextChar = (char) c;
|
||
|
|
||
|
++column;
|
||
|
text.append(nextChar);
|
||
|
|
||
|
switch(nextChar) {
|
||
|
case '\n':
|
||
|
++line;
|
||
|
column = 0;
|
||
|
|
||
|
break;
|
||
|
case '\"':
|
||
|
if (prevChar != '\\') {
|
||
|
// we have found the terminating double quote
|
||
|
|
||
|
return new Token(Token.Type.STRING,
|
||
|
text.toString(),
|
||
|
fileName,
|
||
|
startLine,
|
||
|
startColumn);
|
||
|
}
|
||
|
|
||
|
// this is an escaped double quote
|
||
|
}
|
||
|
|
||
|
prevChar = nextChar;
|
||
|
}
|
||
|
|
||
|
// the end of 'inStream' was reached before the terminating double
|
||
|
// quote
|
||
|
|
||
|
throw new RuntimeException("unterminated quoted string" +
|
||
|
" - line " + startLine +
|
||
|
" column " + startColumn);
|
||
|
}
|
||
|
|
||
|
// Retrieve a number token from 'inStream'.
|
||
|
//
|
||
|
// Parameters: firstDigit - the first digit of this number
|
||
|
// Returns: a number token obtained from 'inStream'
|
||
|
// Throws: IOException - Indicates that an I/O error has occurred.
|
||
|
// Precondition: 'firstDigit' must be the first digit of this number and
|
||
|
// 'inStream' must not be null.
|
||
|
private Token retrieveNumber(char firstDigit) throws IOException {
|
||
|
StringBuffer text = new StringBuffer();
|
||
|
int startColumn = column;
|
||
|
|
||
|
text.append(firstDigit);
|
||
|
inStream.mark(1);
|
||
|
|
||
|
int c;
|
||
|
|
||
|
while ((c = inStream.read()) != -1) {
|
||
|
char nextChar = (char) c;
|
||
|
|
||
|
if (Character.isDigit(nextChar)) {
|
||
|
// 'nextChar' is a digit in this number
|
||
|
|
||
|
text.append(nextChar);
|
||
|
++column;
|
||
|
} else {
|
||
|
// we have reached the end of the number
|
||
|
|
||
|
inStream.reset(); // unread the last character
|
||
|
|
||
|
return new Token(Token.Type.NUMBER,
|
||
|
text.toString(),
|
||
|
fileName,
|
||
|
line,
|
||
|
startColumn);
|
||
|
}
|
||
|
|
||
|
inStream.mark(1);
|
||
|
}
|
||
|
|
||
|
// there are no more bytes to be read from 'inStream' after this number
|
||
|
// token
|
||
|
|
||
|
return new Token(Token.Type.NUMBER,
|
||
|
text.toString(),
|
||
|
fileName,
|
||
|
line,
|
||
|
startColumn);
|
||
|
}
|
||
|
|
||
|
// Retrieve an identifier token from 'inStream'.
|
||
|
//
|
||
|
// Parameters: firstChar - the first character of this identifier
|
||
|
// Returns: an identifier token obtained from 'inStream'
|
||
|
// Throws: IOException - Indicates that an I/O error has occurred.
|
||
|
// Precondition: 'firsChar' must be the first character of this identifier
|
||
|
// and 'inStream' must not be null.
|
||
|
private Token retrieveIdentifier(char firstChar) throws IOException {
|
||
|
StringBuffer text = new StringBuffer();
|
||
|
int startColumn = column;
|
||
|
|
||
|
text.append(firstChar);
|
||
|
inStream.mark(1);
|
||
|
|
||
|
int c;
|
||
|
|
||
|
while ((c = inStream.read()) != -1) {
|
||
|
char nextChar = (char) c;
|
||
|
|
||
|
if (isLegalIdChar(nextChar)) {
|
||
|
// 'nextChar' is part of the identifier
|
||
|
|
||
|
text.append(nextChar);
|
||
|
++column;
|
||
|
} else {
|
||
|
// we have reached the end of this identifier
|
||
|
|
||
|
inStream.reset(); // unread the last character
|
||
|
|
||
|
return new Token(Token.Type.IDENTIFIER,
|
||
|
text.toString(),
|
||
|
fileName,
|
||
|
line,
|
||
|
startColumn);
|
||
|
}
|
||
|
|
||
|
inStream.mark(1);
|
||
|
}
|
||
|
|
||
|
// there are no more bytes to be read from 'inStream' after this
|
||
|
// identifier token
|
||
|
|
||
|
return new Token(Token.Type.IDENTIFIER,
|
||
|
text.toString(),
|
||
|
fileName,
|
||
|
line,
|
||
|
startColumn);
|
||
|
}
|
||
|
|
||
|
// Test if a character is legal to be contained within an identifier in
|
||
|
// Lisp.
|
||
|
//
|
||
|
// Returns: 'true' if the character can be found within an identifier in
|
||
|
// Lisp; 'false' otherwise
|
||
|
private boolean isLegalIdChar(char c) {
|
||
|
return ((! Character.isWhitespace(c)) && (c != '\"')
|
||
|
&& (c != '\'')
|
||
|
&& (c != '\\')
|
||
|
&& (c != '`')
|
||
|
&& (c != '(')
|
||
|
&& (c != ')')
|
||
|
&& (c != '[')
|
||
|
&& (c != ']')
|
||
|
&& (c != '#')
|
||
|
&& (c != '.')
|
||
|
&& (c != ';'));
|
||
|
}
|
||
|
|
||
|
}
|