2007-05-03
Writing a parser: ADL Parser - part 1
We'll now write the Parser class:
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using TC.Adl.ParserNodes;
namespace TC.Adl
{
public class Parser
{
    // The Tokenizer to read tokens from.
    private Tokenizer fTokenizer;

    // The current token (most recently read).
    private Token fCurrentToken;
}
}

Our Parser class has only two fields:
- fTokenizer: the Tokenizer to read tokens from.
- fCurrentToken: the current token (most recently read).
The constructor will initialize fTokenizer and read the first token:

/// <summary>
/// Creates a parser that reads its tokens from the given source and
/// immediately loads the first token.
/// </summary>
/// <param name="source">The reader that supplies the source code.</param>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
public Parser(TextReader source)
{
    if (source == null)
    {
        throw new ArgumentNullException("source");
    }

    fTokenizer = new Tokenizer(source);

    // Prime fCurrentToken so the parse methods always have a token to inspect.
    ReadNextToken();
}
Now we'll add some private helper methods.
Reading a token is simple: just call Tokenizer.ReadNextToken(), which returns a Token, or null at the end of the source code.

/// <summary>Replaces the current token with the next one from the tokenizer.</summary>
void ReadNextToken()
{
    fCurrentToken = fTokenizer.ReadNextToken();
}

To determine if we're at the end of the source, we just have to check the current token for null:

/// <summary>True when there is no current token, i.e. the end of the source has been reached.</summary>
bool AtEndOfSource
{
    get
    {
        return fCurrentToken == null;
    }
}

We'll need a method that throws an exception when the end of the source has been reached unexpectedly:

/// <summary>Throws when the end of the source has been reached.</summary>
/// <exception cref="ParserException">There is no current token.</exception>
void CheckForUnexpectedEndOfSource()
{
    if (!AtEndOfSource)
    {
        return;
    }

    throw new ParserException("Unexpected end of source.");
}

We'll also need a method that verifies the current token and skips it:

/// <summary>Verifies that the current token has the given type and value, then moves past it.</summary>
/// <param name="type">The token type that is required at this position.</param>
/// <param name="value">The token text that is required at this position.</param>
/// <exception cref="ParserException">The source has ended, or the current token does not match.</exception>
void SkipExpected(TokenType type, string value)
{
    CheckForUnexpectedEndOfSource();

    bool lMatches = fCurrentToken.Equals(type, value);
    if (!lMatches)
    {
        throw new ParserException("Expected '" + value + "'.");
    }

    ReadNextToken();
}

Now that we've written the private helper methods, we can write the only public method: the ReadNextStatement method. This method reads a statement and returns it. If we've reached the end of the source, we'll return null, else we'll check the first token to determine the type of statement:
- If the current token is the word if, it's an if-statement.
- If the current token is the word while, it's a while-statement.
- If the current token is the word for, it's a for-statement.
- If it's any other word, we assume it's an assignment or a function call.
/// <summary>Reads the next statement from the source.</summary>
/// <returns>The parsed statement, or null when the end of the source has been reached.</returns>
/// <exception cref="ParserException">The current token cannot start a statement.</exception>
public Statement ReadNextStatement()
{
    if (AtEndOfSource)
        return null;

    // all the statements start with a word
    if (fCurrentToken.Type != TokenType.Word)
        throw new ParserException("Expected a statement.");

    if (fCurrentToken.Value == "if")
        return ParseIfStatement();
    if (fCurrentToken.Value == "while")
        return ParseWhileStatement();
    if (fCurrentToken.Value == "for")
        return ParseForStatement();

    return ParseAssignmentOrFunctionCallStatement();
}

The if-, while- and for-statements each contain one or more blocks of statements, so we'll first add a helper that reads statements until it reaches one of the given terminating words:

/// <summary>
/// Reads statements until the current token is one of the given terminating words.
/// The terminating word itself is not skipped.
/// </summary>
/// <param name="terminators">The words that end the block (for example "end" or "else").</param>
/// <returns>The statements of the block, in source order; empty when the block has no statements.</returns>
/// <exception cref="ParserException">The source ended before a terminating word was found.</exception>
List<Statement> ParseStatementBlock(params string[] terminators)
{
    List<Statement> lStatements = new List<Statement>();
    while (true)
    {
        // Checked on every iteration: the source may end right after a statement,
        // and fCurrentToken must not be dereferenced when it is null.
        CheckForUnexpectedEndOfSource();

        foreach (string lTerminator in terminators)
        {
            if (fCurrentToken.Equals(TokenType.Word, lTerminator))
                return lStatements;
        }

        // We're not at the end of the source here, so ReadNextStatement
        // either returns a statement or throws.
        lStatements.Add(ReadNextStatement());
    }
}

An if-statement starts with the word if, followed by a condition, the word then, a block of statements, an optional block of statements prefixed with the word else and the words end if:

/// <summary>Parses an if-statement; the current token must be the word 'if'.</summary>
/// <exception cref="ParserException">The if-statement is malformed or the source ended unexpectedly.</exception>
IfStatement ParseIfStatement()
{
    ReadNextToken(); // skip 'if'
    Expression lCondition = ParseExpression();
    SkipExpected(TokenType.Word, "then"); // skip 'then'

    // The true-block ends at either 'else' or 'end'.
    List<Statement> lTrueStatements = ParseStatementBlock("else", "end");

    List<Statement> lFalseStatements;
    if (fCurrentToken.Equals(TokenType.Word, "else"))
    {
        ReadNextToken(); // skip 'else'
        // Only 'end' terminates the else-block, so an empty else-block is
        // accepted and a second 'else' is not treated as a block separator.
        lFalseStatements = ParseStatementBlock("end");
    }
    else lFalseStatements = new List<Statement>();

    ReadNextToken(); // skip 'end'
    SkipExpected(TokenType.Word, "if"); // skip 'if'
    return new IfStatement(lCondition
        , new StatementCollection(lTrueStatements)
        , new StatementCollection(lFalseStatements));
}

A while-statement starts with the word while, followed by a condition, the word do, a block of statements and the words end while:

/// <summary>Parses a while-statement; the current token must be the word 'while'.</summary>
/// <exception cref="ParserException">The while-statement is malformed or the source ended unexpectedly.</exception>
WhileStatement ParseWhileStatement()
{
    ReadNextToken(); // skip 'while'
    Expression lCondition = ParseExpression();
    SkipExpected(TokenType.Word, "do"); // skip 'do'

    List<Statement> lStatements = ParseStatementBlock("end");

    ReadNextToken(); // skip 'end'
    SkipExpected(TokenType.Word, "while"); // skip 'while'
    return new WhileStatement(lCondition, new StatementCollection(lStatements));
}

A for-statement starts with the word for, followed by a variable, the symbol :=, a start-value, the word to, an end-value, optionally the word by with a step-size, the word do, a block of statements and the words end for:

/// <summary>Parses a for-statement; the current token must be the word 'for'.</summary>
/// <exception cref="ParserException">The for-statement is malformed or the source ended unexpectedly.</exception>
ForStatement ParseForStatement()
{
    ReadNextToken(); // skip 'for'
    CheckForUnexpectedEndOfSource();
    if (fCurrentToken.Type != TokenType.Word)
        throw new ParserException("Expected a variable.");
    Variable lVariable = new Variable(fCurrentToken.Value);
    ReadNextToken(); // skip the variable

    SkipExpected(TokenType.Symbol, ":="); // skip ':='
    Expression lStartValue = ParseExpression();
    SkipExpected(TokenType.Word, "to"); // skip 'to'
    Expression lEndValue = ParseExpression();

    CheckForUnexpectedEndOfSource();
    Expression lStepSize;
    if (fCurrentToken.Equals(TokenType.Word, "by"))
    {
        ReadNextToken(); // skip 'by'
        lStepSize = ParseExpression();
    }
    else lStepSize = new IntegerConstant(1); // no 'by' clause: step by 1

    SkipExpected(TokenType.Word, "do"); // skip 'do'

    List<Statement> lStatements = ParseStatementBlock("end");

    ReadNextToken(); // skip 'end'
    SkipExpected(TokenType.Word, "for"); // skip 'for'
    return new ForStatement(lVariable, lStartValue, lEndValue, lStepSize, new StatementCollection(lStatements));
}

An assignment and a function call statement both start with an identifier, so we'll have to read the next token to determine if it's an assignment or a function call statement:

/// <summary>
/// Parses a statement that starts with an identifier: an assignment when the
/// identifier is followed by ':=', a function call when it is followed by '('.
/// </summary>
/// <exception cref="ParserException">The identifier is followed by anything else, or by the end of the source.</exception>
Statement ParseAssignmentOrFunctionCallStatement()
{
    // Remember the identifier; the token after it decides the kind of statement.
    Token lToken = fCurrentToken;
    ReadNextToken();
    CheckForUnexpectedEndOfSource();

    if (fCurrentToken.Equals(TokenType.Symbol, ":="))
        return ParseAssignment(new Variable(lToken.Value));
    if (fCurrentToken.Equals(TokenType.Symbol, "("))
        return new FunctionCallStatement(ParseFunctionCall(lToken.Value));

    throw new ParserException("Expected a statement.");
}

An assignment just has an expression after the :=:

/// <summary>Parses the right-hand side of an assignment; the current token must be ':='.</summary>
/// <param name="variable">The variable being assigned to.</param>
Assignment ParseAssignment(Variable variable)
{
    ReadNextToken(); // skip ':='
    return new Assignment(variable, ParseExpression());
}

In the next post, we'll write the methods for parsing expressions.
