2007-05-03

Writing a parser: ADL Parser - part 1

We'll now write the Parser class:
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using TC.Adl.ParserNodes;

namespace TC.Adl
{
    /// <summary>
    /// Parser that consumes the tokens produced by a <see cref="Tokenizer"/>.
    /// </summary>
    public class Parser
    {
        /// <summary>The current token (the one most recently read).</summary>
        private Token fCurrentToken;

        /// <summary>The tokenizer the tokens are read from.</summary>
        private Tokenizer fTokenizer;
    }
}
Our Parser class has only 2 fields:
fTokenizer
The Tokenizer to read tokens from.
fCurrentToken
The current token (most recently read).
The constructor of the Parser class will accept a TextReader argument, create a Tokenizer that uses that TextReader, store it in fTokenizer and read the first token:
/// <summary>
/// Creates a parser that reads its tokens from the given source and
/// immediately reads the first token.
/// </summary>
/// <param name="source">The reader that supplies the source code.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="source"/> is <c>null</c>.
/// </exception>
public Parser(TextReader source)
{
    if (source == null)
    {
        throw new ArgumentNullException("source");
    }

    Tokenizer lTokenizer = new Tokenizer(source);
    fTokenizer = lTokenizer;

    // load the first token so fCurrentToken is set before any parsing starts
    ReadNextToken();
}

Now we'll add some private helper methods.

Reading a token is simple: just call Tokenizer.ReadNextToken(), which returns a Token, or null at the end of the source code.
/// <summary>
/// Replaces the current token with the next one delivered by the tokenizer.
/// </summary>
void ReadNextToken()
{
    fCurrentToken = fTokenizer.ReadNextToken();
}
To determine if we're at the end of the source, we just have to check the current token for null:
/// <summary>
/// Gets whether the end of the source has been reached, i.e. whether there
/// is no current token.
/// </summary>
bool AtEndOfSource
{
    get
    {
        return fCurrentToken == null;
    }
}
We'll need a method that throws an exception when the end of the source has been reached unexpectedly:
/// <summary>
/// Throws when the end of the source has been reached; does nothing otherwise.
/// </summary>
/// <exception cref="ParserException">There is no current token.</exception>
void CheckForUnexpectedEndOfSource()
{
    if (!AtEndOfSource)
    {
        return;
    }

    throw new ParserException("Unexpected end of source.");
}
We'll also need a method that verifies the current token and skips it:
/// <summary>
/// Verifies that the current token has the given type and value, then
/// moves on to the next token.
/// </summary>
/// <param name="type">The token type the current token must have.</param>
/// <param name="value">The token value the current token must have.</param>
/// <exception cref="ParserException">
/// The end of the source has been reached, or the current token does not match.
/// </exception>
void SkipExpected(TokenType type, string value)
{
    CheckForUnexpectedEndOfSource();

    bool lMatches = fCurrentToken.Equals(type, value);
    if (!lMatches)
    {
        throw new ParserException("Expected '" + value + "'.");
    }

    ReadNextToken();
}
Now that we've written the private helper methods, we can write the only public method: the ReadNextStatement method. This method reads a statement and returns it. If we've reached the end of the source, we'll return null; otherwise we'll check the first token to determine the type of statement:
/// <summary>
/// Reads the next statement from the source.
/// </summary>
/// <returns>
/// The parsed statement, or <c>null</c> when the end of the source has been reached.
/// </returns>
/// <exception cref="ParserException">
/// The current token is not a word, so it cannot start a statement.
/// </exception>
public Statement ReadNextStatement()
{
    if (AtEndOfSource)
    {
        return null;
    }

    // every statement begins with a word token
    if (fCurrentToken.Type != TokenType.Word)
    {
        throw new ParserException("Expected a statement.");
    }

    // the leading word selects the kind of statement; any other word
    // starts an assignment or a function call
    switch (fCurrentToken.Value)
    {
        case "if":
            return ParseIfStatement();

        case "while":
            return ParseWhileStatement();

        case "for":
            return ParseForStatement();

        default:
            return ParseAssignmentOrFunctionCallStatement();
    }
}
An if-statement starts with the word if, followed by a condition, the word then, a block of statements, an optional block of statements prefixed with the word else and the words end if:
/// <summary>
/// Parses an if-statement: <c>if</c> condition <c>then</c> statements
/// [<c>else</c> statements] <c>end if</c>. The current token must be the
/// leading <c>if</c>.
/// </summary>
/// <returns>
/// The if-statement with its condition, its true-block and its (possibly
/// empty) false-block.
/// </returns>
/// <exception cref="ParserException">
/// An expected keyword is missing, or the source ends before <c>end if</c>.
/// </exception>
IfStatement ParseIfStatement()
{
    ReadNextToken(); // skip 'if'

    Expression lCondition = ParseExpression();

    SkipExpected(TokenType.Word, "then"); // skip 'then'

    List<Statement> lTrueStatements = new List<Statement>();
    List<Statement> lFalseStatements = new List<Statement>();

    // the list that currently receives statements; switches at 'else'
    List<Statement> lStatements = lTrueStatements;

    while (true)
    {
        // Check on every iteration: the previous statement may have consumed
        // the last token, and fCurrentToken must not be dereferenced when null.
        CheckForUnexpectedEndOfSource();

        if (fCurrentToken.Equals(TokenType.Word, "end"))
            break;

        if (fCurrentToken.Equals(TokenType.Word, "else"))
        {
            ReadNextToken(); // skip 'else'
            lStatements = lFalseStatements;

            // go back to the top so that an empty else-block ('else end if')
            // is accepted, just like an empty then-block
            continue;
        }

        // not at the end of the source here, so ReadNextStatement() cannot return null
        lStatements.Add(ReadNextStatement());
    }

    ReadNextToken(); // skip 'end'
    SkipExpected(TokenType.Word, "if"); // skip 'if'

    return new IfStatement(lCondition
        , new StatementCollection(lTrueStatements)
        , new StatementCollection(lFalseStatements));
}
A while-statement starts with the word while, followed by a condition, the word do, a block of statements and the words end while:
/// <summary>
/// Parses a while-statement: <c>while</c> condition <c>do</c> statements
/// <c>end while</c>. The current token must be the leading <c>while</c>.
/// </summary>
/// <returns>The while-statement with its condition and its block of statements.</returns>
/// <exception cref="ParserException">
/// An expected keyword is missing, or the source ends before <c>end while</c>.
/// </exception>
WhileStatement ParseWhileStatement()
{
    ReadNextToken(); // skip 'while'

    Expression lCondition = ParseExpression();

    SkipExpected(TokenType.Word, "do"); // skip 'do'

    List<Statement> lStatements = new List<Statement>();

    while (true)
    {
        // Check on every iteration: the previous statement may have consumed
        // the last token, and fCurrentToken must not be dereferenced when null.
        CheckForUnexpectedEndOfSource();

        if (fCurrentToken.Equals(TokenType.Word, "end"))
            break;

        // not at the end of the source here, so ReadNextStatement() cannot return null
        lStatements.Add(ReadNextStatement());
    }

    ReadNextToken(); // skip 'end'
    SkipExpected(TokenType.Word, "while"); // skip 'while'

    return new WhileStatement(lCondition, new StatementCollection(lStatements));
}
A for-statement starts with the word for, followed by a variable, the symbol :=, a start-value, the word to, an end-value, optionally the word by with a step-size, the word do, a block of statements and the words end for:
/// <summary>
/// Parses a for-statement: <c>for</c> variable <c>:=</c> start <c>to</c> end
/// [<c>by</c> step] <c>do</c> statements <c>end for</c>. The current token
/// must be the leading <c>for</c>.
/// </summary>
/// <returns>
/// The for-statement; when no <c>by</c> clause is present the step size is
/// the integer constant 1.
/// </returns>
/// <exception cref="ParserException">
/// The loop variable or an expected keyword/symbol is missing, or the source
/// ends before <c>end for</c>.
/// </exception>
ForStatement ParseForStatement()
{
    ReadNextToken(); // skip 'for'
    CheckForUnexpectedEndOfSource();

    if (fCurrentToken.Type != TokenType.Word)
        throw new ParserException("Expected a variable.");

    Variable lVariable = new Variable(fCurrentToken.Value);
    ReadNextToken();

    SkipExpected(TokenType.Symbol, ":="); // skip ':='
    Expression lStartValue = ParseExpression();

    SkipExpected(TokenType.Word, "to"); // skip 'to'
    Expression lEndValue = ParseExpression();
    CheckForUnexpectedEndOfSource();

    Expression lStepSize;
    if (fCurrentToken.Equals(TokenType.Word, "by"))
    {
        ReadNextToken(); // skip 'by'
        lStepSize = ParseExpression();
    }
    else lStepSize = new IntegerConstant(1); // default step size

    SkipExpected(TokenType.Word, "do"); // skip 'do'

    List<Statement> lStatements = new List<Statement>();

    while (true)
    {
        // Check on every iteration: the previous statement may have consumed
        // the last token, and fCurrentToken must not be dereferenced when null.
        CheckForUnexpectedEndOfSource();

        if (fCurrentToken.Equals(TokenType.Word, "end"))
            break;

        // not at the end of the source here, so ReadNextStatement() cannot return null
        lStatements.Add(ReadNextStatement());
    }

    ReadNextToken(); // skip 'end'
    SkipExpected(TokenType.Word, "for"); // skip 'for'

    return new ForStatement(lVariable, lStartValue, lEndValue, lStepSize, new StatementCollection(lStatements));
}
An assignment and a function call statement both start with an identifier, so we'll have to read the next token to determine if it's an assignment or a function call statement:
/// <summary>
/// Parses a statement that starts with an identifier. The token after the
/// identifier decides the kind: <c>:=</c> means an assignment, <c>(</c> means
/// a function call.
/// </summary>
/// <returns>The parsed assignment or function call statement.</returns>
/// <exception cref="ParserException">
/// The source ends after the identifier, or the identifier is followed by
/// neither <c>:=</c> nor <c>(</c>.
/// </exception>
Statement ParseAssignmentOrFunctionCallStatement()
{
    // remember the identifier; we need to look one token further
    Token lIdentifier = fCurrentToken;

    ReadNextToken();
    CheckForUnexpectedEndOfSource();

    if (fCurrentToken.Equals(TokenType.Symbol, ":="))
    {
        Variable lTarget = new Variable(lIdentifier.Value);
        return ParseAssignment(lTarget);
    }

    if (!fCurrentToken.Equals(TokenType.Symbol, "("))
    {
        throw new ParserException("Expected a statement.");
    }

    return new FunctionCallStatement(ParseFunctionCall(lIdentifier.Value));
}
An assignment just has an expression after the :=:
/// <summary>
/// Parses the remainder of an assignment. The current token must be the
/// <c>:=</c> symbol; the expression that follows it is the assigned value.
/// </summary>
/// <param name="variable">The variable that is being assigned to.</param>
/// <returns>The assignment of the parsed expression to <paramref name="variable"/>.</returns>
Assignment ParseAssignment(Variable variable)
{
    ReadNextToken(); // skip ':='

    Expression lValue = ParseExpression();
    return new Assignment(variable, lValue);
}
In the next post, we'll write the methods for parsing expressions.
Comments:
pls can you help me with some guidelines to write a symbol table program
 
Post a Comment

Links to this post:

Create a Link