2007-05-03

Writing a parser: ADL Parser - part 2

In the previous part we wrote the first Parser methods, now we'll write the methods for parsing expressions. In the introduction to ADL, I showed the order in which expressions are evaluated. To parse expressions, we'll reverse the order and assume that everything is an AND-expression (lowest precedence):
Expression ParseExpression()
{
    return ParseAndExpression();
}
An AND-expression consists of one or more OR-expressions, separated by and operators:
Expression ParseAndExpression()
{
    Expression lNode = ParseOrExpression();

    while (!AtEndOfSource && fCurrentToken.Equals(TokenType.Word, "and"))
    {
        ReadNextToken(); // skip 'and'
        lNode = new AndExpression(lNode, ParseOrExpression());
    }

    return lNode;
}
An OR-expression consists of one or more comparisons, separated by or operators:
Expression ParseOrExpression()
{
    Expression lNode = ParseComparison();

    while (!AtEndOfSource && fCurrentToken.Equals(TokenType.Word, "or"))
    {
        ReadNextToken(); // skip 'or'
        lNode = new OrExpression(lNode, ParseComparison());
    }

    return lNode;
}
A comparison is an additive expression, or 2 additive expressions separated by a comparison operator:
Expression ParseComparison()
{
    Expression lNode = ParseAdditiveExpression();

    if (!AtEndOfSource && fCurrentToken.Type == TokenType.Symbol)
    {
        ComparisonOperator lOperator;
        switch (fCurrentToken.Value)
        {
            case "==": lOperator = ComparisonOperator.Equal; break;
            case "<>": lOperator = ComparisonOperator.NotEquals; break;
            case "<":  lOperator = ComparisonOperator.LessThan; break;
            case ">":  lOperator = ComparisonOperator.GreaterThan; break;
            case "<=": lOperator = ComparisonOperator.LessThanOrEqual; break;
            case ">=": lOperator = ComparisonOperator.GreaterThanOrEqual; break;
            default: return lNode;
        }
        ReadNextToken(); // skip comparison operator
        return new Comparison(lOperator, lNode, ParseAdditiveExpression());
    }
    else return lNode;
}
An additive expression consists of one or more multiplicative expressions, separated by + or - operators:
Expression ParseAdditiveExpression()
{
    Expression lNode = ParseMultiplicativeExpression();

    while (!AtEndOfSource)
    {
        if (fCurrentToken.Equals(TokenType.Symbol, "+"))
        {
            ReadNextToken(); // skip '+'
            lNode = new Addition(lNode, ParseMultiplicativeExpression());
        }
        else if (fCurrentToken.Equals(TokenType.Symbol, "-"))
        {
            ReadNextToken(); // skip '-'
            lNode = new Subtraction(lNode, ParseMultiplicativeExpression());
        }
        else break;
    }

    return lNode;
}
A multiplicative expression consists of one or more unary expressions, separated by * or / operators:
Expression ParseMultiplicativeExpression()
{
    Expression lNode = ParseUnaryExpression();

    while (!AtEndOfSource)
    {
        if (fCurrentToken.Equals(TokenType.Symbol, "*"))
        {
            ReadNextToken(); // skip '*'
            lNode = new Multiplication(lNode, ParseUnaryExpression());
        }
        else if (fCurrentToken.Equals(TokenType.Symbol, "/"))
        {
            ReadNextToken(); // skip '/'
            lNode = new Division(lNode, ParseUnaryExpression());
        }
        else break;
    }

    return lNode;
}
A unary expression is a base expression, optionally prefixed by a -, + or not operator:
Expression ParseUnaryExpression()
{
    CheckForUnexpectedEndOfSource();

    if (fCurrentToken.Equals(TokenType.Symbol, "-"))
    {
        ReadNextToken(); // skip '-'
        return new Negation(ParseBaseExpression());
    }
    else if (fCurrentToken.Equals(TokenType.Word, "not"))
    {
        ReadNextToken(); // skip 'not'
        return new NotExpression(ParseBaseExpression());
    }
    else if (fCurrentToken.Equals(TokenType.Symbol, "+"))
        ReadNextToken(); // skip '+'

    return ParseBaseExpression();
}
A base expression is either an integer constant, a string constant, a variable, a function call or a group expression:
Expression ParseBaseExpression()
{
    CheckForUnexpectedEndOfSource();

    switch(fCurrentToken.Type)
    {
        case TokenType.Integer: return ParseIntegerConstant();
        case TokenType.String: return ParseStringConstant();
        case TokenType.Word: return ParseVariableOrFunctionCall();
        default: // TokenType.Symbol
            if (fCurrentToken.Value == "(")
                return ParseGroupExpression();
            else throw new ParserException("Expected an expression.");
    }
}
A group expression is an expression between parenthesis:
Expression ParseGroupExpression()
{
    ReadNextToken(); // skip '('
    Expression lExpression = ParseExpression();
    SkipExpected(TokenType.Symbol, ")"); // skip ')'
    return lExpression;
}
A variable and a function call both start with an identifier. To disambiguate, we'll have to read the next token:
Expression ParseVariableOrFunctionCall()
{
    string lName = fCurrentToken.Value;
    ReadNextToken();
    if (!AtEndOfSource && fCurrentToken.Equals(TokenType.Symbol, "("))
        return ParseFunctionCall(lName);
    else return new Variable(lName);
}
A string constant is just the read string token:
Expression ParseStringConstant()
{
    string lValue = fCurrentToken.Value;
    ReadNextToken(); // skip string constant
    return new StringConstant(lValue);
}
An integer constant has to be parsed from the string value:
Expression ParseIntegerConstant()
{
    int lValue;
    if (int.TryParse(fCurrentToken.Value, out lValue))
    {
        ReadNextToken(); // skip integer constant
        return new IntegerConstant(lValue);
    }
    else throw new ParserException("Invalid integer constant " + fCurrentToken.Value);
}
A function call consists of an identifier, followed by zero or more arguments between parentheses:
FunctionCall ParseFunctionCall(string name)
{
    ReadNextToken(); // skip '('
    CheckForUnexpectedEndOfSource();

    List lArguments = new List();
    if (!fCurrentToken.Equals(TokenType.Symbol, ")")
    {
        lArguments.Add(ParseExpression());
        CheckForUnexpectedEndOfSource();

        while (fCurrentToken.Equals(TokenType.Symbol, ","))
        {
            ReadNextToken(); // skip ','
            lArguments.Add(ParseExpression());
            CheckForUnexpectedEndOfSource();
        }

        if (!fCurrentToken.Equals(TokenType.Symbol, ")")
            throw new ParserException("Expected ')'.");
    }

    ReadNextToken(); // skip ')'
    return new FunctionCall(name, lArguments.ToArray());
}
Now our parser is ready. To test the parser, I've created a test application that reads code from a TextBox, parses it and shows the parse tree in a TreeView. I'm not going to show the source code of the test application in this post, but you can download the complete Visual Studio 2005 solution with both projects (TC.Adl and TC.Adl.Test) here.