2007-05-18

Buzzword screencast

I just watched a screencast of Buzzword (recorded by Dan Bricklin), Virtual Ubiquity's not-yet-released word processor for the web that looks very promising. It's a Flash/Flex application that looks so much better than any web word processor I've seen before. I'll be keeping an eye on this.

2007-05-06

slimCODE against diabetes

I just read on Martin Plante's blog that he's donating this week's earnings of his slimKEYS application to Team Hanselman to fight diabetes. slimKEYS is a cool product, and this is a great initiative.

2007-05-03

Writing a parser: overview

Here's a quick overview of the different blog posts in the series Writing a parser:

Writing a parser: ADL Parser - part 2

In the previous part we wrote the first Parser methods, now we'll write the methods for parsing expressions. In the introduction to ADL, I showed the order in which expressions are evaluated. To parse expressions, we'll reverse the order and assume that everything is an AND-expression (lowest precedence):
Expression ParseExpression()
{
    return ParseAndExpression();
}
An AND-expression consists of one or more OR-expressions, separated by and operators:
Expression ParseAndExpression()
{
    Expression lNode = ParseOrExpression();

    while (!AtEndOfSource && fCurrentToken.Equals(TokenType.Word, "and"))
    {
        ReadNextToken(); // skip 'and'
        lNode = new AndExpression(lNode, ParseOrExpression());
    }

    return lNode;
}
An OR-expression consists of one or more comparisons, separated by or operators:
Expression ParseOrExpression()
{
    Expression lNode = ParseComparison();

    while (!AtEndOfSource && fCurrentToken.Equals(TokenType.Word, "or"))
    {
        ReadNextToken(); // skip 'or'
        lNode = new OrExpression(lNode, ParseComparison());
    }

    return lNode;
}
A comparison is an additive expression, or 2 additive expressions separated by a comparison operator:
Expression ParseComparison()
{
    Expression lNode = ParseAdditiveExpression();

    if (!AtEndOfSource && fCurrentToken.Type == TokenType.Symbol)
    {
        ComparisonOperator lOperator;
        switch (fCurrentToken.Value)
        {
            case "==": lOperator = ComparisonOperator.Equal; break;
            case "<>": lOperator = ComparisonOperator.NotEquals; break;
            case "<":  lOperator = ComparisonOperator.LessThan; break;
            case ">":  lOperator = ComparisonOperator.GreaterThan; break;
            case "<=": lOperator = ComparisonOperator.LessThanOrEqual; break;
            case ">=": lOperator = ComparisonOperator.GreaterThanOrEqual; break;
            default: return lNode;
        }
        ReadNextToken(); // skip comparison operator
        return new Comparison(lOperator, lNode, ParseAdditiveExpression());
    }
    else return lNode;
}
An additive expression consists of one or more multiplicative expressions, separated by + or - operators:
Expression ParseAdditiveExpression()
{
    Expression lNode = ParseMultiplicativeExpression();

    while (!AtEndOfSource)
    {
        if (fCurrentToken.Equals(TokenType.Symbol, "+"))
        {
            ReadNextToken(); // skip '+'
            lNode = new Addition(lNode, ParseMultiplicativeExpression());
        }
        else if (fCurrentToken.Equals(TokenType.Symbol, "-"))
        {
            ReadNextToken(); // skip '-'
            lNode = new Subtraction(lNode, ParseMultiplicativeExpression());
        }
        else break;
    }

    return lNode;
}
A multiplicative expression consists of one or more unary expressions, separated by * or / operators:
Expression ParseMultiplicativeExpression()
{
    Expression lNode = ParseUnaryExpression();

    while (!AtEndOfSource)
    {
        if (fCurrentToken.Equals(TokenType.Symbol, "*"))
        {
            ReadNextToken(); // skip '*'
            lNode = new Multiplication(lNode, ParseUnaryExpression());
        }
        else if (fCurrentToken.Equals(TokenType.Symbol, "/"))
        {
            ReadNextToken(); // skip '/'
            lNode = new Division(lNode, ParseUnaryExpression());
        }
        else break;
    }

    return lNode;
}
A unary expression is a base expression, optionally prefixed by a -, + or not operator:
Expression ParseUnaryExpression()
{
    CheckForUnexpectedEndOfSource();

    if (fCurrentToken.Equals(TokenType.Symbol, "-"))
    {
        ReadNextToken(); // skip '-'
        return new Negation(ParseBaseExpression());
    }
    else if (fCurrentToken.Equals(TokenType.Word, "not"))
    {
        ReadNextToken(); // skip 'not'
        return new NotExpression(ParseBaseExpression());
    }
    else if (fCurrentToken.Equals(TokenType.Symbol, "+"))
        ReadNextToken(); // skip '+'

    return ParseBaseExpression();
}
A base expression is either an integer constant, a string constant, a variable, a function call or a group expression:
Expression ParseBaseExpression()
{
    CheckForUnexpectedEndOfSource();

    switch(fCurrentToken.Type)
    {
        case TokenType.Integer: return ParseIntegerConstant();
        case TokenType.String: return ParseStringConstant();
        case TokenType.Word: return ParseVariableOrFunctionCall();
        default: // TokenType.Symbol
            if (fCurrentToken.Value == "(")
                return ParseGroupExpression();
            else throw new ParserException("Expected an expression.");
    }
}
A group expression is an expression between parenthesis:
Expression ParseGroupExpression()
{
    ReadNextToken(); // skip '('
    Expression lExpression = ParseExpression();
    SkipExpected(TokenType.Symbol, ")"); // skip ')'
    return lExpression;
}
A variable and a function call both start with an identifier. To disambiguate, we'll have to read the next token:
Expression ParseVariableOrFunctionCall()
{
    string lName = fCurrentToken.Value;
    ReadNextToken();
    if (!AtEndOfSource && fCurrentToken.Equals(TokenType.Symbol, "("))
        return ParseFunctionCall(lName);
    else return new Variable(lName);
}
A string constant is just the read string token:
Expression ParseStringConstant()
{
    string lValue = fCurrentToken.Value;
    ReadNextToken(); // skip string constant
    return new StringConstant(lValue);
}
An integer constant has to be parsed from the string value:
Expression ParseIntegerConstant()
{
    int lValue;
    if (int.TryParse(fCurrentToken.Value, out lValue))
    {
        ReadNextToken(); // skip integer constant
        return new IntegerConstant(lValue);
    }
    else throw new ParserException("Invalid integer constant " + fCurrentToken.Value);
}
A function call consists of an identifier, followed by zero or more arguments between parentheses:
FunctionCall ParseFunctionCall(string name)
{
    ReadNextToken(); // skip '('
    CheckForUnexpectedEndOfSource();

    List lArguments = new List();
    if (!fCurrentToken.Equals(TokenType.Symbol, ")")
    {
        lArguments.Add(ParseExpression());
        CheckForUnexpectedEndOfSource();

        while (fCurrentToken.Equals(TokenType.Symbol, ","))
        {
            ReadNextToken(); // skip ','
            lArguments.Add(ParseExpression());
            CheckForUnexpectedEndOfSource();
        }

        if (!fCurrentToken.Equals(TokenType.Symbol, ")")
            throw new ParserException("Expected ')'.");
    }

    ReadNextToken(); // skip ')'
    return new FunctionCall(name, lArguments.ToArray());
}
Now our parser is ready. To test the parser, I've created a test application that reads code from a TextBox, parses it and shows the parse tree in a TreeView. I'm not going to show the source code of the test application in this post, but you can download the complete Visual Studio 2005 solution with both projects (TC.Adl and TC.Adl.Test) here.

Writing a parser: ADL Parser - part 1

We'll now write the Parser class:
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using TC.Adl.ParserNodes;

namespace TC.Adl
{
    public class Parser
    {
        Tokenizer fTokenizer;
        Token fCurrentToken;
    }
}
Our Parser class has only 2 fields:
fTokenizer
The Tokenizer to read tokens from.
fCurrentToken
The current token (most recently read).
The constructor of the Parser class will accept a TextReader argument, create a Tokenizer that uses that TextReader, store it in fTokenizer and read the first token:
public Parser(TextReader source)
{
    if (source == null) throw new ArgumentNullException("source");

    fTokenizer = new Tokenizer(source);
    ReadNextToken();
}

Now we'll add some private helper methods.

Reading a token is simple. just call Tokenizer.ReadNextToken(), which returns a Token or null at the end of the source code.
void ReadNextToken() { fCurrentToken = fTokenizer.ReadNextToken(); }
To determine if we're at the end of the source, we just have to check the current token for null:
bool AtEndOfSource { get { return fCurrentToken == null; } }
We'll need a method that throws an exception when the end of the source has been reached unexpectedly:
void CheckForUnexpectedEndOfSource()
{
    if (AtEndOfSource)
        throw new ParserException("Unexpected end of source.");
}
We'll also need a method that verifies the current token and skips it:
void SkipExpected(TokenType type, string value)
{
    CheckForUnexpectedEndOfSource();
    if (!fCurrentToken.Equals(type, value))
        throw new ParserException("Expected '" + value + "'.");
    ReadNextToken();
}
Now that we've written the private helper methods, we can write the only public method: the ReadNextStatement method. This methods reads a statement and returns it. If we've reached the end of the source, we'll return null, else we'll check the first token to determine the type of statement:
public Statement ReadNextStatement()
{
    if (AtEndOfSource)
        return null;

    // all the statements start with a word
    if (fCurrentToken.Type != TokenType.Word)
        throw new ParserException("Expected a statement.");

    if (fCurrentToken.Value == "if")
        return ParseIfStatement();

    if (fCurrentToken.Value == "while")
        return ParseWhileStatement();

    if (fCurrentToken.Value == "for")
        return ParseForStatement();

    return ParseAssignmentOrFunctionCallStatement();
}
An if-statement starts with the word if, followed by a condition, the word then, a block of statements, an optional block of statements prefixed with the word else and the words end if:
IfStatement ParseIfStatement()
{
    ReadNextToken(); // skip 'if'

    Expression lCondition = ParseExpression();

    SkipExpected(TokenType.Word, "then"); // skip 'then'

    List lTrueStatements = new List();
    List lFalseStatements = new List();
    List lStatements = lTrueStatements;
    Statement lStatement;

    CheckForUnexpectedEndOfSource();
    while (!fCurrentToken.Equals(TokenType.Word, "end"))
    {
        if (fCurrentToken.Equals(TokenType.Word, "else"))
        {
            ReadNextToken(); // skip 'else'
            CheckForUnexpectedEndOfSource();
            lStatements = lFalseStatements;
        }

        if ((lStatement = ReadNextStatement()) != null)
            lStatements.Add(lStatement);
        else throw new ParserException("Unexpected end of source.");
    }

    ReadNextToken(); // skip 'end'
    SkipExpected(TokenType.Word, "if"); // skip 'if'

    return new IfStatement(lCondition
        , new StatementCollection(lTrueStatements)
        , new StatementCollection(lFalseStatements));
}
A while-statement starts with the word while, followed by a condition, the word do, a block of statements and the words end while:
WhileStatement ParseWhileStatement()
{
    ReadNextToken(); // skip 'while'

    Expression lCondition = ParseExpression();

    SkipExpected(TokenType.Word, "do"); // skip 'do'

    List lStatements = new List();
    Statement lStatement;
    CheckForUnexpectedEndOfSource();
    while (!fCurrentToken.Equals(TokenType.Word, "end"))
    {
        if ((lStatement = ReadNextStatement()) != null)
            lStatements.Add(lStatement);
        else throw new ParserException("Unexpected end of source.");
    }

    ReadNextToken(); // skip 'end'
    SkipExpected(TokenType.Word, "while"); // skip 'while'

    return new WhileStatement(lCondition, new StatementCollection(lStatements));
}
A for-statement starts with the word for, followed by a variable, the symbol :=, a start-value, the word to, an end-value, optionally the word by with a step-size, the word do, a block of statements and the words end for:
ForStatement ParseForStatement()
{
    ReadNextToken(); // skip 'for'
    CheckForUnexpectedEndOfSource();

    if (fCurrentToken.Type != TokenType.Word)
        throw new ParserException("Expected a variable.");

    Variable lVariable = new Variable(fCurrentToken.Value);
    ReadNextToken();

    SkipExpected(TokenType.Symbol, ":="); // skip ':='
    Expression lStartValue = ParseExpression();

    SkipExpected(TokenType.Word, "to"); // skip 'to'
    Expression lEndValue = ParseExpression();
    CheckForUnexpectedEndOfSource();

    Expression lStepSize;
    if (fCurrentToken.Equals(TokenType.Word, "by"))
    {
        ReadNextToken(); // skip 'by'
        lStepSize = ParseExpression();
    }
    else lStepSize = new IntegerConstant(1);

    SkipExpected(TokenType.Word, "do");
    List lStatements = new List();
    Statement lStatement;
    CheckForUnexpectedEndOfSource();
    while (!fCurrentToken.Equals(TokenType.Word, "end"))
    {
        if ((lStatement = ReadNextStatement()) != null)
            lStatements.Add(lStatement);
        else throw new ParserException("Unexpected end of source.");
    }

    ReadNextToken(); // skip 'end'
    SkipExpected(TokenType.Word, "for"); // skip 'for'

    return new ForStatement(lVariable, lStartValue, lEndValue, lStepSize, new StatementCollection(lStatements));
}
An assignment and a function call statement both start with an identifier, so we'll have to read the next token to determine if it's an assignment or a function call statement:
Statement ParseAssignmentOrFunctionCallStatement()
{
    Token lToken = fCurrentToken;
    ReadNextToken();
    CheckForUnexpectedEndOfSource();

    if (fCurrentToken.Equals(TokenType.Symbol, ":="))
        return ParseAssignment(new Variable(lToken.Value));

    if (fCurrentToken.Equals(TokenType.Symbol, "("))
        return new FunctionCallStatement(ParseFunctionCall(lToken.Value));

    throw new ParserException("Expected a statement.");
}
An assignment just has an expression after the :=:
Assignment ParseAssignment(Variable variable)
{
    ReadNextToken(); // skip ':='
    return new Assignment(variable, ParseExpression());
}
In the next post, we'll write the methods for parsing expression.