view lexer/Lexer.d @ 28:69464d465284 new_gen

Now supporting structs - both read and write. Still a few errors though, so watch out.
author Anders Johnsen <skabet@gmail.com>
date Sun, 20 Apr 2008 11:20:28 +0200
parents 59bfbaf8847f
children 41d23f2762c3
line wrap: on
line source

module lexer.Lexer;

import misc.Error,
       misc.DataSource;

import lexer.Token,
       lexer.Keyword;

import tango.io.Stdout;

/**
 * Hand-written lexer that turns the characters of a DataSource into Tokens.
 *
 * The lexer keeps a single cursor (`position`) into `source.data`. next()
 * consumes one token and advances the cursor; peek() looks ahead and then
 * puts the cursor back.
 */
class Lexer
{
public:
    /// Creates a lexer that starts reading at the beginning of `source`.
    this (DataSource source)
    {
        this.source = source;
        this.position = 0;
    }

    /**
     * Consumes and returns the next token, skipping whitespace and comments.
     *
     * Returns: a Tok.EOF token once the input is exhausted.
     * Throws: Error when a character outside the recognised set is read, or
     *         when a block comment is not closed before the end of input.
     */
    Token next ()
    {
        // Whitespace is skipped iteratively so that a long run of blanks
        // does not consume one stack frame per character.
        while (true)
        {
            switch (getNextChar)
            {
                case CharType.EOF:
                {
                    Location l;
                    return Token (Tok.EOF, l, 0);
                }

                case CharType.Whitespace:
                    position += 1;
                    continue;

                case CharType.Symbol:
                    return lexSymbol;

                case CharType.Letter:
                    return lexLetter;

                case CharType.Number:
                    return lexNumber;
            }
        }
    }

    /**
     * Returns a token ahead of the cursor without consuming anything.
     *
     * Params:
     *   skip = number of tokens to step over before the one that is returned;
     *          0 yields the token next() would return.
     */
    Token peek ( int skip = 0)
    {
        int oldPosition = this.position;
        // Restore the cursor on every exit path, including when next() throws.
        scope(exit) this.position = oldPosition;

        while(skip-- > 0)
            this.next;
        return this.next;
    }

    /// Returns the lexer's list of recorded errors.
    public Error[] getErrors()
    {
        return this.errors;
    }
private:

    /// Lexes a run of digits starting at the cursor into a Tok.Integer token.
    Token lexNumber ()
    {
        int start = position;
        int len = 1;
        while (getNextChar(len) == CharType.Number)
            ++len;

        position += len;

        return Token(Tok.Integer, Location(start, this.source), len);
    }

    /**
     * Lexes an operator or punctuation token starting at the cursor.
     *
     * A '/' that opens a comment ("//", "/*" or "/+") causes the comment to
     * be skipped and the token following it to be returned instead.
     */
    Token lexSymbol ()
    {
        int start = position++;
        char c = source.data[start];

        switch (c)
        {
            case '(':
                return Token(Tok.OpenParentheses, Location(start, this.source), 1);
            case ')':
                return Token(Tok.CloseParentheses, Location(start, this.source), 1);
            case '{':
                return Token(Tok.OpenBrace, Location(start, this.source), 1);
            case '}':
                return Token(Tok.CloseBrace, Location(start, this.source), 1);
            case ';':
                return Token(Tok.Seperator, Location(start, this.source), 1);
            case '.':
                return Token(Tok.Dot, Location(start, this.source), 1);
            case ',':
                return Token(Tok.Comma, Location(start, this.source), 1);
            case '=':
                if (nextIs('='))
                {
                    ++position;
                    return Token(Tok.Eq, Location(start, this.source), 2);
                }
                return Token(Tok.Assign, Location(start, this.source), 1);
            case '!':
                if (nextIs('='))
                {
                    ++position;
                    return Token(Tok.Ne, Location(start, this.source), 2);
                }
                return Token(Tok.Not, Location(start, this.source), 1);
            case '<':
                if (nextIs('='))
                {
                    ++position;
                    return Token(Tok.Le, Location(start, this.source), 2);
                }
                return Token(Tok.Lt, Location(start, this.source), 1);
            case '>':
                if (nextIs('='))
                {
                    ++position;
                    return Token(Tok.Ge, Location(start, this.source), 2);
                }
                return Token(Tok.Gt, Location(start, this.source), 1);
            case '+':
                return Token(Tok.Add, Location(start, this.source), 1);
            case '-':
                return Token(Tok.Sub, Location(start, this.source), 1);
            case '*':
                return Token(Tok.Mul, Location(start, this.source), 1);
            case '/':
                if (nextIs('/'))
                    return skipLineComment;
                if (nextIs('*'))
                    return skipBlockComment;
                if (nextIs('+'))
                    return skipNestedComment;
                return Token(Tok.Div, Location(start, this.source), 1);

            default:
                // Only reachable if the symbol set in getNextChar and this
                // switch drift apart.
                throw new Error("Read invalid symbol: '" ~ c ~ "'", Location(start, source));
        }
    }

    /// True when there is a character at the cursor and it equals `expected`.
    bool nextIs (char expected)
    {
        return position < source.data.length
            && source.data[position] == expected;
    }

    /**
     * Skips a "//" comment; the cursor is on the second '/'.
     *
     * Raw characters are inspected directly so that characters the lexer
     * does not otherwise recognise are allowed inside the comment.
     * Returns the token after the terminating newline, or a Tok.EOF token if
     * the input ends first.
     */
    Token skipLineComment ()
    {
        while (position < source.data.length)
        {
            if (source.data[position++] == '\n')
                return this.next;
        }
        return Token(Tok.EOF, Location(position, this.source), 0);
    }

    /**
     * Skips a non-nesting block comment; the cursor is on the opening '*'.
     *
     * Returns the token following the closing delimiter.
     * Throws: Error if the input ends before the comment is closed.
     */
    Token skipBlockComment ()
    {
        // Step over the opening '*' so that it cannot double as the closer.
        position += 1;

        while (position + 1 < source.data.length)
        {
            if (source.data[position] == '*' && source.data[position + 1] == '/')
            {
                position += 2;
                return this.next;
            }
            ++position;
        }

        position = cast(int) source.data.length;
        throw new Error("Unexpected end of file. Unclosed comment block", 
                Location(position, source));
    }

    /**
     * Skips a nesting "/+ ... +/" comment; the cursor is on the opening '+'.
     *
     * Every inner opener raises the nesting depth and every closer lowers it;
     * the comment ends when the depth reaches zero. Exactly the two delimiter
     * characters are consumed each time, so the character following the
     * comment is left for the next token.
     * Throws: Error if the input ends before the comment is closed.
     */
    Token skipNestedComment ()
    {
        // Step over the opening '+' so that it cannot double as a closer.
        position += 1;
        int nesting = 1;

        while (position + 1 < source.data.length)
        {
            char first = source.data[position];
            char second = source.data[position + 1];

            if (first == '+' && second == '/')
            {
                position += 2;
                if (--nesting == 0)
                    return this.next;
            }
            else if (first == '/' && second == '+')
            {
                position += 2;
                ++nesting;
            }
            else
            {
                ++position;
            }
        }

        position = cast(int) source.data.length;
        throw new Error("Unexpected end of file. Unclosed comment block", 
                Location(position, source));
    }

    /**
     * Lexes a word made of letters and digits starting at the cursor.
     *
     * The result is a Tok.Identifier token unless the word contains no digit
     * and is found in `keywords`, in which case the keyword's type is used.
     */
    Token lexLetter ()
    {
        int len = 1;
        bool hasNumber = false;

        // Classify each following character once; stop at the first one that
        // is neither a letter nor a digit.
        for (;; ++len)
        {
            CharType kind = getNextChar(len);
            if (kind == CharType.Number)
                hasNumber = true;
            else if (kind != CharType.Letter)
                break;
        }

        Token t = Token(Tok.Identifier, Location(position, source), len);

        // The keyword lookup is skipped for words containing a digit.
        if (!hasNumber)
        {
            char[] str = source.data[position .. position + len];
            if(str in keywords)
                t.type = keywords[str];
        }

        position += len;

        return t;
    }

    /**
     * Classifies the character `offset` positions past the cursor without
     * moving the cursor.
     *
     * Returns: CharType.EOF when that position is at or beyond the end of
     *          the data.
     * Throws: Error for a character that is not a letter, digit, whitespace
     *         or one of the known symbols.
     */
    CharType getNextChar(int offset = 0)
    {
        if (position + offset >= this.source.data.length)
            return CharType.EOF;

        char current = source.data[position + offset];

        // Any byte above 127 is classified as a letter.
        if ((current >= 'A' && current <= 'Z') ||
            (current >= 'a' && current <= 'z') || current > 127)
            return CharType.Letter;

        if (current >= '0' && current <= '9')
            return CharType.Number;

        switch(current)
        {
            case ' ':
            case '\t':
            case '\r':
            case '\n':
                return CharType.Whitespace;

            case '(':
            case ')':
            case '{':
            case '}':
            case ';':
            case '.':
            case ',':
            case '=':
            case '!':
            case '<':
            case '>':
            case '+':
            case '-':
            case '*':
            case '/':
                return CharType.Symbol;

            default:
                // Report the location of the offending character itself, not
                // of the cursor.
                throw new Error("Read invalid symbol: '" ~ current ~ "'", Location(position + offset, source));
        }

    }

    DataSource source;  /// Input being lexed.
    int position;       /// Index into source.data of the next unread character.
    Error[] errors;     /// Returned by getErrors(); nothing in this class appends to it.
}

/// Coarse character classes, stored as a ubyte.
enum CharType : ubyte
{
    /// Alphabetic character.
    Letter,
    /// Decimal digit.
    Number,
    /// Operator or punctuation character.
    Symbol,
    /// Blank character separating tokens.
    Whitespace,

    /// Marker for the end of the input.
    EOF
}