src/dil/lexer/Token.d @ 820:1d06b4aed7cf

Revised code in the first pass. Added code to handle anonymous unions and structs. Hope the idea will work. Added type to class Aggregate and isAnonymous to some other Symbol classes.
author Aziz Köksal <aziz.koeksal@gmail.com>
date Fri, 14 Mar 2008 15:42:08 +0100
parents bcb74c9b895c
children

/++
  Author: Aziz Köksal
  License: GPL3
+/
module dil.lexer.Token;

import dil.lexer.Identifier;
import dil.lexer.Funcs;
import dil.Location;
import tango.stdc.stdlib : malloc, free;
import tango.core.Exception;
import common;

public import dil.lexer.TokensEnum;

/// A Token is a sequence of characters formed by the lexical analyzer.
struct Token
{ /// Flags set by the Lexer.
  enum Flags : ushort
  {
    None,
    Whitespace = 1, /// Tokens with this flag are ignored by the Parser.
  }

  TOK kind; /// The token kind.
  Flags flags; /// The flags of the token.
  /// Pointers to the next and previous tokens (doubly linked list).
  Token* next, prev;

  /// Start of whitespace characters before token. Null if no WS.
  /// TODO: remove to save space; can be replaced by 'prev.end'.
  char* ws;
  char* start; /// Points to the first character of the token.
  char* end;   /// Points one character past the end of the token.

  /// Data associated with this token.
  /// TODO: move data structures out; use only pointers here to keep Token.sizeof small.
  union
  {
    /// For newline tokens.
    NewlineData newline;
    /// For #line tokens.
    struct
    {
      Token* tokLineNum; /// #line number
      Token* tokLineFilespec; /// #line number filespec
    }
    /// The value of a string token.
    struct
    {
      string str; /// Zero-terminated string. (The zero is included in the length.)
      char pf;    /// Postfix 'c', 'w', 'd' or 0 for none.
    version(D2)
      Token* tok_str; /++ Points to the contents of a token string, stored as a
                          doubly linked list. The last token is always '}', or
                          EOF if the source text ends right after "q{".
                      +/
    }
    Identifier* ident; /// For keywords and identifiers.
    dchar  dchar_;   /// A character value.
    long   long_;    /// A long integer value.
    ulong  ulong_;   /// An unsigned long integer value.
    int    int_;     /// An integer value.
    uint   uint_;    /// An unsigned integer value.
    float  float_;   /// A float value.
    double double_;  /// A double value.
    real   real_;    /// A real value.
  }

  /// Returns the text of the token.
  string srcText()
  {
    assert(start && end);
    return start[0 .. end - start];
  }

  /// Returns the preceding whitespace of the token.
  string wsChars()
  {
    assert(ws && start);
    return ws[0 .. start - ws];
  }

  /// Finds the next non-whitespace token.
  /// Returns: 'this' token if the next non-whitespace token is TOK.EOF or null.
  Token* nextNWS()
  out(token)
  {
    assert(token !is null);
  }
  body
  {
    auto token = next;
    while (token !is null && token.isWhitespace)
      token = token.next;
    if (token is null || token.kind == TOK.EOF)
      return this;
    return token;
  }

  /// Finds the previous non-whitespace token.
  /// Returns: 'this' token if the previous non-whitespace token is TOK.HEAD or null.
  Token* prevNWS()
  out(token)
  {
    assert(token !is null);
  }
  body
  {
    auto token = prev;
    while (token !is null && token.isWhitespace)
      token = token.prev;
    if (token is null || token.kind == TOK.HEAD)
      return this;
    return token;
  }
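
  // A minimal sketch (not from the original sources) of how nextNWS() and
  // prevNWS() skip tokens flagged as whitespace. The chain is linked by hand
  // here; in practice the Lexer builds the token list.
  unittest
  {
    Token head, comment, id, eof;
    head.kind = TOK.HEAD;
    comment.kind = TOK.Comment;
    comment.setWhitespaceFlag();
    id.kind = TOK.Identifier;
    eof.kind = TOK.EOF;
    // Link the chain: head <-> comment <-> id <-> eof.
    head.next = &comment; comment.prev = &head;
    comment.next = &id;   id.prev = &comment;
    id.next = &eof;       eof.prev = &id;
    assert(head.nextNWS() is &id); // The whitespace comment is skipped.
    assert(id.nextNWS() is &id);   // Next non-whitespace token is EOF: returns itself.
    assert(id.prevNWS() is &id);   // Previous non-whitespace token is HEAD: returns itself.
    assert(eof.prevNWS() is &id);
  }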

  /// Returns the string for a token kind.
  static string toString(TOK kind)
  {
    return tokToString[kind];
  }

  /// Adds Flags.Whitespace to this.flags.
  void setWhitespaceFlag()
  {
    this.flags |= Flags.Whitespace;
  }

  /// Returns true if this token can contain newlines.
  ///
  /// These are block and nested comments, and any string literal
  /// except for escape string literals.
  bool isMultiline()
  {
    return kind == TOK.String && start[0] != '\\' || // Not an escape string literal.
           kind == TOK.Comment && start[1] != '/';   // Not a line comment.
  }

  /// Returns true if this is a keyword token.
  bool isKeyword()
  {
    return KeywordsBegin <= kind && kind <= KeywordsEnd;
  }

  /// Returns true if this is an integral type token.
  bool isIntegralType()
  {
    return IntegralTypeBegin <= kind && kind <= IntegralTypeEnd;
  }

  /// Returns true if this is a whitespace token.
  bool isWhitespace()
  {
    return !!(flags & Flags.Whitespace);
  }

  /// Returns true if this is a special token.
  bool isSpecialToken()
  {
    return SpecialTokensBegin <= kind && kind <= SpecialTokensEnd;
  }

version(D2)
{
  /// Returns true if this is a token string literal.
  bool isTokenStringLiteral()
  {
    return kind == TOK.String && tok_str !is null;
  }
}

  /// Returns true if this token starts a DeclarationDefinition.
  bool isDeclDefStart()
  {
    return isDeclDefStartToken(kind);
  }

  /// Returns true if this token starts a Statement.
  bool isStatementStart()
  {
    return isStatementStartToken(kind);
  }

  /// Returns true if this token starts an AsmStatement.
  bool isAsmStatementStart()
  {
    return isAsmStatementStartToken(kind);
  }

  /// Returns true if this token's kind equals kind2.
  int opEquals(TOK kind2)
  {
    return kind == kind2;
  }

  /// Compares the positions of two tokens in the source text.
  int opCmp(Token* rhs)
  {
    return start < rhs.start ? -1 : (start > rhs.start ? 1 : 0);
  }

  /// Returns the Location of this token.
  Location getLocation(bool realLocation)()
  {
    auto search_t = this.prev;
    // Find previous newline token.
    while (search_t.kind != TOK.Newline)
      search_t = search_t.prev;
    static if (realLocation)
    {
      auto filePath  = search_t.newline.filePaths.oriPath;
      auto lineNum   = search_t.newline.oriLineNum;
    }
    else
    {
      auto filePath  = search_t.newline.filePaths.setPath;
      auto lineNum   = search_t.newline.oriLineNum - search_t.newline.setLineNum;
    }
    auto lineBegin = search_t.end;
    // Determine actual line begin and line number.
    while (1)
    {
      search_t = search_t.next;
      if (search_t == this)
        break;
      // Multiline tokens must be rescanned for newlines.
      if (search_t.isMultiline)
      {
        auto p = search_t.start, end = search_t.end;
        while (p != end)
        {
          if (scanNewline(p) == '\n')
          {
            lineBegin = p;
            ++lineNum;
          }
          else
            ++p;
        }
      }
    }
    return new Location(filePath, lineNum, lineBegin, this.start);
  }

  alias getLocation!(true) getRealLocation;
  alias getLocation!(false) getErrorLocation;
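  // Usage sketch (with a hypothetical token `tok` taken from a Lexer's list):
  //   auto realLoc  = tok.getRealLocation();  // Path/line as written in the source text.
  //   auto errorLoc = tok.getErrorLocation(); // Path/line as adjusted by #line tokens.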

  /// Returns the number of lines this token spans.
  uint lineCount()
  {
    uint count = 1;
    if (this.isMultiline)
    {
      auto p = this.start, end = this.end;
      while (p != end)
      {
        if (scanNewline(p) == '\n')
          ++count;
        else
          ++p;
      }
    }
    return count;
  }

  /// Returns the source text enclosed by the left and right token.
  static char[] textSpan(Token* left, Token* right)
  {
    assert(left.end <= right.start || left is right);
    return left.start[0 .. right.end - left.start];
  }
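
  // Usage sketch (hypothetical `first` and `last` tokens from one source buffer):
  //   char[] text = Token.textSpan(first, last); // Includes all text in between.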

  /// Uses malloc() to allocate memory for a token.
  new(size_t size)
  {
    void* p = malloc(size);
    if (p is null)
      throw new OutOfMemoryException(__FILE__, __LINE__);
    // TODO: Token.init should be all zeros.
    // Maybe use calloc() to avoid this line?
    *cast(Token*)p = Token.init;
    return p;
  }

  /// Deletes a token using free().
  delete(void* p)
  {
    auto token = cast(Token*)p;
    if (token)
    {
      if (token.kind == TOK.HashLine)
        token.destructHashLineToken();
      else
      {
      version(D2)
        if (token.isTokenStringLiteral)
          token.destructTokenStringLiteral();
      }
    }
    free(p);
  }
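
  // A minimal sketch (not from the original sources) of the custom allocation:
  // Tokens are created with malloc() via the overloaded new and released with
  // free() via the overloaded delete.
  unittest
  {
    auto t = new Token;      // Allocated with malloc() and set to Token.init.
    t.kind = TOK.Identifier; // A plain token: delete only calls free().
    delete t;
  }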

  /// Deletes the line number and filespec tokens of a #line token.
  void destructHashLineToken()
  {
    assert(kind == TOK.HashLine);
    delete tokLineNum;
    delete tokLineFilespec;
  }

version(D2)
{
  /// Deletes the tokens of a token string literal.
  void destructTokenStringLiteral()
  {
    assert(kind == TOK.String);
    assert(start && *start == 'q' && start[1] == '{');
    assert(tok_str !is null);
    auto tok_it = tok_str;
    auto tok_del = tok_str;
    while (tok_it && tok_it.kind != TOK.EOF)
    {
      tok_it = tok_it.next;
      assert(tok_del && tok_del.kind != TOK.EOF);
      delete tok_del;
      tok_del = tok_it;
    }
  }
}
}

/// Data associated with newline tokens.
struct NewlineData
{
  struct FilePaths
  {
    char[] oriPath;   /// Original path to the source text.
    char[] setPath;   /// Path set by #line.
  }
  FilePaths* filePaths;
  uint oriLineNum;  /// Actual line number in the source text.
  uint setLineNum;  /// Delta line number set by #line.
}

/// Returns true if tok starts a DeclarationDefinition.
bool isDeclDefStartToken(TOK tok)
{
  switch (tok)
  {
  alias TOK T;
  case  T.Align, T.Pragma, T.Export, T.Private, T.Package, T.Protected,
        T.Public, T.Extern, T.Deprecated, T.Override, T.Abstract,
        T.Synchronized, T.Static, T.Final, T.Const, T.Invariant/*D 2.0*/,
        T.Auto, T.Scope, T.Alias, T.Typedef, T.Import, T.Enum, T.Class,
        T.Interface, T.Struct, T.Union, T.This, T.Tilde, T.Unittest, T.Debug,
        T.Version, T.Template, T.New, T.Delete, T.Mixin, T.Semicolon,
        T.Identifier, T.Dot, T.Typeof:
    return true;
  default:
    if (IntegralTypeBegin <= tok && tok <= IntegralTypeEnd)
      return true;
  }
  return false;
}

/// Returns true if tok starts a Statement.
bool isStatementStartToken(TOK tok)
{
  switch (tok)
  {
  alias TOK T;
  case  T.Align, T.Extern, T.Final, T.Const, T.Auto, T.Identifier, T.Dot,
        T.Typeof, T.If, T.While, T.Do, T.For, T.Foreach, T.Foreach_reverse,
        T.Switch, T.Case, T.Default, T.Continue, T.Break, T.Return, T.Goto,
        T.With, T.Synchronized, T.Try, T.Throw, T.Scope, T.Volatile, T.Asm,
        T.Pragma, T.Mixin, T.Static, T.Debug, T.Version, T.Alias, T.Semicolon,
        T.Enum, T.Class, T.Interface, T.Struct, T.Union, T.LBrace, T.Typedef,
        T.This, T.Super, T.Null, T.True, T.False, T.Int32, T.Int64, T.Uint32,
        T.Uint64, T.Float32, T.Float64, T.Float80, T.Imaginary32,
        T.Imaginary64, T.Imaginary80, T.CharLiteral, T.String, T.LBracket,
        T.Function, T.Delegate, T.Assert, T.Import, T.Typeid, T.Is, T.LParen,
        T.Traits/*D2.0*/, T.AndBinary, T.PlusPlus, T.MinusMinus, T.Mul,
        T.Minus, T.Plus, T.Not, T.Tilde, T.New, T.Delete, T.Cast:
    return true;
  default:
    if (IntegralTypeBegin <= tok && tok <= IntegralTypeEnd ||
        SpecialTokensBegin <= tok && tok <= SpecialTokensEnd)
      return true;
  }
  return false;
}

/// Returns true if tok starts an AsmStatement.
bool isAsmStatementStartToken(TOK tok)
{
  switch (tok)
  {
  alias TOK T;
  case T.In, T.Int, T.Out, T.Identifier, T.Align, T.Semicolon:
    return true;
  default:
  }
  return false;
}
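
// A small sketch (not from the original sources) exercising the classification
// helpers above.
unittest
{
  assert(isDeclDefStartToken(TOK.Class));
  assert(isStatementStartToken(TOK.While));
  assert(isAsmStatementStartToken(TOK.Align));
  assert(!isAsmStatementStartToken(TOK.Class));
}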