changeset 496:5a607597dc22

Improved error recovery in the Parser. The Parser skips to the next valid token if an illegal Declaration, Statement or AsmInstruction was found. Refactored a few things in Lexer.d and LexerFuncs.d.
author Aziz Köksal <aziz.koeksal@gmail.com>
date Sun, 09 Dec 2007 13:04:15 +0100
parents b60450804b6e
children 0ffcc4ff82f3
files trunk/src/dil/Declarations.d trunk/src/dil/Lexer.d trunk/src/dil/LexerFuncs.d trunk/src/dil/Parser.d trunk/src/dil/Statements.d trunk/src/dil/Token.d
diffstat 6 files changed, 201 insertions(+), 50 deletions(-) [+]
line wrap: on
line diff
--- a/trunk/src/dil/Declarations.d	Sat Dec 08 22:20:34 2007 +0100
+++ b/trunk/src/dil/Declarations.d	Sun Dec 09 13:04:15 2007 +0100
@@ -91,13 +91,16 @@
   {}
 }
 
+/++
+  Illegal declarations encompass all tokens that don't
+  start a DeclarationDefinition.
+  See_Also: dil.Parser.isDeclDefStartToken()
++/
 class IllegalDeclaration : Declaration
 {
-  Token* token;
-  this(Token* token)
+  this()
   {
     mixin(set_kind);
-    this.token = token;
   }
 
   void semantic(Scope)
--- a/trunk/src/dil/Lexer.d	Sat Dec 08 22:20:34 2007 +0100
+++ b/trunk/src/dil/Lexer.d	Sun Dec 09 13:04:15 2007 +0100
@@ -1432,7 +1432,7 @@
         }
         else if (isspace(c))
           continue; // Skip spaces.
-        else if (c == 0 || c == _Z_)
+        else if (isEOF(c))
         {
           error(tokenLineNum, tokenLineBegin, t.start, MID.UnterminatedHexString);
           t.pf = 0;
@@ -1839,7 +1839,7 @@
       }
       else if (isEndOfLine(p))
         error(sequenceStart, MID.UndefinedEscapeSequence,
-          (*p == 0 || *p == _Z_) ? `\EOF` : `\NewLine`);
+          isEOF(*p) ? `\EOF` : `\NewLine`);
       else
       {
         char[] str = `\`;
--- a/trunk/src/dil/LexerFuncs.d	Sat Dec 08 22:20:34 2007 +0100
+++ b/trunk/src/dil/LexerFuncs.d	Sun Dec 09 13:04:15 2007 +0100
@@ -34,12 +34,21 @@
 }
 
 /++
+  Returns true if c is an EOF character.
+  EOF: 0 | _Z_
++/
+bool isEOF(dchar c)
+{
+  return c == 0 || c == _Z_;
+}
+
+/++
   Returns true if p points to the first character of an EndOfLine.
-  EndOfLine: Newline | 0 | _Z_
+  EndOfLine: Newline | EOF
 +/
 bool isEndOfLine(char* p)
 {
-  return isNewline(p) || *p == 0 || *p == _Z_;
+  return isNewline(p) || isEOF(*p);
 }
 
 /++
--- a/trunk/src/dil/Parser.d	Sat Dec 08 22:20:34 2007 +0100
+++ b/trunk/src/dil/Parser.d	Sun Dec 09 13:04:15 2007 +0100
@@ -345,9 +345,9 @@
     // Declaration
     case T.Identifier, T.Dot, T.Typeof:
     // IntegralType
-    case T.Char,   T.Wchar,   T.Dchar,  T.Bool,
-         T.Byte,   T.Ubyte,   T.Short,  T.Ushort,
-         T.Int,    T.Uint,    T.Long,   T.Ulong,
+    case T.Char,   T.Wchar,   T.Dchar, T.Bool,
+         T.Byte,   T.Ubyte,   T.Short, T.Ushort,
+         T.Int,    T.Uint,    T.Long,  T.Ulong,
          T.Float,  T.Double,  T.Real,
          T.Ifloat, T.Idouble, T.Ireal,
          T.Cfloat, T.Cdouble, T.Creal, T.Void:
@@ -357,9 +357,15 @@
       // TODO: Error: module is optional and can appear only once at the top of the source file.
       break;+/
     default:
-      error(MID.ExpectedButFound, "Declaration", token.srcText);
-      decl = new IllegalDeclaration(token);
-      nT();
+      decl = new IllegalDeclaration();
+      // Skip to next valid token.
+      do
+        nT();
+      while (!token.isDeclDefStart &&
+              token.type != T.RBrace &&
+              token.type != T.EOF)
+      auto text = Token.textSpan(begin, this.prevToken);
+      error(begin, "illegal Declaration found: " ~ text);
     }
     decl.setProtection(this.protection);
     decl.setStorageClass(this.storageClass);
@@ -702,19 +708,18 @@
       break;
     default:
       error(MID.UnrecognizedLinkageType, token.srcText);
-      nT();
     }
     require(T.RParen);
     return linkageType;
   }
 
-  void checkLinkageType(ref LinkageType prev_lt, LinkageType lt, char* tokStart)
+  void checkLinkageType(ref LinkageType prev_lt, LinkageType lt, Token* begin)
   {
     if (prev_lt == LinkageType.None)
       prev_lt = lt;
     else
       // TODO: create new msg RedundantLinkageType.
-      error(MID.RedundantStorageClass, tokStart[0 .. prevToken.end - tokStart]);
+      error(begin, "redundant linkage type: " ~ Token.textSpan(begin, this.prevToken));
   }
 
   Declaration parseStorageAttribute()
@@ -739,7 +744,7 @@
 
         nT();
         auto linkageType = parseLinkageType();
-        checkLinkageType(prev_linkageType, linkageType, begin.start);
+        checkLinkageType(prev_linkageType, linkageType, begin);
 
         auto saved = this.linkageType; // Save.
         this.linkageType = linkageType; // Set.
@@ -1743,9 +1748,9 @@
       else
         goto case_parseExpressionStatement; // Expression
     // IntegralType
-    case T.Char,   T.Wchar,   T.Dchar,  T.Bool,
-         T.Byte,   T.Ubyte,   T.Short,  T.Ushort,
-         T.Int,    T.Uint,    T.Long,   T.Ulong,
+    case T.Char,   T.Wchar,   T.Dchar, T.Bool,
+         T.Byte,   T.Ubyte,   T.Short, T.Ushort,
+         T.Int,    T.Uint,    T.Long,  T.Ulong,
          T.Float,  T.Double,  T.Real,
          T.Ifloat, T.Idouble, T.Ireal,
          T.Cfloat, T.Cdouble, T.Creal, T.Void:
@@ -1889,10 +1894,10 @@
     case T.Typeid:
     case T.Is:
     case T.LParen:
-    // IntegralType
-    /+case T.Char,   T.Wchar,   T.Dchar,  T.Bool,
-         T.Byte,   T.Ubyte,   T.Short,  T.Ushort,
-         T.Int,    T.Uint,    T.Long,   T.Ulong,
+    /+ // IntegralType
+    case T.Char,   T.Wchar,   T.Dchar, T.Bool,
+         T.Byte,   T.Ubyte,   T.Short, T.Ushort,
+         T.Int,    T.Uint,    T.Long,  T.Ulong,
          T.Float,  T.Double,  T.Real,
          T.Ifloat, T.Idouble, T.Ireal,
          T.Cfloat, T.Cdouble, T.Creal, T.Void:+/
@@ -1919,8 +1924,7 @@
 
       if (token.type != T.Dollar)
         // Assert that this isn't a valid expression.
-        assert(
-          delegate bool(){
+        assert(delegate bool(){
             bool success;
             auto expression = try_(&parseExpression, success);
             return success;
@@ -1928,9 +1932,15 @@
         );
 
       // Report error: it's an illegal statement.
-      error(MID.ExpectedButFound, "Statement", token.srcText);
-      s = new IllegalStatement(token);
-      nT();
+      s = new IllegalStatement();
+      // Skip to next valid token.
+      do
+        nT();
+      while (!token.isStatementStart &&
+              token.type != T.RBrace &&
+              token.type != T.EOF)
+      auto text = Token.textSpan(begin, this.prevToken);
+      error(begin, "illegal Statement found: " ~ text);
     }
     assert(s !is null);
     set(s, begin);
@@ -2016,7 +2026,7 @@
 
         nT();
         auto linkageType = parseLinkageType();
-        checkLinkageType(prev_linkageType, linkageType, begin.start);
+        checkLinkageType(prev_linkageType, linkageType, begin);
 
         d = new LinkageDeclaration(linkageType, parse());
         break;
@@ -2721,9 +2731,15 @@
       nT();
       break;
     default:
-      error(MID.ExpectedButFound, "AsmInstruction", token.srcText);
-      s = new IllegalAsmInstruction(token);
-      nT();
+      s = new IllegalAsmInstruction();
+      // Skip to next valid token.
+      do
+        nT();
+      while (!token.isAsmInstructionStart &&
+              token.type != T.RBrace &&
+              token.type != T.EOF)
+      auto text = Token.textSpan(begin, this.prevToken);
+      error(begin, "illegal AsmInstruction found: " ~ text);
     }
     set(s, begin);
     return s;
@@ -3736,9 +3752,9 @@
       }
       break;
     // IntegralType . Identifier
-    case T.Char,   T.Wchar,   T.Dchar,  T.Bool,
-         T.Byte,   T.Ubyte,   T.Short,  T.Ushort,
-         T.Int,    T.Uint,    T.Long,   T.Ulong,
+    case T.Char,   T.Wchar,   T.Dchar, T.Bool,
+         T.Byte,   T.Ubyte,   T.Short, T.Ushort,
+         T.Int,    T.Uint,    T.Long,  T.Ulong,
          T.Float,  T.Double,  T.Real,
          T.Ifloat, T.Idouble, T.Ireal,
          T.Cfloat, T.Cdouble, T.Creal, T.Void:
@@ -3836,9 +3852,9 @@
 
     switch (token.type)
     {
-    case T.Char,   T.Wchar,   T.Dchar,  T.Bool,
-         T.Byte,   T.Ubyte,   T.Short,  T.Ushort,
-         T.Int,    T.Uint,    T.Long,   T.Ulong,
+    case T.Char,   T.Wchar,   T.Dchar, T.Bool,
+         T.Byte,   T.Ubyte,   T.Short, T.Ushort,
+         T.Int,    T.Uint,    T.Long,  T.Ulong,
          T.Float,  T.Double,  T.Real,
          T.Ifloat, T.Idouble, T.Ireal,
          T.Cfloat, T.Cdouble, T.Creal, T.Void:
@@ -4451,15 +4467,26 @@
     return null;
   }
 
+  /// Reports an error that has no message ID yet.
+  void error(Token* token, char[] formatMsg, ...)
+  {
+    error_(token, MID.min, formatMsg, _arguments, _argptr);
+  }
+
   void error(MID mid, ...)
   {
+    error_(this.token, mid, GetMsg(mid), _arguments, _argptr);
+  }
+
+  void error_(Token* token, MID mid, char[] formatMsg, TypeInfo[] _arguments, void* _argptr)
+  {
     if (trying)
     {
       ++errorCount;
       return;
     }
-    auto location = this.token.getLocation();
-    auto msg = Format(_arguments, _argptr, GetMsg(mid));
+    auto location = token.getLocation();
+    auto msg = Format(_arguments, _argptr, formatMsg);
     errors ~= new Information(InfoType.Parser, mid, location, msg);
   }
 }
--- a/trunk/src/dil/Statements.d	Sat Dec 08 22:20:34 2007 +0100
+++ b/trunk/src/dil/Statements.d	Sun Dec 09 13:04:15 2007 +0100
@@ -32,11 +32,9 @@
 
 class IllegalStatement : Statement
 {
-  Token* tok;
-  this(Token* tok)
+  this()
   {
     mixin(set_kind);
-    this.tok = tok;
   }
 }
 
@@ -460,9 +458,8 @@
 
 class IllegalAsmInstruction : IllegalStatement
 {
-  this(Token* token)
+  this()
   {
-    super(token);
     mixin(set_kind);
   }
 }
--- a/trunk/src/dil/Token.d	Sat Dec 08 22:20:34 2007 +0100
+++ b/trunk/src/dil/Token.d	Sun Dec 09 13:04:15 2007 +0100
@@ -112,6 +112,8 @@
 
 alias TOK.Abstract KeywordsBegin;
 alias TOK.With KeywordsEnd;
+alias TOK.FILE SpecialTokensBegin; // Lower bound of the token kind range tested by Token.isSpecialToken().
+alias TOK.Version SpecialTokensEnd; // NOTE(review): T.Version is also listed as a keyword that starts declarations and statements in this file; confirm the special-token range is meant to end at this kind.
 
 struct Token
 {
@@ -212,9 +214,9 @@
   }
 
   /++
-    Returns true if this is a token which can have newlines in it.
-    These can be any string literal except for escape literals
-    and block and nested comments.
+    Returns true if this is a token that can have newlines in it.
+    These can be block and nested comments and any string literal
+    except for escape string literals.
   +/
   bool isMultiline()
   {
@@ -237,7 +239,7 @@
   /// Returns true if this is a special token.
   bool isSpecialToken()
   {
-    return *start == '_' && type != TOK.Identifier;
+    return SpecialTokensBegin <= type && type <= SpecialTokensEnd;
   }
 
 version(D2)
@@ -249,6 +251,24 @@
   }
 }
 
+  /// Returns true if this token's type can start a DeclarationDefinition (see isDeclDefStartToken()).
+  bool isDeclDefStart()
+  {
+    return isDeclDefStartToken(type);
+  }
+
+  /// Returns true if this token's type can start a Statement (see isStatementStartToken()).
+  bool isStatementStart()
+  {
+    return isStatementStartToken(type);
+  }
+
+  /// Returns true if this token's type can start an AsmInstruction (see isAsmInstructionStartToken()).
+  bool isAsmInstructionStart()
+  {
+    return isAsmInstructionStartToken(type);
+  }
+
   int opEquals(TOK type2)
   {
     return type == type2;
@@ -290,6 +310,30 @@
     return new Location(filePath, lineNum, lineBegin, this.start);
   }
 
+  uint lineCount() // Counts the lines spanned by this token's text; the result is at least 1.
+  {
+    uint count = 1;
+    if (this.isMultiline) // Tokens that are not multiline are never scanned and count as one line.
+    {
+      auto p = this.start, end = this.end;
+      while (p != end)
+      {
+        if (scanNewline(p) == '\n') // NOTE(review): p is not advanced on this branch, so scanNewline presumably moves p past the newline itself — confirm.
+          ++count;
+        else
+          ++p;
+      }
+    }
+    return count;
+  }
+
+  /// Returns the source text from left.start up to right.end; right must not precede left, but both may be the same token.
+  static char[] textSpan(Token* left, Token* right)
+  {
+    assert(left is right || left.end <= right.start); // A span of a single token passes the same token twice.
+    return left.start[0 .. right.end - left.start];
+  }
+
   new(size_t size)
   {
     void* p = malloc(size);
@@ -444,3 +488,74 @@
   "EOF"
 ];
 static assert(tokToString.length == TOK.MAX);
+
+/// Returns true if tok is a token kind that can start a DeclarationDefinition.
+bool isDeclDefStartToken(TOK tok)
+{
+  switch (tok)
+  {
+  alias TOK T;
+  case  T.Align, T.Pragma, T.Export, T.Private, T.Package, T.Protected,
+        T.Public, T.Extern, T.Deprecated, T.Override, T.Abstract,
+        T.Synchronized, T.Static, T.Final, T.Const, T.Invariant/*D 2.0*/,
+        T.Auto, T.Scope, T.Alias, T.Typedef, T.Import, T.Enum, T.Class,
+        T.Interface, T.Struct, T.Union, T.This, T.Tilde, T.Unittest, T.Debug,
+        T.Version, T.Template, T.New, T.Delete, T.Mixin, T.Semicolon,
+        T.Identifier, T.Dot, T.Typeof,
+        T.Char,   T.Wchar,   T.Dchar, T.Bool,
+        T.Byte,   T.Ubyte,   T.Short, T.Ushort,
+        T.Int,    T.Uint,    T.Long,  T.Ulong,
+        T.Float,  T.Double,  T.Real,
+        T.Ifloat, T.Idouble, T.Ireal,
+        T.Cfloat, T.Cdouble, T.Creal, T.Void:
+    return true;
+  default:
+  }
+  return false;
+}
+
+/// Returns true if tok is a token kind that can start a Statement (listed kinds plus the special-token range).
+bool isStatementStartToken(TOK tok)
+{
+  switch (tok)
+  {
+  alias TOK T;
+  case  T.Align, T.Extern, T.Final, T.Const, T.Auto, T.Identifier, T.Dot,
+        T.Typeof, T.If, T.While, T.Do, T.For, T.Foreach, T.Foreach_reverse,
+        T.Switch, T.Case, T.Default, T.Continue, T.Break, T.Return, T.Goto,
+        T.With, T.Synchronized, T.Try, T.Throw, T.Scope, T.Volatile, T.Asm,
+        T.Pragma, T.Mixin, T.Static, T.Debug, T.Version, T.Alias, T.Semicolon,
+        T.Enum, T.Class, T.Interface, T.Struct, T.Union, T.LBrace, T.Typedef,
+        T.This, T.Super, T.Null, T.True, T.False, T.Int32, T.Int64, T.Uint32,
+        T.Uint64, T.Float32, T.Float64, T.Float80, T.Imaginary32,
+        T.Imaginary64, T.Imaginary80, T.CharLiteral, T.WCharLiteral,
+        T.DCharLiteral, T.String, T.LBracket, T.Function, T.Delegate,
+        T.Assert, T.Import, T.Typeid, T.Is, T.LParen, T.Traits/*D2.0*/,
+        T.AndBinary, T.PlusPlus, T.MinusMinus, T.Mul,T.Minus, T.Plus, T.Not,
+        T.Tilde, T.New, T.Delete, T.Cast:
+  case  T.Char,   T.Wchar,   T.Dchar, T.Bool,
+        T.Byte,   T.Ubyte,   T.Short, T.Ushort,
+        T.Int,    T.Uint,    T.Long,  T.Ulong,
+        T.Float,  T.Double,  T.Real,
+        T.Ifloat, T.Idouble, T.Ireal,
+        T.Cfloat, T.Cdouble, T.Creal, T.Void:
+    return true;
+  default:
+    if (SpecialTokensBegin <= tok && tok <= SpecialTokensEnd) // Kinds inside the special-token range also count as statement starts.
+      return true;
+  }
+  return false;
+}
+
+/// Returns true if tok is a token kind that can start an AsmInstruction.
+bool isAsmInstructionStartToken(TOK tok)
+{
+  switch(tok)
+  {
+  alias TOK T;
+  case T.In, T.Int, T.Out, T.Identifier, T.Align, T.Semicolon:
+    return true;
+  default:
+  }
+  return false;
+}