changeset 755:90668b83ae5e

Introduced new module dil.SourceText and class SourceText.
author Aziz Köksal <aziz.koeksal@gmail.com>
date Wed, 13 Feb 2008 20:21:25 +0100
parents c7a5499faa77
children 804111ec8213
files trunk/src/cmd/Generate.d trunk/src/cmd/ImportGraph.d trunk/src/cmd/Statistics.d trunk/src/dil/Converter.d trunk/src/dil/SourceText.d trunk/src/dil/lexer/Lexer.d trunk/src/dil/parser/ImportParser.d trunk/src/dil/parser/Parser.d trunk/src/dil/semantic/Module.d trunk/src/dil/semantic/Pass2.d trunk/src/main.d
diffstat 11 files changed, 139 insertions(+), 59 deletions(-) [+]
line wrap: on
line diff
--- a/trunk/src/cmd/Generate.d	Wed Feb 13 17:10:55 2008 +0100
+++ b/trunk/src/cmd/Generate.d	Wed Feb 13 20:21:25 2008 +0100
@@ -12,9 +12,10 @@
        dil.ast.Types;
 import dil.lexer.Lexer;
 import dil.parser.Parser;
-import dil.File;
+import dil.SourceText;
+import common;
+
 import tango.io.Print;
-import common;
 
 /// Options for the generate command.
 enum DocOption
@@ -405,8 +406,7 @@
 void syntaxToDoc(string filePath, Print!(char) print, DocOption options)
 {
   auto tags = options & DocOption.HTML ? html_tags : xml_tags;
-  auto sourceText = loadFile(filePath);
-  auto parser = new Parser(sourceText, filePath);
+  auto parser = new Parser(new SourceText(filePath, true));
   auto root = parser.start();
   auto lx = parser.lexer;
 
@@ -450,8 +450,7 @@
 void tokensToDoc(string filePath, Print!(char) print, DocOption options)
 {
   auto tags = options & DocOption.HTML ? html_tags : xml_tags;
-  auto sourceText = loadFile(filePath);
-  auto lx = new Lexer(sourceText, filePath);
+  auto lx = new Lexer(new SourceText(filePath, true));
   lx.scanAll();
 
   print(tags[DocPart.Head]~\n);
--- a/trunk/src/cmd/ImportGraph.d	Wed Feb 13 17:10:55 2008 +0100
+++ b/trunk/src/cmd/ImportGraph.d	Wed Feb 13 20:21:25 2008 +0100
@@ -10,11 +10,13 @@
 import dil.parser.ImportParser;
 import dil.File;
 import dil.Settings;
+import dil.SourceText;
+import common;
+
 import tango.text.Regex : RegExp = Regex;
 import tango.io.FilePath;
 import tango.io.FileConst;
 import tango.text.Util;
-import common;
 
 alias FileConst.PathSeparatorChar dirSep;
 
@@ -198,7 +200,7 @@
     {
       auto modul = new Module(moduleFilePath);
       // Use lightweight ImportParser.
-      modul.parser = new ImportParser(loadFile(moduleFilePath), moduleFilePath);
+      modul.setParser(new ImportParser(modul.sourceText));
       modul.parse();
 
       vertex = new Vertex;
--- a/trunk/src/cmd/Statistics.d	Wed Feb 13 17:10:55 2008 +0100
+++ b/trunk/src/cmd/Statistics.d	Wed Feb 13 20:21:25 2008 +0100
@@ -9,6 +9,7 @@
 import dil.lexer.Token;
 import dil.parser.Parser;
 import dil.ast.NodesEnum;
+import dil.SourceText;
 import cmd.ASTStats;
 import common;
 
@@ -137,12 +138,12 @@
   // Create a new record.
   auto stats = Statistics(printTokensTable);
 
-  auto sourceText = loadFile(filePath);
+  auto sourceText = new SourceText(filePath, true);
   Parser parser;
   Lexer lx;
   if (printNodesTable)
   {
-    parser = new Parser(sourceText, filePath);
+    parser = new Parser(sourceText);
     auto rootNode = parser.start();
     // Count nodes.
     stats.nodesTable = (new ASTStats).count(rootNode);
@@ -150,7 +151,7 @@
   }
   else
   {
-    lx = new Lexer(sourceText, filePath);
+    lx = new Lexer(sourceText);
     lx.scanAll();
   }
 
--- a/trunk/src/dil/Converter.d	Wed Feb 13 17:10:55 2008 +0100
+++ b/trunk/src/dil/Converter.d	Wed Feb 13 20:21:25 2008 +0100
@@ -178,7 +178,7 @@
   char[] data2UTF8(ubyte[] data)
   {
     if (data.length == 0)
-      return null;
+      return "";
 
     char[] text;
     BOM bom = tellBOM(data);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/trunk/src/dil/SourceText.d	Wed Feb 13 20:21:25 2008 +0100
@@ -0,0 +1,58 @@
+/++
+  Author: Aziz Köksal
+  License: GPL3
++/
+module dil.SourceText;
+
+import dil.Converter;
+import dil.Information;
+import common;
+
+import tango.io.File;
+
+/// Represents D source code.
+///
+/// The source text may come from a file or from a memory buffer.
+final class SourceText
+{
+  string filePath; /// The file path to the source text. Mainly used for error messages.
+  char[] data; /// The UTF-8, zero-terminated source text.
+
+  /// Params:
+  ///   filePath = file path to the source file.
+  ///   loadFile = whether to load the file in the constructor.
+  this(string filePath, bool loadFile = false)
+  {
+    this.filePath = filePath;
+    loadFile && load();
+  }
+
+  /// Params:
+  ///   filePath = file path for error messages.
+  ///   data = memory buffer.
+  this(string filePath, char[] data)
+  {
+    this(filePath);
+    this.data = data;
+    addSentinelCharacter();
+  }
+
+  void load(InfoManager infoMan = null)
+  {
+    if (!infoMan)
+      infoMan = new InfoManager;
+    assert(filePath.length);
+    // Read the file.
+    auto rawdata = cast(ubyte[]) (new File(filePath)).read();
+    // Convert the data.
+    auto converter = Converter(filePath, infoMan);
+    data = converter.data2UTF8(rawdata);
+    addSentinelCharacter();
+  }
+
+  private void addSentinelCharacter()
+  {
+    if (data.length == 0 || data[$-1] != 0)
+      data ~= 0;
+  }
+}
--- a/trunk/src/dil/lexer/Lexer.d	Wed Feb 13 17:10:55 2008 +0100
+++ b/trunk/src/dil/lexer/Lexer.d	Wed Feb 13 20:21:25 2008 +0100
@@ -13,6 +13,7 @@
 import dil.HtmlEntities;
 import dil.CompilerInfo;
 import dil.Unicode;
+import dil.SourceText;
 import common;
 
 import tango.stdc.stdlib : strtof, strtod, strtold;
@@ -28,13 +29,13 @@
 +/
 class Lexer
 {
-  Token* head;      /// The head of the doubly linked token list.
-  Token* tail;      /// The tail of the linked list. Set in scan().
-  Token* token;     /// Points to the current token in the token list.
-  string text;      /// The source text.
-  char[] filePath;  /// Path to the source text.
-  char* p;          /// Points to the current character in the source text.
-  char* end;        /// Points one character past the end of the source text.
+  SourceText srcText; /// The source text.
+  char* p;            /// Points to the current character in the source text.
+  char* end;          /// Points one character past the end of the source text.
+
+  Token* head;  /// The head of the doubly linked token list.
+  Token* tail;  /// The tail of the linked list. Set in scan().
+  Token* token; /// Points to the current token in the token list.
 
   // Members used for error messages:
   InfoManager infoMan;
@@ -50,24 +51,17 @@
   /++
     Construct a Lexer object.
     Params:
-      text     = the UTF-8 source code.
-      filePath = the path to the source code; used for error messages.
-      infoMan  = the information manager (for collecting error messages.)
+      srcText = the UTF-8 source code.
+      infoMan = used for collecting error messages.
   +/
-  this(string text, string filePath, InfoManager infoMan = null)
+  this(SourceText srcText, InfoManager infoMan = null)
   {
-    this.filePath = filePath;
+    this.srcText = srcText;
     this.infoMan = infoMan;
 
-    this.text = text;
-    if (text.length == 0 || text[$-1] != 0)
-    {
-      this.text.length = this.text.length + 1;
-      this.text[$-1] = 0;
-    }
-
-    this.p = this.text.ptr;
-    this.end = this.p + this.text.length;
+    assert(text.length && text[$-1] == 0, "source text has no sentinel character");
+    this.p = text.ptr;
+    this.end = this.p + text.length;
     this.lineBegin = this.p;
 
     this.head = new Token;
@@ -75,7 +69,7 @@
     this.head.start = this.head.end = this.p;
     this.token = this.head;
     // Initialize this.filePaths.
-    newFilePath(this.filePath);
+    newFilePath(this.srcText.filePath);
     // Add a newline as the first token after the head.
     auto newline = new Token;
     newline.kind = TOK.Newline;
@@ -104,6 +98,11 @@
     delete tail;
   }
 
+  char[] text()
+  {
+    return srcText.data;
+  }
+
   /++
     The "shebang" may optionally appear once at the beginning of a file.
     Regexp: #![^\EndOfLine]*
@@ -170,7 +169,7 @@
   void newFilePath(char[] newPath)
   {
     auto paths = new NewlineData.FilePaths;
-    paths.oriPath = this.filePath;
+    paths.oriPath = this.srcText.filePath;
     paths.setPath = newPath;
     this.filePaths = paths;
   }
--- a/trunk/src/dil/parser/ImportParser.d	Wed Feb 13 17:10:55 2008 +0100
+++ b/trunk/src/dil/parser/ImportParser.d	Wed Feb 13 20:21:25 2008 +0100
@@ -8,6 +8,7 @@
 import dil.ast.Node;
 import dil.ast.Declarations;
 import dil.ast.Statements;
+import dil.SourceText;
 import dil.Enums;
 import common;
 
@@ -15,9 +16,9 @@
 
 class ImportParser : Parser
 {
-  this(char[] srcText, string fileName)
+  this(SourceText srcText)
   {
-    super(srcText, fileName);
+    super(srcText);
   }
 
   override CompoundDeclaration start()
--- a/trunk/src/dil/parser/Parser.d	Wed Feb 13 17:10:55 2008 +0100
+++ b/trunk/src/dil/parser/Parser.d	Wed Feb 13 20:21:25 2008 +0100
@@ -16,6 +16,7 @@
 import dil.Information;
 import dil.Enums;
 import dil.CompilerInfo;
+import dil.SourceText;
 import common;
 
 /++
@@ -47,13 +48,12 @@
     Construct a Parser object.
     Params:
       text     = the UTF-8 source code.
-      filePath = the path to the source code; used for error messages.
       infoMan  = used for collecting error messages.
   +/
-  this(char[] srcText, string filePath, InfoManager infoMan = null)
+  this(SourceText srcText, InfoManager infoMan = null)
   {
     this.infoMan = infoMan;
-    lexer = new Lexer(srcText, filePath, infoMan);
+    lexer = new Lexer(srcText, infoMan);
   }
 
   /// Moves to the first token.
--- a/trunk/src/dil/semantic/Module.d	Wed Feb 13 17:10:55 2008 +0100
+++ b/trunk/src/dil/semantic/Module.d	Wed Feb 13 20:21:25 2008 +0100
@@ -12,15 +12,18 @@
 import dil.semantic.Symbol;
 import dil.semantic.Symbols;
 import dil.Information;
+import dil.SourceText;
+import common;
+
 import tango.io.FilePath;
 import tango.io.FileConst;
-import common;
 
 alias FileConst.PathSeparatorChar dirSep;
 
+/// Represents a D module and source file.
 class Module : ScopeSymbol
 {
-  string filePath; /// Path to the source file.
+  SourceText sourceText; /// The source file of this module.
   string moduleFQN; /// Fully qualified name of the module. E.g. dil.ast.Node
   string packageName; /// E.g. dil.ast
   string moduleName; /// E.g. Node
@@ -39,37 +42,49 @@
     super(SYM.Module, null, null);
   }
 
+  /// Params:
+  ///   filePath = file path to the source text; loaded in the constructor.
+  ///   infoMan = used for collecting error messages.
   this(string filePath, InfoManager infoMan = null)
   {
     this();
-    this.filePath = filePath;
+    this.sourceText = new SourceText(filePath);
     this.infoMan = infoMan;
+    this.sourceText.load(infoMan);
   }
 
+  string filePath()
+  {
+    return sourceText.filePath;
+  }
+
+  void setParser(Parser parser)
+  {
+    this.parser = parser;
+  }
+
+  /// Starts the parser.
   void parse()
   {
     if (this.parser is null)
-      this.parser = new Parser(loadFile(filePath), filePath, infoMan);
+      this.parser = new Parser(sourceText, infoMan);
 
     this.root = parser.start();
     this.imports = parser.imports;
 
     if (root.children.length)
-    {
-      // moduleDecl will be null if first node isn't a ModuleDeclaration.
+    { // moduleDecl will be null if first node isn't a ModuleDeclaration.
       this.moduleDecl = root.children[0].Is!(ModuleDeclaration);
       if (this.moduleDecl)
         this.setFQN(moduleDecl.getFQN());
     }
 
     if (!this.moduleFQN.length)
-    {
-      // Take base name of file path as module name.
+    { // Take base name of file path as module name.
       auto str = (new FilePath(filePath)).name();
-      if (!Lexer.isReservedIdentifier(str))
-        this.moduleFQN = this.moduleName = str;
-      else
-        throw new Exception("'"~str~"' is not a valid module name");
+      if (Lexer.isReservedIdentifier(str))
+        throw new Exception("'"~str~"' is not a valid module name; it's a reserved or invalid D identifier.");
+      this.moduleFQN = this.moduleName = str;
     }
   }
 
--- a/trunk/src/dil/semantic/Pass2.d	Wed Feb 13 17:10:55 2008 +0100
+++ b/trunk/src/dil/semantic/Pass2.d	Wed Feb 13 20:21:25 2008 +0100
@@ -20,6 +20,7 @@
        dil.semantic.Analysis,
        dil.semantic.Interpreter;
 import dil.parser.Parser;
+import dil.SourceText;
 import dil.Location;
 import dil.Information;
 import dil.Messages;
@@ -139,7 +140,8 @@
       { // Parse the declarations in the string.
         auto loc = md.begin.getErrorLocation();
         auto filePath = loc.filePath;
-        auto parser = new Parser(stringExpr.getString(), filePath, modul.infoMan);
+        auto sourceText = new SourceText(filePath, stringExpr.getString());
+        auto parser = new Parser(sourceText, modul.infoMan);
         md.decls = parser.start();
       }
     }
@@ -320,7 +322,8 @@
     {
       auto loc = me.begin.getErrorLocation();
       auto filePath = loc.filePath;
-      auto parser = new Parser(stringExpr.getString(), filePath, modul.infoMan);
+      auto sourceText = new SourceText(filePath, stringExpr.getString());
+      auto parser = new Parser(sourceText, modul.infoMan);
       expr = parser.start2();
       expr = visitE(expr); // Check expression.
     }
--- a/trunk/src/main.d	Wed Feb 13 17:10:55 2008 +0100
+++ b/trunk/src/main.d	Wed Feb 13 20:21:25 2008 +0100
@@ -24,6 +24,7 @@
 import dil.CompilerInfo;
 import dil.Information;
 import dil.File;
+import dil.SourceText;
 
 import cmd.Generate;
 import cmd.Statistics;
@@ -195,7 +196,7 @@
     break;
   case "tok", "tokenize":
     char[] filePath;
-    char[] sourceText;
+    SourceText sourceText;
     char[] separator;
     bool ignoreWSToks;
     bool printWS;
@@ -203,7 +204,7 @@
     foreach (arg; args[2..$])
     {
       if (strbeg(arg, "-t"))
-        sourceText = arg[2..$];
+        sourceText = new SourceText("-t", arg[2..$]);
       else if (strbeg(arg, "-s"))
         separator = arg[2..$];
       else if (arg == "-i")
@@ -214,8 +215,9 @@
         filePath = arg;
     }
 
-    separator  || (separator = "\n");
-    sourceText || (sourceText = loadFile(filePath));
+    separator || (separator = "\n");
+    if (!sourceText)
+      sourceText = new SourceText(filePath, true);
 
     auto lx = new Lexer(sourceText, null);
     lx.scanAll();
@@ -265,7 +267,7 @@
     swatch.start;
 
     foreach (filePath; filePaths)
-      (new Lexer(loadFile(filePath), null)).scanAll();
+      (new Lexer(new SourceText(filePath, true))).scanAll();
 
     Stdout.formatln("Scanned in {:f10}s.", swatch.stop);
     break;