Mercurial > projects > dil
changeset 755:90668b83ae5e
Introduced new module dil.SourceText and class SourceText.
author | Aziz Köksal <aziz.koeksal@gmail.com> |
---|---|
date | Wed, 13 Feb 2008 20:21:25 +0100 |
parents | c7a5499faa77 |
children | 804111ec8213 |
files | trunk/src/cmd/Generate.d trunk/src/cmd/ImportGraph.d trunk/src/cmd/Statistics.d trunk/src/dil/Converter.d trunk/src/dil/SourceText.d trunk/src/dil/lexer/Lexer.d trunk/src/dil/parser/ImportParser.d trunk/src/dil/parser/Parser.d trunk/src/dil/semantic/Module.d trunk/src/dil/semantic/Pass2.d trunk/src/main.d |
diffstat | 11 files changed, 139 insertions(+), 59 deletions(-) [+] |
line wrap: on
line diff
--- a/trunk/src/cmd/Generate.d Wed Feb 13 17:10:55 2008 +0100 +++ b/trunk/src/cmd/Generate.d Wed Feb 13 20:21:25 2008 +0100 @@ -12,9 +12,10 @@ dil.ast.Types; import dil.lexer.Lexer; import dil.parser.Parser; -import dil.File; +import dil.SourceText; +import common; + import tango.io.Print; -import common; /// Options for the generate command. enum DocOption @@ -405,8 +406,7 @@ void syntaxToDoc(string filePath, Print!(char) print, DocOption options) { auto tags = options & DocOption.HTML ? html_tags : xml_tags; - auto sourceText = loadFile(filePath); - auto parser = new Parser(sourceText, filePath); + auto parser = new Parser(new SourceText(filePath, true)); auto root = parser.start(); auto lx = parser.lexer; @@ -450,8 +450,7 @@ void tokensToDoc(string filePath, Print!(char) print, DocOption options) { auto tags = options & DocOption.HTML ? html_tags : xml_tags; - auto sourceText = loadFile(filePath); - auto lx = new Lexer(sourceText, filePath); + auto lx = new Lexer(new SourceText(filePath, true)); lx.scanAll(); print(tags[DocPart.Head]~\n);
--- a/trunk/src/cmd/ImportGraph.d Wed Feb 13 17:10:55 2008 +0100 +++ b/trunk/src/cmd/ImportGraph.d Wed Feb 13 20:21:25 2008 +0100 @@ -10,11 +10,13 @@ import dil.parser.ImportParser; import dil.File; import dil.Settings; +import dil.SourceText; +import common; + import tango.text.Regex : RegExp = Regex; import tango.io.FilePath; import tango.io.FileConst; import tango.text.Util; -import common; alias FileConst.PathSeparatorChar dirSep; @@ -198,7 +200,7 @@ { auto modul = new Module(moduleFilePath); // Use lightweight ImportParser. - modul.parser = new ImportParser(loadFile(moduleFilePath), moduleFilePath); + modul.setParser(new ImportParser(modul.sourceText)); modul.parse(); vertex = new Vertex;
--- a/trunk/src/cmd/Statistics.d Wed Feb 13 17:10:55 2008 +0100 +++ b/trunk/src/cmd/Statistics.d Wed Feb 13 20:21:25 2008 +0100 @@ -9,6 +9,7 @@ import dil.lexer.Token; import dil.parser.Parser; import dil.ast.NodesEnum; +import dil.SourceText; import cmd.ASTStats; import common; @@ -137,12 +138,12 @@ // Create a new record. auto stats = Statistics(printTokensTable); - auto sourceText = loadFile(filePath); + auto sourceText = new SourceText(filePath, true); Parser parser; Lexer lx; if (printNodesTable) { - parser = new Parser(sourceText, filePath); + parser = new Parser(sourceText); auto rootNode = parser.start(); // Count nodes. stats.nodesTable = (new ASTStats).count(rootNode); @@ -150,7 +151,7 @@ } else { - lx = new Lexer(sourceText, filePath); + lx = new Lexer(sourceText); lx.scanAll(); }
--- a/trunk/src/dil/Converter.d Wed Feb 13 17:10:55 2008 +0100 +++ b/trunk/src/dil/Converter.d Wed Feb 13 20:21:25 2008 +0100 @@ -178,7 +178,7 @@ char[] data2UTF8(ubyte[] data) { if (data.length == 0) - return null; + return ""; char[] text; BOM bom = tellBOM(data);
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/trunk/src/dil/SourceText.d Wed Feb 13 20:21:25 2008 +0100 @@ -0,0 +1,58 @@ +/++ + Author: Aziz Köksal + License: GPL3 ++/ +module dil.SourceText; + +import dil.Converter; +import dil.Information; +import common; + +import tango.io.File; + +/// Represents D source code. +/// +/// The source text may come from a file or from a memory buffer. +final class SourceText +{ + string filePath; /// The file path to the source text. Mainly used for error messages. + char[] data; /// The UTF-8, zero-terminated source text. + + /// Params: + /// filePath = file path to the source file. + /// loadFile = whether to load the file in the constructor. + this(string filePath, bool loadFile = false) + { + this.filePath = filePath; + loadFile && load(); + } + + /// Params: + /// filePath = file path for error messages. + /// data = memory buffer. + this(string filePath, char[] data) + { + this(filePath); + this.data = data; + addSentinelCharacter(); + } + + void load(InfoManager infoMan = null) + { + if (!infoMan) + infoMan = new InfoManager; + assert(filePath.length); + // Read the file. + auto rawdata = cast(ubyte[]) (new File(filePath)).read(); + // Convert the data. + auto converter = Converter(filePath, infoMan); + data = converter.data2UTF8(rawdata); + addSentinelCharacter(); + } + + private void addSentinelCharacter() + { + if (data.length == 0 || data[$-1] != 0) + data ~= 0; + } +}
--- a/trunk/src/dil/lexer/Lexer.d Wed Feb 13 17:10:55 2008 +0100 +++ b/trunk/src/dil/lexer/Lexer.d Wed Feb 13 20:21:25 2008 +0100 @@ -13,6 +13,7 @@ import dil.HtmlEntities; import dil.CompilerInfo; import dil.Unicode; +import dil.SourceText; import common; import tango.stdc.stdlib : strtof, strtod, strtold; @@ -28,13 +29,13 @@ +/ class Lexer { - Token* head; /// The head of the doubly linked token list. - Token* tail; /// The tail of the linked list. Set in scan(). - Token* token; /// Points to the current token in the token list. - string text; /// The source text. - char[] filePath; /// Path to the source text. - char* p; /// Points to the current character in the source text. - char* end; /// Points one character past the end of the source text. + SourceText srcText; /// The source text. + char* p; /// Points to the current character in the source text. + char* end; /// Points one character past the end of the source text. + + Token* head; /// The head of the doubly linked token list. + Token* tail; /// The tail of the linked list. Set in scan(). + Token* token; /// Points to the current token in the token list. // Members used for error messages: InfoManager infoMan; @@ -50,24 +51,17 @@ /++ Construct a Lexer object. Params: - text = the UTF-8 source code. - filePath = the path to the source code; used for error messages. - infoMan = the information manager (for collecting error messages.) + srcText = the UTF-8 source code. + infoMan = used for collecting error messages. 
+/ - this(string text, string filePath, InfoManager infoMan = null) + this(SourceText srcText, InfoManager infoMan = null) { - this.filePath = filePath; + this.srcText = srcText; this.infoMan = infoMan; - this.text = text; - if (text.length == 0 || text[$-1] != 0) - { - this.text.length = this.text.length + 1; - this.text[$-1] = 0; - } - - this.p = this.text.ptr; - this.end = this.p + this.text.length; + assert(text.length && text[$-1] == 0, "source text has no sentinel character"); + this.p = text.ptr; + this.end = this.p + text.length; this.lineBegin = this.p; this.head = new Token; @@ -75,7 +69,7 @@ this.head.start = this.head.end = this.p; this.token = this.head; // Initialize this.filePaths. - newFilePath(this.filePath); + newFilePath(this.srcText.filePath); // Add a newline as the first token after the head. auto newline = new Token; newline.kind = TOK.Newline; @@ -104,6 +98,11 @@ delete tail; } + char[] text() + { + return srcText.data; + } + /++ The "shebang" may optionally appear once at the beginning of a file. Regexp: #![^\EndOfLine]* @@ -170,7 +169,7 @@ void newFilePath(char[] newPath) { auto paths = new NewlineData.FilePaths; - paths.oriPath = this.filePath; + paths.oriPath = this.srcText.filePath; paths.setPath = newPath; this.filePaths = paths; }
--- a/trunk/src/dil/parser/ImportParser.d Wed Feb 13 17:10:55 2008 +0100 +++ b/trunk/src/dil/parser/ImportParser.d Wed Feb 13 20:21:25 2008 +0100 @@ -8,6 +8,7 @@ import dil.ast.Node; import dil.ast.Declarations; import dil.ast.Statements; +import dil.SourceText; import dil.Enums; import common; @@ -15,9 +16,9 @@ class ImportParser : Parser { - this(char[] srcText, string fileName) + this(SourceText srcText) { - super(srcText, fileName); + super(srcText); } override CompoundDeclaration start()
--- a/trunk/src/dil/parser/Parser.d Wed Feb 13 17:10:55 2008 +0100 +++ b/trunk/src/dil/parser/Parser.d Wed Feb 13 20:21:25 2008 +0100 @@ -16,6 +16,7 @@ import dil.Information; import dil.Enums; import dil.CompilerInfo; +import dil.SourceText; import common; /++ @@ -47,13 +48,12 @@ Construct a Parser object. Params: text = the UTF-8 source code. - filePath = the path to the source code; used for error messages. infoMan = used for collecting error messages. +/ - this(char[] srcText, string filePath, InfoManager infoMan = null) + this(SourceText srcText, InfoManager infoMan = null) { this.infoMan = infoMan; - lexer = new Lexer(srcText, filePath, infoMan); + lexer = new Lexer(srcText, infoMan); } /// Moves to the first token.
--- a/trunk/src/dil/semantic/Module.d Wed Feb 13 17:10:55 2008 +0100 +++ b/trunk/src/dil/semantic/Module.d Wed Feb 13 20:21:25 2008 +0100 @@ -12,15 +12,18 @@ import dil.semantic.Symbol; import dil.semantic.Symbols; import dil.Information; +import dil.SourceText; +import common; + import tango.io.FilePath; import tango.io.FileConst; -import common; alias FileConst.PathSeparatorChar dirSep; +/// Represents a D module and source file. class Module : ScopeSymbol { - string filePath; /// Path to the source file. + SourceText sourceText; /// The source file of this module. string moduleFQN; /// Fully qualified name of the module. E.g. dil.ast.Node string packageName; /// E.g. dil.ast string moduleName; /// E.g. Node @@ -39,37 +42,49 @@ super(SYM.Module, null, null); } + /// Params: + /// filePath = file path to the source text; loaded in the constructor. + /// infoMan = used for collecting error messages. this(string filePath, InfoManager infoMan = null) { this(); - this.filePath = filePath; + this.sourceText = new SourceText(filePath); this.infoMan = infoMan; + this.sourceText.load(infoMan); } + string filePath() + { + return sourceText.filePath; + } + + void setParser(Parser parser) + { + this.parser = parser; + } + + /// Starts the parser. void parse() { if (this.parser is null) - this.parser = new Parser(loadFile(filePath), filePath, infoMan); + this.parser = new Parser(sourceText, infoMan); this.root = parser.start(); this.imports = parser.imports; if (root.children.length) - { - // moduleDecl will be null if first node isn't a ModuleDeclaration. + { // moduleDecl will be null if first node isn't a ModuleDeclaration. this.moduleDecl = root.children[0].Is!(ModuleDeclaration); if (this.moduleDecl) this.setFQN(moduleDecl.getFQN()); } if (!this.moduleFQN.length) - { - // Take base name of file path as module name. + { // Take base name of file path as module name. 
auto str = (new FilePath(filePath)).name(); - if (!Lexer.isReservedIdentifier(str)) - this.moduleFQN = this.moduleName = str; - else - throw new Exception("'"~str~"' is not a valid module name"); + if (Lexer.isReservedIdentifier(str)) + throw new Exception("'"~str~"' is not a valid module name; it's a reserved or invalid D identifier."); + this.moduleFQN = this.moduleName = str; } }
--- a/trunk/src/dil/semantic/Pass2.d Wed Feb 13 17:10:55 2008 +0100 +++ b/trunk/src/dil/semantic/Pass2.d Wed Feb 13 20:21:25 2008 +0100 @@ -20,6 +20,7 @@ dil.semantic.Analysis, dil.semantic.Interpreter; import dil.parser.Parser; +import dil.SourceText; import dil.Location; import dil.Information; import dil.Messages; @@ -139,7 +140,8 @@ { // Parse the declarations in the string. auto loc = md.begin.getErrorLocation(); auto filePath = loc.filePath; - auto parser = new Parser(stringExpr.getString(), filePath, modul.infoMan); + auto sourceText = new SourceText(filePath, stringExpr.getString()); + auto parser = new Parser(sourceText, modul.infoMan); md.decls = parser.start(); } } @@ -320,7 +322,8 @@ { auto loc = me.begin.getErrorLocation(); auto filePath = loc.filePath; - auto parser = new Parser(stringExpr.getString(), filePath, modul.infoMan); + auto sourceText = new SourceText(filePath, stringExpr.getString()); + auto parser = new Parser(sourceText, modul.infoMan); expr = parser.start2(); expr = visitE(expr); // Check expression. }
--- a/trunk/src/main.d Wed Feb 13 17:10:55 2008 +0100 +++ b/trunk/src/main.d Wed Feb 13 20:21:25 2008 +0100 @@ -24,6 +24,7 @@ import dil.CompilerInfo; import dil.Information; import dil.File; +import dil.SourceText; import cmd.Generate; import cmd.Statistics; @@ -195,7 +196,7 @@ break; case "tok", "tokenize": char[] filePath; - char[] sourceText; + SourceText sourceText; char[] separator; bool ignoreWSToks; bool printWS; @@ -203,7 +204,7 @@ foreach (arg; args[2..$]) { if (strbeg(arg, "-t")) - sourceText = arg[2..$]; + sourceText = new SourceText("-t", arg[2..$]); else if (strbeg(arg, "-s")) separator = arg[2..$]; else if (arg == "-i") @@ -214,8 +215,9 @@ filePath = arg; } - separator || (separator = "\n"); - sourceText || (sourceText = loadFile(filePath)); + separator || (separator = "\n"); + if (sourceText) + sourceText = new SourceText(filePath, true); auto lx = new Lexer(sourceText, null); lx.scanAll(); @@ -265,7 +267,7 @@ swatch.start; foreach (filePath; filePaths) - (new Lexer(loadFile(filePath), null)).scanAll(); + (new Lexer(new SourceText(filePath, true))).scanAll(); Stdout.formatln("Scanned in {:f10}s.", swatch.stop); break;