Mercurial > projects > dang
view basic/SourceManager.d @ 88:eb5b2c719a39 new_gen
Major change to locations, tokens and expressions.
A location (now SourceLocation or SLoc) is only 32 bits in size. The
disadvantage is that it can't find its own text; you have to go through the
new SourceManager to do that.
This has caused changes to a lot of stuff and the removal of DataSource and the
old Location.
Additionally, Exp has gotten some location information, so we can give proper
error messages. This is not in Decl and Stmt yet, but that's coming too.
author | Anders Halager <halager@gmail.com> |
---|---|
date | Sun, 04 May 2008 18:13:46 +0200 |
parents | |
children | a49bb982a7b0 |
line wrap: on
line source
module basic.SourceManager;

import tango.core.Memory : GC;
import tango.io.UnicodeFile;
import tango.io.Stdout;
import tango.text.convert.Layout;

public import basic.SourceLocation;

private alias char[] string;

/**
  SourceManager is used to handle input files, by loading them in chunks
  ("checkpoints") that can be referenced elsewhere.

  It also helps extract the line of a location and maps locations back to
  the text they refer to.
 **/
class SourceManager
{
    this()
    {
        layout = new Layout!(char);
    }

    /**
      Loads the file belonging to the filename and registers it.

      Params:
        filename = The file to load.

      Returns:
        A SourceLocation created from the ID of the first checkpoint that
        was registered for this file.

      There are some assumptions about this file:
      1. The file has a BOM or is valid utf-8
      2. The file is not unreadable, a folder etc.
     **/
    SourceLocation addFile(string filename)
    {
        scope file = new UnicodeFile!(char)(filename, Encoding.UTF_8);
        auto file_data = file.read();
        return createCheckpoints(file_data, filename);
    }

    /**
      Returns a string slice containing the part of the file after loc
      (a pointer might be better, it allows negative indexing).

      The slice runs to the end of the whole file, not just to the end of
      the checkpoint loc belongs to.
     **/
    string getRawData(SourceLocation loc)
    {
        CP* cp = &checkpoints[loc.fileID];
        return cp.file_data[getFileOffset(loc) .. $];
    }

    /**
      Extracts the line number of the given location.

      The first line is line 1. The line cache of the file is built on the
      first call: O("file size") if the cache isn't built,
      O(log "lines in file") otherwise.
     **/
    uint getLineNumber(SourceLocation loc)
    {
        assert(loc.isValid, "Location is invalid");
        assert(loc.isReal, "Virtual locations not supported yet");
        assert(loc.fileID < checkpoints.length, "Non-existent location");

        CP* cp = &checkpoints[loc.fileID];
        auto cache = &linecache[cp.meta_index];
        // The cache is shared by every checkpoint of the file and is queried
        // with whole-file offsets, so it has to be built from the whole file
        // and not from the single chunk this checkpoint covers.
        if (!cache.isCached)
            cache.build(cp.file_data);
        return cache.lineOf(getFileOffset(loc));
    }

    /**
      Extracts the full byte offset into a file, at which a location is
      pointing: the offset inside the checkpoint plus the size of all the
      checkpoints of the same file that precede it.
     **/
    uint getFileOffset(SourceLocation loc)
    {
        return loc.fileOffset
            + checkpoints[loc.fileID].part * loc.Bits.MaxFileOffset;
    }

    /**
      Extracts a string containing the entire line loc appears in.

      The returned slice contains neither the terminator of the previous
      line nor the terminator of the line itself. If loc points at a line
      terminator, the line ended by that terminator is returned.
     **/
    string getLine(SourceLocation loc)
    {
        CP* cp = &checkpoints[loc.fileID];
        string text = cp.file_data;

        // Work on whole-file offsets so a line that crosses a checkpoint
        // boundary is still returned in full.
        size_t pos = getFileOffset(loc);
        if (pos > text.length)
            pos = text.length;

        // A location on the '\n' of a "\r\n" pair belongs to the line that
        // the pair terminates - step back onto the '\r'.
        if (pos > 0 && pos < text.length
                && text[pos] == '\n' && text[pos - 1] == '\r')
            --pos;

        // Walk backwards while the *previous* character is part of the line,
        // that way the terminator of the preceding line is never included.
        size_t lo = pos;
        while (lo > 0 && text[lo - 1] != '\n' && text[lo - 1] != '\r')
            --lo;

        // Walk forwards to the terminator (or the end of the data).
        size_t hi = pos;
        while (hi < text.length && text[hi] != '\n' && text[hi] != '\r')
            ++hi;

        return text[lo .. hi];
    }

    /**
      Get the original source text of a SourceRange.

      Both ends of the range are converted to whole-file offsets, and the
      text is sliced out of the buffer of the file the range begins in.
     **/
    string getText(SourceRange loc)
    {
        assert(loc.isValid, "Range is invalid");
        assert(loc.isReal, "Virtual locations not supported yet");
        auto begin = getFileOffset(loc.begin);
        auto end = getFileOffset(loc.end);
        assert(begin <= end, "Range ends before it begins");
        // begin/end are offsets into the whole file, so they must be applied
        // to the whole-file buffer and not to a single chunk.
        return checkpoints[loc.begin.fileID].file_data[begin .. end];
    }

    /**
      Get the original source text: length bytes starting at loc.
     **/
    string getText(SourceLocation loc, size_t length)
    {
        return getText(SourceRange(loc, loc + length));
    }

    /**
      Convert a location into a string. Something like "file(line)"
     **/
    string getLocationAsString(SourceLocation loc)
    {
        assert(loc.isValid, "Location is invalid");
        return layout.convert("{}({})",
            checkpoints[loc.fileID].filename,
            getLineNumber(loc));
    }

    /**
      Convert a range into a string of the form "file(begin:end)", where
      begin and end are byte offsets into the file.
     **/
    string getLocationAsString(SourceRange loc)
    {
        return layout.convert("{}({}:{})",
            checkpoints[loc.begin.fileID].filename,
            getFileOffset(loc.begin),
            getFileOffset(loc.end));
    }

private:
    /**
      Registers data as the contents of source_file, split into as many
      checkpoints as needed.

      Returns:
        A SourceLocation created from the ID of the first checkpoint
        belonging to the new file.
     **/
    synchronized
        SourceLocation createCheckpoints(string data, string source_file)
    {
        // The line-cache is added, but not built,
        // getLineNumber makes sure it is called when needed.
        linecache ~= FileLineCache();
        uint meta_index = linecache.length - 1;

        // ID of the first checkpoint of this file - this is what the caller
        // gets back.
        uint first_id = checkpoints.length;

        // Every checkpoint keeps a slice of the complete buffer so
        // whole-file offsets can be resolved from any of them.
        string whole = data;

        // SourceLocation's can only index relatively short buffers, therefore
        // the file is split into several checkpoints. `part` counts the
        // chunks of *this* file, starting at 0; getFileOffset relies on that.
        // The do-while registers one (empty) checkpoint for empty input, so
        // the returned location always refers to an existing checkpoint.
        uint part = 0;
        do
        {
            size_t to_take = data.length;
            if (to_take > SourceLocation.Bits.MaxFileOffset)
                to_take = SourceLocation.Bits.MaxFileOffset;

            checkpoints ~=
                CP(source_file, data[0 .. to_take], part, meta_index, whole);
            ++part;
            data = data[to_take .. $];
        }
        while (data.length > 0);

        return SourceLocation.fromFileID(first_id);
    }

    /// Contains the read/generated data.
    CP[] checkpoints;
    /// Cache used to speed up finding of line-starts.
    FileLineCache[] linecache;
    /// Used for formatting locations as strings.
    Layout!(char) layout;

    // These really should be magically available everywhere and templated.
    int min(int a, int b) { return a < b? a : b; }
    int max(int a, int b) { return a >= b? a : b; }

    // A Check Point is used to store a file in multiple parts, to overcome
    // the limitation of SourceLocation only having a rather limited amount of
    // bits to index any one file.
    struct CP
    {
        // read-only: name of the file the data came from
        char[] filename;
        // read-only: the chunk of the file covered by this checkpoint
        char[] data;
        // read-only: index of this chunk within its own file (0 = first)
        uint part = 0;
        // read-only: index into linecache of the cache shared by the file
        uint meta_index = 0;
        // read-only: the complete contents of the file; data is a slice of
        // this buffer
        char[] file_data;
    }

    struct FileLineCache
    {
        /// Index i holds the offset at which line i + 1 starts
        /// (line_starts[0] is always 0).
        uint[] line_starts;

        /// Indicates whether the cache has been built or not
        bool isCached = false;

        /**
          This method does a binary search to find the line that contains
          the given offset.

          Returns the number of recorded line starts that are <= offset,
          i.e. the 1-based line number once the cache has been built.
         **/
        uint lineOf(uint offset)
        {
            size_t beg = 0,
                   end = line_starts.length,
                   mid = end >> 1;

            while( beg < end )
            {
                if( line_starts[mid] <= offset )
                    beg = mid + 1;
                else
                    end = mid;
                mid = beg + ( end - beg ) / 2;
            }
            return mid;
        }

        /**
          Builds the cache data - always make sure this has been called
          before calling lineOf.

          "\n", "\r" and "\r\n" are each treated as one line terminator. A
          terminator belongs to the line it ends; the next line starts at
          the first byte after it.
         **/
        void build(char[] data)
        {
            // Upper bound on the number of lines: one for the first line
            // plus one per terminator character. A "\r\n" pair is counted
            // twice here; the surplus is trimmed off after the second pass.
            size_t upper = 1;
            foreach (c; data)
                if (c == '\n' || c == '\r')
                    ++upper;

            line_starts.length = upper;
            line_starts[0] = 0;

            // Go over the data again, writing the line starts in our array.
            size_t j = 1;
            for (size_t i = 0; i < data.length; i++)
            {
                if (data[i] == '\r')
                {
                    // Swallow the '\n' of a "\r\n" pair; the bounds check
                    // keeps a trailing '\r' from reading past the data.
                    if (i + 1 < data.length && data[i + 1] == '\n')
                        ++i;
                    line_starts[j++] = cast(uint)(i + 1);
                }
                else if (data[i] == '\n')
                    line_starts[j++] = cast(uint)(i + 1);
            }

            // Drop the slots reserved for the second half of "\r\n" pairs.
            line_starts.length = j;

            isCached = true;
        }
    }
}