Mercurial > projects > dang
view basic/SourceManager.d @ 154:0ea5d2f3e96b
Parsing "this" as constructor. Also removed regex from the test run program(seg fault - dmd???)
author | Anders Johnsen <skabet@gmail.com> |
---|---|
date | Mon, 21 Jul 2008 21:45:54 +0200 |
parents | 48bb2287c035 |
children |
line wrap: on
line source
module basic.SourceManager;

import tango.core.Memory : GC;
import tango.io.UnicodeFile;
import tango.io.Stdout;
import tango.text.convert.Layout;

public import basic.SourceLocation;

private alias char[] string;

/**
  SourceManager is used to handle input files, by loading them in chunks
  that can be referenced elsewhere.

  It also helps extract the line/column of locations and converts between
  real and virtual locations.
 **/
class SourceManager
{
    this()
    {
        layout = new Layout!(char);
    }

    /**
      Loads the file belonging to the filename.

        filename = The file to load. There are some assumptions about this
        file:
         1. The file has a BOM or is valid utf-8
         2. The file is readable, not a folder etc.

      Returns a SourceLocation pointing at the first checkpoint created for
      the file.
     **/
    SourceLocation addFile(string filename)
    {
        scope file = new UnicodeFile!(char)(filename, Encoding.UTF_8);
        auto file_data = file.read();
        return createCheckpoints(file_data, filename);
    }

    /**
      Returns a string slice containing the part of the file after loc,
      running up to the end of the whole file (not just the checkpoint).
      (a pointer might be better, it allows negative indexing)
     **/
    string getRawData(SourceLocation loc)
    {
        CP cp = checkpoints[loc.fileID];
        // Measured from the start of this checkpoint to the end of the file.
        auto length = cp.data_end - cp.data.ptr;
        return cp.data.ptr[loc.fileOffset .. length];
    }

    /**
      Extracts the line number of the given location.
      O("file size") if cache isn't built, O(log "lines in file") else
     **/
    uint getLineNumber(SourceLocation loc)
    {
        assert(loc.isValid, "Location is invalid");
        assert(loc.isReal, "Virtual locations not supported yet");
        assert(loc.fileID < checkpoints.length, "Non-existent location");

        CP* cp = &checkpoints[loc.fileID];
        auto cache = &linecache[cp.meta_index];
        // The cache is built lazily, over the data of the entire file.
        if (!cache.isCached)
            cache.build(cp.data_start[0 .. cp.data_end - cp.data_start]);
        return cache.lineOf(getFileOffset(loc));
    }

    /**
      Extracts the full byte offset into a file, at which a location
      is pointing. This is the offset inside the checkpoint plus the size
      of all checkpoints of the same file that precede it.
     **/
    uint getFileOffset(SourceLocation loc)
    {
        return loc.fileOffset
            + checkpoints[loc.fileID].part * loc.Bits.MaxFileOffset;
    }

    /**
      Extracts a string containing the entire line loc appears in, without
      its line terminator.

      A location that sits on a line terminator (or at the very end of the
      file) is treated as belonging to the line it ends.
     **/
    string getLine(SourceLocation loc)
    {
        CP* cp = &checkpoints[loc.fileID];
        char* ptr = cp.data.ptr + loc.fileOffset;

        char* begin = findLineStart(cp, ptr);

        // Advance to the nearest line terminator (or the end of the file).
        char* end = ptr;
        while (cp.inRange(end) && *end != '\n' && *end != '\r')
            ++end;
        // Only reachable when loc itself is the '\n' of a "\r\n" pair:
        // drop the '\r' so the terminator is not part of the result.
        if (end > begin && end[-1] == '\r')
            --end;

        return begin[0 .. end - begin];
    }

    /**
      Gets the zero-based column (in bytes) at which loc appears on its
      line.
     **/
    int getColumn(SourceLocation loc)
    {
        CP* cp = &checkpoints[loc.fileID];
        char* ptr = cp.data.ptr + loc.fileOffset;
        // A pointer difference avoids truncating the addresses themselves.
        return cast(int)(ptr - findLineStart(cp, ptr));
    }

    /**
      Get the original source text of a SourceRange
     **/
    string getText(SourceRange loc)
    {
        assert(loc.isValid, "Range is invalid");
        assert(loc.isReal, "Virtual locations not supported yet");
        auto begin  = getFileOffset(loc.begin);
        auto end    = getFileOffset(loc.end);
        return checkpoints[loc.begin.fileID].data_start[begin .. end];
    }

    /**
      Get the original source text of length bytes starting at loc
     **/
    string getText(SourceLocation loc, size_t length)
    {
        return getText(SourceRange(loc, loc + length));
    }

    /**
      Convert a location into a string. Something like "file(line)"
     **/
    string getLocationAsString(SourceLocation loc)
    {
        assert(loc.isValid, "Location is invalid");
        return layout.convert("{}({})",
            checkpoints[loc.fileID].filename,
            getLineNumber(loc));
    }

    /**
      Convert a range into a string of the form "file(begin:end)", where
      begin and end are byte offsets into the file.
     **/
    string getLocationAsString(SourceRange loc)
    {
        return layout.convert("{}({}:{})",
            checkpoints[loc.begin.fileID].filename,
            getFileOffset(loc.begin),
            getFileOffset(loc.end));
    }

    /**
      Get the file name of a loc.
     **/
    string getFile(SourceLocation loc)
    {
        return checkpoints[loc.fileID].filename;
    }

private:
    /**
      Splits data into checkpoints and registers them, together with an
      (unbuilt) line cache for the file.

      Returns a SourceLocation for the first checkpoint of the file. An
      empty buffer still gets one (empty) checkpoint, so the returned
      location always refers to an existing checkpoint.
     **/
    synchronized
        SourceLocation createCheckpoints(string data, string source_file)
    {
        // The line-cache is added, but not built,
        // getLineNumber makes sure it is called when needed.
        linecache ~= FileLineCache();
        uint meta_index = linecache.length - 1;

        // SourceLocation's can only index relatively short buffers, therefore
        // the file is split into several checkpoints.
        uint checkpoint_counter = 0;
        char* data_start = data.ptr;
        char* data_end = data.ptr + data.length;
        // do/while: at least one checkpoint is created, even for empty data.
        do
        {
            uint to_take = min(data.length, SourceLocation.Bits.MaxFileOffset);
            checkpoints ~=
                CP(source_file,
                        data_start,
                        data_end,
                        data[0 .. to_take],
                        checkpoint_counter++,
                        meta_index);
            data = data[to_take .. $];
        }
        while (data.length > 0);

        // Index of the first checkpoint that was appended above.
        checkpoint_counter = checkpoints.length - checkpoint_counter;
        return SourceLocation.fromFileID(checkpoint_counter);
    }

    /**
      Returns a pointer to the first character of the line that ptr is on.

      The scan looks at the characters *before* ptr, so a ptr that sits on
      a line terminator, or one past the end of the file, resolves to the
      start of the line being terminated.
     **/
    char* findLineStart(CP* cp, char* ptr)
    {
        char* p = ptr;
        // The '\n' of a "\r\n" pair belongs to the line the pair ends.
        if (cp.inRange(p) && *p == '\n'
                && p > cp.data_start && p[-1] == '\r')
            --p;
        while (p > cp.data_start && p <= cp.data_end
                && p[-1] != '\n' && p[-1] != '\r')
            --p;
        return p;
    }

    /// Contains the read/generated data.
    CP[] checkpoints;
    /// Cache used to speed up finding of line-starts.
    FileLineCache[] linecache;
    /// Used for formatting locations as strings.
    Layout!(char) layout;

    // These really should be magically available everywhere and templated.
    int min(int a, int b) { return a < b? a : b; }
    int max(int a, int b) { return a >= b? a : b; }

    // A Check Point is used to store a file in multiple parts, to overcome
    // the limitation of SourceLocation only having a rather limited amount of
    // bits to index any one file.
    struct CP
    {
        // read-only
        char[] filename;
        // ditto - start and end of the data of the entire file
        char* data_start;
        char* data_end;
        // ditto - the slice of the file covered by this checkpoint
        char[] data;
        // ditto - index of this checkpoint within its file
        uint part = 0;
        // ditto - index into linecache for the file
        uint meta_index = 0;

        /// True if p points inside the data of the entire file.
        bool inRange(char* p)
        {
            return p >= data_start && p < data_end;
        }
    }

    struct FileLineCache
    {
        /// Contains the offset of the start of the (i+1)'th line on index i
        uint[] line_starts;

        /// Indicates whether the cache has been built or not
        bool isCached = false;

        /**
          This method does a binary search to find the line that contains the
          given offset. The result is the number of line starts that are
          <= offset, i.e. a one-based line number.
         **/
        uint lineOf(uint offset)
        {
            size_t  beg = 0,
                    end = line_starts.length,
                    mid = end >> 1;

            while( beg < end )
            {
                if( line_starts[mid] <= offset )
                    beg = mid + 1;
                else
                    end = mid;
                mid = beg + ( end - beg ) / 2;
            }
            return mid;
        }

        /**
          Builds the cache data - always make sure this has been called before
          calling lineOf.

          "\n", "\r" and "\r\n" are each treated as a single line terminator.
         **/
        void build(char[] data)
        {
            // Count the lines first so the array is sized exactly once.
            // Starts at 1, because we need an additional place in the array
            // to indicate that line 1 starts at index 0. The counting rules
            // must match the fill loop below exactly.
            size_t lines = 1;
            for (size_t i = 0; i < data.length; i++)
            {
                if (data[i] == '\n')
                    ++lines;
                else if (data[i] == '\r')
                {
                    ++lines;
                    if (i + 1 < data.length && data[i + 1] == '\n')
                        ++i;
                }
            }

            line_starts.length = lines;
            line_starts[0] = 0;

            // Go over the data again, writing the line starts in our new
            // array. A line starts right after the terminator of the
            // previous one.
            size_t j = 1;
            for (size_t i = 0; i < data.length; i++)
            {
                if (data[i] == '\n')
                    line_starts[j++] = cast(uint)(i + 1);
                else if (data[i] == '\r')
                {
                    // Swallow the '\n' of a "\r\n" pair; the bounds check
                    // covers a '\r' that is the last byte of the data.
                    if (i + 1 < data.length && data[i + 1] == '\n')
                        ++i;
                    line_starts[j++] = cast(uint)(i + 1);
                }
            }

            isCached = true;
        }
    }
}