Mercurial > projects > dang
comparison src/basic/SourceManager.d @ 206:d3c148ca429b
Major moving of files. all src now goes into src, all docs in docs.
author | Anders Johnsen <skabet@gmail.com> |
---|---|
date | Tue, 12 Aug 2008 18:14:56 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
205:8387cbaa85ab | 206:d3c148ca429b |
---|---|
1 module basic.SourceManager; | |
2 | |
3 import tango.core.Memory : GC; | |
4 import tango.io.UnicodeFile; | |
5 import tango.io.Stdout; | |
6 import tango.text.convert.Layout; | |
7 | |
8 public import basic.SourceLocation; | |
9 | |
10 private alias char[] string; | |
11 | |
12 /** | |
13 SourceManager is used to handle input files, by loading them in in chunks | |
14 that can be referenced elsewhere. | |
15 | |
16 It will also help extract the line/col of locations and convert between | |
17 real and virtual locations | |
18 **/ | |
/**
  SourceManager is used to handle input files, by loading them in chunks
  ("checkpoints") that can be referenced elsewhere.

  It also helps extract the line/column of locations and convert a location
  into an offset in the file it belongs to.
 **/
class SourceManager
{
    /// Creates an empty manager; files are registered with addFile.
    this()
    {
        layout = new Layout!(char);
    }

    /**
      Will load in the file belonging to the filename

      Params:
        filename = The file to load. There are some assumptions about this
                   file:
                   1. The file has a BOM or is valid utf-8
                   2. The file is not unreadable, a folder etc.

      Returns: the location of the first checkpoint created for the file.
               An empty file yields a single, empty checkpoint.
     **/
    SourceLocation addFile(string filename)
    {
        scope file = new UnicodeFile!(char)(filename, Encoding.UTF_8);
        auto file_data = file.read();
        return createCheckpoints(file_data, filename);
    }

    /**
      Returns a string slice containing the part of the file after loc (a
      pointer might be better, it allows negative indexing)

      The slice starts at loc and runs to the end of the whole file, not
      only to the end of the checkpoint loc belongs to.
     **/
    string getRawData(SourceLocation loc)
    {
        CP cp = checkpoints[loc.fileID];
        // Distance from the start of this checkpoint to the end of the file.
        auto length = cp.data_end - cp.data.ptr;
        return cp.data.ptr[loc.fileOffset .. length];
    }

    /**
      Extracts the line number (starting at 1) of the given location

      O("file size") if cache isn't built, O(log "lines in file") else
     **/
    uint getLineNumber(SourceLocation loc)
    {
        assert(loc.isValid, "Location is invalid");
        assert(loc.isReal, "Virtual locations not supported yet");
        assert(loc.fileID < checkpoints.length, "Non-existent location");

        CP* cp = &checkpoints[loc.fileID];
        auto cache = &linecache[cp.meta_index];
        // The cache is built lazily, the first time a line number is needed
        // for the file.
        if (!cache.isCached)
            cache.build(cp.data_start[0 .. cp.data_end - cp.data_start]);
        return cache.lineOf(getFileOffset(loc));
    }

    /**
      Extracts the full byte offset into a file, at which a location
      is pointing.

      createCheckpoints cuts a file every MaxFileOffset bytes, so checkpoint
      number `part` begins at part * MaxFileOffset.
     **/
    uint getFileOffset(SourceLocation loc)
    {
        return loc.fileOffset
            + checkpoints[loc.fileID].part * loc.Bits.MaxFileOffset;
    }

    /**
      Extracts a string containing the entire line loc appears in, without
      the line terminator.

      A location that points at a line terminator (or at the very end of the
      file) is treated as part of the line it ends.
     **/
    string getLine(SourceLocation loc)
    {
        assert(loc.fileID < checkpoints.length, "Non-existent location");

        CP* cp = &checkpoints[loc.fileID];
        char* lo = lineStart(cp, cp.data.ptr + loc.fileOffset);

        // Walk forward from the start of the line to its terminator, or to
        // the end of the file if the last line is unterminated.
        char* hi = lo;
        while (cp.inRange(hi) && *hi != '\n' && *hi != '\r')
            ++hi;
        return lo[0 .. hi - lo];
    }

    /**
      Gets the column of where the loc appears, i.e. the number of bytes
      between the start of the line and loc. The first column is 0.
     **/
    int getColumn(SourceLocation loc)
    {
        assert(loc.fileID < checkpoints.length, "Non-existent location");

        CP* cp = &checkpoints[loc.fileID];
        char* ptr = cp.data.ptr + loc.fileOffset;
        // Subtract the pointers directly; casting each one to int first
        // would truncate the addresses on 64-bit targets.
        return cast(int)(ptr - lineStart(cp, ptr));
    }

    /**
      Get the original source text of a SourceRange

      Both ends of the range must lie in the same file, and the end may not
      come before the beginning.
     **/
    string getText(SourceRange loc)
    {
        assert(loc.isValid, "Range is invalid");
        assert(loc.isReal, "Virtual locations not supported yet");
        assert(checkpoints[loc.begin.fileID].meta_index
                == checkpoints[loc.end.fileID].meta_index,
                "Range spans more than one file");

        auto begin = getFileOffset(loc.begin);
        auto end = getFileOffset(loc.end);
        assert(begin <= end, "Range ends before it begins");
        // Offsets are relative to the start of the whole file, so the slice
        // may cross checkpoint boundaries.
        return checkpoints[loc.begin.fileID].data_start[begin .. end];
    }

    /**
      Get the original source text, length bytes starting at loc
     **/
    string getText(SourceLocation loc, size_t length)
    {
        return getText(SourceRange(loc, loc + length));
    }

    /**
      Convert a location into a string. Something like "file(line)"
     **/
    string getLocationAsString(SourceLocation loc)
    {
        assert(loc.isValid, "Location is invalid");
        return layout.convert("{}({})",
                checkpoints[loc.fileID].filename,
                getLineNumber(loc));
    }

    /**
      Convert a range into a string. Something like "file(begin:end)", where
      begin and end are byte offsets into the file.
     **/
    string getLocationAsString(SourceRange loc)
    {
        return layout.convert("{}({}:{})",
                checkpoints[loc.begin.fileID].filename,
                getFileOffset(loc.begin),
                getFileOffset(loc.end));
    }

    /**
      Get the file name of a loc.
     **/
    string getFile(SourceLocation loc)
    {
        return checkpoints[loc.fileID].filename;
    }

private:
    /**
      Registers data as the content of source_file, split into as many
      checkpoints as needed.

      Returns: the location of the first checkpoint that was created.
     **/
    synchronized
        SourceLocation createCheckpoints(string data, string source_file)
    {
        // The line-cache is added, but not built,
        // getLineNumber makes sure it is called when needed.
        linecache ~= FileLineCache();
        uint meta_index = cast(uint)(linecache.length - 1);

        // SourceLocation's can only index relatively short buffers, therefore
        // the file is split into several checkpoints.
        uint checkpoint_counter = 0;
        char* data_start = data.ptr;
        char* data_end = data.ptr + data.length;

        // Always create at least one checkpoint, even for an empty file.
        // Otherwise the id returned below would refer to a checkpoint that
        // does not exist (or, later on, to the next file that is added).
        do
        {
            uint to_take = min(data.length, SourceLocation.Bits.MaxFileOffset);
            checkpoints ~=
                CP(source_file,
                        data_start,
                        data_end,
                        data[0 .. to_take],
                        checkpoint_counter++,
                        meta_index);
            data = data[to_take .. $];
        }
        while (data.length > 0);

        // Index of the first checkpoint appended above.
        uint first = cast(uint)(checkpoints.length - checkpoint_counter);
        return SourceLocation.fromFileID(first);
    }

    /**
      Returns a pointer to the first character of the line that ptr is on.

      A line terminator counts as part of the line it ends, so if ptr points
      at a terminator the start of the line before it is returned.
     **/
    char* lineStart(CP* cp, char* ptr)
    {
        // On the '\n' of a "\r\n" pair, step onto the '\r' so the pair is
        // handled as one terminator.
        if (cp.inRange(ptr) && *ptr == '\n'
                && ptr > cp.data_start && *(ptr - 1) == '\r')
            --ptr;

        // Look at the character before ptr, never at ptr itself, and stop at
        // the start of the file.
        while (ptr > cp.data_start && *(ptr - 1) != '\n' && *(ptr - 1) != '\r')
            --ptr;
        return ptr;
    }

    /// Contains the read/generated data.
    CP[] checkpoints;
    /// Cache used to speed up finding of line-starts.
    FileLineCache[] linecache;
    /// Used for formatting locations as strings.
    Layout!(char) layout;

    // These really should be magically available everywhere and templated.
    int min(int a, int b) { return a < b? a : b; }
    int max(int a, int b) { return a >= b? a : b; }

    // A Check Point is used to store a file in multiple parts, to overcome
    // the limitation of SourceLocation only having a rather limited amount of
    // bits to index any one file.
    struct CP
    {
        // read-only: name of the file this checkpoint belongs to
        char[] filename;
        // ditto: start and end (exclusive) of the whole file's data
        char* data_start;
        char* data_end;
        // ditto: the part of the file covered by this checkpoint
        char[] data;
        // ditto: number of this checkpoint within its file, starting at 0
        uint part = 0;
        // ditto: index of the file's entry in linecache
        uint meta_index = 0;

        /// True if p points at a character inside the file's data.
        bool inRange(char* p)
        {
            return p >= data_start && p < data_end;
        }
    }

    struct FileLineCache
    {
        /// Contains the offset at which line i + 1 starts on index i
        uint[] line_starts;

        /// Indicates whether the cache has been built or not
        bool isCached = false;

        /**
          This method does a binary search to find the line that contains the
          given offset.

          Returns: the line number, starting at 1 (the number of lines that
                   start at or before offset).
         **/
        uint lineOf(uint offset)
        {
            size_t beg = 0,
                   end = line_starts.length,
                   mid = end >> 1;

            while( beg < end )
            {
                if( line_starts[mid] <= offset )
                    beg = mid + 1;
                else
                    end = mid;
                mid = beg + ( end - beg ) / 2;
            }
            return cast(uint)mid;
        }

        /**
          Builds the cache data - always make sure this has been called before
          calling lineOf.

          "\n", "\r" and "\r\n" are each recognized as one line terminator.
         **/
        void build(char[] data)
        {
            // Count the lines first, so the array is allocated only once.
            // There is always at least one line, starting at offset 0.
            size_t lines = 1;
            for (size_t i = 0; i < data.length; )
            {
                size_t eol = terminatorLength(data, i);
                if (eol > 0)
                {
                    ++lines;
                    i += eol;
                }
                else
                    ++i;
            }
            line_starts.length = lines;
            line_starts[0] = 0;

            // Go over the data again, writing the line starts in our new
            // array. Both passes use terminatorLength, so they always agree
            // on the number of lines.
            size_t j = 1;
            for (size_t i = 0; i < data.length; )
            {
                size_t eol = terminatorLength(data, i);
                if (eol > 0)
                {
                    // The next line starts right after the terminator.
                    i += eol;
                    line_starts[j++] = cast(uint)i;
                }
                else
                    ++i;
            }

            isCached = true;
        }

        /**
          Returns the length of the line terminator starting at data[i]:
          2 for "\r\n", 1 for a lone '\n' or '\r' and 0 if there is none.
         **/
        static size_t terminatorLength(char[] data, size_t i)
        {
            if (data[i] == '\n')
                return 1;
            if (data[i] != '\r')
                return 0;
            // Only look at the next character if there is one.
            if (i + 1 < data.length && data[i + 1] == '\n')
                return 2;
            return 1;
        }
    }
}
284 |