comparison src/basic/SourceManager.d @ 206:d3c148ca429b

Major moving of files. all src now goes into src, all docs in docs.
author Anders Johnsen <skabet@gmail.com>
date Tue, 12 Aug 2008 18:14:56 +0200
parents
children
comparison
equal deleted inserted replaced
205:8387cbaa85ab 206:d3c148ca429b
1 module basic.SourceManager;
2
3 import tango.core.Memory : GC;
4 import tango.io.UnicodeFile;
5 import tango.io.Stdout;
6 import tango.text.convert.Layout;
7
8 public import basic.SourceLocation;
9
10 private alias char[] string;
11
12 /**
13 SourceManager is used to handle input files by loading them in chunks
14 that can be referenced elsewhere.
15
16 It will also help extract the line/col of locations and convert between
17 real and virtual locations
18 **/
19 class SourceManager
20 {
this()
{
    // A single formatter instance is kept for the lifetime of the manager;
    // getLocationAsString uses it to build its result strings.
    this.layout = new Layout!(char);
}
25
/**
  Reads the named file and registers its contents with the manager.

  Params:
    filename = The file to load. Some assumptions are made about it:
               1. it has a BOM or is valid UTF-8;
               2. it is not empty, unreadable, a folder etc.

  Returns: the location produced by createCheckpoints for the loaded text.
**/
SourceLocation addFile(string filename)
{
    // `scope` makes sure the file object is destroyed when we leave.
    scope source = new UnicodeFile!(char)(filename, Encoding.UTF_8);
    return createCheckpoints(source.read(), filename);
}
39
/**
  Returns a slice of the file's text that starts at loc and runs to the end
  of the file's data (a pointer might be better, since it would allow
  negative indexing).
**/
string getRawData(SourceLocation loc)
{
    CP chunk = checkpoints[loc.fileID];
    // loc.fileOffset is relative to the start of this checkpoint's part.
    char* base = chunk.data.ptr;
    return base[loc.fileOffset .. chunk.data_end - base];
}
50
/**
  Finds the line number on which the given location lies.

  The per-file line cache is built the first time it is needed, which costs
  O("file size"); afterwards a lookup is O(log "lines in file").
**/
uint getLineNumber(SourceLocation loc)
{
    assert(loc.isValid, "Location is invalid");
    assert(loc.isReal, "Virtual locations not supported yet");
    assert(loc.fileID < checkpoints.length, "Non-existent location");

    CP* chunk = &checkpoints[loc.fileID];
    FileLineCache* lines = &linecache[chunk.meta_index];
    if (!lines.isCached)
    {
        // The cache always covers the whole file, not just this part.
        auto whole_file =
            chunk.data_start[0 .. chunk.data_end - chunk.data_start];
        lines.build(whole_file);
    }
    auto offset = getFileOffset(loc);
    return lines.lineOf(offset);
}
67
/**
  Computes the byte offset, counted from the start of the whole file, that
  a location points at.
**/
uint getFileOffset(SourceLocation loc)
{
    // Every part before this one accounts for MaxFileOffset bytes.
    auto part = checkpoints[loc.fileID].part;
    return part * loc.Bits.MaxFileOffset + loc.fileOffset;
}
77
/**
  Extracts a string containing the entire line loc appears in, without its
  line terminator.

  A location that sits on a '\n' or '\r' is treated as belonging to the
  text that precedes it, and a location at the very end of the data yields
  the last line. The result is a slice into the loaded file data.
**/
string getLine(SourceLocation loc)
{
    CP* cp = &checkpoints[loc.fileID];
    char* pos = cp.data.ptr + loc.fileOffset;
    assert(pos >= cp.data_start && pos <= cp.data_end,
           "Location outside of file");

    // Walk backwards while the character *before* the cursor is not a line
    // break. Starting the test one character back keeps the cursor inside
    // the buffer and makes a location on a line break (or at end of data)
    // produce a well-formed slice.
    char* line_begin = pos;
    while (line_begin > cp.data_start
           && *(line_begin - 1) != '\n' && *(line_begin - 1) != '\r')
        --line_begin;

    // Walk forwards to the line break that ends the line, or to the end of
    // the file data if the last line is unterminated.
    char* line_end = pos;
    while (line_end < cp.data_end && *line_end != '\n' && *line_end != '\r')
        ++line_end;

    return line_begin[0 .. line_end - line_begin];
}
95
/**
  Gets the zero-based column at which loc appears, i.e. the number of
  bytes between the start of its line and the location.

  A location that sits on a '\n' or '\r' is treated as belonging to the
  text that precedes it.
**/
int getColumn(SourceLocation loc)
{
    CP* cp = &checkpoints[loc.fileID];
    char* pos = cp.data.ptr + loc.fileOffset;
    assert(pos >= cp.data_start && pos <= cp.data_end,
           "Location outside of file");

    // Same backwards walk as getLine: stop when the previous character is
    // a line break or the start of the file data is reached.
    char* line_begin = pos;
    while (line_begin > cp.data_start
           && *(line_begin - 1) != '\n' && *(line_begin - 1) != '\r')
        --line_begin;

    // Subtract the pointers first and narrow afterwards; casting each
    // pointer to int separately would truncate addresses on 64-bit targets.
    return cast(int)(pos - line_begin);
}
110
/**
  Returns the original source text covered by a SourceRange, as a slice
  into the loaded file data.
**/
string getText(SourceRange loc)
{
    assert(loc.isValid, "Range is invalid");
    assert(loc.isReal, "Virtual locations not supported yet");

    // Both ends are translated to offsets from the start of the whole file.
    auto from = getFileOffset(loc.begin);
    auto to = getFileOffset(loc.end);
    char* base = checkpoints[loc.begin.fileID].data_start;
    return base[from .. to];
}
122
/**
  Returns `length` bytes of the original source text, starting at loc.
**/
string getText(SourceLocation loc, size_t length)
{
    auto range = SourceRange(loc, loc + length);
    return getText(range);
}
130
/**
  Formats a location as "file(line)".
**/
string getLocationAsString(SourceLocation loc)
{
    assert(loc.isValid, "Location is invalid");
    string file = checkpoints[loc.fileID].filename;
    uint line = getLineNumber(loc);
    return layout.convert("{}({})", file, line);
}

/**
  Formats a range as "file(begin:end)", where begin and end are byte
  offsets into the file.
**/
string getLocationAsString(SourceRange loc)
{
    string file = checkpoints[loc.begin.fileID].filename;
    uint from = getFileOffset(loc.begin);
    uint to = getFileOffset(loc.end);
    return layout.convert("{}({}:{})", file, from, to);
}
148
/**
  Returns the name of the file that loc belongs to.
**/
string getFile(SourceLocation loc)
{
    CP* chunk = &checkpoints[loc.fileID];
    return chunk.filename;
}
156
157 private:
/**
  Registers `data` as the contents of `source_file`.

  SourceLocations can only index relatively short buffers, so the data is
  split into consecutive checkpoints of at most
  SourceLocation.Bits.MaxFileOffset bytes each. All checkpoints of a file
  share one (lazily built) line cache.

  Empty data still gets exactly one, empty, checkpoint, so the returned
  location always refers to a checkpoint that belongs to this file.

  Returns: the location of the first checkpoint created for the file.
**/
synchronized
SourceLocation createCheckpoints(string data, string source_file)
{
    // The line-cache is added, but not built;
    // getLineNumber makes sure it is built when needed.
    linecache ~= FileLineCache();
    uint meta_index = linecache.length - 1;

    // Every checkpoint remembers the bounds of the whole file.
    char* data_start = data.ptr;
    char* data_end = data.ptr + data.length;

    // Index the first checkpoint of this file is going to get.
    uint first_checkpoint = checkpoints.length;
    uint part = 0;

    // do-while: at least one checkpoint is created, even for empty data.
    do
    {
        uint to_take = min(data.length, SourceLocation.Bits.MaxFileOffset);
        checkpoints ~=
            CP(source_file,
               data_start,
               data_end,
               data[0 .. to_take],
               part++,
               meta_index);
        data = data[to_take .. $];
    }
    while (data.length > 0);

    return SourceLocation.fromFileID(first_checkpoint);
}
186
187 /// Contains the read/generated data, one entry per file part; indexed by SourceLocation.fileID.
188 CP[] checkpoints;
189 /// Caches used to speed up finding of line-starts; indexed by CP.meta_index.
190 FileLineCache[] linecache;
191 /// Used for formatting locations as strings.
192 Layout!(char) layout;
193
// Small integer helpers. Ideally these would be templated and available
// everywhere instead of being declared here.

// Returns the smaller of a and b (b when they are equal).
int min(int a, int b)
{
    if (a < b)
        return a;
    return b;
}

// Returns the larger of a and b (a when they are equal).
int max(int a, int b)
{
    if (a >= b)
        return a;
    return b;
}
197
// A Check Point stores one part of a file. Files are kept in several parts
// to overcome the limitation of SourceLocation only having a rather limited
// number of bits to index any one file.
//
// All fields are read-only once the checkpoint has been created. Their
// order matters: checkpoints are constructed positionally.
struct CP
{
    // Name of the file this part belongs to.
    char[] filename;
    // First byte, and one past the last byte, of the whole file's data.
    char* data_start;
    char* data_end;
    // The slice of the file's data covered by this part.
    char[] data;
    // Index of this part within its file.
    uint part = 0;
    // Index of the file's line cache in linecache.
    uint meta_index = 0;

    // True when p points into the whole file's data (not just this part).
    bool inRange(char* p)
    {
        if (p < data_start)
            return false;
        return p < data_end;
    }
}
220
// Per-file table of line starts, used to turn a byte offset into a line
// number with a binary search.
struct FileLineCache
{
    /// line_starts[i] is the byte offset at which line i+1 begins
    /// (line numbers are 1-based, so line_starts[0] is always 0).
    uint[] line_starts;

    /// Indicates whether the cache has been built or not
    bool isCached = false;

    /**
      Does a binary search to find the 1-based number of the line that
      contains the given offset. A line terminator counts as part of the
      line it ends.

      Returns 0 if the cache has not been built.
    **/
    uint lineOf(uint offset)
    {
        // Find the number of line starts that are <= offset.
        size_t beg = 0,
               end = line_starts.length;

        while( beg < end )
        {
            size_t mid = beg + ( end - beg ) / 2;
            if( line_starts[mid] <= offset )
                beg = mid + 1;
            else
                end = mid;
        }
        return cast(uint)beg;
    }

    /**
      Builds the cache data - always make sure this has been called before
      calling lineOf.

      "\n", "\r\n" and a lone "\r" are all recognised as line terminators;
      "\r\n" counts as a single terminator.
    **/
    void build(char[] data)
    {
        // First pass: count the lines, so the array is allocated only once.
        // The count starts at 1 because line 1 starts at index 0 without
        // being preceded by a terminator.
        size_t lines = 1;
        for (size_t i = 0; i < data.length; )
        {
            size_t len = lineBreakLength(data, i);
            if (len == 0)
                ++i;
            else
            {
                ++lines;
                i += len;
            }
        }
        line_starts.length = lines;
        line_starts[0] = 0;

        // Second pass: record the offset just past every terminator, which
        // is where the following line starts.
        size_t j = 1;
        for (size_t i = 0; i < data.length; )
        {
            size_t len = lineBreakLength(data, i);
            if (len == 0)
                ++i;
            else
            {
                i += len;
                line_starts[j++] = cast(uint)i;
            }
        }

        isCached = true;
    }

    // Length in bytes of the line terminator starting at data[i]:
    // 2 for "\r\n", 1 for a lone '\n' or '\r', 0 if there is none.
    private static size_t lineBreakLength(char[] data, size_t i)
    {
        if (data[i] == '\n')
            return 1;
        if (data[i] == '\r')
        {
            // Only look ahead when there is a next byte to look at.
            if (i + 1 < data.length && data[i + 1] == '\n')
                return 2;
            return 1;
        }
        return 0;
    }
}
283 }
284