132
|
1 /*******************************************************************************
|
|
2
|
|
3 copyright: Copyright (c) 2006 Lars Ivar Igesund, Thomas Kühne,
|
|
4 Grzegorz Adam Hankiewicz
|
|
5
|
|
6 license: BSD style: $(LICENSE)
|
|
7
|
|
8 version: Dec 2006: Initial release
|
|
9
|
|
10 author: Lars Ivar Igesund, Thomas Kühne,
|
|
11 Grzegorz Adam Hankiewicz
|
|
12
|
|
13 *******************************************************************************/
|
|
14
|
|
15 module tango.util.PathUtil;
|
|
16
|
|
17 private import tango.core.Exception;
|
|
18
|
|
/*******************************************************************************

    Normalizes a path component as specified in section 5.2 of RFC 2396.

    ./ in path is removed
    /. at the end is removed
    <segment>/.. at the end is removed
    <segment>/../ in path is removed

    Unless normSlash is set to false, all backslashes are converted
    to forward slashes before the path is examined.

    Note that any number of ../ segments at the front is kept as is,
    unless it is an absolute path, in which case an exception will
    be thrown. A relative path with ../ segments at the front is only
    considered valid if it can be joined with a path such that it can
    be fully normalized.

    Segments that merely start with dots (.htaccess, ..rc) are ordinary
    names and are left untouched. An empty path is returned as an empty
    path.

    The input is never modified; the result is a copy, or a slice or
    concatenation of slices of that copy.

    Params:
        path      = the path to normalize
        normSlash = when true (the default), '\' is rewritten as '/'

    Returns: the normalized path

    Throws: IllegalArgumentException if the root separator is followed
            by ..

    Examples:
    -----
    normalize("/home/foo/./bar/../../john/doe"); // => "/home/john/doe"
    normalize("../../foo/./bar");                // => "../../foo/bar"
    normalize("foo/..rc");                       // => "foo/..rc"
    -----

*******************************************************************************/

char[] normalize(char[] path, bool normSlash = true)
{
    /*
        Internal helper to patch slashes: rewrites every '\' of path
        in place as '/' and returns the same slice.
    */
    char[] normalizeSlashes(char[] path)
    {
        foreach (inout c; path)
                 if (c == '\\')
                     c = '/';
        return path;
    }

    /*
        Internal helper that finds a slash followed by a dot, looking
        at slashes located at index start or later. Returns the index
        of the dot, or -1 if there is none. Any start value is accepted,
        including one past the end and an empty path.
    */
    int findSlashDot(char[] path, int start)
    {
        if (start < 0)
            start = 0;

        // i + 1 must be a valid index, which also covers empty paths
        for (int i = start; i + 1 < path.length; i++)
             if (path[i] == '/' && path[i + 1] == '.')
                 return i + 1;

        return -1;
    }

    /*
        Internal helper that finds a slash starting at the back, that
        is, at index start and moving towards the front. Returns the
        index of the slash, or -1 if there is none.
    */
    int findSlash(char[] path, int start)
    {
        // test for negative values first: comparing a negative int
        // with the unsigned length would always fail the assert below
        if (start < 0)
            return -1;

        assert (start < path.length);

        for (int i = start; i >= 0; i--)
             if (path[i] == '/')
                 return i;

        return -1;
    }

    /*
        Internal helper that recursively shortens all segments with dots.
        start is the index of a dot that directly follows a slash.
    */
    char[] removeDots(char[] path, int start)
    {
        assert (start < path.length);
        assert (path[start] == '.');

        int idx;

        if (start + 1 == path.length) {
            // path ends with /., remove
            return path[0 .. start - 1];
        }

        if (path[start + 1] == '/') {
            // remove all subsequent './'
            do {
                path = path[0 .. start] ~ path[start + 2 .. $];
            } while (start + 2 < path.length && path[start .. start + 2] == "./");

            idx = findSlashDot(path, start - 1);
            return idx < 0 ? path : removeDots(path, idx);
        }

        // the segment is ".." only if nothing but a slash or the end of
        // the path follows the second dot; "..rc" is an ordinary name
        bool dotdot = path[start + 1] == '.' &&
                      (start + 2 == path.length || path[start + 2] == '/');

        if (dotdot) {
            // found /.. sequence
            version (Win32) {
                // absolute: /.. or X:/.. (the drive colon sits at index 1)
                if (start == 1 || (start == 3 && path[1] == ':')) {
                    throw new IllegalArgumentException("PathUtil :: Invalid absolute path, root can not be followed by ..");
                }
            }
            else {
                if (start == 1) { // absolute
                    throw new IllegalArgumentException("PathUtil :: Invalid absolute path, root separator can not be followed by ..");
                }
            }

            // slash in front of the preceding segment, -1 if that
            // segment is the first one of the path
            idx = findSlash(path, start - 2);

            // a preceding segment that is itself .. can't be cancelled
            bool keep = path[idx + 1 .. start - 1] == "..";

            if (start + 2 == path.length) {
                // path ends with /..
                if (keep) {
                    // ../.. stays as it is
                    return path;
                }
                if (idx < 0) {
                    // no more slashes in front of /.., resolves to empty path
                    return "";
                }
                // remove /.. and preceding segment and return
                return path[0 .. idx];
            }

            // found /../ sequence
            if (keep) {
                // <segment>/../ is ../../, keep it and go on with the
                // slash that ends it, which is always a valid index
                idx = findSlashDot(path, start + 2);
                return idx < 0 ? path : removeDots(path, idx);
            }

            // otherwise, remove <segment>/../
            path = path[0 .. idx < 0 ? 0 : idx + 1] ~ path[start + 3 .. $];
            idx = findSlashDot(path, idx < 0 ? 0 : idx);
            return idx < 0 ? path : removeDots(path, idx);
        }

        // segment is a name that starts with a dot
        if (findSlash(path, path.length - 1) < start) {
            // ... and it is the last segment
            return path;
        }

        // not at end, examine next /.
        idx = findSlashDot(path, start);
        return idx < 0 ? path : removeDots(path, idx);
    }

    char[] normpath = path.dup;
    if (normSlash) {
        normpath = normalizeSlashes(normpath);
    }

    // if path starts with ./, remove all subsequent instances
    while (normpath.length > 1 && normpath[0] == '.' &&
           normpath[1] == '/') {
        normpath = normpath[2 .. $];
    }

    int first = findSlashDot(normpath, 0);
    if (first > -1) {
        normpath = removeDots(normpath, first);
    }

    return normpath;
}
|
|
192
|
|
193
|
|
debug (UnitTest)
{

unittest
{
    /*
        Asserts that normalize(input) yields expected. The actual
        result doubles as the message of a failing assert.
    */
    void check(char[] input, char[] expected)
    {
        char[] actual = normalize (input);
        assert (actual == expected, actual);
    }

    // names starting with a dot are left alone
    check ("/home/../john/../.tango/.htaccess", "/.tango/.htaccess");
    check ("/home/../john/../.tango/foo.conf", "/.tango/foo.conf");
    check ("/home/john/.tango/foo.conf", "/home/john/.tango/foo.conf");
    check ("/foo/bar/.htaccess", "/foo/bar/.htaccess");

    // . and .. segments, forward slashes
    check ("foo/bar/././.", "foo/bar");
    check ("././foo/././././bar", "foo/bar");
    check ("/foo/../john", "/john");
    check ("foo/../john", "john");
    check ("foo/bar/..", "foo");
    check ("foo/bar/../john", "foo/john");
    check ("foo/bar/doe/../../john", "foo/john");
    check ("foo/bar/doe/../../john/../bar", "foo/bar");
    check ("./foo/bar/doe", "foo/bar/doe");
    check ("./foo/bar/doe/../../john/../bar", "foo/bar");
    check ("./foo/bar/../../john/../bar", "bar");
    check ("foo/bar/./doe/../../john", "foo/john");
    check ("../../foo/bar", "../../foo/bar");
    check ("../../../foo/bar", "../../../foo/bar");
    check ("d/", "d/");

    // the same with backslashes, which get converted
    check ("\\foo\\..\\john", "/john");
    check ("foo\\..\\john", "john");
    check ("foo\\bar\\..", "foo");
    check ("foo\\bar\\..\\john", "foo/john");
    check ("foo\\bar\\doe\\..\\..\\john", "foo/john");
    check ("foo\\bar\\doe\\..\\..\\john\\..\\bar", "foo/bar");
    check (".\\foo\\bar\\doe", "foo/bar/doe");
    check (".\\foo\\bar\\doe\\..\\..\\john\\..\\bar", "foo/bar");
    check (".\\foo\\bar\\..\\..\\john\\..\\bar", "bar");
    check ("foo\\bar\\.\\doe\\..\\..\\john", "foo/john");
    check ("..\\..\\foo\\bar", "../../foo/bar");
    check ("..\\..\\..\\foo\\bar", "../../../foo/bar");
}
}
|
|
240
|
|
241
|
|
/******************************************************************************

    Matches a pattern against a filename.

    Some characters of pattern have a special meaning (they are
    <i>meta-characters</i>) and <b>can't</b> be escaped. These are:
    <p><table>
    <tr><td><b>*</b></td>
        <td>Matches 0 or more instances of any character.</td></tr>
    <tr><td><b>?</b></td>
        <td>Matches exactly one instance of any character.</td></tr>
    <tr><td><b>[</b><i>chars</i><b>]</b></td>
        <td>Matches one instance of any character that appears
        between the brackets.</td></tr>
    <tr><td><b>[!</b><i>chars</i><b>]</b></td>
        <td>Matches one instance of any character that does not appear
        between the brackets after the exclamation mark.</td></tr>
    </table><p>
    Internally individual character comparisons are done calling
    charMatch(), so its rules apply here too. Note that path
    separators and dots don't stop a meta-character from matching
    further portions of the filename.

    A pattern with a <b>[</b> that is never closed matches nothing.
    When contracts are enabled, a nested <b>[</b> or a <b>]</b>
    without an opening bracket fails an assert.

    Params:
        filename = the name to examine
        pattern  = the pattern to match filename against

    Returns: true if pattern matches filename, false otherwise.

    See_Also: charMatch().

    Throws: Nothing.

    Examples:
    -----
    version(Win32)
    {
        patternMatch("foo.bar", "*") // => true
        patternMatch(r"foo/foo\bar", "f*b*r") // => true
        patternMatch("foo.bar", "f?bar") // => false
        patternMatch("Goo.bar", "[fg]???bar") // => true
        patternMatch(r"d:\foo\bar", "d*foo?bar") // => true
    }
    version(Posix)
    {
        patternMatch("Go*.bar", "[fg]???bar") // => false
        patternMatch("/foo*home/bar", "?foo*bar") // => true
        patternMatch("foobar", "foo?bar") // => false
    }
    -----

******************************************************************************/

bool patternMatch(char[] filename, char[] pattern)
in
{
    // Verify that the brackets of pattern[] are neither nested nor
    // closed without having been opened
    int i;
    int inbracket = false;

    for (i = 0; i < pattern.length; i++)
    {
        switch (pattern[i])
        {
            case '[':
                assert(!inbracket);
                inbracket = true;
                break;

            case ']':
                assert(inbracket);
                inbracket = false;
                break;

            default:
                break;
        }
    }
}
body
{
    int  ni = 0;        // index of the next unmatched char of filename
    char pc;            // current pattern char
    char nc;            // filename char tested against a bracket set
    bool negate;        // the bracket set started with '!'
    bool found;         // nc is a member of the bracket set

    for (int pi = 0; pi < pattern.length; pi++)
    {
        pc = pattern[pi];
        switch (pc)
        {
            case '*':
                // a trailing '*' swallows whatever is left
                if (pi + 1 == pattern.length)
                    return true;

                // let '*' swallow 0 .. all of the remaining chars and
                // match the rest of the pattern against what is left;
                // j == filename.length covers the empty remainder,
                // which is what a pattern such as "foo**" needs
                for (int j = ni; j <= filename.length; j++)
                {
                    if (patternMatch(filename[j .. $], pattern[pi + 1 .. $]))
                        return true;
                }
                return false;

            case '?':
                if (ni == filename.length)
                    return false;
                ni++;
                break;

            case '[':
                if (ni == filename.length)
                    return false;
                nc = filename[ni];
                ni++;

                negate = false;
                pi++;
                if (pi < pattern.length && pattern[pi] == '!')
                {
                    negate = true;
                    pi++;
                }

                // walk the set up to its closing bracket, never
                // reading past the end of the pattern
                found = false;
                while (pi < pattern.length && pattern[pi] != ']')
                {
                    if (!found && charMatch(nc, pattern[pi]))
                        found = true;
                    pi++;
                }

                // the bracket was never closed
                if (pi == pattern.length)
                    return false;

                // [chars] needs a member, [!chars] a non-member
                if (found == negate)
                    return false;
                break;

            default:
                if (ni == filename.length)
                    return false;
                if (!charMatch(pc, filename[ni]))
                    return false;
                ni++;
                break;
        }
    }

    // the pattern is used up, so the filename has to be as well
    return ni == filename.length;
}
|
|
395
|
|
396
|
|
debug (UnitTest)
{
unittest
{
    // asserts that glob matches name
    void accepts(char[] name, char[] glob)
    {
        assert(patternMatch(name, glob));
    }

    // asserts that glob does not match name
    void rejects(char[] name, char[] glob)
    {
        assert(!patternMatch(name, glob));
    }

    // case sensitivity depends on the platform
    version (Win32)
        accepts("foo", "Foo");
    version (Posix)
        rejects("foo", "Foo");

    accepts("foo", "*");
    accepts("foo.bar", "*");
    accepts("foo.bar", "*.*");
    accepts("foo.bar", "foo*");
    accepts("foo.bar", "f*bar");
    accepts("foo.bar", "f*b*r");
    accepts("foo.bar", "f???bar");
    accepts("foo.bar", "[fg]???bar");
    accepts("foo.bar", "[!gh]*bar");

    rejects("foo", "bar");
    rejects("foo", "*.*");
    rejects("foo.bar", "f*baz");
    rejects("foo.bar", "f*b*x");
    rejects("foo.bar", "[gh]???bar");
    rejects("foo.bar", "[!fg]*bar");
    rejects("foo.bar", "[fg]???baz");
}
}
|
|
426
|
|
427
|
|
/******************************************************************************

    Compares two filename characters.

    On Win32 the ASCII letters a-z are compared without regard to
    case; every other character has to be identical. On Posix the
    two characters have to be identical.

    Returns: true if c1 matches c2, false otherwise.

    Throws: Nothing.

    Examples:
    -----
    version(Win32)
    {
        charMatch('a', 'b') // => false
        charMatch('A', 'a') // => true
    }
    version(Posix)
    {
        charMatch('a', 'b') // => false
        charMatch('A', 'a') // => false
    }
    -----
******************************************************************************/

private bool charMatch(char c1, char c2)
{
    version (Win32)
    {
        // fold the lower case ASCII letters onto the upper case ones;
        // identical characters stay identical, so no shortcut is needed
        int folded1 = c1;
        int folded2 = c2;

        if (c1 >= 'a' && c1 <= 'z')
            folded1 = c1 - ('a' - 'A');
        if (c2 >= 'a' && c2 <= 'z')
            folded2 = c2 - ('a' - 'A');

        return folded1 == folded2;
    }
    version (Posix)
    {
        return c1 == c2;
    }
}
|
|
471
|