Mercurial > projects > ldc
diff tango/tango/util/PathUtil.d @ 132:1700239cab2e trunk
[svn r136] MAJOR UNSTABLE UPDATE!!!
Initial commit after moving to Tango instead of Phobos.
Lots of bugfixes...
This build is not suitable for most things.
author | lindquist |
---|---|
date | Fri, 11 Jan 2008 17:57:40 +0100 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tango/tango/util/PathUtil.d Fri Jan 11 17:57:40 2008 +0100 @@ -0,0 +1,471 @@ +/******************************************************************************* + + copyright: Copyright (c) 2006 Lars Ivar Igesund, Thomas Kühne, + Grzegorz Adam Hankiewicz + + license: BSD style: $(LICENSE) + + version: Dec 2006: Initial release + + author: Lars Ivar Igesund, Thomas Kühne, + Grzegorz Adam Hankiewicz + +*******************************************************************************/ + +module tango.util.PathUtil; + +private import tango.core.Exception; + +/******************************************************************************* + + Normalizes a path component as specified in section 5.2 of RFC 2396. + + ./ in path is removed + /. at the end is removed + <segment>/.. at the end is removed + <segment>/../ in path is removed + + Unless normSlash is set to false, all slashes will be converted + to the systems path separator character. + + Note that any number of ../ segments at the front is ignored, + unless it is an absolute path, in which case an exception will + be thrown. A relative path with ../ segments at the front is only + considered valid if it can be joined with a path such that it can + be fully normalized. + + Throws: Exception if the root separator is followed by .. + + Examples: + ----- + normalize("/home/foo/./bar/../../john/doe"); // => "/home/john/doe" + ----- + +*******************************************************************************/ + +char[] normalize(char[] path, bool normSlash = true) +{ + /* + Internal helper to patch slashes + */ + char[] normalizeSlashes(char[] path) + { + char to = '/', from = '\\'; + + foreach (inout c; path) + if (c is from) + c = to; + return path; + } + + /* + Internal helper that finds a slash followed by a dot + */ + int findSlashDot(char[] path, int start) { + assert(start < path.length); + foreach(i, c; path[start..$-1]) + if (c == '/') + if (path[start+i+1] == '.') + return i + start + 1; + + return -1; + } + + /* + Internal helper that finds a slash starting at the back + */ + int findSlash(char[] path, int start) { + assert(start < path.length); + + if (start < 0) + return -1; + + for (int i = start; i >= 0; i--) { + if (path[i] == '/') { + return i; + } + } + return -1; + } + + /* + Internal helper that recursively shortens all segments with dots. + */ + char[] removeDots(char[] path, int start) { + assert (start < path.length); + assert (path[start] == '.'); + if (start + 1 == path.length) { + // path ends with /., remove + return path[0..start - 1]; + } + else if (path[start+1] == '/') { + // remove all subsequent './' + do { + path = path[0..start] ~ path[start+2..$]; + } while (start + 2 < path.length && path[start..start+2] == "./"); + int idx = findSlashDot(path, start - 1); + if (idx < 0) { + // no more /., return path + return path; + } + return removeDots(path, idx); + } + else if (path[start..start+2] == "..") { + // found /.. sequence +version (Win32) { + if (start == 3 && path[1] == '/') { // absolute, X:/.. + throw new IllegalArgumentException("PathUtil :: Invalid absolute path, root can not be followed by .."); + } + +} +else { + if (start == 1) { // absolute + throw new IllegalArgumentException("PathUtil :: Invalid absolute path, root separator can not be followed by .."); + } +} + int idx = findSlash(path, start - 2); + if (start + 2 == path.length) { + // path ends with /.. + if (idx < 0) { + // no more slashes in front of /.., resolves to empty path + return ""; + } + // remove /.. and preceding segment and return + return path[0..idx]; + } + else if (path[start+2] == '/') { + // found /../ sequence + // if no slashes before /../, set path to everything after + // if <segment>/../ is ../../, keep + // otherwise, remove <segment>/../ + if (path[idx+1..start-1] == "..") { + idx = findSlashDot(path, start+4); + if (idx < 0) { + // no more /., path fully shortened + return path; + } + return removeDots(path, idx); + } + path = path[0..idx < 0 ? 0 : idx + 1] ~ path[start+3..$]; + idx = findSlashDot(path, idx < 0 ? 0 : idx); + if (idx < 0) { + // no more /., path fully shortened + return path; + } + // examine next /. + return removeDots(path, idx); + } + } + else { + if (findSlash(path, path.length - 1) < start) + // segment is filename that starts with ., and at the end + return path; + else { + // not at end + int idx = findSlashDot(path, start); + if (idx > -1) + return removeDots(path, idx); + else + return path; + } + } + assert(false, "PathUtil :: invalid code path"); + } + + char[] normpath = path.dup; + if (normSlash) { + normpath = normalizeSlashes(normpath); + } + + // if path starts with ./, remove all subsequent instances + while (normpath.length > 1 && normpath[0] == '.' && + normpath[1] == '/') { + normpath = normpath[2..$]; + } + int idx = findSlashDot(normpath, 0); + if (idx > -1) { + normpath = removeDots(normpath, idx); + } + + return normpath; +} + + +debug (UnitTest) +{ + + unittest + { + assert (normalize ("/home/../john/../.tango/.htaccess") == "/.tango/.htaccess", + normalize ("/home/../john/../.tango/.htaccess")); + assert (normalize ("/home/../john/../.tango/foo.conf") == "/.tango/foo.conf", + normalize ("/home/../john/../.tango/foo.conf")); + assert (normalize ("/home/john/.tango/foo.conf") == "/home/john/.tango/foo.conf", + normalize ("/home/john/.tango/foo.conf")); + assert (normalize ("/foo/bar/.htaccess") == "/foo/bar/.htaccess", + normalize ("/foo/bar/.htaccess")); + assert (normalize ("foo/bar/././.") == "foo/bar", + normalize ("foo/bar/././.")); + assert (normalize ("././foo/././././bar") == "foo/bar", + normalize ("././foo/././././bar")); + assert (normalize ("/foo/../john") == "/john", + normalize("/foo/../john")); + assert (normalize ("foo/../john") == "john"); + assert (normalize ("foo/bar/..") == "foo"); + assert (normalize ("foo/bar/../john") == "foo/john"); + assert (normalize ("foo/bar/doe/../../john") == "foo/john"); + assert (normalize ("foo/bar/doe/../../john/../bar") == "foo/bar"); + assert (normalize ("./foo/bar/doe") == "foo/bar/doe"); + assert (normalize ("./foo/bar/doe/../../john/../bar") == "foo/bar"); + assert (normalize ("./foo/bar/../../john/../bar") == "bar"); + assert (normalize ("foo/bar/./doe/../../john") == "foo/john"); + assert (normalize ("../../foo/bar") == "../../foo/bar"); + assert (normalize ("../../../foo/bar") == "../../../foo/bar"); + assert (normalize ("d/") == "d/"); + + assert (normalize ("\\foo\\..\\john") == "/john"); + assert (normalize ("foo\\..\\john") == "john"); + assert (normalize ("foo\\bar\\..") == "foo"); + assert (normalize ("foo\\bar\\..\\john") == "foo/john"); + assert (normalize ("foo\\bar\\doe\\..\\..\\john") == "foo/john"); + assert (normalize ("foo\\bar\\doe\\..\\..\\john\\..\\bar") == "foo/bar"); + assert (normalize (".\\foo\\bar\\doe") == "foo/bar/doe"); + assert (normalize (".\\foo\\bar\\doe\\..\\..\\john\\..\\bar") == "foo/bar"); + assert (normalize (".\\foo\\bar\\..\\..\\john\\..\\bar") == "bar"); + assert (normalize ("foo\\bar\\.\\doe\\..\\..\\john") == "foo/john"); + assert (normalize ("..\\..\\foo\\bar") == "../../foo/bar"); + assert (normalize ("..\\..\\..\\foo\\bar") == "../../../foo/bar"); + } +} + + +/****************************************************************************** + + Matches a pattern against a filename. + + Some characters of pattern have special a meaning (they are + <i>meta-characters</i>) and <b>can't</b> be escaped. These are: + <p><table> + <tr><td><b>*</b></td> + <td>Matches 0 or more instances of any character.</td></tr> + <tr><td><b>?</b></td> + <td>Matches exactly one instances of any character.</td></tr> + <tr><td><b>[</b><i>chars</i><b>]</b></td> + <td>Matches one instance of any character that appears + between the brackets.</td></tr> + <tr><td><b>[!</b><i>chars</i><b>]</b></td> + <td>Matches one instance of any character that does not appear + between the brackets after the exclamation mark.</td></tr> + </table><p> + Internally individual character comparisons are done calling + charMatch(), so its rules apply here too. Note that path + separators and dots don't stop a meta-character from matching + further portions of the filename. + + Returns: true if pattern matches filename, false otherwise. + + See_Also: charMatch(). + + Throws: Nothing. + + Examples: + ----- + version(Win32) + { + patternMatch("foo.bar", "*") // => true + patternMatch(r"foo/foo\bar", "f*b*r") // => true + patternMatch("foo.bar", "f?bar") // => false + patternMatch("Goo.bar", "[fg]???bar") // => true + patternMatch(r"d:\foo\bar", "d*foo?bar") // => true + } + version(Posix) + { + patternMatch("Go*.bar", "[fg]???bar") // => false + patternMatch("/foo*home/bar", "?foo*bar") // => true + patternMatch("foobar", "foo?bar") // => true + } + ----- + +******************************************************************************/ + +bool patternMatch(char[] filename, char[] pattern) +in +{ + // Verify that pattern[] is valid + int i; + int inbracket = false; + + for (i = 0; i < pattern.length; i++) + { + switch (pattern[i]) + { + case '[': + assert(!inbracket); + inbracket = true; + break; + + case ']': + assert(inbracket); + inbracket = false; + break; + + default: + break; + } + } +} +body +{ + int pi; + int ni; + char pc; + char nc; + int j; + int not; + int anymatch; + + ni = 0; + for (pi = 0; pi < pattern.length; pi++) + { + pc = pattern[pi]; + switch (pc) + { + case '*': + if (pi + 1 == pattern.length) + goto match; + for (j = ni; j < filename.length; j++) + { + if (patternMatch(filename[j .. filename.length], + pattern[pi + 1 .. pattern.length])) + goto match; + } + goto nomatch; + + case '?': + if (ni == filename.length) + goto nomatch; + ni++; + break; + + case '[': + if (ni == filename.length) + goto nomatch; + nc = filename[ni]; + ni++; + not = 0; + pi++; + if (pattern[pi] == '!') + { + not = 1; + pi++; + } + anymatch = 0; + while (1) + { + pc = pattern[pi]; + if (pc == ']') + break; + if (!anymatch && charMatch(nc, pc)) + anymatch = 1; + pi++; + } + if (!(anymatch ^ not)) + goto nomatch; + break; + + default: + if (ni == filename.length) + goto nomatch; + nc = filename[ni]; + if (!charMatch(pc, nc)) + goto nomatch; + ni++; + break; + } + } + if (ni < filename.length) + goto nomatch; + + match: + return true; + + nomatch: + return false; +} + + +debug (UnitTest) +{ + unittest + { + version (Win32) + assert(patternMatch("foo", "Foo")); + version (Posix) + assert(!patternMatch("foo", "Foo")); + + assert(patternMatch("foo", "*")); + assert(patternMatch("foo.bar", "*")); + assert(patternMatch("foo.bar", "*.*")); + assert(patternMatch("foo.bar", "foo*")); + assert(patternMatch("foo.bar", "f*bar")); + assert(patternMatch("foo.bar", "f*b*r")); + assert(patternMatch("foo.bar", "f???bar")); + assert(patternMatch("foo.bar", "[fg]???bar")); + assert(patternMatch("foo.bar", "[!gh]*bar")); + + assert(!patternMatch("foo", "bar")); + assert(!patternMatch("foo", "*.*")); + assert(!patternMatch("foo.bar", "f*baz")); + assert(!patternMatch("foo.bar", "f*b*x")); + assert(!patternMatch("foo.bar", "[gh]???bar")); + assert(!patternMatch("foo.bar", "[!fg]*bar")); + assert(!patternMatch("foo.bar", "[fg]???baz")); + + } +} + + +/****************************************************************************** + + Matches filename characters. + + Under Windows, the comparison is done ignoring case. Under Linux + an exact match is performed. + + Returns: true if c1 matches c2, false otherwise. + + Throws: Nothing. + + Examples: + ----- + version(Win32) + { + charMatch('a', 'b') // => false + charMatch('A', 'a') // => true + } + version(Posix) + { + charMatch('a', 'b') // => false + charMatch('A', 'a') // => false + } + ----- +******************************************************************************/ + +private bool charMatch(char c1, char c2) +{ + version (Win32) + { + + if (c1 != c2) + { + return ((c1 >= 'a' && c1 <= 'z') ? c1 - ('a' - 'A') : c1) == + ((c2 >= 'a' && c2 <= 'z') ? c2 - ('a' - 'A') : c2); + } + return true; + } + version (Posix) + { + return c1 == c2; + } +} +