Mercurial > projects > ldc
view tango/tango/util/PathUtil.d @ 132:1700239cab2e trunk
[svn r136] MAJOR UNSTABLE UPDATE!!!
Initial commit after moving to Tango instead of Phobos.
Lots of bugfixes...
This build is not suitable for most things.
author | lindquist |
---|---|
date | Fri, 11 Jan 2008 17:57:40 +0100 |
parents | |
children |
line wrap: on
line source
/*******************************************************************************

        copyright:      Copyright (c) 2006 Lars Ivar Igesund, Thomas Kühne,
                                           Grzegorz Adam Hankiewicz

        license:        BSD style: $(LICENSE)

        version:        Dec 2006: Initial release

        author:         Lars Ivar Igesund, Thomas Kühne,
                        Grzegorz Adam Hankiewicz

*******************************************************************************/

module tango.util.PathUtil;

private import tango.core.Exception;

/*******************************************************************************

    Normalizes a path component as specified in section 5.2 of RFC 2396.

    ./ in path is removed
    /. at the end is removed
    <segment>/.. at the end is removed
    <segment>/../ in path is removed

    Unless normSlash is set to false, all backslashes are converted to
    forward slashes before the path is examined.

    Note that any number of ../ segments at the front is ignored,
    unless it is an absolute path, in which case an exception will be
    thrown. A relative path with ../ segments at the front is only
    considered valid if it can be joined with a path such that it can
    be fully normalized.

    A segment that merely starts with dots (".htaccess", "..foo", "...")
    is an ordinary file name and is left untouched. When everything after
    the root separator is removed ("/foo/..", "/."), the root separator
    itself is kept. An empty path normalizes to an empty path.

    The input is never modified; the result is built from a copy.

    Params:
        path      = the path to normalize
        normSlash = if true (the default), replace every '\' with '/' first

    Returns: the normalized path.

    Throws: IllegalArgumentException if the root separator is followed
            by ..

    Examples:
    -----
    normalize("/home/foo/./bar/../../john/doe"); // => "/home/john/doe"
    normalize("../../foo/bar");                  // => "../../foo/bar"
    normalize("/foo/..");                        // => "/"
    -----

*******************************************************************************/

char[] normalize(char[] path, bool normSlash = true)
{
    /*
       Internal helper to patch slashes: rewrites every '\' to '/' in place
       and returns the same array.
    */
    char[] normalizeSlashes(char[] path)
    {
        char to = '/', from = '\\';

        foreach (inout c; path)
            if (c is from)
                c = to;

        return path;
    }

    /*
       Internal helper that finds a slash followed by a dot, looking at
       slashes at index start or later. Returns the index of the dot, or -1.

       A start that leaves no room for a "/." pair (including an empty
       path) simply yields -1, so callers may pass an index that ran off
       the end after segments were removed.
    */
    int findSlashDot(char[] path, int start)
    {
        if (start < 0 || cast(size_t) start + 1 >= path.length)
            return -1;

        foreach (i, c; path[start..$-1])
            if (c == '/')
                if (path[start+i+1] == '.')
                    return i + start + 1;

        return -1;
    }

    /*
       Internal helper that finds a slash starting at the back: scans from
       index start down to 0 and returns the index of the first slash seen,
       or -1.
    */
    int findSlash(char[] path, int start)
    {
        // must be tested before the assert: a negative int compared with
        // path.length would be converted to a huge unsigned value
        if (start < 0)
            return -1;

        assert(start < path.length);

        for (int i = start; i >= 0; i--)
            if (path[i] == '/')
                return i;

        return -1;
    }

    /*
       Internal helper that recursively shortens all segments with dots.
       start is always the index of a '.' that directly follows a '/'.
    */
    char[] removeDots(char[] path, int start)
    {
        assert (start < path.length);
        assert (path[start] == '.');

        if (start + 1 == path.length) {
            // path ends with /., remove it; for "/." keep the root separator
            return start == 1 ? path[0..1] : path[0..start - 1];
        }
        else if (path[start+1] == '/') {
            // remove all subsequent './'
            do {
                path = path[0..start] ~ path[start+2..$];
            } while (start + 2 < path.length && path[start..start+2] == "./");

            // resume at the slash in front of the removed run
            int idx = findSlashDot(path, start - 1);
            if (idx < 0) {
                // no more /., return path
                return path;
            }
            return removeDots(path, idx);
        }
        else if (path[start+1] == '.'
                 && (start + 2 == path.length || path[start+2] == '/')) {
            // found a real /.. segment (not a name like "..foo" or "...")
            version (Win32) {
                if (start == 3 && path[1] == ':') { // absolute, X:/..
                    throw new IllegalArgumentException("PathUtil :: Invalid absolute path, root can not be followed by ..");
                }
            }
            else {
                if (start == 1) { // absolute
                    throw new IllegalArgumentException("PathUtil :: Invalid absolute path, root separator can not be followed by ..");
                }
            }

            // slash in front of the preceding segment, -1 if there is none
            int idx = findSlash(path, start - 2);
            // a preceding ".." can not be cancelled by this one
            bool parentIsDotDot = path[idx+1..start-1] == "..";

            if (start + 2 == path.length) {
                // path ends with /..
                if (parentIsDotDot) {
                    // ../.. stays as it is
                    return path;
                }
                if (idx < 0) {
                    // no more slashes in front of /.., resolves to empty path
                    return "";
                }
                if (idx == 0) {
                    // only the root separator is left in front, keep it
                    return path[0..1];
                }
                // remove /.. and preceding segment and return
                return path[0..idx];
            }

            // found /../ sequence
            if (parentIsDotDot) {
                // <segment>/../ is ../../, keep it and continue at the
                // slash that closes this segment (always a valid index)
                idx = findSlashDot(path, start + 2);
                if (idx < 0) {
                    // no more /., path fully shortened
                    return path;
                }
                return removeDots(path, idx);
            }

            // remove <segment>/../ ; without a slash in front the path
            // becomes everything after the sequence
            path = path[0..idx < 0 ? 0 : idx + 1] ~ path[start+3..$];
            idx = findSlashDot(path, idx < 0 ? 0 : idx);
            if (idx < 0) {
                // no more /., path fully shortened
                return path;
            }
            // examine next /.
            return removeDots(path, idx);
        }
        else {
            // segment is a name that starts with a dot; skip it and look
            // for the next /. behind it (none if it is the last segment)
            int idx = findSlashDot(path, start);
            if (idx > -1)
                return removeDots(path, idx);
            return path;
        }
    }

    char[] normpath = path.dup;
    if (normSlash) {
        normpath = normalizeSlashes(normpath);
    }

    // if path starts with ./, remove all subsequent instances
    while (normpath.length > 1 && normpath[0] == '.' &&
           normpath[1] == '/') {
        normpath = normpath[2..$];
    }

    int idx = findSlashDot(normpath, 0);
    if (idx > -1) {
        normpath = removeDots(normpath, idx);
    }

    return normpath;
}

debug (UnitTest)
{
    unittest
    {
        assert (normalize ("/home/../john/../.tango/.htaccess") == "/.tango/.htaccess",
                normalize ("/home/../john/../.tango/.htaccess"));
        assert (normalize ("/home/../john/../.tango/foo.conf") == "/.tango/foo.conf",
                normalize ("/home/../john/../.tango/foo.conf"));
        assert (normalize ("/home/john/.tango/foo.conf") == "/home/john/.tango/foo.conf",
                normalize ("/home/john/.tango/foo.conf"));
        assert (normalize ("/foo/bar/.htaccess") == "/foo/bar/.htaccess",
                normalize ("/foo/bar/.htaccess"));
        assert (normalize ("foo/bar/././.") == "foo/bar",
                normalize ("foo/bar/././."));
        assert (normalize ("././foo/././././bar") == "foo/bar",
                normalize ("././foo/././././bar"));
        assert (normalize ("/foo/../john") == "/john",
                normalize("/foo/../john"));
        assert (normalize ("foo/../john") == "john");
        assert (normalize ("foo/bar/..") == "foo");
        assert (normalize ("foo/bar/../john") == "foo/john");
        assert (normalize ("foo/bar/doe/../../john") == "foo/john");
        assert (normalize ("foo/bar/doe/../../john/../bar") == "foo/bar");
        assert (normalize ("./foo/bar/doe") == "foo/bar/doe");
        assert (normalize ("./foo/bar/doe/../../john/../bar") == "foo/bar");
        assert (normalize ("./foo/bar/../../john/../bar") == "bar");
        assert (normalize ("foo/bar/./doe/../../john") == "foo/john");
        assert (normalize ("../../foo/bar") == "../../foo/bar");
        assert (normalize ("../../../foo/bar") == "../../../foo/bar");
        assert (normalize ("d/") == "d/");

        assert (normalize ("\\foo\\..\\john") == "/john");
        assert (normalize ("foo\\..\\john") == "john");
        assert (normalize ("foo\\bar\\..") == "foo");
        assert (normalize ("foo\\bar\\..\\john") == "foo/john");
        assert (normalize ("foo\\bar\\doe\\..\\..\\john") == "foo/john");
        assert (normalize ("foo\\bar\\doe\\..\\..\\john\\..\\bar") == "foo/bar");
        assert (normalize (".\\foo\\bar\\doe") == "foo/bar/doe");
        assert (normalize (".\\foo\\bar\\doe\\..\\..\\john\\..\\bar") == "foo/bar");
        assert (normalize (".\\foo\\bar\\..\\..\\john\\..\\bar") == "bar");
        assert (normalize ("foo\\bar\\.\\doe\\..\\..\\john") == "foo/john");
        assert (normalize ("..\\..\\foo\\bar") == "../../foo/bar");
        assert (normalize ("..\\..\\..\\foo\\bar") == "../../../foo/bar");

        // empty and fully consumed paths
        assert (normalize ("") == "");
        assert (normalize ("foo/../") == "");
        // short paths behind kept ../ segments
        assert (normalize ("../../b") == "../../b");
        assert (normalize ("../a/../../b") == "../../b");
        assert (normalize ("../.././foo") == "../../foo");
        assert (normalize ("../..") == "../..");
        assert (normalize ("../../..") == "../../..");
        // names that only start with dots are not parent references
        assert (normalize ("foo/..bar/baz") == "foo/..bar/baz");
        assert (normalize ("foo/.../baz") == "foo/.../baz");
        assert (normalize ("foo/..bar/../x") == "foo/x");
        // the root separator survives
        assert (normalize ("/foo/..") == "/");
        assert (normalize ("/.") == "/");
    }
}


/******************************************************************************

    Matches a pattern against a filename.

    Some characters of pattern have a special meaning (they are
    <i>meta-characters</i>) and <b>can't</b> be escaped. These are:
    <p><table>
    <tr><td><b>*</b></td>
        <td>Matches 0 or more instances of any character.</td></tr>
    <tr><td><b>?</b></td>
        <td>Matches exactly one instance of any character.</td></tr>
    <tr><td><b>[</b><i>chars</i><b>]</b></td>
        <td>Matches one instance of any character that appears
        between the brackets.</td></tr>
    <tr><td><b>[!</b><i>chars</i><b>]</b></td>
        <td>Matches one instance of any character that does not appear
        between the brackets after the exclamation mark.</td></tr>
    </table><p>
    Internally individual character comparisons are done calling
    charMatch(), so its rules apply here too. Note that path
    separators and dots don't stop a meta-character from matching
    further portions of the filename.

    A bracket expression that is never closed ("[abc") matches nothing.

    Params:
        filename = the name to test
        pattern  = the pattern to test it against

    Returns: true if pattern matches filename, false otherwise.

    See_Also: charMatch().

    Throws: Nothing.

    Examples:
    -----
    version(Win32)
    {
        patternMatch("foo.bar", "*") // => true
        patternMatch(r"foo/foo\bar", "f*b*r") // => true
        patternMatch("foo.bar", "f?bar") // => false
        patternMatch("Goo.bar", "[fg]???bar") // => true
        patternMatch(r"d:\foo\bar", "d*foo?bar") // => true
    }
    version(Posix)
    {
        patternMatch("Go*.bar", "[fg]???bar") // => false
        patternMatch("/foo*home/bar", "?foo*bar") // => true
        patternMatch("foobar", "foo?bar") // => false
    }
    -----

******************************************************************************/

bool patternMatch(char[] filename, char[] pattern)
in
{
    // Verify that pattern[] is valid
    int i;
    int inbracket = false;

    for (i = 0; i < pattern.length; i++)
    {
        switch (pattern[i])
        {
            case '[':
                assert(!inbracket);
                inbracket = true;
                break;

            case ']':
                assert(inbracket);
                inbracket = false;
                break;

            default:
                break;
        }
    }
}
body
{
    int pi;         // index into pattern
    int ni;         // index into filename
    char pc;
    char nc;
    int j;
    int not;
    int anymatch;

    ni = 0;
    for (pi = 0; pi < pattern.length; pi++)
    {
        pc = pattern[pi];
        switch (pc)
        {
            case '*':
                // a trailing * swallows whatever is left
                if (pi + 1 == pattern.length)
                    return true;
                // try the rest of the pattern against every suffix,
                // including the empty one (j == filename.length) so that
                // a rest which can match nothing, e.g. another *, succeeds
                for (j = ni; j <= filename.length; j++)
                {
                    if (patternMatch(filename[j .. filename.length],
                                     pattern[pi + 1 .. pattern.length]))
                        return true;
                }
                return false;

            case '?':
                if (ni == filename.length)
                    return false;
                ni++;
                break;

            case '[':
                if (ni == filename.length)
                    return false;
                nc = filename[ni];
                ni++;
                not = 0;
                pi++;
                if (pi < pattern.length && pattern[pi] == '!')
                {
                    not = 1;
                    pi++;
                }
                anymatch = 0;
                // compare nc with every member up to the closing bracket
                while (pi < pattern.length && pattern[pi] != ']')
                {
                    if (!anymatch && charMatch(nc, pattern[pi]))
                        anymatch = 1;
                    pi++;
                }
                // ran off the end: the bracket was never closed
                if (pi == pattern.length)
                    return false;
                if (!(anymatch ^ not))
                    return false;
                break;

            default:
                if (ni == filename.length)
                    return false;
                nc = filename[ni];
                if (!charMatch(pc, nc))
                    return false;
                ni++;
                break;
        }
    }

    // pattern used up, the filename must be used up too
    return ni == filename.length;
}


debug (UnitTest)
{
    unittest
    {
        version (Win32)
            assert(patternMatch("foo", "Foo"));
        version (Posix)
            assert(!patternMatch("foo", "Foo"));

        assert(patternMatch("foo", "*"));
        assert(patternMatch("foo.bar", "*"));
        assert(patternMatch("foo.bar", "*.*"));
        assert(patternMatch("foo.bar", "foo*"));
        assert(patternMatch("foo.bar", "f*bar"));
        assert(patternMatch("foo.bar", "f*b*r"));
        assert(patternMatch("foo.bar", "f???bar"));
        assert(patternMatch("foo.bar", "[fg]???bar"));
        assert(patternMatch("foo.bar", "[!gh]*bar"));

        assert(!patternMatch("foo", "bar"));
        assert(!patternMatch("foo", "*.*"));
        assert(!patternMatch("foo.bar", "f*baz"));
        assert(!patternMatch("foo.bar", "f*b*x"));
        assert(!patternMatch("foo.bar", "[gh]???bar"));
        assert(!patternMatch("foo.bar", "[!fg]*bar"));
        assert(!patternMatch("foo.bar", "[fg]???baz"));

        // consecutive stars at the end may match nothing
        assert(patternMatch("foo", "foo**"));
        assert(patternMatch("", "**"));
        assert(!patternMatch("foobar", "foo?bar"));
        // unterminated bracket expressions match nothing
        assert(!patternMatch("foo", "[abc"));
        assert(!patternMatch("a", "["));
        assert(!patternMatch("a", "[!"));
    }
}


/******************************************************************************

     Matches filename characters.

     Under Windows, the comparison is done ignoring case (for the ASCII
     letters a-z only). On every other platform an exact match is
     performed.

     Returns: true if c1 matches c2, false otherwise.

     Throws: Nothing.

     Examples:
     -----
     version(Win32)
     {
         charMatch('a', 'b') // => false
         charMatch('A', 'a') // => true
     }
     version(Posix)
     {
         charMatch('a', 'b') // => false
         charMatch('A', 'a') // => false
     }
     -----

******************************************************************************/

private bool charMatch(char c1, char c2)
{
    version (Win32)
    {
        if (c1 != c2)
        {
            // fold lower case ASCII letters to upper case, then compare
            return ((c1 >= 'a' && c1 <= 'z') ? c1 - ('a' - 'A') : c1) ==
                   ((c2 >= 'a' && c2 <= 'z') ? c2 - ('a' - 'A') : c2);
        }
        return true;
    }
    else
    {
        // also covers platforms that define neither Win32 nor Posix
        return c1 == c2;
    }
}