Mercurial > projects > ldc

diff tango/tango/util/PathUtil.d @ 132:1700239cab2e trunk
[svn r136] MAJOR UNSTABLE UPDATE!!! Initial commit after moving to Tango instead of Phobos. Lots of bugfixes... This build is not suitable for most things.
author: lindquist
date: Fri, 11 Jan 2008 17:57:40 +0100
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tango/tango/util/PathUtil.d	Fri Jan 11 17:57:40 2008 +0100
@@ -0,0 +1,471 @@
+/*******************************************************************************
+
+        copyright:      Copyright (c) 2006 Lars Ivar Igesund, Thomas Kühne,
+                                            Grzegorz Adam Hankiewicz
+
+        license:        BSD style: $(LICENSE)
+
+        version:        Dec 2006: Initial release
+
+        author:         Lars Ivar Igesund, Thomas Kühne,
+                        Grzegorz Adam Hankiewicz
+
+*******************************************************************************/
+
+module tango.util.PathUtil;
+
+private import  tango.core.Exception;
+
+/*******************************************************************************
+
+    Normalizes a path component as specified in section 5.2 of RFC 2396.
+
+    ./ in path is removed
+    /. at the end is removed
+    <segment>/.. at the end is removed
+    <segment>/../ in path is removed
+
+    Unless normSlash is set to false, all slashes will be converted
+    to the systems path separator character.
+
+    Note that any number of ../ segments at the front is ignored,
+    unless it is an absolute path, in which case an exception will
+    be thrown. A relative path with ../ segments at the front is only
+    considered valid if it can be joined with a path such that it can
+    be fully normalized.
+
+    Throws: Exception if the root separator is followed by ..
+
+    Examples:
+    -----
+     normalize("/home/foo/./bar/../../john/doe"); // => "/home/john/doe"
+    -----
+
+*******************************************************************************/
+
+char[] normalize(char[] path, bool normSlash = true)
+{
+    /*
+       Internal helper to patch slashes
+    */
+    char[] normalizeSlashes(char[] path)
+    {
+        char to = '/', from = '\\';
+
+        foreach (inout c; path)
+                 if (c is from)
+                     c = to;
+        return path;
+    }
+
+    /*
+       Internal helper that finds a slash followed by a dot
+    */
+    int findSlashDot(char[] path, int start) {
+        assert(start < path.length);
+        foreach(i, c; path[start..$-1]) 
+            if (c == '/') 
+                if (path[start+i+1] == '.') 
+                    return i + start + 1;
+
+        return -1;
+    }
+
+    /*
+       Internal helper that finds a slash starting at the back
+    */
+    int findSlash(char[] path, int start) {
+        assert(start < path.length);
+
+        if (start < 0)
+            return -1;
+
+        for (int i = start; i >= 0; i--) {
+            if (path[i] == '/') {
+                return i;
+            }
+        }
+        return -1;
+    }
+
+    /*
+        Internal helper that recursively shortens all segments with dots.
+    */
+    char[] removeDots(char[] path, int start) {
+        assert (start < path.length);
+        assert (path[start] == '.');
+        if (start + 1 == path.length) {
+            // path ends with /., remove
+            return path[0..start - 1];
+        }
+        else if (path[start+1] == '/') {
+            // remove all subsequent './'
+            do {
+                path = path[0..start] ~ path[start+2..$];
+            } while (start + 2 < path.length && path[start..start+2] == "./");
+            int idx = findSlashDot(path, start - 1);
+            if (idx < 0) {
+                // no more /., return path
+                return path;
+            }
+            return removeDots(path, idx);
+        }
+        else if (path[start..start+2] == "..") {
+            // found /.. sequence
+version (Win32) {
+            if (start == 3 && path[1] == '/') { // absolute, X:/..
+                throw new IllegalArgumentException("PathUtil :: Invalid absolute path, root can not be followed by ..");
+            }
+
+}
+else {
+            if (start == 1) { // absolute
+                throw new IllegalArgumentException("PathUtil :: Invalid absolute path, root separator can not be followed by ..");
+            }
+}
+            int idx = findSlash(path, start - 2);
+            if (start + 2 == path.length) {
+                // path ends with /..
+                if (idx < 0) {
+                    // no more slashes in front of /.., resolves to empty path
+                    return "";
+                }
+                // remove /.. and preceding segment and return
+                return path[0..idx];
+            }
+            else if (path[start+2] == '/') {
+                // found /../ sequence
+                // if no slashes before /../, set path to everything after
+                // if <segment>/../ is ../../, keep
+                // otherwise, remove <segment>/../
+                if (path[idx+1..start-1] == "..") {
+                    idx = findSlashDot(path, start+4);
+                    if (idx < 0) {
+                        // no more /., path fully shortened
+                        return path;
+                    }
+                    return removeDots(path, idx);
+                }
+                path = path[0..idx < 0 ? 0 : idx + 1] ~ path[start+3..$];
+                idx = findSlashDot(path, idx < 0 ? 0 : idx);
+                if (idx < 0) {
+                    // no more /., path fully shortened
+                    return path;
+                }
+                // examine next /.
+                return removeDots(path, idx);
+            }
+        }
+        else {
+            if (findSlash(path, path.length - 1) < start)
+                // segment is filename that starts with ., and at the end
+                return path;
+            else {
+                // not at end
+                int idx = findSlashDot(path, start);
+                if (idx > -1) 
+                    return removeDots(path, idx);
+                else
+                    return path;
+            }
+        }
+        assert(false, "PathUtil :: invalid code path");
+    }
+
+    char[] normpath = path.dup;
+    if (normSlash) {
+        normpath = normalizeSlashes(normpath);
+    }
+
+    // if path starts with ./, remove all subsequent instances
+    while (normpath.length > 1 && normpath[0] == '.' &&
+        normpath[1] == '/') {
+        normpath = normpath[2..$];
+    }
+    int idx = findSlashDot(normpath, 0);
+    if (idx > -1) {
+        normpath = removeDots(normpath, idx);
+    }
+
+    return normpath;
+}
+
+
+debug (UnitTest)
+{
+
+    unittest
+    {
+        assert (normalize ("/home/../john/../.tango/.htaccess") == "/.tango/.htaccess",
+                normalize ("/home/../john/../.tango/.htaccess"));
+        assert (normalize ("/home/../john/../.tango/foo.conf") == "/.tango/foo.conf",
+                normalize ("/home/../john/../.tango/foo.conf"));
+        assert (normalize ("/home/john/.tango/foo.conf") == "/home/john/.tango/foo.conf",
+                normalize ("/home/john/.tango/foo.conf"));
+        assert (normalize ("/foo/bar/.htaccess") == "/foo/bar/.htaccess", 
+                normalize ("/foo/bar/.htaccess"));
+        assert (normalize ("foo/bar/././.") == "foo/bar", 
+                normalize ("foo/bar/././."));
+        assert (normalize ("././foo/././././bar") == "foo/bar", 
+                normalize ("././foo/././././bar"));
+        assert (normalize ("/foo/../john") == "/john", 
+                normalize("/foo/../john"));
+        assert (normalize ("foo/../john") == "john");
+        assert (normalize ("foo/bar/..") == "foo");
+        assert (normalize ("foo/bar/../john") == "foo/john");
+        assert (normalize ("foo/bar/doe/../../john") == "foo/john");
+        assert (normalize ("foo/bar/doe/../../john/../bar") == "foo/bar");
+        assert (normalize ("./foo/bar/doe") == "foo/bar/doe");
+        assert (normalize ("./foo/bar/doe/../../john/../bar") == "foo/bar");
+        assert (normalize ("./foo/bar/../../john/../bar") == "bar");
+        assert (normalize ("foo/bar/./doe/../../john") == "foo/john");
+        assert (normalize ("../../foo/bar") == "../../foo/bar");
+        assert (normalize ("../../../foo/bar") == "../../../foo/bar");
+        assert (normalize ("d/") == "d/");
+
+        assert (normalize ("\\foo\\..\\john") == "/john");
+        assert (normalize ("foo\\..\\john") == "john");
+        assert (normalize ("foo\\bar\\..") == "foo");
+        assert (normalize ("foo\\bar\\..\\john") == "foo/john");
+        assert (normalize ("foo\\bar\\doe\\..\\..\\john") == "foo/john");
+        assert (normalize ("foo\\bar\\doe\\..\\..\\john\\..\\bar") == "foo/bar");
+        assert (normalize (".\\foo\\bar\\doe") == "foo/bar/doe");
+        assert (normalize (".\\foo\\bar\\doe\\..\\..\\john\\..\\bar") == "foo/bar");
+        assert (normalize (".\\foo\\bar\\..\\..\\john\\..\\bar") == "bar");
+        assert (normalize ("foo\\bar\\.\\doe\\..\\..\\john") == "foo/john");
+        assert (normalize ("..\\..\\foo\\bar") == "../../foo/bar");
+        assert (normalize ("..\\..\\..\\foo\\bar") == "../../../foo/bar");
+    }
+}
+
+
+/******************************************************************************
+
+    Matches a pattern against a filename.
+
+    Some characters of pattern have special a meaning (they are
+    <i>meta-characters</i>) and <b>can't</b> be escaped. These are:
+    <p><table>
+    <tr><td><b>*</b></td>
+        <td>Matches 0 or more instances of any character.</td></tr>
+    <tr><td><b>?</b></td>
+        <td>Matches exactly one instances of any character.</td></tr>
+    <tr><td><b>[</b><i>chars</i><b>]</b></td>
+        <td>Matches one instance of any character that appears
+        between the brackets.</td></tr>
+    <tr><td><b>[!</b><i>chars</i><b>]</b></td>
+        <td>Matches one instance of any character that does not appear
+        between the brackets after the exclamation mark.</td></tr>
+    </table><p>
+    Internally individual character comparisons are done calling
+    charMatch(), so its rules apply here too. Note that path
+    separators and dots don't stop a meta-character from matching
+    further portions of the filename.
+
+    Returns: true if pattern matches filename, false otherwise.
+
+    See_Also: charMatch().
+
+    Throws: Nothing.
+
+    Examples:
+    -----
+    version(Win32)
+    {
+        patternMatch("foo.bar", "*") // => true
+        patternMatch(r"foo/foo\bar", "f*b*r") // => true
+        patternMatch("foo.bar", "f?bar") // => false
+        patternMatch("Goo.bar", "[fg]???bar") // => true
+        patternMatch(r"d:\foo\bar", "d*foo?bar") // => true
+    }
+    version(Posix)
+    {
+        patternMatch("Go*.bar", "[fg]???bar") // => false
+        patternMatch("/foo*home/bar", "?foo*bar") // => true
+        patternMatch("foobar", "foo?bar") // => true
+    }
+    -----
+    
+******************************************************************************/
+
+bool patternMatch(char[] filename, char[] pattern)
+in
+{
+    // Verify that pattern[] is valid
+    int i;
+    int inbracket = false;
+
+    for (i = 0; i < pattern.length; i++)
+    {
+        switch (pattern[i])
+        {
+        case '[':
+            assert(!inbracket);
+            inbracket = true;
+            break;
+
+        case ']':
+            assert(inbracket);
+            inbracket = false;
+            break;
+
+        default:
+            break;
+        }
+    }
+}
+body
+{
+    int pi;
+    int ni;
+    char pc;
+    char nc;
+    int j;
+    int not;
+    int anymatch;
+
+    ni = 0;
+    for (pi = 0; pi < pattern.length; pi++)
+    {
+        pc = pattern[pi];
+        switch (pc)
+        {
+        case '*':
+            if (pi + 1 == pattern.length)
+                goto match;
+            for (j = ni; j < filename.length; j++)
+            {
+                if (patternMatch(filename[j .. filename.length],
+                            pattern[pi + 1 .. pattern.length]))
+                    goto match;
+            }
+            goto nomatch;
+
+        case '?':
+            if (ni == filename.length)
+            goto nomatch;
+            ni++;
+            break;
+
+        case '[':
+            if (ni == filename.length)
+                goto nomatch;
+            nc = filename[ni];
+            ni++;
+            not = 0;
+            pi++;
+            if (pattern[pi] == '!')
+            {
+                not = 1;
+                pi++;
+            }
+            anymatch = 0;
+            while (1)
+            {
+                pc = pattern[pi];
+                if (pc == ']')
+                    break;
+                if (!anymatch && charMatch(nc, pc))
+                    anymatch = 1;
+                pi++;
+            }
+            if (!(anymatch ^ not))
+                goto nomatch;
+            break;
+
+        default:
+            if (ni == filename.length)
+                goto nomatch;
+            nc = filename[ni];
+            if (!charMatch(pc, nc))
+                goto nomatch;
+            ni++;
+            break;
+        }
+    }
+    if (ni < filename.length)
+        goto nomatch;
+
+    match:
+    return true;
+
+    nomatch:
+    return false;
+}
+
+
+debug (UnitTest)
+{
+    unittest
+    {
+    version (Win32)
+        assert(patternMatch("foo", "Foo"));
+    version (Posix)
+        assert(!patternMatch("foo", "Foo"));
+    
+    assert(patternMatch("foo", "*"));
+    assert(patternMatch("foo.bar", "*"));
+    assert(patternMatch("foo.bar", "*.*"));
+    assert(patternMatch("foo.bar", "foo*"));
+    assert(patternMatch("foo.bar", "f*bar"));
+    assert(patternMatch("foo.bar", "f*b*r"));
+    assert(patternMatch("foo.bar", "f???bar"));
+    assert(patternMatch("foo.bar", "[fg]???bar"));
+    assert(patternMatch("foo.bar", "[!gh]*bar"));
+
+    assert(!patternMatch("foo", "bar"));
+    assert(!patternMatch("foo", "*.*"));
+    assert(!patternMatch("foo.bar", "f*baz"));
+    assert(!patternMatch("foo.bar", "f*b*x"));
+    assert(!patternMatch("foo.bar", "[gh]???bar"));
+    assert(!patternMatch("foo.bar", "[!fg]*bar"));
+    assert(!patternMatch("foo.bar", "[fg]???baz"));
+
+    }
+}
+
+
+/******************************************************************************
+
+     Matches filename characters.
+
+     Under Windows, the comparison is done ignoring case. Under Linux
+     an exact match is performed.
+
+     Returns: true if c1 matches c2, false otherwise.
+
+     Throws: Nothing.
+
+     Examples:
+     -----
+     version(Win32)
+     {
+         charMatch('a', 'b') // => false
+         charMatch('A', 'a') // => true
+     }
+     version(Posix)
+     {
+         charMatch('a', 'b') // => false
+         charMatch('A', 'a') // => false
+     }
+     -----
+******************************************************************************/
+
+private bool charMatch(char c1, char c2)
+{
+    version (Win32)
+    {
+        
+        if (c1 != c2)
+        {
+            return ((c1 >= 'a' && c1 <= 'z') ? c1 - ('a' - 'A') : c1) ==
+                   ((c2 >= 'a' && c2 <= 'z') ? c2 - ('a' - 'A') : c2);
+        }
+        return true;
+    }
+    version (Posix)
+    {
+        return c1 == c2;
+    }
+}
+
author	lindquist
date	Fri, 11 Jan 2008 17:57:40 +0100
parents
children