diff tango/tango/util/PathUtil.d @ 132:1700239cab2e trunk

[svn r136] MAJOR UNSTABLE UPDATE!!! Initial commit after moving to Tango instead of Phobos. Lots of bugfixes... This build is not suitable for most things.
author lindquist
date Fri, 11 Jan 2008 17:57:40 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tango/tango/util/PathUtil.d	Fri Jan 11 17:57:40 2008 +0100
@@ -0,0 +1,471 @@
+/*******************************************************************************
+
+        copyright:      Copyright (c) 2006 Lars Ivar Igesund, Thomas Kühne,
+                                            Grzegorz Adam Hankiewicz
+
+        license:        BSD style: $(LICENSE)
+
+        version:        Dec 2006: Initial release
+
+        author:         Lars Ivar Igesund, Thomas Kühne,
+                        Grzegorz Adam Hankiewicz
+
+*******************************************************************************/
+
+module tango.util.PathUtil;
+
+private import  tango.core.Exception;
+
+/*******************************************************************************
+
+    Normalizes a path component as specified in section 5.2 of RFC 2396.
+
+    ./ in path is removed
+    /. at the end is removed
+    <segment>/.. at the end is removed
+    <segment>/../ in path is removed
+
+    Unless normSlash is set to false, all backslashes will be converted
+    to forward slashes ('/').
+
+    Note that any number of ../ segments at the front is ignored,
+    unless it is an absolute path, in which case an exception will
+    be thrown. A relative path with ../ segments at the front is only
+    considered valid if it can be joined with a path such that it can
+    be fully normalized.
+
+    Throws: IllegalArgumentException if the root separator is followed by ..
+
+    Examples:
+    -----
+     normalize("/home/foo/./bar/../../john/doe"); // => "/home/john/doe"
+    -----
+
+*******************************************************************************/
+
+char[] normalize(char[] path, bool normSlash = true)
+{
+    /*
+       Internal helper that turns every backslash into a forward slash.
+       Works in place on the given slice and returns it.
+    */
+    char[] normalizeSlashes(char[] path)
+    {
+        char to = '/', from = '\\';
+
+        foreach (inout c; path)
+                 if (c is from)
+                     c = to;
+        return path;
+    }
+
+    /*
+       Internal helper that finds the first slash at or after start that
+       is directly followed by a dot.
+
+       Returns the index of that dot, or -1 if there is no such pair. A
+       start at or beyond the end of the path (which covers every start
+       for an empty path) simply yields -1 instead of failing.
+    */
+    int findSlashDot(char[] path, int start) {
+        // index of the last character; -1 for an empty path
+        int last = cast(int) path.length - 1;
+
+        for (int i = start < 0 ? 0 : start; i < last; i++)
+            if (path[i] == '/' && path[i+1] == '.')
+                return i + 1;
+
+        return -1;
+    }
+
+    /*
+       Internal helper that finds a slash searching backwards from start.
+
+       Returns the index of the slash, or -1 if there is none (also when
+       start is negative).
+    */
+    int findSlash(char[] path, int start) {
+        // compare as signed values: path.length is unsigned, so a negative
+        // start would otherwise be converted to a huge number
+        assert(start < cast(int) path.length);
+
+        for (int i = start; i >= 0; i--) {
+            if (path[i] == '/') {
+                return i;
+            }
+        }
+        return -1;
+    }
+
+    /*
+        Internal helper that recursively shortens all segments with dots.
+
+        start is the index of a dot that directly follows a slash.
+    */
+    char[] removeDots(char[] path, int start) {
+        assert (start > 0 && start < cast(int) path.length);
+        assert (path[start] == '.');
+        if (start + 1 == path.length) {
+            // path ends with /., remove
+            return path[0..start - 1];
+        }
+        else if (path[start+1] == '/') {
+            // remove all subsequent './'
+            do {
+                path = path[0..start] ~ path[start+2..$];
+            } while (start + 2 < path.length && path[start..start+2] == "./");
+            int idx = findSlashDot(path, start - 1);
+            if (idx < 0) {
+                // no more /., return path
+                return path;
+            }
+            return removeDots(path, idx);
+        }
+        else if (path[start..start+2] == ".." &&
+                 (start + 2 == path.length || path[start+2] == '/')) {
+            // found a segment that is exactly "..". Names that merely
+            // begin with two dots ("..foo", "...") are ordinary segments
+            // and are handled by the final else branch.
+version (Win32) {
+            if (start == 3 && path[1] == ':') { // absolute, X:/..
+                throw new IllegalArgumentException("PathUtil :: Invalid absolute path, root can not be followed by ..");
+            }
+
+}
+else {
+            if (start == 1) { // absolute
+                throw new IllegalArgumentException("PathUtil :: Invalid absolute path, root separator can not be followed by ..");
+            }
+}
+            // idx is the slash in front of the preceding segment, or -1
+            int idx = findSlash(path, start - 2);
+            // a preceding ".." can not be cancelled by this one, keep both
+            bool keepBoth = path[idx+1..start-1] == "..";
+
+            if (start + 2 == path.length) {
+                // path ends with /..
+                if (keepBoth) {
+                    return path;
+                }
+                if (idx < 0) {
+                    // no more slashes in front of /.., resolves to empty path
+                    return "";
+                }
+                // remove /.. and preceding segment and return
+                return path[0..idx];
+            }
+
+            // found /../ sequence
+            if (keepBoth) {
+                // continue at the slash that ends this ".."; searching
+                // from there can neither run past the end nor skip a
+                // "/." that follows immediately
+                idx = findSlashDot(path, start + 2);
+                if (idx < 0) {
+                    // no more /., path fully shortened
+                    return path;
+                }
+                return removeDots(path, idx);
+            }
+
+            // remove <segment>/../ ; without a slash in front of the
+            // segment the path becomes everything after the sequence
+            path = path[0..idx < 0 ? 0 : idx + 1] ~ path[start+3..$];
+            idx = findSlashDot(path, idx < 0 ? 0 : idx);
+            if (idx < 0) {
+                // no more /., path fully shortened
+                return path;
+            }
+            // examine next /.
+            return removeDots(path, idx);
+        }
+        else {
+            // segment is a name that starts with a dot; keep it and
+            // examine the next /. if there is one
+            int idx = findSlashDot(path, start);
+            if (idx < 0) {
+                return path;
+            }
+            return removeDots(path, idx);
+        }
+    }
+
+    // work on a copy, the helpers modify and re-slice the path
+    char[] normpath = path.dup;
+    if (normSlash) {
+        normpath = normalizeSlashes(normpath);
+    }
+
+    // if path starts with ./, remove all subsequent instances
+    while (normpath.length > 1 && normpath[0] == '.' &&
+        normpath[1] == '/') {
+        normpath = normpath[2..$];
+    }
+    int idx = findSlashDot(normpath, 0);
+    if (idx > -1) {
+        normpath = removeDots(normpath, idx);
+    }
+
+    return normpath;
+}
+
+
+debug (UnitTest)
+{
+
+    unittest
+    {
+        // Normalizes input and requires the result to equal expected;
+        // the actual result is used as the assertion message.
+        void expect(char[] input, char[] expected)
+        {
+            char[] actual = normalize (input);
+            assert (actual == expected, actual);
+        }
+
+        // forward slashes
+        expect ("/home/../john/../.tango/.htaccess", "/.tango/.htaccess");
+        expect ("/home/../john/../.tango/foo.conf", "/.tango/foo.conf");
+        expect ("/home/john/.tango/foo.conf", "/home/john/.tango/foo.conf");
+        expect ("/foo/bar/.htaccess", "/foo/bar/.htaccess");
+        expect ("foo/bar/././.", "foo/bar");
+        expect ("././foo/././././bar", "foo/bar");
+        expect ("/foo/../john", "/john");
+        expect ("foo/../john", "john");
+        expect ("foo/bar/..", "foo");
+        expect ("foo/bar/../john", "foo/john");
+        expect ("foo/bar/doe/../../john", "foo/john");
+        expect ("foo/bar/doe/../../john/../bar", "foo/bar");
+        expect ("./foo/bar/doe", "foo/bar/doe");
+        expect ("./foo/bar/doe/../../john/../bar", "foo/bar");
+        expect ("./foo/bar/../../john/../bar", "bar");
+        expect ("foo/bar/./doe/../../john", "foo/john");
+        expect ("../../foo/bar", "../../foo/bar");
+        expect ("../../../foo/bar", "../../../foo/bar");
+        expect ("d/", "d/");
+
+        // backslashes are converted before the path is shortened
+        expect ("\\foo\\..\\john", "/john");
+        expect ("foo\\..\\john", "john");
+        expect ("foo\\bar\\..", "foo");
+        expect ("foo\\bar\\..\\john", "foo/john");
+        expect ("foo\\bar\\doe\\..\\..\\john", "foo/john");
+        expect ("foo\\bar\\doe\\..\\..\\john\\..\\bar", "foo/bar");
+        expect (".\\foo\\bar\\doe", "foo/bar/doe");
+        expect (".\\foo\\bar\\doe\\..\\..\\john\\..\\bar", "foo/bar");
+        expect (".\\foo\\bar\\..\\..\\john\\..\\bar", "bar");
+        expect ("foo\\bar\\.\\doe\\..\\..\\john", "foo/john");
+        expect ("..\\..\\foo\\bar", "../../foo/bar");
+        expect ("..\\..\\..\\foo\\bar", "../../../foo/bar");
+    }
+}
+
+
+/******************************************************************************
+
+    Matches a pattern against a filename.
+
+    Some characters of pattern have a special meaning (they are
+    <i>meta-characters</i>) and <b>can't</b> be escaped. These are:
+    <p><table>
+    <tr><td><b>*</b></td>
+        <td>Matches 0 or more instances of any character.</td></tr>
+    <tr><td><b>?</b></td>
+        <td>Matches exactly one instance of any character.</td></tr>
+    <tr><td><b>[</b><i>chars</i><b>]</b></td>
+        <td>Matches one instance of any character that appears
+        between the brackets.</td></tr>
+    <tr><td><b>[!</b><i>chars</i><b>]</b></td>
+        <td>Matches one instance of any character that does not appear
+        between the brackets after the exclamation mark.</td></tr>
+    </table><p>
+    Internally individual character comparisons are done calling
+    charMatch(), so its rules apply here too. Note that path
+    separators and dots don't stop a meta-character from matching
+    further portions of the filename.
+
+    Returns: true if pattern matches filename, false otherwise.
+
+    See_Also: charMatch().
+
+    Throws: Nothing.
+
+    Examples:
+    -----
+    version(Win32)
+    {
+        patternMatch("foo.bar", "*") // => true
+        patternMatch(r"foo/foo\bar", "f*b*r") // => true
+        patternMatch("foo.bar", "f?bar") // => false
+        patternMatch("Goo.bar", "[fg]???bar") // => true
+        patternMatch(r"d:\foo\bar", "d*foo?bar") // => true
+    }
+    version(Posix)
+    {
+        patternMatch("Go*.bar", "[fg]???bar") // => false
+        patternMatch("/foo*home/bar", "?foo*bar") // => true
+        patternMatch("foobar", "foo?bar") // => false
+    }
+    -----
+    
+******************************************************************************/
+
+bool patternMatch(char[] filename, char[] pattern)
+in
+{
+    // Verify that pattern[] is valid: a '[' may not appear inside a
+    // bracket set and a ']' must close one.
+    int i;
+    int inbracket = false;
+
+    for (i = 0; i < pattern.length; i++)
+    {
+        switch (pattern[i])
+        {
+        case '[':
+            assert(!inbracket);
+            inbracket = true;
+            break;
+
+        case ']':
+            assert(inbracket);
+            inbracket = false;
+            break;
+
+        default:
+            break;
+        }
+    }
+}
+body
+{
+    int pi;             // index into pattern
+    int ni = 0;         // index of the next unmatched filename character
+    char pc;            // current pattern character
+    char nc;            // filename character tested against a bracket set
+    bool negate;        // bracket set starts with '!'
+    bool found;         // nc matched a member of the bracket set
+
+    for (pi = 0; pi < pattern.length; pi++)
+    {
+        pc = pattern[pi];
+        switch (pc)
+        {
+        case '*':
+            // a star at the end of the pattern swallows whatever is left
+            if (pi + 1 == pattern.length)
+                return true;
+
+            // Try the rest of the pattern against every tail of the
+            // filename. The empty tail (j == filename.length) has to be
+            // tried too, otherwise a pattern such as "a**" would not
+            // match "a".
+            for (int j = ni; j <= filename.length; j++)
+            {
+                if (patternMatch(filename[j .. $], pattern[pi + 1 .. $]))
+                    return true;
+            }
+            return false;
+
+        case '?':
+            // needs exactly one character, whatever it is
+            if (ni == filename.length)
+                return false;
+            ni++;
+            break;
+
+        case '[':
+            if (ni == filename.length)
+                return false;
+            nc = filename[ni];
+            ni++;
+            pi++;
+            negate = pi < pattern.length && pattern[pi] == '!';
+            if (negate)
+                pi++;
+            found = false;
+            for (;; pi++)
+            {
+                // The set is never closed: report no match rather than
+                // reading past the end of the pattern.
+                if (pi >= pattern.length)
+                    return false;
+                pc = pattern[pi];
+                if (pc == ']')
+                    break;
+                if (!found && charMatch(nc, pc))
+                    found = true;
+            }
+            // plain set: nc must be a member; negated set: it must not
+            if (found == negate)
+                return false;
+            break;
+
+        default:
+            // literal character, compared using the rules of charMatch()
+            if (ni == filename.length || !charMatch(pc, filename[ni]))
+                return false;
+            ni++;
+            break;
+        }
+    }
+
+    // the pattern is used up; it is a match only if the filename is too
+    return ni == filename.length;
+}
+
+
+debug (UnitTest)
+{
+    unittest
+    {
+    // requires that pattern matches name
+    void accepts(char[] name, char[] pattern)
+    {
+        assert(patternMatch(name, pattern));
+    }
+
+    // requires that pattern does not match name
+    void rejects(char[] name, char[] pattern)
+    {
+        assert(!patternMatch(name, pattern));
+    }
+
+    // case only matters on Posix
+    version (Win32)
+        accepts("foo", "Foo");
+    version (Posix)
+        rejects("foo", "Foo");
+
+    accepts("foo", "*");
+    accepts("foo.bar", "*");
+    accepts("foo.bar", "*.*");
+    accepts("foo.bar", "foo*");
+    accepts("foo.bar", "f*bar");
+    accepts("foo.bar", "f*b*r");
+    accepts("foo.bar", "f???bar");
+    accepts("foo.bar", "[fg]???bar");
+    accepts("foo.bar", "[!gh]*bar");
+
+    rejects("foo", "bar");
+    rejects("foo", "*.*");
+    rejects("foo.bar", "f*baz");
+    rejects("foo.bar", "f*b*x");
+    rejects("foo.bar", "[gh]???bar");
+    rejects("foo.bar", "[!fg]*bar");
+    rejects("foo.bar", "[fg]???baz");
+
+    }
+}
+
+
+/******************************************************************************
+
+     Matches filename characters.
+
+     Under Windows, the comparison is done ignoring case. Under Linux
+     an exact match is performed.
+
+     Returns: true if c1 matches c2, false otherwise.
+
+     Throws: Nothing.
+
+     Examples:
+     -----
+     version(Win32)
+     {
+         charMatch('a', 'b') // => false
+         charMatch('A', 'a') // => true
+     }
+     version(Posix)
+     {
+         charMatch('a', 'b') // => false
+         charMatch('A', 'a') // => false
+     }
+     -----
+******************************************************************************/
+
+private bool charMatch(char c1, char c2)
+{
+    version (Win32)
+    {
+        // identical characters always match
+        if (c1 == c2)
+            return true;
+
+        // otherwise compare with the letters a-z folded to upper case
+        int folded1 = c1;
+        if (c1 >= 'a' && c1 <= 'z')
+            folded1 = c1 - ('a' - 'A');
+
+        int folded2 = c2;
+        if (c2 >= 'a' && c2 <= 'z')
+            folded2 = c2 - ('a' - 'A');
+
+        return folded1 == folded2;
+    }
+    version (Posix)
+    {
+        // exact comparison only
+        return c1 == c2;
+    }
+}
+