/*
    Web view of tango/tango/util/PathUtil.d @ 270:d9d5d59873d8 (trunk)

    [svn r291] Fixed a bunch of the old Phobos tests to work with Tango.
    Branch statements now emit a new block after them. Fixed the bad
    signature of the _adSort runtime function. Added a missing dot prefix on
    compiler-generated string tables for string switch. Fixed PTRSIZE, which
    seems to have been wrong on 64-bit; it now definitely gets set properly.

    author:    lindquist
    date:      Mon, 16 Jun 2008 16:01:19 +0200
    parents:   1700239cab2e
    line wrap: on
    line source
*/


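/*******************************************************************************

        copyright:      Copyright (c) 2006 Lars Ivar Igesund, Thomas Kühne,
                                            Grzegorz Adam Hankiewicz

        license:        BSD style: $(LICENSE)

        version:        Dec 2006: Initial release

        author:         Lars Ivar Igesund, Thomas Kühne,
                        Grzegorz Adam Hankiewicz

*******************************************************************************/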


module tango.util.PathUtil;

private import  tango.core.Exception;


/*******************************************************************************

    Normalizes a path component as specified in section 5.2 of RFC 2396.

    ./ in path is removed
    /. at the end is removed
    <segment>/.. at the end is removed
    <segment>/../ in path is removed

    Unless normSlash is set to false, all backslashes are converted to
    forward slashes ('/') before the path is normalized.

    Note that any number of ../ segments at the front is ignored,
    unless it is an absolute path, in which case an exception will
    be thrown. A relative path with ../ segments at the front is only
    considered valid if it can be joined with a path such that it can
    be fully normalized.

    Throws: IllegalArgumentException if the root separator is followed
            by ..

    Examples:
    -----
     normalize("/home/foo/./bar/../../john/doe"); // => "/home/john/doe"
    -----

*******************************************************************************/


/*
    normalize: collapses "." and ".." segments of a path.

    Params:
        path      = the path to normalize; it is duplicated first, so the
                    caller's array is never modified
        normSlash = when true (the default) every backslash is rewritten to
                    a forward slash before the dot segments are processed

    Returns: the normalized path; "" when every segment cancels out.

    Throws: IllegalArgumentException when the root of an absolute path is
            directly followed by a ".." segment.
*/
char[] normalize(char[] path, bool normSlash = true)
{
    /*
       Internal helper to patch slashes: rewrites every backslash to a
       forward slash, in place, and returns the same array.
    */
    char[] normalizeSlashes(char[] path)
    {
        char to = '/', from = '\\';

        foreach (inout c; path)
            if (c is from)
                c = to;

        return path;
    }

    /*
       Internal helper that finds a slash followed by a dot.

       Returns the index of the dot, or -1 when no "/." occurs at or after
       start. An empty path, or a start beyond the last position that can
       still hold a "/." pair, simply yields -1 instead of tripping an
       assertion or slicing out of range.
    */
    int findSlashDot(char[] path, int start)
    {
        int len = cast(int) path.length;

        if (start < 0)
            start = 0;

        for (int i = start; i + 1 < len; i++)
        {
            if (path[i] == '/' && path[i + 1] == '.')
                return i + 1;
        }

        return -1;
    }

    /*
       Internal helper that finds a slash starting at the back.

       Scans from start down to index 0 and returns the index of the first
       slash met, or -1 when there is none.
    */
    int findSlash(char[] path, int start)
    {
        // The sign test must come before the assertion: comparing a negative
        // int with the unsigned length would otherwise convert it to a huge
        // value and make the assertion fail.
        if (start < 0)
            return -1;

        assert(start < path.length);

        for (int i = start; i >= 0; i--)
        {
            if (path[i] == '/')
                return i;
        }
        return -1;
    }

    /*
        Internal helper that recursively shortens all segments with dots.

        start is the index of a '.' that directly follows a '/'.
    */
    char[] removeDots(char[] path, int start)
    {
        assert (start < path.length);
        assert (path[start] == '.');

        if (start + 1 == path.length)
        {
            // path ends with /., remove
            return path[0..start - 1];
        }
        else if (path[start+1] == '/')
        {
            // remove all subsequent './'
            do
            {
                path = path[0..start] ~ path[start+2..$];
            } while (start + 2 < path.length && path[start..start+2] == "./");

            int next = findSlashDot(path, start - 1);
            if (next < 0)
            {
                // no more /., return path
                return path;
            }
            return removeDots(path, next);
        }
        else if (path[start..start+2] == ".."
                 && (start + 2 == path.length || path[start+2] == '/'))
        {
            // found a /.. segment (a name such as "..foo" is not one and is
            // handled as an ordinary dot-file further down)
            version (Win32)
            {
                if (start == 3 && path[1] == ':') // absolute, X:/..
                {
                    throw new IllegalArgumentException("PathUtil :: Invalid absolute path, root can not be followed by ..");
                }
            }
            else
            {
                if (start == 1) // absolute
                {
                    throw new IllegalArgumentException("PathUtil :: Invalid absolute path, root separator can not be followed by ..");
                }
            }

            // slash in front of the segment that precedes /..
            int prev = findSlash(path, start - 2);

            if (start + 2 == path.length)
            {
                // path ends with /..
                if (prev < 0)
                {
                    // no more slashes in front of /.., resolves to empty path
                    return "";
                }
                // remove /.. and preceding segment and return
                return path[0..prev];
            }

            // found /../ sequence
            // if no slashes before /../, set path to everything after
            // if <segment>/../ is ../../, keep
            // otherwise, remove <segment>/../
            if (path[prev+1..start-1] == "..")
            {
                int next = findSlashDot(path, start + 4);
                if (next < 0)
                {
                    // no more /., path fully shortened
                    return path;
                }
                return removeDots(path, next);
            }

            path = path[0..prev < 0 ? 0 : prev + 1] ~ path[start+3..$];
            int next = findSlashDot(path, prev < 0 ? 0 : prev);
            if (next < 0)
            {
                // no more /., path fully shortened
                return path;
            }
            // examine next /.
            return removeDots(path, next);
        }

        // The segment is an ordinary name that merely starts with a dot.
        if (findSlash(path, cast(int) path.length - 1) < start)
        {
            // segment is filename that starts with ., and at the end
            return path;
        }

        // not at end
        int following = findSlashDot(path, start);
        if (following > -1)
            return removeDots(path, following);
        return path;
    }

    char[] normpath = path.dup;
    if (normSlash)
    {
        normpath = normalizeSlashes(normpath);
    }

    // if path starts with ./, remove all subsequent instances
    while (normpath.length > 1 && normpath[0] == '.' &&
        normpath[1] == '/')
    {
        normpath = normpath[2..$];
    }

    int idx = findSlashDot(normpath, 0);
    if (idx > -1)
    {
        normpath = removeDots(normpath, idx);
    }

    return normpath;
}

debug (UnitTest)
{
    // Exercises normalize() with forward-slash paths first, then with the
    // same paths written with backslashes (normSlash defaults to true).
    // Where a second argument is given to assert, it is the actual result,
    // so a failure reports what normalize() produced.
    unittest
    {
        assert (normalize ("/home/../john/../.tango/.htaccess") == "/.tango/.htaccess",
                normalize ("/home/../john/../.tango/.htaccess"));
        assert (normalize ("/home/../john/../.tango/foo.conf") == "/.tango/foo.conf",
                normalize ("/home/../john/../.tango/foo.conf"));
        assert (normalize ("/home/john/.tango/foo.conf") == "/home/john/.tango/foo.conf",
                normalize ("/home/john/.tango/foo.conf"));
        assert (normalize ("/foo/bar/.htaccess") == "/foo/bar/.htaccess",
                normalize ("/foo/bar/.htaccess"));
        assert (normalize ("foo/bar/././.") == "foo/bar",
                normalize ("foo/bar/././."));
        assert (normalize ("././foo/././././bar") == "foo/bar",
                normalize ("././foo/././././bar"));
        assert (normalize ("/foo/../john") == "/john",
                normalize ("/foo/../john"));
        assert (normalize ("foo/../john") == "john");
        assert (normalize ("foo/bar/..") == "foo");
        assert (normalize ("foo/bar/../john") == "foo/john");
        assert (normalize ("foo/bar/doe/../../john") == "foo/john");
        assert (normalize ("foo/bar/doe/../../john/../bar") == "foo/bar");
        assert (normalize ("./foo/bar/doe") == "foo/bar/doe");
        assert (normalize ("./foo/bar/doe/../../john/../bar") == "foo/bar");
        assert (normalize ("./foo/bar/../../john/../bar") == "bar");
        assert (normalize ("foo/bar/./doe/../../john") == "foo/john");
        assert (normalize ("../../foo/bar") == "../../foo/bar");
        assert (normalize ("../../../foo/bar") == "../../../foo/bar");
        assert (normalize ("d/") == "d/");

        assert (normalize ("\\foo\\..\\john") == "/john");
        assert (normalize ("foo\\..\\john") == "john");
        assert (normalize ("foo\\bar\\..") == "foo");
        assert (normalize ("foo\\bar\\..\\john") == "foo/john");
        assert (normalize ("foo\\bar\\doe\\..\\..\\john") == "foo/john");
        assert (normalize ("foo\\bar\\doe\\..\\..\\john\\..\\bar") == "foo/bar");
        assert (normalize (".\\foo\\bar\\doe") == "foo/bar/doe");
        assert (normalize (".\\foo\\bar\\doe\\..\\..\\john\\..\\bar") == "foo/bar");
        assert (normalize (".\\foo\\bar\\..\\..\\john\\..\\bar") == "bar");
        assert (normalize ("foo\\bar\\.\\doe\\..\\..\\john") == "foo/john");
        assert (normalize ("..\\..\\foo\\bar") == "../../foo/bar");
        assert (normalize ("..\\..\\..\\foo\\bar") == "../../../foo/bar");
    }
}


/*******************************************************************************

    Matches a pattern against a filename.

    Some characters of pattern have a special meaning (they are
    <i>meta-characters</i>) and <b>can't</b> be escaped. These are:
    <p><table>
    <tr><td><b>*</b></td>
        <td>Matches 0 or more instances of any character.</td></tr>
    <tr><td><b>?</b></td>
        <td>Matches exactly one instance of any character.</td></tr>
    <tr><td><b>[</b><i>chars</i><b>]</b></td>
        <td>Matches one instance of any character that appears
        between the brackets.</td></tr>
    <tr><td><b>[!</b><i>chars</i><b>]</b></td>
        <td>Matches one instance of any character that does not appear
        between the brackets after the exclamation mark.</td></tr>
    </table><p>
    Internally individual character comparisons are done calling
    charMatch(), so its rules apply here too. Note that path
    separators and dots don't stop a meta-character from matching
    further portions of the filename.

    Returns: true if pattern matches filename, false otherwise.

    See_Also: charMatch().

    Throws: Nothing.

    Examples:
    -----
    // On Windows (character comparison ignores case):
        patternMatch("foo.bar", "*") // => true
        patternMatch(r"foo/foo\bar", "f*b*r") // => true
        patternMatch("foo.bar", "f?bar") // => false
        patternMatch("Goo.bar", "[fg]???bar") // => true
        patternMatch(r"d:\foo\bar", "d*foo?bar") // => true
    // On Posix systems (character comparison is exact):
        patternMatch("Go*.bar", "[fg]???bar") // => false
        patternMatch("/foo*home/bar", "?foo*bar") // => true
        patternMatch("foobar", "foo?bar") // => false ('?' must consume one character)
    -----

*******************************************************************************/

/*
    patternMatch: glob-style match of filename against pattern.

    Params:
        filename = the text to test
        pattern  = the pattern; '*', '?', '[chars]' and '[!chars]' are
                   meta-characters, every other character is compared with
                   charMatch()

    Returns: true when the whole filename is matched by the whole pattern,
             false otherwise. A pattern whose '[' class is never closed
             matches nothing.
*/
bool patternMatch(char[] filename, char[] pattern)
in
{
    // Verify that pattern[] is valid: '[' and ']' must strictly alternate.
    int i;
    int inbracket = false;

    for (i = 0; i < pattern.length; i++)
    {
        switch (pattern[i])
        {
        case '[':
            assert(!inbracket);
            inbracket = true;
            break;

        case ']':
            assert(inbracket);
            inbracket = false;
            break;

        default:
            break;
        }
    }
}
body
{
    int ni = 0;     // index of the next filename character to match

    for (int pi = 0; pi < pattern.length; pi++)
    {
        char pc = pattern[pi];

        switch (pc)
        {
        case '*':
            // a trailing '*' swallows whatever is left of the filename
            if (pi + 1 == pattern.length)
                return true;

            // Otherwise try the rest of the pattern against every suffix of
            // the filename, including the empty one (j == filename.length),
            // so that a '*' can match nothing in front of another '*'.
            for (int j = ni; j <= filename.length; j++)
            {
                if (patternMatch(filename[j .. $], pattern[pi + 1 .. $]))
                    return true;
            }
            return false;

        case '?':
            // needs exactly one character, whatever it is
            if (ni == filename.length)
                return false;
            ni++;
            break;

        case '[':
            {
                if (ni == filename.length)
                    return false;
                char nc = filename[ni];
                ni++;

                // step past '[' and an optional negating '!'
                bool negate = false;
                pi++;
                if (pi < pattern.length && pattern[pi] == '!')
                {
                    negate = true;
                    pi++;
                }

                // scan the class up to the closing ']'
                bool anymatch = false;
                while (1)
                {
                    // unterminated class: nothing can match, and reading on
                    // would run past the end of the pattern
                    if (pi == pattern.length)
                        return false;
                    pc = pattern[pi];
                    if (pc == ']')
                        break;
                    if (!anymatch && charMatch(nc, pc))
                        anymatch = true;
                    pi++;
                }

                // fail when the class result equals the negation flag
                if (anymatch == negate)
                    return false;
            }
            break;

        default:
            // ordinary character: must match the next filename character
            if (ni == filename.length)
                return false;
            if (!charMatch(pc, filename[ni]))
                return false;
            ni++;
            break;
        }
    }

    // the pattern is exhausted; it matched only if the filename is as well
    return ni == filename.length;
}

debug (UnitTest)
{
    // Exercises patternMatch(): first the platform-dependent case handling,
    // then patterns that must match, then patterns that must not.
    unittest
    {
        version (Win32)
            assert(patternMatch("foo", "Foo"));
        version (Posix)
            assert(!patternMatch("foo", "Foo"));

        assert(patternMatch("foo", "*"));
        assert(patternMatch("foo.bar", "*"));
        assert(patternMatch("foo.bar", "*.*"));
        assert(patternMatch("foo.bar", "foo*"));
        assert(patternMatch("foo.bar", "f*bar"));
        assert(patternMatch("foo.bar", "f*b*r"));
        assert(patternMatch("foo.bar", "f???bar"));
        assert(patternMatch("foo.bar", "[fg]???bar"));
        assert(patternMatch("foo.bar", "[!gh]*bar"));

        assert(!patternMatch("foo", "bar"));
        assert(!patternMatch("foo", "*.*"));
        assert(!patternMatch("foo.bar", "f*baz"));
        assert(!patternMatch("foo.bar", "f*b*x"));
        assert(!patternMatch("foo.bar", "[gh]???bar"));
        assert(!patternMatch("foo.bar", "[!fg]*bar"));
        assert(!patternMatch("foo.bar", "[fg]???baz"));
    }
}


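/*******************************************************************************

     Matches filename characters.

     Under Windows, the comparison is done ignoring case. Under Linux
     an exact match is performed.

     Returns: true if c1 matches c2, false otherwise.

     Throws: Nothing.

     Examples:
     -----
     // On Windows (case is ignored):
         charMatch('a', 'b') // => false
         charMatch('A', 'a') // => true
     // On Linux (exact match):
         charMatch('a', 'b') // => false
         charMatch('A', 'a') // => false
     -----

*******************************************************************************/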

/*
    charMatch: compares two filename characters.

    Params:
        c1 = first character
        c2 = second character

    Returns: true when the characters match. Under version Win32 the
             ASCII letters a-z are folded to upper case before comparing;
             on every other platform the comparison is exact.
*/
private bool charMatch(char c1, char c2)
{
    version (Win32)
    {
        if (c1 != c2)
        {
            // fold ASCII lower case to upper case on both sides, then compare
            return ((c1 >= 'a' && c1 <= 'z') ? c1 - ('a' - 'A') : c1) ==
                   ((c2 >= 'a' && c2 <= 'z') ? c2 - ('a' - 'A') : c2);
        }
        return true;
    }
    else
    {
        // the braces matter: without them an unconditional "return true"
        // would precede this comparison
        return c1 == c2;
    }
}