view src/basic/SourceManager.d @ 206:d3c148ca429b

Major moving of files. all src now goes into src, all docs in docs.
author Anders Johnsen <skabet@gmail.com>
date Tue, 12 Aug 2008 18:14:56 +0200
parents
children
line wrap: on
line source

module basic.SourceManager;

import tango.core.Memory : GC;
import tango.io.UnicodeFile;
import tango.io.Stdout;
import tango.text.convert.Layout;

public import basic.SourceLocation;

// Module-private shorthand: within this module `string` is a mutable char[] slice.
private alias char[] string;

/**
  SourceManager is used to handle input files, by loading them in chunks
  that can be referenced elsewhere.

  It also helps extract the line/column of locations and convert between
  real and virtual locations.
 **/
class SourceManager
{
    /// Creates an empty manager. Files are registered through addFile.
    this()
    {
        layout = new Layout!(char);
    }

    /**
      Loads the file belonging to the filename and registers its contents.

      Params:
        filename = The file to load. There are some assumptions about this
            file:
            1. The file has a BOM or is valid utf-8
            2. The file is readable and is not a folder etc.

      Returns: the location identifying the first checkpoint of the file.
     **/
    SourceLocation addFile(string filename)
    {
        scope file = new UnicodeFile!(char)(filename, Encoding.UTF_8);
        auto file_data = file.read();
        return createCheckpoints(file_data, filename);
    }

    /**
      Returns a string slice containing the part of the file after loc (a
      pointer might be better, it allows negative indexing).

      The slice starts at loc and runs to the end of the whole file, not just
      to the end of the checkpoint loc belongs to.
     **/
    string getRawData(SourceLocation loc)
    {
        CP cp = checkpoints[loc.fileID];
        // Distance from the start of this checkpoint to the end of the file.
        auto length = cp.data_end - cp.data.ptr;
        return cp.data.ptr[loc.fileOffset .. length];
    }

    /**
      Extracts the line number (1-based) of the given location.
      O("file size") if cache isn't built, O(log "lines in file") else
     **/
    uint getLineNumber(SourceLocation loc)
    {
        assert(loc.isValid, "Location is invalid");
        assert(loc.isReal, "Virtual locations not supported yet");
        assert(loc.fileID < checkpoints.length, "Non-existent location");

        CP* cp = &checkpoints[loc.fileID];
        auto cache = &linecache[cp.meta_index];
        // The cache is built lazily, the first time a line number is needed.
        if (!cache.isCached)
            cache.build(cp.data_start[0 .. cp.data_end - cp.data_start]);
        return cache.lineOf(getFileOffset(loc));
    }

    /**
      Extracts the full byte offset into a file, at which a location
      is pointing.
     **/
    uint getFileOffset(SourceLocation loc)
    {
        // loc.fileOffset is relative to its checkpoint; every earlier part of
        // the same file contributes MaxFileOffset bytes.
        return loc.fileOffset
            + checkpoints[loc.fileID].part * loc.Bits.MaxFileOffset;
    }

    /**
      Extracts a string containing the entire line loc appears in, without
      its line terminator.

      A location that points at a line terminator is treated as part of the
      line that terminator ends.
     **/
    string getLine(SourceLocation loc)
    {
        CP* cp = &checkpoints[loc.fileID];
        char* ptr = cp.data.ptr + loc.fileOffset;

        char* begin = lineBegin(cp, ptr);

        // Advance to the nearest line terminator (or the end of the file).
        char* end = ptr;
        while (cp.inRange(end) && *end != '\n' && *end != '\r')
            ++end;

        // If loc was on the '\n' of a "\r\n" pair, `end` did not move and the
        // preceding '\r' must not be returned as part of the line.
        if (end > begin && *(end - 1) == '\r')
            --end;

        return begin[0 .. end - begin];
    }

    /**
      Gets the column of where the loc appears, i.e. the number of bytes
      between the start of the line and loc (0-based).
     **/
    int getColumn(SourceLocation loc)
    {
        CP* cp = &checkpoints[loc.fileID];
        char* ptr = cp.data.ptr + loc.fileOffset;
        // Subtract the pointers first; casting each pointer to int on its own
        // would truncate addresses on 64-bit targets.
        return cast(int)(ptr - lineBegin(cp, ptr));
    }

    /**
      Get the original source text of a SourceRange
     **/
    string getText(SourceRange loc)
    {
        assert(loc.isValid, "Range is invalid");
        assert(loc.isReal, "Virtual locations not supported yet");
        auto begin  = getFileOffset(loc.begin);
        auto end    = getFileOffset(loc.end);
        return checkpoints[loc.begin.fileID].data_start[begin .. end];
    }

    /**
      Get the original source text, starting at loc and spanning length bytes.
     **/
    string getText(SourceLocation loc, size_t length)
    {
        return getText(SourceRange(loc, loc + length));
    }

    /**
      Convert a location into a string. Something like "file(line)"
     **/
    string getLocationAsString(SourceLocation loc)
    {
        assert(loc.isValid, "Location is invalid");
        return layout.convert("{}({})",
            checkpoints[loc.fileID].filename,
            getLineNumber(loc));
    }

    /**
      Convert a range into a string of the form "file(begin:end)", where
      begin and end are byte offsets into the file.
     **/
    string getLocationAsString(SourceRange loc)
    {
        return layout.convert("{}({}:{})",
            checkpoints[loc.begin.fileID].filename,
            getFileOffset(loc.begin),
            getFileOffset(loc.end));
    }

    /**
      Get the file name of a loc.
     **/
    string getFile(SourceLocation loc)
    {
        return checkpoints[loc.fileID].filename;
    }

private:
    /**
      Splits data into checkpoints and registers them together with a (not
      yet built) line cache.

      Returns: the location of the first checkpoint created for the file.
     **/
    synchronized
        SourceLocation createCheckpoints(string data, string source_file)
    {
        // The line-cache is added, but not built,
        // getLineNumber makes sure it is called when needed.
        linecache ~= FileLineCache();
        uint meta_index = linecache.length - 1;

        // SourceLocation's can only index relatively short buffers, therefore
        // the file is split into several checkpoints.
        uint checkpoint_counter = 0;
        char* data_start = data.ptr;
        char* data_end = data.ptr + data.length;
        // do/while: an empty file still gets one (empty) checkpoint, so the
        // returned file id always refers to a checkpoint of this file instead
        // of to whatever file happens to be added next.
        do
        {
            uint to_take = min(data.length, SourceLocation.Bits.MaxFileOffset);
            checkpoints ~=
                CP(source_file,
                        data_start,
                        data_end,
                        data[0 .. to_take],
                        checkpoint_counter++,
                        meta_index);
            data = data[to_take .. $];
        }
        while (data.length > 0);

        // Index of the first checkpoint that was just appended.
        checkpoint_counter = checkpoints.length - checkpoint_counter;
        return SourceLocation.fromFileID(checkpoint_counter);
    }

    /**
      Returns a pointer to the first character of the line that contains p.

      If p points at a line terminator, the line is the one that terminator
      ends. The search is bounded by the whole file, so it may cross
      checkpoint boundaries.
     **/
    char* lineBegin(CP* cp, char* p)
    {
        // The '\n' of a "\r\n" pair belongs to the same terminator as the
        // '\r' in front of it, so start from the '\r'.
        if (cp.inRange(p) && *p == '\n'
                && cp.inRange(p - 1) && *(p - 1) == '\r')
            --p;

        // Start one before p, so that a terminator at p itself is not
        // mistaken for the terminator of the previous line.
        char* lo = p - 1;
        while (cp.inRange(lo) && *lo != '\n' && *lo != '\r')
            --lo;
        return lo + 1;
    }

    /// Contains the read/generated data.
    CP[] checkpoints;
    /// Cache used to speed up finding of line-starts.
    FileLineCache[] linecache;
    /// Used for formatting locations as strings.
    Layout!(char) layout;

    // These really should be magically available everywhere and templated.
    int min(int a, int b) { return a < b? a : b; }
    int max(int a, int b) { return a >= b? a : b; }

    // A Check Point is used to store a file in multiple parts, to overcome
    // the limitation of SourceLocation only having a rather limited amount of
    // bits to index any one file.
    struct CP
    {
        // read-only: name of the file this part belongs to
        char[] filename;
        // read-only: start and one-past-the-end of the whole file's data
        char* data_start;
        char* data_end;
        // read-only: the slice of the file covered by this part
        char[] data;
        // read-only: index of this part within its file
        uint part = 0;
        // read-only: index into linecache for the file
        uint meta_index = 0;

        /// True if p points inside the data of the whole file.
        bool inRange(char* p)
        {
            return p >= data_start && p < data_end;
        }
    }

    struct FileLineCache
    {
        /// Contains the offset of the first character of line i+1 on index i
        uint[] line_starts;

        /// Indicates whether the cache has been built or not
        bool isCached = false;

        /**
          This method does a binary search to find the line that contains the
          given offset. The result is the number of line starts that are
          <= offset, which is the 1-based line number.
         **/
        uint lineOf(uint offset)
        {
            size_t  beg = 0,
                    end = line_starts.length,
                    mid = end >> 1;

            while( beg < end )
            {
                if( line_starts[mid] <= offset )
                    beg = mid + 1;
                else
                    end = mid;
                mid = beg + ( end - beg ) / 2;
            }
            return cast(uint)mid;
        }

        /**
          Builds the cache data - always make sure this has been called before
          calling lineOf.

          "\n", "\r" and "\r\n" are each treated as one line terminator.
         **/
        void build(char[] data)
        {
            // count starts at 1, because we need an additional place in the
            // array to indicate that line 1 starts at index 0.
            // Both passes use terminatorLength, so the number of counted
            // terminators always matches the number of entries written.
            size_t count = 1;
            for (size_t i = 0; i < data.length; i++)
            {
                size_t len = terminatorLength(data, i);
                if (len > 0)
                {
                    ++count;
                    i += len - 1;
                }
            }

            // Size the array once, up front, to avoid repeated appends.
            line_starts.length = count;
            line_starts[0] = 0;

            // Go over the data again, writing the line starts in our new array
            size_t j = 1;
            for (size_t i = 0; i < data.length; i++)
            {
                size_t len = terminatorLength(data, i);
                if (len > 0)
                {
                    // Skip the rest of the terminator; the next line starts
                    // on the character right after it.
                    i += len - 1;
                    line_starts[j++] = cast(uint)(i + 1);
                }
            }

            isCached = true;
        }

        /**
          Returns the length of the line terminator starting at data[i]:
          2 for "\r\n", 1 for a lone '\n' or '\r', 0 if there is none.
         **/
        static size_t terminatorLength(char[] data, size_t i)
        {
            if (data[i] == '\n')
                return 1;
            if (data[i] == '\r')
            {
                // Bounds check: a '\r' may be the very last character.
                if (i + 1 < data.length && data[i + 1] == '\n')
                    return 2;
                return 1;
            }
            return 0;
        }
    }
}