view basic/SourceManager.d @ 88:eb5b2c719a39 new_gen

Major change to locations, tokens and expressions. A location (now SourceLocation or SLoc) is only 32 bits in size - the disadvantage is that it can't find its own text. You have to go through the new SourceManager to do that. This has caused changes to a lot of stuff and the removal of DataSource and the old Location. Additionally, Exp has gotten some location stuff, so we can give proper error messages. Not in Decl and Stmt yet, but that's coming too.
author Anders Halager <halager@gmail.com>
date Sun, 04 May 2008 18:13:46 +0200
parents
children a49bb982a7b0
line wrap: on
line source

module basic.SourceManager;

import tango.core.Memory : GC;
import tango.io.UnicodeFile;
import tango.io.Stdout;
import tango.text.convert.Layout;

public import basic.SourceLocation;

private alias char[] string;

/**
  SourceManager is used to handle input files, by loading them in chunks
  ("checkpoints") that can be referenced elsewhere.

  It will also help extract the line/col of locations and convert between
  real and virtual locations
 **/
class SourceManager
{
    /// Creates an empty manager; only the formatter used by
    /// getLocationAsString is set up here.
    this()
    {
        layout = new Layout!(char);
    }

    /**
      Will load in the file belonging to the filename

        filename = The file to load. There are some assumptions about this
            file.
            1. The file has a BOM or is valid utf-8
            2. The file is not unreadable, a folder etc.

      Returns: the location of the first checkpoint created for the file.
     **/
    SourceLocation addFile(string filename)
    {
        scope file = new UnicodeFile!(char)(filename, Encoding.UTF_8);
        auto file_data = file.read();
        return createCheckpoints(file_data, filename);
    }

    /**
      Returns a string slice containing the part of the file after loc (a
      pointer might be better, it allows negative indexing)

      The slice runs to the end of the file, not just to the end of the
      checkpoint loc belongs to.
     **/
    string getRawData(SourceLocation loc)
    {
        return fileData(loc)[getFileOffset(loc) .. $];
    }

    /**
      Extracts the line number of the given location
      O("file size") if cache isn't built, O(log "lines in file") else
     **/
    uint getLineNumber(SourceLocation loc)
    {
        assert(loc.isValid, "Location is invalid");
        assert(loc.isReal, "Virtual locations not supported yet");
        assert(loc.fileID < checkpoints.length, "Non-existent location");

        CP* cp = &checkpoints[loc.fileID];
        auto cache = &linecache[cp.meta_index];
        // The cache is indexed by whole-file offsets, so it has to be built
        // from the whole file and not from the checkpoint's chunk alone.
        if (!cache.isCached)
            cache.build(cache.file_data);
        return cache.lineOf(getFileOffset(loc));
    }

    /**
      Extracts the full byte offset into a file, at which a location
      is pointing.

      This is the offset inside the checkpoint plus the size of all the
      checkpoints of the same file that precede it.
     **/
    uint getFileOffset(SourceLocation loc)
    {
        return loc.fileOffset
            + checkpoints[loc.fileID].part * loc.Bits.MaxFileOffset;
    }

    /**
      Extracts a string containing the entire line loc appears in.

      The line terminator is not part of the result. If loc points at a
      terminator character, the line ended by that terminator is returned.
     **/
    string getLine(SourceLocation loc)
    {
        // Work on the whole file so that a line crossing a checkpoint
        // boundary is still returned in full.
        char[] data = fileData(loc);
        size_t pos = getFileOffset(loc);
        assert(pos <= data.length, "Location is outside its file");

        // Move the lower bound back until the character *before* it is a
        // line terminator (or the file starts), so the terminator of the
        // previous line is not included in the result.
        size_t lo = pos;
        while (lo > 0 && data[lo - 1] != '\n' && data[lo - 1] != '\r')
            --lo;

        // Move the upper bound forward to the next terminator or the end
        // of the file.
        size_t hi = pos;
        while (hi < data.length && data[hi] != '\n' && data[hi] != '\r')
            ++hi;

        return data[lo .. hi];
    }

    /**
      Get the original source text of a SourceRange
     **/
    string getText(SourceRange loc)
    {
        assert(loc.isValid, "Range is invalid");
        assert(loc.isReal, "Virtual locations not supported yet");
        auto begin  = getFileOffset(loc.begin);
        auto end    = getFileOffset(loc.end);
        assert(begin <= end, "Range ends before it begins");
        // begin/end are whole-file offsets, so they must be applied to the
        // start of the file, not to the start of a checkpoint.
        // The slice is taken through .ptr, as before, so it is not bounds
        // checked.
        return fileData(loc.begin).ptr[begin .. end];
    }

    /**
      Get the original source text
     **/
    string getText(SourceLocation loc, size_t length)
    {
        return getText(SourceRange(loc, loc + length));
    }

    /**
      Convert a location into a string. Something like "file(line)"
     **/
    string getLocationAsString(SourceLocation loc)
    {
        assert(loc.isValid, "Location is invalid");
        return layout.convert("{}({})",
            checkpoints[loc.fileID].filename,
            getLineNumber(loc));
    }

    /**
      Convert a range into a string. Something like "file(begin:end)", where
      begin and end are byte offsets into the file.
     **/
    string getLocationAsString(SourceRange loc)
    {
        return layout.convert("{}({}:{})",
            checkpoints[loc.begin.fileID].filename,
            getFileOffset(loc.begin),
            getFileOffset(loc.end));
    }

private:
    /**
      Registers data as the contents of source_file, split into as many
      checkpoints as needed.

      Returns: the location of the first checkpoint of the file.
     **/
    synchronized
        SourceLocation createCheckpoints(string data, string source_file)
    {
        // The line-cache is added, but not built,
        // getLineNumber makes sure it is called when needed.
        linecache ~= FileLineCache();
        uint meta_index = linecache.length - 1;
        // Keep the unsplit buffer; the checkpoints below are slices of it.
        linecache[meta_index].file_data = data;

        // SourceLocation's can only index relatively short buffers, therefore
        // the file is split into several checkpoints.
        uint first_checkpoint = checkpoints.length;
        // Index of the chunk inside this file. It has to start at 0 for
        // every file, because getFileOffset multiplies it by the chunk size.
        uint part = 0;
        // do-while: even a file without data gets one (empty) checkpoint, so
        // the returned location always refers to an existing checkpoint.
        do
        {
            size_t to_take = data.length;
            if (to_take > SourceLocation.Bits.MaxFileOffset)
                to_take = SourceLocation.Bits.MaxFileOffset;
            checkpoints ~=
                CP(source_file,
                        data[0 .. to_take],
                        part++,
                        meta_index);
            data = data[to_take .. $];
        }
        while (data.length > 0);

        return SourceLocation.fromFileID(first_checkpoint);
    }

    /// Returns the complete contents of the file loc belongs to.
    char[] fileData(SourceLocation loc)
    {
        return linecache[checkpoints[loc.fileID].meta_index].file_data;
    }

    /// Contains the read/generated data.
    CP[] checkpoints;
    /// Cache used to speed up finding of line-starts.
    FileLineCache[] linecache;
    /// Used for formatting locations as strings.
    Layout!(char) layout;

    // These really should be magically available everywhere and templated.
    int min(int a, int b) { return a < b? a : b; }
    int max(int a, int b) { return a >= b? a : b; }

    // A Check Point is used to store a file in multiple parts, to overcome
    // the limitation of SourceLocation only having a rather limited amount of
    // bits to index any one file.
    struct CP
    {
        // read-only: name of the file the chunk was read from
        char[] filename;
        // read-only: the chunk itself, a slice of the file's buffer
        char[] data;
        // read-only: index of the chunk inside its file, starting at 0
        uint part = 0;
        // read-only: index into linecache of the per-file data
        uint meta_index = 0;
    }

    struct FileLineCache
    {
        /// Contains the offset of the i'th line on index i
        uint[] line_starts;

        /// Indicates whether the cache has been built or not
        bool isCached = false;

        /// The complete, unsplit contents of the file this cache belongs to
        char[] file_data;

        /**
          This method does a binary search to find the line that contains the
          given offset.

          Returns: the 1-based line number, i.e. the number of lines that
          start at or before offset.
         **/
        uint lineOf(uint offset)
        {
            size_t  beg = 0,
                    end = line_starts.length;

            // Invariant: every line start before beg is <= offset and every
            // line start from end onwards is > offset.
            while( beg < end )
            {
                size_t mid = beg + ( end - beg ) / 2;
                if( line_starts[mid] <= offset )
                    beg = mid + 1;
                else
                    end = mid;
            }
            return cast(uint)beg;
        }

        /**
          Builds the cache data - always make sure this has been called before
          calling lineOf.

          "\n", "\r\n" and a lone "\r" are all treated as one line terminator.
         **/
        void build(char[] data)
        {
            // Count the lines first so the array is sized exactly once.
            // The count starts at 1, because line 1 starts at index 0 even
            // without any terminator in the data.
            size_t lines = 1;
            for (size_t i = 0; i < data.length; )
            {
                size_t t = terminatorAt(data, i);
                if (t != 0)
                {
                    ++lines;
                    i += t;
                }
                else
                    ++i;
            }
            line_starts.length = lines;
            line_starts[0] = 0;

            // Go over the data again, writing the line starts in our new
            // array. A line starts right after the terminator of the
            // previous one. Both passes use terminatorAt, so the number of
            // entries written always matches the count above.
            size_t j = 1;
            for (size_t i = 0; i < data.length; )
            {
                size_t t = terminatorAt(data, i);
                if (t != 0)
                {
                    i += t;
                    line_starts[j++] = cast(uint)i;
                }
                else
                    ++i;
            }

            isCached = true;
        }

        /// Length of the line terminator starting at data[i]: 2 for "\r\n",
        /// 1 for a lone '\n' or '\r' and 0 if there is no terminator there.
        static size_t terminatorAt(char[] data, size_t i)
        {
            if (data[i] == '\n')
                return 1;
            if (data[i] == '\r')
                return (i + 1 < data.length && data[i + 1] == '\n') ? 2 : 1;
            return 0;
        }
    }
}