# HG changeset patch # User Anders Halager # Date 1211719396 -7200 # Node ID 5e383b3755d6e2514323e2a2b719570a56ade2d7 # Parent e331e4e816e4fe700875af90fe2212ff41aef465# Parent 189c049cbfcc6cf742886847f00942378e6746a5 merge diff -r e331e4e816e4 -r 5e383b3755d6 ast/Decl.d --- a/ast/Decl.d Fri Apr 18 23:45:45 2008 +0200 +++ b/ast/Decl.d Sun May 25 14:43:16 2008 +0200 @@ -5,11 +5,16 @@ import lexer.Token; -import sema.SymbolTable; +import tango.io.Stdout; + +import sema.Scope, + sema.DType, + basic.SmallArray; enum DeclType { VarDecl, + ImportDecl, FuncDecl, StructDecl, } @@ -21,6 +26,12 @@ this.declType = declType; } + void simplify() + { + } + + DType type() { return null; } + DeclType declType; Scope env; } @@ -31,43 +42,148 @@ Exp e = null) { super(DeclType.VarDecl); - this.type = type; + this.varType = type; this.identifier = identifier; this.init = e; } - Identifier type, identifier; + void simplify() + { + } + + override DType type() + { + return env.find(identifier).type; + } + + Identifier varType, identifier; Exp init; } +class ImportDecl : Decl +{ + this() + { + super(DeclType.ImportDecl); + } + + char[] get() + { + char[] res; + foreach(i ; packages) + res ~= i.get ~ "."; + res ~= name.get; + return res; + } + + bool isStatic = false; + + Identifier[] packages; + Identifier name; + Identifier aliasedName; + + Identifier[2][] explicitSymbols; +} + class FuncDecl : Decl { - this(Identifier type, Identifier identifier, - VarDecl[] funcArgs, Stmt[] statements) + this(Identifier type, Identifier identifier) { super(DeclType.FuncDecl); - this.type = type; + this.returnType = type; this.identifier = identifier; - this.funcArgs = funcArgs; - this.statements = statements; + } + + void addParam(Identifier type, Identifier name = null) + { + funcArgs ~= new VarDecl(type, name, null); + } + + void setBody(CompoundStatement stmts) + { + statements = stmts.statements; + emptyFunction = false; } - Identifier type, identifier; + void simplify() + { + if(auto t = 
cast(DFunction)env.find(identifier).type) + { + if(auto s = cast(DStruct)t.returnType) + { + VarDecl[] funcArgs; + auto i = new Identifier("ret.val"); + i.env = env; + i.env.add(i); + i.env.find(i).setType( s ); + auto var = new VarDecl(returnType, i); + var.env = env; + funcArgs ~= var; + funcArgs ~= this.funcArgs; + this.funcArgs = funcArgs; + t.returnType = DType.Void; + this.returnType = new Identifier("void"); + env.find(identifier).setType(t); + sret = true; + + myType = null; + } + } + + foreach (funcArg; funcArgs) + funcArg.simplify(); + foreach (stmt; statements) + stmt.simplify(); + } + + override DFunction type() + { + if (myType !is null) + return myType; + + auto t = new DFunction(identifier); + t.returnType = env.findType(returnType); + SmallArray!(DType) array; + foreach (a; funcArgs) + array ~= a.type(); + t.params = array.safe(); + t.firstParamIsReturnValue = this.sret; + myType = t; + return myType; + } + + Identifier returnType, identifier; VarDecl[] funcArgs; Stmt[] statements; + bool sret = false; + bool emptyFunction = true; + private DFunction myType; } class StructDecl : Decl { - this(Identifier identifier, - VarDecl[] vars) + this(Identifier identifier) { super(DeclType.StructDecl); this.identifier = identifier; - this.vars = vars; + } + + void addMember(Decl decl) + { + decls ~= decl; + } + + void simplify() + { + } + + override DType type() + { + return env.findType(identifier); } Identifier identifier; - VarDecl[] vars; + Decl[] decls; + private DType myType; } diff -r e331e4e816e4 -r 5e383b3755d6 ast/Exp.d --- a/ast/Exp.d Fri Apr 18 23:45:45 2008 +0200 +++ b/ast/Exp.d Sun May 25 14:43:16 2008 +0200 @@ -1,55 +1,165 @@ module ast.Exp; -import tango.text.Util : jhash; +import tango.text.Util; +import tango.io.Stdout; + +import ast.Decl, + ast.Stmt; import lexer.Token; -import sema.SymbolTable; +import sema.Scope, + sema.DType; enum ExpType { Binary, Negate, + Deref, IntegerLit, + MemberReference, + Index, Identifier, + ArrayIdentifier, + 
PointerIdentifier, AssignExp, CallExp, + CastExp, } -class Exp +abstract class Exp { - this(ExpType expType) + this(ExpType expType, SLoc loc) { this.expType = expType; + this.loc = loc; } + /// Get the type of the expression + DType type(); + + /// Indicates which type the expression is - to avoid a lot of casts ExpType expType; + + /// The environment of the expression Scope env; + + int stmtIndex; + + /** + The "main" location of the expression. + What exactly this represents varies but for most things its the start + while for a binary expression its the operator. + **/ + SourceLocation loc; + + /// Return the starting location of this expression + SourceLocation startLoc() { return loc; } + + /// Get the full extents of the expression + SourceRange sourceRange() { return SourceRange(loc, loc + 1); } + + /// Do some simplifications + Exp simplify() { return this; } } class CallExp : Exp { this(Exp exp, Exp[] args) { - super(ExpType.CallExp); + super(ExpType.CallExp, exp.loc); this.exp = exp; this.args = args; } + override DType type() + { + DFunction f = cast(DFunction)exp.type(); + assert(f !is null, "Can only call functions"); + return f.returnType; + } + Exp exp; Exp[] args; + bool sret = false; + + override SourceRange sourceRange() + { + SourceRange res = exp.sourceRange; + if (args.length > 0) + res = res + args[$ - 1].sourceRange; + return res; + } + + Exp simplify() + { + if(auto t = type.asStruct) + { + DFunction func_t = cast(DFunction)exp.type(); + assert(func_t !is null, "Calling on something that isn't a function"); + if (cast(DStruct)func_t.returnType is null) + return this; + + auto call = cast(Identifier)exp; + FuncDecl f = env.parentFunction; + auto i = new Identifier("temp.var"); + i.env = f.env; + f.env.add(i); + f.env.find(i).setType(t); + auto ty = new Identifier(t.name); + auto var = new VarDecl(ty, i, null); + Exp[] args; + args ~= i; + args ~= this.args; + auto callExp = new CallExp(exp, args); + callExp.env = f.env; + var.env = f.env; + 
auto stmtVar = new DeclStmt(var); + auto stmtCall = new ExpStmt(callExp); + Stmt[] stmts; + foreach( index, s ; f.statements) + { + if(stmtIndex == index) + { + stmts ~= stmtVar; + stmts ~= stmtCall; + } + stmts ~= s; + } + f.statements = stmts; + callExp.sret = true; + + return i; + } + return this; + } } class AssignExp : Exp { - this(Identifier identifier, Exp exp) + this(SLoc op, Exp identifier, Exp exp) { - super(ExpType.AssignExp); + super(ExpType.AssignExp, op); this.identifier = identifier; this.exp = exp; } - Identifier identifier; + Exp simplify() + { + identifier = identifier.simplify; + exp = exp.simplify; + + return this; + } + + override SourceRange sourceRange() + { + return identifier.sourceRange + exp.sourceRange; + } + + override DType type() { return identifier.type(); } + + Exp identifier; Exp exp; } @@ -57,21 +167,63 @@ { public enum Operator { + Assign, + Eq, Ne, + Lt, Le, Gt, Ge, - Mul, Div, + Add, Sub, + Mul, Div, Mod, } - this(Operator op, Exp left, Exp right) + char[][] getOp = ["=","==","!=","<","<=",">",">=","+","-","*","/","%"]; + + this(SLoc op_loc, Operator op, Exp left, Exp right) { - super(ExpType.Binary); + super(ExpType.Binary, op_loc); this.op = op; this.left = left; this.right = right; } + override DType type() + { + if (myType) + return myType; + + if (op == Operator.Eq || + op == Operator.Ne || + op == Operator.Lt || + op == Operator.Le || + op == Operator.Gt || + op == Operator.Ge) + { + myType = DType.Bool; + return myType; + } + + DType l = left.type; + DType r = right.type; + if (l is r) + myType = l; + else if (l.hasImplicitConversionTo(r)) + myType = r; + else if (r.hasImplicitConversionTo(l)) + myType = l; + else + return null; + return myType; + } + + override SLoc startLoc() { return left.startLoc(); } + + override SourceRange sourceRange() + { + return left.sourceRange + right.sourceRange; + } + char[] resultType() { if (op >= Operator.Eq && op <= Operator.Ge) @@ -79,39 +231,76 @@ return null; } + Exp simplify() + { 
+ left = left.simplify; + right = right.simplify; + return this; + } + Operator op; Exp left, right; + private DType myType; } class NegateExp : Exp { - this(Exp exp) + this(SLoc op, Exp exp) + { + super(ExpType.Negate, op); + this.exp = exp; + } + + Exp simplify() + { + exp = exp.simplify; + return this; + } + + override DType type() { return exp.type(); } + + override SourceRange sourceRange() { - super(ExpType.Negate); + return SourceRange(loc) + exp.sourceRange; + } + + public Exp exp; +} + +class DerefExp : Exp +{ + this(SLoc op, Exp exp) + { + super(ExpType.Deref, op); this.exp = exp; } + Exp simplify() + { + exp = exp.simplify; + return this; + } + + override DType type() + { + return exp.type().asPointer.pointerOf; + } + + override SourceRange sourceRange() + { + return SourceRange(loc) + exp.sourceRange; + } + public Exp exp; } class IntegerLit : Exp { - this(Token t) + this(SLoc loc, char[] t) { - super(ExpType.IntegerLit); - this.token = t; - } - - Token token; -} - -class Identifier : Exp -{ - this(Token t) - { - super(ExpType.Identifier); - this.token = t; - name = t.get; + super(ExpType.IntegerLit, loc); + range = SourceRange(loc, loc + t.length); + this.name = substitute(t, "_", ""); } char[] get() @@ -119,6 +308,212 @@ return name; } + Exp simplify() + { + return this; + } + + override DType type() { return DType.Int; } + + override SourceRange sourceRange() + { + return range; + } + + char[] name; + private SourceRange range; +} + +class MemberReference : Exp +{ + this(SLoc dot, Exp target, Identifier child) + { + super(ExpType.MemberReference, dot); + this.target = target; + this.child = child; + } + + Exp simplify() + { + target = target.simplify; + return this; + } + + override DType type() + { + if (myType) + return myType; + + DStruct st = cast(DStruct)target.type; + assert(st, "Only structs have members"); + if (auto t = st.typeOf(child.name)) + myType = t; + // no error reporting here + else assert(0, "Referencing non-existant member"); + + 
return myType; + } + + override SLoc startLoc() { return target.startLoc(); } + + override SourceRange sourceRange() + { + return target.sourceRange + child.sourceRange; + } + + Identifier child; + Exp target; + private DType myType; +} + +class IndexExp : Exp +{ + this(Exp target, SLoc left_bracket, Exp index, SLoc right_bracket) + { + super(ExpType.Index, target.startLoc); + this.target = target; + this.left_bracket = left_bracket; + this.index = index; + this.right_bracket = right_bracket; + } + + override DType type() + { + DType type = target.type(); + if (type.isArray()) + return type.asArray().arrayOf; + else if (type.isPointer()) + return type.asPointer().pointerOf; + else assert(0, "Can only index pointers and arrays"); + } + + override SourceRange sourceRange() + { + return target.sourceRange + SourceRange(right_bracket); + } + + Exp simplify() + { + target = target.simplify; + index = index.simplify; + return this; + } + + Exp target; + Exp index; + SLoc left_bracket, right_bracket; +} + +class CastExp : Exp +{ + this(SLoc loc, Identifier castType, Exp exp) + { + super(ExpType.CastExp, loc); + this.castType = castType; + this.exp = exp; + } + + override DType type() + { + return env.findType(this.castType); + } + + Exp simplify() + { + castType.simplify; + exp.simplify; + return this; + } + + override SourceRange sourceRange() + { + return SourceRange(loc) + exp.sourceRange; + } + + Identifier castType; + Exp exp; +} + +class PointerIdentifier : Identifier +{ + this(Identifier pointerOf) + { + super(ExpType.PointerIdentifier, pointerOf.loc); + this.pointerOf = pointerOf; + this.name = pointerOf.name; + } + + override DType type() + { + return pointerOf.type.getPointerTo(); + } + + Identifier pointerOf; +} + +class ArrayIdentifier : Identifier +{ + this(Identifier arrayOf, IntegerLit size) + { + super(ExpType.ArrayIdentifier, arrayOf.loc); + this.arrayOf = arrayOf; + this.size = Integer.parse(size.get); + this.name = arrayOf.name; + } + + override DType 
type() + { + return arrayOf.type.getAsArray(size); + } + + Identifier arrayOf; + int size; + + private DType myType; +} + +class Identifier : Exp +{ + this(SLoc loc, char[] name) + { + super(ExpType.Identifier, loc); + this.name = name; + } + + protected this(ExpType t, SLoc loc) + { + super(t, loc); + } + + override DType type() + { + if (myType !is null) + return myType; + myType = env.find(this).type; + return myType; + } + + this(char[] name) + { + super(ExpType.Identifier, SLoc.Invalid); + this.name = name; + } + + char[] get() + { + return name; + } + + char[] getMangled() + { + DType t = type; + + if(name == "main") + return "main"; + + return "_D"~name~t.mangle; + } + hash_t toHash() { return jhash(name); @@ -138,8 +533,17 @@ return 0; } - Token token; + Exp simplify() + { + return this; + } + + void setType(DType myType) + { + this.myType = myType; + } + char[] name; + private DType myType; } - diff -r e331e4e816e4 -r 5e383b3755d6 ast/Module.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ast/Module.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,22 @@ +module ast.Module; + +import sema.Scope; + +import ast.Decl; + +class Module +{ + this(char[] moduleName) + { + this.moduleName = moduleName; + } + + void addDecl(Decl decl) + { + decls ~= decl; + } + + Decl[] decls; + char[] moduleName; + Scope env; +} diff -r e331e4e816e4 -r 5e383b3755d6 ast/Stmt.d --- a/ast/Stmt.d Fri Apr 18 23:45:45 2008 +0200 +++ b/ast/Stmt.d Sun May 25 14:43:16 2008 +0200 @@ -1,18 +1,26 @@ module ast.Stmt; +import Array = tango.core.Array, + Integer = tango.text.convert.Integer, + tango.io.Stdout; + import ast.Exp, ast.Decl; -import sema.SymbolTable; +import sema.Scope, + basic.SourceLocation, + misc.Error; enum StmtType { Stmt, + Compound, Decl, Exp, Return, If, While, + Switch, } class Stmt @@ -22,8 +30,30 @@ this.stmtType = stmtType; } + void simplify() + { + } + StmtType stmtType; Scope env; + int stmtIndex; +} + +class CompoundStatement : Stmt +{ + this(Stmt[] stmts) + { + 
super(StmtType.Compound); + this.statements = stmts; + } + + void simplify() + { + foreach ( stmt ; statements ) + stmt.simplify; + } + + Stmt[] statements; } class ReturnStmt : Stmt @@ -33,6 +63,34 @@ super(StmtType.Return); } + void simplify() + { + FuncDecl f = env.parentFunction; + if(exp) + exp.simplify; + if(f !is null && f.sret) + { + auto i = new Identifier("ret.val"); + i.env = f.env; + auto ass = new AssignExp(SLoc.Invalid, i, exp); + ass.env = f.env; + auto assStmt = new ExpStmt(ass); + assStmt.env = f.env; + + Stmt[] stmts; + foreach(index, stmt ; f.statements) + { + if(stmtIndex == index) + stmts ~= assStmt; + + stmts ~= stmt; + } + f.statements = stmts; + + exp = null; + } + } + public Exp exp; } @@ -44,6 +102,11 @@ this.decl = decl; } + void simplify() + { + decl.simplify; + } + public Decl decl; } @@ -55,12 +118,17 @@ this.exp = exp; } + void simplify() + { + exp = exp.simplify; + } + public Exp exp; } class IfStmt : Stmt { - this(Exp cond, Stmt[] then, Stmt[] el = null) + this(Exp cond, Stmt then, Stmt el = null) { super(StmtType.If); this.cond = cond; @@ -68,21 +136,113 @@ this.else_body = el; } + void simplify() + { + cond.simplify; + then_body.simplify; + if (else_body) + else_body.simplify; + } + Exp cond; - Stmt[] then_body; - Stmt[] else_body; + Stmt then_body; + Stmt else_body; } class WhileStmt : Stmt { - this(Exp cond, Stmt[] stmts) + this(Exp cond, Stmt stmts) { super(StmtType.While); this.cond = cond; - this.stmts = stmts; + this.whileBody = stmts; + } + + void simplify() + { + cond.simplify; + whileBody.simplify; } Exp cond; - Stmt[] stmts; + Stmt whileBody; } +class SwitchStmt : Stmt +{ + this(Exp target) + { + super(StmtType.Switch); + cond = target; + } + + void addCase(IntegerLit[] values, Stmt[] stmts) + { + long[] new_values; + foreach (lit; values) + new_values ~= Integer.parse(lit.get); + cases ~= Case(values, stmts, new_values); + + // Make sure there is no two cases with the same value + // Does it belong here? 
+ new_values = new_values.dup; + Array.sort(new_values); + long[] all_values = Array.unionOf(old_values, new_values); + if (all_values.length != old_values.length + new_values.length) + { + // overlap! + auto e = new Error( + "Can't have multiple cases with the same value." + " Values appearing in multiple cases: %0"); + //e.loc(values[0].token.location); + + all_values = Array.intersectionOf(old_values, new_values); + char[][] vals; + foreach (val; all_values) + vals ~= Integer.toString(val); + e.arg(vals); + /* + foreach (c; cases) + foreach (i, v; c.values_converted) + if (Array.bsearch(all_values, v)) + e.tok(c.values[i].token); + */ + throw e; + } + old_values = all_values; + } + + void setDefault(Stmt[] stmts) + { + if (defaultBlock.length != 0) + throw new Error("Switch statements can't have multiple defaults"); + defaultBlock = stmts; + if (cases.length > 0) + cases[$ - 1].followedByDefault = true; + } + + void simplify() + { + cond.simplify; + foreach ( stmt ; defaultBlock ) + stmt.simplify; + foreach ( c ; cases ) + foreach ( stmt ; c.stmts ) + stmt.simplify; + } + + Exp cond; + Case[] cases; + Stmt[] defaultBlock; + + struct Case + { + IntegerLit[] values; + Stmt[] stmts; + long[] values_converted; + bool followedByDefault = false; + } + + private long[] old_values; +} + diff -r e331e4e816e4 -r 5e383b3755d6 basic/Message.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/basic/Message.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,172 @@ +module basic.Message; + +import tango.core.Exception, + Array = tango.core.Array, + tango.io.Stdout, + tango.text.Util; + +import tango.stdc.stdlib; + +import llvm.type; + +import lexer.Token, + lexer.Lexer, + sema.DType; + +import basic.SourceLocation, + basic.SourceManager; + +public import basic.Messages; + +enum ExitLevel +{ + Normal = 1, + Lexer = 2, + Parser = 3, + Semantic = 3, +} + +class MessageHandler +{ +public: + + this(SourceManager src_mgr) + { + this.src_mgr = src_mgr; + } + + Message report(uint opcode, 
SLoc location) + { + Message m = new Message(opcode, location, src_mgr, this); + messages ~= m; + return m; + } + + void checkErrors(ExitLevel exitlevel = ExitLevel.Normal) + { + if(messages.length == 0) + return; + + if(warnings) + checkWarnings; + foreach(m ; messages) + if(m.type == MessageType.Error) + { + Stdout(m).newline; + } + + exit(exitlevel); + } + + void checkWarnings() + { + foreach(m ; messages) + if(m.type == MessageType.Warning) + { + Stdout(m).newline; + } + } + + void showWarnings(bool value) + { + warnings = value; + } + +private: + Message[] messages; + SourceManager src_mgr; + bool warnings; +} + +class Message +{ + + this(int opcode, SLoc location, SourceManager src_mgr, MessageHandler msg_handler) + { + this.src_mgr = src_mgr; + this.location = location; + args ~= Messages[opcode].message; + this.type = Messages[opcode].type; + this.msg_handler = msg_handler; + } + + char[] toString() + { + char[256] tmp = void; + char[] msg = layout(tmp, args); + + Lexer l = new Lexer(location, src_mgr, new MessageHandler(src_mgr)); + + Token t = l.next; + + if (src_mgr.getRawData(location).length > 0) + msg = src_mgr.getLocationAsString(location) ~ ": " ~ msg; + else + msg = msg.dup; + + + char[] line = src_mgr.getLine(location); + char[] marks = line.dup; + marks[] = ' '; + size_t p = src_mgr.getColumn(location); + marks[p .. p + t.length] = '^'; + + msg ~= "\n "; + msg ~= line; + msg ~= "\n "; + msg ~= marks; + + return msg; + } + + Message arg(char[] s) + { + if (args.length == 11) + throw new Exception("Sorry, errors only support up to 10 args"); + args ~= s; + return this; + } + + Message arg(char[][] s) + { + char[] res = s[0 .. 
$ - 1].join(", "); + if (s.length > 1) + res ~= " and "; + res ~= s[$ - 1]; + return arg(res); + } + + Message arg(char c) + { + return arg([c]); + } + + Message arg(DType[] types) + { + char[][] res; + foreach (type; types) + res ~= type.name(); + return arg(res); + } + + Message fatal(ExitLevel exitlevel = ExitLevel.Normal) + { + msg_handler.checkErrors(exitlevel); + return this; + } + + /* + Message loc(SLoc loc) + { + location = loc; + return this; + } + */ + + MessageType type; +private: + char[][] args; + SLoc location; + SourceManager src_mgr; + MessageHandler msg_handler; +} diff -r e331e4e816e4 -r 5e383b3755d6 basic/Messages.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/basic/Messages.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,74 @@ +module basic.Messages; + +enum : uint +{ + // Lex + InvalidSymbol, + InvalidIlligaleType, + UnexpectedEOFBlock, + OnlyOneDotFloating, + OnlyOneEFloating, + + // Parse + UnexpectedTokMulti, + UnexpectedTokSingle, + UnexpectedTok, + CaseValueMustBeInt, + UnexpectedBeginStmt, + UnexpectedTokType, + ExpectedIdAfterDot, + ExpectedExp, + ExpectedCastType, + InvalidDeclType, + InvalidType, + // - imports/module + ExpectedIdAfterPackage, + RenameMustBeSingleIdent, + + + // Imports + CannotFindModule, +} + +enum MessageType +{ + Warning, + Error, +} + +MessageEntry[uint] Messages; + +struct MessageEntry +{ + MessageType type; + char[] message; +} + +private alias MessageType.Error Err; +private alias MessageType.Warning War; +private alias MessageEntry E; +static this() +{ + Messages = [ + UnexpectedEOFBlock : E(Err, "Unexpected end of file. Unclosed comment block"), + InvalidSymbol : E(Err, "Read invalid symbol: '%0'"), + OnlyOneDotFloating : E(Err, "Only one '.' 
is allowed in an floating number"), + OnlyOneEFloating : E(Err, "Only one E is allowed in an floating number"), + + UnexpectedTokMulti : E(Err, "Unexpected token, got %0 expected one of %1"), + UnexpectedTokSingle : E(Err, "Unexpected token, got %0 expected %1"), + UnexpectedTok : E(Err, "Unexpected token %0"), + CaseValueMustBeInt : E(Err, "Cases can only be integer literals"), + UnexpectedBeginStmt : E(Err, "Unexpected begining of statement."), + UnexpectedTokType : E(Err, "Unexpected token in Type parsing. Got %0"), + ExpectedIdAfterDot : E(Err, "Expected identifier after '.'"), + ExpectedExp : E(Err, "Expected expression"), + ExpectedCastType : E(Err, "Expected cast type"), + InvalidDeclType : E(Err, "Invalid declaration type"), + InvalidType : E(Err, "Invalid type"), + ExpectedIdAfterPackage : E(Err, "Identifier expected following package"), + + CannotFindModule : E(Err, "Cannot find module '%0'") + ]; +} + diff -r e331e4e816e4 -r 5e383b3755d6 basic/SmallArray.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/basic/SmallArray.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,106 @@ +module basic.SmallArray; + +/** + This struct acts like a normal dynamic array, with one difference. + A size is given, which is how many elements are preallocated on the stack. + + Example: + -------- + SmallArray!(float, 4) array; + array ~= 1.0; + array ~= 2.0; + array ~= 3.0; + float[] three_floats = array[0 .. 3]; + // The slice gives a reference to the stack, remember to .dup + array ~= 4.0; + // not using the heap yet, but after the next line all values will have been + // copied to the heap. + array ~= 5.0; + -------- + + Compared to a normal dynamic array there is 8 bytes overhead (on 32 bit cpus) + and ofcourse size * T.sizeof bytes for the stack allocated array. + */ +struct SmallArray(T, ubyte size = 8) +{ + T[] opSlice(size_t low, size_t high) + { + assert(high <= len && low <= high, "Array index out of range"); + return ptr[low .. 
high]; + } + + T[] opSlice() + { + return ptr[0 .. len]; + } + alias opSlice unsafe; + + T[] safe() + { + if (len <= size) + return static_array[0 .. len].dup; + return array[0 .. len]; + } + + T[] opSliceAssign(T val, size_t low, size_t high) + { + assert(high <= len && low <= high, "Array index out of range"); + return ptr[low .. high] = val; + } + + T[] opSliceAssign(T val) + { + return ptr[0 .. len] = val; + } + + T opIndex(size_t index) + { + assert(index < len, "Array index out of range"); + return ptr[index]; + } + + T opIndexAssign(T val, size_t index) + { + assert(index < len, "Array index out of range"); + return ptr[index] = val; + } + + void opCatAssign(T val) + { + if (len < size) + static_array[len] = val; + else if (len == size) + { + T[] tmp = static_array[].dup; + array = tmp; + array ~= val; + } + else + array ~= val; + + ++len; + if (len <= size) + ptr = static_array.ptr; + else + ptr = array.ptr; + } + + size_t length() { return len; } + + static SmallArray opCall() + { + SmallArray array; + array.ptr = array.static_array.ptr; + return array; + } + +private: + T* ptr; + size_t len; + union + { + T[] array; + T[size] static_array; + } +} + diff -r e331e4e816e4 -r 5e383b3755d6 basic/SourceLocation.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/basic/SourceLocation.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,145 @@ +module basic.SourceLocation; + +/// Shorter alias for SourceLocation +public alias SourceLocation SLoc; + +/// SourceLocation points to a place in some buffer +struct SourceLocation +{ + /// Returns true, if the location is from a real file + bool isReal() { return (val & 0x80_00_00_00) == 0; } + /// Returns true, if the location is not from a real file + bool isVirtual() { return (val & 0x80_00_00_00) != 0; } + + /// Check if this location is invalid or not + bool isValid() { return val != uint.max; } + /// ditto + bool isInvalid() { return val == uint.max; } + + /** + Extracts the file id. 
+ + Warning: In release mode this may return junk, if the loc is not from a + file + **/ + uint fileID() { + assert(isValid, "Location is invalid"); + assert(isReal, "You can only extract fileID from a real location"); + // Here we can rely on two facts, first that the high bit is zero + // since its a real position, second that FileOffset is saved in the + // high end, so all we need is some shifting + return val >> Bits.FileOffset; + } + + /** + Extracts the offset into the "file". (actually in to the referenced + chunk) + + Warning: In release mode this may return junk, if the loc is not from a + file + **/ + uint fileOffset() { + assert(isValid, "Location is invalid"); + assert(isReal, "You can only extract fileOffset from real locations"); + // FileOffset is stored in the lower bits, so all that is needed is a + // binary and with all ones in the lower bits. + return val & (1 << Bits.FileOffset) - 1; + } + + /// Get a new location, placed n bytes after the given location + SourceLocation opAdd(int n) + { + SourceLocation res = *this; + res.val += n; + return res; + } + + /// Get a new location, placed n bytes before the given location + SourceLocation opSub(int n) + { + SourceLocation res = *this; + res.val -= n; + return res; + } + + /// Creates a SourceLocation from a File ID + static SourceLocation fromFileID(uint fileID) + { + assert(fileID < Bits.MaxFileID, "To large fileID"); + SourceLocation res; + res.val = fileID << Bits.FileOffset; + return res; + } + + /** + Used for invalid/unknown locations. (also the default value, but this is + more explicit) + **/ + static const SourceLocation Invalid = {val: uint.max}; + +private: + /** + A SourceLocation consists of 1 bit, indicating real or virtual, meaning + if the location points to a file(real), a string mixin or has been + affected by #line(virtual). That information is saved in the most + significant bit. + The rest depends on which type we are dealing with. 
+ Real: + 13 bits for a file identifier + 18 bits for offset into that "file" (one file may be split) + Virtual: + Unknown for now. Likely skewed toward more ids and some meta data + An invalid location is uint.max, this might happen by accident but its + unlikely. + **/ + uint val = uint.max; + + /** + This enum contains some constants that are useful for manipulating + SourceLocation's, like the size of various “members” of val. + **/ + static enum Bits { + /// Number of bits used for the offset into file buffers + FileOffset = 18, + /// Number of bits used to identify which file buffer this is from + FileID = 31 - FileOffset, + + /// Indicates how much can be indexed within one block(2^FileOffset) + MaxFileOffset = 1 << FileOffset, + MaxFileID = 1 << FileID, + } +} + +/// A simple pair used to describe a range in a buffer and not just a point. +struct SourceRange +{ + SourceLocation begin, end; + + static SourceRange opCall(SourceLocation loc) + { + return SourceRange(loc, loc + 1); + } + + static SourceRange opCall(SourceLocation begin, SourceLocation end) + { + SourceRange res; + res.begin = begin; + res.end = end; + return res; + } + + bool isValid() { return begin.isValid && end.isValid; } + bool isInvalid() { return begin.isInvalid || end.isInvalid; } + + bool isReal() { return begin.isReal && end.isReal; } + + /// Get a new range spanning both ranges + SourceRange opAdd(SourceRange that) + { + assert(this.isValid && that.isValid, "Invalid range"); + return SourceRange( + this.begin.val < that.begin.val? this.begin : that.begin, + this.end.val > that.end.val? 
this.end : that.end); + } +} + diff -r e331e4e816e4 -r 5e383b3755d6 basic/SourceManager.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/basic/SourceManager.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,284 @@ +module basic.SourceManager; + +import tango.core.Memory : GC; +import tango.io.UnicodeFile; +import tango.io.Stdout; +import tango.text.convert.Layout; + +public import basic.SourceLocation; + +private alias char[] string; + +/** + SourceManager is used to handle input files, by loading them in in chunks + that can be referenced elsewhere. + + It will also help extract the line/col of locations and convert between + real and virtual locations + **/ +class SourceManager +{ + this() + { + layout = new Layout!(char); + } + + /** + Will load in the file belonging to the filename + + filename = The file to load. Theres some assumptions about this file. + 1. The file has a BOM or is valid utf-8 + 2. The file is not empty, unreadable, a folder etc. + **/ + SourceLocation addFile(string filename) + { + scope file = new UnicodeFile!(char)(filename, Encoding.UTF_8); + auto file_data = file.read(); + return createCheckpoints(file_data, filename); + } + + /** + Returns a string slice containing the part of the file after loc (a + pointer might be better, it allows negative indexing) + **/ + string getRawData(SourceLocation loc) + { + CP cp = checkpoints[loc.fileID]; + auto length = cp.data_end - cp.data.ptr; + return cp.data.ptr[loc.fileOffset .. length]; + } + + /** + Extracts the line number of the given location + O("file size") if cache isn't built, O(log "lines in file") else + **/ + uint getLineNumber(SourceLocation loc) + { + assert(loc.isValid, "Location is invalid"); + assert(loc.isReal, "Virtual locations not supported yet"); + assert(loc.fileID < checkpoints.length, "Non-existent location"); + + CP* cp = &checkpoints[loc.fileID]; + auto cache = &linecache[cp.meta_index]; + if (!cache.isCached) + cache.build(cp.data_start[0 .. 
cp.data_end - cp.data_start]); + return cache.lineOf(getFileOffset(loc)); + } + + /** + Extracts the full byte offset into a file, at which a location + is pointing. + **/ + uint getFileOffset(SourceLocation loc) + { + return loc.fileOffset + + checkpoints[loc.fileID].part * loc.Bits.MaxFileOffset; + } + + /** + Extracts a string containing the entire line loc appears in. + **/ + string getLine(SourceLocation loc) + { + // The line is extracted by getting two pointers to the exact location + // and decreasing one until the nearest newline while the other ptr is + // increased to the nearest newline. + CP* cp = &checkpoints[loc.fileID]; + char* ptr = cp.data.ptr + loc.fileOffset; + char* ptr_lo = ptr; + while (cp.inRange(ptr_lo) && *ptr_lo != '\n' && *ptr_lo != '\r') + --ptr_lo; + while (cp.inRange(ptr) && *ptr != '\n' && *ptr != '\r') + ++ptr; + return ptr_lo[1 .. ptr - ptr_lo]; + } + + /** + Gets the column of where the loc appears. + **/ + int getColumn(SourceLocation loc) + { + // Use same approach as getLine + + CP* cp = &checkpoints[loc.fileID]; + char* ptr = cp.data.ptr + loc.fileOffset; + char* ptr_lo = ptr; + while (cp.inRange(ptr_lo) && *ptr_lo != '\n' && *ptr_lo != '\r') + --ptr_lo; + return cast(int)ptr - cast(int)ptr_lo - 1; + } + + /** + Get the original source text of a SourceRange + **/ + string getText(SourceRange loc) + { + assert(loc.isValid, "Range is invalid"); + assert(loc.isReal, "Virtual locations not supported yet"); + auto begin = getFileOffset(loc.begin); + auto end = getFileOffset(loc.end); + return checkpoints[loc.begin.fileID].data_start[begin .. end]; + } + + /** + Get the original source text + **/ + string getText(SourceLocation loc, size_t length) + { + return getText(SourceRange(loc, loc + length)); + } + + /** + Convert a location into a string. 
Something like "file(line)" + **/ + string getLocationAsString(SourceLocation loc) + { + assert(loc.isValid, "Location is invalid"); + return layout.convert("{}({})", + checkpoints[loc.fileID].filename, + getLineNumber(loc)); + } + string getLocationAsString(SourceRange loc) + { + return layout.convert("{}({}:{})", + checkpoints[loc.begin.fileID].filename, + getFileOffset(loc.begin), + getFileOffset(loc.end)); + } + + /** + Get the file name of a loc. + **/ + string getFile(SourceLocation loc) + { + return checkpoints[loc.fileID].filename; + } + +private: + synchronized + SourceLocation createCheckpoints(string data, string source_file) + { + // The line-cache is added, but not built, + // getLineNumber makes sure it is called when needed. + linecache ~= FileLineCache(); + uint meta_index = linecache.length - 1; + + // SourceLocation's can only index relatively short buffers, therefore + // the file is split into several checkpoints. + uint checkpoint_counter = 0; + char* data_start = data.ptr; + char* data_end = data.ptr + data.length; + while (data.length > 0) + { + uint to_take = min(data.length, SourceLocation.Bits.MaxFileOffset); + checkpoints ~= + CP(source_file, + data_start, + data_end, + data[0 .. to_take], + checkpoint_counter++, + meta_index); + data = data[to_take .. $]; + } + checkpoint_counter = checkpoints.length - checkpoint_counter; + return SourceLocation.fromFileID(checkpoint_counter); + } + + /// Contains the read/generated data. + CP[] checkpoints; + /// Cache used to speed up finding of line-starts. + FileLineCache[] linecache; + /// Used for formatting locations as strings. + Layout!(char) layout; + + // These really should be magically available everywhere and templated. + int min(int a, int b) { return a < b? a : b; } + int max(int a, int b) { return a >= b? 
a : b; } + + // A Check Point is used to store a file in multiple parts, to overcome + // the limitation of SourceLocation only having a rather limited amount of + // bits to index any one file. + struct CP + { + // read-only + char[] filename; + // ditto + char* data_start; + char* data_end; + // ditto + char[] data; + // ditto + uint part = 0; + // ditto + uint meta_index = 0; + + bool inRange(char* p) + { + return p >= data_start && p < data_end; + } + } + + struct FileLineCache + { + /// Contains the offset of the i'th line on index i + uint[] line_starts; + + /// Indicates weather the cache has been built or not + bool isCached = false; + + /** + This method does a binary search to find the line that contains the + given offset. + **/ + uint lineOf(uint offset) + { + size_t beg = 0, + end = line_starts.length, + mid = end >> 1; + + while( beg < end ) + { + if( line_starts[mid] <= offset ) + beg = mid + 1; + else + end = mid; + mid = beg + ( end - beg ) / 2; + } + return mid; + } + + /** + Builds the cache data - always make sure this has been called before + calling lineOf. + **/ + void build(char[] data) + { + // j starts at 1, because we need an additional place in the array + // to indicate that line 1 starts at index 0. + size_t j = 1; + char* it = data.ptr, end = data.ptr + data.length; + for (; it != end; ++it) + if (*it == '\n') + ++j; + // Allocate without initialization. 
Saves a bit of time + line_starts.length = j; + line_starts[0] = 0; + + // Go over the data again, writing the line starts in our new array + j = 1; + for (size_t i = 0; i < data.length; i++) + { + if (data[i] == '\n') + line_starts[j++] = i; + else if (data[i] == '\r') + { + line_starts[j++] = i; + i += cast(size_t)(data[i+1] == '\n'); + } + } + + isCached = true; + } + } +} + diff -r e331e4e816e4 -r 5e383b3755d6 dang/compiler.d --- a/dang/compiler.d Fri Apr 18 23:45:45 2008 +0200 +++ b/dang/compiler.d Sun May 25 14:43:16 2008 +0200 @@ -2,51 +2,65 @@ import tango.io.Stdout, tango.core.Signal, + tango.core.Memory, + tango.sys.Process, + tango.time.StopWatch, + tango.io.FileConduit, tango.io.FilePath; import lexer.Lexer, parser.Parser; -import misc.DataSource; +import basic.SourceManager; -import ast.Decl; +import basic.Message; + +import ast.Module; import tools.AstPrinter, tools.DotPrinter; -import gen.LuaGen, - gen.LLVMGen; +import gen.CodeGen; import sema.Visitor, - sema.SymbolTableBuilder, - sema.Declarations; + sema.AstAction, + sema.ScopeBuilder, + sema.ScopeCheck, + sema.TypeCheck; -import dang.OptParse; +import tango.stdc.posix.unistd; + +import Opt = dang.OptParse; void checkFiles(char[][] *files) { - bool error = false; + GC.disable(); + bool non_existant_files = false; + bool duplicate_files = false; char[][] validFiles; - foreach(file ; *files) + foreach (file; *files) { - auto path = new FilePath(file); + scope path = new FilePath(file); - if(!path.exists) + if (!path.exists) { - Stdout("File '"~file~"' does not exists").newline; - error = true; + Stderr.formatln("'{}' does not exist", file).newline; + non_existant_files = true; continue; } bool fileInStack = false; - foreach(vFile ; validFiles) - if(vFile == file) + foreach (vFile; validFiles) + if (vFile == file) + { fileInStack = true; + duplicate_files = true; + } - if(fileInStack) + if (fileInStack) continue; validFiles ~= file; @@ -54,9 +68,12 @@ *files = validFiles; - if(error) - throw new 
Exception("Some file(s) did not exist"); + if (non_existant_files) + throw new Exception("All files given must exist"); + if (duplicate_files) + Stderr("warning: duplicate files ignored").newline; } + void main(char[][] args) { char[][] filesToHandle; @@ -67,118 +84,182 @@ Signal!(Lexer) postLex; Signal!(Lexer) preParse; - Signal!(Decl[], DataSource) postParse; + Signal!(Module[], SourceManager) postParse; preStart.attach(&checkFiles); - auto argParse = new OptionParser; + auto argParse = new Opt.OptionParser(`Dang "D" compiler v0.0`); + + bool optimize = false; + bool inline = false; + - argParse.addOption( - ["-h", "--help"],{ - argParse.helpText(); - return; - } - ).help("Show this help message"); + SourceManager src_mgr = new SourceManager; + MessageHandler messages = new MessageHandler(src_mgr); + + argParse.addOption(["-h", "--help"], Opt.Action.Help) + .help("Show this help message"); - argParse.addOption( - ["--ast-dump-dot"], { - postParse.attach( - (Decl[] decls, DataSource src) { - auto print = new DotPrinter(); - print.print(decls); - }); - } - ).help("Output the AST as dot-graphicz"); + argParse.addOption(["--ast-dump-dot"], + "what-to-do", Opt.Action.StoreConst, "dot") + .help("Output the AST in the dot format"); + argParse.addOption(["--ast-dump-code"], + "what-to-do", Opt.Action.StoreConst, "code") + .help("Output the AST as code"); + argParse.addOption(["--gen-llvm"], + "what-to-do", Opt.Action.StoreConst, "gen-llvm") + .help("Compile to LLVM code (default)"); + argParse.addOption(["-c"], + "what-to-do", Opt.Action.StoreConst, "compile") + .help("Compile to .o or executeable"); argParse.addOption( - ["--ast-dump-code"], { - postParse.attach( - (Decl[] decls, DataSource src) { - auto print = new AstPrinter(src); - print.print(decls); - }); + ["-O","--optimize"], { + optimize = true; } - ).help("Output the AST as dot-graphicz"); + ).help("Tell LLVM to do its standard optimizations"); argParse.addOption( - ["--gen-lua"], { - postParse.attach( - 
(Decl[] decls, DataSource src) { - auto luaGen = new LuaGen(); - luaGen.gen(decls); - }); + ["--inline"], { + inline = true; } - ).help("Compile to Lua code"); + ).help("Tell LLVM that its allowed to inline functions"); - argParse.addOption( - ["--gen-llvm"], { - postParse.attach( - (Decl[] decls, DataSource src) { - auto llvmGen = new LLVMGen(); - llvmGen.gen(decls); - }); - } - ).help("Compile to LLVM code"); + argParse + .addOption(["--time"], Opt.Action.SetTrue, "time") + .help("Time the various operations performed."); auto options = argParse.parse(args); filesToHandle ~= options.args; - try - { - preStart(&filesToHandle); - } - catch(Exception e) - { - return; - } + // Will throw exception if some files don't exist + preStart(&filesToHandle); + + struct Measurement { char[] label; double time; } + Measurement[] timings; - foreach(file ; filesToHandle) + auto what = options["what-to-do"]; + if (what == "" || what == "gen-llvm") + postParse.attach( + (Module[] modules, SourceManager sm) { + foreach(m ; modules) + { + StopWatch w; w.start; + auto llvmGen = new CodeGen(); + auto file = new FileConduit(m.moduleName~".bc", FileConduit.WriteCreate); + llvmGen.gen(m, file.fileHandle, optimize, inline); + timings ~= Measurement("Generating LLVM bytecode", w.stop); + } + }); + else if (what == "compile") + postParse.attach( + (Module[] modules, SourceManager sm) { + foreach(m ; modules) + { + StopWatch w; w.start; + auto llvmGen = new CodeGen(); + auto llc = new Process("llc","-o=-"); + auto gcc = new Process("gcc","-c","-o","out.o","-x","assembler","-"); + llc.execute(); + int i = dup(llc.stdin.fileHandle); + llc.stdin.detach; + llvmGen.gen(m, i, optimize, inline); + llc.wait(); + gcc.execute(); + gcc.stdin.copy(llc.stdout); + gcc.stdin.detach; + gcc.wait(); + timings ~= Measurement("Generating assemble bytecode", w.stop); + } + + }); + else if (what == "dot") + postParse.attach( + (Module[] m, SourceManager sm) { + StopWatch w; w.start; + // auto print = new 
DotPrinter(); +// print.print(m); + timings ~= Measurement("Generating dot output", w.stop); + }); + else if (what == "code") + postParse.attach( + (Module[] modules, SourceManager sm) { + StopWatch w; w.start; + auto print = new AstPrinter(sm); + foreach ( m ; modules ) + print.print(m); + timings ~= Measurement("Converting AST to text", w.stop); + }); + StopWatch total; + total.start; + + Module[] modules; + + StopWatch watch; + watch.start; + foreach (file; filesToHandle) { preLex(file); - auto src = DataSource(file); - auto lexer = new Lexer(src); -/* - auto t = lexer.next; - while(t.getType != "EOF") - { - Stdout(t.getType)(" : ")(t.get).newline; - t = lexer.next; - } - lexer = new Lexer(src); -*/ + auto start = src_mgr.addFile(file); + auto lexer = new Lexer(start, src_mgr, messages); postLex(lexer); preParse(lexer); - auto parser = new Parser; - auto decls = parser.parse(lexer); + auto parser = new Parser(messages); + auto action = new AstAction(src_mgr); + modules ~= cast(Module)parser.parse(src_mgr, lexer, action); + timings ~= Measurement("Lex + Parse of '"~file~"'", watch.stop); + messages.checkErrors(ExitLevel.Parser); +/* + StopWatch watch2; + watch.start; + watch2.start; + Module[] mods = (new LoadModule).visit(m, src_mgr, messages); + (new ScopeBuilder).visit(m); + auto scope_builder = watch2.stop; + watch2.start; + (new ScopeCheck).visit(m); + auto scope_check = watch2.stop; + watch2.start; + (new TypeCheck).visit(m); + auto type_check = watch2.stop; + watch2.start; - (new SymbolTableBuilder).visit(decls); - (new Declarations).visit(decls); + foreach (decl; m.decls) + decl.simplify(); + auto simplify = watch2.stop; + auto extra_stuff = watch.stop; + timings ~= Measurement("Extra stuff", watch.stop); + timings ~= Measurement(" - Building scopes", scope_builder); + timings ~= Measurement(" - Checking scopes", scope_check); + timings ~= Measurement(" - Checking types", type_check); - postParse(decls, src); + postParse(m, src_mgr);*/ } -/* if 
(args.length > 1 && args[1] == "lex") - { - Token t; + (new ScopeBuilder).visit(modules); + StopWatch watch2; + watch.start; + watch2.start; + (new ScopeCheck).visit(modules); + auto scope_check = watch2.stop; + watch2.start; + (new TypeCheck).visit(modules); + auto type_check = watch2.stop; + watch2.start; - t = lexer.next(); - while(t.type != Tok.EOF) - { - Stdout(src.get(t.position, t.length)).newline; - t = lexer.next(); - } - } - else - { - auto decl = parser.parse(lexer); - if(args.length > 1 && args[1] == "dump-ast") - { - auto buffer = new AstBuffer(src.data); - decl.print(buffer); - } - }*/ + foreach (m; modules) + foreach (decl; m.decls) + decl.simplify(); + + timings ~= Measurement("Total", total.stop); + postParse(modules, src_mgr); + + if (options.flag("time")) + foreach (m; timings) + Stderr.formatln("{,-45} {}ms", m.label, m.time*1e3); } + diff -r e331e4e816e4 -r 5e383b3755d6 dsss.conf --- a/dsss.conf Fri Apr 18 23:45:45 2008 +0200 +++ b/dsss.conf Sun May 25 14:43:16 2008 +0200 @@ -1,9 +1,20 @@ [lexer] [parser] [ast] +[gen] + [dang/compiler.d] Target = Dang +buildflags = -llstdc++ \ + -llLLVMSystem -llLLVMSupport -llLLVMCore -llLLVMBitWriter -llLLVMBitReader -llLLVMAnalysis -llLLVMTarget \ + -llLLVMTransformUtils -llLLVMScalarOpts -llLLVMipa -llLLVMipo \ + -llLLVMInstrumentation -llllvm-c-ext -lldl [tests/run.d] Target = tests/run +buildflags = -llllvm-c-ext -llstdc++ \ + -llLLVMCore -llLLVMBitWriter -llLLVMBitReader -llLLVMAnalysis -llLLVMTarget \ + -llLLVMTransformUtils -llLLVMScalarOpts -llLLVMipa -llLLVMipo \ + -llLLVMInstrumentation -llLLVMSystem -llLLVMSupport + diff -r e331e4e816e4 -r 5e383b3755d6 gen/CodeGen.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gen/CodeGen.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,980 @@ +module gen.CodeGen; + +import tango.io.Stdout, + Int = tango.text.convert.Integer; +import tango.core.Array : find, partition; + +import llvm.llvm; + +import ast.Decl, + ast.Stmt, + ast.Exp, + ast.Module : DModule = Module; 
+ +import misc.Error, + basic.SmallArray; + +import lexer.Token; + +import sema.Scope, + sema.Visitor; + +/** + Wrapper for Values representing rvalues (things you can only read from) + **/ +private struct RValue +{ + /** + Returns true if this is a simple value, like an int or a pointer. + This is basicly anything except a struct, which will contain a Value that + is a pointer to the struct. + **/ + bool isSimple() { return simple; } + /// Opposite of isSimple + bool isAggregate() { return !simple; } + + Value value; + private bool simple = true; +} + +/** + Wrapper for Values representing lvalues (things you can write to) + **/ +private struct LValue +{ + Value getAddress() { return value; } + private Value value; +} + +class CodeGen +{ +public: + this() + { + b = new Builder; + ZeroIndex = ConstantInt.GetU(Type.Int32, 0); + + table = new SimpleSymbolTable(); + + createBasicTypes(); + } + + /** + Generate a new module. + **/ + /* + Find all function decls and add the functions to the llvm module, so + they can be referenced. + + Make sure all var-decls are located before functions, so we wont get + problems when referencing the global vars. 
+ + Generate the actual llvm code needed for all decls + + Optimize if requested + + Write to filehandle (can be a file or stdout) + */ + void gen(DModule mod, uint handle, bool optimize, bool inline) + { + this.mod = mod; + // create module + m = new .llvm.llvm.Module("main_module"); + scope(exit) m.dispose(); + + table.enterScope; + + BytePtr = PointerType.Get(Type.Int8); + auto temp = FunctionType.Get( + Type.Void, + [BytePtr, BytePtr, Type.Int32, Type.Int32]); + llvm_memcpy = m.addFunction(temp, "llvm.memcpy.i32"); + + auto registerFunc = + (FuncDecl fd) + { + Type[] param_types; + foreach (i, p; fd.funcArgs) + { + DType t = p.env.find(p.identifier).type; + if (auto st = t.asStruct()) + { + Type pointer = PointerType.Get(llvm(st)); + param_types ~= pointer; + } + else if (auto ar = t.asArray()) + { + Type pointer = PointerType.Get(llvm(ar)); + param_types ~= pointer; + } + else + param_types ~= llvm(t); + } + auto ret_t = fd.env.find(fd.identifier).type; + if(auto st = cast(DStruct)ret_t) + ret_t = DType.Void; + else if(auto f = cast(DFunction)ret_t) + ret_t = f.returnType; + auto func_t = FunctionType.Get(llvm(ret_t), param_types); + auto llfunc = m.addFunction(func_t, fd.identifier.getMangled); + + foreach (i, p; fd.funcArgs) + { + if(i == 0 && fd.sret) + llfunc.addParamAttr(0, ParamAttr.StructRet); + + DType t = p.env.find(p.identifier).type; + if (auto st = t.asStruct) + { + if (i == 0 && fd.sret) + continue; + llfunc.addParamAttr(i, ParamAttr.ByVal); + } + else if (auto ar = t.asArray) + { + llfunc.addParamAttr(i, ParamAttr.ByVal); + } + } + }; + auto visitor = new VisitFuncDecls(registerFunc); + visitor.visit([mod]); + // Before beginning we move all top level var-decls to the start + // and then we generate the var-decls first + // partition is NOT required to be stable, but that should not create + // any problems. 
+ partition(mod.decls, (Decl d) { return d.declType == DeclType.VarDecl; }); + + foreach (decl; mod.decls) + genRootDecl(decl); + + table.leaveScope; + +// debug m.verify(); + + if(optimize) + m.optimize(inline); + + m.writeBitcodeToFileHandle(handle); + } + +private: + /** + Generate a single top-level Decl + **/ + void genRootDecl(Decl decl) + { + switch(decl.declType) + { + case DeclType.FuncDecl: + FuncDecl funcDecl = cast(FuncDecl)decl; + + // Empty function - declare; + if(funcDecl.emptyFunction) + return; + + llvm(funcDecl.type); + auto llfunc = m.getNamedFunction(funcDecl.identifier.getMangled); + auto func_tp = cast(PointerType)llfunc.type; + auto func_t = cast(FunctionType)func_tp.elementType(); + auto ret_t = func_t.returnType(); + + + auto bb = llfunc.appendBasicBlock("entry"); + b.positionAtEnd(bb); + + table.enterScope; + foreach (i, v; funcDecl.funcArgs) + { + llfunc.getParam(i).name = v.identifier.get; + auto name = v.identifier.get; + if (!cast(PointerType)llfunc.getParam(i).type) + { + auto AI = b.buildAlloca(llfunc.getParam(i).type, name); + // Value va = b.buildLoad(val, name); + b.buildStore(llfunc.getParam(i), AI); + table[name] = AI; + } + else + table[name] = llfunc.getParam(i); + } + + foreach (stmt; funcDecl.statements) + genStmt(stmt); + + // if the function didn't end with a return, we automatically + // add one (return 0 as default) + if (b.getInsertBlock().terminated() is false) + if (ret_t is Type.Void) + b.buildRetVoid(); + else + b.buildRet(ConstantInt.GetS(ret_t, 0)); + + table.leaveScope; + break; + + case DeclType.VarDecl: + auto varDecl = cast(VarDecl)decl; + auto id = varDecl.env.find(varDecl.identifier); + Type t = llvm(id.type); + GlobalVariable g = m.addGlobal(t, id.get); + g.initializer = ConstantInt.GetS(t, 0); + table[varDecl.identifier.get] = g; + break; + + case DeclType.StructDecl: + auto structDecl = cast(StructDecl)decl; + llvm(structDecl.type); + //m.addTypeName(structDecl.identifier.get, llvm(structDecl.type)); + 
break; + + default: + break; + } + } + + /** + Generate a single local Decl + **/ + void genDecl(Decl decl) + { + switch(decl.declType) + { + case DeclType.VarDecl: + auto varDecl = cast(VarDecl)decl; + auto name = varDecl.identifier.get; + auto sym = varDecl.env.find(varDecl.identifier); + auto AI = b.buildAlloca(llvm(sym.type), name); + table[name] = AI; + if (varDecl.init) + { + LValue dst = genLValue(varDecl.identifier); + RValue src = genExpression(varDecl.init); + storeThroughLValue(dst, src, sym.type); + } + break; + + default: + } + } + + // Remove - do it right (basic/Messages.d) + struct PE + { + static char[] NoImplicitConversion = + "Can't find an implicit conversion between %0 and %1"; + static char[] VoidRetInNonVoidFunc = + "Only void functions can return without an expression"; + } + + /** + Takes two iX and overwrite the smaller one, with a sign-extended version + so both values can be operated on without worrying about the exact types. + + Used when adding a int and a long or similar. + + Currently unused + **/ + void sextSmallerToLarger(ref Value left, ref Value right) + { + if (left.type != right.type) + { + // try to find a convertion - only works for iX + IntegerType l = cast(IntegerType) left.type; + IntegerType r = cast(IntegerType) right.type; + if (l is null || r is null) + throw error(__LINE__, PE.NoImplicitConversion) + .arg(left.type.toString) + .arg(right.type.toString); + + if (l.numBits() < r.numBits()) + left = b.buildSExt(left, r, ".cast"); + else + right = b.buildSExt(right, l, ".cast"); + } + } + + /** + Generate a single expression. + + This is the most general way of generating expressions and therefore + returns an RValue. 
+ **/ + RValue genExpression(Exp exp) + { + switch(exp.expType) + { + case ExpType.Binary: + return RValue(genBinExp(cast(BinaryExp)exp)); + case ExpType.IntegerLit: + auto integetLit = cast(IntegerLit)exp; + auto val = Integer.parse(integetLit.get); + return RValue(ConstantInt.GetS(Type.Int32, val)); + case ExpType.Negate: + auto negateExp = cast(NegateExp)exp; + auto target = genExpression(negateExp.exp); + return RValue(b.buildNeg(target.value, "neg")); + case ExpType.Deref: + auto derefExp = cast(DerefExp)exp; + auto target = genExpression(derefExp.exp); + return RValue(b.buildLoad(target.value, "deref")); + case ExpType.AssignExp: + auto AE = cast(AssignExp)exp; + LValue dst = genLValue(AE.identifier); + RValue src = genExpression(AE.exp); + storeThroughLValue(dst, src, AE.exp.type()); + return src; + case ExpType.Index: + auto indexExp = cast(IndexExp)exp; + return loadLValue(genLValue(exp)); + case ExpType.CallExp: + auto callExp = cast(CallExp)exp; + // BUG: Might not be a simple identifier, a.foo(x) is also a + // valid call - or foo(x)(y) for that matter. + auto id = exp.env.find(cast(Identifier)callExp.exp); + scope args = new Value[callExp.args.length]; + foreach (i, arg; callExp.args) + args[i] = genExpression(arg).value; + llvm(id.type); + auto f = m.getNamedFunction(id.getMangled); + DFunction f_type = cast(DFunction)id.type; + bool isVoid = f_type.returnType is DType.Void; + // BUG: doesn't do implicit type-conversion on args + auto r = b.buildCall(f, args, isVoid? 
"" : "call"); + return RValue(r); + case ExpType.CastExp: + auto castExp = cast(CastExp)exp; + auto value = genExpression(castExp.exp).value; + + if (!castExp.type.hasImplicitConversionTo(castExp.type)) + assert(0, "Invalid cast"); + + Value v; + if(castExp.exp.type.byteSize <= castExp.type.byteSize) + v = b.buildZExt(value, llvm(castExp.type), "zext"); + else + v = b.buildTrunc(value, llvm(castExp.type), "trunc"); + + return RValue(v); + + case ExpType.Identifier: + auto identifier = cast(Identifier)exp; + auto id = exp.env.find(identifier); + if(id.type.isStruct() || id.type.isArray()) + return RValue(table.find(id.get)); + else + return RValue(b.buildLoad(table.find(id.get), id.get)); + case ExpType.MemberReference: + return loadLValue(genLValue(exp)); + } + assert(0, "Reached end of switch in genExpression"); + return RValue(null); + } + + /** + Generate a binary expression. + + Currently only works for signed and unsigned integers, but is almost + ready for floats and should be expanded to everything else. 
+ **/ + Value genBinExp(BinaryExp e) + { + auto left = genExpression(e.left).value; + auto right = genExpression(e.right).value; + DType t_a = e.left.type; + DType t_b = e.right.type; + + Value res; + // TODO: do usual type promotions on a and b + // TODO: support floats + if (t_a.isInteger() && t_b.isInteger()) + { + Operation op = t_a.getOperationWith(op2op(e.op), t_b); + assert(op.isBuiltin(), + "integers should only use builtin ops"); + alias BuiltinOperation BO; + BO val = op.builtinOp(); + // map val to buildAdd or similar + switch (val) { + case BO.Add: res = b.buildAdd(left, right, "add"); break; + case BO.Sub: res = b.buildSub(left, right, "sub"); break; + case BO.Mul: res = b.buildMul(left, right, "mul"); break; + case BO.SDiv: res = b.buildSDiv(left, right, "div"); break; + case BO.UDiv: res = b.buildUDiv(left, right, "div"); break; + case BO.FDiv: res = b.buildFDiv(left, right, "div"); break; + case BO.SRem: res = b.buildSRem(left, right, "rem"); break; + case BO.URem: res = b.buildURem(left, right, "rem"); break; + case BO.FRem: res = b.buildFRem(left, right, "rem"); break; + default: + LLVMPred pred = predFromBI(val); + IntPredicate ip = pred.intPred; + RealPredicate rp = pred.realPred; + assert(pred.isValid, "Not a predicate"); + if (pred.isReal) + res = b.buildFCmp(rp, left, right, "cmp"); + else + res = b.buildICmp(ip, left, right, "cmp"); + break; + } + } + else + /* + if left has op for right's type: + a_op = left.op(right) + if right has usable op_r: + b_op_r = right.op_r(left) + if a_op or b_op_r is set, choose the best one + else if op is commutative + if left has usable op_r + a_op_r = left.op_r(right) + if right has usable op + b_op = right.op(left) + choose best one from a_op_r and b_op + else error + */ + assert(0, "Not integers?"); + + return res; + } + + /** + Generates one statement + **/ + // This should be split into specific methods - one per Stmt type? 
+ void genStmt(Stmt stmt) + { + switch(stmt.stmtType) + { + case StmtType.Compound: + auto stmts = cast(CompoundStatement)stmt; + foreach (s; stmts.statements) + genStmt(s); + break; + case StmtType.Return: + auto ret = cast(ReturnStmt)stmt; + DFunction type = stmt.env.parentFunction().type(); + Type t = llvm(type.returnType); + if (ret.exp is null) + if (t is Type.Void) + { + b.buildRetVoid(); + return; + } + else + throw error(__LINE__, PE.VoidRetInNonVoidFunc); + + RValue v = genExpression(ret.exp); +/* if (v.type != t) + { + IntegerType v_t = cast(IntegerType) v.type; + IntegerType i_t = cast(IntegerType) t; + if (v_t is null || i_t is null) + throw error(__LINE__, PE.NoImplicitConversion) + .arg(v.type.toString) + .arg(t.toString); + + if (v_t.numBits() < i_t.numBits()) + v = b.buildSExt(v, t, ".cast"); + else + v = b.buildTrunc(v, t, ".cast"); + }*/ + b.buildRet(v.value); + break; + case StmtType.Decl: + auto declStmt = cast(DeclStmt)stmt; + genDecl(declStmt.decl); + break; + case StmtType.Exp: + auto expStmt = cast(ExpStmt)stmt; + genExpression(expStmt.exp); + break; + case StmtType.If: + auto ifStmt = cast(IfStmt)stmt; + Value cond = genExpression(ifStmt.cond).value; + if (cond.type !is Type.Int1) + { + Value False = ConstantInt.GetS(cond.type, 0); + cond = b.buildICmp(IntPredicate.NE, cond, False, ".cond"); + } + auto func_name = stmt.env.parentFunction().identifier.getMangled; + Function func = m.getNamedFunction(func_name); + bool has_else = (ifStmt.else_body !is null); + + auto thenBB = func.appendBasicBlock("then"); + auto elseBB = has_else? func.appendBasicBlock("else") : null; + auto mergeBB = func.appendBasicBlock("merge"); + + b.buildCondBr(cond, thenBB, has_else? 
elseBB : mergeBB); + b.positionAtEnd(thenBB); + genStmt(ifStmt.then_body); + thenBB = b.getInsertBlock(); + if (b.getInsertBlock().terminated() is false) + b.buildBr(mergeBB); + + if (has_else) + { + b.positionAtEnd(elseBB); + genStmt(ifStmt.else_body); + elseBB = b.getInsertBlock(); + if (elseBB.terminated() is false) + b.buildBr(mergeBB); + } + + b.positionAtEnd(mergeBB); + break; + case StmtType.While: + auto wStmt = cast(WhileStmt)stmt; + auto func_name = stmt.env.parentFunction().identifier.get; + Function func = m.getNamedFunction(func_name); + + auto condBB = func.appendBasicBlock("cond"); + auto bodyBB = func.appendBasicBlock("body"); + auto doneBB = func.appendBasicBlock("done"); + + b.buildBr(condBB); + b.positionAtEnd(condBB); + Value cond = genExpression(wStmt.cond).value; + if (cond.type !is Type.Int1) + { + Value False = ConstantInt.GetS(cond.type, 0); + cond = b.buildICmp(IntPredicate.NE, cond, False, ".cond"); + } + b.buildCondBr(cond, bodyBB, doneBB); + + b.positionAtEnd(bodyBB); + genStmt(wStmt.whileBody); + if (b.getInsertBlock().terminated() is false) + b.buildBr(condBB); + + b.positionAtEnd(doneBB); + break; + case StmtType.Switch: + auto sw = cast(SwitchStmt)stmt; + Value cond = genExpression(sw.cond).value; + + auto func_name = stmt.env.parentFunction().identifier.get; + Function func = m.getNamedFunction(func_name); + + BasicBlock oldBB = b.getInsertBlock(); + BasicBlock defBB; + BasicBlock endBB = func.appendBasicBlock("sw.end"); + if (sw.defaultBlock) + { + defBB = Function.InsertBasicBlock(endBB, "sw.def"); + b.positionAtEnd(defBB); + foreach (case_statement; sw.defaultBlock) + genStmt(case_statement); + if (!defBB.terminated()) + b.buildBr(endBB); + b.positionAtEnd(oldBB); + } + else + defBB = endBB; + auto SI = b.buildSwitch(cond, defBB, sw.cases.length); + foreach (c; sw.cases) + { + BasicBlock prevBB; + foreach (i, val; c.values) + { + auto BB = Function.InsertBasicBlock(defBB, "sw.bb"); + SI.addCase(ConstantInt.GetS(cond.type, 
c.values_converted[i]), BB); + + if (i + 1 == c.values.length) + { + b.positionAtEnd(BB); + foreach (case_statement; c.stmts) + genStmt(case_statement); + if (!BB.terminated()) + b.buildBr(c.followedByDefault? defBB : endBB); + } + + if (prevBB !is null && !prevBB.terminated()) + { + b.positionAtEnd(prevBB); + b.buildBr(BB); + } + prevBB = BB; + } + } + b.positionAtEnd(endBB); + break; + } + } + + /** + Given the address of something, load it into an alloc. + **/ + RValue loadLValue(LValue addr, char[] name = null) + { + Value val = addr.getAddress(); + if (name is null) + name = val.name.length > 0? val.name : "load"; + + auto res = b.buildLoad(val, name); + return RValue(res); + } + + /* + Get the address of an expression - allowing us to modify something in + memory or on the stack. + */ + LValue genLValue(Exp exp) + { + switch(exp.expType) + { + case ExpType.Identifier: + auto identifier = cast(Identifier)exp; + auto id = exp.env.find(identifier); + return LValue(table.find(id.get)); + case ExpType.Deref: + // LValue(*x): x + // RValue(*x): load(x) + // This way *x = *x + 1 will work + // TODO: Get's an i32** rather than i32* because it's alloc'd + // so there needs to be a load? + auto DE = cast(DerefExp)exp; + return genLValue(DE.exp); + case ExpType.Index: + auto indexExp = cast(IndexExp)exp; + auto type = indexExp.target.type; + auto index = genExpression(indexExp.index); + Value[2] gep_indices; + gep_indices[0] = ZeroIndex; + gep_indices[1] = index.value; + Value res; + auto target = genLValue(indexExp.target).getAddress(); + if (type.isArray()) + res = b.buildGEP(target, gep_indices[0 .. 2], "index"); + else if (type.isPointer()) + res = b.buildGEP(target, gep_indices[1 .. 
2], "index"); + else assert(0, "Can only index pointers and arrays"); + return LValue(res); + case ExpType.MemberReference: + auto mem = cast(MemberReference)exp; + switch (mem.target.expType) + { + case ExpType.Identifier: + auto identifier = cast(Identifier)mem.target; + auto child = mem.child; + auto id = exp.env.find(identifier); + Value v = table.find(id.get); + DType t = id.type; + auto st = t.asStruct; + + int i = st.indexOf(child.get); + + Value[2] vals; + vals[0] = ZeroIndex; + vals[1] = ConstantInt.GetU(IntegerType.Int32, i); + + Value val = b.buildGEP(v, vals, id.get~"."~child.get); + return LValue(val); + + case ExpType.MemberReference: + auto addr = genLValue(mem.target).getAddress(); + auto child = mem.child; + auto symChild = child.env.find(child); + DType t = mem.target.type; + auto st = t.asStruct; + + int i = st.indexOf(child.get); + + Value[2] vals; + vals[0] = ZeroIndex; + vals[1] = ConstantInt.GetU(IntegerType.Int32, i); + + Value val = b.buildGEP(addr, vals, "."~child.get); + return LValue(val); + } + break; + } + assert(0, "Reached end of switch in getPointer"); + return LValue(null); + } + + /** + Store into an lvalue from a rvalue. Both are assumed to have type t. + **/ + void storeThroughLValue(LValue dst, RValue src, DType t) + { + Value to = dst.getAddress(); + Value from = src.value; + + auto a = cast(PointerType)to.type; + assert(a !is null, "Can only store through pointers"); + + if (auto st = t.asStruct()) + genMemcpy(to, from, t); + else + b.buildStore(from, to); + } + + /** + Copy from src into dst. The values are assumed to have the same size, + and the amount of bytes to copy is taken from t. 
+ **/ + void genMemcpy(Value dst, Value src, DType t) + { + Value from = b.buildBitCast(src, BytePtr, ".copy_from"); + Value to = b.buildBitCast(dst, BytePtr, ".copy_to"); + Value[4] args; + args[0] = to; + args[1] = from; + args[2] = ConstantInt.GetS(Type.Int32, t.byteSize()); + args[3] = ConstantInt.GetS(Type.Int32, 32); + b.buildCall(llvm_memcpy, args[], null); + } + + Error error(uint line, char[] msg) + { + return new Error(msg); + } + + /** + Get the LLVM Type corresponding to a DType. + + Currently using the built-in associative array - not sure if it works + well when the hashes are so uniform. + + Other possibilities would be to find a hash-function that works on + something as small as 4 bytes or to create a sparse array perhaps. + */ + Type llvm(DType t) + { + if (auto llvm_t = t in type_map) + return *llvm_t; + return llvmCreateNew(t); + } + + // Create an LLVM type and insert it into the type map, and return the + // result + Type llvmCreateNew(DType t) + { + if (auto i = cast(DInteger)t) + { + Type res = IntegerType.Get(i.byteSize() * 8); + type_map[t] = res; + return res; + } + else if (auto s = t.asStruct) + { + SmallArray!(Type, 8) members; + DType[] array; + array.length = s.members.length; + + foreach (m; s.members) + array[m.index] = m.type; + + foreach (m; array) + members ~= llvm(m); + + Type res = StructType.Get(members.unsafe()); + type_map[t] = res; + m.addTypeName(s.name, res); + return res; + } + else if (auto f = t.asFunction) + { + // We should never have a function returning structs, because of + // the simplify step + assert(f.returnType.isStruct() == false, "Can't return structs"); + Type ret_t = llvm(f.returnType); + + SmallArray!(Type, 8) params; + foreach(param; f.params) + if (param.isStruct) + params ~= PointerType.Get(llvm(param)); + else if (param.isArray) + params ~= PointerType.Get(llvm(param)); + else + params ~= llvm(param); + + Type res = FunctionType.Get(ret_t, params.unsafe()); + type_map[t] = res; + auto id = new 
Identifier(f.name); + id.setType(f); + + auto f_t = m.getNamedFunction(id.getMangled); + if(f_t is null) + { + auto llfunc = m.addFunction(res, id.getMangled); + + foreach (i, param; f.params) + if (param.isStruct) + llfunc.addParamAttr(i, ParamAttr.ByVal); + + if (f.firstParamIsReturnValue) + { + llfunc.removeParamAttr(0, ParamAttr.ByVal); + llfunc.addParamAttr(0, ParamAttr.StructRet); + } + } + return res; + } + else if (auto f = t.asPointer) + { + Type res = PointerType.Get(llvm(f.pointerOf)); + type_map[t] = res; + return res; + } + else if (auto f = t.asArray) + { + Type res = ArrayType.Get(llvm(f.arrayOf), f.size); + type_map[t] = res; + return res; + } + assert(0, "Only integers, structs and functions are supported"); + } + + // Might as well insert all the basic types from the start + void createBasicTypes() + { + type_map[DType.Void] = Type.Void; + + type_map[DType.Bool] = Type.Int1; + type_map[DType.Byte] = Type.Int8; + type_map[DType.UByte] = Type.Int8; + type_map[DType.Short] = Type.Int16; + type_map[DType.UShort] = Type.Int16; + type_map[DType.Int] = Type.Int32; + type_map[DType.UInt] = Type.Int32; + type_map[DType.Long] = Type.Int64; + type_map[DType.ULong] = Type.Int64; + } + +private: + + // llvm stuff + DModule mod; + .llvm.llvm.Module m; + Builder b; + Function llvm_memcpy; + ConstantInt ZeroIndex; + Type BytePtr; + Type[DType] type_map; + + FuncDecl[char[]] functions; + + SimpleSymbolTable table; +} + +private Operator op2op(BinaryExp.Operator op) +{ + alias BinaryExp.Operator O; + Operator res; + switch (op) { + case O.Add: res = Operator.Add; break; + case O.Sub: res = Operator.Sub; break; + case O.Mul: res = Operator.Mul; break; + case O.Div: res = Operator.Div; break; + + case O.Eq: res = Operator.Eq; break; + case O.Ne: res = Operator.Ne; break; + case O.Lt: res = Operator.Lt; break; + case O.Le: res = Operator.Le; break; + case O.Gt: res = Operator.Gt; break; + case O.Ge: res = Operator.Ge; break; + } + return res; +} + +private struct 
LLVMPred +{ + bool isValid = false; + bool isReal; + union { + IntPredicate intPred; + RealPredicate realPred; + } + + static LLVMPred Int(IntPredicate p) + { + LLVMPred res; + res.isValid = true; + res.isReal = false; + res.intPred = p; + return res; + } + static LLVMPred Real(RealPredicate p) + { + LLVMPred res; + res.isValid = true; + res.isReal = true; + res.realPred = p; + return res; + } +} +private LLVMPred predFromBI(BuiltinOperation op) +{ + alias BuiltinOperation O; + LLVMPred pred; + switch (op) { + case O.Eq: pred = LLVMPred.Int(IntPredicate.EQ); break; + case O.Ne: pred = LLVMPred.Int(IntPredicate.NE); break; + + case O.SLt: pred = LLVMPred.Int(IntPredicate.SLT); break; + case O.ULt: pred = LLVMPred.Int(IntPredicate.ULT); break; + case O.FLt: pred = LLVMPred.Real(RealPredicate.OLT); break; + + case O.SLe: pred = LLVMPred.Int(IntPredicate.SLE); break; + case O.ULe: pred = LLVMPred.Int(IntPredicate.ULE); break; + case O.FLe: pred = LLVMPred.Real(RealPredicate.OLE); break; + + case O.SGt: pred = LLVMPred.Int(IntPredicate.SGT); break; + case O.UGt: pred = LLVMPred.Int(IntPredicate.UGT); break; + case O.FGt: pred = LLVMPred.Real(RealPredicate.OGT); break; + + case O.SGe: pred = LLVMPred.Int(IntPredicate.SGE); break; + case O.UGe: pred = LLVMPred.Int(IntPredicate.UGE); break; + case O.FGe: pred = LLVMPred.Real(RealPredicate.OGE); break; + }; + return pred; +} + +private class VisitFuncDecls : Visitor!(void) +{ + void delegate(FuncDecl) dg; + this(void delegate(FuncDecl funcDecl) dg) + { + this.dg = dg; + } + + override void visitModule(DModule m) + { + foreach (decl; m.decls) + if (auto f = cast(FuncDecl)decl) + dg(f); + } +} + +private class SimpleSymbolTable +{ + Value[char[]][] namedValues; + + void enterScope() + { + namedValues ~= cast(Value[char[]])["__dollar":null]; + } + + void leaveScope() + { + namedValues.length = namedValues.length - 1; + } + + Value put(Value val, char[] key) + { + namedValues[$ - 1][key] = val; + return val; + } + + Value 
find(char[] key) + { + foreach_reverse (map; namedValues) + if(auto val_ptr = key in map) + return *val_ptr; + return null; + } + + alias find opIndex; + alias put opIndexAssign; +} + diff -r e331e4e816e4 -r 5e383b3755d6 gen/LLVMGen.d --- a/gen/LLVMGen.d Fri Apr 18 23:45:45 2008 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,575 +0,0 @@ -module gen.LLVMGen; - -import tango.io.Stdout, - Int = tango.text.convert.Integer; -import tango.core.Array : find; - -import ast.Decl, - ast.Stmt, - ast.Exp; - -import lexer.Token; - -import sema.SymbolTableBuilder; - -class LLVMGen -{ -public: - this() - { - typeToLLVM = - [ - "int"[] : "i32"[], - "byte" : "i8", - "short" : "i16", - "long" : "i64", - "bool" : "i1", - "float" : "float", - "double" : "double", - "void" : "void" - ]; - alias BinaryExp.Operator op; - opToLLVM = [ - op.Add : "add"[], - op.Sub : "sub", - op.Mul : "mul", - op.Div : "div", - op.Eq : "icmp eq", - op.Ne : "icmp ne", - op.Lt : "icmp slt", - op.Le : "icmp sle", - op.Gt : "icmp sgt", - op.Ge : "icmp sge" - ]; - table = new SimpleSymbolTable(); - } - - void gen(Decl[] decls) - { - // Fill in scopes - - table.enterScope; - - foreach(decl ; decls) - genRootDecl(decl); - - foreach(decl ; nestedFunc) - genRootDecl(decl); - - table.leaveScope; - } - - void genRootDecl(Decl decl) - { - switch(decl.declType) - { - case DeclType.FuncDecl: - FuncDecl funcDecl = cast(FuncDecl)decl; - auto return_type = typeToLLVM[funcDecl.type.token.get]; - - printBeginLine("define "); - print(return_type); - print(" @"); - genIdentifier(funcDecl.identifier); - print("("); - - table.enterScope; - Identifier[] args; - foreach(i, funcArg ; funcDecl.funcArgs) - { - print(typeToLLVM[funcArg.type.token.get]); - print(" %"); - print("."~Integer.toString(i)); - args ~= funcArg.identifier; - table.find(funcArg.identifier.get); - if(i+1 < funcDecl.funcArgs.length) - print(", "); - } - - printEndLine(") {"); - - indent; - - foreach(i, arg ; args) - { - auto sym = arg.env.find(arg); - 
auto type = typeToLLVM[sym.type.get]; - printBeginLine("%"~arg.get); - printEndLine(" = alloca " ~ type); - printBeginLine("store " ~ type ~ " %."); - print(Integer.toString(i)); - print(", " ~ type ~ "* %"); - printEndLine(arg.get); - } - - printEndLine(); - - foreach (stmt; funcDecl.statements) - genStmt(stmt); - if (return_type == "void") - { - printBeginLine("ret void"); - printEndLine(); - } - table.leaveScope; - dedent; - printBeginLine("}"); - printEndLine(); - - break; - - case DeclType.VarDecl: - auto varDecl = cast(VarDecl)decl; - printBeginLine("@"); - genIdentifier(varDecl.identifier); - - print(" = "); - if(varDecl.init) - { - if(cast(IntegerLit)varDecl.init) - printEndLine("global i32 " ~ (cast(IntegerLit)varDecl.init).token.get); - else - assert(0,"Declaring an variable to an expression is not allowed"); - } - else - printEndLine("i32 0"); - - printEndLine(); - - break; - - case DeclType.StructDecl: - auto structDecl = cast(StructDecl)decl; - printBeginLine("%struct."); - genIdentifier(structDecl.identifier); - - print(" = type { "); - foreach (i, var; structDecl.vars) - { - print(typeToLLVM[var.type.get]); - if(i+1 < structDecl.vars.length) - print(", "); - } - - printEndLine(" }"); - - break; - - default: - } - } - - void genDecl(Decl decl) - { - switch(decl.declType) - { - case DeclType.VarDecl: - auto varDecl = cast(VarDecl)decl; - printBeginLine("%"); - print(table.find(varDecl.identifier.get)); - print(" = alloca "); - if(varDecl.type.get in typeToLLVM) - printEndLine(typeToLLVM[varDecl.type.get]); - else - printEndLine("%struct."~varDecl.type.get); - if(varDecl.init) - { - auto assignExp = new AssignExp(varDecl.identifier, varDecl.init); - assignExp.env = decl.env; - assignExp.identifier.env = decl.env; - genExpression(assignExp); - } - break; - - case DeclType.FuncDecl: - auto func = cast(FuncDecl)decl; - nestedFunc[func.identifier.get] = func; - break; - - default: - } - } - - void unify(Ref* a, Ref* b) - { - if (a.type != b.type) - { - auto 
a_val = intTypes.find(a.type); - auto b_val = intTypes.find(b.type); - // swap types so a is always the "largest" type - if (a_val < b_val) - { - Ref* tmp = b; - b = a; - a = tmp; - } - - auto res = table.find("%.cast"); - printBeginLine(res); - printCastFromTo(b, a); - print(*b); - print(" to "); - printEndLine(a.type); - - b.type = a.type; - b.name = res; - } - } - - Ref genExpression(Exp exp) - { - switch(exp.expType) - { - case ExpType.Binary: - auto binaryExp = cast(BinaryExp)exp; - - auto left = genExpression(binaryExp.left); - auto right = genExpression(binaryExp.right); - - unify(&left, &right); - - auto res = Ref(left.type, table.find); - printBeginLine(res.name); - print(" = "~opToLLVM[binaryExp.op]~" "); - print(left); - print(", "); - printEndLine(right.name); - - // exp always returns known type (== returns bool no matter - // what the params are) - if (binaryExp.resultType) - res.type = typeToLLVM[binaryExp.resultType]; - - return res; - case ExpType.IntegerLit: - auto integetLit = cast(IntegerLit)exp; - auto t = integetLit.token; - return Ref("int", t.get, true); - case ExpType.Negate: - auto negateExp = cast(NegateExp)exp; - auto target = genExpression(negateExp.exp); - auto res = table.find; - printBeginLine(res); - print(" = sub "~target.type~" 0, "); - printEndLine(target.name); - return Ref(target.type, res); - case ExpType.AssignExp: - auto assignExp = cast(AssignExp)exp; - auto sym = exp.env.find(assignExp.identifier); - - Ref val = genExpression(assignExp.exp); - Ref r = Ref(typeToLLVM[sym.type.get], val.name); - - if (val.type != r.type) - { - auto res = table.find("%.cast"); - printBeginLine(res); - printCastFromTo(val.type, r.type); - print(val); - print(" to "); - printEndLine(r.type); - r.name = res; - } - - printBeginLine("store "); - print(r); - print(", "); - print(r.type ~ "* %"); - printEndLine(assignExp.identifier.get); - break; - case ExpType.CallExp: - auto callExp = cast(CallExp)exp; - auto func_sym = 
exp.env.find(cast(Identifier)callExp.exp); - auto func_type = typeToLLVM[func_sym.type.get]; - Ref[] args; - foreach(i, arg ; callExp.args) - args ~= genExpression(arg); - - char[] res = ""; - if (func_type != "void") - { - res = table.find; - printBeginLine(res); - print(" = call "); - } - else - printBeginLine("call "); - - print(func_type); - print(" @"); - - print(func_sym.id.get); - - print("("); - foreach(i, arg ; args) - { - print(arg); - if(i+1 < args.length) - print(", "); - } - printEndLine(")"); - return Ref(func_sym.type.get, res); - case ExpType.Identifier: - auto identifier = cast(Identifier)exp; - auto sym = exp.env.find(identifier); - char[] res = table.find; - printBeginLine(res); - print(" = load "); - print(typeToLLVM[sym.type.get]); - print("* %"); - printEndLine(sym.id.name); - return Ref(sym.type.get, res); - } - return Ref(); - } - - void genStmt(Stmt stmt) - { - switch(stmt.stmtType) - { - case StmtType.Return: - auto ret = cast(ReturnStmt)stmt; - auto sym = stmt.env.parentFunction(); - auto type = typeToLLVM[sym.type.get]; - - Ref res = genExpression(ret.exp); - - if (type != res.type) - { - auto cast_res = table.find("%.cast"); - printBeginLine(cast_res); - printCastFromTo(res.type, type); - print(res); - print(" to "); - printEndLine(type); - res.name = cast_res; - res.type = type; - } - printBeginLine("ret "); - printEndLine(res); - break; - case StmtType.Decl: - auto declStmt = cast(DeclStmt)stmt; - genDecl(declStmt.decl); - break; - case StmtType.Exp: - auto expStmt = cast(ExpStmt)stmt; - genExpression(expStmt.exp); - break; - case StmtType.If: - auto ifStmt = cast(IfStmt)stmt; - Ref val = genExpression(ifStmt.cond); - auto cond = table.find("%.cond"); - printBeginLine(cond); - print(" = icmp ne "); - print(val); - printEndLine(", 0"); - - auto then_branch = table.find("then"); - auto else_branch = table.find("else"); - auto done_label = table.find("done"); - printBeginLine("br i1 "); - print(cond); - print(", label %"); - 
print(then_branch); - print(", label %"); - printEndLine(ifStmt.else_body? else_branch : done_label); - - printBeginLine(then_branch); - printEndLine(":"); - - indent(); - foreach (s; ifStmt.then_body) - genStmt(s); - printBeginLine("br label %"); - printEndLine(done_label); - dedent(); - - if (ifStmt.else_body) - { - printBeginLine(else_branch); - printEndLine(":"); - - indent(); - foreach (s; ifStmt.else_body) - genStmt(s); - printBeginLine("br label %"); - printEndLine(done_label); - dedent(); - } - - printBeginLine(done_label); - printEndLine(":"); - - break; - case StmtType.While: - auto wStmt = cast(WhileStmt)stmt; - - auto body_label = table.find("while_body"); - auto cond_label = table.find("while_cond"); - auto done_label = table.find("while_done"); - - printBeginLine("br label %"); - printEndLine(cond_label); - - printBeginLine(cond_label); - printEndLine(":"); - - indent(); - - Ref val = genExpression(wStmt.cond); - auto cond = table.find("%.cond"); - - printBeginLine(cond); - print(" = icmp ne "); - print(val); - printEndLine(", 0"); - - printBeginLine("br i1 "); - print(cond); - print(", label %"); - print(body_label); - print(", label %"); - printEndLine(done_label); - - dedent(); - - printBeginLine(body_label); - printEndLine(":"); - - indent(); - foreach (s; wStmt.stmts) - genStmt(s); - printBeginLine("br label %"); - printEndLine(cond_label); - dedent(); - - printBeginLine(done_label); - printEndLine(":"); - - break; - } - } - - void genIdentifier(Identifier identifier) - { - print(identifier.get); - } - - void indent() - { - tabIndex ~= tabType; - } - - void dedent() - { - tabIndex = tabIndex[0 .. 
$-tabType.length]; - } - - void printBeginLine(char[] line = "") - { - Stdout(tabIndex~line); - } - void printBeginLine(Ref r) - { - Stdout(tabIndex~r.type~" "~r.name); - } - - void printEndLine(char[] line = "") - { - Stdout(line).newline; - } - - void printEndLine(Ref r) - { - Stdout(r.type~" "~r.name).newline; - } - - void print(char[] line) - { - Stdout(line); - } - - void print(Ref r) - { - Stdout(r.type~" "~r.name); - } - - void printCastFromTo(size_t t1, size_t t2) - { - if (t1 < t2) - print(" = zext "); - else - print(" = trunc "); - } - - void printCastFromTo(char[] t1, char[] t2) - { - printCastFromTo(intTypes.find(t1), intTypes.find(t2)); - } - - void printCastFromTo(Ref* t1, Ref* t2) - { - printCastFromTo(intTypes.find(t1.type), intTypes.find(t2.type)); - } - -private: - - char[] tabIndex; - const char[] tabType = " "; // 4 spaces - FuncDecl[char[]] functions; - - SimpleSymbolTable table; - SymbolTable symbolTable; - static char[][char[]] typeToLLVM; - static char[][BinaryExp.Operator] opToLLVM; - - static char[][] intTypes = [ "i1", "i8", "i16", "i32", "i64" ]; - - FuncDecl[char[]] nestedFunc; -} - -struct Ref -{ - char[] type; - char[] name; - bool atomic = false; - static Ref opCall(char[] type = "void", char[] name = "", bool atomic = false) - { - Ref r; - if(auto llvm_t = type in LLVMGen.typeToLLVM) - r.type = *llvm_t; - else - r.type = type; - r.name = name; - r.atomic = atomic; - return r; - } -} - -class SimpleSymbolTable -{ - int[char[]][] variables; - - void enterScope() - { - variables ~= cast(int[char[]])["__dollar":-1]; - } - - void leaveScope() - { - variables.length = variables.length - 1; - } - - char[] find(char[] v = "%.tmp") - { - foreach_reverse(map ; variables) - { - if(v in map) - return v~"."~Integer.toString(++map[v]); - } - variables[$-1][v] = 0; - return v; - } -} - diff -r e331e4e816e4 -r 5e383b3755d6 gen/LuaGen.d --- a/gen/LuaGen.d Fri Apr 18 23:45:45 2008 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,180 +0,0 @@ 
-module gen.LuaGen; - -import tango.io.Stdout, - Int = tango.text.convert.Integer; - -import ast.Decl, - ast.Stmt, - ast.Exp; - -import lexer.Token; - -class LuaGen -{ -public: - this() - { - } - - void gen(Decl[] decls) - { - - foreach(decl ; decls) - genDecl(decl); - printBeginLine("main()"); - printEndLine; - } - - void genDecl(Decl decl) - { - switch(decl.declType) - { - case DeclType.FuncDecl: - FuncDecl funcDecl = cast(FuncDecl)decl; - - printBeginLine("function "); - genIdentifier(funcDecl.identifier); - print("("); - foreach(i, funcArg ; funcDecl.funcArgs) - { - genIdentifier(funcArg.identifier); - if(i+1 < funcDecl.funcArgs.length) - print(", "); - } - printEndLine(")"); - indent; - foreach(stmt ; funcDecl.statements) - { - genStmt(stmt); - } - dedent; - printBeginLine("end"); - printEndLine(); - break; - - case DeclType.VarDecl: - genVarDecl(cast(VarDecl)decl); - - default: - } - } - - void genStmt(Stmt stmt) - { - switch(stmt.stmtType) - { - case StmtType.Return: - auto ret = cast(ReturnStmt)stmt; - printBeginLine("return "); - genExpression(ret.exp); - printEndLine(); - break; - case StmtType.Decl: - auto declStmt = cast(DeclStmt)stmt; - genDecl(declStmt.decl); - break; - case StmtType.Exp: - auto expStmt = cast(ExpStmt)stmt; - printBeginLine(); - genExpression(expStmt.exp); - printEndLine(); - break; - - } - } - - void genVarDecl(VarDecl decl) - { - printBeginLine("local "); - genIdentifier(decl.identifier); - if(decl.init) - { - print(" = "); - genExpression(decl.init); - } - printEndLine(); - - } - - void genExpression(Exp exp) - { - switch(exp.expType) - { - case ExpType.Binary: - auto binaryExp = cast(BinaryExp)exp; - genExpression(binaryExp.left); - print(" " ~ [binaryExp.op] ~ " "); - genExpression(binaryExp.right); - break; - case ExpType.IntegerLit: - auto integetLit = cast(IntegerLit)exp; - auto t = integetLit.token; - print(t.get); - break; - case ExpType.Negate: - auto negateExp = cast(NegateExp)exp; - print("-("); - 
genExpression(negateExp.exp); - print(")"); - break; - case ExpType.AssignExp: - auto assignExp = cast(AssignExp)exp; - genIdentifier(assignExp.identifier); - print(" = "); - genExpression(assignExp.exp); - break; - case ExpType.CallExp: - auto callExp = cast(CallExp)exp; - genExpression(callExp.exp); - print("("); - foreach(i, arg ; callExp.args) - { - genExpression(arg); - if(i+1 < callExp.args.length) - print(", "); - } - print(")"); - break; - case ExpType.Identifier: - auto identifier = cast(Identifier)exp; - print(identifier.token.get); - break; - } - - } - - void genIdentifier(Identifier identifier) - { - print(identifier.token.get); - } - - void indent() - { - tabIndex ~= tabType; - } - - void dedent() - { - tabIndex = tabIndex[0 .. $-tabType.length]; - } - - void printBeginLine(char[] line = "") - { - Stdout(tabIndex~line); - } - - void printEndLine(char[] line = "") - { - Stdout(line).newline; - } - - void print(char[] line) - { - Stdout(line); - } - -private: - char[] tabIndex; - const char[] tabType = " "; // 4 spaces -} - diff -r e331e4e816e4 -r 5e383b3755d6 lala.txt diff -r e331e4e816e4 -r 5e383b3755d6 lexer/Keyword.d --- a/lexer/Keyword.d Fri Apr 18 23:45:45 2008 +0200 +++ b/lexer/Keyword.d Sun May 25 14:43:16 2008 +0200 @@ -2,12 +2,17 @@ import lexer.Token; +/** + A list of keywords in an associative array that link a string + representation of the keyword to a Tok + */ Tok[char[]] keywords; static this () { keywords = [ + // types "byte"[] : Tok.Byte, "ubyte" : Tok.Ubyte, "short" : Tok.Short, @@ -17,15 +22,32 @@ "long" : Tok.Long, "ulong" : Tok.Ulong, + "char" : Tok.Char, + "wchar" : Tok.Wchar, + "dchar" : Tok.Dchar, + "bool" : Tok.Bool, "float" : Tok.Float, "double" : Tok.Double, + "void" : Tok.Void, + + // type related + "struct" : Tok.Struct, + + // control flow "if" : Tok.If, "else" : Tok.Else, "while" : Tok.While, + "switch" : Tok.Switch, + "case" : Tok.Case, + "default" : Tok.Default, "return" : Tok.Return, - "struct" : Tok.Struct + "cast" : 
Tok.Cast, + + // modules + "module" : Tok.Module, + "import" : Tok.Import ]; } diff -r e331e4e816e4 -r 5e383b3755d6 lexer/Lexer.d --- a/lexer/Lexer.d Fri Apr 18 23:45:45 2008 +0200 +++ b/lexer/Lexer.d Sun May 25 14:43:16 2008 +0200 @@ -1,29 +1,85 @@ module lexer.Lexer; -import misc.Error, - misc.DataSource; +import basic.Message, + basic.SourceManager; import lexer.Token, lexer.Keyword; import tango.io.Stdout; +/** + The Lexer class will supply you with methods to tokenize a D file. Supply the + Lexer with a DataSource and you can 'peek' and 'next' Tokens from the file. + + For more info about Tokens, look up the lexer.Token module. +*/ class Lexer { public: - this (DataSource source) + + /** + Create a new Lexer. + */ + this(SourceLocation start, SourceManager src_mgr, MessageHandler messages) { - this.source = source; - this.position = 0; + this.messages = messages; + sm = src_mgr; + start_loc = start; + position = 0; + source = sm.getRawData(start_loc); + + + charTable.length = 256; + foreach (c; "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_") + charTable[c] = CharType.Letter; + + foreach (c; "0123456789") + charTable[c] = CharType.Number; + + foreach (c; "(){}[];:.,=!<>+-*/%") + charTable[c] = CharType.Symbol; + + foreach (c; " \n") + charTable[c] = CharType.Whitespace; + + symbolFunctions.length = 256; + + symbolFunctions['('] = &openParentheses; + symbolFunctions[')'] = &closeParentheses; + symbolFunctions['{'] = &openBrace; + symbolFunctions['}'] = &closeBrace; + symbolFunctions['['] = &openBracket; + symbolFunctions[']'] = &closeBracket; + symbolFunctions[';'] = &seperator; + symbolFunctions[':'] = &colon; + symbolFunctions['.'] = &dot; + symbolFunctions[','] = &comma; + symbolFunctions['='] = &eq; + symbolFunctions['!'] = &ne; + symbolFunctions['<'] = &le; + symbolFunctions['>'] = &ge; + symbolFunctions['+'] = &plus; + symbolFunctions['-'] = &minus; + symbolFunctions['*'] = &star; + symbolFunctions['/'] = &slash; + symbolFunctions['%'] = &percent; } - Token next () + /** + Get the next
token from the source. This method will move the + internal position forward to the next Token. + + return: A Token - Token.type is TokType.EOF if there is + no more tokens in the file. + */ + Token next() { switch (getNextChar) { case CharType.EOF: - Location l; - return Token (Tok.EOF, l, 0); + SLoc loc; + return Token(Tok.EOF, loc, 0); case CharType.Whitespace: position += 1; @@ -40,124 +96,225 @@ } } - Token peek ( int skip = 0) + /** + Get the next token from the source. This method will NOT move the + internal position forward, and thereby having no side-effects. + + return: A Token - Token.type is TokType.EOF if there is + no more tokens in the file. + */ + Token peek(int skip = 0) { int oldPosition = this.position; - while(skip-- > 0) + while (skip-- > 0) this.next; Token t = this.next; this.position = oldPosition; return t; } - public Error[] getErrors() +private: + Token eq() + { + if(source[position] == '=') + return Token(Tok.Eq, Loc(position++ - 1), 2); + return Token(Tok.Assign, Loc(position - 1), 1); + } + Token openBrace() + { + return Token(Tok.OpenBrace, Loc(position - 1), 1); + } + Token closeBrace() + { + return Token(Tok.CloseBrace, Loc(position - 1), 1); + } + Token openParentheses() + { + return Token(Tok.OpenParentheses, Loc(position - 1), 1); + } + Token closeParentheses() + { + return Token(Tok.CloseParentheses, Loc(position - 1), 1); + } + Token openBracket() + { + return Token(Tok.OpenBracket, Loc(position - 1), 1); + } + Token closeBracket() + { + return Token(Tok.CloseBracket, Loc(position - 1), 1); + } + Token seperator() + { + return Token(Tok.Seperator, Loc(position - 1), 1); + } + Token colon() + { + return Token(Tok.Colon, Loc(position - 1), 1); + } + Token dot() + { + int pos = 0; + while(getNextChar(0) == CharType.Number || + this.source[position + pos + 1] == '_') + { + if(getNextChar(0) == CharType.Number) + { + position--; + return lexNumber(); + } + pos++; + } + return Token(Tok.Dot, Loc(position - 1), 1); + } + Token 
comma() + { + return Token(Tok.Comma, Loc(position - 1), 1); + } + Token ne() + { + if(source[position] == '=') + return Token(Tok.Ne, Loc(position++ - 1), 2); + return Token(Tok.Not, Loc(position - 1), 1); + } + Token le() + { + if(source[position] == '=') + return Token(Tok.Le, Loc(position++ - 1), 2); + return Token(Tok.Lt, Loc(position - 1), 1); + } + Token ge() { - return this.errors; + if(source[position] == '=') + return Token(Tok.Ge, Loc(position++ - 1), 2); + return Token(Tok.Gt, Loc(position - 1), 1); + } + Token plus() + { + return Token(Tok.Plus, Loc(position - 1), 1); + } + Token minus() + { + return Token(Tok.Minus, Loc(position - 1), 1); + } + Token star() + { + return Token(Tok.Star, Loc(position - 1), 1); } -private: + Token slash() + { + switch(source[position]) + { + case '/': + while(getNextChar != CharType.EOF) + { + if(source[position++] == '\n') + return this.next; + } + return Token(Tok.EOF, Loc(position), 0); + + case '*': + position += 2; + while(getNextChar != CharType.EOF) + { + ++position; + if(source[position-2] == '*') + if(source[position-1] == '/') + return this.next; + } + messages.report(UnexpectedEOFBlock,Loc(position)); + case '+': + position += 2; + int nesting = 1; + while(getNextChar != CharType.EOF) + { + ++position; + if(source[position-2] == '+') + if(source[position-1] == '/') + { + position++; + nesting--; + } + + if(source[position-2] == '/') + if(source[position-1] == '+') + { + nesting++; + position++; + } + + if(nesting == 0) + return this.next; + } + messages.report(UnexpectedEOFBlock,Loc(position)); + + default: + return Token(Tok.Slash, Loc(position - 1), 1); + } + } + + Token percent() + { + return Token(Tok.Percent, Loc(position - 1), 1); + } + Token lexNumber () { + bool sign = false; + bool dot = false; + bool e = false; + int i = 0; - while(getNextChar(++i) == CharType.Number) - {} + + bool end = false; + while(!end) + { + switch(getNextChar(i)) + { + case CharType.Number: + break; + case CharType.Symbol: + 
if(this.source[position+i] == '.') + { + if(dot) + messages.report(OnlyOneDotFloating, Loc(position + i)); + dot = true; + break; + } + end = true; + continue; + case CharType.Letter: + if(this.source[position+i] == '_') + break; + if (this.source[position+i] == 'e' || + this.source[position+i] == 'E') + { + if (e) + messages.report(OnlyOneEFloating, Loc(position + i)); + e = true; + break; + } + end = true; + continue; + + default: + end = true; + continue; + } + i++; + } position += i; - return Token(Tok.Integer, Location(position - i, this.source), i); + return Token(Tok.Integer, Loc(position - i), i); } Token lexSymbol () { - switch(source.data[position++]) - { - case '(': - return Token(Tok.OpenParentheses, Location(position - 1, this.source), 1); - case ')': - return Token(Tok.CloseParentheses, Location(position - 1, this.source), 1); - case '{': - return Token(Tok.OpenBrace, Location(position - 1, this.source), 1); - case '}': - return Token(Tok.CloseBrace, Location(position - 1, this.source), 1); - case ';': - return Token(Tok.Seperator, Location(position - 1, this.source), 1); - case ',': - return Token(Tok.Comma, Location(position - 1, this.source), 1); - case '=': - if(source.data[position] == '=') - return Token(Tok.Eq, Location(position++ - 1, this.source), 2); - return Token(Tok.Assign, Location(position - 1, this.source), 1); - case '!': - if(source.data[position] == '=') - return Token(Tok.Ne, Location(position++ - 1, this.source), 2); - return Token(Tok.Not, Location(position - 1, this.source), 1); - case '<': - if(source.data[position] == '=') - return Token(Tok.Le, Location(position++ - 1, this.source), 2); - return Token(Tok.Lt, Location(position - 1, this.source), 1); - case '>': - if(source.data[position] == '=') - return Token(Tok.Ge, Location(position++ - 1, this.source), 2); - return Token(Tok.Gt, Location(position - 1, this.source), 1); - case '+': - return Token(Tok.Add, Location(position - 1, this.source), 1); - case '-': - return 
Token(Tok.Sub, Location(position - 1, this.source), 1); - case '*': - return Token(Tok.Mul, Location(position - 1, this.source), 1); - case '/': - switch(source.data[position]) - { - case '/': - while(getNextChar != CharType.EOF) - { - if(source.data[position++] == '\n') - return this.next; - } - return Token(Tok.EOF, Location(position, this.source), 0); + Token t = symbolFunctions[source[position++]](); - case '*': - position += 2; - while(getNextChar != CharType.EOF) - { - ++position; - if(source.data[position-2] == '*') - if(source.data[position-1] == '/') - return this.next; - } - throw new Error("Unexpected end of file. Unclosed comment block", - Location(position, source)); - - case '+': - position += 2; - int nesting = 1; - while(getNextChar != CharType.EOF) - { - ++position; - if(source.data[position-2] == '+') - if(source.data[position-1] == '/') - { - position++; - nesting--; - } - - if(source.data[position-2] == '/') - if(source.data[position-1] == '+') - { - nesting++; - position++; - } - - if(nesting == 0) - return this.next; - } - throw new Error("Unexpected end of file. Unclosed comment block", - Location(position, source)); - - default: - return Token(Tok.Div, Location(position - 1, this.source), 1); - } - } + return t; } Token lexLetter () @@ -173,11 +330,11 @@ } } - Token t = Token(Tok.Identifier, Location(position, source), i); + Token t = Token(Tok.Identifier, Loc(), i); if (!hasNumber) { - char[] str = source.data[position .. position + i]; + char[] str = source[position .. 
position + i]; if(str in keywords) t.type = keywords[str]; } @@ -189,53 +346,41 @@ CharType getNextChar(int offset = 0) { - if (position + offset >= this.source.data.length) + if (position + offset >= this.source.length) return CharType.EOF; - char current = source.data[position + offset]; - - if (current >= 'A' && current <= 'Z' || - current >= 'a' && current <= 'z' || current > 127) - return CharType.Letter; + char current = source[position + offset]; - if (current >= '0' && current <= '9') - return CharType.Number; - - switch(current) - { - case ' ': - case '\n': - return CharType.Whitespace; + CharType c = charTable[current]; - case '(': - case ')': - case '{': - case '}': - case ';': - case ',': - case '=': - case '!': - case '<': - case '>': - case '+': - case '-': - case '*': - case '/': - return CharType.Symbol; + if(c == CharType.INVALID) + messages.report(InvalidSymbol, Loc()) + .arg(Integer.toString(cast(int)current)) + .fatal(ExitLevel.Lexer); - default: - throw new Error("Read invalid symbol: '" ~ current ~ "'", Location(position, source)); - } + return c; } - DataSource source; + private final SourceLocation Loc(int pos = -1) + { + if (pos < 0) + return start_loc + position; + return start_loc + pos; + } + + SourceManager sm; + SourceLocation start_loc; int position; - Error[] errors; + char[] source; + MessageHandler messages; + CharType[] charTable; + Token delegate()[] symbolFunctions; } enum CharType : ubyte { + INVALID, Letter, Number, Symbol, @@ -243,3 +388,4 @@ EOF } + diff -r e331e4e816e4 -r 5e383b3755d6 lexer/Token.d --- a/lexer/Token.d Fri Apr 18 23:45:45 2008 +0200 +++ b/lexer/Token.d Sun May 25 14:43:16 2008 +0200 @@ -1,17 +1,28 @@ module lexer.Token; public -import misc.Location; +import basic.SourceLocation; import Integer = tango.text.convert.Integer; +/** + The Token struct will be used through the Lexer, Parser and other + modules as a location into source. 
+ + The Token should always be optimized for size to limit unnecessary + memory usage. + */ struct Token { Tok type; - Location location; + SLoc location; uint length; - static Token opCall (Tok type, Location location, uint length) + /** + Create a new token with a Tok type, Location in source and a + length of how many chars the Token spans in the source + */ + static Token opCall (Tok type, SLoc location, uint length) { Token t; t.type = type; @@ -20,23 +31,57 @@ return t; } + /** + Get the type of the Token as a string + */ char[] getType () { return typeToString[this.type]; } + /** + A human readable dump of a Token + */ char[] toString () { - return this.getType()~": Len: "~Integer.toString(this.length) - ~", Loc: "~location.toString; + return this.getType()~": Len: "~Integer.toString(this.length); + } + + /// Get the range of this token + SourceRange asRange() { return SourceRange(location, location + length); } + + /** + Returns true if the type of this token is a basic type (int, float, ...). + Void is included, although a void in itself is not really a type. + */ + bool isBasicType() + { + return type >= Tok.Byte && type <= Tok.Void; } - char[] get () + /** + Returns true for assignment tokens; only plain `=` (Tok.Assign) is checked, not += or *= + */ + bool isAssignment() { - return location.get(length); + return type == Tok.Assign; + } + + /** + Just a shortcut to avoid `token.type == Tok.Identifier`. + */ + bool isIdentifier() + { + return type == Tok.Identifier; } } +/** + Tok is short for TokenType. This enum list is to supply the Token + with a type. + + This enum is used to switch over "many" places.
+ */ enum Tok : ushort { /* Non-code related tokens */ @@ -48,8 +93,8 @@ /* Basic operators */ Assign, - Add, Sub, - Mul, Div, + Plus, Minus, + Star, Slash, Percent, Comma, /* Symbols */ @@ -57,7 +102,11 @@ CloseParentheses, OpenBrace, CloseBrace, + OpenBracket, + CloseBracket, Seperator, + Colon, + Dot, /* Comparator operators */ Eq, Ne, @@ -72,18 +121,30 @@ Int, Uint, Long, Ulong, + Char, Wchar, Dchar, + Float, Double, Bool, + Void, + Struct, If, Else, While, - Return, + Switch, Case, Default, + Return, Cast, + + Module, Import, } +/** + An associative array to supply a Tok to String function. + + Keep always this list updated when adding a new Tok. + */ public char[][Tok] typeToString; static this() @@ -96,7 +157,11 @@ Tok.Short:"Short", Tok.Int:"Int", Tok.Long:"Long", + Tok.Char:"Char", + Tok.Wchar:"Wchar", + Tok.Dchar:"Dchar", Tok.Bool:"Bool", + Tok.Void:"Void", Tok.Eq:"Eq", Tok.Ne:"Ne", Tok.Lt:"Lt", @@ -107,17 +172,28 @@ Tok.CloseParentheses:"CloseParentheses", Tok.OpenBrace:"OpenBrace", Tok.CloseBrace:"CloseBrace", + Tok.OpenBracket:"OpenBracket", + Tok.CloseBracket:"CloseBracket", + Tok.Dot:"Dot", Tok.Assign:"Assign", - Tok.Add:"Add", - Tok.Sub:"Sub", - Tok.Mul:"Mul", - Tok.Div:"Div", + Tok.Plus:"Plus", + Tok.Minus:"Minus", + Tok.Star:"Star", + Tok.Slash:"Slash", + Tok.Percent:"Percent", Tok.Integer:"Integer", Tok.If:"If", Tok.While:"While", + Tok.Switch:"Switch", + Tok.Case:"Case", + Tok.Default:"Default", Tok.Comma:"Comma", Tok.Return:"Return", Tok.Struct:"Struct", - Tok.Seperator:"Seperator" + Tok.Colon:"Colon", + Tok.Seperator:"Seperator", + Tok.Cast:"Cast", + Tok.Module:"Module", + Tok.Import:"Import" ]; } diff -r e331e4e816e4 -r 5e383b3755d6 llvm.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/llvm.patch Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,107 @@ +Index: llvm/c/Core.d +=================================================================== +--- llvm/c/Core.d (revision 170) ++++ llvm/c/Core.d (working copy) +@@ -82,6 +82,20 @@ + */ + typedef 
LLVM_OpaqueMemoryBuffer* LLVMMemoryBufferRef; + ++enum LLVMParamAttr { ++ ZExt = 1<<0, ++ SExt = 1<<1, ++ NoReturn = 1<<2, ++ InReg = 1<<3, ++ StructRet = 1<<4, ++ NoUnwind = 1<<5, ++ NoAlias = 1<<6, ++ ByVal = 1<<7, ++ Nest = 1<<8, ++ ReadNone = 1<<9, ++ ReadOnly = 1<<10 ++} ++ + enum LLVMTypeKind { + Void, /**< type with no size */ + Float, /**< 32 bit floating point type */ +@@ -388,6 +402,14 @@ + /*const*/ char *LLVMGetCollector(LLVMValueRef Fn); + void LLVMSetCollector(LLVMValueRef Fn, /*const*/ char *Coll); + ++void LLVMAddParamAttr(LLVMValueRef Arg, LLVMParamAttr Attr); ++void LLVMRemoveParamAttr(LLVMValueRef Arg, LLVMParamAttr Attr); ++void LLVMSetParamAlignment(LLVMValueRef Arg, uint Align); ++void LLVMAddInstrParamAttr(LLVMValueRef Inst, uint Index, LLVMParamAttr Attr); ++void LLVMRemoveInstrParamAttr(LLVMValueRef Inst, uint Index, LLVMParamAttr Attr); ++void LLVMSetInstrParamAlignment(LLVMValueRef Inst, uint Index, uint Align); ++ ++ + /* Operations on basic blocks */ + LLVMValueRef LLVMBasicBlockAsValue(LLVMBasicBlockRef Bb); + int LLVMValueIsBasicBlock(LLVMValueRef Val); +Index: llvm/llvm.d +=================================================================== +--- llvm/llvm.d (revision 170) ++++ llvm/llvm.d (working copy) +@@ -43,6 +43,8 @@ + alias LLVMVisibility Visibility; + /// + alias LLVMValueKind ValueKind; ++/// ++public alias LLVMParamAttr ParamAttr; + + /// + class Module +@@ -836,6 +838,20 @@ + return getValueOf(v); + } + /// ++ void addParamAttr(uint idx, ParamAttr PA) ++ { ++ auto v = LLVMGetParam(value, idx); ++ assert(v !is null); ++ LLVMAddParamAttr(v, PA); ++ } ++ /// ++ void removeParamAttr(uint idx, ParamAttr PA) ++ { ++ auto v = LLVMGetParam(value, idx); ++ assert(v !is null); ++ LLVMRemoveParamAttr(v, PA); ++ } ++ /// + uint intrinsicID() + { + return LLVMGetIntrinsicID(value); +Index: llvm-fix.cpp +=================================================================== +--- llvm-fix.cpp (revision 170) ++++ llvm-fix.cpp (working copy) 
+@@ -29,10 +29,10 @@ + APN.convert(SemanticsForType(unwrap(RealTy)), APFloat::rmNearestTiesToEven); + return wrap(ConstantFP::get(unwrap(RealTy), APN)); + } +- ++/* + LLVMValueRef LLVMConstRealOfString(LLVMTypeRef RealTy, const char *Text) { + return wrap(ConstantFP::get(unwrap(RealTy), + APFloat(SemanticsForType(unwrap(RealTy)), Text))); + } +- ++*/ + } +Index: llvm-ext.cpp +=================================================================== +--- llvm-ext.cpp (revision 170) ++++ llvm-ext.cpp (working copy) +@@ -80,9 +80,11 @@ + LLVMTargetDataRef LLVMGetTargetDataFromModule(LLVMModuleRef M) { + return wrap(new TargetData(unwrap(M))); + } ++/* + void LLVMDisposeTargetData(LLVMTargetDataRef TD) { + delete unwrap(TD); + } ++*/ + + // we need to be able to query the ABI size of a type as an integer + size_t LLVMGetABITypeSize(LLVMTargetDataRef TD, LLVMTypeRef T) { diff -r e331e4e816e4 -r 5e383b3755d6 misc/DataSource.d --- a/misc/DataSource.d Fri Apr 18 23:45:45 2008 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,25 +0,0 @@ -module misc.DataSource; - -import tango.io.UnicodeFile; - -struct DataSource -{ - char[] name; - char[] data; - - static DataSource opCall(char[] name) - { - DataSource source; - - auto file = new UnicodeFile!(char)(name, Encoding.UTF_8); - - source.name = name; - source.data = file.read(); - return source; - } - - char[] get(uint position, ushort len) - { - return data[position .. 
position+len]; - } -} diff -r e331e4e816e4 -r 5e383b3755d6 misc/Error.d --- a/misc/Error.d Fri Apr 18 23:45:45 2008 +0200 +++ b/misc/Error.d Sun May 25 14:43:16 2008 +0200 @@ -1,18 +1,131 @@ module misc.Error; -import misc.Location; +import tango.core.Exception, + Array = tango.core.Array, + tango.text.Util; -import tango.core.Exception; +import llvm.type; + +import lexer.Token, + sema.DType; class Error : Exception { - char[] message; - Location errorLocation; + + this(char[] message) + { + super(message); + args ~= message; + } + + char[] toString() + { + char[256] tmp = void; + char[] msg = layout(tmp, args); + /* + if (location.source.name.length > 0) + msg = location.toString ~ ": " ~ msg; + else + msg = msg.dup; - this(char[] message, Location errorLocation) + if (toks.length > 0) + { + Array.sort(toks, + (Token a, Token b) + { + return a.location.position - b.location.position; + }); + char[] data = toks[0].location.source.data; + size_t low = toks[0].location.position; + size_t high = toks[$ - 1].location.position; + + size_t line_start = Array.rfind(data[0 .. low], '\n'); + size_t line_end = high + Array.find(data[high .. $], '\n'); + char[] line = trim(data[line_start + 1 .. line_end]); + char[] marks = line.dup; + marks[] = ' '; + foreach (tok; toks[0 .. $]) + { + size_t p = tok.location.position - (line.ptr - data.ptr); + marks[p .. p + tok.length] = '~'; + } + size_t p = main_tok.location.position - (line.ptr - data.ptr); + marks[p .. p + main_tok.length] = '^'; + + msg ~= "\n "; + msg ~= line; + msg ~= "\n "; + msg ~= marks; + } +*/ + return msg; + } + + Error arg(char[] s) { - super(message ~ " at line " ~ errorLocation.toString); - this.message = message; - this.errorLocation = errorLocation; + if (args.length == 11) + throw new Exception("Sorry, errors only support up to 10 args"); + args ~= s; + return this; + } + + Error arg(char[][] s) + { + char[] res = s[0 .. 
$ - 1].join(", "); + if (s.length > 1) + res ~= " and "; + res ~= s[$ - 1]; + return arg(res); + } + + Error arg(char c) + { + return arg([c]); + } + + Error arg(DType[] types) + { + char[][] res; + foreach (type; types) + res ~= type.name(); + return arg(res); } + + Error arg(Tok[] toks...) + { + char[][] res; + foreach (t; toks) + res ~= typeToString[t]; + return arg(res); + } + + /* + Error loc(Location loc) + { + location = loc; + return this; + } + */ + + Error tok(Token tok) + { + /* + if (toks.length > 0) + assert(tok.location.source == toks[0].location.source, + "Tokens must come from the same source"); + else + { + main_tok = tok; + loc = tok.location; + } + toks ~= tok; + */ + return this; + } + +private: + char[][] args; + //Location location; + Token[] toks; + Token main_tok; } diff -r e331e4e816e4 -r 5e383b3755d6 misc/Location.d --- a/misc/Location.d Fri Apr 18 23:45:45 2008 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,23 +0,0 @@ -module misc.Location; - -import misc.DataSource; - -import Integer = tango.text.convert.Integer, - tango.text.Util; - -struct Location -{ - uint position; - DataSource source; - - char[] toString () - { - int lineNumber = split(source.get(0, position), "\n").length; - return Integer.toString(lineNumber) ~" in "~source.name; - } - - char[] get(uint length) - { - return source.get(position, length); - } -} diff -r e331e4e816e4 -r 5e383b3755d6 parser/Action.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/parser/Action.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,356 @@ +module parser.Action; + +import lexer.Token; + +import misc.Error; + +/** + Used to indicate what type of operator is used in a given binary expression + (and unary expressions?) 
+ */ +public enum Operator +{ + Assign, + + Eq, Ne, + + Lt, Le, + Gt, Ge, + + Add, Sub, + Mul, Div, Mod, +} + + +class Id +{ + public static Id opCall(Token tok) + { + auto id = new Id(); + id.tok = tok; + return id; + } + Token tok; +} + +class PointerId : Id +{ + public static PointerId opCall(Id id) + { + auto p = new PointerId(); + p.id = id; + return p; + } + + Id id; +} + +class ArrayId : Id +{ + public static ArrayId opCall(Id id, Object number) + { + auto a = new ArrayId(); + a.id = id; + a.number = number; + return a; + } + + Id id; + Object number; +} + +/** + Represents a fully qualified name, with some packages and a final identifier. + The identifier should always be set, but packages may have length 0. + **/ +struct ModuleName +{ + Id id; + Id[] packages; + + /// Get the full ranged spanned by packages and identifier + SourceRange asRange() + { + SourceRange r = id.tok.asRange(); + foreach (identifier; packages) + r = r + identifier.tok.asRange(); + return r; + } +} + +/** + All methods are optional. + +Warning: Interface is not stable yet. Use the `override` keyword in all classes + inheriting from this to get warning if the interface changes. + */ +abstract class Action +{ + /** + A few aliases to indicate what methods should be dealing with the same + types. + + Not typesafe, and not using typedef because users would need a lot of + casts (and base type would be void*, so no possibility to synchronize, + print etc.) + */ + alias Object ExprT; + alias Object StmtT; /// ditto + alias Object DeclT; /// ditto + alias Object ModuleT; /// ditto + + // -- Modules -- + + ModuleT actOnModule(ref Token _module, char[] name) + { + return null; + } + + /** + This action is called when a file does not start with a module + declaration - in which case there is no Token available. + + Instead a SLoc to the start of the file is given. 
+ */ + ModuleT actOnImplicitModule(SourceLocation fileStart, char[] name) + { + return null; + } + + void actOnModuleDecl(ModuleT m, DeclT d) + { + } + + // -- Declarations -- + + /** + Called for an import statement, that may be renamed. If name is null, + there is no rename. + + If there are selective imports, they are handled in addSelectiveImport + */ + DeclT actOnImport(ref Token _import, ref ModuleName target, Id* name) + { + return null; + } + + /** + */ + void addSelectiveImport(DeclT _import, ref Id target, Id* name) + { + } + + /** + Either we should have one case that handles a lot of things, or we should + have a lot of separate cases. + As an example, this method could handle the params in `int f(int, int)` + as well as handling `int x` at both top-level, in classes and in methods. + + The other solution is an addParamToFunc or similar. + */ + DeclT actOnDeclarator(ref Id type, ref Id name, ExprT init) + { + return null; + } + + /** + Add a struct member to a struct. + */ + void actOnStructMember(DeclT st_decl, DeclT m_decl) //ref Id type, ref Id name, ExprT init) + { + return null; + } + + /** + Add an initialization expression to a previously created decl. + + Used for default values on function params and for values to local + variables. + */ + void addInitToDeclarator(DeclT decl, ExprT exp) + { + } + + /** + Called at the start of a function, doesn't get a lot of info - that is + added later on, through addFuncArg and actOnEndOfFunction. + */ + DeclT actOnStartOfFunctionDef(ref Id type, ref Id name) + { + return null; + } + + /** + Add a new parameter to the function func. + */ + void addFuncArg(DeclT func, Id type, Id name) + { + } + + /** + Finish off the function, by giving it the body (a single statement, so + you probably want some sort of compound statement) + */ + DeclT actOnEndOfFunction(DeclT func, StmtT stmts) + { + return func; + } + + // -- Statements -- + + /** + Called after parsing a function/while/for/whatever body. 
+ + Note that stmts is to be considered temporary, it might point into the + stack and needs to be copied before saving. + */ + StmtT actOnCompoundStmt(ref Token left, ref Token right, StmtT[] stmts) + { + return null; + } + + /** + An expression was used as a statement - this includes assignments, + function calls. + + Additionally the D spec dictates that expressions with no effect are not + legal as statements, but the parser can't test for this so it has to be + done in the later stages. + */ + StmtT actOnExprStmt(ExprT exp) + { + return null; + } + + /** + Called after parsing return statements. + + loc is the return token. + */ + StmtT actOnReturnStmt(ref Token loc, ExprT exp) + { + return null; + } + + /** + */ + StmtT actOnIfStmt(ref Token ifTok, ExprT cond, StmtT thenBody, + ref Token elseTok, StmtT elseBody) + { + return null; + } + + /** + */ + StmtT actOnWhileStmt(ref Token whileTok, ExprT cond, StmtT whileBody) + { + return null; + } + + /** + */ + StmtT actOnDeclStmt(DeclT decl) + { + return null; + } + + StmtT actOnStartOfSwitchStmt() + { + return null; + } + + void actOnCaseStmt() + { + } + + void actOnDefaultStmt() + { + } + + StmtT actOnFinishSwitchStmt(StmtT sw) + { + return sw; + } + + // -- Expressions -- + + /** + A single numerical constant -- this can be absolutely any kind of number. + Integers, floats, hex, octal, binary, imaginary and so on. + */ + ExprT actOnNumericConstant(Token op) + { + return null; + } + + /** + This is called when identifiers are used in expressions. + */ + ExprT actOnIdentifierExp(Id id) + { + return null; + } + + /** + Unary operator. + */ + ExprT actOnUnaryOp(Token op, ExprT operand) + { + return null; + } + + /** + Binary operator. + */ + ExprT actOnBinaryOp(SLoc op_loc, Operator op, ExprT l, ExprT r) + { + return null; + } + + /** + Called when using the 'dot' operator. + The left hand side can be any expression, but its only possible to look + up an identifier. 
+ */ + ExprT actOnMemberReference(ExprT lhs, SourceLocation op, Id member) + { + return null; + } + + /** + Called when function calls are encountered. + + Note that args is temporary and might point into the stack. Remember to + copy before saving a reference to it. + */ + ExprT actOnCallExpr(ExprT func, ref Token left_paren, ExprT[] args, + ref Token right_paren) + { + return null; + } + + /** + Called when index expressions (array[index]) are encountered. + */ + ExprT actOnIndexEpr(ExprT array, ref Token left_bracket, ExprT index, + ref Token right_bracket) + { + return null; + } + + /** + Cast expression. + */ + ExprT actOnCastExpr(ref Token _cast, Id type, ExprT exp) + { + return null; + } +} + +/** + Doesn't do anything at all - can be used for benchmarking the parser. + */ +class NullAction : Action +{ +} + diff -r e331e4e816e4 -r 5e383b3755d6 parser/Parser.d --- a/parser/Parser.d Fri Apr 18 23:45:45 2008 +0200 +++ b/parser/Parser.d Sun May 25 14:43:16 2008 +0200 @@ -3,110 +3,245 @@ import lexer.Lexer, lexer.Token; -import ast.Exp, - ast.Stmt, - ast.Decl; +import parser.Action; -import misc.Error; +import basic.Message; + +import basic.SmallArray, + basic.SourceManager; import tango.io.Stdout, Integer = tango.text.convert.Integer; class Parser { + Action action; + MessageHandler messages; + alias Object Exp; + alias Object Stmt; + alias Object Decl; + alias Object Module; -public: - Decl[] parse(Lexer lexer) + this(MessageHandler messages) { - this.lexer = lexer; + this.messages = messages; + } - - Decl[] declarations; + Module parse(SourceManager sm, Lexer lexer, Action act) + { + this.sm = sm; + this.lexer = lexer; + this.action = act; - while(lexer.peek.type != Tok.EOF) + Module m; + if (lexer.peek.type == Tok.Module) { - declarations ~= parseDecl; + Token _module = lexer.next; + ModuleName name = parseModuleName(); + m = action.actOnModule(_module, sm.getText(name.asRange())); + require(Tok.Seperator); + } + else + { + SLoc loc = lexer.peek.location; + m = 
action.actOnImplicitModule(loc, sm.getFile(loc)); } - return declarations; + while (lexer.peek.type != Tok.EOF) + foreach (d; parseDeclDef()) + action.actOnModuleDecl(m, d); + + return m; + } + +private: + Decl[] parseDeclDef() + { + Token t = lexer.peek; + if (t.type == Tok.Import) + return parseImports(); + else + return [parseDecl()]; } Decl parseDecl() { - Token t = lexer.next; + Token t = lexer.peek; - switch(t.type) + if (t.isBasicType || t.isIdentifier) { - case Tok.Byte, Tok.Ubyte, - Tok.Short, Tok.Ushort, - Tok.Int, Tok.Uint, - Tok.Long, Tok.Ulong, - Tok.Float, Tok.Double, - Tok.Bool, - Tok.Identifier: - Identifier type = new Identifier(t); - - Token iden = lexer.next; - - switch(iden.type) + Id type; + Id iden; + int len = peekParseType; + if(lexer.peek(len).type == Tok.Identifier && len != 0) + { + type = parseType; +parseDeclAfterInvalidType: + iden = Id(require(Tok.Identifier)); + Token next = lexer.peek(); + if (next.type == Tok.Seperator) + { + Token sep = lexer.next(); + return action.actOnDeclarator(type, iden, null); + } + else if (next.type == Tok.Assign) { - case Tok.Identifier: - Identifier identifier = new Identifier(iden); - Token p = lexer.peek(); - switch(p.type) - { - case Tok.OpenParentheses: - return parseFunc(type, identifier); - case Tok.Seperator: - require(Tok.Seperator); - return new VarDecl(type, identifier, null); - case Tok.Assign: - lexer.next(); - auto exp = parseExpression(); - require(Tok.Seperator); - return new VarDecl(type, identifier, exp); - default: - char[] c = t.getType; - error("Unexpexted token "~c~" at line "~Integer.toString(__LINE__)); - } - break; - default: - char[] c = t.getType; - error("Unexpexted token "~c~" at line "~Integer.toString(__LINE__)); + Token assign = lexer.next(); + Exp exp = parseExpression(); + require(Tok.Seperator); + return action.actOnDeclarator(type, iden, exp); } - break; - case Tok.Struct: - Token iden = lexer.next; - switch(iden.type) - { - case Tok.Identifier: - Identifier identifier 
= new Identifier(iden); - return new StructDecl (identifier, parseStruct()); - default: - throw new Error("Expected struct identifier, but got "~iden.getType, - iden.location); - } - case Tok.EOF: - return null; - default: - char[] c = t.getType; - error("Unexpexted token "~c~" at line "~Integer.toString(__LINE__)); + else if (next.type == Tok.OpenParentheses) + return parseFunc(type, iden); + else + messages.report(UnexpectedTok, next.location).arg(next.getType); + } + t = lexer.peek(len); + messages.report(InvalidDeclType, t.location) + .arg(sm.getText(t.asRange)); + while(len--) + lexer.next; + while(lexer.peek.type != Tok.Identifier) + lexer.next; + type = Id(lexer.peek); + goto parseDeclAfterInvalidType; } + else if (t.type == Tok.Struct) + { + Id type = Id(lexer.next); + Id iden = Id(require(Tok.Identifier)); + + return parseStruct(type, iden); + } + messages.report(UnexpectedTok, t.location) + .arg(t.getType) + .arg(Tok.Identifier) + .fatal(ExitLevel.Parser); } - VarDecl[] parseStruct() + /** + Parse a series of imports belonging to a single import token. 
+ */ + Decl[] parseImports() { - VarDecl[] varDecls; + Token _import = require(Tok.Import); + SmallArray!(Decl) res; + void addToRes(Decl d) { res ~= d; } + + bool done = false; + while (!done && !on_a(Tok.Seperator)) + { + ModuleName mod = parseModuleName(); + Token tok = lexer.peek; + switch (tok.type) + { + case Tok.Comma: + // import A, B.C; + // parse another module-name + lexer.next(); + res ~= action.actOnImport(_import, mod, null); + break; + case Tok.Assign: + // import B = A.A; + // ^- must be a single identifier + // renamed import + if (mod.packages.length != 0) + { + SLoc loc = mod.packages[0].tok.location; + messages.report(RenameMustBeSingleIdent, loc); + } + //if (isStatic) + // error("Static imports cannot be renamed"); + lexer.next(); + Id name = mod.id; + mod = parseModuleName(); + // create from mod and rename to `name` + res ~= action.actOnImport(_import, mod, &name); + break; + case Tok.Colon: + // import A : a; + // selective imports, potentially import A : print = a + lexer.next(); + Decl d = action.actOnImport(_import, mod, null); + // do-while on a comma: + // add explicit symbol + do + { + Id sym = parseIdentifier(); + Id dummy; + Id* name = null; + if (skip(Tok.Assign)) + { + dummy = sym; + name = &dummy; + sym = parseIdentifier(); + } + action.addSelectiveImport(d, sym, name); + + } while (skip(Tok.Comma)); + require(Tok.Seperator); + res ~= d; + return res.safe(); + case Tok.Seperator: + done = true; + break; + default: + goto Lerror; + } + res ~= action.actOnImport(_import, mod, null); + } + + require(Tok.Seperator); + return res.safe(); +Lerror: + while (!on_a (Tok.Seperator)) + lexer.next(); + return res.safe(); + } + + /** + Parse struct + */ + Decl parseStruct(Id type, Id iden) + { + auto decl = action.actOnDeclarator(type, iden, null); + require(Tok.OpenBrace); - while(lexer.peek.type != Tok.CloseBrace) + + while(lexer.peek.isBasicType || lexer.peek.isIdentifier) { - varDecls ~= cast(VarDecl)parseDecl; + auto m_decl = 
parseDecl(); + action.actOnStructMember(decl, m_decl); +/* Id var_type = Id(lexer.next); + Id var_iden = Id(require(Tok.Identifier)); + Token next = lexer.peek(); + if (next.type == Tok.Seperator) + { + Token sep = lexer.next(); + action.actOnStructMember(decl, var_type, var_iden, null); + continue; + } + else if (next.type == Tok.Assign) + { + Token assign = lexer.next(); + Exp exp = parseExpression(); + require(Tok.Seperator); + action.actOnStructMember(decl, var_type, var_iden, exp); + continue; + } + messages.report(UnexpectedTok, next.location).arg(next.getType);*/ } require(Tok.CloseBrace); - return varDecls; + + return decl; } + /** + Parse statements. + + This is the place to attack! + */ Stmt parseStatement() { Token t = lexer.peek; @@ -114,141 +249,329 @@ switch(t.type) { case Tok.Return: - lexer.next; - auto ret = new ReturnStmt(); - ret.exp = parseExpression(); + Token ret = lexer.next; + Exp exp; + if (lexer.peek.type != Tok.Seperator) + exp = parseExpression(); require(Tok.Seperator); - return ret; + return action.actOnReturnStmt(ret, exp); + /* + if (cond) + single statement | compound statement + [else + single statement | compound statement] + */ case Tok.If: - lexer.next; + Token _if = lexer.next(); + require(Tok.OpenParentheses); - auto condition = parseExpression(); + Exp cond = parseExpression(); require(Tok.CloseParentheses); - auto then_body = parseBlockOrSingleStmt(); + Stmt thenB = parseSingleOrCompoundStatement(); - Stmt[] else_body; + // if there is no else part we use the if as token, to have + // something than can be passed along + Token _else = _if; + Stmt elseB; if (lexer.peek.type == Tok.Else) { - lexer.next; - else_body = parseBlockOrSingleStmt(); + _else = lexer.next; + elseB = parseSingleOrCompoundStatement(); } - return new IfStmt(condition, then_body, else_body); + return action.actOnIfStmt(_if, cond, thenB, _else, elseB); + /* + while (cond) + single statement | compound statement + */ case Tok.While: - lexer.next; + Token 
_while = lexer.next; require(Tok.OpenParentheses); - auto condition = parseExpression(); + Exp cond = parseExpression(); require(Tok.CloseParentheses); - return new WhileStmt(condition, parseBlockOrSingleStmt()); + Stmt bodyStmt = parseSingleOrCompoundStatement(); + return action.actOnWhileStmt(_while, cond, bodyStmt); + /* + One of four things: + A declaration of a function/variable `type id ...` + A direct assignment `id = exp;` + An indirect assignment `id.id = exp` + Some sort of free standing expression + + The assignments should be handled as binary expressions? + */ case Tok.Identifier: + Token iden = lexer.peek; Token n = lexer.peek(1); - switch(n.type) + // Must be an decl, if we start with a basic type, or two + // identifiers in a row + if (iden.isBasicType() || iden.isIdentifier()) { - case Tok.Assign: - lexer.next; - lexer.next; - auto stmt = new ExpStmt(new AssignExp(new Identifier(t), parseExpression())); + if ( n.type == Tok.Star || n.type == Tok.OpenBracket) + { + int len = peekParseType; + if(lexer.peek(len).type == Tok.Identifier && len != 0) + return action.actOnDeclStmt(parseVarDecl()); + + Exp exp = parseExpression(); require(Tok.Seperator); - return stmt; - break; - case Tok.Identifier: - auto decl = new DeclStmt(parseDecl()); - return decl; + return action.actOnExprStmt(exp); + } + + if (n.isIdentifier()) + return action.actOnDeclStmt(parseVarDecl()); - default: - auto e = new ExpStmt(parseExpression()); - require(Tok.Seperator); - return e; + // Expression: a.b, a = b, a(b) etc. 
+ Exp exp = parseExpression(); + require(Tok.Seperator); + return action.actOnExprStmt(exp); + } - } - break; + case Tok.Switch: + messages.report(UnexpectedTok, lexer.peek.location).arg(lexer.next.getType); + return null; default: - auto decl = new DeclStmt(parseDecl()); - //require(Tok.Seperator); - return decl; + if (t.isBasicType()) + goto case Tok.Identifier; + if (t.type == Tok.Star) + { + auto exp = parseExpression(); + require(Tok.Seperator); + return action.actOnExprStmt(exp); + } + messages.report(UnexpectedBeginStmt, lexer.peek.location).arg(lexer.next.getType); + return null; } - return new Stmt(); + messages.report(UnexpectedTok, t.location); + return null; } - FuncDecl parseFunc(Identifier type, Identifier identifier) + Decl parseVarDecl() { - VarDecl[] funcArgs = parseFuncArgs(); - - lexer.next; // Remove the "{" - - Stmt[] statements; - - while(lexer.peek.type != Tok.CloseBrace) - statements ~= parseStatement(); - - lexer.next; // Remove "}" - - return new FuncDecl(type, identifier, funcArgs, statements); + // manually hardcoded to only support "type id [= exp];" + // as that is the only thing the codegen understands + Id type = parseType; + Id id = Id(lexer.next); + Exp init; + if (skip(Tok.Assign)) + init = parseExpression(); + require(Tok.Seperator); + Decl d = action.actOnDeclarator(type, id, init); + return d; } - VarDecl[] parseFuncArgs() + /** + Parses a function/method given the already parsed return type and name + */ + Decl parseFunc(ref Id type, ref Id name) { - lexer.next; // Remove the "(" token. + Decl func = action.actOnStartOfFunctionDef(type, name); + parseFuncArgs(func); + + if(lexer.peek.type == Tok.Seperator) + { + lexer.next; + return func; + } + Stmt stmt = parseCompoundStatement(); - VarDecl[] funcArgs; + return action.actOnEndOfFunction(func, stmt); + } + + /** + Parse the function arguments, assumes current token is (. + + Both the intitial paren and the ending paren is consumed. 
+ */ + void parseFuncArgs(Decl func) + { + require(Tok.OpenParentheses); // Remove the "(" token. while(lexer.peek.type != Tok.CloseParentheses) { - auto t = parseType; - auto i = parseIdentifier; - funcArgs ~= new VarDecl(t, i); + auto t = parseType(); + Id i; + if(lexer.peek.type == Tok.Identifier) + i = parseIdentifier(); + action.addFuncArg(func, t, i); if(lexer.peek.type == Tok.Comma) lexer.next; } - lexer.next; // Remove the ")" + require(Tok.CloseParentheses); // Remove the ")" + } + + /** + Parse either a block, or a single statement as allowed after if, while + and for. + */ + Stmt parseSingleOrCompoundStatement() + { + if (lexer.peek.type == Tok.OpenBrace) + return parseCompoundStatement(); + return parseStatement(); + } + + /** + Parses a function-body or similar, expects an opening brace to be the + current token. + + Will consume both the starting { and ending } + */ + Stmt parseCompoundStatement() + { + Token lbrace = require(Tok.OpenBrace); + SmallArray!(Stmt, 32) stmts; // Try to use the stack only + while (lexer.peek.type != Tok.CloseBrace) + stmts ~= parseStatement(); + Token rbrace = require(Tok.CloseBrace); + return action.actOnCompoundStmt(lbrace, rbrace, stmts.unsafe()); + } - return funcArgs; + Id parseIdentifier() + { + Token tok = lexer.next; + + if (tok.type is Tok.Identifier) + return Id(tok); + + messages.report(UnexpectedTokSingle, tok.location) + .arg(tok.getType) + .arg(Tok.Identifier); + } + + ModuleName parseModuleName() + { + auto id = parseIdentifier(); + ModuleName mod; + while (skip(Tok.Dot)) + { + mod.packages ~= id; + if (lexer.peek.type != Tok.Identifier) { + messages.report(ExpectedIdAfterPackage, lexer.peek.location); + goto Lerror; + } + id = parseIdentifier(); + } + mod.id = id; + return mod; +Lerror: + while (!skip(Tok.Seperator)) + lexer.next(); + return mod; } - Identifier parseIdentifier() + + /** + Parse a type - this includes pointer and array(at some point) types. 
+ */ + Id parseType() { - Token identifier = lexer.next; + Token type = lexer.next; + + Id currentType; - switch(identifier.type) + if ( !(type.isBasicType || type.type == Tok.Identifier) ) + messages.report(InvalidType, type.location); + + currentType = Id(type); + type = lexer.peek; + + while(type.type == Tok.Star || type.type == Tok.OpenBracket) { - case Tok.Identifier: - return new Identifier(identifier); - break; + if(type.type == Tok.Star) + { + currentType = PointerId(currentType); + lexer.next; + } + else + { + lexer.next; + if(lexer.peek.type == Tok.Integer) + currentType = ArrayId(currentType, action.actOnNumericConstant(require(Tok.Integer))); + require(Tok.CloseBracket); + + } + type = lexer.peek; + } + + return currentType; + } + + int peekParseType() + { + int i; + Token type = lexer.peek(i); + + Id currentType; + + if ( !(type.isBasicType || type.type == Tok.Identifier) ) + return 0; + + currentType = Id(type); + type = lexer.peek(++i); + + while(type.type == Tok.Star || type.type == Tok.OpenBracket) + { + if(type.type == Tok.Star) + { + i++; + } + else + { + if(lexer.peek(i++).type != Tok.OpenBracket) + return 0; + if(lexer.peek(i).type == Tok.Integer) + { + i++; + if(lexer.peek(i++).type != Tok.CloseBracket) + return 0; + } + else + if(lexer.peek(i++).type != Tok.CloseBracket) + return 0; + + } + type = lexer.peek(i); + } + + return i; + } + +private: + // -- Expression parsing -- // + Exp parsePostfixExp(Exp target) + { + switch(lexer.peek.type) + { + case Tok.Dot: + switch(lexer.peek(1).type) + { + case Tok.Identifier: + Token op = lexer.next; + Id member = Id(lexer.next); + Exp exp = action.actOnMemberReference(target, op.location, member); + return parsePostfixExp(exp); + default: + Token t = lexer.peek(1); + messages.report(ExpectedIdAfterDot, t.location); + } + case Tok.OpenBracket: + Token open = lexer.next; + Exp index = parseExpression(); + Token close = require(Tok.CloseBracket); + return action.actOnIndexEpr(target, open, index, close); 
default: - throw new Error("Unexpexted token in Identifier parsing. Got "~identifier.getType, identifier.location); + return target; } } - Identifier parseType() - { - Token type = lexer.next; - - switch(type.type) - { - case Tok.Byte, Tok.Ubyte, - Tok.Short, Tok.Ushort, - Tok.Int, Tok.Uint, - Tok.Long, Tok.Ulong, - Tok.Float, Tok.Double, - Tok.Bool, - Tok.Identifier: - return new Identifier(type); - break; - default: - char[] c = type.getType; - error("Unexpexted token in Type parsing. Got "~c); - } - } - - // -- Expression parsing -- // -private: Exp parseExpression(int p = 0) { auto exp = P(); @@ -259,7 +582,7 @@ lexer.next(); int q = op.leftAssoc? 1 + op.prec : op.prec; auto exp2 = parseExpression(q); - exp = new BinaryExp(op.operator, exp, exp2); + exp = action.actOnBinaryOp(next.location, op.operator, exp, exp2); next = lexer.peek(); } @@ -270,7 +593,7 @@ { Token next = lexer.next(); if (auto op = unary(next.type)) - return new NegateExp(parseExpression(op.prec)); + return action.actOnUnaryOp(next, parseExpression(op.prec)); else if (next.type == Tok.OpenParentheses) { auto e = parseExpression(0); @@ -279,48 +602,47 @@ } else if (next.type == Tok.Identifier) { + Exp value = action.actOnIdentifierExp(Id(next)); + Exp iden = parsePostfixExp(value); switch(lexer.peek.type) { case Tok.OpenParentheses: - lexer.next; - Exp[] args; + Token lp = lexer.next; + SmallArray!(Exp, 8) args; while(lexer.peek.type != Tok.CloseParentheses) { if(lexer.peek.type == Tok.Comma) - { lexer.next; - } args ~= parseExpression(); } - lexer.next(); - return new CallExp(new Identifier(next), args); + Token rp = lexer.next(); + return action.actOnCallExpr(iden, lp, args.unsafe(), rp); default: - return new Identifier(next); + return iden; } } + else if (next.type == Tok.Cast) + return parseCast(next); else if (next.type == Tok.Integer) - return new IntegerLit(next); + return action.actOnNumericConstant(next); - Stdout.formatln("{}", next.getType); - assert(0, "Should not happen"); + 
messages.report(ExpectedExp, next.location) + .fatal(ExitLevel.Parser); + return null; } - private Stmt[] parseBlockOrSingleStmt() + Exp parseCast(ref Token _cast) { - Stmt[] stmts; - if (lexer.peek.type == Tok.OpenBrace) - { - lexer.next; - while(lexer.peek.type != Tok.CloseBrace) - stmts ~= parseStatement(); - lexer.next; - } - else - stmts ~= parseStatement(); - - return stmts; + require(Tok.OpenParentheses); + auto next = lexer.next; + if(!next.isBasicType && !next.isIdentifier) + messages.report(ExpectedCastType, next.location); + + require(Tok.CloseParentheses); + auto exp = P(); + return action.actOnCastExpr(_cast, Id(next), exp); } struct UnOp @@ -329,7 +651,11 @@ int prec; } - static UnOp[] _unary = [{Tok.Sub, 4}]; + static const UnOp[] _unary = + [ + {Tok.Minus, 4}, + {Tok.Star, 4} + ]; UnOp* unary(Tok t) { foreach (ref op; _unary) @@ -343,23 +669,27 @@ Tok tokenType; int prec; bool leftAssoc; - BinaryExp.Operator operator; + Operator operator; } - static BinOp[] _binary = + static const BinOp[] _binary = [ - {Tok.Eq, 2, true, BinaryExp.Operator.Eq}, - {Tok.Ne, 2, true, BinaryExp.Operator.Ne}, - {Tok.Lt, 2, true, BinaryExp.Operator.Lt}, - {Tok.Le, 2, true, BinaryExp.Operator.Le}, - {Tok.Gt, 2, true, BinaryExp.Operator.Gt}, - {Tok.Ge, 2, true, BinaryExp.Operator.Ge}, + {Tok.Assign, 1, false, Operator.Assign}, + + {Tok.Eq, 2, true, Operator.Eq}, + {Tok.Ne, 2, true, Operator.Ne}, - {Tok.Add, 3, true, BinaryExp.Operator.Add}, - {Tok.Sub, 3, true, BinaryExp.Operator.Sub}, + {Tok.Lt, 2, true, Operator.Lt}, + {Tok.Le, 2, true, Operator.Le}, + {Tok.Gt, 2, true, Operator.Gt}, + {Tok.Ge, 2, true, Operator.Ge}, - {Tok.Mul, 5, true, BinaryExp.Operator.Mul}, - {Tok.Div, 5, true, BinaryExp.Operator.Div} + {Tok.Plus, 3, true, Operator.Add}, + {Tok.Minus, 3, true, Operator.Sub}, + + {Tok.Star, 5, true, Operator.Mul}, + {Tok.Slash, 5, true, Operator.Div}, + {Tok.Percent, 5, true, Operator.Mod} ]; BinOp* binary(Tok t) { @@ -371,17 +701,29 @@ private: - void require(Tok t) 
+ Token require(Tok t) { if (lexer.peek().type != t) - error("Unexpexted token: Got '"~lexer.peek.getType~"' Expected '"~typeToString[t]~"'"); - lexer.next(); + messages.report(UnexpectedTokSingle, lexer.peek.location) + .arg(lexer.peek.getType) + .arg(t); + return lexer.next(); } - void error(char[] errMsg) + bool skip(Tok t) { - throw new Exception("Parser error: " ~errMsg); + if (lexer.peek().type != t) + return false; + lexer.next(); + return true; + } + + bool on_a(Tok t) + { + return lexer.peek.type == t; } Lexer lexer; + SourceManager sm; } + diff -r e331e4e816e4 -r 5e383b3755d6 sema/AstAction.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sema/AstAction.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,226 @@ +module sema.AstAction; + +import Integer = tango.text.convert.Integer; + +import lexer.Token; + +import misc.Error, + basic.SourceManager; + +import ast.Module, + ast.Exp, + ast.Stmt, + ast.Decl; + +public +import parser.Action; + +/** + This class implements the default actions for Dang, by building up an AST + with the data needed in a compiler. 
+ */ +class AstAction : Action +{ + this(SourceManager sm) + { + this.sm = sm; + } + private SourceManager sm; + + private Identifier handleType(Id type) + { + if(auto t = cast(PointerId)type) + return new PointerIdentifier(handleType(t.id)); + if(auto t = cast(ArrayId)type) + return new ArrayIdentifier(handleType(t.id), cast(IntegerLit)t.number); + else + return identifierFromTok(type.tok); + } + + private Identifier identifierFromTok(Token t) + { + return new Identifier(t.location, sm.getText(t.asRange)); + } + + override ModuleT actOnModule(ref Token _module, char[] name) + { + return new Module(name); + } + + override ModuleT actOnImplicitModule(SLoc startLoc, char[] name) + { + return new Module(name); + } + + override void actOnModuleDecl(ModuleT m, DeclT d) + { + (cast(Module)m).addDecl(cast(Decl)d); + } + + // -- Declarations -- + override DeclT actOnImport(ref Token _, ref ModuleName target, Id* name) + { + auto res = new ImportDecl; + Identifier[] packages = new Identifier[target.packages.length]; + foreach (i, v; target.packages) + packages[i] = identifierFromTok(v.tok); + res.packages = packages; + + res.name = identifierFromTok(target.id.tok); + if (name !is null) + res.aliasedName = identifierFromTok(name.tok); + return res; + } + + override void addSelectiveImport(DeclT _import, ref Id target, Id* name) + { + auto d = cast(ImportDecl)_import; + Identifier t = identifierFromTok(target.tok); + Identifier n = t; + if (name !is null) + n = identifierFromTok(name.tok); + d.explicitSymbols ~= [t, n]; + } + + override DeclT actOnDeclarator(ref Id type, ref Id id, ExprT init) + { + Exp exp = cast(Exp)init; + if(type.tok.type == Tok.Struct) + return new StructDecl(identifierFromTok(id.tok)); + else + return new VarDecl(handleType(type), identifierFromTok(id.tok), exp); + } + + override void actOnStructMember(DeclT st_decl, DeclT m_decl) //ref Id type, ref Id name, ExprT init) + { + StructDecl st = cast(StructDecl)st_decl; + st.addMember(cast(Decl)m_decl); + } 
+ + override ExprT actOnMemberReference(ExprT lhs, SLoc op, Id member) + { + Exp exp = cast(Exp)lhs; + Identifier id = identifierFromTok(member.tok); + return new MemberReference(op, exp, id); + } + + override DeclT actOnStartOfFunctionDef(ref Id type, ref Id name) + { + return new FuncDecl(identifierFromTok(type.tok), identifierFromTok(name.tok)); + } + + override void addFuncArg(DeclT func, Id type, Id name) + { + FuncDecl fd = cast(FuncDecl)func; + if(name) + fd.addParam(handleType(type), identifierFromTok(name.tok)); + else + fd.addParam(identifierFromTok(type.tok)); + } + + override DeclT actOnEndOfFunction(DeclT func, StmtT stmts) + { + FuncDecl fd = cast(FuncDecl)func; + fd.setBody(cast(CompoundStatement)stmts); + return fd; + } + + // -- Statements -- + override StmtT actOnCompoundStmt(ref Token l, ref Token r, StmtT[] stmts) + { + Stmt[] statements = cast(Stmt[])stmts; + return new CompoundStatement(statements.dup); + } + + override StmtT actOnExprStmt(ExprT exp) + { + return new ExpStmt(cast(Exp)exp); + } + + override StmtT actOnReturnStmt(ref Token loc, ExprT exp) + { + Exp e = cast(Exp)exp; + auto res = new ReturnStmt; + res.exp = e; + return res; + } + + override StmtT actOnIfStmt(ref Token ifTok, ExprT cond, StmtT thenBody, + ref Token elseTok, StmtT elseBody) + { + Exp c = cast(Exp)cond; + Stmt t = cast(Stmt)thenBody; + Stmt e = cast(Stmt)elseBody; + return new IfStmt(c, t, e); + } + + override StmtT actOnWhileStmt(ref Token tok, ExprT cond, StmtT whileBody) + { + Exp c = cast(Exp)cond; + Stmt b = cast(Stmt)whileBody; + return new WhileStmt(c, b); + } + + override StmtT actOnDeclStmt(DeclT decl) + { + Decl d = cast(Decl)decl; + return new DeclStmt(d); + } + + // -- Expressions -- + override ExprT actOnNumericConstant(Token c) + { + return new IntegerLit(c.location, sm.getText(c.asRange)); + } + + override ExprT actOnIdentifierExp(Id id) + { + return identifierFromTok(id.tok); + } + + override ExprT actOnBinaryOp(SLoc op_loc, Operator op, ExprT l, 
ExprT r) + { + Exp left = cast(Exp)l; + Exp right = cast(Exp)r; + if (op == Operator.Assign) + return new AssignExp(op_loc, left, right); + else + { + BinaryExp.Operator bin_op = cast(BinaryExp.Operator)op; + return new BinaryExp(op_loc, bin_op, left, right); + } + } + + override ExprT actOnUnaryOp(Token op, ExprT operand) + { + Exp target = cast(Exp)operand; + if (op.type == Tok.Minus) + return new NegateExp(op.location, target); + if (op.type == Tok.Star) + return new DerefExp(op.location, target); + assert(0, "Only valid unary expressions are -x and *x"); + } + + override ExprT actOnCallExpr(ExprT fn, ref Token, ExprT[] args, ref Token) + { + Exp f = cast(Exp)fn; + Exp[] arguments = cast(Exp[])args.dup; + return new CallExp(f, arguments); + } + + override ExprT actOnCastExpr(ref Token _cast, Id id, ExprT exp) + { + Exp target = cast(Exp)exp; + Identifier target_type = identifierFromTok(id.tok); + return new CastExp(_cast.location, target_type, target); + } + + override ExprT + actOnIndexEpr(ExprT arr, ref Token lb, ExprT index, ref Token rb) + { + Exp target = cast(Exp)arr; + Exp idx = cast(Exp)index; + return new IndexExp(target, lb.location, idx, rb.location); + } +} + diff -r e331e4e816e4 -r 5e383b3755d6 sema/DType.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sema/DType.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,340 @@ +module sema.DType; + +import lexer.Token, + ast.Exp; + +public +import sema.Operation; + +class DType +{ + private char[] id; + private SourceLocation loc; + public DType actual; + + this(Identifier id, DType actual = null) + { + this.id = id.name; + this.loc = id.startLoc(); + this.actual = actual is null? this : actual; + } + + this(char[] id, DType actual = null) + { + this.id = id; + this.actual = actual is null? 
this : actual; + } + + /// Is this type a DStruct + bool isStruct() { return false; } + /// Return a DStruct if this is one, otherwise return null + DStruct asStruct() { return null; } + + /// Is this type a DArray + bool isArray() { return false; } + /// Return a DArray if this is one, otherwise return null + DArray asArray() { return null; } + + /// Is this type a DPointer + bool isPointer() { return false; } + /// Return a DPointer if this is one, otherwise return null + DPointer asPointer() { return null; } + + /// Is this type a DFunction + bool isFunction() { return false; } + /// Return a DFunction if this is one, otherwise return null + DFunction asFunction() { return null; } + + /// Is this type a DInteger + bool isInteger() { return false; } + /// Return a DInteger if this is one, otherwise return null + DInteger asInteger() { return null; } + + int opEquals(Object o) + { + if (auto t = cast(DType)o) + return this.actual is t.actual; + return 0; + } + + int opCmp(Object o) + { + if (auto t = cast(DType)o) + return cast(void*)this.actual - cast(void*)t.actual; + return 0; + } + + /** + Hashing is done by casting the reference to a void* and taking that + value, but this gives a bad distribution of hash-values. + + Multiple DType's allocated close to each other will only have a + difference in the lower bits of their hashes. + */ + hash_t toHash() + { + return cast(hash_t)(cast(void*)this); + } + + char[] name() { return id; } + SourceLocation getLoc() { return loc; } + int byteSize() { return 0; } + + /** + Can this type legally be converted to that type with no casts? + True for short -> int etc. + */ + bool hasImplicitConversionTo(DType that) { return false; } + + /** + Get an Operation describing how to use the supplied operator on the two + types given. 
+ */ + Operation getOperationWith(Operator op, DType other) + { + Operation res; + return res; + } + + /** + Get a type representing a pointer to this type (from int to int*) + */ + DPointer getPointerTo() + { + if(myPointer !is null) + return myPointer; + myPointer = new DPointer(this); + return myPointer; + } + private DPointer myPointer; + + /** + Mangle the DType following the specs at http://digitalmars.com/d/1.0/abi.html + **/ + char[] mangle() + { + /// expects to be void + return "v"; + } + + /** + Get a type representing a static array of this type with length 'size' + */ + DArray getAsArray(int size) + { + if(size in myArray) + return myArray[size]; + myArray[size] = new DArray(this, size); + return myArray[size]; + } + private DArray[int] myArray; + + static DInteger + Bool, + Byte, UByte, Short, UShort, + Int, UInt, Long, ULong, + Char, WChar, DChar; + + static DType Void; + + static this() + { + Void = new DType("void"); + + Bool = new DInteger("bool", 1, false); + Byte = new DInteger("byte", 8, false); + UByte = new DInteger("ubyte", 8, true); + Short = new DInteger("short", 16, false); + UShort = new DInteger("ushort", 16, true); + Int = new DInteger("int", 32, false); + UInt = new DInteger("uint", 32, true); + Long = new DInteger("long", 64, false); + ULong = new DInteger("ulong", 64, true); + Char = new DInteger("char", 8, true); + WChar = new DInteger("wchar", 16, true); + DChar = new DInteger("dchar", 32, true); + } +} + +/** + Class to represent the built-in integer types, from byte to long. 
+ */ +class DInteger : DType +{ + private static char[][DInteger] mangle_types; + + static this() + { + mangle_types = + [ + Bool : "b", + Byte : "g", + UByte : "h", + Short : "s", + UShort : "t", + Int : "i", + UInt : "k", + Long : "l", + ULong : "m" + ]; + } + + this(char[] name, int bits, bool unsigned) + { + super(name, null); + this.bits = bits; + this.unsigned = unsigned; + } + + override int byteSize() { return bits / 8; } + + override bool hasImplicitConversionTo(DType that) + { + if (auto o = cast(DInteger)that) + return true; +// return this.bits >= o.bits; + return false; + } + + override bool isInteger() { return true; } + override DInteger asInteger() { return this; } + + override Operation getOperationWith(Operator op, DType that) + { + Operation operation; + if (this is that) + operation = Operation.builtin(op, unsigned, false); + return operation; + } + + override char[] mangle() + { + return mangle_types[this]; + } + + int bits; + bool unsigned; +} + +class DStruct : DType +{ + this(Identifier id, DType actual = null) + { + super(id, actual); + } + + int byteSize() { return bytes_total; } + + override bool isStruct() { return true; } + override DStruct asStruct() { return this; } + + void addMember(DType type, char[] name) + { + auto s = DStructMember(type, members.length); + members[name] = s; + + bytes_total += type.byteSize(); + } + + int indexOf(char[] name) + { + if(name in members) + return members[name].index; + + return -1; + } + + DType typeOf(char[] name) + { + if (auto res = name in members) + return res.type; + return null; + } + + DStructMember[char[]] members; + private int bytes_total; + + override char[] mangle() + { + return "S"~Integer.toString(name.length)~name; + } + + struct DStructMember + { + DType type; + int index; + } +} + +class DArray : DType +{ + this(DType arrayOf, int size, DType actual = null) + { + super(id, actual); + this.arrayOf = arrayOf; + this.size = size; + } + + override bool isArray() { return true; } + 
override DArray asArray() { return this; } + + int byteSize() { return arrayOf.byteSize * size; } + + override char[] mangle() + { + return "G"~Integer.toString(size)~arrayOf.mangle; + } + + DType arrayOf; + const int size; +} + +class DPointer : DType +{ + this(DType pointerOf, DType actual = null) + { + super(id, actual); + this.pointerOf = pointerOf; + } + + override bool isPointer() { return true; } + override DPointer asPointer() { return this; } + + int byteSize() { return DType.Int.byteSize; } + + override char[] mangle() + { + return "P"~pointerOf.mangle; + } + + DType pointerOf; +} + +class DFunction : DType +{ + this(Identifier id, DType actual = null) + { + super(id, actual); + } + + override bool isFunction() { return true; } + override DFunction asFunction() { return this; } + + override char[] mangle() + { + char[] res; + res ~= "F"; + + foreach(param ; params) + res ~= "J" ~ param.mangle; + + res ~= "Z" ~ returnType.mangle; + + return res; + } + + DType[] params; + DType returnType; + bool firstParamIsReturnValue = false; +} + diff -r e331e4e816e4 -r 5e383b3755d6 sema/Declarations.d --- a/sema/Declarations.d Fri Apr 18 23:45:45 2008 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,55 +0,0 @@ -module sema.Declarations; - -import sema.Visitor; - -import tango.io.Stdout; - -import misc.Error; - -class Declarations : Visitor!(void) -{ - int[char[]] types; - - this() - { - types = [ - "byte"[]:0, - "ubyte":1, - "short":2, - "ushort":3, - "int":4, - "uint":5, - "long":6, - "ulong":7, - "bool":8 - ]; - } - - override void visitIdentifier(Identifier i) - { - auto symbol = i.env.find(i); - - if(symbol is null && !isType(i.get)) - throw new Error("Undefined identifier: '"~i.get~"'",i.token.location); - - } - - override void visitVarDecl(VarDecl d) - { - if(!isType(d.type.get) && d.env.findType(d.identifier)) - throw new Error("Undefined type: '"~d.type.get~"'",d.type.token.location); - - visitExp(d.type); - visitExp(d.identifier); - if (d.init) - 
visitExp(d.init); - } - - - bool isType(char[] s) - { - return (s in types? true : false); - } - -} - diff -r e331e4e816e4 -r 5e383b3755d6 sema/Operation.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sema/Operation.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,136 @@ +module sema.Operation; + +/// Operators +public enum Operator +{ + Add, Sub, Mul, Div, Rem, + Shl, LShr, AShr, + And, Or, Xor, + + Eq, Ne, + Lt, Le, + Gt, Ge, +} + +/** + Enum for the basic builtin operations. + + S for signed, U for unsigned and F for floating point. + **/ +public enum BuiltinOperation +{ + Add, Sub, Mul, SDiv, UDiv, FDiv, SRem, URem, FRem, + + Shl, LShr, AShr, + And, Or, Xor, + + Eq, Ne, + SLt, ULt, FLt, SLe, ULe, FLe, + SGt, UGt, FGt, SGe, UGe, FGe, + + None +} + +/** + Returns true if the operation has an unsigned variant. + + Will only be true for the S version, so SDiv gives true, UDiv or FDiv don't. + **/ +private bool hasUnsignedVariant(BuiltinOperation op) +{ + alias BuiltinOperation O; + return op is O.SDiv + || op is O.SRem + || op is O.SLt + || op is O.SLe + || op is O.SGt + || op is O.SGe; +} + +/// Same as hasUnsignedVariant, but for float variants +private bool hasFloatVariant(BuiltinOperation op) +{ + alias BuiltinOperation O; + return op is O.SDiv + || op is O.SRem + || op is O.SLt + || op is O.SLe + || op is O.SGt + || op is O.SGe; +} + +private BuiltinOperation OpToBI(Operator op) +{ + // This is dependent on the definition of Operator + // Maps from an Operator to the first appropriate BuiltinOperation + static const BuiltinOperation[] map = + [ + BuiltinOperation.Add, + BuiltinOperation.Sub, + BuiltinOperation.Mul, + BuiltinOperation.SDiv, + BuiltinOperation.SRem, + + BuiltinOperation.Shl, + BuiltinOperation.LShr, + BuiltinOperation.AShr, + BuiltinOperation.And, + BuiltinOperation.Or, + BuiltinOperation.Xor, + + BuiltinOperation.Eq, + BuiltinOperation.Ne, + BuiltinOperation.SLt, + BuiltinOperation.SLe, + BuiltinOperation.SGt, + BuiltinOperation.SGe, + + ]; + if 
(op >= Operator.Add && op <= Operator.Ge) + return map[op]; + + return BuiltinOperation.None; +} + +/** + Represents an operation on two values of (potentially) different types. + + Can be either some built-in thing (addition of floats, int etc) or a user + defined operation (a method in a struct/class). + **/ +struct Operation +{ + /// Returns true if the operation is legal + bool isPossible() { return is_valid; } + + /// True for <, <=, ==, !=, >, >= + bool isComparison() { return false; } + + /// Built in operations like adding ints or floats + bool isBuiltin() { return is_bi; } + + /// Get the builtin operation - only valid if isBuiltin() returns true + BuiltinOperation builtinOp() { return bi_op; }; + + /// Create builtin operation + static Operation builtin(Operator op, bool unsigned, bool fp) + { + assert(!(unsigned && fp), "Can't be both unsigned and a float"); + Operation res; + res.is_valid = true; + res.is_bi = true; + res.bi_op = OpToBI(op); + + if (unsigned && hasUnsignedVariant(res.bi_op)) + res.bi_op += 1; + if (fp && hasFloatVariant(res.bi_op)) + res.bi_op += 2; + return res; + } + +private: + bool is_valid = false; + bool is_bi; + BuiltinOperation bi_op; +} + diff -r e331e4e816e4 -r 5e383b3755d6 sema/Scope.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sema/Scope.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,169 @@ +module sema.Scope; + +import tango.io.Stdout; + +import lexer.Token, + ast.Module, + ast.Decl, + ast.Exp; + +public +import sema.DType; + +class Scope +{ + this() {} + this(Scope enclosing) + { + this.enclosing = enclosing; + this.func = enclosing.func; + this.inModule = enclosing.inModule; + this.mHandle = enclosing.mHandle; + } + + Scope enclosing; + ModuleHandler mHandle; + Module inModule; + Scope[] imported; + + ImportDecl[] imports; + + + void add(Identifier id) + { + symbols[id] = id; + } + + Identifier find(Identifier id) + { + if(id is null) + return null; + if (auto sym = id in symbols) + return *sym; + if (enclosing !is 
null) + { + auto type = enclosing.find(id); + if(type is null) + return mHandle.find(getImports, id); + return type; + } + return null; + } + + ImportDecl[] getImports() + { + if(enclosing) + return enclosing.getImports ~ imports; + return imports; + } + + DType findType(Identifier id) + { + if (auto type = id.get in types) + return *type; + if (enclosing !is null) + { + auto type = enclosing.findType(id); + if(type is null) + return mHandle.findType(getImports, id); + return type; + } + return null; + } + + char[][] names() + { + char[][] res; + if (parentFunction() !is null) + res ~= "pf: " ~ parentFunction().identifier.get; + if (enclosing) + res = enclosing.names; + foreach (id, sym; symbols) + res ~= sym.name ~ " : " ~ (sym.type is null? "?" : sym.type.name); + return res; + } + + FuncDecl parentFunction() + { + if (func !is null) + return func; + else if (enclosing !is null) + return enclosing.parentFunction(); + else + return null; + } + + int stmtIndex() + { + if (currentStmtIndex != -1) + return currentStmtIndex; + else if (enclosing !is null) + return enclosing.stmtIndex(); + else + return -1; + } + + int opEquals(Object o) + { + return this is o; + } + + char[] toString() + { + if (func) + return Stdout.layout.convert("{}: {}", func.identifier.get, symbols.length); + return Stdout.layout.convert("root: {}", symbols.length); + } + + FuncDecl parentFunction(FuncDecl f) + { + func = f; + return f; + } + DType[char[]] types; + int currentStmtIndex = -1; +private: + Identifier[Identifier] symbols; + FuncDecl func; +} + +class ModuleHandler +{ + void add(Module m) + { + modules[m.moduleName] = m; + } + void add(Module m, char[] file) + { + fileToModule[file] = m.moduleName; + add(m); + } + + DType findType(ImportDecl[] imports, Identifier type) + { + foreach(i ; imports) + if(i.get in modules) + { + auto t = modules[i.get].env.findType(type); + if(t !is null) + return t; + } + return null; + } + + Identifier find(ImportDecl[] imports, Identifier id) + { + 
foreach(i ; imports) + if(i.get in modules) + { + auto t = modules[i.get].env.find(id); + if(t !is null) + return t; + } + return null; + } + + char[][char[]] fileToModule; + Module[char[]] modules; +} + diff -r e331e4e816e4 -r 5e383b3755d6 sema/ScopeBuilder.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sema/ScopeBuilder.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,259 @@ +module sema.ScopeBuilder; + +import tango.io.Stdout, + tango.core.Array : find; + +public +import sema.Scope; + +import sema.Visitor, + basic.SmallArray; + +class ForwardReference : Visitor!(void) +{ + override void visit(Module[] modules) + { + this.modules = modules; + super.visit(modules); + } + + override void visitFuncDecl(FuncDecl d) + { + visitExp(d.returnType); + visitExp(d.identifier); + foreach (arg; d.funcArgs) + visitDecl(arg); + foreach (stmt; d.statements) + visitStmt(stmt); + + d.env.find(d.identifier).setType(d.type); + } + + override void visitVarDecl(VarDecl d) + { + visitExp(d.varType); + visitExp(d.identifier); + + if (d.init) + visitExp(d.init); + + d.env.find(d.identifier).setType( typeOf(d.varType, d.env) ); + } + + override void visitStructDecl(StructDecl s) + { + super.visitStructDecl(s); + + DType[char[]] types; + + auto st = s.env.types[s.identifier.get].asStruct; + foreach (decl; s.decls) + if (auto varDecl = cast(VarDecl)decl) + st.addMember(typeOf(varDecl.varType, varDecl.env), varDecl.identifier.get); + else if (auto fd = cast(FuncDecl)decl) + st.addMember(fd.type, fd.identifier.get); + } + + DType typeOf(Identifier id, Scope sc) + { + if(auto i = cast(PointerIdentifier)id) + return (typeOf(i.pointerOf, sc)).getPointerTo(); + if(auto i = cast(ArrayIdentifier)id) + return typeOf(i.arrayOf, sc).getAsArray(i.size); + return sc.findType(id); + } + + Module[] modules; +} + +class ScopeBuilder : Visitor!(void) +{ + static ModuleHandler mHandle; + + static this() + { + mHandle = new ModuleHandler; + } + + this() + { + } + + override void visit(Module[] modules) + { + 
foreach(m ; modules) + visitModule(m); + + auto fr = new ForwardReference(); + + fr.visit(modules); + } + + override void visitModule(Module m) + { + table ~= new Scope; + table[table.length-1].types["void"] = DType.Void; + table[table.length-1].types["bool"] = DType.Bool; + table[table.length-1].types["byte"] = DType.Byte; + table[table.length-1].types["ubyte"] = DType.UByte; + table[table.length-1].types["short"] = DType.Short; + table[table.length-1].types["ushort"] = DType.UShort; + table[table.length-1].types["int"] = DType.Int; + table[table.length-1].types["uint"] = DType.UInt; + table[table.length-1].types["long"] = DType.Long; + table[table.length-1].types["ulong"] = DType.ULong; + table[table.length-1].types["char"] = DType.Char; + table[table.length-1].types["wchar"] = DType.WChar; + table[table.length-1].types["dchar"] = DType.DChar; + + current().inModule = m; + current().mHandle = mHandle; + mHandle.add(m); + m.env = current(); + super.visitModule(m); + } + + override void visitDecl(Decl d) + { + d.env = current(); + super.visitDecl(d); + } + + override void visitImportDecl(ImportDecl i) + { + i.env.imports ~= i; + super.visitImportDecl(i); + } + + override void visitStmt(Stmt s) + { + s.env = current(); + s.stmtIndex = s.env.stmtIndex; + super.visitStmt(s); + } + + override void visitExp(Exp e) + { + e.env = current(); + e.stmtIndex = e.env.stmtIndex; + super.visitExp(e); + } + + override void visitFuncDecl(FuncDecl d) + { + current().add(d.identifier); + auto sc = push(); + + visitExp(d.returnType); + visitExp(d.identifier); + d.env = current(); + sc.parentFunction = d; + foreach (arg; d.funcArgs) + visitDecl(arg); + foreach (stmt; d.statements) + { + sc.currentStmtIndex++; + visitStmt(stmt); + } + pop(sc); + } + + override void visitVarDecl(VarDecl d) + { + if (d.init) + visitExp(d.init); + + if (need_push > 0 && current().parentFunction !is null) { + push(); + --need_push; + } + + auto sc = current(); + sc.add(d.identifier); + d.env = sc; + 
visitExp(d.varType); + visitExp(d.identifier); + } + + override void visitStructDecl(StructDecl s) + { + auto sc = current(); + sc.add(s.identifier); + s.env = sc; + auto type = new DStruct(s.identifier); + + sc.types[s.identifier.get] = type; + + sc = push(); + super.visitStructDecl(s); + pop(sc); + } + + override void visitDeclStmt(DeclStmt d) + { + ++need_push; + super.visitDeclStmt(d); + } + private uint need_push = 0; + + override void visitIfStmt(IfStmt s) + { + s.env = current(); + visitExp(s.cond); + auto sc = push(); + visitStmt(s.then_body); + pop(sc); + + if (s.else_body !is null) + { + sc = push(); + visitStmt(s.else_body); + pop(sc); + } + } + + override void visitWhileStmt(WhileStmt s) + { + s.env = current(); + auto sc = push(); + super.visitWhileStmt(s); + pop(sc); + } + + override void visitCompoundStmt(CompoundStatement s) + { + s.env = current(); + auto sc = push(); + super.visitCompoundStmt(s); + pop(sc); + } + +private: + Scope[] table; + + Scope push() + { + auto sc = new Scope(current()); + table ~= sc; + return sc; + } + + Scope pop(Scope sc = null) + { + if (sc !is null) + { + table.length = table.find(sc); + return sc; + } + + auto res = table[$ - 1]; + table.length = table.length - 1; + return res; + } + + Scope current() + { + return table[$ - 1]; + } +} + diff -r e331e4e816e4 -r 5e383b3755d6 sema/ScopeCheck.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sema/ScopeCheck.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,81 @@ +module sema.ScopeCheck; + +import sema.Visitor, + sema.DType; + +import tango.io.Stdout; + +import misc.Error; + +class ScopeCheck : Visitor!(void) +{ + int[char[]] types; + + private Error error(uint line, char[] msg) + { + return new Error(msg); + } + + override void visitIdentifier(Identifier i) + { + auto symbol = i.env.find(i); + + if(symbol is null) + throw error(__LINE__, "Undefined identifier: '%0'") + .arg(i.get); + //.loc(i.token.location); + } + + override void visitVarDecl(VarDecl d) + { + 
if(!d.env.findType(d.varType)) + throw error(__LINE__, "Undefined type: '%0'") + .arg(d.varType.get); + //.loc(d.varType.token.location); + + visitExp(d.identifier); + if (d.init) + visitExp(d.init); + } + + override void visitFuncDecl(FuncDecl f) + { + visitExp(f.identifier); + + foreach (stmt; f.statements) + visitStmt(stmt); + } + + override void visitImportDecl(ImportDecl) { } + + override void visitCastExp(CastExp exp) + { + visitExp(exp.exp); + } + + override void visitMemberReference(MemberReference m) + { + switch(m.target.expType) + { + case ExpType.Identifier: + auto target = cast(Identifier)m.target; + auto child = m.child; + auto st = cast(DStruct)(target.env.find(target).type); + if((child.get in st.members) is null) + throw error(__LINE__, "%0 %1 has no member %2") + .arg(st.name) + .arg(target.get) + .arg(child.get); + //.tok(child.token); + break; + case ExpType.MemberReference: + break; + } + } + + private bool isType(char[] s) + { + return (s in types? true : false); + } +} + diff -r e331e4e816e4 -r 5e383b3755d6 sema/SymbolTable.d --- a/sema/SymbolTable.d Fri Apr 18 23:45:45 2008 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,96 +0,0 @@ -module sema.SymbolTable; - -import tango.io.Stdout; - -import lexer.Token; - -import ast.Exp : Identifier; - -class SymbolTable -{ -} - -class Scope -{ - this() {} - this(Scope enclosing) - { - this.enclosing = enclosing; - this.func = enclosing.func; - } - - Scope enclosing; - - Symbol add(Identifier id) - { - auto s = new Symbol; - s.id = id; - symbols[id] = s; - return s; - } - - Symbol find(Identifier id) - { - if (auto sym = id in symbols) - return *sym; - if (enclosing !is null) - return enclosing.find(id); - return null; - } - - Symbol findType(Identifier id) - { - if (auto sym = id in symbols) - if(symbols[id].type == null) - return *sym; - if (enclosing !is null) - return enclosing.find(id); - return null; - } - - char[][] names() - { - char[][] res; - foreach (id, sym; symbols) - res ~= 
sym.id.name ~ " : " ~ sym.type.name; - return res; - } - - Symbol parentFunction() - { - if (func !is null) - return func; - else if (enclosing !is null) - return enclosing.parentFunction(); - else - return null; - } - - int opEquals(Object o) - { - return this is o; - } - - char[] toString() - { - if (func) - return Stdout.layout.convert("{}: {}", func.id.get, symbols.length); - return "root"; - } - - Symbol parentFunction(Symbol f) - { - func = f; - return f; - } -private: - Symbol[Identifier] symbols; - Symbol func; -} - -class Symbol -{ - Identifier id; - Identifier type; -} - diff -r e331e4e816e4 -r 5e383b3755d6 sema/SymbolTableBuilder.d --- a/sema/SymbolTableBuilder.d Fri Apr 18 23:45:45 2008 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,130 +0,0 @@ -module sema.SymbolTableBuilder; - -import tango.io.Stdout, - tango.core.Array : find; - -public import sema.SymbolTable; - -import sema.Visitor; - -class SymbolTableBuilder : Visitor!(void) -{ - this() - { - table ~= new Scope; - } - - override void visit(Decl[] decls) - { - foreach (decl; decls) - visitDecl(decl); - } - - override void visitDecl(Decl d) - { - d.env = current(); - super.visitDecl(d); - } - - override void visitStmt(Stmt s) - { - s.env = current(); - super.visitStmt(s); - } - - override void visitExp(Exp e) - { - e.env = current(); - super.visitExp(e); - } - - override void visitFuncDecl(FuncDecl d) - { - auto sym = current().add(d.identifier); - sym.type = d.type; - visitExp(d.type); - visitExp(d.identifier); - d.env = current(); - auto sc = push(); - sc.parentFunction = sym; - foreach (arg; d.funcArgs) - visitDecl(arg); - foreach (stmt; d.statements) - visitStmt(stmt); - pop(sc); - } - - override void visitVarDecl(VarDecl d) - { - auto sc = current(); - auto sym = sc.add(d.identifier); - sym.type = d.type; - super.visitVarDecl(d); - } - - override void visitStructDecl(StructDecl s) - { - auto sc = current(); - auto sym = sc.add(s.identifier); -// sym.type = Tok.Struct; - 
super.visitStructDecl(s); - } - - override void visitDeclStmt(DeclStmt d) - { - super.visitDeclStmt(d); - push(); - } - - override void visitIfStmt(IfStmt s) - { - s.env = current(); - visitExp(s.cond); - auto sc = push(); - foreach (stmt; s.then_body) - visitStmt(stmt); - pop(sc); - - sc = push(); - foreach (stmt; s.else_body) - visitStmt(stmt); - pop(sc); - } - - override void visitWhileStmt(WhileStmt s) - { - s.env = current(); - auto sc = push(); - super.visitWhileStmt(s); - pop(sc); - } - -private: - Scope[] table; - - Scope push() - { - auto sc = new Scope(current()); - table ~= sc; - return sc; - } - - Scope pop(Scope sc = null) - { - if (sc !is null) - { - table.length = table.find(sc); - return sc; - } - - auto res = table[$ - 1]; - table.length = table.length - 1; - return res; - } - - Scope current() - { - return table[$ - 1]; - } -} - diff -r e331e4e816e4 -r 5e383b3755d6 sema/TypeCheck.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sema/TypeCheck.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,146 @@ +module sema.TypeCheck; + +import sema.Visitor, + sema.DType; + +import tango.io.Stdout; + +import misc.Error, + basic.SourceLocation; + +class TypeCheck : Visitor!(void) +{ + private Error error(uint line, char[] msg) + { + return new Error(msg); + } + + override void visitBinaryExp(BinaryExp exp) + { + super.visitBinaryExp(exp); + + if(exp.left.type.byteSize > exp.right.type.byteSize) + { + if(!exp.right.type.hasImplicitConversionTo(exp.left.type)) + throw error(__LINE__, "Cannot make implicit cast"); + + auto castExp = new CastExp( + SLoc.Invalid, + new Identifier(exp.left.type.name), + exp.right); + castExp.env = exp.env; + exp.right = castExp; + } + + if(exp.left.type.byteSize < exp.right.type.byteSize) + { + if(!exp.left.type.hasImplicitConversionTo(exp.right.type)) + throw error(__LINE__, "Cannot make implicit cast"); + + auto castExp = new CastExp( + SLoc.Invalid, + new Identifier(exp.right.type.name), + exp.left); + castExp.env = exp.env; + 
exp.left = castExp; + } + + } + + override void visitCallExp(CallExp exp) + { + super.visitCallExp(exp); + + Exp[] newArgs; + + foreach(i, arg; exp.args) + { + auto argType = (cast(DFunction)exp.exp.type).params[i]; + auto expType = arg.type; + if(argType.byteSize != expType.byteSize) + { + if(!expType.hasImplicitConversionTo(argType)) + throw error(__LINE__, "Cannot make implicit cast"); + + auto castExp = new CastExp( + SLoc.Invalid, + new Identifier(argType.name), + arg); + castExp.env = exp.exp.env; + newArgs ~= castExp; + } + else + newArgs ~= arg; + } + + exp.args = newArgs; + } + + override void visitAssignExp(AssignExp exp) + { + super.visitAssignExp(exp); + + auto identifierType = exp.identifier.type; + auto expType = exp.exp.type; + + if(identifierType != expType) + { + if(!expType.hasImplicitConversionTo(identifierType)) + throw error(__LINE__, "Cannot make implicit cast between"); + + auto castExp = new CastExp( + SLoc.Invalid, + new Identifier(identifierType.name), + exp.exp); + castExp.env = exp.exp.env; + exp.exp = castExp; + } + } + + override void visitReturnStmt(ReturnStmt stmt) + { + super.visitReturnStmt(stmt); + + if(stmt.exp) + { + auto returnType = stmt.env.parentFunction.type.asFunction.returnType; + auto expType = stmt.exp.type; + if(returnType != expType) + { + if(!expType.hasImplicitConversionTo(returnType)) + throw error(__LINE__, "Cannot make implicit cast"); + + auto castExp = new CastExp( + SLoc.Invalid, + new Identifier(returnType.name), + stmt.exp); + castExp.env = stmt.exp.env; + stmt.exp = castExp; + } + } + } + + override void visitVarDecl(VarDecl decl) + { + super.visitVarDecl(decl); + + if(decl.init) + { + auto varType = decl.type; + auto expType = decl.init.type; + if(varType.byteSize != expType.byteSize) + { + if(!expType.hasImplicitConversionTo(varType)) + throw error(__LINE__, "Cannot make implicit cast"); + + auto castExp = new CastExp( + SLoc.Invalid, + new Identifier(varType.name), + decl.init); + castExp.env = 
decl.init.env; + decl.init = castExp; + } + } + } +} + diff -r e331e4e816e4 -r 5e383b3755d6 sema/Visitor.d --- a/sema/Visitor.d Fri Apr 18 23:45:45 2008 +0200 +++ b/sema/Visitor.d Sun May 25 14:43:16 2008 +0200 @@ -3,25 +3,36 @@ import tango.io.Stdout; public -import ast.Decl, +import ast.Module, + ast.Decl, ast.Stmt, ast.Exp; import lexer.Token; -class Visitor(FinalT = int, DeclT = FinalT, StmtT = DeclT, ExpT = StmtT) +class Visitor(FinalT = int, ModuleT = FinalT, DeclT = ModuleT, StmtT = DeclT, ExpT = StmtT) { public: - FinalT visit(Decl[] decls) + FinalT visit(Module[] modules) { - foreach (decl; decls) - visitDecl(decl); + foreach(m ; modules) + visitModule(m); static if (is(FinalT == void)) return; else return FinalT.init; } + ModuleT visitModule(Module m) + { + foreach (decl; m.decls) + visitDecl(decl); + static if (is(ModuleT == void)) + return; + else + return ModuleT.init; + } + DeclT visitDecl(Decl decl) { switch(decl.declType) @@ -30,6 +41,8 @@ return visitFuncDecl(cast(FuncDecl)decl); case DeclType.VarDecl: return visitVarDecl(cast(VarDecl)decl); + case DeclType.ImportDecl: + return visitImportDecl(cast(ImportDecl)decl); case DeclType.StructDecl: return visitStructDecl(cast(StructDecl)decl); default: @@ -43,6 +56,8 @@ { case StmtType.Return: return visitReturnStmt(cast(ReturnStmt)stmt); + case StmtType.Compound: + return visitCompoundStmt(cast(CompoundStatement)stmt); case StmtType.Decl: return visitDeclStmt(cast(DeclStmt)stmt); case StmtType.Exp: @@ -51,6 +66,8 @@ return visitIfStmt(cast(IfStmt)stmt); case StmtType.While: return visitWhileStmt(cast(WhileStmt)stmt); + case StmtType.Switch: + return visitSwitchStmt(cast(SwitchStmt)stmt); default: throw new Exception("Unknown statement type"); } @@ -66,12 +83,24 @@ return visitIntegerLit(cast(IntegerLit)exp); case ExpType.Negate: return visitNegateExp(cast(NegateExp)exp); + case ExpType.Deref: + return visitDerefExp(cast(DerefExp)exp); case ExpType.AssignExp: return visitAssignExp(cast(AssignExp)exp); 
case ExpType.CallExp: return visitCallExp(cast(CallExp)exp); + case ExpType.CastExp: + return visitCastExp(cast(CastExp)exp); case ExpType.Identifier: return visitIdentifier(cast(Identifier)exp); + case ExpType.PointerIdentifier: + return visitPointerIdentifier(cast(PointerIdentifier)exp); + case ExpType.ArrayIdentifier: + return visitArrayIdentifier(cast(ArrayIdentifier)exp); + case ExpType.Index: + return visitIndexExp(cast(IndexExp)exp); + case ExpType.MemberReference: + return visitMemberReference(cast(MemberReference)exp); default: throw new Exception("Unknown expression type"); } @@ -80,8 +109,9 @@ // Declarations: DeclT visitVarDecl(VarDecl d) { - visitExp(d.type); - visitExp(d.identifier); + visitExp(d.varType); + if(d.identifier) + visitExp(d.identifier); if (d.init) visitExp(d.init); @@ -91,9 +121,27 @@ return DeclT.init; } + DeclT visitImportDecl(ImportDecl d) + { + visitIdentifier(d.name); + visitIdentifier(d.aliasedName); + foreach (id; d.packages) + visitIdentifier(id); + foreach (ids; d.explicitSymbols) + { + visitIdentifier(ids[0]); + visitIdentifier(ids[1]); + } + + static if (is(DeclT == void)) + return; + else + return DeclT.init; + } + DeclT visitFuncDecl(FuncDecl f) { - visitExp(f.type); + visitExp(f.returnType); visitExp(f.identifier); foreach (arg; f.funcArgs) visitDecl(arg); @@ -110,7 +158,7 @@ { visitExp(s.identifier); - foreach (arg; s.vars) + foreach (arg; s.decls) visitDecl(arg); static if (is(DeclT == void)) @@ -122,7 +170,8 @@ // Statements: StmtT visitReturnStmt(ReturnStmt s) { - visitExp(s.exp); + if (s.exp) + visitExp(s.exp); static if (is(StmtT == void)) return; else @@ -138,13 +187,22 @@ return StmtT.init; } + StmtT visitCompoundStmt(CompoundStatement c) + { + foreach (stmt; c.statements) + visitStmt(stmt); + static if (is(StmtT == void)) + return; + else + return StmtT.init; + } + StmtT visitIfStmt(IfStmt s) { visitExp(s.cond); - foreach (stmt; s.then_body) - visitStmt(stmt); - foreach (stmt; s.else_body) - visitStmt(stmt); + 
visitStmt(s.then_body); + if (s.else_body !is null) + visitStmt(s.else_body); static if (is(StmtT == void)) return; else @@ -154,8 +212,25 @@ StmtT visitWhileStmt(WhileStmt s) { visitExp(s.cond); - foreach (stmt; s.stmts) + visitStmt(s.whileBody); + static if (is(StmtT == void)) + return; + else + return StmtT.init; + } + + StmtT visitSwitchStmt(SwitchStmt s) + { + visitExp(s.cond); + foreach(stmt; s.defaultBlock) visitStmt(stmt); + foreach (c; s.cases) + { + foreach(lit; c.values) + visitIntegerLit(lit); + foreach(stmt; c.stmts) + visitStmt(stmt); + } static if (is(StmtT == void)) return; else @@ -203,6 +278,16 @@ return ExpT.init; } + ExpT visitCastExp(CastExp exp) + { + visitExp(exp.castType); + visitExp(exp.exp); + static if (is(ExpT == void)) + return; + else + return ExpT.init; + } + ExpT visitNegateExp(NegateExp exp) { visitExp(exp.exp); @@ -212,6 +297,15 @@ return ExpT.init; } + ExpT visitDerefExp(DerefExp exp) + { + visitExp(exp.exp); + static if (is(ExpT == void)) + return; + else + return ExpT.init; + } + ExpT visitIntegerLit(IntegerLit exp) { static if (is(ExpT == void)) @@ -227,5 +321,47 @@ else return ExpT.init; } + + ExpT visitPointerIdentifier(PointerIdentifier exp) + { + visitExp(exp.pointerOf); + + static if (is(ExpT == void)) + return; + else + return ExpT.init; + } + + ExpT visitArrayIdentifier(ArrayIdentifier exp) + { + visitExp(exp.arrayOf); + + static if (is(ExpT == void)) + return; + else + return ExpT.init; + } + + ExpT visitIndexExp(IndexExp exp) + { + visitExp(exp.target); + visitExp(exp.index); + + static if (is(ExpT == void)) + return; + else + return ExpT.init; + } + + ExpT visitMemberReference(MemberReference mem) + { + visitExp(mem.target); + visitExp(mem.child); + + static if (is(ExpT == void)) + return; + else + return ExpT.init; + } } diff -r e331e4e816e4 -r 5e383b3755d6 test.td --- a/test.td Fri Apr 18 23:45:45 2008 +0200 +++ b/test.td Sun May 25 14:43:16 2008 +0200 @@ -1,49 +1,31 @@ - -int x = 4; -struct mystruct -{ - int x; - 
int y; -} + int main() { - mystruct my; - int x = 5; int y = 4; - return add(6, 7); -} -int add(int x, int y) -{ - return x + y; -} + + karina k; + -int fib(int n) -{ - if(n < 2) - return n; - - return fib(n-1) + fib(n-2); + while (y > 0) + y = y - 1; + + return x; } -int nice(long s, short t) -{ - int y; - byte x = 5 + t + y; - if (x != 0) - t = 5 + 1 * 5 * s + t; - return 2 * (t + -1) - x; -} +int x = 4; + -int fac2(int n) +struct karina { - int res = 1; - while (n != 1) - { - res = res * n; - n = n - 1; - } - return res; + anders a; + int age; + int width; + int height; + int lovers; } - +struct anders +{ + int hej; +} diff -r e331e4e816e4 -r 5e383b3755d6 tests/code/array_1.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/code/array_1.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,21 @@ +struct Array +{ + int[] data; + int length; +} + +void insert(Array a, int v) +{ + a.length = a.length + 1; + a.data[a.length - 1] = v; +} + +int main() +{ + Array a; + a.length = 0; + + insert(a, 5); + + return a.data[0]; +} diff -r e331e4e816e4 -r 5e383b3755d6 tests/code/basic_2.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/code/basic_2.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,8 @@ +//fail +int main() +{ + int x = y; + int y = 1; + return y; +} + diff -r e331e4e816e4 -r 5e383b3755d6 tests/code/basic_types_1.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/code/basic_types_1.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,8 @@ +int main() +{ + // test some basic type conversions + byte a = 2; + short b = 3 * a; + int c = b + a; + long d = c * a / b; +} diff -r e331e4e816e4 -r 5e383b3755d6 tests/code/basic_types_2.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/code/basic_types_2.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,21 @@ +int main() +{ + byte a = 2; + short b = 3 * a; + int c = b + a; + long d = c * a / b; + + d = itol(2); + d = itol(a); + d = itol(b); + d = itol(c); + + c = stoi(a); + c = stoi(b); + + return 3; +} + +long itol(int 
x) { return x; } +int stoi(short x) { return x; } + diff -r e331e4e816e4 -r 5e383b3755d6 tests/code/cast_1.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/code/cast_1.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,12 @@ + +int main() +{ + byte y = 44; + + int b = cast(int)y * 66; + + if(b == 2904) + return 0; + else + return 1; +} diff -r e331e4e816e4 -r 5e383b3755d6 tests/code/cast_2.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/code/cast_2.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,14 @@ + + + +int main() +{ + byte y = 44; + + int b = y * cast(byte)66; + + if(b == 88) + return 0; + else + return 1; +} diff -r e331e4e816e4 -r 5e383b3755d6 tests/code/func_1.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/code/func_1.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,20 @@ + + +int main() +{ + testStruct t; + t.x = 5; + + return t.x; +} + +testStruct m(testStruct t) +{ + t.x = t.x - 5; + return t; +} + +struct testStruct +{ + int x; +} diff -r e331e4e816e4 -r 5e383b3755d6 tests/code/if_4.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/code/if_4.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,13 @@ +int main() +{ + int x = 0; + int y = 1; + if (x) + return 1; + else if (y == 2) + return 1; + else + y = 0; + return y; +} + diff -r e331e4e816e4 -r 5e383b3755d6 tests/code/math_1.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/code/math_1.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,6 @@ +int main() +{ + int x = 2; + int y = 3; + return 2 * (x + y); +} diff -r e331e4e816e4 -r 5e383b3755d6 tests/code/math_2.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/code/math_2.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,6 @@ +int main() +{ + int x = 1 + 2 * 3 + 4 + 5; + int y = x - x; + return y + x - 15; +} diff -r e331e4e816e4 -r 5e383b3755d6 tests/code/math_3.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/code/math_3.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,6 @@ +//fail +int main() +{ + int x = x; + return x; +} 
diff -r e331e4e816e4 -r 5e383b3755d6 tests/code/sarray_1.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/code/sarray_1.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,7 @@ +int main() +{ + int[10] a; + a[0] = 1; + a[1] = a[0]; +} + diff -r e331e4e816e4 -r 5e383b3755d6 tests/code/sarray_2.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/code/sarray_2.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,8 @@ +//fail +int main() +{ + int[10] a; + // static array assignment is illegal - we fail for other reasons though + int[10] b = a; +} + diff -r e331e4e816e4 -r 5e383b3755d6 tests/code/scope_1.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/code/scope_1.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,7 @@ +//fail +int main() +{ + int x = y; + int y = 1; + return x; +} diff -r e331e4e816e4 -r 5e383b3755d6 tests/code/scope_2.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/code/scope_2.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,11 @@ +//fail +int main() +{ + int x = 10; + while (x > 0) + { + int y = 1; + x = x -y; + } + return y; +} diff -r e331e4e816e4 -r 5e383b3755d6 tests/code/scope_3.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/code/scope_3.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,9 @@ +int y = 0; + +int main() +{ + return x + y; +} + +int x = 0; + diff -r e331e4e816e4 -r 5e383b3755d6 tests/code/struct_1.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/code/struct_1.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,9 @@ + +struct S +{ +} + +void main() +{ + S s; +} diff -r e331e4e816e4 -r 5e383b3755d6 tests/code/struct_2.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/code/struct_2.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,13 @@ + +struct S +{ + int a; + int b; +} + +void main() +{ + S s; + s.a = 2; + s.b = s.a; +} diff -r e331e4e816e4 -r 5e383b3755d6 tests/code/struct_3.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/code/struct_3.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,11 @@ + +struct S +{ 
+ int a; +} + +void main() +{ + S s; + S s2 = s; +} diff -r e331e4e816e4 -r 5e383b3755d6 tests/code/struct_4.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/code/struct_4.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,18 @@ + +struct A +{ + int a; +} + +struct B +{ + int b; + A a; +} + +void main() +{ + B b; + b.a.a = 1; + b.b = 2; +} diff -r e331e4e816e4 -r 5e383b3755d6 tests/code/struct_5.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/code/struct_5.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,17 @@ +// Test forward references in struct members +struct B +{ + int b; + A a; +} + +struct A +{ + int a; +} + +void main() +{ + B b; + b.b = 2; +} diff -r e331e4e816e4 -r 5e383b3755d6 tests/code/switch_1.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/code/switch_1.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,8 @@ + +void main(int x) +{ + switch (x) + { + } +} + diff -r e331e4e816e4 -r 5e383b3755d6 tests/code/switch_2.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/code/switch_2.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,9 @@ + +void main(int x) +{ + switch (x) + { + case 1: + } +} + diff -r e331e4e816e4 -r 5e383b3755d6 tests/code/switch_3.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/code/switch_3.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,15 @@ + +void main(int x) +{ + switch (x) + { + case 1, 2: + x = 2; + return; + case 3, 4: + x = 1; + return; + default: + } +} + diff -r e331e4e816e4 -r 5e383b3755d6 tests/code/switch_4.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/code/switch_4.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,16 @@ + +int main(int x) +{ + switch (x) + { + case 1, 2: + x = 2; + return x; + case 3, 4: + x = 1; + return x; + default: + return 0; + } +} + diff -r e331e4e816e4 -r 5e383b3755d6 tests/code/switch_5.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/code/switch_5.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,12 @@ +//fail +int main(int x) +{ + switch (x) + { + default: + 
return 0; + default: + return 1; + } +} + diff -r e331e4e816e4 -r 5e383b3755d6 tests/code/switch_6.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/code/switch_6.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,12 @@ +//fail +int main(int x) +{ + switch (x) + { + case 1, 2: + return 0; + case 2, 3: + return 1; + } +} + diff -r e331e4e816e4 -r 5e383b3755d6 tests/lexer/Comments2.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/lexer/Comments2.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,4 @@ +//fail + +/+ + diff -r e331e4e816e4 -r 5e383b3755d6 tests/parser/basic_type_char_1.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/parser/basic_type_char_1.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,10 @@ + + +int main() +{ + char c; + wchar w; + dchar d; + + return 0; +} diff -r e331e4e816e4 -r 5e383b3755d6 tests/parser/struct_method_1.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/parser/struct_method_1.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,17 @@ +module struct_method_1; + +struct A +{ + int x; + int foo(int i) + { + return i; + } +} + +int main() +{ + A a; + a.x = 6; + return a.foo(a.x); +} diff -r e331e4e816e4 -r 5e383b3755d6 tests/run.d --- a/tests/run.d Fri Apr 18 23:45:45 2008 +0200 +++ b/tests/run.d Sun May 25 14:43:16 2008 +0200 @@ -13,6 +13,14 @@ tango.sys.Process; +enum +{ + SuccessSuccess, + SuccessFailure, + FailureSuccess, + FailureFailure, +} + char[] prog = "./Dang"; void main(char[][] args) @@ -39,16 +47,16 @@ switch(result) { - case 0: + case SuccessSuccess: success_success++; break; - case 1: + case SuccessFailure: success_failure++; break; - case 2: + case FailureFailure: failure_failure++; break; - case 3: + case FailureSuccess: failure_success++; break; } @@ -58,15 +66,28 @@ Stdout().newline.newline() ("Result:").newline() - (" - Succes/Success: ")(success_success).newline() - (" - Succes/Failure: ")(success_failure).newline() + (" - Success/Success: ")(success_success).newline() + (" - Success/Failure: 
")(success_failure).newline() (" - Failure/Failure: ")(failure_failure).newline() (" - Failure/Success: ")(failure_success).newline; } class Test { + enum TestValue + { + Success = 0, + Lexer = 2, + Parser = 3, + Gen = 4, + + Fail = 100 + } + FilePath target; + + TestValue[int] testValues; + public this(FilePath target) { this.target = target; @@ -78,10 +99,25 @@ auto file = new UnicodeFile!(char)(target.path~target.file, Encoding.UTF_8); - int mode; + TestValue mode; char[] data = file.read; - if(data.length > 6 && data[0..6] == "//test") + char[][] commands = split(splitLines(data)[0], " "); + if(commands[0] == "//fail") + { + mode = TestValue.Fail; + if(commands.length > 1) + { + try + { + int i = Integer.toInt(commands[1]); + if(i in testValues) + mode = testValues[i]; + } + catch{} + } + } +/* if(data.length > 6 && data[0..6] == "//fail") { char[] str = data.splitLines()[0][6..$]; @@ -95,12 +131,13 @@ mode = 0; } } - +*/ Stdout.format(" {,-25}", target.file); process.execute; auto result = process.wait; + /* if(result.status == 0) { auto llvm_process = new Process("llvm-as"); @@ -109,11 +146,12 @@ llvm_process.stdin.close(); result = llvm_process.wait; } + */ return resultOf(result.status, mode); } - private int resultOf(int status, int mode) + private int resultOf(int status, TestValue mode) { char[] good(char[] s) { @@ -133,28 +171,28 @@ if(status == 0) { - if(mode == 0) + if(mode == TestValue.Success) { - Stdout(good("SUCCES")).newline; - return 0; + Stdout(good("SUCCESS")).newline; + return SuccessSuccess; } - if(mode == 1) + if(mode == TestValue.Fail) { - Stdout(bad("SUCCES - Unexpected")).newline; - return 3; + Stdout(bad("SUCCESS - Unexpected")).newline; + return FailureSuccess; } } else { - if(mode == 1) + if(mode == TestValue.Fail) { Stdout(good("FAILURE")).newline; - return 2; + return FailureFailure; } - if(mode == 0) + if(mode == TestValue.Success) { Stdout(bad("FAILURE - Unexpected")).newline; - return 1; + return SuccessFailure; } } } diff -r 
e331e4e816e4 -r 5e383b3755d6 tests/sema/deref_1.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/sema/deref_1.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,7 @@ +//fail +int main() +{ + int a = 2; + int b = *a; +} + diff -r e331e4e816e4 -r 5e383b3755d6 tests/sema/deref_2.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/sema/deref_2.d Sun May 25 14:43:16 2008 +0200 @@ -0,0 +1,10 @@ +int main() +{ + int *a; + int *b; + a = b; + *a = 1; + + return *a == *b; +} + diff -r e331e4e816e4 -r 5e383b3755d6 tools/AstPrinter.d --- a/tools/AstPrinter.d Fri Apr 18 23:45:45 2008 +0200 +++ b/tools/AstPrinter.d Sun May 25 14:43:16 2008 +0200 @@ -2,28 +2,33 @@ import tango.io.Stdout; -import ast.Decl, +import ast.Module, + ast.Decl, ast.Stmt, ast.Exp; -import misc.DataSource; +import basic.SourceManager; class AstPrinter { const char[] tabType = " "; // 4 spaces - this(DataSource ds) + this(SourceManager sm) { - this.ds = ds; + this.sm = sm; } - void print(Decl[] decls) + void print(Module m) { - foreach(decl ; decls) + printBeginLine("module "); + printEndLine(m.moduleName); + printEndLine(); + foreach(decl ; m.decls) { printDecl(decl); } + printEndLine(); } void printDecl(Decl decl) @@ -33,9 +38,10 @@ case DeclType.FuncDecl: auto funcDecl = cast(FuncDecl)decl; printBeginLine(); - printIdentifier(funcDecl.type); + printIdentifier(funcDecl.returnType); + space; printIdentifier(funcDecl.identifier); - printFuncArgs(funcDecl.funcArgs); + printFuncArgs(funcDecl); printOpenBrace(); foreach(stmt ; funcDecl.statements) printStatement(stmt); @@ -45,16 +51,35 @@ case DeclType.VarDecl: auto varDecl = cast(VarDecl)decl; printBeginLine(); - printIdentifier(varDecl.type); - printIdentifier(varDecl.identifier); + printExp(varDecl.varType); + space; + printExp(varDecl.identifier); if(varDecl.init) { - print("= "); + print(" = "); printExp(varDecl.init); } printEndLine(";"); break; + + case DeclType.StructDecl: + auto structDecl = cast(StructDecl)decl; + printBeginLine("struct "); 
+ printIdentifier(structDecl.identifier); + printEndLine; + printOpenBrace; + foreach( var ; structDecl.decls) + printDecl(var); + printCloseBrace; + break; + + case DeclType.ImportDecl: + auto i = cast(ImportDecl)decl; + printBeginLine("import "); + printEndLine(i.get); + break; } + printEndLine(); } void printStatement(Stmt stmt) @@ -63,8 +88,12 @@ { case StmtType.Return: auto ret = cast(ReturnStmt)stmt; - printBeginLine("return "); - printExp(ret.exp); + printBeginLine("return"); + if(ret.exp) + { + space; + printExp(ret.exp); + } printEndLine(";"); break; case StmtType.Decl: @@ -87,39 +116,83 @@ { case ExpType.Binary: auto binaryExp = cast(BinaryExp)exp; + print("("); printExp(binaryExp.left); - print([binaryExp.op] ~ " "); + print(" " ~ binaryExp.getOp[binaryExp.op] ~ " "); printExp(binaryExp.right); + print(")"); break; case ExpType.IntegerLit: auto integetLit = cast(IntegerLit)exp; - auto t = integetLit.token; - print(t.get ~ " "); + print(integetLit.get); break; case ExpType.Negate: auto negateExp = cast(NegateExp)exp; print("-"); printExp(negateExp.exp); break; + case ExpType.Deref: + auto derefExp = cast(DerefExp)exp; + print("*"); + printExp(derefExp.exp); + break; case ExpType.AssignExp: auto assignExp = cast(AssignExp)exp; - printIdentifier(assignExp.identifier); - print("= "); + printExp(assignExp.identifier); + print(" = "); printExp(assignExp.exp); break; + case ExpType.MemberReference: + auto mrExp = cast(MemberReference)exp; + printExp(mrExp.target); + print("."); + printIdentifier(mrExp.child); + break; + case ExpType.Identifier: + auto iden = cast(Identifier)exp; + printIdentifier(iden); + break; + case ExpType.PointerIdentifier: + auto iden = cast(PointerIdentifier)exp; + printExp(iden.pointerOf); + print("*"); + break; + case ExpType.CallExp: + auto callExp = cast(CallExp)exp; + printExp(callExp.exp); + print("("); + foreach(i, e; callExp.args) + { + printExp(e); + if(i+1 < callExp.args.length) + print(", "); + } + print(")"); + break; + case 
ExpType.CastExp: + auto castExp = cast(CastExp)exp; + print("cast"); + print("("); + printExp(castExp.castType); + print(")"); + printExp(castExp.exp); + break; } } - void printFuncArgs(VarDecl[] decls) + void printFuncArgs(FuncDecl decl) { print("("); - foreach(i, decl; decls) + foreach(i, d; decl.funcArgs) { - printIdentifier(decl.type); - printIdentifier(decl.identifier); - if(i+1 < decls.length) + printIdentifier(d.varType); + if(i == 0 && decl.sret) + print("*"); + space; + printIdentifier(d.identifier); + if(i+1 < decl.funcArgs.length) print(","); } @@ -128,8 +201,7 @@ void printIdentifier(Identifier identifier) { - auto t = identifier.token; - print(t.get ~ " "); + print(identifier.get); } void printOpenBrace() @@ -158,7 +230,12 @@ { Stdout(line); } + + void space() + { + print(" "); + } private: - DataSource ds; + SourceManager sm; char[] tabIndex; } diff -r e331e4e816e4 -r 5e383b3755d6 tools/DotPrinter.d --- a/tools/DotPrinter.d Fri Apr 18 23:45:45 2008 +0200 +++ b/tools/DotPrinter.d Sun May 25 14:43:16 2008 +0200 @@ -3,13 +3,11 @@ import tango.io.Stdout, Int = tango.text.convert.Integer; -import ast.Decl, +import ast.Module, + ast.Decl, ast.Stmt, ast.Exp; -import misc.DataSource, - lexer.Token; - class DotPrinter { this() @@ -19,10 +17,10 @@ private char[][void*] identifiers; private int current_id = 0; - void print(Decl[] decls) + void print(Module m) { Stdout("digraph {").newline; - foreach(decl ; decls) + foreach(decl ; m.decls) { printDecl(decl); } @@ -40,7 +38,7 @@ //printFuncArgs(funcDecl.funcArgs); Stdout(dotId(decl))(` [label="function`); Stdout(`\n name: `)(text(funcDecl.identifier)); - Stdout(`\n return type: `)(text(funcDecl.type)); + Stdout(`\n return type: `)(text(funcDecl.returnType)); Stdout(`", shape=box, fillcolor=lightblue, style=filled]`); Stdout.newline; //Stdout(`"`); @@ -55,7 +53,7 @@ //printFuncArgs(funcDecl.funcArgs); Stdout(dotId(decl))(` [label="var`); Stdout(`\n name: `)(text(varDecl.identifier)); - Stdout(`\n type: 
`)(text(varDecl.type)); + Stdout(`\n type: `)(text(varDecl.varType)); Stdout(`"]`).newline; if (varDecl.init !is null) @@ -116,7 +114,7 @@ case ExpType.IntegerLit: auto e = cast(IntegerLit)exp; - Stdout(id)(` [label="`)(text(e.token))(`"]`).newline; + Stdout(id)(` [label="`)(text(e.get))(`"]`).newline; break; case ExpType.Identifier: @@ -127,7 +125,7 @@ case ExpType.AssignExp: auto ass = cast(AssignExp)exp; Stdout(parent)(` [label="Assign"]`).newline; - Stdout(id)(` [label="`)(text(ass.identifier))(`"]`).newline; +// Stdout(id)(` [label="`)(text(ass.identifier))(`"]`).newline; printExpression(parent, ass.exp); break; @@ -155,13 +153,11 @@ char[] text(Identifier identifier) { - auto t = identifier.token; - return t.get; + return identifier.get; } - char[] text(Token t) + char[] text(char[] t) { - return t.get; + return t; } -private: - DataSource ds; } +