Mercurial > projects > aid
changeset 5:810d58835f86
Added momentum and stochastic training to backprop.
author | revcompgeek |
---|---|
date | Tue, 15 Apr 2008 14:39:49 -0600 |
parents | 73beed484455 |
children | ff92c77006c7 |
files | trunk/aid/misc.d trunk/aid/nn/multilayer/backprop.d trunk/backprop_test.d |
diffstat | 3 files changed, 303 insertions(+), 186 deletions(-) [+] |
line wrap: on
line diff
--- a/trunk/aid/misc.d Sat Apr 12 21:55:37 2008 -0600 +++ b/trunk/aid/misc.d Tue Apr 15 14:39:49 2008 -0600 @@ -1,6 +1,7 @@ module aid.misc; import std.random; +import std.stdio; class InputException : Exception { this(char[] message){ @@ -10,4 +11,28 @@ double rnd(){ // The function that should be included in every math library! return (cast(double)rand())/uint.max; +} + +void printArray(double[] array){ + writef("["); + for(int i=0; i<array.length-1; i++){ + writef("%f, ",array[i]); + } + writefln("%f]",array[$-1]); +} + +void printArray(double[][] array){ + writef("["); + for(int i=0; i<array.length; i++){ + printArray(array[i]); + } + writefln("]"); +} + +void printArray(double[][][] array){ + writef("["); + for(int i=0; i<array.length; i++){ + printArray(array[i]); + } + writefln("]"); } \ No newline at end of file
--- a/trunk/aid/nn/multilayer/backprop.d Sat Apr 12 21:55:37 2008 -0600 +++ b/trunk/aid/nn/multilayer/backprop.d Tue Apr 15 14:39:49 2008 -0600 @@ -1,3 +1,4 @@ + module aid.nn.multilevel.backprop; import aid.nn.outputFunctions; @@ -7,88 +8,107 @@ import std.stdio; class Backprop { - private uint numInputs; - private double[][][] units; // Includes the output units. units[layer][unit][inputWeight] + private uint numInputs; + private double[][][] units; // Includes the output units. units[layer][unit][inputWeight] private OutputFunctionPtr[] functions; public double learningRate; - + public double momentum; + private double[][][] oldWeightUpdate; + ///Constructor - public this(uint numInputs,uint[] numUnits,OutputFunctionPtr[] functions,double learningRate=0.03,double value=0.1,bool randomize=true){ - if(numUnits.length == 0) throw new InputException("numUnits must be greater than 0"); - if(numUnits.length != functions.length) throw new InputException("numUnits and functions must be the same length"); + public this(uint numInputs, uint[] numUnits, OutputFunctionPtr[] functions, + double learningRate = 0.03, double momentum = 0.1, + double value = 0.1, bool randomize = true) { + if(numUnits.length == 0) + throw new InputException("numUnits must be greater than 0"); + if(numUnits.length != functions.length) + throw new InputException( + "numUnits and functions must be the same length"); this.numInputs = numInputs; this.functions = functions; this.learningRate = learningRate; + this.momentum = momentum; units.length = numUnits.length; - initUnitLayer(0,numUnits[0],numInputs,value,randomize); - for(int i=1; i<numUnits.length; i++){ - initUnitLayer(i,numUnits[i],numUnits[i-1],value,randomize); + oldWeightUpdate.length = numUnits.length; + initUnitLayer(0, numUnits[0], numInputs, value, randomize); + for(int i = 1; i < numUnits.length; i++) { + initUnitLayer(i, numUnits[i], numUnits[i - 1], value, randomize); } } - + // Helper function to initialize a certain layer. - private void initUnitLayer(uint layer,uint num,uint numPrev,double value,bool randomize){ + private void initUnitLayer(uint layer, uint num, uint numPrev, + double value, bool randomize) { units[layer].length = num; - for(int i=0; i<num; i++){ - units[layer][i].length = numPrev+1; // include the bias weight - for(int j=0; j<numPrev+1; j++){ - if(randomize) units[layer][i][j] = rnd() * value * 2 - value; // between -value and value - else units[layer][i][j] = value; + oldWeightUpdate[layer].length = num; + for(int i = 0; i < num; i++) { + units[layer][i].length = numPrev + 1; // include the bias weight + oldWeightUpdate[layer][i].length = numPrev + 1; + + for(int j = 0; j < numPrev + 1; j++) { + if(randomize) + units[layer][i][j] = rnd() * value * 2 - value; // between -value and value + else + units[layer][i][j] = value; + oldWeightUpdate[layer][i][j] = 0; } } } - + ////////////////////////////////////////////////////// Evaluation ////////////////////////////////////////////////////// /// Evaluates the neural network. - public double[] evaluate(double[] inputs){ - return evaluateFull(inputs)[$-1]; // the last item (outputs) of the return value + public double[] evaluate(double[] inputs) { + return evaluateFull(inputs)[$ - 1]; // the last item (outputs) of the return value } - + /// Evaluates the neural network and returns the output from all units. - public double[][] evaluateFull(double[] inputs){ - if(inputs.length != numInputs) throw new InputException("Wrong length of inputs."); + public double[][] evaluateFull(double[] inputs) { + if(inputs.length != numInputs) + throw new InputException("Wrong length of inputs."); double[][] outputs; outputs.length = units.length; - outputs[0] = evaluateLayer(0,inputs); - for(int i=1; i<units.length; i++){ - outputs[i] = this.evaluateLayer(i,outputs[i-1]); + outputs[0] = evaluateLayer(0, inputs); + for(int i = 1; i < units.length; i++) { + outputs[i] = this.evaluateLayer(i, outputs[i - 1]); } return outputs; } - + // Helper function to evaluate the outputs of a single layer. - private double[] evaluateLayer(uint layer,double[] layerInputs){ + private double[] evaluateLayer(uint layer, double[] layerInputs) { double[] output; output.length = units[layer].length; //printArray(layerInputs); - for(int i=0; i<units[layer].length; i++){ - output[i] = evaluateUnit(layer,i,layerInputs); + for(int i = 0; i < units[layer].length; i++) { + output[i] = evaluateUnit(layer, i, layerInputs); } return output; } - + // Helper function to evaluate the output of a single unit. - private double evaluateUnit(uint layer, uint unit, double[] layerInputs){ + private double evaluateUnit(uint layer, uint unit, double[] layerInputs) { //writef("(%d,%d)=",layer,unit); //printArray(layerInputs); double total = units[layer][unit][0]; //bias - for(int i=1; i<layerInputs.length+1; i++){ - total += layerInputs[i-1] * units[layer][unit][i]; // wi * xi - //writef("@"); + for(int i = 1; i < layerInputs.length + 1; i++) { + total += layerInputs[i - 1] * units[layer][unit][i]; // wi * xi + //writef("@"); } //writefln(" ! %f",total); - if(functions[layer] != null) return functions[layer](total); // apply the function (if there is one) + if(functions[layer] != null) + return functions[layer](total); // apply the function (if there is one) writefln("no function"); return total; // just return the result instead } - - + ////////////////////////////////////////////////////// Training ////////////////////////////////////////////////////// /// Trains the neural network. - /// TODO: - /// Pull error calculation into a separate function. - public void train(double[][] trainingInputs, double[][] trainingOutputs){ - if(trainingInputs.length != trainingOutputs.length) throw new InputException("trainingInputs and trainingOutputs must be the same size"); + /// TODO: Pull error calculation into a separate function. (maybe) + public void train(double[][] trainingInputs, double[][] trainingOutputs, + bool stochastic = false) { + if(trainingInputs.length != trainingOutputs.length) + throw new InputException( + "trainingInputs and trainingOutputs must be the same size"); double[][][] weightUpdate; double[][] outputsError; double[][] outputs; @@ -98,93 +118,116 @@ weightUpdate.length = units.length; outputsError.length = units.length; //writefln("#%d,%d",weightUpdate.length,outputsError.length); - for(int i=0; i<units.length; i++){ + for(int i = 0; i < units.length; i++) { weightUpdate[i].length = units[i].length; outputsError[i].length = units[i].length; //writefln("##(%d)%d,%d",i,weightUpdate[i].length,outputsError[i].length); - for(int j=0; j<weightUpdate[i].length; j++){ + for(int j = 0; j < weightUpdate[i].length; j++) { weightUpdate[i][j].length = units[i][j].length; - for(int k=0; k<weightUpdate[i][j].length; k++) weightUpdate[i][j][k] = 0.0f; - //writefln("###(%d)%d",j,weightUpdate[i][j].length); + for(int k = 0; k < weightUpdate[i][j].length; k++) + weightUpdate[i][j][k] = 0.0f; + //writefln("###(%d)%d",j,weightUpdate[i][j].length); } } // Loop through each of the training examples - for(int example=0; example < trainingInputs.length; example++){ + for(int example = 0; example < trainingInputs.length; example++) { outputs = evaluateFull(trainingInputs[example]); + // Computing error of output layer - for(int i=0; i<outputs[$-1].length; i++){ // units of last layer + for(int i = 0; i < outputs[$ - 1].length; i++) { // units of last layer //writefln("{%d,%d,%d,%d}",example,i,outputs.length,outputsError[$-1].length); - outputsError[$-1][i] = outputs[$-1][i] * (1 - outputs[$-1][i]) * (trainingOutputs[example][i] - outputs[$-1][i]); - } // o(1-o)(t-o) + outputsError[$ - 1][i] = outputs[$ - 1][i] * (1 - outputs[$ - 1][i]) * (trainingOutputs[example][i] - outputs[$ - 1][i]); + } + // o(1-o)(t-o) //printArray(outputsError[$-1]); //printArray(units[length-1]); //* // Loop through each of the hidden layers (backwards - BACKpropagation!) - for(int layer=units.length-2; layer >= 0; layer--){ // -2 to skip the output layer + for(int layer = units.length - 2; layer >= 0; layer--) { // -2 to skip the output layer //writef("|"); // loop through the units in each hidden layer - for(int unit=0; unit<units[layer].length; unit++){ + for(int unit = 0; unit < units[layer].length; unit++) { //writef("*"); - total=0; + total = 0; // total up w * e for the units the output of this unit goes into - for(int k=0; k<units[layer+1].length; k++){ + for(int k = 0; k < units[layer + 1].length; k++) { //writef("{weight=%f,error=%f}", units[layer+1][k][unit+1/* +1 for bias*/], outputsError[layer+1][k]); - total += units[layer+1][k][unit+1/* +1 for bias*/] * outputsError[layer+1][k]; + total += units[layer + 1][k][unit + 1/* +1 for bias*/] * outputsError[layer + 1][k]; } //writefln("=%f(total)",total); // multiply total by o(1-o), store in outputsError outputsError[layer][unit] = outputs[layer][unit] * (1 - outputs[layer][unit]) * total; } - } //writefln(); + } + //writefln(); //writef("outputError="); printArray(outputsError); // special case for the units that receive the input values - for(int unit=0; unit<units[0].length; unit++){ // unit + for(int unit = 0; unit < units[0].length; unit++) { // unit //writefln(":%d,%d,%d,%d",j,weightUpdate.length,weightUpdate[0].length,weightUpdate[0][j].length); weightUpdate[0][unit][0] += outputsError[0][unit]; //bias - for(int input=1; input<units[0][unit].length; input++){ // input - weightUpdate[0][unit][input] += outputsError[0][unit] * trainingInputs[example][input-1]; // account for bias + for(int input = 1; input < units[0][unit].length; input++) { // input + weightUpdate[0][unit][input] += outputsError[0][unit] * trainingInputs[example][input - 1]; // account for bias } } - // Update the weightUpdate array - for(int i=1; i<units.length; i++){ // layer - for(int j=0; j<units[i].length; j++){ // unit + + // Update the weightUpdate array OR update the weights + for(int i = 1; i < units.length; i++) { // layer + for(int j = 0; j < units[i].length; j++) { // unit weightUpdate[i][j][0] += outputsError[i][j]; //bias - for(int k=1; k<units[i][j].length; k++){ // input + for(int k = 1; k < units[i][j].length; k++) { // input //writefln("[%d,%d,%d]=%f; %f; %f",i,j,k,weightUpdate[i][j][k],outputsError[i][j],outputs[i-1][k-1]); - weightUpdate[i][j][k] += outputsError[i][j] * outputs[i-1][k-1]; // previous layer, account for bias + weightUpdate[i][j][k] += outputsError[i][j] * outputs[i - 1][k - 1]; // previous layer, account for bias + } + } + } + if(stochastic) { + // Apply the weightUpdate array to the weights + for(int i = 0; i < units.length; i++) { // layer + for(int j = 0; j < units[i].length; j++) { // unit + for(int k = 0; k < units[i][j].length; k++) { // input + units[i][j][k] += this.learningRate * weightUpdate[i][j][k] + (this.momentum * this.oldWeightUpdate[i][j][k]); + this.oldWeightUpdate[i][j][k] = weightUpdate[i][j][k]; + weightUpdate[i][j][k] = 0; + } } } } } - // Apply the weightUpdate array to the weights - for(int i=0; i<units.length; i++){ // layer - for(int j=0; j<units[i].length; j++){ // unit - for(int k=0; k<units[i][j].length; k++){ // input - //writefln("[%d,%d,%d]=%f; %f",i,j,k,units[i][j][k],weightUpdate[i][j][k]); - units[i][j][k] += this.learningRate * weightUpdate[i][j][k]; + if(!stochastic) { + // Apply the weightUpdate array to the weights + for(int i = 0; i < units.length; i++) { // layer + for(int j = 0; j < units[i].length; j++) { // unit + for(int k = 0; k < units[i][j].length; k++) { // input + //writefln("[%d,%d,%d]=%f; %f",i,j,k,units[i][j][k],weightUpdate[i][j][k]); + units[i][j][k] += this.learningRate * weightUpdate[i][j][k] + (this.momentum * this.oldWeightUpdate[i][j][k]); + } } } + this.oldWeightUpdate = weightUpdate; } } - + /// Calculate the output error - double calculateError(double[][] trainingInputs, double[][] trainingOutputs){ - if(trainingInputs.length != trainingOutputs.length) throw new InputException("trainingInputs and trainingOutputs must be the same size"); + double calculateError(double[][] trainingInputs, double[][] trainingOutputs) { + if(trainingInputs.length != trainingOutputs.length) + throw new InputException( + "trainingInputs and trainingOutputs must be the same size"); double[] outputs; - double total=0,temp; - for(int i=0; i<trainingInputs.length; i++){ + double total = 0, temp; + for(int i = 0; i < trainingInputs.length; i++) { outputs = evaluate(trainingInputs[i]); - if(outputs.length != trainingOutputs[i].length) throw new InputException("Wrong output length"); - for(int j=0; j<outputs.length; j++){ + if(outputs.length != trainingOutputs[i].length) + throw new InputException("Wrong output length"); + for(int j = 0; j < outputs.length; j++) { temp = trainingOutputs[i][j] - outputs[j]; //writefln("&%f,%f",temp*temp,total); total += temp * temp; @@ -192,34 +235,8 @@ } return 0.5 * total; } - - double[][][] getWeights(){ + + double[][][] getWeights() { return units.dup; } } - -void printArray(double[] array){ - writef("["); - for(int i=0; i<array.length-1; i++){ - writef("%f, ",array[i]); - } - writefln("%f]",array[$-1]); -} - -void printArray(double[][] array){ - writef("["); - for(int i=0; i<array.length; i++){ - printArray(array[i]); - } - writefln("]"); -} - -void printArray(double[][][] array){ - writef("["); - for(int i=0; i<array.length; i++){ - printArray(array[i]); - } - writefln("]"); -} - -
--- a/trunk/backprop_test.d Sat Apr 12 21:55:37 2008 -0600 +++ b/trunk/backprop_test.d Tue Apr 15 14:39:49 2008 -0600 @@ -1,102 +1,177 @@ + module backprop_test; + import aid.nn.multilayer.backprop; import aid.nn.outputFunctions; +import aid.misc; import std.stdio; import std.random; +import std.conv; -/+double[][] trainingInputs = [ - [0,0,0], - [0,0,1], - [0,1,0], - [0,1,1], - [1,0,0], - [1,0,1], - [1,1,0], - [1,1,1]]; +double[][] trainingInputs, trainingOutputs; +uint numInputs; +uint[] outputsArray; -double[][] trainingOutputs = [ - [0.1], - [0.9], - [0.9], - [0.1], - [0.9], - [0.1], - [0.1], - [0.9]];+/ +void initTrainingExample(int example) { + if(example == 0) { + numInputs = 3; + outputsArray = [2,1]; + trainingInputs = [[0, 0, 0], + [0, 0, 1], + [0, 1, 0], + [0, 1, 1], + [1, 0, 0], + [1, 0, 1], + [1, 1, 0], + [1, 1, 1]]; + + trainingOutputs = [[0.1], + [0.9], + [0.9], + [0.1], + [0.9], + [0.1], + [0.1], + [0.9]]; + } else if(example == 1) { + numInputs = 2; + outputsArray = [2,1]; + trainingInputs = [[0, 0], + [1, 0], + [0, 1], + [1, 1]]; + + trainingOutputs = [[0.9], + [0.1], + [0.1], + [0.9]]; + } else if(example == 2) { + numInputs = 8; + outputsArray = [3,8]; + trainingInputs = [ + [0.9, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1], + [0.1, 0.9, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1], + [0.1, 0.1, 0.9, 0.1, 0.1, 0.1, 0.1, 0.1], + [0.1, 0.1, 0.1, 0.9, 0.1, 0.1, 0.1, 0.1], + [0.1, 0.1, 0.1, 0.1, 0.9, 0.1, 0.1, 0.1], + [0.1, 0.1, 0.1, 0.1, 0.1, 0.9, 0.1, 0.1], + [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.9, 0.1], + [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.9]]; + + trainingOutputs = trainingInputs; + } +} -/+double[][] trainingInputs = [ - [0,0], - [1,0], - [0,1], - [1,1]]; - -double[][] trainingOutputs = [ - [0.9], - [0.1], - [0.1], - [0.9]];+/ - -double[][] trainingInputs = [ - [0.9,0.1,0.1,0.1,0.1,0.1,0.1,0.1], - [0.1,0.9,0.1,0.1,0.1,0.1,0.1,0.1], - [0.1,0.1,0.9,0.1,0.1,0.1,0.1,0.1], - [0.1,0.1,0.1,0.9,0.1,0.1,0.1,0.1], - [0.1,0.1,0.1,0.1,0.9,0.1,0.1,0.1], - [0.1,0.1,0.1,0.1,0.1,0.9,0.1,0.1], - [0.1,0.1,0.1,0.1,0.1,0.1,0.9,0.1], - [0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.9]]; - -void main(){ - //rand_seed(0,0); - Backprop nn = new Backprop(8,[3,8],[&sigmoid,&sigmoid],.1); +void main(char[][] args) { + double learningRate = 0.2, momentum = 0.3, randomSize = 0.1, errorMin = 0.05; + int trainingExample = 0, maxIters = 10000; // 0 to 2 + bool quiet = false; // don't print output each time + int printEvery = 500; // output every ~ times - double error = nn.calculateError(trainingInputs,trainingInputs); + //try { + for(int i = 1; i < args.length; i++) { + switch(args[i]) { + case "-s": + case "--seed": + rand_seed(123, 0); + break; + case "-l": + case "--learning-rate": + //if(args.length = i + 1) + //throw new Error("Wrong number of paramaters"); + learningRate = toDouble(args[++i]); + break; + case "-m": + case "--momentum": + momentum = toDouble(args[++i]); + break; + case "-r": + case "--random-size": + randomSize = toDouble(args[++i]); + break; + case "-e": + case "--error-min": + errorMin = toDouble(args[++i]); + break; + case "-n": + case "--example-number": + trainingExample = toInt(args[++i]); + if(trainingExample > 2 || trainingExample < 0) + throw new Error("example number must be between 0 and 2"); + case "-x": + case "--example": + switch(args[++i]) { + case "parity": + trainingExample = 0; + break; + case "xor": + trainingExample = 1; + break; + case "identity": + trainingExample = 2; + break; + default: + throw new Error("Wrong example name. Must be parity, xor or identity"); + } + break; + case "-q": + case "--quiet": + quiet = true; + break; + case "-p": + case "--print-every": + printEvery = toInt(args[++i]); + break; + case "-i": + case "--min-iters": + case "--min-iterations": + maxIters = toInt(args[++i]); + break; + default: + throw new Error("Unknown switch: " ~ args[i]); + } + } + //} catch(ArrayBoundsError) { + // throw new Error("Wrong number of paramaters"); + //} + + initTrainingExample(trainingExample); + + Backprop nn = new Backprop(numInputs, outputsArray, [&sigmoid, &sigmoid], learningRate, momentum, randomSize, true); + + double error = nn.calculateError(trainingInputs, trainingOutputs); double[] output; int iter = 0; - writef("weights="); printArray(nn.getWeights()); - writef("outputs="); printArray(nn.evaluateFull(trainingInputs[$-1])); - while(error >= 0.01 && iter < 50000){ - if(iter % 500 == 0){ - writefln("Iter: %d",iter); - for(int i=0; i<trainingInputs.length; i++){ - output = nn.evaluate(trainingInputs[i]); - writef(" %d:", i); printArray(output); + //writef("weights="); + //printArray(nn.getWeights()); + //writef("outputs="); + //printArray(nn.evaluateFull(trainingInputs[$ - 1])); + while (error >= errorMin && iter < maxIters) { + if(iter % printEvery == 0) { + writefln("Iter: %d", iter); + if(!quiet) { + for(int i = 0; i < trainingInputs.length; i++) { + output = nn.evaluate(trainingInputs[i]); + writef(" %d:", i); + printArray(output); + } } writefln(" Error: %f", error); } - nn.train(trainingInputs,trainingInputs); - error = nn.calculateError(trainingInputs,trainingInputs); + nn.train(trainingInputs, trainingOutputs, true); + error = nn.calculateError(trainingInputs, trainingOutputs); iter++; } - writefln("Total Iters: %d",iter); - for(int i=0; i<trainingInputs.length; i++){ - writef(" %d:", i); printArray(nn.evaluateFull(trainingInputs[i])[0]); + writefln("Total Iters: %d", iter); + for(int i = 0; i < trainingInputs.length; i++) { + writef(" %d:", i); + if(trainingExample == 2) + printArray(nn.evaluateFull(trainingInputs[i])[0]); + else + printArray(nn.evaluate(trainingInputs[i])); } writefln(" Error: %f", error); - writef("weights="); printArray(nn.getWeights()); -} - -void printArray(double[] array){ - writef("["); - for(int i=0; i<array.length-1; i++){ - writef("%f, ",array[i]); - } - writefln("%f]",array[$-1]); + writef("weights="); + printArray(nn.getWeights()); } - -void printArray(double[][] array){ - writef("["); - for(int i=0; i<array.length; i++){ - printArray(array[i]); - } - writefln("]"); -} - -void printArray(double[][][] array){ - writef("["); - for(int i=0; i<array.length; i++){ - printArray(array[i]); - } - writefln("]"); -} \ No newline at end of file