view trunk/aid/nn/multilayer/backprop.d @ 5:810d58835f86
Added momentum and stochastic training to backprop.
author | revcompgeek
date | Tue, 15 Apr 2008 14:39:49 -0600
parents | 73beed484455
children | ff92c77006c7
module aid.nn.multilayer.backprop;

import aid.nn.outputFunctions;
import aid.misc;
import std.random;
import std.stdio;

class Backprop {
    private uint numInputs;
    private double[][][] units; // Includes the output units. units[layer][unit][inputWeight]
    private OutputFunctionPtr[] functions;
    public double learningRate;
    public double momentum;
    private double[][][] oldWeightUpdate;

    /// Constructor
    public this(uint numInputs, uint[] numUnits, OutputFunctionPtr[] functions,
            double learningRate = 0.03, double momentum = 0.1, double value = 0.1,
            bool randomize = true) {
        if(numUnits.length == 0)
            throw new InputException("numUnits must contain at least one layer");
        if(numUnits.length != functions.length)
            throw new InputException("numUnits and functions must be the same length");
        this.numInputs = numInputs;
        this.functions = functions;
        this.learningRate = learningRate;
        this.momentum = momentum;
        units.length = numUnits.length;
        oldWeightUpdate.length = numUnits.length;
        initUnitLayer(0, numUnits[0], numInputs, value, randomize);
        for(int i = 1; i < numUnits.length; i++) {
            initUnitLayer(i, numUnits[i], numUnits[i - 1], value, randomize);
        }
    }

    // Helper function to initialize the weights of a single layer.
    private void initUnitLayer(uint layer, uint num, uint numPrev, double value,
            bool randomize) {
        units[layer].length = num;
        oldWeightUpdate[layer].length = num;
        for(int i = 0; i < num; i++) {
            units[layer][i].length = numPrev + 1; // include the bias weight
            oldWeightUpdate[layer][i].length = numPrev + 1;
            for(int j = 0; j < numPrev + 1; j++) {
                if(randomize)
                    units[layer][i][j] = rnd() * value * 2 - value; // between -value and value
                else
                    units[layer][i][j] = value;
                oldWeightUpdate[layer][i][j] = 0;
            }
        }
    }

    ///////////////////////////////// Evaluation /////////////////////////////////

    /// Evaluates the neural network and returns the outputs of the last layer.
    public double[] evaluate(double[] inputs) {
        return evaluateFull(inputs)[$ - 1]; // the last item (outputs) of the return value
    }

    /// Evaluates the neural network and returns the output from all units.
    public double[][] evaluateFull(double[] inputs) {
        if(inputs.length != numInputs)
            throw new InputException("Wrong length of inputs.");
        double[][] outputs;
        outputs.length = units.length;
        outputs[0] = evaluateLayer(0, inputs);
        for(int i = 1; i < units.length; i++) {
            outputs[i] = this.evaluateLayer(i, outputs[i - 1]);
        }
        return outputs;
    }

    // Helper function to evaluate the outputs of a single layer.
    private double[] evaluateLayer(uint layer, double[] layerInputs) {
        double[] output;
        output.length = units[layer].length;
        for(int i = 0; i < units[layer].length; i++) {
            output[i] = evaluateUnit(layer, i, layerInputs);
        }
        return output;
    }

    // Helper function to evaluate the output of a single unit:
    // total = w0 + w1*x1 + ... + wn*xn, then functions[layer](total).
    private double evaluateUnit(uint layer, uint unit, double[] layerInputs) {
        double total = units[layer][unit][0]; // bias
        for(int i = 1; i < layerInputs.length + 1; i++) {
            total += layerInputs[i - 1] * units[layer][unit][i]; // wi * xi
        }
        if(functions[layer] != null)
            return functions[layer](total); // apply the output function (if there is one)
        return total; // no output function; return the weighted sum directly
    }
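    // Worked example for evaluateUnit (a sketch; these weight and input values
    // are made up for illustration): with weights [w0, w1, w2] = [0.1, -0.2, 0.4]
    // and inputs [x1, x2] = [1.0, 0.5], the unit computes
    //   total = 0.1 + (-0.2)(1.0) + (0.4)(0.5) = 0.1
    // and, assuming functions[layer] is a sigmoid (defined elsewhere, in
    // aid.nn.outputFunctions), returns sigmoid(0.1), roughly 0.525.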
    ////////////////////////////////// Training //////////////////////////////////

    /// Trains the neural network on a set of examples.
    /// TODO: Pull error calculation into a separate function (maybe).
    public void train(double[][] trainingInputs, double[][] trainingOutputs,
            bool stochastic = false) {
        if(trainingInputs.length != trainingOutputs.length)
            throw new InputException("trainingInputs and trainingOutputs must be the same size");
        double[][][] weightUpdate;
        double[][] outputsError;
        double[][] outputs;
        double total; // temp variable

        // Initialize the weightUpdate and outputsError arrays
        weightUpdate.length = units.length;
        outputsError.length = units.length;
        for(int i = 0; i < units.length; i++) {
            weightUpdate[i].length = units[i].length;
            outputsError[i].length = units[i].length;
            for(int j = 0; j < weightUpdate[i].length; j++) {
                weightUpdate[i][j].length = units[i][j].length;
                for(int k = 0; k < weightUpdate[i][j].length; k++)
                    weightUpdate[i][j][k] = 0.0;
            }
        }

        // Loop through each of the training examples
        for(int example = 0; example < trainingInputs.length; example++) {
            outputs = evaluateFull(trainingInputs[example]);

            // Compute the error of the output layer: o(1-o)(t-o)
            for(int i = 0; i < outputs[$ - 1].length; i++) { // units of the last layer
                outputsError[$ - 1][i] = outputs[$ - 1][i] * (1 - outputs[$ - 1][i])
                        * (trainingOutputs[example][i] - outputs[$ - 1][i]);
            }

            // Loop through each of the hidden layers (backwards - BACKpropagation!)
            for(int layer = cast(int) units.length - 2; layer >= 0; layer--) { // -2 skips the output layer
                // Loop through the units in each hidden layer
                for(int unit = 0; unit < units[layer].length; unit++) {
                    total = 0;
                    // Total up w * e over the units this unit's output feeds into
                    for(int k = 0; k < units[layer + 1].length; k++) {
                        total += units[layer + 1][k][unit + 1 /* +1 for bias */]
                                * outputsError[layer + 1][k];
                    }
                    // Multiply the total by o(1-o) and store it in outputsError
                    outputsError[layer][unit] = outputs[layer][unit]
                            * (1 - outputs[layer][unit]) * total;
                }
            }

            // Special case for the units that receive the input values
            for(int unit = 0; unit < units[0].length; unit++) {
                weightUpdate[0][unit][0] += outputsError[0][unit]; // bias
                for(int input = 1; input < units[0][unit].length; input++) {
                    weightUpdate[0][unit][input] += outputsError[0][unit]
                            * trainingInputs[example][input - 1]; // account for bias
                }
            }

            // Accumulate the weight updates for the remaining layers
            for(int i = 1; i < units.length; i++) { // layer
                for(int j = 0; j < units[i].length; j++) { // unit
                    weightUpdate[i][j][0] += outputsError[i][j]; // bias
                    for(int k = 1; k < units[i][j].length; k++) { // input
                        weightUpdate[i][j][k] += outputsError[i][j]
                                * outputs[i - 1][k - 1]; // previous layer, account for bias
                    }
                }
            }
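            // The update applied below, written out (eta = learningRate, alpha =
            // momentum, delta = outputsError, x = the unit's input):
            //   deltaW(t) = eta * delta * x + alpha * deltaW(t-1)
            // Stochastic mode applies it after every example; batch mode accumulates
            // the updates and applies them once at the end of train().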
            if(stochastic) {
                // Apply the weight updates after every example
                for(int i = 0; i < units.length; i++) { // layer
                    for(int j = 0; j < units[i].length; j++) { // unit
                        for(int k = 0; k < units[i][j].length; k++) { // input
                            units[i][j][k] += this.learningRate * weightUpdate[i][j][k]
                                    + (this.momentum * this.oldWeightUpdate[i][j][k]);
                            this.oldWeightUpdate[i][j][k] = weightUpdate[i][j][k];
                            weightUpdate[i][j][k] = 0;
                        }
                    }
                }
            }
        }

        if(!stochastic) {
            // Apply the accumulated weight updates once for the whole batch
            for(int i = 0; i < units.length; i++) { // layer
                for(int j = 0; j < units[i].length; j++) { // unit
                    for(int k = 0; k < units[i][j].length; k++) { // input
                        units[i][j][k] += this.learningRate * weightUpdate[i][j][k]
                                + (this.momentum * this.oldWeightUpdate[i][j][k]);
                    }
                }
            }
            this.oldWeightUpdate = weightUpdate;
        }
    }

    /// Calculates the output error: half the sum of squared errors over all examples.
    double calculateError(double[][] trainingInputs, double[][] trainingOutputs) {
        if(trainingInputs.length != trainingOutputs.length)
            throw new InputException("trainingInputs and trainingOutputs must be the same size");
        double[] outputs;
        double total = 0, temp;
        for(int i = 0; i < trainingInputs.length; i++) {
            outputs = evaluate(trainingInputs[i]);
            if(outputs.length != trainingOutputs[i].length)
                throw new InputException("Wrong output length");
            for(int j = 0; j < outputs.length; j++) {
                temp = trainingOutputs[i][j] - outputs[j];
                total += temp * temp;
            }
        }
        return 0.5 * total;
    }

    double[][][] getWeights() {
        return units.dup; // note: .dup copies only the outer array (shallow)
    }
}
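A minimal usage sketch for this class, training XOR with stochastic updates. The sigmoid output function and the OutputFunctionPtr signature are assumed from aid.nn.outputFunctions (not shown in this file), and the hyperparameters are illustrative only:

    import aid.nn.multilayer.backprop;
    import aid.nn.outputFunctions;
    import std.stdio;

    void main() {
        // 2 inputs -> 3 hidden units -> 1 output unit, sigmoid on both layers
        // (sigmoid is an assumed name from aid.nn.outputFunctions)
        OutputFunctionPtr[] funcs = [&sigmoid, &sigmoid];
        auto net = new Backprop(2, [3u, 1u], funcs, 0.3, 0.1);

        double[][] inputs  = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]];
        double[][] targets = [[0.0],      [1.0],      [1.0],      [0.0]];

        for(int epoch = 0; epoch < 5000; epoch++)
            net.train(inputs, targets, true); // stochastic: update after each example

        writefln("error = %f", net.calculateError(inputs, targets));
    }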