Mercurial > projects > aid
diff trunk/aid/nn/multilayer/backprop.d @ 3:314d68bafeff
Backprop and backprop_test added (no testing).
author | revcompgeek |
---|---|
date | Fri, 11 Apr 2008 18:12:55 -0600 |
parents | |
children | 73beed484455 |
line wrap: on
line diff
// NOTE(review): the file path says ".../multilayer/backprop.d" but the module is
// declared "multilevel" — likely a typo; kept as-is so existing imports don't break.
module aid.nn.multilevel.backprop;

import aid.nn.outputFunctions;
import aid.misc;
import std.random;
import std.stream;

/// A multi-layer feed-forward neural network trained with backpropagation.
class Backprop {
	private uint numInputs;                // number of external inputs fed to the first layer
	// Weights, indexed [layer][unit][inputWeight]; includes the output layer.
	// Index 0 of the innermost array is the unit's bias weight.
	private float[][][] units;
	private OutputFunctionPtr[] functions; // activation function per layer (null = identity)
	public float learningRate;             // step size applied in train()

	/// Constructor.
	/// Params:
	///   numInputs = number of inputs to the first layer
	///   numUnits  = units per layer; the last entry is the output layer
	///   functions = activation function for each layer (same length as numUnits)
	///   value     = init magnitude: weights are set to value, or drawn uniformly
	///               from [-value, value] when randomize is true
	///   randomize = whether to randomize initial weights
	/// Throws: InputException on empty numUnits or mismatched lengths.
	public this(uint numInputs, uint[] numUnits, OutputFunctionPtr[] functions,
			float value = 0.05, bool randomize = true) {
		if (numUnits.length == 0)
			throw new InputException("numUnits must be greater than 0");
		if (numUnits.length != functions.length)
			throw new InputException("numUnits and functions must be the same length");
		this.numInputs = numInputs;
		this.functions = functions;
		// BUG FIX: units was never allocated, so initUnitLayer indexed out of bounds.
		units.length = numUnits.length;
		initUnitLayer(0, numUnits[0], numInputs, value, randomize);
		for (int i = 1; i < numUnits.length; i++) {
			initUnitLayer(i, numUnits[i], numUnits[i - 1], value, randomize);
		}
	}

	// Helper to initialize the weights of one layer.
	private void initUnitLayer(uint layer, uint num, uint numPrev, float value, bool randomize) {
		units[layer].length = num;
		for (int i = 0; i < num; i++) {
			units[layer][i].length = numPrev + 1; // +1 for the bias weight at index 0
			for (int j = 0; j < numPrev + 1; j++) {
				if (randomize)
					units[layer][i][j] = rnd() * value * 2 - value; // uniform in [-value, value]
				else
					units[layer][i][j] = value;
			}
		}
	}

	////////// Evaluation //////////
	/// Evaluates the neural network and returns the final layer's outputs.
	public float[] evaluate(float[] inputs) {
		// BUG FIX: [$] is one past the end in D; the last element is [$-1].
		return evaluateFull(inputs)[$ - 1];
	}

	/// Evaluates the neural network and returns the output from all units.
+ public float[][] evaluateFull(float[] inputs){ + if(inputs.length != numInputs) throw new InputException("Wrong length of inputs."); + float[][] outputs; + outputs.length = units.length; + outputs[0] = evaluateLayer(0,inputs); + for(int i=0; i<units.length; i++){ + outputs[i] = this.evaluateLayer(i,outputs[i-1]); + } + return outputs; + } + + // Helper function to evaluate the outputs of a single layer. + private float[] evaluateLayer(uint layer,float[] layerInputs){ + float[] output; + output.length = layerInputs.length; + for(int i=0; i<layerInputs.length; i++){ + output[i] = evaluateUnit(layer,i,layerInputs); + } + return output; + } + + // Helper function to evaluate the output of a single unit. + private float evaluateUnit(uint layer, uint unit, float[] layerInputs){ + float total = units[layer][unit][0]; //bias + for(int i=1; i<layerInputs.length; i++){ + total += layerInputs[i-1] * units[layer][unit][i]; // wi * xi + } + if(functions[layer] != null) return functions[layer](total); // apply the function (if there is one) + else return total; // just return the result instead + } + + + ////////////////////////////////////////////////////// Training ////////////////////////////////////////////////////// + /// Trains the neural network. + /// TODO: + /// Pull error calculation into a separate function. 
+ public void train(float[][] allInputs, float[][] allOutputs){ + if(allInputs.length != allOutputs.length) throw new InputException("allInputs and allOutputs must be the same size"); + float[][][] weightUpdate; + float[][] outputsError; + float[][] outputs; + float total; //temp variable + + // Initialize the weightUpdate and outputsError variables + weightUpdate.length = units.length; + outputsError.length = units.length; + for(int i=0; i<weightUpdate.length; i++){ + weightUpdate[i].length = units[i].length; + outputsError[i].length = units[i].length; + for(int j=0; j<weightUpdate[i].length; i++){ + weightUpdate[i][j].length = units[i][j].length; + } + } + + + // Loop through each of the training examples + for(int example=0; example < allInputs.length; example++){ + outputs = evaluateFull(allInputs[example]); + + // Computing error of output layer + for(int i=0; i<outputs[$].length; i++) + outputsError[$][i] = outputs[$][i] * (1 - outputs[$][i]) * (allOutputs[example][i] - outputs[$][i]); // o(1-o)(t-o) + + // Loop through each of the hidden layers (backwards - BACKpropagation!) 
+ for(int i=units.length-2; i >= 0; i--){ // -2 to skip the output layer + // loop through the units in each hidden layer + for(int j=0; j<units[i].length; j++){ + total=0; + // total up w * e for the units the output of this unit goes into + for(int k=0; k<units[i+1].length; k++){ + total += units[i+1][k][j+1] * outputsError[i+1][k]; + } + // multiply total by o(1-o), store in outputsError + outputsError[i][j] = outputs[i][j] * (1 - outputs[i][j]) * total; + } + } + + // special case for the units that receive the input values + for(int j=0; j<units[0].length; j++){ // unit + weightUpdate[0][j][0] += outputsError[0][j]; //bias + for(int k=1; k<units[0][j].length; k++){ // input + weightUpdate[0][j][k] += outputsError[0][j] * allInputs[example][k-1]; + } + } + + // Update the weightUpdate array + for(int i=1; i<units.length; i++){ // layer + for(int j=0; j<units[i].length; j++){ // unit + weightUpdate[i][j][0] += outputsError[i][j]; //bias + for(int k=1; k<units[i][j].length; k++){ // input + weightUpdate[i][j][k] += outputsError[i][j] * outputs[i-1][k-1]; // previous layer, account for bias + } + } + } + } + + // Apply the weightUpdate array to the weights + for(int i=0; i<units.length; i++){ // layer + for(int j=0; j<units[i].length; j++){ // unit + for(int k=0; k<units[i][j].length; k++){ // input + units[i][j][k] += this.learningRate * weightUpdate[i][j][k]; + } + } + } + } + + /// Calculate the output error + float calculateError(float[][] allInputs, float[][] allOutputs){ + if(allInputs.length != allOutputs.length) throw new InputException("allInputs and allOutputs must be the same size"); + float[] outputs; + float total,temp; + for(int i=0; i<allInputs.length; i++){ + outputs = evaluate(allInputs[i]); + if(outputs.length != allOutputs[i].length) throw new InputException("Wrong output length"); + for(int j=0; j<outputs.length; j++){ + temp = allOutputs[i][j] - outputs[j]; + total += temp * temp; + } + } + return 0.5 * total; + } +} + + + +