// trunk/aid/nn/multilayer/backprop.d @ 4:73beed484455
// "Backprop working correctly." -- revcompgeek, Sat, 12 Apr 2008 21:55:37 -0600
module aid.nn.multilayer.backprop; // module name matches the file path

import aid.nn.outputFunctions;
import aid.misc;
import std.random;
import std.stdio;

class Backprop {
    private uint numInputs;
    private double[][][] units; // Includes the output units. units[layer][unit][inputWeight]
    private OutputFunctionPtr[] functions;
    public double learningRate;

    /// Constructor
    public this(uint numInputs, uint[] numUnits, OutputFunctionPtr[] functions,
            double learningRate = 0.03, double value = 0.1, bool randomize = true) {
        if(numUnits.length == 0)
            throw new InputException("numUnits must not be empty");
        if(numUnits.length != functions.length)
            throw new InputException("numUnits and functions must be the same length");
        this.numInputs = numInputs;
        this.functions = functions;
        this.learningRate = learningRate;

        units.length = numUnits.length;
        initUnitLayer(0, numUnits[0], numInputs, value, randomize);
        for(int i = 1; i < numUnits.length; i++)
            initUnitLayer(i, numUnits[i], numUnits[i - 1], value, randomize);
    }

    // Helper function to initialize the weights of a single layer.
    private void initUnitLayer(uint layer, uint num, uint numPrev, double value, bool randomize) {
        units[layer].length = num;
        for(int i = 0; i < num; i++) {
            units[layer][i].length = numPrev + 1; // include the bias weight
            for(int j = 0; j < numPrev + 1; j++) {
                if(randomize)
                    units[layer][i][j] = rnd() * value * 2 - value; // uniform in [-value, value)
                else
                    units[layer][i][j] = value;
            }
        }
    }

    ////////////////////////////////////////////////////// Evaluation //////////////////////////////////////////////////////

    /// Evaluates the neural network and returns the final layer's outputs.
    public double[] evaluate(double[] inputs) {
        return evaluateFull(inputs)[$ - 1]; // the last item (the output layer) of the full result
    }

    /// Evaluates the neural network and returns the outputs of every layer.
    public double[][] evaluateFull(double[] inputs) {
        if(inputs.length != numInputs)
            throw new InputException("Wrong length of inputs.");
        double[][] outputs;
        outputs.length = units.length;
        outputs[0] = evaluateLayer(0, inputs);
        for(int i = 1; i < units.length; i++)
            outputs[i] = this.evaluateLayer(i, outputs[i - 1]);
        return outputs;
    }

    // Helper function to evaluate the outputs of a single layer.
    private double[] evaluateLayer(uint layer, double[] layerInputs) {
        double[] output;
        output.length = units[layer].length;
        for(int i = 0; i < units[layer].length; i++)
            output[i] = evaluateUnit(layer, i, layerInputs);
        return output;
    }

    // Helper function to evaluate the output of a single unit:
    // f(w0 + w1*x1 + ... + wn*xn), where w0 is the bias weight.
    private double evaluateUnit(uint layer, uint unit, double[] layerInputs) {
        double total = units[layer][unit][0]; // bias
        for(int i = 1; i < layerInputs.length + 1; i++)
            total += layerInputs[i - 1] * units[layer][unit][i]; // wi * xi
        if(functions[layer] !is null)
            return functions[layer](total); // apply the output function (if there is one)
        return total; // no output function for this layer; return the raw sum
    }
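    // A minimal forward-pass sketch, not from the original source. It assumes
    // aid.nn.outputFunctions provides a `sigmoid` OutputFunctionPtr; substitute
    // whatever name that module actually exports.
    unittest {
        OutputFunctionPtr[] fns = [&sigmoid, &sigmoid];
        // 2 inputs, one hidden layer of 3 units, 1 output unit; randomize=false
        // makes every weight exactly 0.1, so the result is deterministic.
        auto nn = new Backprop(2, [3u, 1u], fns, 0.03, 0.1, false);
        double[] output = nn.evaluate([1.0, 0.0]);
        assert(output.length == 1); // one value per output unit
    }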
    ////////////////////////////////////////////////////// Training //////////////////////////////////////////////////////

    /// Trains the neural network with one batch gradient-descent step over all examples.
    /// TODO: Pull error calculation into a separate function.
    public void train(double[][] trainingInputs, double[][] trainingOutputs) {
        if(trainingInputs.length != trainingOutputs.length)
            throw new InputException("trainingInputs and trainingOutputs must be the same size");

        double[][][] weightUpdate;
        double[][] outputsError;
        double[][] outputs;
        double total; // temp variable

        // Initialize the weightUpdate and outputsError arrays to match the network's shape.
        weightUpdate.length = units.length;
        outputsError.length = units.length;
        for(int i = 0; i < units.length; i++) {
            weightUpdate[i].length = units[i].length;
            outputsError[i].length = units[i].length;
            for(int j = 0; j < weightUpdate[i].length; j++) {
                weightUpdate[i][j].length = units[i][j].length;
                for(int k = 0; k < weightUpdate[i][j].length; k++)
                    weightUpdate[i][j][k] = 0.0;
            }
        }

        // Accumulate the gradient over each of the training examples.
        for(int example = 0; example < trainingInputs.length; example++) {
            outputs = evaluateFull(trainingInputs[example]);

            // Compute the error of the output layer: o(1-o)(t-o), i.e. the
            // squared-error term scaled by the sigmoid derivative o(1-o).
            for(int i = 0; i < outputs[$ - 1].length; i++) { // units of the last layer
                outputsError[$ - 1][i] = outputs[$ - 1][i] * (1 - outputs[$ - 1][i])
                        * (trainingOutputs[example][i] - outputs[$ - 1][i]);
            }

            // Loop through each of the hidden layers (backwards - BACKpropagation!).
            for(int layer = units.length - 2; layer >= 0; layer--) { // -2 to skip the output layer
                for(int unit = 0; unit < units[layer].length; unit++) {
                    // Total up w * e for the units this unit's output feeds into.
                    total = 0;
                    for(int k = 0; k < units[layer + 1].length; k++)
                        total += units[layer + 1][k][unit + 1] * outputsError[layer + 1][k]; // +1 for bias
                    // Multiply the total by o(1-o) and store it in outputsError.
                    outputsError[layer][unit] = outputs[layer][unit] * (1 - outputs[layer][unit]) * total;
                }
            }

            // Special case for the first layer, whose inputs are the training inputs.
            for(int unit = 0; unit < units[0].length; unit++) {
                weightUpdate[0][unit][0] += outputsError[0][unit]; // bias
                for(int input = 1; input < units[0][unit].length; input++)
                    weightUpdate[0][unit][input] += outputsError[0][unit] * trainingInputs[example][input - 1]; // -1 accounts for the bias
            }

            // Accumulate the updates for the remaining layers.
            for(int i = 1; i < units.length; i++) { // layer
                for(int j = 0; j < units[i].length; j++) { // unit
                    weightUpdate[i][j][0] += outputsError[i][j]; // bias
                    for(int k = 1; k < units[i][j].length; k++) // input
                        weightUpdate[i][j][k] += outputsError[i][j] * outputs[i - 1][k - 1]; // previous layer's output, -1 for the bias
                }
            }
        }

        // Apply the accumulated weightUpdate array to the weights.
        for(int i = 0; i < units.length; i++) { // layer
            for(int j = 0; j < units[i].length; j++) { // unit
                for(int k = 0; k < units[i][j].length; k++) // input
                    units[i][j][k] += this.learningRate * weightUpdate[i][j][k];
            }
        }
    }
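    // Usage sketch for train(), not from the original source (assumes the same
    // hypothetical `sigmoid` as above). Each call performs one full-batch
    // gradient step, so callers loop until the error is acceptably small.
    unittest {
        double[][] ins  = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]];
        double[][] outs = [[0.0], [1.0], [1.0], [0.0]]; // XOR
        auto nn = new Backprop(2, [2u, 1u], [&sigmoid, &sigmoid], 0.3);
        for(int epoch = 0; epoch < 10000 && nn.calculateError(ins, outs) > 0.01; epoch++)
            nn.train(ins, outs);
    }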
    /// Calculates the total squared error over a set of examples: 0.5 * sum((t - o)^2).
    double calculateError(double[][] trainingInputs, double[][] trainingOutputs) {
        if(trainingInputs.length != trainingOutputs.length)
            throw new InputException("trainingInputs and trainingOutputs must be the same size");
        double[] outputs;
        double total = 0, temp;
        for(int i = 0; i < trainingInputs.length; i++) {
            outputs = evaluate(trainingInputs[i]);
            if(outputs.length != trainingOutputs[i].length)
                throw new InputException("Wrong output length");
            for(int j = 0; j < outputs.length; j++) {
                temp = trainingOutputs[i][j] - outputs[j];
                total += temp * temp;
            }
        }
        return 0.5 * total;
    }

    double[][][] getWeights() {
        return units.dup; // note: a shallow copy; the inner arrays still alias the live weights
    }
}

void printArray(double[] array) {
    if(array.length == 0) { // guard: the loop below underflows on an empty array
        writefln("[]");
        return;
    }
    writef("[");
    for(int i = 0; i < array.length - 1; i++)
        writef("%f, ", array[i]);
    writefln("%f]", array[$ - 1]);
}

void printArray(double[][] array) {
    writef("[");
    for(int i = 0; i < array.length; i++)
        printArray(array[i]);
    writefln("]");
}

void printArray(double[][][] array) {
    writef("[");
    for(int i = 0; i < array.length; i++)
        printArray(array[i]);
    writefln("]");
}
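// Standalone demo, not part of the original file: compile with
// -version=BackpropDemo for an entry point that trains on XOR and reports the
// error before and after. Assumes the hypothetical `sigmoid` noted above.
version(BackpropDemo) {
    void main() {
        double[][] ins  = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]];
        double[][] outs = [[0.0], [1.0], [1.0], [0.0]];
        auto nn = new Backprop(2, [2u, 1u], [&sigmoid, &sigmoid], 0.3);
        writefln("error before: %f", nn.calculateError(ins, outs));
        for(int epoch = 0; epoch < 10000; epoch++)
            nn.train(ins, outs);
        writefln("error after:  %f", nn.calculateError(ins, outs));
        printArray(nn.getWeights()); // inspect the learned weights
    }
}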