diff trunk/aid/nn/multilayer/backprop.d @ 3:314d68bafeff

Backprop and backprop_test added (no testing).
author revcompgeek
date Fri, 11 Apr 2008 18:12:55 -0600
parents
children 73beed484455
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/trunk/aid/nn/multilayer/backprop.d	Fri Apr 11 18:12:55 2008 -0600
@@ -0,0 +1,170 @@
+module aid.nn.multilayer.backprop;
+
+import aid.nn.outputFunctions;
+import aid.misc;
+import std.random;
+import std.stream;
+
+class Backprop {
+	private uint       numInputs;
+	private float[][][] units;     // Includes the output units. units[layer][unit][inputWeight]
+	private OutputFunctionPtr[] functions;
+	public float learningRate = 0.1; // arbitrary default; float.init is NaN and would poison training
+	
+	///Constructor
+	public this(uint numInputs,uint[] numUnits,OutputFunctionPtr[] functions,float value=0.05,bool randomize=true){
+		if(numUnits.length == 0) throw new InputException("numUnits must contain at least one layer");
+		if(numUnits.length != functions.length) throw new InputException("numUnits and functions must be the same length");
+		this.numInputs = numInputs;
+		this.functions = functions;
+		units.length = numUnits.length; // allocate one layer of weight vectors per entry in numUnits
+		initUnitLayer(0,numUnits[0],numInputs,value,randomize);
+		for(int i=1; i<numUnits.length; i++){
+			initUnitLayer(i,numUnits[i],numUnits[i-1],value,randomize);
+		}
+	}
+	
+	// Helper function to initialize a certain layer.
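+	// e.g. initUnitLayer(0, 3, 2, 0.05, true) gives layer 0 three units, each
+	// with two input weights plus a bias, drawn uniformly from (-0.05, 0.05).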
+	private void initUnitLayer(uint layer,uint num,uint numPrev,float value,bool randomize){
+		units[layer].length = num;
+		for(int i=0; i<num; i++){
+			units[layer][i].length = numPrev+1; // include the bias weight
+			for(int j=0; j<numPrev+1; j++){
+				if(randomize) units[layer][i][j] = rnd() * value * 2 - value; // between -value and value
+				else units[layer][i][j] = value;
+			}
+		}
+	}
+	
+	////////////////////////////////////////////////////// Evaluation //////////////////////////////////////////////////////
+	/// Evaluates the neural network.
+	public float[] evaluate(float[] inputs){
+		return evaluateFull(inputs)[$-1]; // the last element: the output layer's outputs
+	}
+	
+	/// Evaluates the neural network and returns the output from all units.
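+	/// The result holds one float[] per layer; the last element is the network's output.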
+	public float[][] evaluateFull(float[] inputs){
+		if(inputs.length != numInputs) throw new InputException("Wrong length of inputs.");
+		float[][] outputs;
+		outputs.length = units.length;
+		outputs[0] = evaluateLayer(0,inputs);
+		for(int i=1; i<units.length; i++){ // start at 1; layer 0 was fed the raw inputs above
+			outputs[i] = this.evaluateLayer(i,outputs[i-1]);
+		}
+		return outputs;
+	}
+	
+	// Helper function to evaluate the outputs of a single layer.
+	private float[] evaluateLayer(uint layer,float[] layerInputs){
+		float[] output;
+		output.length = units[layer].length; // one output per unit in this layer
+		for(int i=0; i<units[layer].length; i++){
+			output[i] = evaluateUnit(layer,i,layerInputs);
+		}
+		return output;
+	}
+	
+	// Helper function to evaluate the output of a single unit.
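+	// e.g. with weights [w0,w1,w2] and inputs [x1,x2] this computes
+	// f(w0 + w1*x1 + w2*x2), where f is the layer's output function.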
+	private float evaluateUnit(uint layer, uint unit, float[] layerInputs){
+		float total = units[layer][unit][0]; //bias
+		for(int i=1; i<=layerInputs.length; i++){ // weight 0 is the bias, so input i-1 pairs with weight i
+			total += layerInputs[i-1] * units[layer][unit][i]; // wi * xi
+		}
+		if(functions[layer] != null) return functions[layer](total); // apply the function (if there is one)
+		else return total; // just return the result instead
+	}
+	
+	
+	////////////////////////////////////////////////////// Training //////////////////////////////////////////////////////
+	/// Trains the neural network.
+	/// TODO:
+	///   Pull error calculation into a separate function.
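+	/// Note: weight changes are accumulated over the whole training set and
+	/// applied once at the end (batch gradient descent).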
+	public void train(float[][] allInputs, float[][] allOutputs){
+		if(allInputs.length != allOutputs.length) throw new InputException("allInputs and allOutputs must be the same size");
+		float[][][] weightUpdate;
+		float[][] outputsError;
+		float[][] outputs;
+		float total; //temp variable
+		
+		// Initialize the weightUpdate and outputsError variables
+		weightUpdate.length = units.length;
+		outputsError.length = units.length;
+		for(int i=0; i<weightUpdate.length; i++){
+			weightUpdate[i].length = units[i].length;
+			outputsError[i].length = units[i].length;
+			for(int j=0; j<weightUpdate[i].length; j++){
+				weightUpdate[i][j].length = units[i][j].length;
+				weightUpdate[i][j][] = 0; // new float arrays default to NaN; zero them before the += below
+			}
+		}
+		
+		
+		// Loop through each of the training examples
+		for(int example=0; example < allInputs.length; example++){
+			outputs = evaluateFull(allInputs[example]);
+			
+			// Compute the error of the output layer
+			for(int i=0; i<outputs[$-1].length; i++)
+				outputsError[$-1][i] = outputs[$-1][i] * (1 - outputs[$-1][i]) * (allOutputs[example][i] - outputs[$-1][i]); // o(1-o)(t-o)
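+			// (o(1-o) is the derivative of the sigmoid, so this delta rule assumes
+			// sigmoid output functions; other activations need their own derivative.)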
+			
+			// Loop through each of the hidden layers (backwards - BACKpropagation!)
+			for(int i=units.length-2; i >= 0; i--){ // -2 to skip the output layer
+				// loop through the units in each hidden layer
+				for(int j=0; j<units[i].length; j++){
+					total=0;
+					// total up w * e for the units the output of this unit goes into
+					for(int k=0; k<units[i+1].length; k++){
+						total += units[i+1][k][j+1] * outputsError[i+1][k];
+					}
+					// multiply total by o(1-o), store in outputsError
+					outputsError[i][j] = outputs[i][j] * (1 - outputs[i][j]) * total;
+				}
+			}
+			
+			// Special case: the first layer's inputs come straight from the training example
+			for(int j=0; j<units[0].length; j++){ // unit
+				weightUpdate[0][j][0] += outputsError[0][j]; //bias
+				for(int k=1; k<units[0][j].length; k++){ // input
+					weightUpdate[0][j][k] += outputsError[0][j] * allInputs[example][k-1];
+				}
+			}
+			
+			// Accumulate updates for the remaining layers, whose inputs are the previous layer's outputs
+			for(int i=1; i<units.length; i++){ // layer
+				for(int j=0; j<units[i].length; j++){ // unit
+					weightUpdate[i][j][0] += outputsError[i][j]; //bias
+					for(int k=1; k<units[i][j].length; k++){ // input
+						weightUpdate[i][j][k] += outputsError[i][j] * outputs[i-1][k-1]; // previous layer, account for bias
+					}
+				}
+			}
+		}
+		
+		// Apply the weightUpdate array to the weights
+		for(int i=0; i<units.length; i++){ // layer
+			for(int j=0; j<units[i].length; j++){ // unit
+				for(int k=0; k<units[i][j].length; k++){ // input
+					units[i][j][k] += this.learningRate * weightUpdate[i][j][k];
+				}
+			}
+		}
+	}
+	
+	/// Calculates the total squared error over a training set: 0.5 * sum of (t - o)^2.
+	float calculateError(float[][] allInputs, float[][] allOutputs){
+		if(allInputs.length != allOutputs.length) throw new InputException("allInputs and allOutputs must be the same size");
+		float[] outputs;
+		float total = 0, temp; // total must start at 0; float.init is NaN
+		for(int i=0; i<allInputs.length; i++){
+			outputs = evaluate(allInputs[i]);
+			if(outputs.length != allOutputs[i].length) throw new InputException("Wrong output length");
+			for(int j=0; j<outputs.length; j++){
+				temp = allOutputs[i][j] - outputs[j];
+				total += temp * temp;
+			}
+		}
+		return 0.5 * total;
+	}
+}
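+
+// A minimal usage sketch, not part of the original commit: train the network on
+// XOR and check that the squared error shrinks. It assumes aid.nn.outputFunctions
+// provides a `sigmoid` OutputFunctionPtr; the layer sizes, learning rate, and
+// epoch count are arbitrary choices for illustration.
+version(BackpropExample){
+	void backpropXorExample(){
+		// Two sigmoid layers: one hidden layer and the output layer
+		OutputFunctionPtr[] fns = [&sigmoid, &sigmoid]; // assumed function name
+		uint[] layers = [3u, 1u]; // 3 hidden units, 1 output unit
+		auto nn = new Backprop(2, layers, fns);
+		nn.learningRate = 0.3;
+		
+		float[][] inputs  = [[0f,0f], [0f,1f], [1f,0f], [1f,1f]];
+		float[][] targets = [[0f], [1f], [1f], [0f]];
+		
+		for(int epoch=0; epoch<5000; epoch++)
+			nn.train(inputs, targets);
+		
+		float err = nn.calculateError(inputs, targets); // should approach 0
+	}
+}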
+