view trunk/aid/nn/multilayer/backprop.d @ 3:314d68bafeff

Backprop and backprop_test added (no testing).
author revcompgeek
date Fri, 11 Apr 2008 18:12:55 -0600

module aid.nn.multilayer.backprop;

import aid.nn.outputFunctions;
import aid.misc;
import std.random;

class Backprop {
	private uint       numInputs;
	private float[][][] units;     // Includes the output units. units[layer][unit][inputWeight]
	private OutputFunctionPtr[] functions;
	public float learningRate; // defaults to NaN; set this before calling train()
	
	///Constructor
	public this(uint numInputs,uint[] numUnits,OutputFunctionPtr[] functions,float value=0.05,bool randomize=true){
		if(numUnits.length == 0) throw new InputException("numUnits must contain at least one layer");
		if(numUnits.length != functions.length) throw new InputException("numUnits and functions must be the same length");
		this.numInputs = numInputs;
		this.functions = functions;
		units.length = numUnits.length; // allocate one layer of units per entry in numUnits
		initUnitLayer(0,numUnits[0],numInputs,value,randomize);
		for(int i=1; i<numUnits.length; i++){
			initUnitLayer(i,numUnits[i],numUnits[i-1],value,randomize);
		}
	}
	
	// Helper function to initialize a certain layer.
	private void initUnitLayer(uint layer,uint num,uint numPrev,float value,bool randomize){
		units[layer].length = num;
		for(int i=0; i<num; i++){
			units[layer][i].length = numPrev+1; // include the bias weight
			for(int j=0; j<numPrev+1; j++){
				if(randomize) units[layer][i][j] = rnd() * value * 2 - value; // between -value and value
				else units[layer][i][j] = value;
			}
		}
	}
	
	////////////////////////////////////////////////////// Evaluation //////////////////////////////////////////////////////
	/// Evaluates the neural network.
	public float[] evaluate(float[] inputs){
		return evaluateFull(inputs)[$-1]; // the last element holds the output layer's outputs
	}
	
	/// Evaluates the neural network and returns the output from all units.
	public float[][] evaluateFull(float[] inputs){
		if(inputs.length != numInputs) throw new InputException("Wrong length of inputs.");
		float[][] outputs;
		outputs.length = units.length;
		outputs[0] = evaluateLayer(0,inputs);
		for(int i=1; i<units.length; i++){ // start at 1; layer 0 already consumed the raw inputs
			outputs[i] = this.evaluateLayer(i,outputs[i-1]);
		}
		return outputs;
	}
	
	// Helper function to evaluate the outputs of a single layer.
	private float[] evaluateLayer(uint layer,float[] layerInputs){
		float[] output;
		output.length = units[layer].length; // one output per unit in this layer, not per input
		for(int i=0; i<units[layer].length; i++){
			output[i] = evaluateUnit(layer,i,layerInputs);
		}
		return output;
	}
	
	// Helper function to evaluate the output of a single unit.
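	// Worked example (hypothetical numbers): weights [bias, w1, w2] = [0.1, 0.5, -0.3]
	// and layerInputs [1.0, 2.0] give total = 0.1 + 0.5*1.0 + (-0.3)*2.0 = 0.0,
	// which is then passed through the layer's output function.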
	private float evaluateUnit(uint layer, uint unit, float[] layerInputs){
		float total = units[layer][unit][0]; //bias
		for(int i=1; i<=layerInputs.length; i++){ // weight i pairs with input i-1; index 0 is the bias
			total += layerInputs[i-1] * units[layer][unit][i]; // wi * xi
		}
		if(functions[layer] != null) return functions[layer](total); // apply the function (if there is one)
		else return total; // just return the result instead
	}
	
	
	////////////////////////////////////////////////////// Training //////////////////////////////////////////////////////
	/// Trains the neural network.
	/// TODO:
	///   Pull error calculation into a separate function.
	public void train(float[][] allInputs, float[][] allOutputs){
		if(allInputs.length != allOutputs.length) throw new InputException("allInputs and allOutputs must be the same size");
		float[][][] weightUpdate;
		float[][] outputsError;
		float[][] outputs;
		float total; //temp variable
		
		// Initialize the weightUpdate and outputsError variables
		weightUpdate.length = units.length;
		outputsError.length = units.length;
		for(int i=0; i<weightUpdate.length; i++){
			weightUpdate[i].length = units[i].length;
			outputsError[i].length = units[i].length;
			for(int j=0; j<weightUpdate[i].length; j++){
				weightUpdate[i][j].length = units[i][j].length;
				weightUpdate[i][j][] = 0.0f; // floats default to NaN in D; zero before accumulating
			}
		}
		
		
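		// Note: this is batch (offline) gradient descent; updates accumulate
		// over all examples and are applied once at the end, so each call to
		// train() performs exactly one epoch.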
		// Loop through each of the training examples
		for(int example=0; example < allInputs.length; example++){
			outputs = evaluateFull(allInputs[example]);
			
			// Compute the error of the output layer
			for(int i=0; i<outputs[$-1].length; i++)
				outputsError[$-1][i] = outputs[$-1][i] * (1 - outputs[$-1][i]) * (allOutputs[example][i] - outputs[$-1][i]); // o(1-o)(t-o); o(1-o) is the sigmoid derivative
			
			// Loop through each of the hidden layers (backwards - BACKpropagation!)
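			// For each hidden unit j: error_j = o_j * (1 - o_j) * sum_k(w_kj * error_k),
			// where k runs over the units in the next layer that consume o_j
			// (w_kj is indexed as units[i+1][k][j+1] below to skip the bias weight).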
			for(int i=units.length-2; i >= 0; i--){ // -2 to skip the output layer
				// loop through the units in each hidden layer
				for(int j=0; j<units[i].length; j++){
					total=0;
					// total up w * e for the units the output of this unit goes into
					for(int k=0; k<units[i+1].length; k++){
						total += units[i+1][k][j+1] * outputsError[i+1][k];
					}
					// multiply total by o(1-o), store in outputsError
					outputsError[i][j] = outputs[i][j] * (1 - outputs[i][j]) * total;
				}
			}
			
			// special case for the units that receive the input values
			for(int j=0; j<units[0].length; j++){ // unit
				weightUpdate[0][j][0] += outputsError[0][j]; //bias
				for(int k=1; k<units[0][j].length; k++){ // input
					weightUpdate[0][j][k] += outputsError[0][j] * allInputs[example][k-1];
				}
			}
			
			// Accumulate updates for the remaining layers; their inputs are the previous layer's outputs
			for(int i=1; i<units.length; i++){ // layer
				for(int j=0; j<units[i].length; j++){ // unit
					weightUpdate[i][j][0] += outputsError[i][j]; //bias
					for(int k=1; k<units[i][j].length; k++){ // input
						weightUpdate[i][j][k] += outputsError[i][j] * outputs[i-1][k-1]; // previous layer, account for bias
					}
				}
			}
		}
		
		// Apply the weightUpdate array to the weights
		for(int i=0; i<units.length; i++){ // layer
			for(int j=0; j<units[i].length; j++){ // unit
				for(int k=0; k<units[i][j].length; k++){ // input
					units[i][j][k] += this.learningRate * weightUpdate[i][j][k];
				}
			}
		}
	}
	
	/// Calculate the output error
	float calculateError(float[][] allInputs, float[][] allOutputs){
		if(allInputs.length != allOutputs.length) throw new InputException("allInputs and allOutputs must be the same size");
		float[] outputs;
		float total = 0, temp; // total must start at 0; floats default to NaN
		for(int i=0; i<allInputs.length; i++){
			outputs = evaluate(allInputs[i]);
			if(outputs.length != allOutputs[i].length) throw new InputException("Wrong output length");
			for(int j=0; j<outputs.length; j++){
				temp = allOutputs[i][j] - outputs[j];
				total += temp * temp;
			}
		}
		return 0.5 * total;
	}
}
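
// A minimal usage sketch (untested, like the rest of this commit): train a
// 2-2-1 network on XOR with batch gradient descent. It assumes that
// aid.nn.outputFunctions provides a sigmoid OutputFunctionPtr named `sigmoid`;
// adjust the name to whatever that module actually exports. Guarded by a
// version block so it only compiles when explicitly requested.
version(BackpropExample){
	void xorExample(){
		float[][] inputs  = [[0.0f,0.0f],[0.0f,1.0f],[1.0f,0.0f],[1.0f,1.0f]];
		float[][] targets = [[0.0f],[1.0f],[1.0f],[0.0f]];
		
		// 2 inputs, one hidden layer of 2 units, 1 output unit, all sigmoid;
		// initial weights drawn uniformly from [-0.5, 0.5).
		auto net = new Backprop(2,[2u,1u],[&sigmoid,&sigmoid],0.5);
		net.learningRate = 0.3; // must be set explicitly; it defaults to NaN
		
		// Each call to train() is one epoch over the whole training set.
		for(int epoch=0; epoch<5000; epoch++)
			net.train(inputs,targets);
		
		float error = net.calculateError(inputs,targets); // should approach 0
	}
}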