trunk/aid/nn/multilayer/backprop.d @ 6:ff92c77006c7
Added support for reading training examples from files.
author: revcompgeek
date: Tue, 06 May 2008 21:43:55 -0600
parent: 810d58835f86

/**
 * backprop.d
 * Implements a feed-forward neural network trained by backpropagation.
 */

module aid.nn.multilayer.backprop;

import aid.nn.outputFunctions;
import aid.misc;
import std.random;
import std.stream;
import std.stdio;

class Backprop {
	private uint numInputs;
	private double[][][] units; // Includes the output layer. units[layer][unit][weight]; weight 0 is the bias.
	private OutputFunctionPtr[] functions;
	public double learningRate;
	public double momentum;
	private double[][][] oldWeightUpdate;

	///Constructor
	public this(uint numInputs, uint[] numUnits, OutputFunctionPtr[] functions,
			double learningRate = 0.03, double momentum = 0.1,
			double value = 0.1, bool randomize = true) {
		if(numUnits.length == 0)
			throw new InputException("numUnits must not be empty");
		if(numUnits.length != functions.length)
			throw new InputException(
					"numUnits and functions must be the same length");
		this.numInputs = numInputs;
		this.functions = functions;
		this.learningRate = learningRate;
		this.momentum = momentum;
		units.length = numUnits.length;
		oldWeightUpdate.length = numUnits.length;
		initUnitLayer(0, numUnits[0], numInputs, value, randomize);
		for(int i = 1; i < numUnits.length; i++) {
			initUnitLayer(i, numUnits[i], numUnits[i - 1], value, randomize);
		}
	}
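
	/* A minimal construction sketch (illustrative: assumes
	 * aid.nn.outputFunctions provides a sigmoid OutputFunctionPtr named
	 * `sigmoid`; substitute whatever that module actually exports):
	 *
	 *   // 2 inputs -> a hidden layer of 3 sigmoid units -> 1 sigmoid output
	 *   auto nn = new Backprop(2, [3u, 1u], [&sigmoid, &sigmoid]);
	 */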

	// Helper function to initialize the weights of a single layer.
	private void initUnitLayer(uint layer, uint num, uint numPrev,
			double value, bool randomize) {
		units[layer].length = num;
		oldWeightUpdate[layer].length = num;
		for(int i = 0; i < num; i++) {
			units[layer][i].length = numPrev + 1; // include the bias weight
			oldWeightUpdate[layer][i].length = numPrev + 1;
			
			for(int j = 0; j < numPrev + 1; j++) {
				if(randomize)
					units[layer][i][j] = rnd() * value * 2 - value; // between -value and value
				else
					units[layer][i][j] = value;
				oldWeightUpdate[layer][i][j] = 0;
			}
		}
	}

	////////////////////////////////////////////////////// Evaluation //////////////////////////////////////////////////////
	/// Evaluates the neural network and returns the outputs of the final layer.
	public double[] evaluate(double[] inputs) {
		return evaluateFull(inputs)[$ - 1]; // the last item (outputs) of the return value
	}

	/// Evaluates the neural network and returns the output from all units.
	public double[][] evaluateFull(double[] inputs) {
		if(inputs.length != numInputs)
			throw new InputException("Wrong length of inputs.");
		double[][] outputs;
		outputs.length = units.length;
		outputs[0] = evaluateLayer(0, inputs);
		for(int i = 1; i < units.length; i++) {
			outputs[i] = this.evaluateLayer(i, outputs[i - 1]);
		}
		return outputs;
	}
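
	/* Shape of the value evaluateFull returns, e.g. for a hypothetical
	 * 2-input, 3-hidden, 1-output network:
	 *   result[0] : the 3 hidden-unit outputs
	 *   result[1] : the 1 output-unit output (what evaluate() returns)
	 */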

	// Helper function to evaluate the outputs of a single layer.
	private double[] evaluateLayer(uint layer, double[] layerInputs) {
		double[] output;
		output.length = units[layer].length;
		for(int i = 0; i < units[layer].length; i++) {
			output[i] = evaluateUnit(layer, i, layerInputs);
		}
		return output;
	}

	// Helper function to evaluate the output of a single unit.
	private double evaluateUnit(uint layer, uint unit, double[] layerInputs) {
		double total = units[layer][unit][0]; // start from the bias weight
		for(int i = 1; i < layerInputs.length + 1; i++) {
			total += layerInputs[i - 1] * units[layer][unit][i]; // w_i * x_i
		}
		if(functions[layer] != null)
			return functions[layer](total); // apply the output function (if there is one)
		return total; // no output function for this layer: return the raw sum
	}
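
	/* Deterministic sanity check of the forward pass (a sketch relying only
	 * on this module): with randomize = false every weight equals `value`,
	 * and a null entry in `functions` makes a unit return its raw weighted
	 * sum, so a 2-input single unit with value = 0.5 computes
	 * 0.5 (bias) + 0.5*1.0 + 0.5*1.0 = 1.5. */
	unittest {
		OutputFunctionPtr[] fns = new OutputFunctionPtr[1]; // null => identity output
		auto nn = new Backprop(2, [1u], fns, 0.03, 0.1, 0.5, false);
		double[] output = nn.evaluate([1.0, 1.0]);
		assert(output.length == 1);
		assert(output[0] == 1.5);
	}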

	////////////////////////////////////////////////////// Training //////////////////////////////////////////////////////
	/// Trains the neural network.
	/// TODO: Pull error calculation into a separate function. (maybe)
	public void train(double[][] trainingInputs, double[][] trainingOutputs,
			bool stochastic = false) {
		if(trainingInputs.length != trainingOutputs.length)
			throw new InputException(
					"trainingInputs and trainingOutputs must be the same size");
		double[][][] weightUpdate;
		double[][] outputsError;
		double[][] outputs;
		double total; //temp variable
		
		// Initialize the weightUpdate and outputsError variables
		weightUpdate.length = units.length;
		outputsError.length = units.length;
		//writefln("#%d,%d",weightUpdate.length,outputsError.length);
		for(int i = 0; i < units.length; i++) {
			weightUpdate[i].length = units[i].length;
			outputsError[i].length = units[i].length;
			//writefln("##(%d)%d,%d",i,weightUpdate[i].length,outputsError[i].length);
			for(int j = 0; j < weightUpdate[i].length; j++) {
				weightUpdate[i][j].length = units[i][j].length;
				for(int k = 0; k < weightUpdate[i][j].length; k++)
					weightUpdate[i][j][k] = 0.0f;
			//writefln("###(%d)%d",j,weightUpdate[i][j].length);
			}
		}
		
		
		// Loop through each of the training examples
		for(int example = 0; example < trainingInputs.length; example++) {
			outputs = evaluateFull(trainingInputs[example]);
			
			
			// Compute the error term of each output unit: delta = o(1-o)(t-o),
			// where o(1-o) is the derivative of the sigmoid output function
			for(int i = 0; i < outputs[$ - 1].length; i++) { // units of the last layer
				outputsError[$ - 1][i] = outputs[$ - 1][i] * (1 - outputs[$ - 1][i]) * (trainingOutputs[example][i] - outputs[$ - 1][i]);
			}
			
			// Loop through each of the hidden layers (backwards - BACKpropagation!)
			for(int layer = units.length - 2; layer >= 0; layer--) { // -2 to skip the output layer
				// loop through the units in each hidden layer
				for(int unit = 0; unit < units[layer].length; unit++) {
					total = 0;
					// total up w * delta for the units this unit's output feeds into
					for(int k = 0; k < units[layer + 1].length; k++) {
						total += units[layer + 1][k][unit + 1] * outputsError[layer + 1][k]; // +1 skips the bias weight
					}
					// multiply the total by o(1-o) and store it as this unit's error term
					outputsError[layer][unit] = outputs[layer][unit] * (1 - outputs[layer][unit]) * total;
				}
			}
			
			// Special case: the first layer reads the training inputs rather than a previous layer's outputs
			for(int unit = 0; unit < units[0].length; unit++) { // unit
				//writefln(":%d,%d,%d,%d",j,weightUpdate.length,weightUpdate[0].length,weightUpdate[0][j].length);
				weightUpdate[0][unit][0] += outputsError[0][unit]; //bias
				for(int input = 1; input < units[0][unit].length; input++) { // input
					weightUpdate[0][unit][input] += outputsError[0][unit] * trainingInputs[example][input - 1]; // account for bias
				}
			}
			
			
			// Accumulate the weight updates for the remaining layers
			for(int i = 1; i < units.length; i++) { // layer
				for(int j = 0; j < units[i].length; j++) { // unit
					weightUpdate[i][j][0] += outputsError[i][j]; //bias
					for(int k = 1; k < units[i][j].length; k++) { // input
						//writefln("[%d,%d,%d]=%f; %f; %f",i,j,k,weightUpdate[i][j][k],outputsError[i][j],outputs[i-1][k-1]);
						weightUpdate[i][j][k] += outputsError[i][j] * outputs[i - 1][k - 1]; // previous layer, account for bias
					}
				}
			}
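
			// Weight update rule (with momentum), applied immediately below in
			// stochastic mode, or once after the example loop in batch mode:
			//   w += learningRate * dw + momentum * dw_previous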
			if(stochastic) {
				// Apply the weightUpdate array to the weights
				for(int i = 0; i < units.length; i++) { // layer
					for(int j = 0; j < units[i].length; j++) { // unit
						for(int k = 0; k < units[i][j].length; k++) { // input
							units[i][j][k] += this.learningRate * weightUpdate[i][j][k] + (this.momentum * this.oldWeightUpdate[i][j][k]);
							this.oldWeightUpdate[i][j][k] = weightUpdate[i][j][k];
							weightUpdate[i][j][k] = 0;
						}
					}
				}
			}
		}
		
		if(!stochastic) {
			// Apply the weightUpdate array to the weights
			for(int i = 0; i < units.length; i++) { // layer
				for(int j = 0; j < units[i].length; j++) { // unit
					for(int k = 0; k < units[i][j].length; k++) { // input
						//writefln("[%d,%d,%d]=%f; %f",i,j,k,units[i][j][k],weightUpdate[i][j][k]);
						units[i][j][k] += this.learningRate * weightUpdate[i][j][k] + (this.momentum * this.oldWeightUpdate[i][j][k]);
					}
				}
			}
			this.oldWeightUpdate = weightUpdate;
		}
	}
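
	/* A typical training loop (a sketch: the data set and the 0.01 stopping
	 * threshold are illustrative, not part of this module):
	 *
	 *   double[][] ins  = [[0.0,0.0], [0.0,1.0], [1.0,0.0], [1.0,1.0]];
	 *   double[][] outs = [[0.0], [1.0], [1.0], [0.0]];
	 *   while(nn.calculateError(ins, outs) > 0.01)
	 *       nn.train(ins, outs, true); // true = per-example (stochastic) updates
	 */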

	/// Calculates the squared error over the whole training set: E = 1/2 * sum of (t - o)^2 over all examples and outputs.
	double calculateError(double[][] trainingInputs, double[][] trainingOutputs) {
		if(trainingInputs.length != trainingOutputs.length)
			throw new InputException(
					"trainingInputs and trainingOutputs must be the same size");
		double[] outputs;
		double total = 0, temp;
		for(int i = 0; i < trainingInputs.length; i++) {
			outputs = evaluate(trainingInputs[i]);
			if(outputs.length != trainingOutputs[i].length)
				throw new InputException("Wrong output length");
			for(int j = 0; j < outputs.length; j++) {
				temp = trainingOutputs[i][j] - outputs[j];
				//writefln("&%f,%f",temp*temp,total);
				total += temp * temp;
			}
		}
		return 0.5 * total;
	}

	/// Returns the weight array. Note that .dup is a shallow copy: the
	/// outermost array is duplicated, but the per-layer and per-unit arrays
	/// still alias the network's internal state.
	double[][][] getWeights() {
		return units.dup;
	}
}