3
|
1 module aid.nn.multilevel.backprop;
|
|
2
|
|
3 import aid.nn.outputFunctions;
|
|
4 import aid.misc;
|
|
5 import std.random;
|
|
6 import std.stream;
|
|
7
|
|
class Backprop {
	private uint numInputs;
	private float[][][] units; // Includes the output units. units[layer][unit][inputWeight]; index 0 of each unit is the bias weight.
	private OutputFunctionPtr[] functions; // One activation function per layer; a null entry means identity.
	public float learningRate;

	/// Constructor.
	/// Params:
	///   numInputs = number of inputs fed to the first layer
	///   numUnits  = number of units in each layer (the last entry is the output layer)
	///   functions = activation function for each layer (must match numUnits in length)
	///   value     = weight-initialization magnitude
	///   randomize = if true, weights are uniform in [-value, value]; otherwise every weight is exactly value
	/// Throws: InputException on empty or mismatched layer specifications.
	public this(uint numInputs, uint[] numUnits, OutputFunctionPtr[] functions, float value=0.05, bool randomize=true){
		if(numUnits.length == 0) throw new InputException("numUnits must be greater than 0");
		if(numUnits.length != functions.length) throw new InputException("numUnits and functions must be the same length");
		this.numInputs = numInputs;
		this.functions = functions;
		units.length = numUnits.length; // BUG FIX: units was never allocated, so initUnitLayer indexed an empty array
		initUnitLayer(0, numUnits[0], numInputs, value, randomize);
		for(int i=1; i<numUnits.length; i++){
			initUnitLayer(i, numUnits[i], numUnits[i-1], value, randomize);
		}
	}

	// Helper function to initialize a certain layer.
	// numPrev is the width of the previous layer (or the network input for layer 0);
	// each unit gets numPrev+1 weights because slot 0 holds the bias.
	private void initUnitLayer(uint layer, uint num, uint numPrev, float value, bool randomize){
		units[layer].length = num;
		for(int i=0; i<num; i++){
			units[layer][i].length = numPrev+1; // include the bias weight
			for(int j=0; j<numPrev+1; j++){
				if(randomize) units[layer][i][j] = rnd() * value * 2 - value; // between -value and value
				else units[layer][i][j] = value;
			}
		}
	}

	////////////////////////////////////////////////////// Evaluation //////////////////////////////////////////////////////
	/// Evaluates the neural network and returns the outputs of the final (output) layer.
	public float[] evaluate(float[] inputs){
		return evaluateFull(inputs)[$-1]; // BUG FIX: [$] is one past the end; the last layer is [$-1]
	}

	/// Evaluates the neural network and returns the output from all units, layer by layer.
	/// Throws: InputException if inputs does not match the configured input width.
	public float[][] evaluateFull(float[] inputs){
		if(inputs.length != numInputs) throw new InputException("Wrong length of inputs.");
		float[][] outputs;
		outputs.length = units.length;
		outputs[0] = evaluateLayer(0, inputs);
		// BUG FIX: the loop previously started at 0, re-evaluating layer 0 with outputs[-1].
		for(int i=1; i<units.length; i++){
			outputs[i] = this.evaluateLayer(i, outputs[i-1]);
		}
		return outputs;
	}

	// Helper function to evaluate the outputs of a single layer.
	private float[] evaluateLayer(uint layer, float[] layerInputs){
		float[] output;
		// BUG FIX: a layer produces one output per UNIT, not one per input;
		// sizing by layerInputs.length was wrong whenever the widths differ.
		output.length = units[layer].length;
		for(int i=0; i<units[layer].length; i++){
			output[i] = evaluateUnit(layer, i, layerInputs);
		}
		return output;
	}

	// Helper function to evaluate the output of a single unit.
	private float evaluateUnit(uint layer, uint unit, float[] layerInputs){
		float total = units[layer][unit][0]; // bias
		// BUG FIX: the bound was "< layerInputs.length", which skipped the last
		// input/weight pair; weights occupy indices 1..layerInputs.length inclusive.
		for(int i=1; i<=layerInputs.length; i++){
			total += layerInputs[i-1] * units[layer][unit][i]; // wi * xi
		}
		if(functions[layer] != null) return functions[layer](total); // apply the function (if there is one)
		else return total; // just return the result instead
	}


	////////////////////////////////////////////////////// Training //////////////////////////////////////////////////////
	/// Trains the neural network on a batch of examples using standard backpropagation
	/// with a sigmoid-derivative error term o(1-o), then applies the accumulated
	/// weight updates scaled by learningRate.
	/// TODO:
	///   Pull error calculation into a separate function.
	/// Throws: InputException if the input and output batches differ in size.
	public void train(float[][] allInputs, float[][] allOutputs){
		if(allInputs.length != allOutputs.length) throw new InputException("allInputs and allOutputs must be the same size");
		float[][][] weightUpdate;
		float[][] outputsError;
		float[][] outputs;
		float total; // temp variable

		// Initialize the weightUpdate and outputsError variables
		weightUpdate.length = units.length;
		outputsError.length = units.length;
		for(int i=0; i<weightUpdate.length; i++){
			weightUpdate[i].length = units[i].length;
			outputsError[i].length = units[i].length;
			// BUG FIX: the inner loop incremented i instead of j (infinite loop / overrun).
			for(int j=0; j<weightUpdate[i].length; j++){
				weightUpdate[i][j].length = units[i][j].length;
				weightUpdate[i][j][] = 0; // BUG FIX: D floats default to NaN; += needs a zeroed accumulator
			}
		}


		// Loop through each of the training examples
		for(int example=0; example < allInputs.length; example++){
			outputs = evaluateFull(allInputs[example]);

			// Computing error of output layer (BUG FIX: [$] replaced with [$-1] throughout)
			for(int i=0; i<outputs[$-1].length; i++)
				outputsError[$-1][i] = outputs[$-1][i] * (1 - outputs[$-1][i]) * (allOutputs[example][i] - outputs[$-1][i]); // o(1-o)(t-o)

			// Loop through each of the hidden layers (backwards - BACKpropagation!)
			for(int i=cast(int)units.length-2; i >= 0; i--){ // -2 to skip the output layer
				// loop through the units in each hidden layer
				for(int j=0; j<units[i].length; j++){
					total=0;
					// total up w * e for the units the output of this unit goes into
					for(int k=0; k<units[i+1].length; k++){
						total += units[i+1][k][j+1] * outputsError[i+1][k]; // j+1 skips the bias weight
					}
					// multiply total by o(1-o), store in outputsError
					outputsError[i][j] = outputs[i][j] * (1 - outputs[i][j]) * total;
				}
			}

			// special case for the units that receive the input values
			for(int j=0; j<units[0].length; j++){ // unit
				weightUpdate[0][j][0] += outputsError[0][j]; // bias
				for(int k=1; k<units[0][j].length; k++){ // input
					weightUpdate[0][j][k] += outputsError[0][j] * allInputs[example][k-1];
				}
			}

			// Update the weightUpdate array
			for(int i=1; i<units.length; i++){ // layer
				for(int j=0; j<units[i].length; j++){ // unit
					weightUpdate[i][j][0] += outputsError[i][j]; // bias
					for(int k=1; k<units[i][j].length; k++){ // input
						weightUpdate[i][j][k] += outputsError[i][j] * outputs[i-1][k-1]; // previous layer, account for bias
					}
				}
			}
		}

		// Apply the weightUpdate array to the weights
		for(int i=0; i<units.length; i++){ // layer
			for(int j=0; j<units[i].length; j++){ // unit
				for(int k=0; k<units[i][j].length; k++){ // input
					units[i][j][k] += this.learningRate * weightUpdate[i][j][k];
				}
			}
		}
	}

	/// Calculates the summed squared error, 0.5 * sum((target - output)^2), over all examples.
	/// Throws: InputException on mismatched batch sizes or output widths.
	float calculateError(float[][] allInputs, float[][] allOutputs){
		if(allInputs.length != allOutputs.length) throw new InputException("allInputs and allOutputs must be the same size");
		float[] outputs;
		// BUG FIX: total was uninitialized (NaN in D), so the result was always NaN.
		float total = 0, temp;
		for(int i=0; i<allInputs.length; i++){
			outputs = evaluate(allInputs[i]);
			if(outputs.length != allOutputs[i].length) throw new InputException("Wrong output length");
			for(int j=0; j<outputs.length; j++){
				temp = allOutputs[i][j] - outputs[j];
				total += temp * temp;
			}
		}
		return 0.5 * total;
	}
}
|
|
167
|
|
168
|
|
169
|
|
170
|