module aid.nn.multilevel.backprop;

import aid.nn.outputFunctions;
import aid.misc;
import std.random;
import std.stdio;

class Backprop {
	private uint numInputs;
	private double[][][] units; // Includes the output units. Indexed as units[layer][unit][weight]; weight 0 is the bias.
	private OutputFunctionPtr[] functions;
	public double learningRate;

	/// Constructor.
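	/// Params:
	///   numInputs    = number of inputs to the first layer
	///   numUnits     = number of units in each layer; the last entry is the output layer
	///   functions    = the output function applied by each layer (null for a raw weighted sum)
	///   learningRate = step size used when applying accumulated weight updates
	///   value        = initial weight value, or the range (-value, value) when randomize is set
	///   randomize    = pick random initial weights instead of using value directly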
	public this(uint numInputs, uint[] numUnits, OutputFunctionPtr[] functions, double learningRate=0.03, double value=0.1, bool randomize=true){
		if(numUnits.length == 0) throw new InputException("numUnits must contain at least one layer");
		if(numUnits.length != functions.length) throw new InputException("numUnits and functions must be the same length");
		this.numInputs = numInputs;
		this.functions = functions;
		this.learningRate = learningRate;
		units.length = numUnits.length;
		initUnitLayer(0, numUnits[0], numInputs, value, randomize);
		for(int i=1; i<numUnits.length; i++){
			initUnitLayer(i, numUnits[i], numUnits[i-1], value, randomize);
		}
	}

	// Helper function to initialize a single layer.
	private void initUnitLayer(uint layer, uint num, uint numPrev, double value, bool randomize){
		units[layer].length = num;
		for(int i=0; i<num; i++){
			units[layer][i].length = numPrev+1; // include the bias weight
			for(int j=0; j<numPrev+1; j++){
				if(randomize) units[layer][i][j] = rnd() * value * 2 - value; // between -value and value
				else units[layer][i][j] = value;
			}
		}
	}

	////////////////////////////////////////////////////// Evaluation //////////////////////////////////////////////////////
	/// Evaluates the neural network.
	public double[] evaluate(double[] inputs){
		return evaluateFull(inputs)[$-1]; // the last item (the output layer) of the return value
	}

	/// Evaluates the neural network and returns the output from all units.
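	/// The return value is indexed as outputs[layer][unit]; outputs[$-1] holds the final network outputs.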
	public double[][] evaluateFull(double[] inputs){
		if(inputs.length != numInputs) throw new InputException("Wrong length of inputs.");
		double[][] outputs;
		outputs.length = units.length;
		outputs[0] = evaluateLayer(0, inputs);
		for(int i=1; i<units.length; i++){
			outputs[i] = this.evaluateLayer(i, outputs[i-1]);
		}
		return outputs;
	}

	// Helper function to evaluate the outputs of a single layer.
	private double[] evaluateLayer(uint layer, double[] layerInputs){
		double[] output;
		output.length = units[layer].length;
		for(int i=0; i<units[layer].length; i++){
			output[i] = evaluateUnit(layer, i, layerInputs);
		}
		return output;
	}

	// Helper function to evaluate the output of a single unit.
	private double evaluateUnit(uint layer, uint unit, double[] layerInputs){
		double total = units[layer][unit][0]; // start with the bias weight
		for(int i=1; i<layerInputs.length+1; i++){
			total += layerInputs[i-1] * units[layer][unit][i]; // wi * xi
		}
		if(functions[layer] != null) return functions[layer](total); // apply the output function (if there is one)
		return total; // no output function; just return the weighted sum
	}


	////////////////////////////////////////////////////// Training //////////////////////////////////////////////////////
	/// Trains the neural network on a batch of examples.
	/// TODO: Pull the error calculation into a separate function.
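	///
	/// Uses standard backpropagation for sigmoid units: each output unit gets the error
	/// term d = o(1-o)(t-o), each hidden unit gets d = o(1-o) * (sum of w*d over the units
	/// it feeds into), and each weight accumulates d*x over the whole batch before
	/// learningRate times the accumulated update is added to it. The o(1-o) factor is the
	/// derivative of the sigmoid, so this rule assumes the layers use sigmoid output functions.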
	public void train(double[][] trainingInputs, double[][] trainingOutputs){
		if(trainingInputs.length != trainingOutputs.length) throw new InputException("trainingInputs and trainingOutputs must be the same size");
		double[][][] weightUpdate;
		double[][] outputsError;
		double[][] outputs;
		double total; // temp variable

		// Initialize the weightUpdate and outputsError arrays
		weightUpdate.length = units.length;
		outputsError.length = units.length;
		for(int i=0; i<units.length; i++){
			weightUpdate[i].length = units[i].length;
			outputsError[i].length = units[i].length;
			for(int j=0; j<weightUpdate[i].length; j++){
				weightUpdate[i][j].length = units[i][j].length;
				for(int k=0; k<weightUpdate[i][j].length; k++) weightUpdate[i][j][k] = 0.0;
			}
		}

		// Loop through each of the training examples
		for(int example=0; example<trainingInputs.length; example++){
			outputs = evaluateFull(trainingInputs[example]);

			// Compute the error of the output layer: o(1-o)(t-o)
			for(int i=0; i<outputs[$-1].length; i++){ // units of the last layer
				outputsError[$-1][i] = outputs[$-1][i] * (1 - outputs[$-1][i]) * (trainingOutputs[example][i] - outputs[$-1][i]);
			}

			// Loop through each of the hidden layers (backwards - BACKpropagation!)
			for(int layer=cast(int)units.length-2; layer>=0; layer--){ // -2 to skip the output layer
				// Loop through the units in each hidden layer
				for(int unit=0; unit<units[layer].length; unit++){
					total = 0;
					// Total up w * e for the units this unit's output feeds into
					for(int k=0; k<units[layer+1].length; k++){
						total += units[layer+1][k][unit+1/* +1 for bias */] * outputsError[layer+1][k];
					}
					// Multiply the total by o(1-o) and store it in outputsError
					outputsError[layer][unit] = outputs[layer][unit] * (1 - outputs[layer][unit]) * total;
				}
			}

			// Special case for the units that receive the input values
			for(int unit=0; unit<units[0].length; unit++){
				weightUpdate[0][unit][0] += outputsError[0][unit]; // bias
				for(int input=1; input<units[0][unit].length; input++){
					weightUpdate[0][unit][input] += outputsError[0][unit] * trainingInputs[example][input-1]; // -1 to account for the bias
				}
			}

			// Accumulate the weight updates for the remaining layers
			for(int i=1; i<units.length; i++){ // layer
				for(int j=0; j<units[i].length; j++){ // unit
					weightUpdate[i][j][0] += outputsError[i][j]; // bias
					for(int k=1; k<units[i][j].length; k++){ // input
						weightUpdate[i][j][k] += outputsError[i][j] * outputs[i-1][k-1]; // previous layer's output, -1 for the bias
					}
				}
			}
		}

		// Apply the weightUpdate array to the weights
		for(int i=0; i<units.length; i++){ // layer
			for(int j=0; j<units[i].length; j++){ // unit
				for(int k=0; k<units[i][j].length; k++){ // input
					units[i][j][k] += this.learningRate * weightUpdate[i][j][k];
				}
			}
		}
	}

	/// Calculates the total output error over a set of training examples.
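	/// Returns E = 0.5 * (sum over all examples and output units of (t - o)^2).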
	double calculateError(double[][] trainingInputs, double[][] trainingOutputs){
		if(trainingInputs.length != trainingOutputs.length) throw new InputException("trainingInputs and trainingOutputs must be the same size");
		double[] outputs;
		double total=0, temp;
		for(int i=0; i<trainingInputs.length; i++){
			outputs = evaluate(trainingInputs[i]);
			if(outputs.length != trainingOutputs[i].length) throw new InputException("Wrong output length");
			for(int j=0; j<outputs.length; j++){
				temp = trainingOutputs[i][j] - outputs[j];
				total += temp * temp;
			}
		}
		return 0.5 * total;
	}

	double[][][] getWeights(){
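		// Note: .dup is a shallow copy; the inner layer and unit arrays are still shared.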
		return units.dup;
	}
}

void printArray(double[] array){
	if(array.length == 0){ // guard: length-1 on an empty array would wrap around
		writefln("[]");
		return;
	}
	writef("[");
	for(int i=0; i+1<array.length; i++){
		writef("%f, ", array[i]);
	}
	writefln("%f]", array[$-1]);
}

void printArray(double[][] array){
	writef("[");
	for(int i=0; i<array.length; i++){
		printArray(array[i]);
	}
	writefln("]");
}

void printArray(double[][][] array){
	writef("[");
	for(int i=0; i<array.length; i++){
		printArray(array[i]);
	}
	writefln("]");
}
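// A minimal usage sketch (disabled by default), assuming aid.nn.outputFunctions
// provides a sigmoid function usable as an OutputFunctionPtr; the name sigmoid
// is an assumption, so substitute whatever the module actually exports. It
// trains a 2-2-1 network on XOR and prints the remaining squared error.
version(none) unittest {
	double[][] ins  = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]];
	double[][] outs = [[0.0], [1.0], [1.0], [0.0]];
	OutputFunctionPtr sig = &sigmoid; // hypothetical: assumes double sigmoid(double)
	auto net = new Backprop(2, [2u, 1u], [sig, sig], 0.3); // 2 hidden units, 1 output
	for(int epoch=0; epoch<5000; epoch++) net.train(ins, outs); // batch gradient descent
	writefln("error after training: %f", net.calculateError(ins, outs));
	printArray(net.evaluate([1.0, 0.0])); // should be close to [1.0]
}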