gradient.py

import numpy as np

from network import Network, average
from organism import *
from constants import *
from utils import gradient, sigmoid_der, progress_bar

# TODO: add organisms and it will be done in a simple way
# TODO: maybe add the solution's accuracy as an additional argument

def gradient_algorithm(func, arg_num, domain_list, min_max, probe_numb):
    """
    Population-based gradient search: each organism takes small signed
    gradient steps and the population is re-scored every epoch.

    :type func: function
    """
    organisms = create_population(arg_num, domain_list, probe_numb)
    iterations = 0
    while iterations < MAX_EPOCHS:
        iterations += 1
        # for each organism
        for org in organisms:
            # compute the gradient at the organism's current arguments
            grad = gradient(func, org.data)  # (derivatives[i](*org.data))
            # update the arguments with respect to the gradient
            for i in range(arg_num):
                # A safer method is to move by some constant step, i.e. something
                # not directly dependent on the derivative, as done here.
                # The step should, however, vary with the generation,
                # specifically decrease; but by how much? Logarithmically?
                org.data[i] -= np.sign(grad[i]) * 0.01
        asses(organisms, func)
        organisms = sorted(organisms, key=lambda x: x.ocena, reverse=False)
        print(iterations, '/', MAX_EPOCHS)
    asses(organisms, func)
    organisms = sorted(organisms, key=lambda x: x.ocena, reverse=False)
    return organisms[0].data
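

# A minimal usage sketch (not part of the original file): it assumes that
# domain_list is a list of (low, high) bounds per argument and that MIN comes
# from constants.py; create_population and asses live in organism.py.
def _demo_gradient_algorithm():
    def sphere(x, y):
        return x ** 2 + y ** 2
    # two arguments, each bounded to [-5, 5], population of 20 organisms
    best = gradient_algorithm(sphere, 2, [(-5, 5), (-5, 5)], MIN, 20)
    print("best arguments found:", best)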


# TODO: change the gradient function so that it takes the arguments:
# function, domains, number of arguments (even if it were not to use it),
# min_max mode, number of samples:
# (function, arg_num, domain_list, min_max, probe_num, learn_rate, tol)
# Let the learn_rate and tol parameters come last (so the other inputs do not
# have to be accounted for); that way this function can also be tested nicely.
# (A hedged sketch of this signature follows gradient_func below.)
def gradient_func(function, start, min_max=MIN,
                  learn_rate=0.1, max_iter=1_000, tol=0.0001):
    """Plain gradient ascent/descent from a single starting point."""
    for _ in range(max_iter):
        diffs = gradient(function, start)
        # clip each step to at most learn_rate in magnitude
        diffs = [np.sign(d) * min(abs(d), learn_rate) for d in diffs]
        if all(abs(d) < tol for d in diffs):
            break
        for i in range(len(start)):
            start[i] += (min_max == MAX) * diffs[i]
            start[i] -= (min_max == MIN) * diffs[i]
    return start
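

# A hedged sketch of the signature described in the TODO above; this is an
# assumption about the intended refactor, not the original implementation.
# It assumes domain_list is a list of (low, high) pairs, one per argument;
# arg_num is accepted but unused, as the TODO allows.
def gradient_search_sketch(function, arg_num, domain_list, min_max, probe_num,
                           learn_rate=0.1, tol=0.0001):
    best_args, best_val = None, None
    for _ in range(probe_num):
        # sample a random starting point from the given domains
        start = [np.random.uniform(low, high) for low, high in domain_list]
        args = gradient_func(function, start, min_max=min_max,
                             learn_rate=learn_rate, tol=tol)
        val = function(*args)
        better = (best_val is None
                  or (min_max == MIN and val < best_val)
                  or (min_max == MAX and val > best_val))
        if better:
            best_args, best_val = args, val
    return best_args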


def computate_derivatives(net, input, ex_output):
    """
    Computes the derivatives for a single pair of network output and expected output.

    :param net: Network to calculate derivatives for
    :param input: single input for the net, list of arguments
    :param ex_output: single expected output, list of arguments
    :return: derivatives packed into a Network object
    """
    # variable to hold the results to return
    ders_net = Network.create_empty(net.model_shape)
    # compute values for each neuron (forward pass)
    net_output = net.process(input)
    # d_cost/d_activation at the network output
    d_cost_d_a = 2 * (np.matrix(net_output) - np.matrix(ex_output))
    # backward pass: propagate the cost derivative layer by layer
    for i in reversed(range(net.nb_layers)):
        v_prev = np.matrix(input) if i == 0 else net.layers[i - 1]["v"]
        d_cost_d_z = np.multiply(np.transpose(d_cost_d_a), sigmoid_der(net.layers[i]["z"]))
        d_cost_d_b = d_cost_d_z
        d_cost_d_w = np.transpose(v_prev) @ np.transpose(d_cost_d_z)
        d_cost_d_a = np.transpose(net.layers[i]["w"] @ d_cost_d_z)
        ders_net.layers[i]["w"] = d_cost_d_w
        ders_net.layers[i]["b"] = d_cost_d_b
    return ders_net
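

# The backpropagation relations applied in computate_derivatives, written out
# for reference (a sketch of the math; the exact matrix orientations follow
# the Network class conventions in network.py):
#   a = sigmoid(z), with z built from the previous layer's values v_prev
#   dC/da  = 2 * (a_out - y)              at the output layer
#   dC/dz  = dC/da * sigmoid'(z)          (element-wise product)
#   dC/db  = dC/dz
#   dC/dW  = v_prev^T (dC/dz)^T
#   dC/da_prev = W dC/dz                  propagated to the layer below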


def net_gradient(net, inputs, targets, **kwargs):
    """
    Training function for a neural network. Uses **gradient** descent to update
    the network's weights and biases.

    :param inputs: list of the network's inputs
        (when a single input for the network is a list, then it is a list of lists)
    :param targets: list of expected outputs, one for each input in the previous argument
    :return: history for debugging and plotting
    """
    params = GradientConst.instance()
    history = {
        "all_costs": [],
        "av_costs": [],
        "success_rate": []
    }
    train_data = list(zip(inputs, targets))
    for ep in range(params.MAX_EPOCHS):
        epoch_costs = []
        # train on mini-batches
        for batch_ptr in range(0, len(train_data), params.BATCH_SIZE):
            inout = train_data[batch_ptr:batch_ptr + params.BATCH_SIZE]
            ders_buffer = []
            for input, ex_output in inout:
                # for debugging: squared-error cost of the current prediction
                cost = np.sum(np.float_power(np.subtract(net(input), ex_output), 2))
                epoch_costs += [cost]
                # ders is a Network object which contains derivatives for weights and biases
                ders = computate_derivatives(net, input, ex_output)
                ders_buffer += [ders]
            if len(ders_buffer) > 0:
                # apply the batch-averaged gradient step
                net -= average(ders_buffer)
        if kwargs.get("test_network_simple"):
            history["all_costs"] += epoch_costs
            history["av_costs"] += [np.mean(epoch_costs)]
        if kwargs.get("test_data"):
            success_rate = 0
            for data in kwargs["test_data"]:
                results = net(data[0])
                if all(abs(a - y) < 0.5 for a, y in zip(results, data[1])):
                    success_rate += 1
            success_rate /= len(kwargs["test_data"])
            history["success_rate"] += [success_rate]
            if success_rate > 0.9:
                break
            # early stopping: no recent improvement in success rate or average cost
            av_coeff = 5
            if len(history["success_rate"]) > av_coeff and \
                    success_rate <= sum(history["success_rate"][-1 - av_coeff:-1]) / av_coeff and \
                    history["av_costs"][-1] >= sum(history["av_costs"][-1 - av_coeff:-1]) / av_coeff:
                break
        if params.SHOW_PROGRESS:
            progress_bar(ep, params.MAX_EPOCHS)
    return history
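

# A hedged end-to-end sketch (not part of the original file): the Network
# constructor call below is an assumption; the real signature lives in
# network.py. Trains on XOR and reports the last average cost.
if __name__ == "__main__":
    xor_inputs = [[0, 0], [0, 1], [1, 0], [1, 1]]
    xor_targets = [[0], [1], [1], [0]]
    xor_net = Network((2, 3, 1))  # assumed: network built from a layer-shape tuple
    hist = net_gradient(xor_net, xor_inputs, xor_targets,
                        test_network_simple=True,
                        test_data=list(zip(xor_inputs, xor_targets)))
    if hist["av_costs"]:
        print("final average cost:", hist["av_costs"][-1])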