Adagrad-demo

This demo implements the first two tips from this article.

Implemented: Tip 1 (Adagrad) + Tip 2 (Stochastic Gradient Descent).
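
For reference, these are the two update rules the demo combines, written in their standard textbook forms (not quoted from the linked article): SGD computes the gradient on a single example $(x_k, y_k)$ per step instead of summing over the whole dataset, and Adagrad divides the learning rate by the root of each parameter's accumulated squared gradients:

$$
\theta_{t+1} = \theta_t - \frac{\eta}{\sqrt{\sum_{i=0}^{t} g_i^{2}}}\, g_t,
\qquad
g_t = \frac{\partial}{\partial \theta}\bigl(y_k - b - w\, x_k\bigr)^{2}\Big|_{\theta = \theta_t},
\quad \theta \in \{b, w\}.
$$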

Demo code

#################
# 2020/03/06 #
# Adagrad demo #
#################

import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model

# data
x_data = [[338.], [333.], [328.], [207.], [226.], [25.], [179.], [60.], [208.], [606.]]

y_data = [640., 633., 619., 393., 428., 27., 193., 66., 226., 1591.]

# (b, w) grid for the loss contour: x-axis is b, y-axis is w
x = np.arange(-200, -100, 1)
y = np.arange(-5, 5, 0.1)
Z = np.zeros((len(y), len(x)))

# compute the loss at every grid point (b, w)
for i in range(len(x)):
    for j in range(len(y)):
        b = x[i]
        w = y[j]
        for k in range(len(x_data)):
            Z[j][i] += (y_data[k] - b - w * x_data[k][0])**2

# initial
b = -120
w = -4
lr = 1 # learning rate
iteration = 100000

# record the (b, w) trajectory for plotting
b_his = [b]
w_his = [w]

# Adagrad
b_grad_sum2 = 0.0
w_grad_sum2 = 0.0

for i in range(iteration):
    for k in range(len(x_data)):
        # per-sample gradients of the squared error (SGD: update after each example)
        b_grad = 2 * (y_data[k] - b - w * x_data[k][0]) * (-1)
        w_grad = 2 * (y_data[k] - b - w * x_data[k][0]) * (-x_data[k][0])
        # Adagrad: accumulate squared gradients per parameter
        b_grad_sum2 += b_grad**2
        w_grad_sum2 += w_grad**2
        # step with the learning rate scaled by 1/sqrt(accumulated squared gradients)
        b = b - lr / np.sqrt(b_grad_sum2) * b_grad
        w = w - lr / np.sqrt(w_grad_sum2) * w_grad
        b_his.append(b)
        w_his.append(w)

# sklearn linear model (closed-form fit, used as a reference solution)
reg = linear_model.LinearRegression()
reg.fit(x_data, y_data)
print(reg.coef_[0])     # w
print(reg.intercept_)   # b

# draw the loss contour, the sklearn solution, and the (b, w) trajectory
plt.contourf(x, y, Z, 50, alpha=0.5, cmap=plt.get_cmap('jet'))
plt.plot(reg.intercept_, reg.coef_, 'x', ms=13, lw=1.5, color='orange')  # sklearn optimum
plt.plot(b_his, w_his, 'o-', ms=3, lw=1.5, color='black')  # SGD + Adagrad path

plt.xlim(-200, -100)
plt.ylim(-5, 5)
plt.xlabel('$b$', fontsize=16)
plt.ylabel('$w$', fontsize=16)
# plt.show()
plt.savefig("Loss.png")
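
The same per-sample Adagrad update can also be written more compactly with NumPy arrays. The snippet below is only a sketch for comparison (my own rewrite with new variable names and a shorter run), not part of the original demo:

# Vectorized restatement of the SGD + Adagrad loop above
# (sketch only; not part of the original demo, variable names are new).
import numpy as np

X = np.array([338., 333., 328., 207., 226., 25., 179., 60., 208., 606.])
Y = np.array([640., 633., 619., 393., 428., 27., 193., 66., 226., 1591.])

theta = np.array([-120.0, -4.0])   # [b, w], same starting point as above
acc = np.zeros(2)                  # Adagrad accumulators for [b, w]
eta = 1.0                          # base learning rate

for epoch in range(10000):         # shorter run than the demo's 100000 iterations
    for xk, yk in zip(X, Y):
        err = yk - (theta[0] + theta[1] * xk)
        grad = np.array([-2.0 * err, -2.0 * err * xk])  # d(err^2)/d[b, w]
        acc += grad ** 2
        theta -= eta / np.sqrt(acc) * grad              # Adagrad-scaled step

print(theta)  # should land near sklearn's (intercept_, coef_[0])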

Loss iteration plot

The resulting figure is quite intuitive: it shows the loss contour over (b, w), the black SGD + Adagrad trajectory, and the sklearn solution marked with an orange cross.

(figure: Loss.png)

Author: f7ed · Posted on 2020-03-09 · Updated on 2020-07-03 · Licensed under CC BY-NC-SA 4.0

