
[Machine Learning] Getting Started with Deep Learning

Introductory notes on deep learning.

The Rosenblatt Model

# dataset.py
import numpy as np

def get_beans(counts):
    xs = np.random.rand(counts)
    xs = np.sort(xs)
    ys = [1.2 * x + np.random.rand() / 10 for x in xs]
    return xs, ys

# rosenblatt.py
import dataset
from matplotlib import pyplot as plt

xs, ys = dataset.get_beans(100)

# configure the figure
plt.title("size-toxicity function", fontsize=12)
plt.xlabel("bean size")
plt.ylabel("toxicity")

w = 0.05
alpha = 0.05
for i in range(100):
    for j in range(100):
        x = xs[j]
        y = ys[j]
        y_pre = w * x
        error = y - y_pre
        w = w + error * x * alpha

print("w=", w)
y_pre = w * xs
plt.plot(xs, y_pre)
plt.scatter(xs, ys)
plt.show()

  • import numpy as np — `as` gives the imported module an alias
  • for i in range(100) — loops 100 times, with i running from 0 to 99
  • plt.scatter(xs, ys) — draws a scatter plot
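
The inner loop's update `w = w + error * x * alpha` is the Rosenblatt-style error-correction rule: each sample nudges the weight in proportion to its prediction error and its input,

$$
w \leftarrow w + \alpha \, (y - \hat{y}) \, x
$$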

The Cost Function (cost_function)

# The cost function is derived from the squared error:
# e = (y - w*x)^2 = x^2 * w^2 + (-2*x*y) * w + y^2
# Let a = x^2 and b = -2*x*y;
# then the slope of the cost curve is k = de/dw = 2*a*w + b
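
As a quick sanity check, the analytic slope k = 2*a*w + b can be compared with a numerical finite difference (a minimal sketch; the sample values x, y, w here are arbitrary):

# check_slope.py -- verify k = 2*a*w + b against a finite difference
x, y, w = 0.6, 0.8, 1.5  # arbitrary sample values

a = x ** 2
b = -2 * x * y
k_analytic = 2 * a * w + b

e = lambda w: (y - w * x) ** 2  # single-sample cost
eps = 1e-6
k_numeric = (e(w + eps) - e(w - eps)) / (2 * eps)

print(k_analytic, k_numeric)  # the two values agree closely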
# cost_function.py
import dataset
import numpy as np
import matplotlib.pyplot as plt

xs, ys = dataset.get_beans(100)
# configure the figure
plt.title("size-toxicity function", fontsize=12)
plt.xlabel("bean size")
plt.ylabel("toxicity")
plt.scatter(xs, ys)
# plt.show()

ws = np.arange(0, 3, 0.01)
es_a = []  # one mean squared error per candidate w
for w in ws:
    y_pre = xs * w
    es = (1 / 100) * np.sum((ys - y_pre) ** 2)
    es_a.append(es)  # append to the end of the list
es_min = min(es_a)
print("minimum es: " + str(es_min))
w_num = es_a.index(es_min)
print("index of best w: " + str(w_num))
print(str(ws[w_num]))
# cost-curve plot (uncomment to view)
# plt.title("size-toxicity function", fontsize=12)
# plt.xlabel("w")
# plt.ylabel("es_average")
# plt.plot(ws, es_a)
# plt.show()

plt.title("final", fontsize=12)
plt.xlabel("xs")
plt.ylabel("y_pre")
ys_fit = xs * ws[w_num]
plt.plot(xs, ys_fit)
plt.show()

  • np.arange(start, stop, step) — evenly spaced values
  • list.append(x) — appends an object to the end of the list
  • np.sum() — sums the elements
  • min(list) — smallest value in a list (a built-in, not a list method)
  • list.index(x) — index of the first occurrence of x (a short demo of these calls follows)
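
A minimal demo of the calls listed above (the values are arbitrary):

# api_demo.py
import numpy as np

ws = np.arange(0, 3, 0.01)  # evenly spaced values in [0, 3)
print(len(ws))  # 300
es = []
es.append(float(np.sum(np.array([1, 2, 3]) ** 2)))  # np.sum adds up elements -> 14.0
es.append(0.5)
print(min(es))  # 0.5, the smallest element
print(es.index(min(es)))  # 1, the index of its first occurrence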

Gradient Descent and Backpropagation

  • Stochastic gradient descent
# sgd1.py
import dataset
import numpy as np
import matplotlib.pyplot as plt

xs, ys = dataset.get_beans(500)
w = 0.01
y_pre = w * xs
plt.title("size-toxicity function", fontsize=12)
plt.xlabel("size")
plt.ylabel("toxicity")
plt.scatter(xs, ys)
plt.plot(xs, y_pre)
plt.show()

# stochastic gradient descent: update w after every single sample
for _ in range(500):
    for i in range(500):
        x = xs[i]
        y = ys[i]
        k = 2 * (x ** 2) * w + (-2 * x * y)  # slope of the single-sample cost
        alpha = 0.1
        w = w - alpha * k
    plt.clf()  # clear the figure
    plt.scatter(xs, ys)
    y_pre = w * xs
    plt.xlim(0, 1.2)
    plt.ylim(0, 1.2)
    plt.plot(xs, y_pre)
    plt.pause(0.01)
  • Batch gradient descent (mini-batches of 10)
# sgd2.py
import dataset
import numpy as np
import matplotlib.pyplot as plt

xs, ys = dataset.get_beans(500)
w = 0.01
y_pre = w * xs
plt.title("size-toxicity function", fontsize=12)
plt.xlabel("size")
plt.ylabel("toxicity")
plt.scatter(xs, ys)
plt.plot(xs, y_pre)
plt.show()

# batch gradient descent: average the slope over a batch of 10 samples,
# then update w once per batch
alpha = 0.1
for batch in range(50):
    k = 0
    for i in range(10):
        x = xs[10 * batch + i]
        y = ys[10 * batch + i]
        k += 2 * (x ** 2) * w + (-2 * x * y)
    k = k / 10  # average slope over the batch
    w = w - alpha * k
    plt.clf()  # clear the figure
    y_pre = w * xs
    plt.xlim(0, 1.2)
    plt.ylim(0, 1.2)
    plt.scatter(xs, ys)
    plt.plot(xs, y_pre)
    plt.pause(0.01)
  • Fixed-step gradient descent
# sgd3.py
import dataset
import numpy as np
import matplotlib.pyplot as plt

xs, ys = dataset.get_beans(500)
w = 0.01
y_pre = w * xs
plt.title("size-toxicity function", fontsize=12)
plt.xlabel("size")
plt.ylabel("toxicity")
plt.scatter(xs, ys)
plt.plot(xs, y_pre)
plt.show()

# fixed-step gradient descent: only the sign of the slope is used,
# and w always moves by a constant step
step = 0.01  # step size
for batch in range(50):
    k = 0
    for i in range(10):
        x = xs[10 * batch + i]
        y = ys[10 * batch + i]
        k += 2 * (x ** 2) * w + (-2 * x * y)
    k = k / 10  # average slope over the batch
    if k > 0:
        w = w - step
    else:
        w = w + step
    plt.clf()  # clear the figure
    y_pre = w * xs
    plt.xlim(0, 1.2)
    plt.ylim(0, 1.2)
    plt.scatter(xs, ys)
    plt.plot(xs, y_pre)
    plt.pause(0.01)
  • plt.clf() — clears the current figure
  • plt.xlim(a, b) — sets the x-axis range
  • plt.ylim(a, b) — sets the y-axis range
  • plt.pause(time) — pauses so the figure can redraw (used for animation)
# Plot the squared-error cost surface (3D) for the prediction function y = w*x + b
import matplotlib.pyplot as plt
import dataset
import numpy as np
from mpl_toolkits.mplot3d import Axes3D  # registers the 3D projection on older matplotlib

m = 100
xs, ys = dataset.get_beans(m)
plt.title("Size-Toxicity Function", fontsize=12)
plt.xlabel('Bean Size')
plt.ylabel('Toxicity')
plt.scatter(xs, ys)

w = 0.01
b = 0.01
y_pre = xs * w + b
plt.plot(xs, y_pre)
plt.show()

fig = plt.figure()
ax = fig.add_subplot(projection='3d')  # Axes3D(fig) is deprecated in newer matplotlib
ax.set_zlim(0, 2)
# scan over (w, b) and record the mean squared error for each combination
ws = np.arange(0, 2, 0.1)
bs = np.arange(-3, 3, 0.1)

for w in ws:
    es = []
    for b in bs:
        y_pre = xs * w + b
        e = (1 / m) * np.sum((ys - y_pre) ** 2)
        es.append(e)
    ax.plot(bs, es, w, zdir='y')  # one cost curve per w, stacked along the y-axis

plt.show()

Activation Functions

# dataset.py
import numpy as np

def get_beans(counts):
    xs = np.random.rand(counts)
    xs = np.sort(xs)
    ys = np.zeros(counts)
    for i in range(counts):
        x = xs[i]
        yi = 0.7 * x + (0.5 - np.random.rand()) / 50 + 0.5
        if yi > 0.8:
            ys[i] = 1
    return xs, ys
# activation.py
import dataset
import numpy as np
import matplotlib.pyplot as plt

xs, ys = dataset.get_beans(100)
w = 0.01
b = 0.01
z = w * xs + b
a = 1 / (1 + np.exp(-z))

plt.title("size-toxicity function", fontsize=12)
plt.xlabel("size")
plt.ylabel("toxicity")
plt.scatter(xs, ys)
plt.plot(xs, a)
plt.show()

# stochastic gradient descent
for _ in range(5000):
    alpha = 0.05
    for i in range(100):
        x = xs[i]
        y = ys[i]

        z = w * x + b
        a = 1 / (1 + np.exp(-z))  # activation function (sigmoid)
        e = (y - a) ** 2  # cost function

        # chain rule: compute the gradients for backpropagation
        deda = -2 * (y - a)
        dadz = a * (1 - a)
        dzdw = x
        dedw = deda * dadz * dzdw
        dzdb = 1
        dedb = deda * dadz * dzdb

        w = w - dedw * alpha
        b = b - dedb * alpha  # parameter update
    if _ % 100 == 0:
        plt.clf()  # clear the figure
        plt.scatter(xs, ys)
        z = w * xs + b
        a = 1 / (1 + np.exp(-z))
        plt.xlim(0, 1)
        plt.ylim(0, 1.2)
        plt.plot(xs, a)
        plt.pause(0.01)

The sigmoid function is introduced as the network's activation function; it maps any input into the interval (0, 1):

$$
a = \frac{1}{1 + e^{-z}}
$$

$$
0 < a < 1
$$
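
A convenient property used in backpropagation below is that the sigmoid's derivative can be written in terms of its own output, da/dz = a(1 - a). A minimal check against a finite difference:

# sigmoid_deriv.py
import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

z = 0.3  # arbitrary test point
a = sigmoid(z)
eps = 1e-6
numeric = (sigmoid(z + eps) - sigmoid(z - eps)) / (2 * eps)
print(a * (1 - a), numeric)  # analytic and numerical derivatives agree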

Hidden Layers

Introduce one hidden layer; this first layer has 2 neurons.

The subscripts on the weights and biases get tedious and are easy to mix up (a vectorized alternative is sketched after the code below).

import dataset
import numpy as np
import matplotlib.pyplot as plt

xs, ys = dataset.get_beans(100)


# activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))


# forward propagation
def forward_propagation(x):
    z11_1 = w11_1 * x + b1_1
    a11_1 = sigmoid(z11_1)
    z21_1 = w21_1 * x + b2_1
    a21_1 = sigmoid(z21_1)
    z1_2 = w11_2 * a11_1 + w12_2 * a21_1 + b1_2
    a1_2 = sigmoid(z1_2)
    return z11_1, a11_1, z21_1, a21_1, z1_2, a1_2


plt.title("size-toxicity function", fontsize=12)
plt.xlabel("size")
plt.ylabel("toxicity")
plt.scatter(xs, ys)

plt.show()
# first-layer neurons
# subscript convention ab_c: neuron a, input b, layer c
w11_1 = np.random.rand()
w21_1 = np.random.rand()
b1_1 = np.random.rand()
b2_1 = np.random.rand()
# second-layer neuron (output)
w11_2 = np.random.rand()
w12_2 = np.random.rand()
b1_2 = np.random.rand()

# stochastic gradient descent
for _ in range(5000):
    alpha = 0.05
    for i in range(100):
        x = xs[i]
        y = ys[i]

        # forward propagation
        z11_1, a11_1, z21_1, a21_1, z1_2, a1_2 = forward_propagation(x)
        # cost function
        e = (y - a1_2) ** 2

        # chain rule: output-layer gradients
        deda1_2 = -2 * (y - a1_2)
        da1_2dz1_2 = a1_2 * (1 - a1_2)
        dz1_2dw11_2 = a11_1
        dz1_2dw12_2 = a21_1
        dedw11_2 = deda1_2 * da1_2dz1_2 * dz1_2dw11_2
        dedw12_2 = deda1_2 * da1_2dz1_2 * dz1_2dw12_2

        dz1_2db1_2 = 1
        dedb1_2 = deda1_2 * da1_2dz1_2 * dz1_2db1_2

        # chain rule: first hidden neuron
        dz1_2da11_1 = w11_2
        da11_1dz11_1 = a11_1 * (1 - a11_1)
        dz11_1dw11_1 = x
        dedw11_1 = deda1_2 * da1_2dz1_2 * dz1_2da11_1 * da11_1dz11_1 * dz11_1dw11_1
        dz11_1db1_1 = 1
        dedb1_1 = deda1_2 * da1_2dz1_2 * dz1_2da11_1 * da11_1dz11_1 * dz11_1db1_1

        # chain rule: second hidden neuron
        dz1_2da21_1 = w12_2
        da21_1dz21_1 = a21_1 * (1 - a21_1)
        dz21_1dw21_1 = x
        dedw21_1 = deda1_2 * da1_2dz1_2 * dz1_2da21_1 * da21_1dz21_1 * dz21_1dw21_1
        dz21_1db2_1 = 1
        dedb2_1 = deda1_2 * da1_2dz1_2 * dz1_2da21_1 * da21_1dz21_1 * dz21_1db2_1

        # backpropagation: update the parameters
        w11_1 = w11_1 - alpha * dedw11_1
        w21_1 = w21_1 - alpha * dedw21_1
        b1_1 = b1_1 - alpha * dedb1_1
        b2_1 = b2_1 - alpha * dedb2_1
        w11_2 = w11_2 - alpha * dedw11_2
        w12_2 = w12_2 - alpha * dedw12_2
        b1_2 = b1_2 - alpha * dedb1_2

    if _ % 100 == 0:
        plt.clf()  # clear the figure
        plt.scatter(xs, ys)
        z11_1, a11_1, z21_1, a21_1, z1_2, a1_2 = forward_propagation(xs)
        plt.xlim(0, 2.5)
        plt.ylim(0, 1.2)
        plt.plot(xs, a1_2)
        plt.pause(0.01)
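
The subscript bookkeeping above collapses once the weights are written as matrices. Below is a minimal vectorized sketch of the same 1-2-1 network (my own reformulation, not the original code; the shapes and names W1, b1, W2, b2 are assumptions): the forward and backward passes become a handful of matrix products, and the per-parameter chain-rule variables disappear.

# vectorized.py -- matrix form of the same 1-2-1 network (illustrative sketch)
import numpy as np
import dataset

xs, ys = dataset.get_beans(100)

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

W1 = np.random.rand(2, 1)  # hidden layer: 2 neurons, 1 input
b1 = np.random.rand(2, 1)
W2 = np.random.rand(1, 2)  # output layer: 1 neuron, 2 inputs
b2 = np.random.rand(1, 1)
alpha = 0.05

for _ in range(5000):
    for i in range(100):
        x = np.array([[xs[i]]])  # shape (1, 1)
        y = ys[i]

        # forward pass
        z1 = W1 @ x + b1   # (2, 1)
        a1 = sigmoid(z1)
        z2 = W2 @ a1 + b2  # (1, 1)
        a2 = sigmoid(z2)

        # backward pass: the same chain rule, now in matrix form
        delta2 = -2 * (y - a2) * a2 * (1 - a2)    # de/dz2
        delta1 = (W2.T @ delta2) * a1 * (1 - a1)  # de/dz1

        W2 -= alpha * delta2 @ a1.T
        b2 -= alpha * delta2
        W1 -= alpha * delta1 @ x.T
        b1 -= alpha * delta1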

The Keras Framework

Installing Keras with Anaconda

  1. Install Anaconda, then configure PyCharm to use it.
  2. Open Anaconda Prompt to get a command line.
  3. Run: conda create -n xxx python=3.8
  4. Run: conda activate xxx
  5. Inside the xxx environment, run: conda install tensorflow-gpu
  6. Run: conda install keras-gpu
  7. Confirm with y + Enter.
  8. Once installation finishes, create a new PyCharm project and select the configured Anaconda environment as the Python interpreter.
  9. Done!

(Anaconda automatically installs compatible versions of CUDA and cuDNN for us.)

The sections below show how to use Keras in code.

The Keras (Chinese) documentation says:
If you are running on the TensorFlow or CNTK backend, your code will automatically run on the GPU whenever an available GPU is detected.

But I haven't managed to get that working yet; all of my training so far has run on the CPU...
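
A quick way to check whether TensorFlow actually sees a GPU (an empty list means training will fall back to the CPU):

# gpu_check.py
import tensorflow as tf

print(tf.config.list_physical_devices('GPU'))  # e.g. [] when no GPU is visible
print(tf.test.is_built_with_cuda())  # whether this TensorFlow build has CUDA support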

Using Keras in Code

Keras documentation (Chinese): https://keras.io/zh/

Code written with the Keras framework expresses forward propagation, activation functions, and the loss (cost) function concisely.

When using it, you only need to think about the dimensionality of the input data and the number of neurons in each hidden layer.

import time
import dataset
import numpy as np
import plot_utils
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD

m = 100
X, Y = dataset.get_beans4(m)
plot_utils.show_scatter(X, Y)

time_start = time.time()

model = Sequential()
model.add(Dense(units=2, activation='sigmoid', input_dim=2))
model.add(Dense(units=1, activation='sigmoid'))

# model.compile(loss='mean_squared_error', optimizer='sgd', metrics=['accuracy'])
model.compile(loss='mean_squared_error', optimizer=SGD(lr=0.05), metrics=['accuracy'])  # set the learning rate lr=0.05 (newer Keras spells it learning_rate)
model.fit(X, Y, epochs=5000, batch_size=10)

pres = model.predict(X)

time_end = time.time()
# plot_utils.show_scatter_curve(X, Y, pres)
plot_utils.show_scatter_surface(X, Y, model)
print('time cost', time_end - time_start, 's')

Deep Learning

A network with more than three hidden layers is called a deep neural network.

TensorFlow Playground (interactive visualization): http://playground.tensorflow.org/

With 1 neuron in the hidden layer, the 0.5 level set corresponds to a single line in the input space; after the activation function it becomes a single curve.

With 2 hidden neurons, it corresponds to two intersecting lines, which become 2 curves after the activation function.

With 3 hidden neurons, it corresponds to 3 intersecting lines, which become 3 curves after the activation function.
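
To make this concrete, here is a minimal sketch (weights are hand-picked for illustration, not trained) that draws the 0.5 level set of a 2-hidden-neuron network over a 2D input grid; the two straight lines defined by the hidden neurons turn into a curved boundary once the sigmoids are combined:

# levelset.py -- 0.5 level set of a tiny 2-neuron network (illustrative weights)
import numpy as np
import matplotlib.pyplot as plt

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

x1, x2 = np.meshgrid(np.linspace(-3, 3, 200), np.linspace(-3, 3, 200))

h1 = sigmoid(2.0 * x1 + 1.0 * x2)   # hidden neuron 1: line 2*x1 + x2 = 0
h2 = sigmoid(-1.0 * x1 + 2.0 * x2)  # hidden neuron 2: line -x1 + 2*x2 = 0
out = sigmoid(3.0 * h1 + 3.0 * h2 - 3.0)  # output neuron combines them

plt.contour(x1, x2, out, levels=[0.5])  # the 0.5 level set is now curved
plt.title("0.5 level set, 2 hidden neurons")
plt.xlabel("x1")
plt.ylabel("x2")
plt.show()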

# dataset.py
import numpy as np
import random

def get_beans(counts):
    # two interleaved spirals, one per class
    posX, posY = genSpiral(int(counts / 2), 0, 1)
    negX, negY = genSpiral(int(counts / 2), np.pi, 0)
    X = np.vstack((posX, negX))
    Y = np.hstack((posY, negY))
    return X, Y

def genSpiral(counts, deltaT, label):
    X = np.zeros((counts, 2))
    Y = np.zeros(counts)
    for i in range(counts):
        r = i / counts * 5
        t = 1.75 * i / counts * 2 * np.pi + deltaT
        x1 = r * np.sin(t) + random.uniform(-0.1, 0.1)
        x2 = r * np.cos(t) + random.uniform(-0.1, 0.1)
        X[i] = np.array([x1, x2])
        Y[i] = label
    return X, Y

def dist(a, b):
    dx = a['x'] - b['x']
    dy = a['y'] - b['y']
    return np.sqrt(dx * dx + dy * dy)

def getCircleLabel(p, center):
    radius = 1
    if dist(p, center) < (radius * 0.5):
        return 1
    else:
        return 0

def randUniform(a=-1, b=1):
    return np.random.rand() * (b - a) + a

def classifyCircleData(numSamples=100, noise=0):
    # points inside a small disc are positive, points in an outer ring are negative
    Y = []
    X = []
    radius = 1
    num = int(numSamples / 2)
    for i in range(num):
        r = randUniform(0, radius * 0.5)
        angle = randUniform(0, 2 * np.pi)
        x = r * np.sin(angle)
        y = r * np.cos(angle)
        noiseX = randUniform(-radius, radius) * noise
        noiseY = randUniform(-radius, radius) * noise
        label = getCircleLabel({'x': x + noiseX, 'y': y + noiseY}, {'x': 0, 'y': 0})
        X.append([x + 1, y + 1])
        Y.append(label)

    for i in range(num):
        r = randUniform(radius * 0.7, radius)
        angle = randUniform(0, 2 * np.pi)
        x = r * np.sin(angle)
        y = r * np.cos(angle)
        noiseX = randUniform(-radius, radius) * noise
        noiseY = randUniform(-radius, radius) * noise
        label = getCircleLabel({'x': x + noiseX, 'y': y + noiseY}, {'x': 0, 'y': 0})
        X.append([x + 1, y + 1])
        Y.append(label)

    X = np.array(X)
    Y = np.array(Y)

    return X, Y

# main.py
import time
import dataset
import numpy as np
import plot_utils
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD

m = 100
X, Y = dataset.get_beans(m)
plot_utils.show_scatter(X, Y)

time_start = time.time()

model = Sequential()
model.add(Dense(units=8, activation='relu', input_dim=2))
model.add(Dense(units=8, activation='relu'))
model.add(Dense(units=8, activation='relu'))
model.add(Dense(units=1, activation='sigmoid'))


# model.compile(loss='mean_squared_error', optimizer='sgd', metrics=['accuracy'])
model.compile(loss='mean_squared_error', optimizer=SGD(lr=0.05), metrics=['accuracy'])
model.fit(X, Y, epochs=5000, batch_size=10)

pres = model.predict(X)

time_end = time.time()
# plot_utils.show_scatter_curve(X, Y, pres)
plot_utils.show_scatter_surface(X, Y, model)
print('time cost', time_end - time_start, 's')

Convolutional Neural Networks

A first pass at image recognition: training on the MNIST dataset, without convolutions.

# A first pass at image recognition: train on MNIST, without convolutions
import time
from tensorflow.keras.datasets import mnist
# import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical

(X_train, Y_train), (X_test, Y_test) = mnist.load_data()
print("X_train shape:" + str(X_train.shape))
print("Y_train shape:" + str(Y_train.shape))
print("X_test shape:" + str(X_test.shape))
print("Y_test shape:" + str(Y_test.shape))

print(Y_train[0])
plt.imshow(X_train[0], cmap='gray')
plt.show()
# flatten each 28x28 image to a 784-vector and scale pixels to [0, 1]
X_train = X_train.reshape(60000, 784) / 255.0
X_test = X_test.reshape(10000, 784) / 255.0

# one-hot encode the labels (10 classes)
Y_train = to_categorical(Y_train, 10)
Y_test = to_categorical(Y_test, 10)

time_start = time.time()

model = Sequential()
model.add(Dense(units=256, activation='relu', input_dim=784))
model.add(Dense(units=256, activation='relu'))
model.add(Dense(units=256, activation='relu'))
model.add(Dense(units=10, activation='softmax'))
# train

model.compile(loss='categorical_crossentropy', optimizer=SGD(lr=0.05), metrics=['accuracy'])
model.fit(X_train, Y_train, epochs=5000, batch_size=1024)

time_end = time.time()

print('time cost', time_end - time_start, 's')
print(model.get_weights())
loss, accuracy = model.evaluate(X_test, Y_test)
print("loss: " + str(loss))
print("accuracy: " + str(accuracy))

# final accuracy is roughly 96%

Reproducing LeNet-5 (a Classic Convolutional Network)

# Reproducing LeNet-5 (a classic convolutional network)
import time
from tensorflow.keras.datasets import mnist
# import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Conv2D  # 2D convolution
from tensorflow.keras.layers import AveragePooling2D  # 2D average pooling
from tensorflow.keras.layers import Flatten  # flatten before the dense layers

(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

# keep the 28x28 spatial layout, add a channel axis, scale pixels to [0, 1]
X_train = X_train.reshape(60000, 28, 28, 1) / 255.0
X_test = X_test.reshape(10000, 28, 28, 1) / 255.0

Y_train = to_categorical(Y_train, 10)
Y_test = to_categorical(Y_test, 10)

time_start = time.time()

model = Sequential()
# filters: number of convolution kernels; kernel_size: kernel size;
# strides: step size; padding: 'valid' or 'same', the two convolution modes
model.add(
    Conv2D(filters=6, kernel_size=(5, 5), strides=(1, 1), input_shape=(28, 28, 1), padding='valid', activation='relu'))
# 2x2 pooling layer
model.add(AveragePooling2D(pool_size=(2, 2)))
# no input_shape needed here; Keras infers the input shape automatically
model.add(Conv2D(filters=16, kernel_size=(5, 5), strides=(1, 1), padding='valid', activation='relu'))
model.add(AveragePooling2D(pool_size=(2, 2)))
# flatten, then feed into the dense (fully connected) layers
model.add(Flatten())
model.add(Dense(units=120, activation='relu'))
model.add(Dense(units=84, activation='relu'))
model.add(Dense(units=10, activation='softmax'))
# softmax is the usual choice for multi-class outputs and works well here
# train

# use the categorical cross-entropy loss, which works better for classification
# than the mean squared error used earlier
model.compile(loss='categorical_crossentropy', optimizer=SGD(lr=0.05), metrics=['accuracy'])
# epochs: number of training passes; batch_size: samples per gradient update
model.fit(X_train, Y_train, epochs=50, batch_size=1024)

time_end = time.time()
# evaluate on the test set
print('time cost', time_end - time_start, 's')
print(model.get_weights())
loss, accuracy = model.evaluate(X_test, Y_test)
print("loss: " + str(loss))
print("accuracy: " + str(accuracy))

# final accuracy is roughly 98%
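
For reference, the softmax activation and the categorical cross-entropy loss used above are, for logits $z$ and a one-hot label $y$:

$$
\text{softmax}(z)_i = \frac{e^{z_i}}{\sum_j e^{z_j}}
$$

$$
e = -\sum_i y_i \log(a_i)
$$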