
[Machine Learning] Getting Started with Deep Learning

Introductory notes on deep learning.

The Rosenblatt Model

# dataset.py
import numpy as np

def get_beans(counts):
    xs = np.random.rand(counts)
    xs = np.sort(xs)
    ys = [1.2 * x + np.random.rand() / 10 for x in xs]
    return xs, ys

# rosenblatt.py
import dataset
from matplotlib import pyplot as plt

xs, ys = dataset.get_beans(100)

# configure the figure
plt.title("size-toxicity function", fontsize=12)
plt.xlabel("bean size")
plt.ylabel("toxicity")

w = 0.05
alpha = 0.05
for i in range(100):
    for j in range(100):
        x = xs[j]
        y = ys[j]
        y_pre = w * x
        error = y - y_pre
        w = w + error * x * alpha

print("w=", w)
y_pre = w * xs
plt.plot(xs, y_pre)
plt.scatter(xs, ys)
plt.show()

  • import numpy as np — `as` gives the imported module an alias
  • for i in range(100) — loops 100 times, with i running from 0 to 99
  • plt.scatter(xs, ys) — draws a scatter plot
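
The inner loop's update `w = w + error * x * alpha` is the Rosenblatt-style error-correction rule: each sample nudges the weight in proportion to its prediction error and its input,

$$
w \leftarrow w + \alpha \, (y - \hat{y}) \, x
$$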

The Cost Function (cost_function)

# The cost function is derived from the squared error:
# e = (y - w*x)^2 = x^2 * w^2 + (-2*x*y) * w + y^2
# Let a = x^2 and b = -2*x*y;
# then the slope of the cost curve is k = de/dw = 2*a*w + b
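
As a quick sanity check, the analytic slope k = 2*a*w + b can be compared with a numerical finite difference (a minimal sketch; the sample values x, y, w here are arbitrary):

# check_slope.py -- verify k = 2*a*w + b against a finite difference
x, y, w = 0.6, 0.8, 1.5  # arbitrary sample values

a = x ** 2
b = -2 * x * y
k_analytic = 2 * a * w + b

e = lambda w: (y - w * x) ** 2  # single-sample cost
eps = 1e-6
k_numeric = (e(w + eps) - e(w - eps)) / (2 * eps)

print(k_analytic, k_numeric)  # the two values agree closely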
# cost_function.py
import dataset
import numpy as np
import matplotlib.pyplot as plt

xs, ys = dataset.get_beans(100)
# configure the figure
plt.title("size-toxicity function", fontsize=12)
plt.xlabel("bean size")
plt.ylabel("toxicity")
plt.scatter(xs, ys)
# plt.show()

ws = np.arange(0, 3, 0.01)
es_a = []  # one mean squared error per candidate w
for w in ws:
    y_pre = xs * w
    es = (1 / 100) * np.sum((ys - y_pre) ** 2)
    es_a.append(es)  # append to the end of the list
es_min = min(es_a)
print("minimum es: " + str(es_min))
w_num = es_a.index(es_min)
print("index of best w: " + str(w_num))
print(str(ws[w_num]))
# cost-curve plot (uncomment to view)
# plt.title("size-toxicity function", fontsize=12)
# plt.xlabel("w")
# plt.ylabel("es_average")
# plt.plot(ws, es_a)
# plt.show()

plt.title("final", fontsize=12)
plt.xlabel("xs")
plt.ylabel("y_pre")
ys_fit = xs * ws[w_num]
plt.plot(xs, ys_fit)
plt.show()

  • np.arange(start, stop, step) — evenly spaced values
  • list.append(x) — appends an object to the end of the list
  • np.sum() — sums the elements
  • min(list) — smallest value in a list (a built-in, not a list method)
  • list.index(x) — index of the first occurrence of x (a short demo of these calls follows)
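
A minimal demo of the calls listed above (the values are arbitrary):

# api_demo.py
import numpy as np

ws = np.arange(0, 3, 0.01)  # evenly spaced values in [0, 3)
print(len(ws))  # 300
es = []
es.append(float(np.sum(np.array([1, 2, 3]) ** 2)))  # np.sum adds up elements -> 14.0
es.append(0.5)
print(min(es))  # 0.5, the smallest element
print(es.index(min(es)))  # 1, the index of its first occurrence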

Gradient Descent and Backpropagation

  • Stochastic gradient descent
# sgd1.py
import dataset
import numpy as np
import matplotlib.pyplot as plt

xs, ys = dataset.get_beans(500)
w = 0.01
y_pre = w * xs
plt.title("size-toxicity function", fontsize=12)
plt.xlabel("size")
plt.ylabel("toxicity")
plt.scatter(xs, ys)
plt.plot(xs, y_pre)
plt.show()

# stochastic gradient descent: update w after every single sample
for _ in range(500):
    for i in range(500):
        x = xs[i]
        y = ys[i]
        k = 2 * (x ** 2) * w + (-2 * x * y)  # slope of the single-sample cost
        alpha = 0.1
        w = w - alpha * k
    plt.clf()  # clear the figure
    plt.scatter(xs, ys)
    y_pre = w * xs
    plt.xlim(0, 1.2)
    plt.ylim(0, 1.2)
    plt.plot(xs, y_pre)
    plt.pause(0.01)
  • Batch gradient descent (mini-batches of 10)
# sgd2.py
import dataset
import numpy as np
import matplotlib.pyplot as plt

xs, ys = dataset.get_beans(500)
w = 0.01
y_pre = w * xs
plt.title("size-toxicity function", fontsize=12)
plt.xlabel("size")
plt.ylabel("toxicity")
plt.scatter(xs, ys)
plt.plot(xs, y_pre)
plt.show()

# batch gradient descent: average the slope over a batch of 10 samples,
# then update w once per batch
alpha = 0.1
for batch in range(50):
    k = 0
    for i in range(10):
        x = xs[10 * batch + i]
        y = ys[10 * batch + i]
        k += 2 * (x ** 2) * w + (-2 * x * y)
    k = k / 10  # average slope over the batch
    w = w - alpha * k
    plt.clf()  # clear the figure
    y_pre = w * xs
    plt.xlim(0, 1.2)
    plt.ylim(0, 1.2)
    plt.scatter(xs, ys)
    plt.plot(xs, y_pre)
    plt.pause(0.01)
  • Fixed-step gradient descent
# sgd3.py
import dataset
import numpy as np
import matplotlib.pyplot as plt

xs, ys = dataset.get_beans(500)
w = 0.01
y_pre = w * xs
plt.title("size-toxicity function", fontsize=12)
plt.xlabel("size")
plt.ylabel("toxicity")
plt.scatter(xs, ys)
plt.plot(xs, y_pre)
plt.show()

# fixed-step gradient descent: only the sign of the slope is used,
# and w always moves by a constant step
step = 0.01  # step size
for batch in range(50):
    k = 0
    for i in range(10):
        x = xs[10 * batch + i]
        y = ys[10 * batch + i]
        k += 2 * (x ** 2) * w + (-2 * x * y)
    k = k / 10  # average slope over the batch
    if k > 0:
        w = w - step
    else:
        w = w + step
    plt.clf()  # clear the figure
    y_pre = w * xs
    plt.xlim(0, 1.2)
    plt.ylim(0, 1.2)
    plt.scatter(xs, ys)
    plt.plot(xs, y_pre)
    plt.pause(0.01)
  • plt.clf() — clears the current figure
  • plt.xlim(a, b) — sets the x-axis range
  • plt.ylim(a, b) — sets the y-axis range
  • plt.pause(time) — pauses so the figure can redraw (used for animation)
# Plot the squared-error cost surface (3D) for the prediction function y = w*x + b
import matplotlib.pyplot as plt
import dataset
import numpy as np
from mpl_toolkits.mplot3d import Axes3D  # registers the 3D projection on older matplotlib

m = 100
xs, ys = dataset.get_beans(m)
plt.title("Size-Toxicity Function", fontsize=12)
plt.xlabel('Bean Size')
plt.ylabel('Toxicity')
plt.scatter(xs, ys)

w = 0.01
b = 0.01
y_pre = xs * w + b
plt.plot(xs, y_pre)
plt.show()

fig = plt.figure()
ax = fig.add_subplot(projection='3d')  # Axes3D(fig) is deprecated in newer matplotlib
ax.set_zlim(0, 2)
# scan over (w, b) and record the mean squared error for each combination
ws = np.arange(0, 2, 0.1)
bs = np.arange(-3, 3, 0.1)

for w in ws:
    es = []
    for b in bs:
        y_pre = xs * w + b
        e = (1 / m) * np.sum((ys - y_pre) ** 2)
        es.append(e)
    ax.plot(bs, es, w, zdir='y')  # one cost curve per w, stacked along the y-axis

plt.show()

Activation Functions

# dataset.py
import numpy as np

def get_beans(counts):
    xs = np.random.rand(counts)
    xs = np.sort(xs)
    ys = np.zeros(counts)
    for i in range(counts):
        x = xs[i]
        yi = 0.7 * x + (0.5 - np.random.rand()) / 50 + 0.5
        if yi > 0.8:
            ys[i] = 1
    return xs, ys
# activation.py
import dataset
import numpy as np
import matplotlib.pyplot as plt

xs, ys = dataset.get_beans(100)
w = 0.01
b = 0.01
z = w * xs + b
a = 1 / (1 + np.exp(-z))

plt.title("size-toxicity function", fontsize=12)
plt.xlabel("size")
plt.ylabel("toxicity")
plt.scatter(xs, ys)
plt.plot(xs, a)
plt.show()

# stochastic gradient descent
for _ in range(5000):
    alpha = 0.05
    for i in range(100):
        x = xs[i]
        y = ys[i]

        z = w * x + b
        a = 1 / (1 + np.exp(-z))  # activation function (sigmoid)
        e = (y - a) ** 2  # cost function

        # chain rule: compute the gradients for backpropagation
        deda = -2 * (y - a)
        dadz = a * (1 - a)
        dzdw = x
        dedw = deda * dadz * dzdw
        dzdb = 1
        dedb = deda * dadz * dzdb

        w = w - dedw * alpha
        b = b - dedb * alpha  # parameter update
    if _ % 100 == 0:
        plt.clf()  # clear the figure
        plt.scatter(xs, ys)
        z = w * xs + b
        a = 1 / (1 + np.exp(-z))
        plt.xlim(0, 1)
        plt.ylim(0, 1.2)
        plt.plot(xs, a)
        plt.pause(0.01)

The sigmoid function is introduced as the network's activation function; it maps any input into the interval (0, 1):

$$
a = \frac{1}{1 + e^{-z}}
$$

$$
0 < a < 1
$$
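
A convenient property used in backpropagation below is that the sigmoid's derivative can be written in terms of its own output, da/dz = a(1 - a). A minimal check against a finite difference:

# sigmoid_deriv.py
import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

z = 0.3  # arbitrary test point
a = sigmoid(z)
eps = 1e-6
numeric = (sigmoid(z + eps) - sigmoid(z - eps)) / (2 * eps)
print(a * (1 - a), numeric)  # analytic and numerical derivatives agree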

Hidden Layers

Introduce one hidden layer; this first layer has 2 neurons.

The subscripts on the weights and biases get tedious and are easy to mix up (a vectorized alternative is sketched after the code below).

import dataset
import numpy as np
import matplotlib.pyplot as plt

xs, ys = dataset.get_beans(100)


# activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))


# forward propagation
def forward_propagation(x):
    z11_1 = w11_1 * x + b1_1
    a11_1 = sigmoid(z11_1)
    z21_1 = w21_1 * x + b2_1
    a21_1 = sigmoid(z21_1)
    z1_2 = w11_2 * a11_1 + w12_2 * a21_1 + b1_2
    a1_2 = sigmoid(z1_2)
    return z11_1, a11_1, z21_1, a21_1, z1_2, a1_2


plt.title("size-toxicity function", fontsize=12)
plt.xlabel("size")
plt.ylabel("toxicity")
plt.scatter(xs, ys)

plt.show()
# first-layer neurons
# subscript convention ab_c: neuron a, input b, layer c
w11_1 = np.random.rand()
w21_1 = np.random.rand()
b1_1 = np.random.rand()
b2_1 = np.random.rand()
# second-layer neuron (output)
w11_2 = np.random.rand()
w12_2 = np.random.rand()
b1_2 = np.random.rand()

# stochastic gradient descent
for _ in range(5000):
    alpha = 0.05
    for i in range(100):
        x = xs[i]
        y = ys[i]

        # forward propagation
        z11_1, a11_1, z21_1, a21_1, z1_2, a1_2 = forward_propagation(x)
        # cost function
        e = (y - a1_2) ** 2

        # chain rule: output-layer gradients
        deda1_2 = -2 * (y - a1_2)
        da1_2dz1_2 = a1_2 * (1 - a1_2)
        dz1_2dw11_2 = a11_1
        dz1_2dw12_2 = a21_1
        dedw11_2 = deda1_2 * da1_2dz1_2 * dz1_2dw11_2
        dedw12_2 = deda1_2 * da1_2dz1_2 * dz1_2dw12_2

        dz1_2db1_2 = 1
        dedb1_2 = deda1_2 * da1_2dz1_2 * dz1_2db1_2

        # chain rule: first hidden neuron
        dz1_2da11_1 = w11_2
        da11_1dz11_1 = a11_1 * (1 - a11_1)
        dz11_1dw11_1 = x
        dedw11_1 = deda1_2 * da1_2dz1_2 * dz1_2da11_1 * da11_1dz11_1 * dz11_1dw11_1
        dz11_1db1_1 = 1
        dedb1_1 = deda1_2 * da1_2dz1_2 * dz1_2da11_1 * da11_1dz11_1 * dz11_1db1_1

        # chain rule: second hidden neuron
        dz1_2da21_1 = w12_2
        da21_1dz21_1 = a21_1 * (1 - a21_1)
        dz21_1dw21_1 = x
        dedw21_1 = deda1_2 * da1_2dz1_2 * dz1_2da21_1 * da21_1dz21_1 * dz21_1dw21_1
        dz21_1db2_1 = 1
        dedb2_1 = deda1_2 * da1_2dz1_2 * dz1_2da21_1 * da21_1dz21_1 * dz21_1db2_1

        # backpropagation: update the parameters
        w11_1 = w11_1 - alpha * dedw11_1
        w21_1 = w21_1 - alpha * dedw21_1
        b1_1 = b1_1 - alpha * dedb1_1
        b2_1 = b2_1 - alpha * dedb2_1
        w11_2 = w11_2 - alpha * dedw11_2
        w12_2 = w12_2 - alpha * dedw12_2
        b1_2 = b1_2 - alpha * dedb1_2

    if _ % 100 == 0:
        plt.clf()  # clear the figure
        plt.scatter(xs, ys)
        z11_1, a11_1, z21_1, a21_1, z1_2, a1_2 = forward_propagation(xs)
        plt.xlim(0, 2.5)
        plt.ylim(0, 1.2)
        plt.plot(xs, a1_2)
        plt.pause(0.01)
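
The subscript bookkeeping above collapses once the weights are written as matrices. Below is a minimal vectorized sketch of the same 1-2-1 network (my own reformulation, not the original code; the shapes and names W1, b1, W2, b2 are assumptions): the forward and backward passes become a handful of matrix products, and the per-parameter chain-rule variables disappear.

# vectorized.py -- matrix form of the same 1-2-1 network (illustrative sketch)
import numpy as np
import dataset

xs, ys = dataset.get_beans(100)

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

W1 = np.random.rand(2, 1)  # hidden layer: 2 neurons, 1 input
b1 = np.random.rand(2, 1)
W2 = np.random.rand(1, 2)  # output layer: 1 neuron, 2 inputs
b2 = np.random.rand(1, 1)
alpha = 0.05

for _ in range(5000):
    for i in range(100):
        x = np.array([[xs[i]]])  # shape (1, 1)
        y = ys[i]

        # forward pass
        z1 = W1 @ x + b1   # (2, 1)
        a1 = sigmoid(z1)
        z2 = W2 @ a1 + b2  # (1, 1)
        a2 = sigmoid(z2)

        # backward pass: the same chain rule, now in matrix form
        delta2 = -2 * (y - a2) * a2 * (1 - a2)    # de/dz2
        delta1 = (W2.T @ delta2) * a1 * (1 - a1)  # de/dz1

        W2 -= alpha * delta2 @ a1.T
        b2 -= alpha * delta2
        W1 -= alpha * delta1 @ x.T
        b1 -= alpha * delta1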

The Keras Framework

Installing Keras with Anaconda

  1. Install Anaconda, then configure PyCharm to use it.
  2. Open Anaconda Prompt to get a command line.
  3. Run: conda create -n xxx python=3.8
  4. Run: conda activate xxx
  5. Inside the xxx environment, run: conda install tensorflow-gpu
  6. Run: conda install keras-gpu
  7. Confirm with y + Enter.
  8. Once installation finishes, create a new PyCharm project and select the configured Anaconda environment as the Python interpreter.
  9. Done!

(Anaconda automatically installs compatible versions of CUDA and cuDNN for us.)

The sections below show how to use Keras in code.

The Keras (Chinese) documentation says:
If you are running on the TensorFlow or CNTK backend, your code will automatically run on the GPU whenever an available GPU is detected.

But I haven't managed to get that working yet; all of my training so far has run on the CPU...
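
A quick way to check whether TensorFlow actually sees a GPU (an empty list means training will fall back to the CPU):

# gpu_check.py
import tensorflow as tf

print(tf.config.list_physical_devices('GPU'))  # e.g. [] when no GPU is visible
print(tf.test.is_built_with_cuda())  # whether this TensorFlow build has CUDA support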

Using Keras in Code

Keras documentation (Chinese): https://keras.io/zh/

Code written with the Keras framework expresses forward propagation, activation functions, and the loss (cost) function concisely.

When using it, you only need to think about the dimensionality of the input data and the number of neurons in each hidden layer.

import time
import dataset
import numpy as np
import plot_utils
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD

m = 100
X, Y = dataset.get_beans4(m)
plot_utils.show_scatter(X, Y)

time_start = time.time()

model = Sequential()
model.add(Dense(units=2, activation='sigmoid', input_dim=2))
model.add(Dense(units=1, activation='sigmoid'))

# model.compile(loss='mean_squared_error', optimizer='sgd', metrics=['accuracy'])
model.compile(loss='mean_squared_error', optimizer=SGD(lr=0.05), metrics=['accuracy'])  # set the learning rate lr=0.05 (newer Keras spells it learning_rate)
model.fit(X, Y, epochs=5000, batch_size=10)

pres = model.predict(X)

time_end = time.time()
# plot_utils.show_scatter_curve(X, Y, pres)
plot_utils.show_scatter_surface(X, Y, model)
print('time cost', time_end - time_start, 's')

Deep Learning

A network with more than three hidden layers is called a deep neural network.

TensorFlow Playground (interactive visualization): http://playground.tensorflow.org/

With 1 neuron in the hidden layer, the 0.5 level set corresponds to a single line in the input space; after the activation function it becomes a single curve.

With 2 hidden neurons, it corresponds to two intersecting lines, which become 2 curves after the activation function.

With 3 hidden neurons, it corresponds to 3 intersecting lines, which become 3 curves after the activation function.
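
To make this concrete, here is a minimal sketch (weights are hand-picked for illustration, not trained) that draws the 0.5 level set of a 2-hidden-neuron network over a 2D input grid; the two straight lines defined by the hidden neurons turn into a curved boundary once the sigmoids are combined:

# levelset.py -- 0.5 level set of a tiny 2-neuron network (illustrative weights)
import numpy as np
import matplotlib.pyplot as plt

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

x1, x2 = np.meshgrid(np.linspace(-3, 3, 200), np.linspace(-3, 3, 200))

h1 = sigmoid(2.0 * x1 + 1.0 * x2)   # hidden neuron 1: line 2*x1 + x2 = 0
h2 = sigmoid(-1.0 * x1 + 2.0 * x2)  # hidden neuron 2: line -x1 + 2*x2 = 0
out = sigmoid(3.0 * h1 + 3.0 * h2 - 3.0)  # output neuron combines them

plt.contour(x1, x2, out, levels=[0.5])  # the 0.5 level set is now curved
plt.title("0.5 level set, 2 hidden neurons")
plt.xlabel("x1")
plt.ylabel("x2")
plt.show()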

# dataset.py
import numpy as np
import random

def get_beans(counts):
    # two interleaved spirals, one per class
    posX, posY = genSpiral(int(counts / 2), 0, 1)
    negX, negY = genSpiral(int(counts / 2), np.pi, 0)
    X = np.vstack((posX, negX))
    Y = np.hstack((posY, negY))
    return X, Y

def genSpiral(counts, deltaT, label):
    X = np.zeros((counts, 2))
    Y = np.zeros(counts)
    for i in range(counts):
        r = i / counts * 5
        t = 1.75 * i / counts * 2 * np.pi + deltaT
        x1 = r * np.sin(t) + random.uniform(-0.1, 0.1)
        x2 = r * np.cos(t) + random.uniform(-0.1, 0.1)
        X[i] = np.array([x1, x2])
        Y[i] = label
    return X, Y

def dist(a, b):
    dx = a['x'] - b['x']
    dy = a['y'] - b['y']
    return np.sqrt(dx * dx + dy * dy)

def getCircleLabel(p, center):
    radius = 1
    if dist(p, center) < (radius * 0.5):
        return 1
    else:
        return 0

def randUniform(a=-1, b=1):
    return np.random.rand() * (b - a) + a

def classifyCircleData(numSamples=100, noise=0):
    # points inside a small disc are positive, points in an outer ring are negative
    Y = []
    X = []
    radius = 1
    num = int(numSamples / 2)
    for i in range(num):
        r = randUniform(0, radius * 0.5)
        angle = randUniform(0, 2 * np.pi)
        x = r * np.sin(angle)
        y = r * np.cos(angle)
        noiseX = randUniform(-radius, radius) * noise
        noiseY = randUniform(-radius, radius) * noise
        label = getCircleLabel({'x': x + noiseX, 'y': y + noiseY}, {'x': 0, 'y': 0})
        X.append([x + 1, y + 1])
        Y.append(label)

    for i in range(num):
        r = randUniform(radius * 0.7, radius)
        angle = randUniform(0, 2 * np.pi)
        x = r * np.sin(angle)
        y = r * np.cos(angle)
        noiseX = randUniform(-radius, radius) * noise
        noiseY = randUniform(-radius, radius) * noise
        label = getCircleLabel({'x': x + noiseX, 'y': y + noiseY}, {'x': 0, 'y': 0})
        X.append([x + 1, y + 1])
        Y.append(label)

    X = np.array(X)
    Y = np.array(Y)

    return X, Y

# main.py
import time
import dataset
import numpy as np
import plot_utils
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD

m = 100
X, Y = dataset.get_beans(m)
plot_utils.show_scatter(X, Y)

time_start = time.time()

model = Sequential()
model.add(Dense(units=8, activation='relu', input_dim=2))
model.add(Dense(units=8, activation='relu'))
model.add(Dense(units=8, activation='relu'))
model.add(Dense(units=1, activation='sigmoid'))


# model.compile(loss='mean_squared_error', optimizer='sgd', metrics=['accuracy'])
model.compile(loss='mean_squared_error', optimizer=SGD(lr=0.05), metrics=['accuracy'])
model.fit(X, Y, epochs=5000, batch_size=10)

pres = model.predict(X)

time_end = time.time()
# plot_utils.show_scatter_curve(X, Y, pres)
plot_utils.show_scatter_surface(X, Y, model)
print('time cost', time_end - time_start, 's')

Convolutional Neural Networks

A first pass at image recognition: training on the MNIST dataset, without convolutions.

# A first pass at image recognition: train on MNIST, without convolutions
import time
from tensorflow.keras.datasets import mnist
# import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical

(X_train, Y_train), (X_test, Y_test) = mnist.load_data()
print("X_train shape:" + str(X_train.shape))
print("Y_train shape:" + str(Y_train.shape))
print("X_test shape:" + str(X_test.shape))
print("Y_test shape:" + str(Y_test.shape))

print(Y_train[0])
plt.imshow(X_train[0], cmap='gray')
plt.show()
# flatten each 28x28 image to a 784-vector and scale pixels to [0, 1]
X_train = X_train.reshape(60000, 784) / 255.0
X_test = X_test.reshape(10000, 784) / 255.0

# one-hot encode the labels (10 classes)
Y_train = to_categorical(Y_train, 10)
Y_test = to_categorical(Y_test, 10)

time_start = time.time()

model = Sequential()
model.add(Dense(units=256, activation='relu', input_dim=784))
model.add(Dense(units=256, activation='relu'))
model.add(Dense(units=256, activation='relu'))
model.add(Dense(units=10, activation='softmax'))
# train

model.compile(loss='categorical_crossentropy', optimizer=SGD(lr=0.05), metrics=['accuracy'])
model.fit(X_train, Y_train, epochs=5000, batch_size=1024)

time_end = time.time()

print('time cost', time_end - time_start, 's')
print(model.get_weights())
loss, accuracy = model.evaluate(X_test, Y_test)
print("loss: " + str(loss))
print("accuracy: " + str(accuracy))

# final accuracy is roughly 96%

Reproducing LeNet-5 (a Classic Convolutional Network)

# Reproducing LeNet-5 (a classic convolutional network)
import time
from tensorflow.keras.datasets import mnist
# import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Conv2D  # 2D convolution
from tensorflow.keras.layers import AveragePooling2D  # 2D average pooling
from tensorflow.keras.layers import Flatten  # flatten before the dense layers

(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

# keep the 28x28 spatial layout, add a channel axis, scale pixels to [0, 1]
X_train = X_train.reshape(60000, 28, 28, 1) / 255.0
X_test = X_test.reshape(10000, 28, 28, 1) / 255.0

Y_train = to_categorical(Y_train, 10)
Y_test = to_categorical(Y_test, 10)

time_start = time.time()

model = Sequential()
# filters: number of convolution kernels; kernel_size: kernel size;
# strides: step size; padding: 'valid' or 'same', the two convolution modes
model.add(
    Conv2D(filters=6, kernel_size=(5, 5), strides=(1, 1), input_shape=(28, 28, 1), padding='valid', activation='relu'))
# 2x2 pooling layer
model.add(AveragePooling2D(pool_size=(2, 2)))
# no input_shape needed here; Keras infers the input shape automatically
model.add(Conv2D(filters=16, kernel_size=(5, 5), strides=(1, 1), padding='valid', activation='relu'))
model.add(AveragePooling2D(pool_size=(2, 2)))
# flatten, then feed into the dense (fully connected) layers
model.add(Flatten())
model.add(Dense(units=120, activation='relu'))
model.add(Dense(units=84, activation='relu'))
model.add(Dense(units=10, activation='softmax'))
# softmax is the usual choice for multi-class outputs and works well here
# train

# use the categorical cross-entropy loss, which works better for classification
# than the mean squared error used earlier
model.compile(loss='categorical_crossentropy', optimizer=SGD(lr=0.05), metrics=['accuracy'])
# epochs: number of training passes; batch_size: samples per gradient update
model.fit(X_train, Y_train, epochs=50, batch_size=1024)

time_end = time.time()
# evaluate on the test set
print('time cost', time_end - time_start, 's')
print(model.get_weights())
loss, accuracy = model.evaluate(X_test, Y_test)
print("loss: " + str(loss))
print("accuracy: " + str(accuracy))

# final accuracy is roughly 98%
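
For reference, the softmax activation and the categorical cross-entropy loss used above are, for logits $z$ and a one-hot label $y$:

$$
\text{softmax}(z)_i = \frac{e^{z_i}}{\sum_j e^{z_j}}
$$

$$
e = -\sum_i y_i \log(a_i)
$$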