0%

玩砸了, 回来了, 后面几天开始写毕设+复健, 希望有个班上.

阅读全文 »

 总之就是开始点深度学习的科技树了, 炼丹还真挺有意思的. 简单记录下我的第一个 Hello World: 以 Kaggle 平台上的 Titanic 作为样例, 使用 11-1000-100-2 的全连接神经网络 (与下方代码中的默认层配置一致) 进行训练, 最后拿到了 0.77 左右的评分和 50% 的排名. 果然在数据量不大的情况下深度学习顶不过传统机器学习…后期应该可以修改为随机森林的形式, 就是会花费大量的训练时间.
代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import time
import pandas as pd


class Net(nn.Module):
    """Fully connected classifier assembled from a list of layer specs.

    Each spec is a dict of the form
    ``{'name': str, 'size': int, 'act': nn.Module}``. For every consecutive
    pair of specs a ``nn.Linear(spec['size'], next_spec['size'])`` is added,
    followed by ``spec['act']``. The ``'act'`` of the last spec is not used;
    a softmax over the last dimension is appended instead.
    """

    def __init__(self, layers=None):  # hyper-parameters used to build the net
        """Build the network.

        Args:
            layers: list of layer specs (see class docstring). When omitted,
                an 11-1000-100-2 network with ReLU activations is built.
        """
        super().__init__()
        if layers is None:
            # Built per instance: a list literal in the signature would be
            # shared (together with its activation modules) by every Net().
            layers = [
                {'name': 'input', 'size': 11, 'act': nn.ReLU()},
                {'name': 'hidden1', 'size': 1000, 'act': nn.ReLU()},
                {'name': 'hidden2', 'size': 100, 'act': nn.ReLU()},
                {'name': 'opt', 'size': 2, 'act': nn.ReLU()},
            ]
        self.layers = nn.Sequential()
        self.name = ''
        # The output spec only contributes its size, so pair each spec with
        # its successor.
        for layer, next_layer in zip(layers, layers[1:]):
            self.layers.add_module(
                layer['name'], nn.Linear(layer['size'], next_layer['size']))
            self.layers.add_module(layer['name'] + 'ACT', layer['act'])
            self.name += str(layer['size']) + '-'
        # dim=-1 equals dim=0 for a single 1-D sample and stays correct
        # (per-row normalisation) when a 2-D batch is passed.
        self.layers.add_module('softMax', nn.Softmax(-1))
        self.name += str(layers[-1]['size'])

    def forward(self, x):
        """Return the softmax output of the layer stack for ``x``."""
        return self.layers(x)

    def train(self, eval=True, trainData=0, testData=0, optimizer=0,
              lossFunc=0, epoch=100, batchSize=100, batchNum=100):
        """Run the training loop.

        This method overrides ``nn.Module.train``. ``nn.Module.eval()`` calls
        ``self.train(False)``, so a falsy ``eval`` only switches the module to
        evaluation mode and returns ``self`` as the base class does.

        Args:
            eval: falsy to switch to evaluation mode without training.
            trainData: mapping with ``'data'`` and ``'answer'`` entries that
                are indexable per sample.
            testData: unused.
            optimizer: optimizer that owns this network's parameters.
            lossFunc: callable ``lossFunc(prediction, target)`` returning a
                value that supports ``backward()``.
            epoch: number of passes of ``batchNum`` optimizer steps.
            batchSize: samples whose gradients are accumulated per step.
            batchNum: optimizer steps per epoch.

        Returns:
            ``self``. The string form of the last loss is stored in
            ``self.loss``.
        """
        if not eval:
            super().train(False)
            return self
        super().train(True)
        print(self.name)
        trainLen = len(trainData['data'])
        print(trainLen)
        loss = 0
        for epoch_idx in range(epoch):
            cont = 0  # index of the next sample; wraps around the data set
            for _ in range(batchNum):
                optimizer.zero_grad()
                # Gradients are accumulated sample by sample and applied in
                # one optimizer step per batch.
                for _ in range(batchSize):
                    fwd = self(trainData['data'][cont])
                    loss = lossFunc(fwd, trainData['answer'][cont])
                    loss.backward()
                    cont = (cont + 1) % trainLen
                optimizer.step()
            if epoch_idx % 10 == 0:
                print(loss)
        self.loss = str(loss)
        return self


def loadData(path):
    """Load a Titanic-style CSV and turn it into feature/label tensors.

    The columns ``PassengerId``, ``Name``, ``Ticket`` and ``Cabin`` are
    dropped, ``Sex`` and ``Embarked`` are one-hot encoded, a ``NonAge`` flag
    marks rows whose ``Age`` was missing, remaining missing values become 0
    and every feature column with a positive maximum is divided by that
    maximum. When the file has no ``Survived`` column, all labels are 0.

    NOTE(review): the scaling uses the maxima of the file being loaded, so
    two different files are scaled independently of each other.

    Args:
        path: location of the CSV file.

    Returns:
        dict with ``'data'`` (float tensor, one row per passenger) and
        ``'answer'`` (float tensor of one-hot labels: ``[1, 0]`` for
        ``Survived == 1``, ``[0, 1]`` for ``Survived == 0``). Tensors are
        placed on the GPU when one is available, otherwise on the CPU.
    """
    one_hot = [[0, 1], [1, 0]]  # one-hot target indexed by the Survived value
    frame = pd.read_csv(path)
    frame = frame.drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'])
    frame = pd.get_dummies(frame, columns=['Sex', 'Embarked'])
    frame['NonAge'] = frame['Age'].isna().astype(int)
    frame = frame.fillna(0)
    print(frame)
    if 'Survived' not in frame:
        frame['Survived'] = 0
    survived = frame.pop('Survived')

    # Recent pandas versions emit boolean dummy columns, and dividing a
    # boolean column raises; convert everything to float before scaling.
    frame = frame.astype(float)

    # Normalise each column by its maximum.
    for col in frame.columns:
        peak = frame[col].max()
        if peak > 0:
            frame[col] = frame[col] / peak

    answer = [one_hot[int(label)] for label in survived]

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    return {
        'data': torch.tensor(frame.to_numpy(), dtype=torch.float32).to(device),
        'answer': torch.tensor(answer, dtype=torch.float32).to(device),
    }

def output(net):
    """Predict on ``data/test.csv`` and write the result to ``./result.csv``.

    The file gets a ``PassengerId,Survived`` header followed by one line per
    test row. Ids are assigned sequentially starting at 892, and a row is
    written as survived (1) when the first network output is the largest,
    matching the one-hot layout produced by ``loadData``.

    Args:
        net: trained network; called once per test row.
    """
    testData = loadData('data/test.csv')
    net.eval()
    print(net)
    passid = 892  # ids are numbered sequentially from this value
    # The context manager closes the file even if inference raises, and
    # no_grad avoids building autograd graphs during pure inference.
    with open('./result.csv', 'w') as f, torch.no_grad():
        f.write('PassengerId,Survived\n')
        # Rows are fed one at a time so each forward pass sees a 1-D sample.
        for row in testData['data']:
            res = net.forward(row)
            _, index = res.max(0)
            survived = 1 if index == 0 else 0
            f.write(str(passid) + ',' + str(survived) + '\n')
            passid += 1

if __name__ == '__main__':
    # Train on data/train.csv with a randomly chosen optimizer, then write
    # predictions for the test set via output().
    data = loadData('data/train.csv')
    print(data['data'].dtype)

    # Keep the model on the same device as the training tensors instead of
    # assuming a GPU is present.
    net = Net().to(data['data'].device)

    learning_rate = 0.0001
    # Factories rather than instances, so only the selected optimizer is
    # actually constructed.
    optimizer_factories = [
        lambda: optim.SGD(net.parameters(), lr=learning_rate,
                          momentum=np.random.random() * 0.9),
        lambda: optim.Adam(net.parameters(), lr=learning_rate),
        lambda: optim.Adagrad(net.parameters(), lr=learning_rate),
        lambda: optim.RMSprop(net.parameters(), lr=learning_rate,
                              momentum=np.random.random() * 0.9),
    ]
    chosen = np.random.randint(len(optimizer_factories))
    optimizer = optimizer_factories[chosen]()
    print(optimizer)

    net.train(True, data, 1, optimizer=optimizer, lossFunc=nn.MSELoss())
    output(net)

 之前遇到个问我数据库的, 直接 没了解过-还在学习-不太清楚 素质三联怼上去…正好以后自己写扫描器的时候也得用, 是时候再攀一次科技树了.
 本文介绍如何安装mongodb, 以及如何使用python对数据库进行增删改查.

阅读全文 »