-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmodel.py
More file actions
74 lines (60 loc) · 1.8 KB
/
Copy pathmodel.py
File metadata and controls
74 lines (60 loc) · 1.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Nov 1 22:24:36 2018
@author: Phoebe
"""
import numpy as np
# Location of the training data.  Each CSV row must hold at least 602
# comma-separated values: columns 0-599 are parsed as float features and
# every column from 600 onward is parsed as an integer label.
path = "/Users/Phoebe/Desktop/data/input/"
filename = "train.csv"

feature = []   # per row: float array of the first 600 columns
labels = []    # per row: int array of all columns from 600 onward
maxlab = []    # per row: column 600 as int (reported later as "max drop rate")
meanlab = []   # per row: column 601 as int (presumably the "mean drop rate" label)

# A context manager guarantees the file is closed once parsing finishes.
# The previous version rebound ``f`` to a NumPy array inside the loop, which
# made the open handle unreachable and left it unclosed.
with open(path + filename, 'r', encoding='UTF-8-sig') as f:
    for line in f:
        if not line.strip():
            # Skip blank lines (e.g. a trailing empty line at end of file),
            # which would otherwise fail the float conversion.
            continue
        r = line.split(',')
        feature.append(np.array(r[:600]).astype(float))
        labels.append(np.array(r[600:]).astype(int))
        maxlab.append(int(r[600]))
        meanlab.append(int(r[601]))

# Sanity check: number of rows, feature width, label width, first row's labels.
print(len(feature), len(feature[0]), len(labels[0]))
print(labels[0], maxlab[0], meanlab[0])
# Held-out test data, read from the same directory and parsed with the same
# column layout as the training file: columns 0-599 are float features and
# every column from 600 onward is an integer label.
filename2 = "combinedtest.csv"

tfeature = []   # per row: float array of the first 600 columns
tlabels = []    # per row: int array of all columns from 600 onward
tmaxlab = []    # per row: column 600 as int
tmeanlab = []   # per row: column 601 as int

# A context manager guarantees the file is closed once parsing finishes;
# the previous version opened ``f2`` and never closed it.
with open(path + filename2, 'r', encoding='UTF-8-sig') as f2:
    for line in f2:
        if not line.strip():
            # Skip blank lines (e.g. a trailing empty line at end of file),
            # which would otherwise fail the float conversion.
            continue
        r = line.split(',')
        tfeature.append(np.array(r[:600]).astype(float))
        tlabels.append(np.array(r[600:]).astype(int))
        tmaxlab.append(int(r[600]))
        tmeanlab.append(int(r[601]))

# Sanity check: number of rows, feature width, label width, first row's labels.
print(len(tfeature), len(tfeature[0]), len(tlabels[0]))
print(tlabels[0], tmaxlab[0], tmeanlab[0])
'''
random forest
'''
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
# Hyper-parameters shared by both forests.  Keeping them in one place lets
# each target get its own, independently trained estimator.
rf_params = dict(n_estimators=300, max_features=600,
                 max_depth=None, min_samples_split=2,
                 oob_score=False, n_jobs=-1)

# Disabled experiment: a single multi-output fit on all label columns.
#clf.fit(feature, labels)
#score = clf.score(tfeature, tlabels)
#print(score)

# Model 1: predict the label stored in column 600 (``maxlab``).
max_clf = RandomForestClassifier(**rf_params)
max_clf.fit(feature, maxlab)
max_score = max_clf.score(tfeature, tmaxlab)
print("accuracy for max drop rate:%s"%max_score)

# Model 2: predict the label stored in column 601 (``meanlab``).
# This must be a separate estimator trained and scored on the mean labels.
# Previously ``mean_clf`` was just another name for ``max_clf`` and was
# refit/scored on the max labels, so the reported "mean" accuracy was really
# a second max-label run.
mean_clf = RandomForestClassifier(**rf_params)
mean_clf.fit(feature, meanlab)
mean_score = mean_clf.score(tfeature, tmeanlab)
print("accuracy for mean drop rate:%s"%mean_score)

# Disabled experiment: 5-fold cross-validation.
#scores = cross_val_score(clf, feature, labels, cv=5)
#print(scores.mean())