-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathclusters-single.py
More file actions
89 lines (71 loc) · 3.32 KB
/
Copy pathclusters-single.py
File metadata and controls
89 lines (71 loc) · 3.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import sys
from numpy import mean, std
from ebc import EBC
from matrix import SparseMatrix
def _run_ebc(matrix, cluster_dimensions, maxit_ebc, jitter_max_ebc, objective_tolerance):
    """Normalize *matrix*, run one EBC pass on it, and return ``(objective, iterations)``."""
    matrix.normalize()
    ebc = EBC(matrix, cluster_dimensions, maxit_ebc, jitter_max_ebc, objective_tolerance)
    # run() returns three values; the first one is not used by this script.
    _, objective, iterations = ebc.run()
    return objective, iterations


def compareRandom(num_trials, tensor_dimensions, matrix_data, cluster_dimensions,
                  maxit_ebc, jitter_max_ebc, objective_tolerance):
    """Run EBC on the data and on a shuffled counterpart, ``num_trials`` times.

    Each trial builds a fresh ``SparseMatrix(tensor_dimensions)``, loads
    ``matrix_data`` into it, derives a second matrix via ``shuffle()``
    (presumably a randomized version of the same data -- confirm against
    ``SparseMatrix``), normalizes both and runs ``EBC`` on each.

    Args:
        num_trials: number of trials to run; ``0`` yields empty results.
        tensor_dimensions: passed to the ``SparseMatrix`` constructor.
        matrix_data: passed to ``SparseMatrix.read_data``.
        cluster_dimensions: passed to ``EBC`` for both matrices.
        maxit_ebc: passed to ``EBC``; a run whose reported iteration count
            equals this value is counted as not converged.
        jitter_max_ebc: passed to ``EBC``.
        objective_tolerance: passed to ``EBC``.

    Returns:
        A 7-tuple ``(deltas, objectives_M, objectives_Mr, iterations_M,
        iterations_Mr, noconverge_M, noconverge_Mr)``:

        * ``deltas`` -- per-trial ``objective_M - objective_Mr``;
        * ``objectives_M`` / ``objectives_Mr`` -- per-trial objectives for
          the original / shuffled matrix (recorded for every trial);
        * ``iterations_M`` / ``iterations_Mr`` -- iteration counts of the
          runs that stopped before ``maxit_ebc`` only;
        * ``noconverge_M`` / ``noconverge_Mr`` -- number of runs that hit
          ``maxit_ebc``.
    """
    deltas = []
    objectives_M = []
    objectives_Mr = []
    iterations_M = []
    iterations_Mr = []
    noconverge_M = 0
    noconverge_Mr = 0

    for j in range(num_trials):
        # Single-argument print works on Python 2 and 3 alike; the two spaces
        # reproduce the output of the former `print "Trial ", j` statement.
        print("Trial  %d" % j)

        M = SparseMatrix(tensor_dimensions)
        M.read_data(matrix_data)
        # Order matters: the shuffled matrix is taken from M *before* M is
        # normalized, and is normalized separately inside _run_ebc.
        Mr = M.shuffle()  # could also be M.shuffle_old()

        objective_M, it_M = _run_ebc(M, cluster_dimensions, maxit_ebc,
                                     jitter_max_ebc, objective_tolerance)
        if it_M == maxit_ebc:
            noconverge_M += 1
        else:
            iterations_M.append(it_M)

        objective_Mr, it_Mr = _run_ebc(Mr, cluster_dimensions, maxit_ebc,
                                       jitter_max_ebc, objective_tolerance)
        if it_Mr == maxit_ebc:
            noconverge_Mr += 1
        else:
            iterations_Mr.append(it_Mr)

        objectives_M.append(objective_M)
        objectives_Mr.append(objective_Mr)
        deltas.append(objective_M - objective_Mr)

    return (deltas, objectives_M, objectives_Mr, iterations_M, iterations_Mr,
            noconverge_M, noconverge_Mr)
def main():
    """Command-line entry point: compare EBC on real vs. shuffled data.

    Positional arguments (``sys.argv[1:]``):

    1. ``data_file`` -- tab-separated input file, one record per line.
    2. ``cols`` -- comma-separated column indices to keep from each record.
       The last kept column is not counted as a dimension (presumably it
       holds the cell value -- confirm against ``SparseMatrix.read_data``).
    3. ``K`` -- comma-separated integers passed on as ``cluster_dimensions``.
    4. ``N_trials`` -- number of trials.
    5. ``output_file`` -- results file; one tab-separated row is appended.
    6. ``jitter_max`` -- float, forwarded to ``compareRandom``.
    7. ``max_iterations_ebc`` -- int, forwarded to ``compareRandom``.
    8. ``object_tol`` -- float, forwarded to ``compareRandom``.

    The appended row contains: the values of ``K``, mean and standard
    deviation of the objective deltas, mean objective on the original and
    on the shuffled data, mean iteration count for each (over the runs that
    did not hit the iteration limit; ``nan`` if there were none), and the
    two non-convergence counts.
    """
    if len(sys.argv) < 9:
        sys.exit("usage: %s data_file cols K n_trials output_file jitter_max "
                 "max_iterations_ebc objective_tolerance" % sys.argv[0])

    data_file = sys.argv[1]
    cols = [int(e) for e in sys.argv[2].split(",")]
    K = [int(e) for e in sys.argv[3].split(",")]
    N_trials = int(sys.argv[4])
    output_file = sys.argv[5]
    jitter_max = float(sys.argv[6])
    max_iterations_ebc = int(sys.argv[7])
    object_tol = float(sys.argv[8])

    # get original data
    raw_data = []
    with open(data_file, "r") as input_stream:
        for line in input_stream:
            # Drop the line terminator so it does not stay glued to the last
            # field (which would make labels differ on an unterminated final
            # line), and skip blank lines instead of failing on them.
            line = line.rstrip("\r\n")
            if line:
                raw_data.append(line.split("\t"))
    data = [[d[i] for i in cols] for d in raw_data]
    if not data:
        sys.exit("no data rows found in %s" % data_file)
    data_dimensions = len(data[0]) - 1

    # get axis length for each dimension: the number of distinct labels
    N = [len({d[dim] for d in data}) for dim in range(data_dimensions)]
    print(N)

    D_1, obj_orig, obj_rand, it_orig, it_rand, noconv_orig, noconv_rand = compareRandom(
        num_trials=N_trials,
        tensor_dimensions=N,
        matrix_data=data,
        cluster_dimensions=K,
        maxit_ebc=max_iterations_ebc,
        jitter_max_ebc=jitter_max,
        objective_tolerance=object_tol)

    # write final result to combined file (other processes also write to this file)
    fields = [str(e) for e in K]
    fields += [str(mean(D_1)), str(std(D_1)),
               str(mean(obj_orig)), str(mean(obj_rand)),
               str(mean(it_orig)), str(mean(it_rand)),
               str(noconv_orig), str(noconv_rand)]
    # The row is emitted with a single write() call, and the `with` block
    # flushes and closes the handle even if the write raises.
    with open(output_file, "a") as output_stream:
        output_stream.write("\t".join(fields) + "\n")
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()