Wan Xinyi
committed on
Commit
·
594a8f9
1
Parent(s):
4835f75
Add requirements
Browse files
- requirements.txt +2 -0
- v_schedule.py +63 -50
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
drawsvg
|
2 |
+
pathlib
|
v_schedule.py
CHANGED
@@ -113,51 +113,51 @@ class PipelineGraph(object):
|
|
113 |
pending_w[stage].append((2, chunk, _cnt))
|
114 |
count[stage][cat * 2 + chunk] += 1
|
115 |
|
116 |
-
for _ in range(2 * self.n_stage):
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
|
162 |
# init_bubble = get_max_stage_bubble()
|
163 |
# print(stage_bubble)
|
@@ -288,7 +288,7 @@ class PipelineGraph(object):
|
|
288 |
_str += _c
|
289 |
print(_str)
|
290 |
|
291 |
-
def get_v_schedule(self):
|
292 |
schedule, end_time, max_bubble = None, None, None
|
293 |
expected_time = sum(self.fbw_cost) * self.n_micro * 2
|
294 |
for fill_b in [True, False]:
|
@@ -301,10 +301,12 @@ class PipelineGraph(object):
|
|
301 |
max_bubble = _max_bubble
|
302 |
schedule = _schedule
|
303 |
end_time = _end_time
|
|
|
|
|
304 |
# self.print_details(end_time, print_scaling=1)
|
305 |
-
bubble_rate = max_bubble / expected_time
|
306 |
-
print("%2d %3d, [%5d %5d %5d], %6d -> %6.4f" % \
|
307 |
-
(self.n_stage, self.n_micro, *self.fbw_cost, self.max_mem // self.f_mem, bubble_rate))
|
308 |
local_order = [[] for _ in range(self.n_stage)]
|
309 |
comm_id = {}
|
310 |
comm_id_counter = 0
|
@@ -434,14 +436,24 @@ if __name__ == '__main__':
|
|
434 |
# (32, 96, 10419, 10207, 7715, 408, 6144, 48, 64),
|
435 |
# (32, 128, 10408, 10204, 7703, 408, 6144, 48, 64),
|
436 |
# (32, 256, 10402, 10248, 7698, 460, 6144, 48, 64),
|
437 |
-
(4, 8, 6, 4, 4, 1, 4096, 32, 32),
|
438 |
# (8, 24, 29444, 29718, 19927, 527, 4096, 32, 32),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
439 |
]
|
440 |
s = 1024
|
441 |
|
442 |
# h, a, s = 4096, 32, 1024
|
443 |
# cost_f, cost_b, cost_w, cost_c = 29718, 29444, 19927, 527
|
444 |
-
for p, n, f, b, w, c, h, a,
|
445 |
mem_f = 34 * h + 5 * a * s
|
446 |
mem_w = - 32 * h
|
447 |
mem_b = - mem_w - mem_f
|
@@ -459,3 +471,4 @@ if __name__ == '__main__':
|
|
459 |
max_mem=mem_f * (p * 2 + m_offset),
|
460 |
)
|
461 |
graph.get_v_schedule()
|
|
|
|
113 |
pending_w[stage].append((2, chunk, _cnt))
|
114 |
count[stage][cat * 2 + chunk] += 1
|
115 |
|
116 |
+
# for _ in range(2 * self.n_stage):
|
117 |
+
# for i in range(self.n_stage):
|
118 |
+
# if count[i][1] >= count[i][0]:
|
119 |
+
# put(0, 0, i, assert_cnt=False)
|
120 |
+
# continue
|
121 |
+
# if i == self.n_stage - 1:
|
122 |
+
# put(0, 1, i, assert_cnt=False)
|
123 |
+
# continue
|
124 |
+
# fa_id = self.get_id(0, 1, i + 1, count[i][1])
|
125 |
+
# if 0 <= end_time[fa_id] < cur_time[i + 1]: # TODO
|
126 |
+
# put(0, 1, i, assert_cnt=False)
|
127 |
+
# else:
|
128 |
+
# put(0, 0, i, assert_cnt=False)
|
129 |
|
130 |
+
for i in range(self.n_stage):
|
131 |
+
put(0, 0, i)
|
132 |
+
for i in range(self.n_stage - 1, -1, -1):
|
133 |
+
if i == self.n_stage - 1:
|
134 |
+
put(0, 1, i)
|
135 |
+
continue
|
136 |
+
tmp = end_time[self.get_id(0, 1, i + 1, 0)] + self.c_cost
|
137 |
+
while mem[i] + self.fbw_mem[0] * (2 + i * 2) <= self.max_mem and cur_time[i] + self.fbw_cost[0] <= tmp and count[i][0] < self.n_micro:
|
138 |
+
for j in range(i + 1):
|
139 |
+
put(0, 0, j)
|
140 |
+
put(0, 1, i)
|
141 |
+
iter_chunk_ = 0
|
142 |
+
end_tmp = 0
|
143 |
+
for i in range(self.n_stage):
|
144 |
+
if i == 0:
|
145 |
+
end_tmp = cur_time[0] + self.fbw_cost[1]
|
146 |
+
continue
|
147 |
+
tmp = end_tmp + self.c_cost
|
148 |
+
while count[i][0] + count[i][1] < count[i - 1][0] + count[i - 1][1]:
|
149 |
+
for j in range(self.n_stage - 1, i - 1, -1):
|
150 |
+
if count[j][iter_chunk_] < self.n_micro:
|
151 |
+
put(0, iter_chunk_, j)
|
152 |
+
iter_chunk_ = 1 - iter_chunk_
|
153 |
+
# while mem[i] + self.fbw_mem[0] <= self.max_mem and cur_time[i] + self.fbw_cost[0] <= tmp:
|
154 |
+
# if iter_chunk_ == 0 and count[i][0] >= count[i - 1][0]:
|
155 |
+
# break
|
156 |
+
# for j in range(self.n_stage - 1, i - 1, -1):
|
157 |
+
# if count[j][iter_chunk_] < self.n_micro:
|
158 |
+
# put(0, iter_chunk_, j)
|
159 |
+
# iter_chunk_ = 1 - iter_chunk_
|
160 |
+
# end_tmp = max(tmp, cur_time[i]) + self.fbw_cost[1]
|
161 |
|
162 |
# init_bubble = get_max_stage_bubble()
|
163 |
# print(stage_bubble)
|
|
|
288 |
_str += _c
|
289 |
print(_str)
|
290 |
|
291 |
+
def get_v_schedule(self, only_run_time=False):
|
292 |
schedule, end_time, max_bubble = None, None, None
|
293 |
expected_time = sum(self.fbw_cost) * self.n_micro * 2
|
294 |
for fill_b in [True, False]:
|
|
|
301 |
max_bubble = _max_bubble
|
302 |
schedule = _schedule
|
303 |
end_time = _end_time
|
304 |
+
if only_run_time:
|
305 |
+
return max_bubble + expected_time
|
306 |
# self.print_details(end_time, print_scaling=1)
|
307 |
+
bubble_rate = max_bubble / (expected_time + max_bubble)
|
308 |
+
print("%2d %3d, [%5d %5d %5d %5d], %6d -> %6.4f" % \
|
309 |
+
(self.n_stage, self.n_micro, *self.fbw_cost, self.c_cost, self.max_mem // self.f_mem, bubble_rate))
|
310 |
local_order = [[] for _ in range(self.n_stage)]
|
311 |
comm_id = {}
|
312 |
comm_id_counter = 0
|
|
|
436 |
# (32, 96, 10419, 10207, 7715, 408, 6144, 48, 64),
|
437 |
# (32, 128, 10408, 10204, 7703, 408, 6144, 48, 64),
|
438 |
# (32, 256, 10402, 10248, 7698, 460, 6144, 48, 64),
|
439 |
+
# (4, 8, 6, 4, 4, 1, 4096, 32, 32),
|
440 |
# (8, 24, 29444, 29718, 19927, 527, 4096, 32, 32),
|
441 |
+
# ( 8, 32, 16099, 16504, 7589, 540, 2304, 24, 16),
|
442 |
+
(16, 48, 14407, 14380, 9676, 1610, 4096, 32, 32),
|
443 |
+
(16, 64, 14412, 14393, 9688, 1621, 4096, 32, 32),
|
444 |
+
(16, 128,14316, 14306, 9639, 1619, 4096, 32, 32),
|
445 |
+
(24, 72, 6763, 6969, 5251, 755, 5120, 40, 48),
|
446 |
+
(24, 96, 6783, 6984, 5259, 758, 5120, 40, 48),
|
447 |
+
(24, 192, 6785, 6990, 5260, 770, 5120, 40, 48),
|
448 |
+
(32, 96, 9458, 9748, 7288, 879, 6144, 48, 64),
|
449 |
+
(32, 128, 9469, 9744, 7306, 892, 6144, 48, 64),
|
450 |
+
(32, 256, 9447, 9644, 7193, 887, 6144, 48, 64),
|
451 |
]
|
452 |
s = 1024
|
453 |
|
454 |
# h, a, s = 4096, 32, 1024
|
455 |
# cost_f, cost_b, cost_w, cost_c = 29718, 29444, 19927, 527
|
456 |
+
for p, n, f, b, w, c, h, a, _ in settings:
|
457 |
mem_f = 34 * h + 5 * a * s
|
458 |
mem_w = - 32 * h
|
459 |
mem_b = - mem_w - mem_f
|
|
|
471 |
max_mem=mem_f * (p * 2 + m_offset),
|
472 |
)
|
473 |
graph.get_v_schedule()
|
474 |
+
break
|