36 #include "rt-hclib-def.h"
37 #include "runtime-support.h"
38 #include "runtime-hclib.h"
/*
 * Debug switch for this file; currently defined as 0.
 * NOTE(review): the printf diagnostics further down look like they are meant
 * to be compiled in only when this is non-zero, but no #if guards are visible
 * in this excerpt - confirm against the full file.
 */
43 #define DEBUG_FORASYNC 0
/*
 * Sequentially executes one 1-D chunk: calls the user function as
 * (*user_fct_ptr)(user_arg, i) for every i from loop0.low up to, but not
 * including, loop0.high, advancing by loop0.stride.
 *
 * forasync_arg: untyped pointer supplied by the caller.
 * NOTE(review): the statements that derive `user`, `user_fct_ptr`, `loop0`
 * and `i` from it are not visible in this excerpt - confirm in the full file.
 */
45 void forasync1D_runner(
void * forasync_arg) {
/* User payload; passed unchanged as the first argument of every call. */
49 void * user_arg = (
void *) user->arg;
/* NOTE(review): nothing visible here checks loop0.stride > 0; with a zero
 * stride and low < high the index never advances toward loop0.high. */
52 for(i=loop0.low; i<loop0.high; i+=loop0.stride) {
53 (*user_fct_ptr)(user_arg, i);
/*
 * Sequentially executes one 2-D chunk: for every (i, j) with i in
 * [loop0.low, loop0.high) stepping by loop0.stride and j in
 * [loop1.low, loop1.high) stepping by loop1.stride, calls
 * (*user_fct_ptr)(user_arg, i, j). The loop over j is the inner loop.
 *
 * forasync_arg: untyped pointer supplied by the caller.
 * NOTE(review): the statements that derive `user`, `user_fct_ptr`, `loop0`,
 * `loop1`, `i` and `j` are not visible in this excerpt - confirm.
 */
57 void forasync2D_runner(
void * forasync_arg) {
/* User payload; passed unchanged as the first argument of every call. */
61 void * user_arg = (
void *) user->arg;
/* NOTE(review): no visible check that either stride is positive. */
65 for(i=loop0.low; i<loop0.high; i+=loop0.stride) {
66 for(j=loop1.low; j<loop1.high; j+=loop1.stride) {
67 (*user_fct_ptr)(user_arg, i, j);
/*
 * Sequentially executes one 3-D chunk: iterates i over loop0, j over loop1
 * and k over loop2 (each from .low up to, but not including, .high in steps
 * of .stride, with k innermost) and calls (*user_fct_ptr)(user_arg, i, j, k)
 * for every triple.
 *
 * forasync_arg: untyped pointer supplied by the caller.
 * NOTE(review): the statements that derive `user`, `user_fct_ptr`, the three
 * loop domains and the index variables are not visible in this excerpt.
 */
72 void forasync3D_runner(
void * forasync_arg) {
/* User payload; passed unchanged as the first argument of every call. */
76 void * user_arg = (
void *) user->arg;
/* NOTE(review): no visible check that any of the strides is positive. */
81 for(i=loop0.low; i<loop0.high; i+=loop0.stride) {
82 for(j=loop1.low; j<loop1.high; j+=loop1.stride) {
83 for(k=loop2.low; k<loop2.high; k+=loop2.stride) {
84 (*user_fct_ptr)(user_arg, i, j, k);
/* NOTE(review): `nb_spawn` is not declared in any visible line, and it is
 * unclear from this excerpt whether this printf belongs to this function or
 * sits behind a DEBUG_FORASYNC guard - confirm against the full file. */
89 printf(
"forasync spawned %d\n", nb_spawn);
/*
 * Divide-and-conquer driver for a 1-D forasync range.
 *
 * When the extent (high0 - low0) exceeds the tile size, a midpoint is
 * computed, a new forasync task is allocated whose entry point is this same
 * function and whose argument is the descriptor embedded in that task, and
 * the current descriptor's upper bound is lowered to the midpoint. The
 * function also calls itself and forasync1D_runner on forasync_arg.
 *
 * forasync_arg: untyped pointer supplied by the caller.
 * NOTE(review): the declarations of `forasync`, `loop0`, `low0`, `new_loop0`
 * and `new_forasync_task`, the step that hands the new task to the scheduler,
 * and the branch structure that chooses between the recursive call and the
 * runner call are not visible in this excerpt - confirm in the full file.
 */
93 void forasync1D_recursive(
void * forasync_arg) {
96 int high0 = loop0.high;
98 int stride0 = loop0.stride;
99 int tile0 = loop0.tile;
/* Split only while the range is wider than one tile. */
103 if((high0-low0) > tile0) {
/* NOTE(review): high0+low0 can overflow int for very large bounds;
 * low0 + (high0-low0)/2 would avoid that. */
104 int mid = (high0+low0)/2;
106 new_forasync_task = allocate_forasync1D_task();
/* The new task re-enters this function, so it can be split further. */
107 new_forasync_task->task.forasync_task.def.fct_ptr = forasync1D_recursive;
/* Its argument points at the descriptor stored inside the task itself. */
108 new_forasync_task->task.forasync_task.def.arg = &(new_forasync_task->def);
/* Carry the caller's user field over to the new descriptor. */
109 new_forasync_task->def.base.user = forasync->base.user;
/* NOTE(review): presumably new_loop0 covers the part from mid upward. */
111 new_forasync_task->def.loop0 = new_loop0;
/* The current descriptor now ends at the midpoint. */
113 forasync->loop0.high = mid;
118 forasync1D_recursive(forasync_arg);
121 forasync1D_runner(forasync_arg);
/*
 * Divide-and-conquer driver for a 2-D forasync range.
 *
 * Dimension 0 is split first: if (high0 - low0) exceeds tile0, a new task is
 * allocated for a new loop0 domain (keeping loop1 as is) and the current
 * descriptor's loop0.high is lowered to the midpoint. Only when dimension 0
 * is not wider than its tile is dimension 1 considered and split the same
 * way. New tasks re-enter this function with their own embedded descriptor
 * as argument. The function also calls itself and forasync2D_runner on
 * forasync_arg.
 *
 * forasync_arg: untyped pointer supplied by the caller.
 * NOTE(review): the declarations of `forasync`, `loop0`, `loop1`,
 * `new_loop0`, `new_loop1` and `new_forasync_task`, the scheduling step, and
 * the exact nesting of the final if / recursive call / runner call are not
 * visible in this excerpt - confirm in the full file.
 */
125 void forasync2D_recursive(
void * forasync_arg) {
/* Local copies of the dimension-0 bounds, stride and tile. */
128 int high0 = loop0.high;
129 int low0 = loop0.low;
130 int stride0 = loop0.stride;
131 int tile0 = loop0.tile;
/* Local copies of the dimension-1 bounds, stride and tile. */
133 int high1 = loop1.high;
134 int low1 = loop1.low;
135 int stride1 = loop1.stride;
136 int tile1 = loop1.tile;
/* First choice: split along dimension 0. */
140 if((high0-low0) > tile0) {
/* NOTE(review): high0+low0 can overflow int for very large bounds. */
141 int mid = (high0+low0)/2;
143 new_forasync_task = allocate_forasync2D_task();
144 new_forasync_task->task.forasync_task.def.fct_ptr = forasync2D_recursive;
/* The task's argument is the descriptor stored inside the task itself. */
145 new_forasync_task->task.forasync_task.def.arg = &(new_forasync_task->def);
146 new_forasync_task->def.base.user = forasync->base.user;
148 new_forasync_task->def.loop0 = new_loop0;
/* Dimension 1 is passed through unchanged. */
149 new_forasync_task->def.loop1 = loop1;
/* The current descriptor now ends at the midpoint in dimension 0. */
151 forasync->loop0.high = mid;
152 }
/* Second choice: dimension 0 fits in one tile, so split dimension 1. */
else if((high1-low1) > tile1) {
/* NOTE(review): high1+low1 can overflow int for very large bounds. */
153 int mid = (high1+low1)/2;
155 new_forasync_task = allocate_forasync2D_task();
156 new_forasync_task->task.forasync_task.def.fct_ptr = forasync2D_recursive;
157 new_forasync_task->task.forasync_task.def.arg = &(new_forasync_task->def);
158 new_forasync_task->def.base.user = forasync->base.user;
/* Dimension 0 is passed through unchanged. */
159 new_forasync_task->def.loop0 = loop0;
161 new_forasync_task->def.loop1 = new_loop1;
/* The current descriptor now ends at the midpoint in dimension 1. */
163 forasync->loop1.high = mid;
/* NOTE(review): presumably new_forasync_task starts out NULL so this test
 * means "a split happened above"; its initialisation is not visible. */
166 if(new_forasync_task != NULL) {
171 forasync2D_recursive(forasync_arg);
173 forasync2D_runner(forasync_arg);
/*
 * Divide-and-conquer driver for a 3-D forasync range.
 *
 * Dimensions are tried in order 0, 1, 2: the first one whose extent
 * (high - low) exceeds its tile is split at its midpoint. A new task is
 * allocated with a new domain for the split dimension and the other two
 * domains passed through unchanged, and the current descriptor's upper bound
 * in the split dimension is lowered to the midpoint. New tasks re-enter this
 * function with their own embedded descriptor as argument. The function also
 * calls itself and forasync3D_runner on forasync_arg.
 *
 * forasync_arg: untyped pointer supplied by the caller.
 * NOTE(review): the declarations of `forasync`, the three loop domains, the
 * `new_loopN` values and `new_forasync_task`, the scheduling step, and the
 * exact nesting of the final if / recursive call / runner call are not
 * visible in this excerpt - confirm in the full file.
 */
177 void forasync3D_recursive(
void * forasync_arg) {
/* Local copies of the dimension-0 bounds, stride and tile. */
180 int high0 = loop0.high;
181 int low0 = loop0.low;
182 int stride0 = loop0.stride;
183 int tile0 = loop0.tile;
/* Local copies of the dimension-1 bounds, stride and tile. */
185 int high1 = loop1.high;
186 int low1 = loop1.low;
187 int stride1 = loop1.stride;
188 int tile1 = loop1.tile;
/* Local copies of the dimension-2 bounds, stride and tile. */
190 int high2 = loop2.high;
191 int low2 = loop2.low;
192 int stride2 = loop2.stride;
193 int tile2 = loop2.tile;
/* First choice: split along dimension 0. */
197 if((high0-low0) > tile0) {
/* NOTE(review): high0+low0 can overflow int for very large bounds. */
198 int mid = (high0+low0)/2;
200 new_forasync_task = allocate_forasync3D_task();
201 new_forasync_task->task.forasync_task.def.fct_ptr = forasync3D_recursive;
/* The task's argument is the descriptor stored inside the task itself. */
202 new_forasync_task->task.forasync_task.def.arg = &(new_forasync_task->def);
203 new_forasync_task->def.base.user = forasync->base.user;
205 new_forasync_task->def.loop0 = new_loop0;
206 new_forasync_task->def.loop1 = loop1;
207 new_forasync_task->def.loop2 = loop2;
/* The current descriptor now ends at the midpoint in dimension 0. */
209 forasync->loop0.high = mid;
210 }
/* Second choice: dimension 0 fits in one tile, so split dimension 1. */
else if((high1-low1) > tile1) {
/* NOTE(review): high1+low1 can overflow int for very large bounds. */
211 int mid = (high1+low1)/2;
213 new_forasync_task = allocate_forasync3D_task();
214 new_forasync_task->task.forasync_task.def.fct_ptr = forasync3D_recursive;
215 new_forasync_task->task.forasync_task.def.arg = &(new_forasync_task->def);
216 new_forasync_task->def.base.user = forasync->base.user;
217 new_forasync_task->def.loop0 = loop0;
219 new_forasync_task->def.loop1 = new_loop1;
220 new_forasync_task->def.loop2 = loop2;
/* The current descriptor now ends at the midpoint in dimension 1. */
222 forasync->loop1.high = mid;
223 }
/* Third choice: dimensions 0 and 1 fit in one tile, so split dimension 2. */
else if((high2-low2) > tile2) {
/* NOTE(review): high2+low2 can overflow int for very large bounds. */
224 int mid = (high2+low2)/2;
226 new_forasync_task = allocate_forasync3D_task();
227 new_forasync_task->task.forasync_task.def.fct_ptr = forasync3D_recursive;
228 new_forasync_task->task.forasync_task.def.arg = &(new_forasync_task->def);
229 new_forasync_task->def.base.user = forasync->base.user;
230 new_forasync_task->def.loop0 = loop0;
231 new_forasync_task->def.loop1 = loop1;
233 new_forasync_task->def.loop2 = new_loop2;
/* The current descriptor now ends at the midpoint in dimension 2. */
235 forasync->loop2.high = mid;
/* NOTE(review): presumably new_forasync_task starts out NULL so this test
 * means "a split happened above"; its initialisation is not visible. */
238 if(new_forasync_task != NULL) {
243 forasync3D_recursive(forasync_arg);
245 forasync3D_runner(forasync_arg);
/*
 * Flat (non-recursive) chunker for a 1-D forasync range.
 *
 * Walks from loop0.low in steps of tile0 while low0 < size and, for each
 * step, fills in a task whose entry point is forasync1D_runner and whose
 * range is [low0, low0 + tile0). A second task description covering
 * [low0, high0) follows the loop.
 *
 * forasync_arg: untyped pointer supplied by the caller.
 * NOTE(review): the declarations of `forasync`, `loop0` and `low0`, the
 * allocation of `new_forasync_task`, the scheduling calls, and the condition
 * guarding the second task are not visible in this excerpt - confirm.
 */
249 void forasync1D_flat(
void * forasync_arg) {
252 int high0 = loop0.high;
253 int stride0 = loop0.stride;
254 int tile0 = loop0.tile;
/* NOTE(review): integer division - tile0 == 0 divides by zero here. */
255 int nb_chunks = (int) (high0/tile0);
/* NOTE(review): nb_chunks and size are derived from high0 alone, not from
 * (high0 - loop0.low). If loop0.low is not a multiple of tile0, the last full
 * tile [low0, low0+tile0) can extend past high0 (e.g. low=5, high=20, tile=4
 * yields a tile [17, 21)). Confirm whether callers always start at 0. */
256 int size = tile0*nb_chunks;
/* NOTE(review): presumably handed to the scheduling call, which is not
 * visible in this excerpt. */
257 finish_t * current_finish = get_current_finish();
259 for(low0 = loop0.low; low0<size; low0+=tile0) {
261 printf(
"Scheduling Task %d %d\n",low0,(low0+tile0));
/* Each chunk is run sequentially by the 1-D runner. */
265 new_forasync_task->task.forasync_task.def.fct_ptr = forasync1D_runner;
/* The task's argument is the descriptor stored inside the task itself. */
266 new_forasync_task->task.forasync_task.def.arg = &(new_forasync_task->def);
267 new_forasync_task->def.base.user = forasync->base.user;
/* Initialiser values in order: low0, low0+tile0, stride0, tile0. */
268 loop_domain_t new_loop0 = {low0, low0+tile0, stride0, tile0};
269 new_forasync_task->def.loop0 = new_loop0;
/* Second task: runs from wherever the loop above stopped up to high0. */
275 printf(
"Scheduling Task %d %d\n",low0,high0);
278 new_forasync_task->task.forasync_task.def.fct_ptr = forasync1D_runner;
279 new_forasync_task->task.forasync_task.def.arg = &(new_forasync_task->def);
280 new_forasync_task->def.base.user = forasync->base.user;
281 loop_domain_t new_loop0 = {low0, high0, loop0.stride, loop0.tile};
282 new_forasync_task->def.loop0 = new_loop0;
/*
 * Flat (non-recursive) chunker for a 2-D forasync range.
 *
 * Tiles dimension 0 in steps of loop0.tile and, inside that, dimension 1 in
 * steps of loop1.tile. Each tile's upper bound is clamped to the dimension's
 * high bound. For every (dimension 0 tile, dimension 1 tile) pair a task is
 * filled in whose entry point is forasync2D_runner and whose two domains are
 * the clamped tile ranges with the original strides and tile sizes.
 *
 * forasync_arg: untyped pointer supplied by the caller.
 * NOTE(review): the declarations of `forasync`, `loop0`, `loop1`, `low0` and
 * `low1`, the allocation of `new_forasync_task` and the scheduling call are
 * not visible in this excerpt - confirm in the full file.
 */
287 void forasync2D_flat(
void * forasync_arg) {
/* NOTE(review): presumably handed to the scheduling call, which is not
 * visible in this excerpt. */
291 finish_t * current_finish = get_current_finish();
/* NOTE(review): a tile of 0 in either dimension would never advance. */
293 for(low0=loop0.low; low0<loop0.high; low0+=loop0.tile) {
/* Clamp so the last tile does not run past loop0.high. */
294 int high0 = (low0+loop0.tile)>loop0.high?loop0.high:(low0+loop0.tile);
296 printf(
"Scheduling Task Loop1 %d %d\n",low0,high0);
298 for(low1=loop1.low; low1<loop1.high; low1+=loop1.tile) {
/* Clamp so the last tile does not run past loop1.high. */
299 int high1 = (low1+loop1.tile)>loop1.high?loop1.high:(low1+loop1.tile);
301 printf(
"Scheduling Task %d %d\n",low1,high1);
/* Each tile is run sequentially by the 2-D runner. */
304 new_forasync_task->task.forasync_task.def.fct_ptr = forasync2D_runner;
/* The task's argument is the descriptor stored inside the task itself. */
305 new_forasync_task->task.forasync_task.def.arg = &(new_forasync_task->def);
306 new_forasync_task->def.base.user = forasync->base.user;
307 loop_domain_t new_loop0 = {low0, high0, loop0.stride, loop0.tile};
308 new_forasync_task->def.loop0 = new_loop0;
309 loop_domain_t new_loop1 = {low1, high1, loop1.stride, loop1.tile};
310 new_forasync_task->def.loop1 = new_loop1;
/*
 * Flat (non-recursive) chunker for a 3-D forasync range.
 *
 * Tiles dimension 0, then dimension 1, then dimension 2 (innermost), each in
 * steps of its own tile size, clamping every tile's upper bound to that
 * dimension's high bound. For every tile triple a task is filled in whose
 * entry point is forasync3D_runner and whose three domains are the clamped
 * tile ranges with the original strides and tile sizes.
 *
 * forasync_arg: untyped pointer supplied by the caller.
 * NOTE(review): the declarations of `forasync` and the three loop domains,
 * the allocation of `new_forasync_task` and the scheduling call are not
 * visible in this excerpt - confirm in the full file.
 */
316 void forasync3D_flat(
void * forasync_arg) {
/* NOTE(review): presumably handed to the scheduling call, which is not
 * visible in this excerpt. */
321 finish_t * current_finish = get_current_finish();
/* Lower bounds of the current tile in each dimension. */
322 int low0, low1, low2;
/* NOTE(review): a tile of 0 in any dimension would never advance. */
323 for(low0=loop0.low; low0<loop0.high; low0+=loop0.tile) {
/* Clamp so the last tile does not run past loop0.high. */
324 int high0 = (low0+loop0.tile)>loop0.high?loop0.high:(low0+loop0.tile);
326 printf(
"Scheduling Task Loop1 %d %d\n",low0,high0);
328 for(low1=loop1.low; low1<loop1.high; low1+=loop1.tile) {
/* Clamp so the last tile does not run past loop1.high. */
329 int high1 = (low1+loop1.tile)>loop1.high?loop1.high:(low1+loop1.tile);
331 printf(
"Scheduling Task Loop2 %d %d\n",low1,high1);
333 for(low2=loop2.low; low2<loop2.high; low2+=loop2.tile) {
/* Clamp so the last tile does not run past loop2.high. */
334 int high2 = (low2+loop2.tile)>loop2.high?loop2.high:(low2+loop2.tile);
336 printf(
"Scheduling Task %d %d\n",low2,high2);
/* Each tile is run sequentially by the 3-D runner. */
339 new_forasync_task->task.forasync_task.def.fct_ptr = forasync3D_runner;
/* The task's argument is the descriptor stored inside the task itself. */
340 new_forasync_task->task.forasync_task.def.arg = &(new_forasync_task->def);
341 new_forasync_task->def.base.user = forasync->base.user;
342 loop_domain_t new_loop0 = {low0, high0, loop0.stride, loop0.tile};
343 new_forasync_task->def.loop0 = new_loop0;
344 loop_domain_t new_loop1 = {low1, high1, loop1.stride, loop1.tile};
345 new_forasync_task->def.loop1 = new_loop1;
346 loop_domain_t new_loop2 = {low2, high2, loop2.stride, loop2.tile};
347 new_forasync_task->def.loop2 = new_loop2;
/*
 * File-local dispatcher behind the public forasync entry point.
 *
 * Stores the user function pointer and user argument in `user_def`, builds a
 * forasync descriptor that points at `user_def` and carries the loop domains
 * for the requested dimensionality, and invokes the handler at index dim-1 of
 * the selected function table (recursive or flat variants for 1-D, 2-D and
 * 3-D).
 *
 * user_fct_ptr: user function, stored untyped in user_def.fct_ptr.
 * user_arg:     user payload, stored in user_def.arg.
 * NOTE(review): the remaining parameters (the call site in this file passes
 * accumed, dim, domain and mode), the declaration of `user_def`, the body of
 * the `accumed != NULL` branch, the choice of `fct_ptr` between the two
 * tables, the per-dimension branching and the 1-D descriptor construction are
 * not visible in this excerpt - confirm in the full file.
 */
354 static void forasync_internal(
void* user_fct_ptr,
void * user_arg,
361 user_def.fct_ptr = user_fct_ptr;
362 user_def.arg = user_arg;
365 if (accumed != NULL) {
/* Only 1, 2 or 3 dimensions are supported. NOTE(review): assert compiles out
 * under NDEBUG, after which an out-of-range dim would index past the
 * three-element tables below. */
369 assert(dim>0 && dim<4);
/* Handler tables indexed by dim-1. */
371 asyncFct_t fct_ptr_rec[3] = {forasync1D_recursive, forasync2D_recursive, forasync3D_recursive};
372 asyncFct_t fct_ptr_flat[3] = {forasync1D_flat, forasync2D_flat, forasync3D_flat};
/* The descriptor is passed by address as an untyped pointer. */
376 (fct_ptr[dim-1])((
void *) &
forasync);
/* 2-D descriptor: pointer to user_def plus the first two loop domains. */
378 forasync2D_t forasync = {{&user_def}, loop_domain[0], loop_domain[1]};
379 (fct_ptr[dim-1])((
void *) &
forasync);
/* 3-D descriptor: pointer to user_def plus the first three loop domains. */
381 forasync3D_t forasync = {{&user_def}, loop_domain[0], loop_domain[1], loop_domain[2]};
382 (fct_ptr[dim-1])((
void *) &
forasync);
/*
 * Public forasync entry point; forwards to forasync_internal.
 *
 * forasync_fct:  user function, passed on as the first argument.
 * argv:          user payload, passed on as the second argument.
 * ddf_list:      NOTE(review): not passed to forasync_internal in the visible
 *                call - confirm whether it is used elsewhere in the body.
 * phased_clause: NOTE(review): likewise not passed on in the visible call.
 * NOTE(review): the remaining parameters (the call uses accumed, dim, domain
 * and mode) and the end of the body are not visible in this excerpt.
 */
390 void forasync(
void* forasync_fct,
void * argv,
struct ddf_st ** ddf_list,
struct _phased_t * phased_clause,
392 forasync_internal(forasync_fct, argv, accumed, dim, domain, mode);