/*
* Copyright 2013-2015 Formal Methods and Tools, University of Twente
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <unistd.h>
#include <stdint.h>
#include <stdio.h>
#include <pthread.h> /* for pthread_t */
#ifndef __LACE_H__
#define __LACE_H__
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/* Some flags */
#ifndef LACE_DEBUG_PROGRAMSTACK /* Write to stderr when 95% of the program stack is used */
#define LACE_DEBUG_PROGRAMSTACK 0
#endif
#ifndef LACE_LEAP_RANDOM /* Use random leaping when leapfrogging fails */
#define LACE_LEAP_RANDOM 1
#endif
#ifndef LACE_PIE_TIMES /* Record time spent stealing and leapfrogging */
#define LACE_PIE_TIMES 0
#endif
#ifndef LACE_COUNT_TASKS /* Count number of tasks executed */
#define LACE_COUNT_TASKS 0
#endif
#ifndef LACE_COUNT_STEALS /* Count number of steals performed */
#define LACE_COUNT_STEALS 0
#endif
#ifndef LACE_COUNT_SPLITS /* Count number of times the split point is moved */
#define LACE_COUNT_SPLITS 0
#endif
#ifndef LACE_COUNT_EVENTS
#define LACE_COUNT_EVENTS (LACE_PIE_TIMES || LACE_COUNT_TASKS || LACE_COUNT_STEALS || LACE_COUNT_SPLITS)
#endif
/* Typical cacheline size of system architectures */
#ifndef LINE_SIZE
#define LINE_SIZE 64
#endif
/* The size of a pointer, 8 bytes on a 64-bit architecture */
#define P_SZ (sizeof(void *))
#define PAD(x,b) ( ( (b) - ((x)%(b)) ) & ((b)-1) ) /* b must be power of 2 */
#define ROUND(x,b) ( (x) + PAD( (x), (b) ) )
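/* Example: PAD(40,64) = 24 and ROUND(40,64) = 64, while an already aligned
value is unchanged: PAD(128,64) = 0 and ROUND(128,64) = 128. */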
/* The size is in bytes. Note that this is without the extra overhead from Lace.
The value must be greater than or equal to the maximum size of your tasks.
The task size is the maximum of the size of the result and the sum of the parameter sizes. */
#ifndef LACE_TASKSIZE
#define LACE_TASKSIZE ((6)*P_SZ)
#endif
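/* Example: a task with three uint64_t parameters and a uint64_t result needs
max(3*8, 8) = 24 bytes, which fits in the default 6*P_SZ = 48 bytes (64-bit).
A hypothetical task with eight pointer-sized parameters would need 8*P_SZ bytes,
e.g. by compiling with -DLACE_TASKSIZE="(8)*P_SZ". */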
/* Some fences */
#ifndef compiler_barrier
#define compiler_barrier() { asm volatile("" ::: "memory"); }
#endif
#ifndef mfence
#define mfence() { asm volatile("mfence" ::: "memory"); }
#endif
/* Compiler specific branch prediction optimization */
#ifndef likely
#define likely(x) __builtin_expect((x),1)
#endif
#ifndef unlikely
#define unlikely(x) __builtin_expect((x),0)
#endif
#if LACE_PIE_TIMES
/* High resolution timer */
static inline uint64_t gethrtime()
{
uint32_t hi, lo;
asm volatile ("rdtsc" : "=a"(lo), "=d"(hi) :: "memory");
return (uint64_t)hi<<32 | lo;
}
#endif
#if LACE_COUNT_EVENTS
void lace_count_reset();
void lace_count_report_file(FILE *file);
#endif
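/* Usage sketch (a minimal example; assumes this header was compiled with one
of the counter flags above, e.g. -DLACE_COUNT_TASKS=1, so that
LACE_COUNT_EVENTS is nonzero):

    lace_count_reset();              // clear the per-worker counters
    // ... run some Lace tasks ...
    lace_count_report_file(stderr);  // print the collected counters

*/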
#if LACE_COUNT_TASKS
#define PR_COUNTTASK(s) PR_INC(s,CTR_tasks)
#else
#define PR_COUNTTASK(s) /* Empty */
#endif
#if LACE_COUNT_STEALS
#define PR_COUNTSTEALS(s,i) PR_INC(s,i)
#else
#define PR_COUNTSTEALS(s,i) /* Empty */
#endif
#if LACE_COUNT_SPLITS
#define PR_COUNTSPLITS(s,i) PR_INC(s,i)
#else
#define PR_COUNTSPLITS(s,i) /* Empty */
#endif
#if LACE_COUNT_EVENTS
#define PR_ADD(s,i,k) ( ((s)->ctr[i])+=k )
#else
#define PR_ADD(s,i,k) /* Empty */
#endif
#define PR_INC(s,i) PR_ADD(s,i,1)
typedef enum {
#ifdef LACE_COUNT_TASKS
CTR_tasks, /* Number of tasks spawned */
#endif
#ifdef LACE_COUNT_STEALS
CTR_steal_tries, /* Number of steal attempts */
CTR_leap_tries, /* Number of leap attempts */
CTR_steals, /* Number of successful steals */
CTR_leaps, /* Number of successful leaps */
CTR_steal_busy, /* Number of steal busies */
CTR_leap_busy, /* Number of leap busies */
#endif
#ifdef LACE_COUNT_SPLITS
CTR_split_grow, /* Number of split right */
CTR_split_shrink,/* Number of split left */
CTR_split_req, /* Number of split requests */
#endif
CTR_fast_sync, /* Number of fast syncs */
CTR_slow_sync, /* Number of slow syncs */
#ifdef LACE_PIE_TIMES
CTR_init, /* Timer for initialization */
CTR_close, /* Timer for shutdown */
CTR_wapp, /* Timer for application code (steal) */
CTR_lapp, /* Timer for application code (leap) */
CTR_wsteal, /* Timer for steal code (steal) */
CTR_lsteal, /* Timer for steal code (leap) */
CTR_wstealsucc, /* Timer for successful steal code (steal) */
CTR_lstealsucc, /* Timer for successful steal code (leap) */
CTR_wsignal, /* Timer for signal after work (steal) */
CTR_lsignal, /* Timer for signal after work (leap) */
#endif
CTR_MAX
} CTR_index;
struct _WorkerP;
struct _Worker;
struct _Task;
#define THIEF_EMPTY ((struct _Worker*)0x0)
#define THIEF_TASK ((struct _Worker*)0x1)
#define THIEF_COMPLETED ((struct _Worker*)0x2)
#define TASK_COMMON_FIELDS(type) \
void (*f)(struct _WorkerP *, struct _Task *, struct type *); \
struct _Worker * volatile thief;
struct __lace_common_fields_only { TASK_COMMON_FIELDS(_Task) };
#define LACE_COMMON_FIELD_SIZE sizeof(struct __lace_common_fields_only)
typedef struct _Task {
TASK_COMMON_FIELDS(_Task)
char p1[PAD(LACE_COMMON_FIELD_SIZE, P_SZ)];
char d[LACE_TASKSIZE];
char p2[PAD(ROUND(LACE_COMMON_FIELD_SIZE, P_SZ) + LACE_TASKSIZE, LINE_SIZE)];
} Task;
typedef union __attribute__((packed)) {
struct {
uint32_t tail;
uint32_t split;
} ts;
uint64_t v;
} TailSplit;
typedef struct _Worker {
Task *dq;
TailSplit ts;
uint8_t allstolen;
char pad1[PAD(P_SZ+sizeof(TailSplit)+1, LINE_SIZE)];
uint8_t movesplit;
} Worker;
typedef struct _WorkerP {
Task *dq; // same as the public Worker's dq
Task *split; // same as dq+ts.ts.split
Task *end; // dq+dq_size
Worker *_public;
size_t stack_trigger; // for stack overflow detection
int16_t worker; // what is my worker id?
uint8_t allstolen; // my allstolen
volatile int enabled; // if this worker is enabled
#if LACE_COUNT_EVENTS
uint64_t ctr[CTR_MAX]; // counters
volatile uint64_t time;
volatile int level;
#endif
uint32_t seed; // my random seed (for lace_steal_random)
} WorkerP;
#define LACE_TYPEDEF_CB(t, f, ...) typedef t (*f)(WorkerP *, Task *, ##__VA_ARGS__);
LACE_TYPEDEF_CB(void, lace_startup_cb, void*);
/**
* Set verbosity level (0 = no startup messages, 1 = startup messages)
* Default level: 0
*/
void lace_set_verbosity(int level);
/**
 * Initialize the master structures for Lace with <n_workers> workers
 * and a default deque size of <dqsize>.
 * Does not create new threads.
 * If n_workers is 0, Lace tries to detect the number of available CPUs.
 */
void lace_init(int n_workers, size_t dqsize);
/**
 * After lace_init, start all worker threads.
 * If cb and arg are set, the current thread is suspended, cb(arg) is called
 * in a new thread, and Lace exits when cb returns.
 * Otherwise, the current thread is initialized as a Lace worker thread.
 */
void lace_startup(size_t stacksize, lace_startup_cb, void* arg);
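/* Startup sketch (a minimal example; the deque size is illustrative, and
passing 0 for stacksize is assumed to select the default stack size):

    lace_init(0, 1000000);  // detect the number of CPUs, deques of 1000000 tasks
    lace_startup(0, 0, 0);  // no callback: this thread becomes a Lace worker
    // ... spawn and sync tasks (see the task macros below) ...
    lace_exit();

*/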
/**
* Initialize current thread as worker <idx> and allocate a deque with size <dqsize>.
* Use this when manually creating worker threads.
*/
void lace_init_worker(int idx, size_t dqsize);
/**
 * Manually spawn worker <idx> with an (optional) program stack size <stacksize>.
 * If fun and arg are set, they override the default startup method.
 * Typical use: for each worker 1..(n_workers-1), call lace_spawn_worker(i, stacksize, 0, 0).
 */
pthread_t lace_spawn_worker(int idx, size_t stacksize, void *(*fun)(void*), void* arg);
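/* Manual startup sketch, following the typical pattern above (n and dqsize
are illustrative, and the ordering is one plausible choice):

    lace_init(n, dqsize);
    for (int i = 1; i < n; i++) lace_spawn_worker(i, 0, 0, 0);
    lace_init_worker(0, dqsize);  // the current thread becomes worker 0

*/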
/**
* Steal a random task.
*/
#define lace_steal_random() CALL(lace_steal_random)
void lace_steal_random_CALL(WorkerP*, Task*);
/**
 * Steal random tasks until *quit is set.
 * Note: the task declarations are at the end; quit is of type int*.
 */
#define lace_steal_random_loop(quit) CALL(lace_steal_random_loop, quit)
#define lace_steal_loop(quit) CALL(lace_steal_loop, quit)
/**
* Barrier (all workers must enter it before progressing)
*/
void lace_barrier();
/**
* Suspend and resume all other workers.
* May only be used when all other workers are idle.
*/
void lace_suspend();
void lace_resume();
/**
 * When all tasks are suspended, workers can be temporarily disabled.
 * With lace_set_workers, workers 0..(N-1) are enabled and workers N..max are disabled.
 * You can never disable the current worker or reduce the number of workers below 1.
 * You cannot add workers beyond the number given at initialization.
 */
void lace_disable_worker(int worker);
void lace_enable_worker(int worker);
void lace_set_workers(int workercount);
int lace_enabled_workers();
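/* Sketch: temporarily run on fewer workers (assumes all other workers are
idle and that disabling happens while they are suspended):

    lace_suspend();
    lace_set_workers(2);  // only workers 0 and 1 remain enabled
    lace_resume();
    // ... run tasks on 2 workers ...
    lace_suspend();
    lace_set_workers((int)lace_workers());  // re-enable all workers
    lace_resume();

*/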
/**
 * Retrieve the number of Lace workers.
 */
size_t lace_workers();
/**
 * Retrieve the default program stack size.
 */
size_t lace_default_stacksize();
/**
 * Retrieve the current worker.
 */
WorkerP *lace_get_worker();
/**
 * Retrieve the current head of the deque.
 */
Task *lace_get_head(WorkerP *);
/**
 * Exit Lace. Called automatically when Lace was started with cb and arg.
 */
void lace_exit();
#define LACE_STOLEN ((Worker*)0)
#define LACE_BUSY ((Worker*)1)
#define LACE_NOWORK ((Worker*)2)
#define TASK(f) ( f##_CALL )
#define WRAP(f, ...) ( f((WorkerP *)__lace_worker, (Task *)__lace_dq_head, ##__VA_ARGS__) )
#define SYNC(f) ( __lace_dq_head--, WRAP(f##_SYNC) )
#define DROP() ( __lace_dq_head--, WRAP(lace_drop) )
#define SPAWN(f, ...) ( WRAP(f##_SPAWN, ##__VA_ARGS__), __lace_dq_head++ )
#define CALL(f, ...) ( WRAP(f##_CALL, ##__VA_ARGS__) )
#define TOGETHER(f, ...) ( WRAP(f##_TOGETHER, ##__VA_ARGS__) )
#define NEWFRAME(f, ...) ( WRAP(f##_NEWFRAME, ##__VA_ARGS__) )
#define STEAL_RANDOM() ( CALL(lace_steal_random) )
#define LACE_WORKER_ID ( __lace_worker->worker )
/* Use LACE_ME to initialize the Lace variables, e.g. when calling Lace tasks from a function that is not itself a Lace task */
#define LACE_ME WorkerP * __attribute__((unused)) __lace_worker = lace_get_worker(); Task * __attribute__((unused)) __lace_dq_head = lace_get_head(__lace_worker);
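/* Sketch: calling a Lace task from a plain C function that runs on a Lace
worker thread (fib is an illustrative task, assumed declared with TASK_1;
see the macros below):

    void from_outside(void)
    {
        LACE_ME;                // set up __lace_worker and __lace_dq_head
        int r = CALL(fib, 42);  // CALL/SPAWN/SYNC are now usable here
        (void)r;
    }

*/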
#define TASK_IS_STOLEN(t) ((size_t)(t)->thief > 1)
#define TASK_IS_COMPLETED(t) ((size_t)(t)->thief == 2)
#define TASK_RESULT(t) (&(t)->d[0])
#if LACE_DEBUG_PROGRAMSTACK
static inline void CHECKSTACK(WorkerP *w)
{
if (w->stack_trigger != 0) {
register size_t rsp;
asm volatile("movq %%rsp, %0" : "+r"(rsp) : : "cc");
if (rsp < w->stack_trigger) {
fputs("Warning: program stack 95% used!\n", stderr);
w->stack_trigger = 0;
}
}
}
#else
#define CHECKSTACK(w) {}
#endif
typedef struct
{
Task *t;
uint8_t all;
char pad[64-sizeof(Task *)-sizeof(uint8_t)];
} lace_newframe_t;
extern lace_newframe_t lace_newframe;
/**
 * Internal functions to start participating in a task in a new frame.
 * Usually the task to execute is copied from lace_newframe.t; it is
 * possible to override the start task via the <task> parameter.
 */
void lace_do_together(WorkerP *__lace_worker, Task *__lace_dq_head, Task *task);
void lace_do_newframe(WorkerP *__lace_worker, Task *__lace_dq_head, Task *task);
void lace_yield(WorkerP *__lace_worker, Task *__lace_dq_head);
#define YIELD_NEWFRAME() { if (unlikely((*(volatile Task**)&lace_newframe.t) != NULL)) lace_yield(__lace_worker, __lace_dq_head); }
#if LACE_PIE_TIMES
static void lace_time_event( WorkerP *w, int event )
{
uint64_t now = gethrtime(),
prev = w->time;
switch( event ) {
// Enter application code
case 1 :
if( w->level == 0 ) {
PR_ADD( w, CTR_init, now - prev );
w->level = 1;
} else if( w->level == 1 ) {
PR_ADD( w, CTR_wsteal, now - prev );
PR_ADD( w, CTR_wstealsucc, now - prev );
} else {
PR_ADD( w, CTR_lsteal, now - prev );
PR_ADD( w, CTR_lstealsucc, now - prev );
}
break;
// Exit application code
case 2 :
if( w->level == 1 ) {
PR_ADD( w, CTR_wapp, now - prev );
} else {
PR_ADD( w, CTR_lapp, now - prev );
}
break;
// Enter sync on stolen
case 3 :
if( w->level == 1 ) {
PR_ADD( w, CTR_wapp, now - prev );
} else {
PR_ADD( w, CTR_lapp, now - prev );
}
w->level++;
break;
// Exit sync on stolen
case 4 :
if( w->level == 1 ) {
fprintf( stderr, "This should not happen, level = %d\n", w->level );
} else {
PR_ADD( w, CTR_lsteal, now - prev );
}
w->level--;
break;
// Return from failed steal
case 7 :
if( w->level == 0 ) {
PR_ADD( w, CTR_init, now - prev );
} else if( w->level == 1 ) {
PR_ADD( w, CTR_wsteal, now - prev );
} else {
PR_ADD( w, CTR_lsteal, now - prev );
}
break;
// Signalling time
case 8 :
if( w->level == 1 ) {
PR_ADD( w, CTR_wsignal, now - prev );
PR_ADD( w, CTR_wsteal, now - prev );
} else {
PR_ADD( w, CTR_lsignal, now - prev );
PR_ADD( w, CTR_lsteal, now - prev );
}
break;
// Done
case 9 :
if( w->level == 0 ) {
PR_ADD( w, CTR_init, now - prev );
} else {
PR_ADD( w, CTR_close, now - prev );
}
break;
default: return;
}
w->time = now;
}
#else
#define lace_time_event( w, e ) /* Empty */
#endif
static Worker* __attribute__((noinline))
lace_steal(WorkerP *self, Task *__dq_head, Worker *victim)
{
if (!victim->allstolen) {
/* Must be volatile. In GCC 4.8, if it is not declared volatile, the
compiler will generate extra memory accesses to victim->ts instead
of comparing the local values ts.ts.tail and ts.ts.split, causing
thieves to steal non-existent tasks! */
register TailSplit ts;
ts.v = *(volatile uint64_t *)&victim->ts.v;
if (ts.ts.tail < ts.ts.split) {
register TailSplit ts_new;
ts_new.v = ts.v;
ts_new.ts.tail++;
if (__sync_bool_compare_and_swap(&victim->ts.v, ts.v, ts_new.v)) {
// Stolen
Task *t = &victim->dq[ts.ts.tail];
t->thief = self->_public;
lace_time_event(self, 1);
t->f(self, __dq_head, t);
lace_time_event(self, 2);
t->thief = THIEF_COMPLETED;
lace_time_event(self, 8);
return LACE_STOLEN;
}
lace_time_event(self, 7);
return LACE_BUSY;
}
if (victim->movesplit == 0) {
victim->movesplit = 1;
PR_COUNTSPLITS(self, CTR_split_req);
}
}
lace_time_event(self, 7);
return LACE_NOWORK;
}
static int
lace_shrink_shared(WorkerP *w)
{
Worker *wt = w->_public;
TailSplit ts;
ts.v = wt->ts.v; /* Force a single memory read */
uint32_t tail = ts.ts.tail;
uint32_t split = ts.ts.split;
if (tail != split) {
uint32_t newsplit = (tail + split)/2;
wt->ts.ts.split = newsplit;
mfence();
tail = *(volatile uint32_t *)&(wt->ts.ts.tail);
if (tail != split) {
if (unlikely(tail > newsplit)) {
newsplit = (tail + split) / 2;
wt->ts.ts.split = newsplit;
}
w->split = w->dq + newsplit;
PR_COUNTSPLITS(w, CTR_split_shrink);
return 0;
}
}
wt->allstolen = 1;
w->allstolen = 1;
return 1;
}
static inline void
lace_leapfrog(WorkerP *__lace_worker, Task *__lace_dq_head)
{
lace_time_event(__lace_worker, 3);
Task *t = __lace_dq_head;
Worker *thief = t->thief;
if (thief != THIEF_COMPLETED) {
while ((size_t)thief <= 1) thief = t->thief;
/* PRE-LEAP: increase head again */
__lace_dq_head += 1;
/* Now leapfrog */
int attempts = 32;
while (thief != THIEF_COMPLETED) {
PR_COUNTSTEALS(__lace_worker, CTR_leap_tries);
Worker *res = lace_steal(__lace_worker, __lace_dq_head, thief);
if (res == LACE_NOWORK) {
YIELD_NEWFRAME();
if ((LACE_LEAP_RANDOM) && (--attempts == 0)) { lace_steal_random(); attempts = 32; }
} else if (res == LACE_STOLEN) {
PR_COUNTSTEALS(__lace_worker, CTR_leaps);
} else if (res == LACE_BUSY) {
PR_COUNTSTEALS(__lace_worker, CTR_leap_busy);
}
compiler_barrier();
thief = t->thief;
}
/* POST-LEAP: really pop the finished task */
/* no need to decrease __lace_dq_head, since it is a local variable */
compiler_barrier();
if (__lace_worker->allstolen == 0) {
/* Assume: tail = split = head (pre-pop) */
/* Now we do a real pop: either decrease tail, split, and head, or declare allstolen */
Worker *wt = __lace_worker->_public;
wt->allstolen = 1;
__lace_worker->allstolen = 1;
}
}
compiler_barrier();
t->thief = THIEF_EMPTY;
lace_time_event(__lace_worker, 4);
}
static __attribute__((noinline))
void lace_drop_slow(WorkerP *w, Task *__dq_head)
{
if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) lace_leapfrog(w, __dq_head);
}
static inline __attribute__((unused))
void lace_drop(WorkerP *w, Task *__dq_head)
{
if (likely(0 == w->_public->movesplit)) {
if (likely(w->split <= __dq_head)) {
return;
}
}
lace_drop_slow(w, __dq_head);
}
// Task macros for tasks of arity 0
#define TASK_DECL_0(RTYPE, NAME) \
\
typedef struct _TD_##NAME { \
TASK_COMMON_FIELDS(_TD_##NAME) \
union { RTYPE res; } d; \
} TD_##NAME; \
\
/* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
\
void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
RTYPE NAME##_CALL(WorkerP *, Task * ); \
static inline RTYPE NAME##_SYNC(WorkerP *, Task *); \
static RTYPE NAME##_SYNC_SLOW(WorkerP *, Task *); \
\
static inline __attribute__((unused)) \
void NAME##_SPAWN(WorkerP *w, Task *__dq_head ) \
{ \
PR_COUNTTASK(w); \
\
TD_##NAME *t; \
TailSplit ts; \
uint32_t head, split, newsplit; \
\
/* assert(__dq_head < w->end); */ /* Assuming to be true */ \
\
t = (TD_##NAME *)__dq_head; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
\
compiler_barrier(); \
\
Worker *wt = w->_public; \
if (unlikely(w->allstolen)) { \
if (wt->movesplit) wt->movesplit = 0; \
head = __dq_head - w->dq; \
ts = (TailSplit){{head,head+1}}; \
wt->ts.v = ts.v; \
compiler_barrier(); \
wt->allstolen = 0; \
w->split = __dq_head+1; \
w->allstolen = 0; \
} else if (unlikely(wt->movesplit)) { \
head = __dq_head - w->dq; \
split = w->split - w->dq; \
newsplit = (split + head + 2)/2; \
wt->ts.ts.split = newsplit; \
w->split = w->dq + newsplit; \
compiler_barrier(); \
wt->movesplit = 0; \
PR_COUNTSPLITS(w, CTR_split_grow); \
} \
} \
\
static inline __attribute__((unused)) \
RTYPE NAME##_NEWFRAME(WorkerP *w, Task *__dq_head ) \
{ \
Task _t; \
TD_##NAME *t = (TD_##NAME *)&_t; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
\
\
lace_do_newframe(w, __dq_head, &_t); \
return ((TD_##NAME *)t)->d.res; \
} \
\
static inline __attribute__((unused)) \
void NAME##_TOGETHER(WorkerP *w, Task *__dq_head ) \
{ \
Task _t; \
TD_##NAME *t = (TD_##NAME *)&_t; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
\
\
lace_do_together(w, __dq_head, &_t); \
} \
\
static __attribute__((noinline)) \
RTYPE NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
{ \
TD_##NAME *t; \
\
if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
lace_leapfrog(w, __dq_head); \
t = (TD_##NAME *)__dq_head; \
return ((TD_##NAME *)t)->d.res; \
} \
\
compiler_barrier(); \
\
Worker *wt = w->_public; \
if (wt->movesplit) { \
Task *t = w->split; \
size_t diff = __dq_head - t; \
diff = (diff + 1) / 2; \
w->split = t + diff; \
wt->ts.ts.split += diff; \
compiler_barrier(); \
wt->movesplit = 0; \
PR_COUNTSPLITS(w, CTR_split_grow); \
} \
\
compiler_barrier(); \
\
t = (TD_##NAME *)__dq_head; \
t->thief = THIEF_EMPTY; \
return NAME##_CALL(w, __dq_head ); \
} \
\
static inline __attribute__((unused)) \
RTYPE NAME##_SYNC(WorkerP *w, Task *__dq_head) \
{ \
/* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \
\
if (likely(0 == w->_public->movesplit)) { \
if (likely(w->split <= __dq_head)) { \
TD_##NAME *t = (TD_##NAME *)__dq_head; \
t->thief = THIEF_EMPTY; \
return NAME##_CALL(w, __dq_head ); \
} \
} \
\
return NAME##_SYNC_SLOW(w, __dq_head); \
} \
\
\
#define TASK_IMPL_0(RTYPE, NAME) \
void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \
{ \
t->d.res = NAME##_CALL(w, __dq_head ); \
} \
\
static inline __attribute__((always_inline)) \
RTYPE NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head ); \
\
/* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\
RTYPE NAME##_CALL(WorkerP *w, Task *__dq_head ) \
{ \
CHECKSTACK(w); \
return NAME##_WORK(w, __dq_head ); \
} \
\
static inline __attribute__((always_inline)) \
RTYPE NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) )\
#define TASK_0(RTYPE, NAME) TASK_DECL_0(RTYPE, NAME) TASK_IMPL_0(RTYPE, NAME)
#define VOID_TASK_DECL_0(NAME) \
\
typedef struct _TD_##NAME { \
TASK_COMMON_FIELDS(_TD_##NAME) \
\
} TD_##NAME; \
\
/* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
\
void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
void NAME##_CALL(WorkerP *, Task * ); \
static inline void NAME##_SYNC(WorkerP *, Task *); \
static void NAME##_SYNC_SLOW(WorkerP *, Task *); \
\
static inline __attribute__((unused)) \
void NAME##_SPAWN(WorkerP *w, Task *__dq_head ) \
{ \
PR_COUNTTASK(w); \
\
TD_##NAME *t; \
TailSplit ts; \
uint32_t head, split, newsplit; \
\
/* assert(__dq_head < w->end); */ /* Assuming to be true */ \
\
t = (TD_##NAME *)__dq_head; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
\
compiler_barrier(); \
\
Worker *wt = w->_public; \
if (unlikely(w->allstolen)) { \
if (wt->movesplit) wt->movesplit = 0; \
head = __dq_head - w->dq; \
ts = (TailSplit){{head,head+1}}; \
wt->ts.v = ts.v; \
compiler_barrier(); \
wt->allstolen = 0; \
w->split = __dq_head+1; \
w->allstolen = 0; \
} else if (unlikely(wt->movesplit)) { \
head = __dq_head - w->dq; \
split = w->split - w->dq; \
newsplit = (split + head + 2)/2; \
wt->ts.ts.split = newsplit; \
w->split = w->dq + newsplit; \
compiler_barrier(); \
wt->movesplit = 0; \
PR_COUNTSPLITS(w, CTR_split_grow); \
} \
} \
\
static inline __attribute__((unused)) \
void NAME##_NEWFRAME(WorkerP *w, Task *__dq_head ) \
{ \
Task _t; \
TD_##NAME *t = (TD_##NAME *)&_t; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
\
\
lace_do_newframe(w, __dq_head, &_t); \
return ; \
} \
\
static inline __attribute__((unused)) \
void NAME##_TOGETHER(WorkerP *w, Task *__dq_head ) \
{ \
Task _t; \
TD_##NAME *t = (TD_##NAME *)&_t; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
\
\
lace_do_together(w, __dq_head, &_t); \
} \
\
static __attribute__((noinline)) \
void NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
{ \
TD_##NAME *t; \
\
if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
lace_leapfrog(w, __dq_head); \
t = (TD_##NAME *)__dq_head; \
return ; \
} \
\
compiler_barrier(); \
\
Worker *wt = w->_public; \
if (wt->movesplit) { \
Task *t = w->split; \
size_t diff = __dq_head - t; \
diff = (diff + 1) / 2; \
w->split = t + diff; \
wt->ts.ts.split += diff; \
compiler_barrier(); \
wt->movesplit = 0; \
PR_COUNTSPLITS(w, CTR_split_grow); \
} \
\
compiler_barrier(); \
\
t = (TD_##NAME *)__dq_head; \
t->thief = THIEF_EMPTY; \
return NAME##_CALL(w, __dq_head ); \
} \
\
static inline __attribute__((unused)) \
void NAME##_SYNC(WorkerP *w, Task *__dq_head) \
{ \
/* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \
\
if (likely(0 == w->_public->movesplit)) { \
if (likely(w->split <= __dq_head)) { \
TD_##NAME *t = (TD_##NAME *)__dq_head; \
t->thief = THIEF_EMPTY; \
return NAME##_CALL(w, __dq_head ); \
} \
} \
\
return NAME##_SYNC_SLOW(w, __dq_head); \
} \
\
\
#define VOID_TASK_IMPL_0(NAME) \
void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \
{ \
NAME##_CALL(w, __dq_head ); \
} \
\
static inline __attribute__((always_inline)) \
void NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head ); \
\
/* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\
void NAME##_CALL(WorkerP *w, Task *__dq_head ) \
{ \
CHECKSTACK(w); \
return NAME##_WORK(w, __dq_head ); \
} \
\
static inline __attribute__((always_inline)) \
void NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) )\
#define VOID_TASK_0(NAME) VOID_TASK_DECL_0(NAME) VOID_TASK_IMPL_0(NAME)
// Task macros for tasks of arity 1
#define TASK_DECL_1(RTYPE, NAME, ATYPE_1) \
\
typedef struct _TD_##NAME { \
TASK_COMMON_FIELDS(_TD_##NAME) \
union { struct { ATYPE_1 arg_1; } args; RTYPE res; } d; \
} TD_##NAME; \
\
/* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
\
void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
RTYPE NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1); \
static inline RTYPE NAME##_SYNC(WorkerP *, Task *); \
static RTYPE NAME##_SYNC_SLOW(WorkerP *, Task *); \
\
static inline __attribute__((unused)) \
void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1) \
{ \
PR_COUNTTASK(w); \
\
TD_##NAME *t; \
TailSplit ts; \
uint32_t head, split, newsplit; \
\
/* assert(__dq_head < w->end); */ /* Assuming to be true */ \
\
t = (TD_##NAME *)__dq_head; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; \
compiler_barrier(); \
\
Worker *wt = w->_public; \
if (unlikely(w->allstolen)) { \
if (wt->movesplit) wt->movesplit = 0; \
head = __dq_head - w->dq; \
ts = (TailSplit){{head,head+1}}; \
wt->ts.v = ts.v; \
compiler_barrier(); \
wt->allstolen = 0; \
w->split = __dq_head+1; \
w->allstolen = 0; \
} else if (unlikely(wt->movesplit)) { \
head = __dq_head - w->dq; \
split = w->split - w->dq; \
newsplit = (split + head + 2)/2; \
wt->ts.ts.split = newsplit; \
w->split = w->dq + newsplit; \
compiler_barrier(); \
wt->movesplit = 0; \
PR_COUNTSPLITS(w, CTR_split_grow); \
} \
} \
\
static inline __attribute__((unused)) \
RTYPE NAME##_NEWFRAME(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1) \
{ \
Task _t; \
TD_##NAME *t = (TD_##NAME *)&_t; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; \
\
lace_do_newframe(w, __dq_head, &_t); \
return ((TD_##NAME *)t)->d.res; \
} \
\
static inline __attribute__((unused)) \
void NAME##_TOGETHER(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1) \
{ \
Task _t; \
TD_##NAME *t = (TD_##NAME *)&_t; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; \
\
lace_do_together(w, __dq_head, &_t); \
} \
\
static __attribute__((noinline)) \
RTYPE NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
{ \
TD_##NAME *t; \
\
if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
lace_leapfrog(w, __dq_head); \
t = (TD_##NAME *)__dq_head; \
return ((TD_##NAME *)t)->d.res; \
} \
\
compiler_barrier(); \
\
Worker *wt = w->_public; \
if (wt->movesplit) { \
Task *t = w->split; \
size_t diff = __dq_head - t; \
diff = (diff + 1) / 2; \
w->split = t + diff; \
wt->ts.ts.split += diff; \
compiler_barrier(); \
wt->movesplit = 0; \
PR_COUNTSPLITS(w, CTR_split_grow); \
} \
\
compiler_barrier(); \
\
t = (TD_##NAME *)__dq_head; \
t->thief = THIEF_EMPTY; \
return NAME##_CALL(w, __dq_head , t->d.args.arg_1); \
} \
\
static inline __attribute__((unused)) \
RTYPE NAME##_SYNC(WorkerP *w, Task *__dq_head) \
{ \
/* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \
\
if (likely(0 == w->_public->movesplit)) { \
if (likely(w->split <= __dq_head)) { \
TD_##NAME *t = (TD_##NAME *)__dq_head; \
t->thief = THIEF_EMPTY; \
return NAME##_CALL(w, __dq_head , t->d.args.arg_1); \
} \
} \
\
return NAME##_SYNC_SLOW(w, __dq_head); \
} \
\
\
#define TASK_IMPL_1(RTYPE, NAME, ATYPE_1, ARG_1) \
void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \
{ \
t->d.res = NAME##_CALL(w, __dq_head , t->d.args.arg_1); \
} \
\
static inline __attribute__((always_inline)) \
RTYPE NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1); \
\
/* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\
RTYPE NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1) \
{ \
CHECKSTACK(w); \
return NAME##_WORK(w, __dq_head , arg_1); \
} \
\
static inline __attribute__((always_inline)) \
RTYPE NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1)\
#define TASK_1(RTYPE, NAME, ATYPE_1, ARG_1) TASK_DECL_1(RTYPE, NAME, ATYPE_1) TASK_IMPL_1(RTYPE, NAME, ATYPE_1, ARG_1)
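/* Example: a recursive Fibonacci task built with the arity-1 macros (the
classic work-stealing sketch; fib is an illustrative name):

    TASK_1(int, fib, int, n)
    {
        if (n < 2) return n;
        SPAWN(fib, n - 1);         // push one branch onto the deque for thieves
        int a = CALL(fib, n - 2);  // run the other branch directly
        int b = SYNC(fib);         // pop or leapfrog the spawned branch
        return a + b;
    }

*/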
#define VOID_TASK_DECL_1(NAME, ATYPE_1) \
\
typedef struct _TD_##NAME { \
TASK_COMMON_FIELDS(_TD_##NAME) \
union { struct { ATYPE_1 arg_1; } args; } d; \
} TD_##NAME; \
\
/* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
\
void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
void NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1); \
static inline void NAME##_SYNC(WorkerP *, Task *); \
static void NAME##_SYNC_SLOW(WorkerP *, Task *); \
\
static inline __attribute__((unused)) \
void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1) \
{ \
PR_COUNTTASK(w); \
\
TD_##NAME *t; \
TailSplit ts; \
uint32_t head, split, newsplit; \
\
/* assert(__dq_head < w->end); */ /* Assuming to be true */ \
\
t = (TD_##NAME *)__dq_head; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; \
compiler_barrier(); \
\
Worker *wt = w->_public; \
if (unlikely(w->allstolen)) { \
if (wt->movesplit) wt->movesplit = 0; \
head = __dq_head - w->dq; \
ts = (TailSplit){{head,head+1}}; \
wt->ts.v = ts.v; \
compiler_barrier(); \
wt->allstolen = 0; \
w->split = __dq_head+1; \
w->allstolen = 0; \
} else if (unlikely(wt->movesplit)) { \
head = __dq_head - w->dq; \
split = w->split - w->dq; \
newsplit = (split + head + 2)/2; \
wt->ts.ts.split = newsplit; \
w->split = w->dq + newsplit; \
compiler_barrier(); \
wt->movesplit = 0; \
PR_COUNTSPLITS(w, CTR_split_grow); \
} \
} \
\
static inline __attribute__((unused)) \
void NAME##_NEWFRAME(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1) \
{ \
Task _t; \
TD_##NAME *t = (TD_##NAME *)&_t; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; \
\
lace_do_newframe(w, __dq_head, &_t); \
return ; \
} \
\
static inline __attribute__((unused)) \
void NAME##_TOGETHER(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1) \
{ \
Task _t; \
TD_##NAME *t = (TD_##NAME *)&_t; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; \
\
lace_do_together(w, __dq_head, &_t); \
} \
\
static __attribute__((noinline)) \
void NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
{ \
TD_##NAME *t; \
\
if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
lace_leapfrog(w, __dq_head); \
t = (TD_##NAME *)__dq_head; \
return ; \
} \
\
compiler_barrier(); \
\
Worker *wt = w->_public; \
if (wt->movesplit) { \
Task *t = w->split; \
size_t diff = __dq_head - t; \
diff = (diff + 1) / 2; \
w->split = t + diff; \
wt->ts.ts.split += diff; \
compiler_barrier(); \
wt->movesplit = 0; \
PR_COUNTSPLITS(w, CTR_split_grow); \
} \
\
compiler_barrier(); \
\
t = (TD_##NAME *)__dq_head; \
t->thief = THIEF_EMPTY; \
return NAME##_CALL(w, __dq_head , t->d.args.arg_1); \
} \
\
static inline __attribute__((unused)) \
void NAME##_SYNC(WorkerP *w, Task *__dq_head) \
{ \
/* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \
\
if (likely(0 == w->_public->movesplit)) { \
if (likely(w->split <= __dq_head)) { \
TD_##NAME *t = (TD_##NAME *)__dq_head; \
t->thief = THIEF_EMPTY; \
return NAME##_CALL(w, __dq_head , t->d.args.arg_1); \
} \
} \
\
return NAME##_SYNC_SLOW(w, __dq_head); \
} \
\
\
#define VOID_TASK_IMPL_1(NAME, ATYPE_1, ARG_1) \
void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \
{ \
NAME##_CALL(w, __dq_head , t->d.args.arg_1); \
} \
\
static inline __attribute__((always_inline)) \
void NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1); \
\
/* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\
void NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1) \
{ \
CHECKSTACK(w); \
return NAME##_WORK(w, __dq_head , arg_1); \
} \
\
static inline __attribute__((always_inline)) \
void NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1)\
#define VOID_TASK_1(NAME, ATYPE_1, ARG_1) VOID_TASK_DECL_1(NAME, ATYPE_1) VOID_TASK_IMPL_1(NAME, ATYPE_1, ARG_1)
// Task macros for tasks of arity 2
#define TASK_DECL_2(RTYPE, NAME, ATYPE_1, ATYPE_2) \
\
typedef struct _TD_##NAME { \
TASK_COMMON_FIELDS(_TD_##NAME) \
union { struct { ATYPE_1 arg_1; ATYPE_2 arg_2; } args; RTYPE res; } d; \
} TD_##NAME; \
\
/* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
\
void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
RTYPE NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2); \
static inline RTYPE NAME##_SYNC(WorkerP *, Task *); \
static RTYPE NAME##_SYNC_SLOW(WorkerP *, Task *); \
\
static inline __attribute__((unused)) \
void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2) \
{ \
PR_COUNTTASK(w); \
\
TD_##NAME *t; \
TailSplit ts; \
uint32_t head, split, newsplit; \
\
/* assert(__dq_head < w->end); */ /* Assuming to be true */ \
\
t = (TD_##NAME *)__dq_head; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; \
compiler_barrier(); \
\
Worker *wt = w->_public; \
if (unlikely(w->allstolen)) { \
if (wt->movesplit) wt->movesplit = 0; \
head = __dq_head - w->dq; \
ts = (TailSplit){{head,head+1}}; \
wt->ts.v = ts.v; \
compiler_barrier(); \
wt->allstolen = 0; \
w->split = __dq_head+1; \
w->allstolen = 0; \
} else if (unlikely(wt->movesplit)) { \
head = __dq_head - w->dq; \
split = w->split - w->dq; \
newsplit = (split + head + 2)/2; \
wt->ts.ts.split = newsplit; \
w->split = w->dq + newsplit; \
compiler_barrier(); \
wt->movesplit = 0; \
PR_COUNTSPLITS(w, CTR_split_grow); \
} \
} \
\
static inline __attribute__((unused)) \
RTYPE NAME##_NEWFRAME(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2) \
{ \
Task _t; \
TD_##NAME *t = (TD_##NAME *)&_t; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; \
\
lace_do_newframe(w, __dq_head, &_t); \
return ((TD_##NAME *)t)->d.res; \
} \
\
static inline __attribute__((unused)) \
void NAME##_TOGETHER(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2) \
{ \
Task _t; \
TD_##NAME *t = (TD_##NAME *)&_t; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; \
\
lace_do_together(w, __dq_head, &_t); \
} \
\
static __attribute__((noinline)) \
RTYPE NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
{ \
TD_##NAME *t; \
\
if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
lace_leapfrog(w, __dq_head); \
t = (TD_##NAME *)__dq_head; \
return ((TD_##NAME *)t)->d.res; \
} \
\
compiler_barrier(); \
\
Worker *wt = w->_public; \
if (wt->movesplit) { \
Task *t = w->split; \
size_t diff = __dq_head - t; \
diff = (diff + 1) / 2; \
w->split = t + diff; \
wt->ts.ts.split += diff; \
compiler_barrier(); \
wt->movesplit = 0; \
PR_COUNTSPLITS(w, CTR_split_grow); \
} \
\
compiler_barrier(); \
\
t = (TD_##NAME *)__dq_head; \
t->thief = THIEF_EMPTY; \
return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2); \
} \
\
static inline __attribute__((unused)) \
RTYPE NAME##_SYNC(WorkerP *w, Task *__dq_head) \
{ \
/* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \
\
if (likely(0 == w->_public->movesplit)) { \
if (likely(w->split <= __dq_head)) { \
TD_##NAME *t = (TD_##NAME *)__dq_head; \
t->thief = THIEF_EMPTY; \
return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2); \
} \
} \
\
return NAME##_SYNC_SLOW(w, __dq_head); \
} \
\
\
#define TASK_IMPL_2(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2) \
void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \
{ \
t->d.res = NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2); \
} \
\
static inline __attribute__((always_inline)) \
RTYPE NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2); \
\
/* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\
RTYPE NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2) \
{ \
CHECKSTACK(w); \
return NAME##_WORK(w, __dq_head , arg_1, arg_2); \
} \
\
static inline __attribute__((always_inline)) \
RTYPE NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2)\
#define TASK_2(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2) TASK_DECL_2(RTYPE, NAME, ATYPE_1, ATYPE_2) TASK_IMPL_2(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2)
#define VOID_TASK_DECL_2(NAME, ATYPE_1, ATYPE_2) \
\
typedef struct _TD_##NAME { \
TASK_COMMON_FIELDS(_TD_##NAME) \
union { struct { ATYPE_1 arg_1; ATYPE_2 arg_2; } args; } d; \
} TD_##NAME; \
\
/* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
\
void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
void NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2); \
static inline void NAME##_SYNC(WorkerP *, Task *); \
static void NAME##_SYNC_SLOW(WorkerP *, Task *); \
\
static inline __attribute__((unused)) \
void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2) \
{ \
PR_COUNTTASK(w); \
\
TD_##NAME *t; \
TailSplit ts; \
uint32_t head, split, newsplit; \
\
/* assert(__dq_head < w->end); */ /* Assuming to be true */ \
\
t = (TD_##NAME *)__dq_head; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; \
compiler_barrier(); \
\
Worker *wt = w->_public; \
if (unlikely(w->allstolen)) { \
if (wt->movesplit) wt->movesplit = 0; \
head = __dq_head - w->dq; \
ts = (TailSplit){{head,head+1}}; \
wt->ts.v = ts.v; \
compiler_barrier(); \
wt->allstolen = 0; \
w->split = __dq_head+1; \
w->allstolen = 0; \
} else if (unlikely(wt->movesplit)) { \
head = __dq_head - w->dq; \
split = w->split - w->dq; \
newsplit = (split + head + 2)/2; \
wt->ts.ts.split = newsplit; \
w->split = w->dq + newsplit; \
compiler_barrier(); \
wt->movesplit = 0; \
PR_COUNTSPLITS(w, CTR_split_grow); \
} \
} \
\
static inline __attribute__((unused)) \
void NAME##_NEWFRAME(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2) \
{ \
Task _t; \
TD_##NAME *t = (TD_##NAME *)&_t; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; \
\
lace_do_newframe(w, __dq_head, &_t); \
return ; \
} \
\
static inline __attribute__((unused)) \
void NAME##_TOGETHER(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2) \
{ \
Task _t; \
TD_##NAME *t = (TD_##NAME *)&_t; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; \
\
lace_do_together(w, __dq_head, &_t); \
} \
\
static __attribute__((noinline)) \
void NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
{ \
TD_##NAME *t; \
\
if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
lace_leapfrog(w, __dq_head); \
t = (TD_##NAME *)__dq_head; \
return ; \
} \
\
compiler_barrier(); \
\
Worker *wt = w->_public; \
if (wt->movesplit) { \
Task *t = w->split; \
size_t diff = __dq_head - t; \
diff = (diff + 1) / 2; \
w->split = t + diff; \
wt->ts.ts.split += diff; \
compiler_barrier(); \
wt->movesplit = 0; \
PR_COUNTSPLITS(w, CTR_split_grow); \
} \
\
compiler_barrier(); \
\
t = (TD_##NAME *)__dq_head; \
t->thief = THIEF_EMPTY; \
return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2); \
} \
\
static inline __attribute__((unused)) \
void NAME##_SYNC(WorkerP *w, Task *__dq_head) \
{ \
/* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \
\
if (likely(0 == w->_public->movesplit)) { \
if (likely(w->split <= __dq_head)) { \
TD_##NAME *t = (TD_##NAME *)__dq_head; \
t->thief = THIEF_EMPTY; \
return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2); \
} \
} \
\
return NAME##_SYNC_SLOW(w, __dq_head); \
} \
\
\
#define VOID_TASK_IMPL_2(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2) \
void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \
{ \
NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2); \
} \
\
static inline __attribute__((always_inline)) \
void NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2); \
\
/* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\
void NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2) \
{ \
CHECKSTACK(w); \
return NAME##_WORK(w, __dq_head , arg_1, arg_2); \
} \
\
static inline __attribute__((always_inline)) \
void NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2)\
#define VOID_TASK_2(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2) VOID_TASK_DECL_2(NAME, ATYPE_1, ATYPE_2) VOID_TASK_IMPL_2(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2)
// Task macros for tasks of arity 3
#define TASK_DECL_3(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3) \
\
typedef struct _TD_##NAME { \
TASK_COMMON_FIELDS(_TD_##NAME) \
union { struct { ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3; } args; RTYPE res; } d;\
} TD_##NAME; \
\
/* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
\
void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
RTYPE NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3); \
static inline RTYPE NAME##_SYNC(WorkerP *, Task *); \
static RTYPE NAME##_SYNC_SLOW(WorkerP *, Task *); \
\
static inline __attribute__((unused)) \
void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3)\
{ \
PR_COUNTTASK(w); \
\
TD_##NAME *t; \
TailSplit ts; \
uint32_t head, split, newsplit; \
\
/* assert(__dq_head < w->end); */ /* Assuming to be true */ \
\
t = (TD_##NAME *)__dq_head; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; \
compiler_barrier(); \
\
Worker *wt = w->_public; \
if (unlikely(w->allstolen)) { \
if (wt->movesplit) wt->movesplit = 0; \
head = __dq_head - w->dq; \
ts = (TailSplit){{head,head+1}}; \
wt->ts.v = ts.v; \
compiler_barrier(); \
wt->allstolen = 0; \
w->split = __dq_head+1; \
w->allstolen = 0; \
} else if (unlikely(wt->movesplit)) { \
head = __dq_head - w->dq; \
split = w->split - w->dq; \
newsplit = (split + head + 2)/2; \
wt->ts.ts.split = newsplit; \
w->split = w->dq + newsplit; \
compiler_barrier(); \
wt->movesplit = 0; \
PR_COUNTSPLITS(w, CTR_split_grow); \
} \
} \
\
static inline __attribute__((unused)) \
RTYPE NAME##_NEWFRAME(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3)\
{ \
Task _t; \
TD_##NAME *t = (TD_##NAME *)&_t; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; \
\
lace_do_newframe(w, __dq_head, &_t); \
return ((TD_##NAME *)t)->d.res; \
} \
\
static inline __attribute__((unused)) \
void NAME##_TOGETHER(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3)\
{ \
Task _t; \
TD_##NAME *t = (TD_##NAME *)&_t; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; \
\
lace_do_together(w, __dq_head, &_t); \
} \
\
static __attribute__((noinline)) \
RTYPE NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
{ \
TD_##NAME *t; \
\
if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
lace_leapfrog(w, __dq_head); \
t = (TD_##NAME *)__dq_head; \
return ((TD_##NAME *)t)->d.res; \
} \
\
compiler_barrier(); \
\
Worker *wt = w->_public; \
if (wt->movesplit) { \
Task *t = w->split; \
size_t diff = __dq_head - t; \
diff = (diff + 1) / 2; \
w->split = t + diff; \
wt->ts.ts.split += diff; \
compiler_barrier(); \
wt->movesplit = 0; \
PR_COUNTSPLITS(w, CTR_split_grow); \
} \
\
compiler_barrier(); \
\
t = (TD_##NAME *)__dq_head; \
t->thief = THIEF_EMPTY; \
return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3);\
} \
\
static inline __attribute__((unused)) \
RTYPE NAME##_SYNC(WorkerP *w, Task *__dq_head) \
{ \
/* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \
\
if (likely(0 == w->_public->movesplit)) { \
if (likely(w->split <= __dq_head)) { \
TD_##NAME *t = (TD_##NAME *)__dq_head; \
t->thief = THIEF_EMPTY; \
return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3);\
} \
} \
\
return NAME##_SYNC_SLOW(w, __dq_head); \
} \
\
\
#define TASK_IMPL_3(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3) \
void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \
{ \
t->d.res = NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3);\
} \
\
static inline __attribute__((always_inline)) \
RTYPE NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2, ATYPE_3);\
\
/* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\
RTYPE NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3)\
{ \
CHECKSTACK(w); \
return NAME##_WORK(w, __dq_head , arg_1, arg_2, arg_3); \
} \
\
static inline __attribute__((always_inline)) \
RTYPE NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3)\
#define TASK_3(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3) TASK_DECL_3(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3) TASK_IMPL_3(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3)
#define VOID_TASK_DECL_3(NAME, ATYPE_1, ATYPE_2, ATYPE_3) \
\
typedef struct _TD_##NAME { \
TASK_COMMON_FIELDS(_TD_##NAME) \
union { struct { ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3; } args; } d; \
} TD_##NAME; \
\
/* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
\
void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
void NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3); \
static inline void NAME##_SYNC(WorkerP *, Task *); \
static void NAME##_SYNC_SLOW(WorkerP *, Task *); \
\
static inline __attribute__((unused)) \
void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3)\
{ \
PR_COUNTTASK(w); \
\
TD_##NAME *t; \
TailSplit ts; \
uint32_t head, split, newsplit; \
\
/* assert(__dq_head < w->end); */ /* Assuming to be true */ \
\
t = (TD_##NAME *)__dq_head; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; \
compiler_barrier(); \
\
Worker *wt = w->_public; \
if (unlikely(w->allstolen)) { \
if (wt->movesplit) wt->movesplit = 0; \
head = __dq_head - w->dq; \
ts = (TailSplit){{head,head+1}}; \
wt->ts.v = ts.v; \
compiler_barrier(); \
wt->allstolen = 0; \
w->split = __dq_head+1; \
w->allstolen = 0; \
} else if (unlikely(wt->movesplit)) { \
head = __dq_head - w->dq; \
split = w->split - w->dq; \
newsplit = (split + head + 2)/2; \
wt->ts.ts.split = newsplit; \
w->split = w->dq + newsplit; \
compiler_barrier(); \
wt->movesplit = 0; \
PR_COUNTSPLITS(w, CTR_split_grow); \
} \
} \
\
static inline __attribute__((unused)) \
void NAME##_NEWFRAME(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3)\
{ \
Task _t; \
TD_##NAME *t = (TD_##NAME *)&_t; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; \
\
lace_do_newframe(w, __dq_head, &_t); \
return ; \
} \
\
static inline __attribute__((unused)) \
void NAME##_TOGETHER(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3)\
{ \
Task _t; \
TD_##NAME *t = (TD_##NAME *)&_t; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; \
\
lace_do_together(w, __dq_head, &_t); \
} \
\
static __attribute__((noinline)) \
void NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
{ \
TD_##NAME *t; \
\
if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
lace_leapfrog(w, __dq_head); \
t = (TD_##NAME *)__dq_head; \
return ; \
} \
\
compiler_barrier(); \
\
Worker *wt = w->_public; \
if (wt->movesplit) { \
Task *t = w->split; \
size_t diff = __dq_head - t; \
diff = (diff + 1) / 2; \
w->split = t + diff; \
wt->ts.ts.split += diff; \
compiler_barrier(); \
wt->movesplit = 0; \
PR_COUNTSPLITS(w, CTR_split_grow); \
} \
\
compiler_barrier(); \
\
t = (TD_##NAME *)__dq_head; \
t->thief = THIEF_EMPTY; \
return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3);\
} \
\
static inline __attribute__((unused)) \
void NAME##_SYNC(WorkerP *w, Task *__dq_head) \
{ \
/* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \
\
if (likely(0 == w->_public->movesplit)) { \
if (likely(w->split <= __dq_head)) { \
TD_##NAME *t = (TD_##NAME *)__dq_head; \
t->thief = THIEF_EMPTY; \
return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3);\
} \
} \
\
return NAME##_SYNC_SLOW(w, __dq_head); \
} \
\
\
#define VOID_TASK_IMPL_3(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3) \
void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \
{ \
NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3); \
} \
\
static inline __attribute__((always_inline)) \
void NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2, ATYPE_3);\
\
/* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\
void NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3)\
{ \
CHECKSTACK(w); \
return NAME##_WORK(w, __dq_head , arg_1, arg_2, arg_3); \
} \
\
static inline __attribute__((always_inline)) \
void NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3)\
#define VOID_TASK_3(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3) VOID_TASK_DECL_3(NAME, ATYPE_1, ATYPE_2, ATYPE_3) VOID_TASK_IMPL_3(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3)
// Task macros for tasks of arity 4
#define TASK_DECL_4(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4) \
\
typedef struct _TD_##NAME { \
TASK_COMMON_FIELDS(_TD_##NAME) \
union { struct { ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3; ATYPE_4 arg_4; } args; RTYPE res; } d;\
} TD_##NAME; \
\
/* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
\
void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
RTYPE NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4);\
static inline RTYPE NAME##_SYNC(WorkerP *, Task *); \
static RTYPE NAME##_SYNC_SLOW(WorkerP *, Task *); \
\
static inline __attribute__((unused)) \
void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)\
{ \
PR_COUNTTASK(w); \
\
TD_##NAME *t; \
TailSplit ts; \
uint32_t head, split, newsplit; \
\
/* assert(__dq_head < w->end); */ /* Assuming to be true */ \
\
t = (TD_##NAME *)__dq_head; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\
compiler_barrier(); \
\
Worker *wt = w->_public; \
if (unlikely(w->allstolen)) { \
if (wt->movesplit) wt->movesplit = 0; \
head = __dq_head - w->dq; \
ts = (TailSplit){{head,head+1}}; \
wt->ts.v = ts.v; \
compiler_barrier(); \
wt->allstolen = 0; \
w->split = __dq_head+1; \
w->allstolen = 0; \
} else if (unlikely(wt->movesplit)) { \
head = __dq_head - w->dq; \
split = w->split - w->dq; \
newsplit = (split + head + 2)/2; \
wt->ts.ts.split = newsplit; \
w->split = w->dq + newsplit; \
compiler_barrier(); \
wt->movesplit = 0; \
PR_COUNTSPLITS(w, CTR_split_grow); \
} \
} \
\
static inline __attribute__((unused)) \
RTYPE NAME##_NEWFRAME(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)\
{ \
Task _t; \
TD_##NAME *t = (TD_##NAME *)&_t; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\
\
lace_do_newframe(w, __dq_head, &_t); \
return ((TD_##NAME *)t)->d.res; \
} \
\
static inline __attribute__((unused)) \
void NAME##_TOGETHER(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)\
{ \
Task _t; \
TD_##NAME *t = (TD_##NAME *)&_t; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\
\
lace_do_together(w, __dq_head, &_t); \
} \
\
static __attribute__((noinline)) \
RTYPE NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
{ \
TD_##NAME *t; \
\
if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
lace_leapfrog(w, __dq_head); \
t = (TD_##NAME *)__dq_head; \
return ((TD_##NAME *)t)->d.res; \
} \
\
compiler_barrier(); \
\
Worker *wt = w->_public; \
if (wt->movesplit) { \
Task *t = w->split; \
size_t diff = __dq_head - t; \
diff = (diff + 1) / 2; \
w->split = t + diff; \
wt->ts.ts.split += diff; \
compiler_barrier(); \
wt->movesplit = 0; \
PR_COUNTSPLITS(w, CTR_split_grow); \
} \
\
compiler_barrier(); \
\
t = (TD_##NAME *)__dq_head; \
t->thief = THIEF_EMPTY; \
return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\
} \
\
static inline __attribute__((unused)) \
RTYPE NAME##_SYNC(WorkerP *w, Task *__dq_head) \
{ \
/* assert (__dq_head > 0); */ /* Commented out because we assume contract */ \
\
if (likely(0 == w->_public->movesplit)) { \
if (likely(w->split <= __dq_head)) { \
TD_##NAME *t = (TD_##NAME *)__dq_head; \
t->thief = THIEF_EMPTY; \
return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\
} \
} \
\
return NAME##_SYNC_SLOW(w, __dq_head); \
} \
\
\
#define TASK_IMPL_4(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4)\
void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \
{ \
t->d.res = NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\
} \
\
static inline __attribute__((always_inline)) \
RTYPE NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4);\
\
/* NAME##_WORK is inlined in NAME##_CALL and the parameter __lace_in_task will disappear */\
RTYPE NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)\
{ \
CHECKSTACK(w); \
return NAME##_WORK(w, __dq_head , arg_1, arg_2, arg_3, arg_4); \
} \
\
static inline __attribute__((always_inline)) \
RTYPE NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3, ATYPE_4 ARG_4)\
#define TASK_4(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4) TASK_DECL_4(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4) TASK_IMPL_4(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4)
#define VOID_TASK_DECL_4(NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4) \
\
typedef struct _TD_##NAME { \
TASK_COMMON_FIELDS(_TD_##NAME) \
union { struct { ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3; ATYPE_4 arg_4; } args; } d;\
} TD_##NAME; \
\
/* If this line generates an error, please manually set the define LACE_TASKSIZE to a higher value */\
typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
\
void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
void NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4);\
static inline void NAME##_SYNC(WorkerP *, Task *); \
static void NAME##_SYNC_SLOW(WorkerP *, Task *); \
\
static inline __attribute__((unused)) \
void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)\
{ \
PR_COUNTTASK(w); \
\
TD_##NAME *t; \
TailSplit ts; \
uint32_t head, split, newsplit; \
\
/* assert(__dq_head < w->end); */ /* Assuming to be true */ \
\
t = (TD_##NAME *)__dq_head; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\
compiler_barrier(); \
\
Worker *wt = w->_public; \
if (unlikely(w->allstolen)) { \
if (wt->movesplit) wt->movesplit = 0; \
head = __dq_head - w->dq; \
ts = (TailSplit){{head,head+1}}; \
wt->ts.v = ts.v; \
compiler_barrier(); \
wt->allstolen = 0; \
w->split = __dq_head+1; \
w->allstolen = 0; \
} else if (unlikely(wt->movesplit)) { \
head = __dq_head - w->dq; \
split = w->split - w->dq; \
newsplit = (split + head + 2)/2; \
wt->ts.ts.split = newsplit; \
w->split = w->dq + newsplit; \
compiler_barrier(); \
wt->movesplit = 0; \
PR_COUNTSPLITS(w, CTR_split_grow); \
} \
} \
\
static inline __attribute__((unused)) \
void NAME##_NEWFRAME(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)\
{ \
Task _t; \
TD_##NAME *t = (TD_##NAME *)&_t; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\
\
lace_do_newframe(w, __dq_head, &_t); \
return ; \
} \
\
static inline __attribute__((unused)) \
void NAME##_TOGETHER(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)\
{ \
Task _t; \
TD_##NAME *t = (TD_##NAME *)&_t; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4;\
\
lace_do_together(w, __dq_head, &_t); \
} \
\
static __attribute__((noinline)) \
void NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
{ \
TD_##NAME *t; \
\
if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
lace_leapfrog(w, __dq_head); \
        return ; \
} \
\
compiler_barrier(); \
\
Worker *wt = w->_public; \
if (wt->movesplit) { \
Task *t = w->split; \
size_t diff = __dq_head - t; \
diff = (diff + 1) / 2; \
w->split = t + diff; \
wt->ts.ts.split += diff; \
compiler_barrier(); \
wt->movesplit = 0; \
PR_COUNTSPLITS(w, CTR_split_grow); \
} \
\
compiler_barrier(); \
\
t = (TD_##NAME *)__dq_head; \
t->thief = THIEF_EMPTY; \
    NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\
} \
\
static inline __attribute__((unused)) \
void NAME##_SYNC(WorkerP *w, Task *__dq_head) \
{ \
    /* assert (__dq_head > 0); */ /* disabled: the caller guarantees a non-empty deque */ \
\
if (likely(0 == w->_public->movesplit)) { \
if (likely(w->split <= __dq_head)) { \
TD_##NAME *t = (TD_##NAME *)__dq_head; \
t->thief = THIEF_EMPTY; \
            NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\
            return ; \
} \
} \
\
    NAME##_SYNC_SLOW(w, __dq_head); \
}
#define VOID_TASK_IMPL_4(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4)\
void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \
{ \
NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4);\
} \
\
static inline __attribute__((always_inline)) \
void NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4);\
\
/* NAME##_WORK is inlined into NAME##_CALL, so the wrapper adds no call overhead */\
void NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4)\
{ \
CHECKSTACK(w); \
    NAME##_WORK(w, __dq_head , arg_1, arg_2, arg_3, arg_4); \
} \
\
static inline __attribute__((always_inline)) \
void NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3, ATYPE_4 ARG_4)
#define VOID_TASK_4(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4) VOID_TASK_DECL_4(NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4) VOID_TASK_IMPL_4(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4)
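/*
 * Sketch of a void task (hypothetical name `fill`; SPAWN/CALL/SYNC wrappers
 * assumed as above). The void variants omit the result field, so the
 * descriptor union carries only the arguments:
 *
 *     VOID_TASK_4(fill, int*, arr, int, lo, int, hi, int, v)
 *     {
 *         if (hi - lo < 1024) {
 *             for (int i = lo; i < hi; i++) arr[i] = v;
 *             return;
 *         }
 *         SPAWN(fill, arr, lo, (lo + hi) / 2, v);  // stealable first half
 *         CALL(fill, arr, (lo + hi) / 2, hi, v);   // run second half directly
 *         SYNC(fill);
 *     }
 */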
// Task macros for tasks of arity 5
#define TASK_DECL_5(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5) \
\
typedef struct _TD_##NAME { \
TASK_COMMON_FIELDS(_TD_##NAME) \
union { struct { ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3; ATYPE_4 arg_4; ATYPE_5 arg_5; } args; RTYPE res; } d;\
} TD_##NAME; \
\
/* If this line fails to compile, increase the LACE_TASKSIZE define at the top of this file */\
typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
\
void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
RTYPE NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5);\
static inline RTYPE NAME##_SYNC(WorkerP *, Task *); \
static RTYPE NAME##_SYNC_SLOW(WorkerP *, Task *); \
\
static inline __attribute__((unused)) \
void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\
{ \
PR_COUNTTASK(w); \
\
TD_##NAME *t; \
TailSplit ts; \
uint32_t head, split, newsplit; \
\
    /* assert(__dq_head < w->end); */ /* assumed to hold: the deque is never full here */ \
\
t = (TD_##NAME *)__dq_head; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\
compiler_barrier(); \
\
Worker *wt = w->_public; \
if (unlikely(w->allstolen)) { \
if (wt->movesplit) wt->movesplit = 0; \
head = __dq_head - w->dq; \
ts = (TailSplit){{head,head+1}}; \
wt->ts.v = ts.v; \
compiler_barrier(); \
wt->allstolen = 0; \
w->split = __dq_head+1; \
w->allstolen = 0; \
} else if (unlikely(wt->movesplit)) { \
head = __dq_head - w->dq; \
split = w->split - w->dq; \
newsplit = (split + head + 2)/2; \
wt->ts.ts.split = newsplit; \
w->split = w->dq + newsplit; \
compiler_barrier(); \
wt->movesplit = 0; \
PR_COUNTSPLITS(w, CTR_split_grow); \
} \
} \
\
static inline __attribute__((unused)) \
RTYPE NAME##_NEWFRAME(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\
{ \
Task _t; \
TD_##NAME *t = (TD_##NAME *)&_t; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\
\
lace_do_newframe(w, __dq_head, &_t); \
    return t->d.res; \
} \
\
static inline __attribute__((unused)) \
void NAME##_TOGETHER(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\
{ \
Task _t; \
TD_##NAME *t = (TD_##NAME *)&_t; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\
\
lace_do_together(w, __dq_head, &_t); \
} \
\
static __attribute__((noinline)) \
RTYPE NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
{ \
TD_##NAME *t; \
\
if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
lace_leapfrog(w, __dq_head); \
t = (TD_##NAME *)__dq_head; \
        return t->d.res; \
} \
\
compiler_barrier(); \
\
Worker *wt = w->_public; \
if (wt->movesplit) { \
Task *t = w->split; \
size_t diff = __dq_head - t; \
diff = (diff + 1) / 2; \
w->split = t + diff; \
wt->ts.ts.split += diff; \
compiler_barrier(); \
wt->movesplit = 0; \
PR_COUNTSPLITS(w, CTR_split_grow); \
} \
\
compiler_barrier(); \
\
t = (TD_##NAME *)__dq_head; \
t->thief = THIEF_EMPTY; \
return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\
} \
\
static inline __attribute__((unused)) \
RTYPE NAME##_SYNC(WorkerP *w, Task *__dq_head) \
{ \
    /* assert (__dq_head > 0); */ /* disabled: the caller guarantees a non-empty deque */ \
\
if (likely(0 == w->_public->movesplit)) { \
if (likely(w->split <= __dq_head)) { \
TD_##NAME *t = (TD_##NAME *)__dq_head; \
t->thief = THIEF_EMPTY; \
return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\
} \
} \
\
return NAME##_SYNC_SLOW(w, __dq_head); \
}
#define TASK_IMPL_5(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5)\
void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \
{ \
t->d.res = NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\
} \
\
static inline __attribute__((always_inline)) \
RTYPE NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5);\
\
/* NAME##_WORK is inlined into NAME##_CALL, so the wrapper adds no call overhead */\
RTYPE NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\
{ \
CHECKSTACK(w); \
return NAME##_WORK(w, __dq_head , arg_1, arg_2, arg_3, arg_4, arg_5); \
} \
\
static inline __attribute__((always_inline)) \
RTYPE NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3, ATYPE_4 ARG_4, ATYPE_5 ARG_5)
#define TASK_5(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5) TASK_DECL_5(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5) TASK_IMPL_5(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5)
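/*
 * Note on NAME##_NEWFRAME and NAME##_TOGETHER (generated above for each
 * arity): both build the task descriptor in a stack-local Task (`_t`)
 * rather than on the deque, because the task is handed straight to
 * lace_do_newframe/lace_do_together instead of being exposed for stealing.
 * As a hedged summary of the framework's semantics: NEWFRAME interrupts the
 * current frame and runs the given task to completion in a fresh frame
 * (lace_do_newframe writes the result back into `_t`), while TOGETHER runs
 * the task on every worker, e.g. (hypothetical task name):
 *
 *     int r = NEWFRAME(rebalance, a, b, c, d, e);
 */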
#define VOID_TASK_DECL_5(NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5) \
\
typedef struct _TD_##NAME { \
TASK_COMMON_FIELDS(_TD_##NAME) \
union { struct { ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3; ATYPE_4 arg_4; ATYPE_5 arg_5; } args; } d;\
} TD_##NAME; \
\
/* If this line fails to compile, increase the LACE_TASKSIZE define at the top of this file */\
typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
\
void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
void NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5);\
static inline void NAME##_SYNC(WorkerP *, Task *); \
static void NAME##_SYNC_SLOW(WorkerP *, Task *); \
\
static inline __attribute__((unused)) \
void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\
{ \
PR_COUNTTASK(w); \
\
TD_##NAME *t; \
TailSplit ts; \
uint32_t head, split, newsplit; \
\
    /* assert(__dq_head < w->end); */ /* assumed to hold: the deque is never full here */ \
\
t = (TD_##NAME *)__dq_head; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\
compiler_barrier(); \
\
Worker *wt = w->_public; \
if (unlikely(w->allstolen)) { \
if (wt->movesplit) wt->movesplit = 0; \
head = __dq_head - w->dq; \
ts = (TailSplit){{head,head+1}}; \
wt->ts.v = ts.v; \
compiler_barrier(); \
wt->allstolen = 0; \
w->split = __dq_head+1; \
w->allstolen = 0; \
} else if (unlikely(wt->movesplit)) { \
head = __dq_head - w->dq; \
split = w->split - w->dq; \
newsplit = (split + head + 2)/2; \
wt->ts.ts.split = newsplit; \
w->split = w->dq + newsplit; \
compiler_barrier(); \
wt->movesplit = 0; \
PR_COUNTSPLITS(w, CTR_split_grow); \
} \
} \
\
static inline __attribute__((unused)) \
void NAME##_NEWFRAME(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\
{ \
Task _t; \
TD_##NAME *t = (TD_##NAME *)&_t; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\
\
lace_do_newframe(w, __dq_head, &_t); \
return ; \
} \
\
static inline __attribute__((unused)) \
void NAME##_TOGETHER(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\
{ \
Task _t; \
TD_##NAME *t = (TD_##NAME *)&_t; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5;\
\
lace_do_together(w, __dq_head, &_t); \
} \
\
static __attribute__((noinline)) \
void NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
{ \
TD_##NAME *t; \
\
if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
lace_leapfrog(w, __dq_head); \
        return ; \
} \
\
compiler_barrier(); \
\
Worker *wt = w->_public; \
if (wt->movesplit) { \
Task *t = w->split; \
size_t diff = __dq_head - t; \
diff = (diff + 1) / 2; \
w->split = t + diff; \
wt->ts.ts.split += diff; \
compiler_barrier(); \
wt->movesplit = 0; \
PR_COUNTSPLITS(w, CTR_split_grow); \
} \
\
compiler_barrier(); \
\
t = (TD_##NAME *)__dq_head; \
t->thief = THIEF_EMPTY; \
    NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\
} \
\
static inline __attribute__((unused)) \
void NAME##_SYNC(WorkerP *w, Task *__dq_head) \
{ \
    /* assert (__dq_head > 0); */ /* disabled: the caller guarantees a non-empty deque */ \
\
if (likely(0 == w->_public->movesplit)) { \
if (likely(w->split <= __dq_head)) { \
TD_##NAME *t = (TD_##NAME *)__dq_head; \
t->thief = THIEF_EMPTY; \
            NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\
            return ; \
} \
} \
\
    NAME##_SYNC_SLOW(w, __dq_head); \
}
#define VOID_TASK_IMPL_5(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5)\
void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \
{ \
NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5);\
} \
\
static inline __attribute__((always_inline)) \
void NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5);\
\
/* NAME##_WORK is inlined into NAME##_CALL, so the wrapper adds no call overhead */\
void NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5)\
{ \
CHECKSTACK(w); \
    NAME##_WORK(w, __dq_head , arg_1, arg_2, arg_3, arg_4, arg_5); \
} \
\
static inline __attribute__((always_inline)) \
void NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3, ATYPE_4 ARG_4, ATYPE_5 ARG_5)
#define VOID_TASK_5(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5) VOID_TASK_DECL_5(NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5) VOID_TASK_IMPL_5(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5)
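/*
 * About the assertion_failed_task_descriptor_out_of_bounds_* typedef that
 * each DECL macro emits: it is a compile-time size check. The array size is
 * -1 (a hard compile error) whenever TD_##NAME does not fit in a Task, and
 * 0 otherwise (a zero-length array, accepted by GCC). A sketch of the
 * remedy, assuming your task's arguments exceed the default payload; since
 * LACE_TASKSIZE is only expanded inside this header, it can be defined
 * before the include:
 *
 *     #define LACE_TASKSIZE (10)*P_SZ
 *     #include "lace.h"
 */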
// Task macros for tasks of arity 6
#define TASK_DECL_6(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6)\
\
typedef struct _TD_##NAME { \
TASK_COMMON_FIELDS(_TD_##NAME) \
union { struct { ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3; ATYPE_4 arg_4; ATYPE_5 arg_5; ATYPE_6 arg_6; } args; RTYPE res; } d;\
} TD_##NAME; \
\
/* If this line fails to compile, increase the LACE_TASKSIZE define at the top of this file */\
typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
\
void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
RTYPE NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6);\
static inline RTYPE NAME##_SYNC(WorkerP *, Task *); \
static RTYPE NAME##_SYNC_SLOW(WorkerP *, Task *); \
\
static inline __attribute__((unused)) \
void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\
{ \
PR_COUNTTASK(w); \
\
TD_##NAME *t; \
TailSplit ts; \
uint32_t head, split, newsplit; \
\
    /* assert(__dq_head < w->end); */ /* assumed to hold: the deque is never full here */ \
\
t = (TD_##NAME *)__dq_head; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6;\
compiler_barrier(); \
\
Worker *wt = w->_public; \
if (unlikely(w->allstolen)) { \
if (wt->movesplit) wt->movesplit = 0; \
head = __dq_head - w->dq; \
ts = (TailSplit){{head,head+1}}; \
wt->ts.v = ts.v; \
compiler_barrier(); \
wt->allstolen = 0; \
w->split = __dq_head+1; \
w->allstolen = 0; \
} else if (unlikely(wt->movesplit)) { \
head = __dq_head - w->dq; \
split = w->split - w->dq; \
newsplit = (split + head + 2)/2; \
wt->ts.ts.split = newsplit; \
w->split = w->dq + newsplit; \
compiler_barrier(); \
wt->movesplit = 0; \
PR_COUNTSPLITS(w, CTR_split_grow); \
} \
} \
\
static inline __attribute__((unused)) \
RTYPE NAME##_NEWFRAME(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\
{ \
Task _t; \
TD_##NAME *t = (TD_##NAME *)&_t; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6;\
\
lace_do_newframe(w, __dq_head, &_t); \
    return t->d.res; \
} \
\
static inline __attribute__((unused)) \
void NAME##_TOGETHER(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\
{ \
Task _t; \
TD_##NAME *t = (TD_##NAME *)&_t; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6;\
\
lace_do_together(w, __dq_head, &_t); \
} \
\
static __attribute__((noinline)) \
RTYPE NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
{ \
TD_##NAME *t; \
\
if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
lace_leapfrog(w, __dq_head); \
t = (TD_##NAME *)__dq_head; \
        return t->d.res; \
} \
\
compiler_barrier(); \
\
Worker *wt = w->_public; \
if (wt->movesplit) { \
Task *t = w->split; \
size_t diff = __dq_head - t; \
diff = (diff + 1) / 2; \
w->split = t + diff; \
wt->ts.ts.split += diff; \
compiler_barrier(); \
wt->movesplit = 0; \
PR_COUNTSPLITS(w, CTR_split_grow); \
} \
\
compiler_barrier(); \
\
t = (TD_##NAME *)__dq_head; \
t->thief = THIEF_EMPTY; \
return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\
} \
\
static inline __attribute__((unused)) \
RTYPE NAME##_SYNC(WorkerP *w, Task *__dq_head) \
{ \
    /* assert (__dq_head > 0); */ /* disabled: the caller guarantees a non-empty deque */ \
\
if (likely(0 == w->_public->movesplit)) { \
if (likely(w->split <= __dq_head)) { \
TD_##NAME *t = (TD_##NAME *)__dq_head; \
t->thief = THIEF_EMPTY; \
return NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\
} \
} \
\
return NAME##_SYNC_SLOW(w, __dq_head); \
}
#define TASK_IMPL_6(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5, ATYPE_6, ARG_6)\
void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \
{ \
t->d.res = NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\
} \
\
static inline __attribute__((always_inline)) \
RTYPE NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6);\
\
/* NAME##_WORK is inlined into NAME##_CALL, so the wrapper adds no call overhead */\
RTYPE NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\
{ \
CHECKSTACK(w); \
return NAME##_WORK(w, __dq_head , arg_1, arg_2, arg_3, arg_4, arg_5, arg_6); \
} \
\
static inline __attribute__((always_inline)) \
RTYPE NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3, ATYPE_4 ARG_4, ATYPE_5 ARG_5, ATYPE_6 ARG_6)
#define TASK_6(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5, ATYPE_6, ARG_6) TASK_DECL_6(RTYPE, NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6) TASK_IMPL_6(RTYPE, NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5, ATYPE_6, ARG_6)
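/*
 * Worked example for the movesplit branch of NAME##_SPAWN (the numbers are
 * illustrative): a thief that finds the shared region of the deque empty
 * sets wt->movesplit, and at the owner's next spawn the split point is
 * moved halfway towards the head. With split == 4 and head == 10,
 * newsplit = (4 + 10 + 2)/2 = 8, so entries 4..7 move from the private to
 * the stealable part of the deque.
 */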
#define VOID_TASK_DECL_6(NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6) \
\
typedef struct _TD_##NAME { \
TASK_COMMON_FIELDS(_TD_##NAME) \
union { struct { ATYPE_1 arg_1; ATYPE_2 arg_2; ATYPE_3 arg_3; ATYPE_4 arg_4; ATYPE_5 arg_5; ATYPE_6 arg_6; } args; } d;\
} TD_##NAME; \
\
/* If this line fails to compile, increase the LACE_TASKSIZE define at the top of this file */\
typedef char assertion_failed_task_descriptor_out_of_bounds_##NAME[(sizeof(TD_##NAME)<=sizeof(Task)) ? 0 : -1];\
\
void NAME##_WRAP(WorkerP *, Task *, TD_##NAME *); \
void NAME##_CALL(WorkerP *, Task * , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6);\
static inline void NAME##_SYNC(WorkerP *, Task *); \
static void NAME##_SYNC_SLOW(WorkerP *, Task *); \
\
static inline __attribute__((unused)) \
void NAME##_SPAWN(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\
{ \
PR_COUNTTASK(w); \
\
TD_##NAME *t; \
TailSplit ts; \
uint32_t head, split, newsplit; \
\
    /* assert(__dq_head < w->end); */ /* assumed to hold: the deque is never full here */ \
\
t = (TD_##NAME *)__dq_head; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6;\
compiler_barrier(); \
\
Worker *wt = w->_public; \
if (unlikely(w->allstolen)) { \
if (wt->movesplit) wt->movesplit = 0; \
head = __dq_head - w->dq; \
ts = (TailSplit){{head,head+1}}; \
wt->ts.v = ts.v; \
compiler_barrier(); \
wt->allstolen = 0; \
w->split = __dq_head+1; \
w->allstolen = 0; \
} else if (unlikely(wt->movesplit)) { \
head = __dq_head - w->dq; \
split = w->split - w->dq; \
newsplit = (split + head + 2)/2; \
wt->ts.ts.split = newsplit; \
w->split = w->dq + newsplit; \
compiler_barrier(); \
wt->movesplit = 0; \
PR_COUNTSPLITS(w, CTR_split_grow); \
} \
} \
\
static inline __attribute__((unused)) \
void NAME##_NEWFRAME(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\
{ \
Task _t; \
TD_##NAME *t = (TD_##NAME *)&_t; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6;\
\
lace_do_newframe(w, __dq_head, &_t); \
return ; \
} \
\
static inline __attribute__((unused)) \
void NAME##_TOGETHER(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\
{ \
Task _t; \
TD_##NAME *t = (TD_##NAME *)&_t; \
t->f = &NAME##_WRAP; \
t->thief = THIEF_TASK; \
t->d.args.arg_1 = arg_1; t->d.args.arg_2 = arg_2; t->d.args.arg_3 = arg_3; t->d.args.arg_4 = arg_4; t->d.args.arg_5 = arg_5; t->d.args.arg_6 = arg_6;\
\
lace_do_together(w, __dq_head, &_t); \
} \
\
static __attribute__((noinline)) \
void NAME##_SYNC_SLOW(WorkerP *w, Task *__dq_head) \
{ \
TD_##NAME *t; \
\
if ((w->allstolen) || (w->split > __dq_head && lace_shrink_shared(w))) { \
lace_leapfrog(w, __dq_head); \
        return ; \
} \
\
compiler_barrier(); \
\
Worker *wt = w->_public; \
if (wt->movesplit) { \
Task *t = w->split; \
size_t diff = __dq_head - t; \
diff = (diff + 1) / 2; \
w->split = t + diff; \
wt->ts.ts.split += diff; \
compiler_barrier(); \
wt->movesplit = 0; \
PR_COUNTSPLITS(w, CTR_split_grow); \
} \
\
compiler_barrier(); \
\
t = (TD_##NAME *)__dq_head; \
t->thief = THIEF_EMPTY; \
    NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\
} \
\
static inline __attribute__((unused)) \
void NAME##_SYNC(WorkerP *w, Task *__dq_head) \
{ \
    /* assert (__dq_head > 0); */ /* disabled: the caller guarantees a non-empty deque */ \
\
if (likely(0 == w->_public->movesplit)) { \
if (likely(w->split <= __dq_head)) { \
TD_##NAME *t = (TD_##NAME *)__dq_head; \
t->thief = THIEF_EMPTY; \
            NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\
            return ; \
} \
} \
\
    NAME##_SYNC_SLOW(w, __dq_head); \
}
#define VOID_TASK_IMPL_6(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5, ATYPE_6, ARG_6)\
void NAME##_WRAP(WorkerP *w, Task *__dq_head, TD_##NAME *t __attribute__((unused))) \
{ \
NAME##_CALL(w, __dq_head , t->d.args.arg_1, t->d.args.arg_2, t->d.args.arg_3, t->d.args.arg_4, t->d.args.arg_5, t->d.args.arg_6);\
} \
\
static inline __attribute__((always_inline)) \
void NAME##_WORK(WorkerP *__lace_worker, Task *__lace_dq_head , ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6);\
\
/* NAME##_WORK is inlined into NAME##_CALL, so the wrapper adds no call overhead */\
void NAME##_CALL(WorkerP *w, Task *__dq_head , ATYPE_1 arg_1, ATYPE_2 arg_2, ATYPE_3 arg_3, ATYPE_4 arg_4, ATYPE_5 arg_5, ATYPE_6 arg_6)\
{ \
CHECKSTACK(w); \
    NAME##_WORK(w, __dq_head , arg_1, arg_2, arg_3, arg_4, arg_5, arg_6); \
} \
\
static inline __attribute__((always_inline)) \
void NAME##_WORK(WorkerP *__lace_worker __attribute__((unused)), Task *__lace_dq_head __attribute__((unused)) , ATYPE_1 ARG_1, ATYPE_2 ARG_2, ATYPE_3 ARG_3, ATYPE_4 ARG_4, ATYPE_5 ARG_5, ATYPE_6 ARG_6)
#define VOID_TASK_6(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5, ATYPE_6, ARG_6) VOID_TASK_DECL_6(NAME, ATYPE_1, ATYPE_2, ATYPE_3, ATYPE_4, ATYPE_5, ATYPE_6) VOID_TASK_IMPL_6(NAME, ATYPE_1, ARG_1, ATYPE_2, ARG_2, ATYPE_3, ARG_3, ATYPE_4, ARG_4, ATYPE_5, ARG_5, ATYPE_6, ARG_6)
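/* Internal Lace tasks: the steal loops executed by idle workers, declared
   with the lower-arity task macros from earlier in this header. */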
VOID_TASK_DECL_0(lace_steal_random);
VOID_TASK_DECL_1(lace_steal_random_loop, int*);
VOID_TASK_DECL_1(lace_steal_loop, int*);
VOID_TASK_DECL_2(lace_steal_loop_root, Task *, int*);
#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif /* __LACE_H__ */