#include #include #include #include #include #include #define N_THREADS 130 #define N_DIMENSIONS 2000000 pthread_barrier_t b; long *p_s; long *q_s; double res[N_THREADS]; int active_threads[N_THREADS]; void setup() { p_s = (long *) malloc(N_DIMENSIONS * sizeof(long)); q_s = (long *) malloc(N_DIMENSIONS * sizeof(long)); for (long i = 0; i < N_DIMENSIONS; i++) { p_s[i] = rand() % 10; q_s[i] = rand() % 10; } } void *calculate(void* arg) { int *myrank = (int *)arg; long mystart = (long)(N_DIMENSIONS/ (double)N_THREADS * *myrank); long myend = (long)(N_DIMENSIONS / (double)N_THREADS * (*myrank + 1)); double sum = 0; for (long i = mystart; i < myend; i++) { sum += pow(p_s[i] - q_s[i], 2); } res[*myrank] = sum; printf("Thread2 %d finished\n", *myrank); fflush(stdout); pthread_barrier_wait( &b ); int n_threads = N_THREADS; // handle the odd number of threads! if(n_threads % 2 == 1) { if ( *myrank == N_THREADS - 1) { res[*myrank - 1] += res[*myrank]; active_threads[*myrank] = -1; } n_threads--; } pthread_barrier_wait( &b ); // Do not split threads if there is a single thread if (n_threads != 1) { n_threads = N_THREADS / 2; if (*myrank >= n_threads) { active_threads[*myrank] = -1; } } // Do a tree based sumation while(1) { if(active_threads[*myrank] != -1) { res[*myrank] = res[*myrank * 2] + res[*myrank *2 + 1]; } pthread_barrier_wait( &b ); n_threads = n_threads / 2; if (n_threads == 1 || n_threads == 0) { break; } if (*myrank >= n_threads) { //printf("Thread %d is on pause\n", *myrank); active_threads[*myrank] = -1; } pthread_barrier_wait( &b ); } return NULL; } int main() { pthread_t t[N_THREADS]; setup(); double dt = omp_get_wtime(); // start timing int p[N_THREADS]; // barrier & lock pthread_barrier_init( &b, NULL, N_THREADS ); for(int i = 0; i < N_THREADS; i++ ) { p[i] = i; pthread_create(&t[i], NULL, calculate, (void *)&p[i]); } for(int i=0; i