/* Listing metadata from the file viewer: 115 lines, 2.7 KiB, C. */
#include <math.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <omp.h>

/*
 * Build-time configuration.  Both values may be overridden on the compiler
 * command line (for example -DN_THREADS=8) instead of editing this file.
 */

/* Number of worker threads; also sizes the per-thread arrays and the barrier. */
#ifndef N_THREADS
#define N_THREADS 130
#endif

/* Number of elements in each of the two input vectors. */
#ifndef N_DIMENSIONS
#define N_DIMENSIONS 2000000
#endif

#if N_THREADS < 1
#error "N_THREADS must be at least 1"
#endif

pthread_barrier_t b;
|
|
long *p_s;
|
|
long *q_s;
|
|
double res[N_THREADS];
|
|
int active_threads[N_THREADS];
|
|
|
|
void setup() {
|
|
p_s = (long *) malloc(N_DIMENSIONS * sizeof(long));
|
|
q_s = (long *) malloc(N_DIMENSIONS * sizeof(long));
|
|
for (long i = 0; i < N_DIMENSIONS; i++) {
|
|
p_s[i] = rand() % 10;
|
|
q_s[i] = rand() % 10;
|
|
}
|
|
}
|
|
|
|
void *calculate(void* arg) {
|
|
int *myrank = (int *)arg;
|
|
long mystart = (long)(N_DIMENSIONS/ (double)N_THREADS * *myrank);
|
|
long myend = (long)(N_DIMENSIONS / (double)N_THREADS * (*myrank + 1));
|
|
double sum = 0;
|
|
|
|
|
|
for (long i = mystart; i < myend; i++) {
|
|
sum += pow(p_s[i] - q_s[i], 2);
|
|
}
|
|
|
|
res[*myrank] = sum;
|
|
printf("Thread2 %d finished\n", *myrank);
|
|
fflush(stdout);
|
|
pthread_barrier_wait( &b );
|
|
|
|
int n_threads = N_THREADS;
|
|
|
|
// handle the odd number of threads!
|
|
if(n_threads % 2 == 1) {
|
|
if ( *myrank == N_THREADS - 1) {
|
|
res[*myrank - 1] += res[*myrank];
|
|
active_threads[*myrank] = -1;
|
|
}
|
|
n_threads--;
|
|
}
|
|
|
|
pthread_barrier_wait( &b );
|
|
|
|
// Do not split threads if there is a single thread
|
|
if (n_threads != 1) {
|
|
n_threads = N_THREADS / 2;
|
|
if (*myrank >= n_threads) {
|
|
active_threads[*myrank] = -1;
|
|
}
|
|
}
|
|
|
|
|
|
// Do a tree based sumation
|
|
while(1) {
|
|
if(active_threads[*myrank] != -1) {
|
|
res[*myrank] = res[*myrank * 2] + res[*myrank *2 + 1];
|
|
}
|
|
pthread_barrier_wait( &b );
|
|
|
|
n_threads = n_threads / 2;
|
|
if (n_threads == 1 || n_threads == 0) {
|
|
break;
|
|
}
|
|
if (*myrank >= n_threads) {
|
|
//printf("Thread %d is on pause\n", *myrank);
|
|
active_threads[*myrank] = -1;
|
|
}
|
|
pthread_barrier_wait( &b );
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
|
|
int main() {
|
|
pthread_t t[N_THREADS];
|
|
setup();
|
|
double dt = omp_get_wtime(); // start timing
|
|
int p[N_THREADS];
|
|
|
|
// barrier & lock
|
|
pthread_barrier_init( &b, NULL, N_THREADS );
|
|
|
|
for(int i = 0; i < N_THREADS; i++ ) {
|
|
p[i] = i;
|
|
pthread_create(&t[i], NULL, calculate, (void *)&p[i]);
|
|
}
|
|
|
|
for(int i=0; i<N_THREADS; i++)
|
|
pthread_join(t[i], NULL);
|
|
|
|
dt = omp_get_wtime() - dt; // finish timing
|
|
|
|
printf("Execution time: %lf, caluclation result: %lf ", dt, sqrt(res[0]));
|
|
return 0;
|
|
}
|
|
|
|
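// EXECUTION TIME (recorded program output; N_DIMENSIONS was 200000000 for these runs)
// 8 THREADS: Execution time: 0.807376, caluclation result: 49748.358094, N_DIMENSIONS: 200000000
// 4 THREADS: Execution time: 1.614672, caluclation result: 49748.358094, N_DIMENSIONS: 200000000
// 2 THREADS: Execution time: 3.223820, caluclation result: 57445.749660, N_DIMENSIONS: 200000000
// 1 THREADS: Execution time: 4.698835, caluclation result: 57445.749660, N_DIMENSIONS: 200000000
// NOTE: rand() is never seeded, so every run should see the same input and
// the result should not depend on the thread count. The 8- and 4-thread
// results above disagree with the 1- and 2-thread results and should be
// treated as suspect.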