diff --git a/dn8/Makefile b/dn8/Makefile index 5b7412c..ab3f52d 100644 --- a/dn8/Makefile +++ b/dn8/Makefile @@ -28,3 +28,12 @@ run_remote: download_output: rsync -a --progress nsc:$(DESTINATION)/mandelbrot.png . + +run_local: + @test -n "$(width)" || (echo "width is not set"; exit 1) + @test -n "$(height)" || (echo "height is not set"; exit 1) + + @echo "Compiling..." + gcc -g -Wall -lm mandelbrot.c -o mandelbrot + @echo "Running on CPU" + ./mandelbrot $(width) $(height) diff --git a/dn8/cuda.cu b/dn8/cuda.cu index 3a41780..a77f82e 100644 --- a/dn8/cuda.cu +++ b/dn8/cuda.cu @@ -60,13 +60,10 @@ __global__ void mandelbrotGPU(unsigned char *img, int width, int height, int cha } } -// take params int main(int argc, char **argv) { - // first argument is width int width = atoi(argv[1]); - // second argument is height int height = atoi(argv[2]); int channels = 4; @@ -82,24 +79,18 @@ int main(int argc, char **argv) dim3 blockSize(BLOCK_SIZE, BLOCK_SIZE, 1); dim3 gridSize((width + blockSize.x - 1) / blockSize.x, (height + blockSize.y - 1) / blockSize.y, 1); - // Ustvarimo dogodke CUDA cudaEvent_t start, stop; cudaEventCreate(&start); cudaEventCreate(&stop); - // Zacnemo beleziti dogodke cudaEventRecord(start); - // Zazenemo funkcijo na GPE mandelbrotGPU<<>>(img_d, width, height, channels); // Copy image to host checkCudaErrors(cudaMemcpy(img_h, img_d, data_size, cudaMemcpyDeviceToHost)); - // Zakljucimo z belezenjem dogodkov cudaEventRecord(stop); - - // Pocakamo da se dogodki zgodijo, saj se izvajajo asinhrono cudaEventSynchronize(stop); float milliseconds = 0; @@ -117,3 +108,81 @@ int main(int argc, char **argv) return 0; } + +/* RESULTS +resolution: 640,480, ngpus: 1, gpu_time: 1.664ms, cpu_time: 10.122344s -> speedup 6083.139423076924 +resolution: 640,480, ngpus: 2, gpu_time: 1.668ms, cpu_time: 10.122344s -> speedup 6068.551558752998 +resolution: 640,480, ngpus: 4, gpu_time: 1.676ms, cpu_time: 10.122344s -> speedup 6039.584725536993 +resolution: 640,600, ngpus: 1, gpu_time: 2.036ms, cpu_time: 10.124772s -> speedup 4972.874263261297 +resolution: 640,600, ngpus: 2, gpu_time: 2.051ms, cpu_time: 10.124772s -> speedup 4936.505119453925 +resolution: 640,600, ngpus: 4, gpu_time: 2.009ms, cpu_time: 10.124772s -> speedup 5039.707317073171 +resolution: 640,900, ngpus: 1, gpu_time: 2.132ms, cpu_time: 10.504951s -> speedup 4927.275328330206 +resolution: 640,900, ngpus: 2, gpu_time: 2.908ms, cpu_time: 10.504951s -> speedup 3612.431568088033 +resolution: 640,900, ngpus: 4, gpu_time: 2.882ms, cpu_time: 10.504951s -> speedup 3645.021165857044 +resolution: 640,1080, ngpus: 1, gpu_time: 2.639ms, cpu_time: 11.015781s -> speedup 4174.225464190982 +resolution: 640,1080, ngpus: 2, gpu_time: 3.366ms, cpu_time: 11.015781s -> speedup 3272.662210338681 +resolution: 640,1080, ngpus: 4, gpu_time: 3.372ms, cpu_time: 11.015781s -> speedup 3266.8389679715306 +resolution: 640,2160, ngpus: 1, gpu_time: 4.996ms, cpu_time: 12.115385s -> speedup 2425.0170136108886 +resolution: 640,2160, ngpus: 2, gpu_time: 4.905ms, cpu_time: 12.115385s -> speedup 2470.007135575943 +resolution: 640,2160, ngpus: 4, gpu_time: 5.701ms, cpu_time: 12.115385s -> speedup 2125.1333099456238 +resolution: 800,480, ngpus: 1, gpu_time: 2.005ms, cpu_time: 11.583709s -> speedup 5777.410972568579 +resolution: 800,480, ngpus: 2, gpu_time: 2.383ms, cpu_time: 11.583709s -> speedup 4860.97733948804 +resolution: 800,480, ngpus: 4, gpu_time: 1.978ms, cpu_time: 11.583709s -> speedup 5856.27350859454 +resolution: 800,600, ngpus: 1, gpu_time: 2.450ms, cpu_time: 11.500485s -> speedup 4694.07551020408 +resolution: 800,600, ngpus: 2, gpu_time: 2.399ms, cpu_time: 11.500485s -> speedup 4793.866194247603 +resolution: 800,600, ngpus: 4, gpu_time: 2.422ms, cpu_time: 11.500485s -> speedup 4748.342279108175 +resolution: 800,900, ngpus: 1, gpu_time: 3.512ms, cpu_time: 10.727533s -> speedup 3054.536731207289 +resolution: 800,900, ngpus: 2, gpu_time: 3.510ms, cpu_time: 10.727533s -> speedup 3056.277207977208 +resolution: 800,900, ngpus: 4, gpu_time: 3.500ms, cpu_time: 10.727533s -> speedup 3065.009428571428 +resolution: 800,1080, ngpus: 1, gpu_time: 3.453ms, cpu_time: 10.642499s -> speedup 3082.1022299449755 +resolution: 800,1080, ngpus: 2, gpu_time: 3.357ms, cpu_time: 10.642499s -> speedup 3170.2409889782543 +resolution: 800,1080, ngpus: 4, gpu_time: 4.108ms, cpu_time: 10.642499s -> speedup 2590.676484907498 +resolution: 800,2160, ngpus: 1, gpu_time: 6.405ms, cpu_time: 10.547187s -> speedup 1646.7114754098359 +resolution: 800,2160, ngpus: 2, gpu_time: 6.410ms, cpu_time: 10.547187s -> speedup 1645.426989079563 +resolution: 800,2160, ngpus: 4, gpu_time: 6.425ms, cpu_time: 10.547187s -> speedup 1641.5855252918286 +resolution: 1600,480, ngpus: 1, gpu_time: 2.932ms, cpu_time: 11.243271s -> speedup 3834.676330150068 +resolution: 1600,480, ngpus: 2, gpu_time: 2.894ms, cpu_time: 11.243271s -> speedup 3885.0279889426397 +resolution: 1600,480, ngpus: 4, gpu_time: 3.655ms, cpu_time: 11.243271s -> speedup 3076.134336525308 +resolution: 1600,600, ngpus: 1, gpu_time: 3.816ms, cpu_time: 11.046463s -> speedup 2894.7754192872117 +resolution: 1600,600, ngpus: 2, gpu_time: 4.511ms, cpu_time: 11.046463s -> speedup 2448.7836399911325 +resolution: 1600,600, ngpus: 4, gpu_time: 3.714ms, cpu_time: 11.046463s -> speedup 2974.276521270867 +resolution: 1600,900, ngpus: 1, gpu_time: 5.884ms, cpu_time: 10.917261s -> speedup 1855.4148538409245 +resolution: 1600,900, ngpus: 2, gpu_time: 5.070ms, cpu_time: 10.917261s -> speedup 2153.3059171597633 +resolution: 1600,900, ngpus: 4, gpu_time: 5.041ms, cpu_time: 10.917261s -> speedup 2165.6935131918267 +resolution: 1600,1080, ngpus: 1, gpu_time: 5.619ms, cpu_time: 11.436832s -> speedup 2035.3856558106427 +resolution: 1600,1080, ngpus: 2, gpu_time: 6.326ms, cpu_time: 11.436832s -> speedup 1807.9089472020235 +resolution: 1600,1080, ngpus: 4, gpu_time: 6.321ms, cpu_time: 11.436832s -> speedup 1809.33902863471 +resolution: 1600,2160, ngpus: 1, gpu_time: 10.906ms, cpu_time: 10.779845s -> speedup 988.43251421236 +resolution: 1600,2160, ngpus: 2, gpu_time: 10.772ms, cpu_time: 10.779845s -> speedup 1000.7282770144819 +resolution: 1600,2160, ngpus: 4, gpu_time: 10.741ms, cpu_time: 10.779845s -> speedup 1003.6165161530585 +resolution: 1920,480, ngpus: 1, gpu_time: 4.361ms, cpu_time: 11.150000s -> speedup 2556.75303829397 +resolution: 1920,480, ngpus: 2, gpu_time: 3.556ms, cpu_time: 11.150000s -> speedup 3135.5455568053994 +resolution: 1920,480, ngpus: 4, gpu_time: 3.559ms, cpu_time: 11.150000s -> speedup 3132.9025007024447 +resolution: 1920,600, ngpus: 1, gpu_time: 4.599ms, cpu_time: 11.943074s -> speedup 2596.884974994564 +resolution: 1920,600, ngpus: 2, gpu_time: 4.667ms, cpu_time: 11.943074s -> speedup 2559.0473537604453 +resolution: 1920,600, ngpus: 4, gpu_time: 4.607ms, cpu_time: 11.943074s -> speedup 2592.375515519861 +resolution: 1920,900, ngpus: 1, gpu_time: 5.625ms, cpu_time: 10.943963s -> speedup 1945.5934222222222 +resolution: 1920,900, ngpus: 2, gpu_time: 5.696ms, cpu_time: 10.943963s -> speedup 1921.341818820225 +resolution: 1920,900, ngpus: 4, gpu_time: 6.249ms, cpu_time: 10.943963s -> speedup 1751.314290286446 +resolution: 1920,1080, ngpus: 1, gpu_time: 7.881ms, cpu_time: 11.079364s -> speedup 1405.8322547900011 +resolution: 1920,1080, ngpus: 2, gpu_time: 6.993ms, cpu_time: 11.079364s -> speedup 1584.3506363506363 +resolution: 1920,1080, ngpus: 4, gpu_time: 7.749ms, cpu_time: 11.079364s -> speedup 1429.7798425603305 +resolution: 1920,2160, ngpus: 1, gpu_time: 13.916ms, cpu_time: 10.651142s -> speedup 765.3881862604197 +resolution: 1920,2160, ngpus: 2, gpu_time: 12.911ms, cpu_time: 10.651142s -> speedup 824.9664627062195 +resolution: 1920,2160, ngpus: 4, gpu_time: 12.922ms, cpu_time: 10.651142s -> speedup 824.2642005881441 +resolution: 3840,480, ngpus: 1, gpu_time: 6.109ms, cpu_time: 10.953693s -> speedup 1793.0419053854969 +resolution: 3840,480, ngpus: 2, gpu_time: 6.029ms, cpu_time: 10.953693s -> speedup 1816.8341350140986 +resolution: 3840,480, ngpus: 4, gpu_time: 6.752ms, cpu_time: 10.953693s -> speedup 1622.28865521327 +resolution: 3840,600, ngpus: 1, gpu_time: 8.130ms, cpu_time: 10.953526s -> speedup 1347.2971709717096 +resolution: 3840,600, ngpus: 2, gpu_time: 7.990ms, cpu_time: 10.953526s -> speedup 1370.9043804755945 +resolution: 3840,600, ngpus: 4, gpu_time: 8.000ms, cpu_time: 10.953526s -> speedup 1369.19075 +resolution: 3840,900, ngpus: 1, gpu_time: 10.816ms, cpu_time: 11.594220s -> speedup 1071.9508136094673 +resolution: 3840,900, ngpus: 2, gpu_time: 11.445ms, cpu_time: 11.594220s -> speedup 1013.038007863696 +resolution: 3840,900, ngpus: 4, gpu_time: 10.671ms, cpu_time: 11.594220s -> speedup 1086.5167275794208 +resolution: 3840,1080, ngpus: 1, gpu_time: 13.010ms, cpu_time: 11.026291s -> speedup 847.5242890084552 +resolution: 3840,1080, ngpus: 2, gpu_time: 13.600ms, cpu_time: 11.026291s -> speedup 810.7566911764707 +resolution: 3840,1080, ngpus: 4, gpu_time: 13.223ms, cpu_time: 11.026291s -> speedup 833.8721167662407 +resolution: 3840,2160, ngpus: 1, gpu_time: 24.669ms, cpu_time: 11.050334s -> speedup 447.9441404191495 +resolution: 3840,2160, ngpus: 2, gpu_time: 24.373ms, cpu_time: 11.050334s -> speedup 453.38423665531525 +resolution: 3840,2160, ngpus: 4, gpu_time: 24.410ms, cpu_time: 11.050334s -> speedup 452.69700942236784 +*/ diff --git a/dn8/get_speedups.py b/dn8/get_speedups.py new file mode 100644 index 0000000..40a5618 --- /dev/null +++ b/dn8/get_speedups.py @@ -0,0 +1,35 @@ + + +def main(): + # Read csv file + gpu_results = [] + with open('./results_gpu.csv', 'r') as f: + lines = f.readlines() + # Split lines into list of lists and strip whitespace + lines = [line.strip().split(',') for line in lines] + # Remove header + lines = lines[1:] + gpu_results = lines + + cpu_results = [] + with open('./results_cpu.csv', 'r') as f: + lines = f.readlines() + # Split lines into list of lists and strip whitespace + lines = [line.strip().split(',') for line in lines] + # Remove header + lines = lines[1:] + cpu_results = lines + + for i in range(len(gpu_results)): + for j in range(len(cpu_results)): + if gpu_results[i][0] == cpu_results[j][0] and gpu_results[i][1] == cpu_results[j][1]: + gpu_time = float(gpu_results[i][3]) # in milliseconds + cpu_time = float(cpu_results[j][2])# in seconds + speedup = cpu_time / (gpu_time / 1000) + print(f'resolution: {gpu_results[i][0]},{gpu_results[i][1]}, ngpus: {gpu_results[i][2]}, gpu_time: {gpu_results[i][3]}ms, cpu_time: {cpu_results[j][2]}s -> speedup {speedup}') + + + + +if __name__ == '__main__': + main() diff --git a/dn8/mandelbrot.c b/dn8/mandelbrot.c index 137a8b1..a201bb9 100644 --- a/dn8/mandelbrot.c +++ b/dn8/mandelbrot.c @@ -4,6 +4,7 @@ #define STB_IMAGE_WRITE_IMPLEMENTATION #include "stb_image_write.h" +#include void mandelbrotCPU(unsigned char *image, int height, int width) { float x0, y0, x, y, xtemp; @@ -50,11 +51,16 @@ int main(void) int height = 3840; int width = 2160; int cpp=4; //stevilo barvnih kanalov na piksel + struct timeval tv1, tv2; //rezerviramo prostor za sliko (RGBA) unsigned char *image = (unsigned char *)malloc(height * width * sizeof(unsigned char) * cpp); - + + gettimeofday(&tv1, NULL); mandelbrotCPU(image, height, width); + gettimeofday(&tv2, NULL); + printf ("CPU: %f seconds\n", (double) (tv2.tv_usec - tv1.tv_usec) / 1000000 + + (double) (tv2.tv_sec - tv1.tv_sec)); //shranimo sliko stbi_write_png("mandelbrot.png", width, height, cpp, image, width * cpp); diff --git a/dn8/mandelbrot.png b/dn8/mandelbrot.png new file mode 100644 index 0000000..38de8dc Binary files /dev/null and b/dn8/mandelbrot.png differ diff --git a/dn8/results_cpu.csv b/dn8/results_cpu.csv new file mode 100644 index 0000000..594c391 --- /dev/null +++ b/dn8/results_cpu.csv @@ -0,0 +1,26 @@ +width,height,seconds +640,480,10.122344 +640,600,10.124772 +640,900,10.504951 +640,1080,11.015781 +640,2160,12.115385 +800,480,11.583709 +800,600,11.500485 +800,900,10.727533 +800,1080,10.642499 +800,2160,10.547187 +1600,480,11.243271 +1600,600,11.046463 +1600,900,10.917261 +1600,1080,11.436832 +1600,2160,10.779845 +1920,480,11.150000 +1920,600,11.943074 +1920,900,10.943963 +1920,1080,11.079364 +1920,2160,10.651142 +3840,480,10.953693 +3840,600,10.953526 +3840,900,11.594220 +3840,1080,11.026291 +3840,2160,11.050334 diff --git a/dn8/results_gpu.csv b/dn8/results_gpu.csv new file mode 100644 index 0000000..eff0457 --- /dev/null +++ b/dn8/results_gpu.csv @@ -0,0 +1,76 @@ +width,height,ngpus,milliseconds +640,480,1,1.664 +640,480,2,1.668 +640,480,4,1.676 +640,600,1,2.036 +640,600,2,2.051 +640,600,4,2.009 +640,900,1,2.132 +640,900,2,2.908 +640,900,4,2.882 +640,1080,1,2.639 +640,1080,2,3.366 +640,1080,4,3.372 +640,2160,1,4.996 +640,2160,2,4.905 +640,2160,4,5.701 +800,480,1,2.005 +800,480,2,2.383 +800,480,4,1.978 +800,600,1,2.450 +800,600,2,2.399 +800,600,4,2.422 +800,900,1,3.512 +800,900,2,3.510 +800,900,4,3.500 +800,1080,1,3.453 +800,1080,2,3.357 +800,1080,4,4.108 +800,2160,1,6.405 +800,2160,2,6.410 +800,2160,4,6.425 +1600,480,1,2.932 +1600,480,2,2.894 +1600,480,4,3.655 +1600,600,1,3.816 +1600,600,2,4.511 +1600,600,4,3.714 +1600,900,1,5.884 +1600,900,2,5.070 +1600,900,4,5.041 +1600,1080,1,5.619 +1600,1080,2,6.326 +1600,1080,4,6.321 +1600,2160,1,10.906 +1600,2160,2,10.772 +1600,2160,4,10.741 +1920,480,1,4.361 +1920,480,2,3.556 +1920,480,4,3.559 +1920,600,1,4.599 +1920,600,2,4.667 +1920,600,4,4.607 +1920,900,1,5.625 +1920,900,2,5.696 +1920,900,4,6.249 +1920,1080,1,7.881 +1920,1080,2,6.993 +1920,1080,4,7.749 +1920,2160,1,13.916 +1920,2160,2,12.911 +1920,2160,4,12.922 +3840,480,1,6.109 +3840,480,2,6.029 +3840,480,4,6.752 +3840,600,1,8.130 +3840,600,2,7.990 +3840,600,4,8.000 +3840,900,1,10.816 +3840,900,2,11.445 +3840,900,4,10.671 +3840,1080,1,13.010 +3840,1080,2,13.600 +3840,1080,4,13.223 +3840,2160,1,24.669 +3840,2160,2,24.373 +3840,2160,4,24.410 diff --git a/dn8/test.sh b/dn8/test.sh index 9adeb6d..60a7b3a 100755 --- a/dn8/test.sh +++ b/dn8/test.sh @@ -4,18 +4,31 @@ WIDTHS=( 640 800 1600 1920 3840 ) HEIGHTS=( 480 600 900 1080 2160 ) GPUS=(1 2 4) +# Boolean variable +TEST_GPU=true + +# Generate CSV +if [ "$TEST_GPU" = true ]; then + echo "width,height,ngpus,milliseconds" > results_gpu.csv +else + echo "width,height,seconds" > results_cpu.csv +fi -# loop through WIDTHS for WIDTH in "${WIDTHS[@]}" do - # loop through HEIGHTS for HEIGHT in "${HEIGHTS[@]}" do # loop through GPUS - for GPU in "${GPUS[@]}" - do - ret=$(make run_remote ntasks=1 ngpus=$GPU width=$WIDTH height=$HEIGHT | grep time | awk '{print $7}') - echo "Finished $WIDTH x $HEIGHT with $GPU GPUs --> time: $ret milliseconds" - done + if [ "$TEST_GPU" = true ]; then + for GPU in "${GPUS[@]}" + do + ret=$(make run_remote ntasks=1 ngpus=$GPU width=$WIDTH height=$HEIGHT | grep time | awk '{print $7}') + echo "$WIDTH,$HEIGHT,$GPU,$ret" >> results_gpu.csv + done + else + # run on CPU + ret=$( make run_local width=$WIDTH height=$HEIGHT | grep second | awk '{print $2}') + echo "$WIDTH,$HEIGHT,$ret" >> results_cpu.csv + fi done done