Cuda code added
parent
08d5a84f10
commit
16498d57e4
|
@ -0,0 +1,28 @@
|
|||
# Build, deploy and run the CUDA Mandelbrot program.
#
# Targets:
#   all / $(PROGRAM_NAME)  compile the .cu source with nvcc
#   clean                  remove the compiled binary
#   push                   copy the working directory to the remote host
#   run_remote             rebuild and run remotely (needs ngpus=... ntasks=...)
#   download_output        fetch the rendered image from the remote host
CC=nvcc
CFLAGS=-g -lm
DESTINATION=/ceph/grid/home/gs0104/dn/dn8
PROGRAM_NAME=cuda
MODULE_LOAD=module load CUDA/10.1.243-GCC-8.3.0

# These targets are commands, not files; keep a stray file of the same name
# from silently disabling them.
.PHONY: all clean push run_remote download_output

all: $(PROGRAM_NAME)

$(PROGRAM_NAME): $(PROGRAM_NAME).cu
	$(CC) $(CFLAGS) $(PROGRAM_NAME).cu -o $(PROGRAM_NAME)

# -f: succeed even when the binary has not been built yet.
clean:
	rm -f $(PROGRAM_NAME)

push:
	rsync -a --progress . nsc:$(DESTINATION)

run_remote:
	@test -n "$(ngpus)" || (echo "ngpus is not set"; exit 1)
	@test -n "$(ntasks)" || (echo "ntasks is not set"; exit 1)

	@echo "Compiling..."
	# Chain with && so a failed cd or module load aborts instead of building
	# in the wrong directory or with the wrong toolchain.
	ssh nsc 'cd $(DESTINATION) && make clean && $(MODULE_LOAD) && make $(PROGRAM_NAME)'
	@echo "Running on $(ngpus) GPUs with $(ntasks) tasks"
	ssh nsc '$(MODULE_LOAD) && srun --reservation=fri --ntasks=$(ntasks) -G$(ngpus) $(DESTINATION)/$(PROGRAM_NAME)'

download_output:
	rsync -a --progress nsc:$(DESTINATION)/mandelbrot.png .
|
|
@ -0,0 +1,116 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#include "helper_cuda.h"
|
||||
|
||||
#define BLOCK_SIZE 4
|
||||
|
||||
#define STB_IMAGE_WRITE_IMPLEMENTATION
|
||||
#include "stb_image_write.h"
|
||||
|
||||
|
||||
/*
 * Renders the Mandelbrot set into an interleaved 8-bit image buffer.
 *
 * Launch layout: a 2D grid of 2D blocks covering at least width x height
 * threads in X/Y; each thread computes one pixel and writes all of its
 * channels. Threads outside the image, and any thread with a non-zero global
 * Z index, return without writing. No shared memory is used.
 *
 * Parameters:
 *   img      - device buffer of at least width * height * channels bytes.
 *              Pixel (x, y) occupies bytes [(y * width + x) * channels, +channels).
 *   width    - image width in pixels.
 *   height   - image height in pixels.
 *   channels - bytes per pixel. Channel 0 receives the computed colour,
 *              channel 3 (if present) is set to 255, all others to 0.
 */
__global__ void printGPU(unsigned char *img, int width, int height, int channels)
{
    const int global_X = blockIdx.x * blockDim.x + threadIdx.x;
    const int global_Y = blockIdx.y * blockDim.y + threadIdx.y;
    const int global_Z = blockIdx.z * blockDim.z + threadIdx.z;
    const int max_iteration = 1000;
    const unsigned char max = 255;

    // One thread per pixel: if the grid also extends in Z, only the first
    // layer does the work so no pixel is computed twice.
    if (global_X >= width || global_Y >= height || global_Z != 0)
        return;

    // Map the pixel to the complex plane: x in [-2.5, 1.0), y in [-1.0, 1.0).
    const float x0 = (float)global_X / (float)width * 3.5f - 2.5f;
    const float y0 = (float)global_Y / (float)height * 2.0f - 1.0f;

    float x = 0.0f;
    float y = 0.0f;
    int iter = 0;
    while ((x * x + y * y < 4.0f) && (iter < max_iteration))
    {
        const float xtemp = x * x - y * y + x0;
        y = 2.0f * x * y + y0;
        x = xtemp;
        iter++;
    }

    // Smooth colouring (http://linas.org/art-gallery/escape/smooth.html).
    // The formula takes log(log(|z|)), which is only defined once the orbit
    // has left the radius-2 disc; points that never escaped stay black.
    int color = 0;
    const float r2 = x * x + y * y;
    if (!(r2 < 4.0f))
    {
        color = (int)(1.0f + (float)iter - logf(logf(sqrtf(r2))) / logf(2.0f));
        color = (8 * max * color) / max_iteration;
        if (color > max)
            color = max;
    }

    // Write the channels of this pixel next to each other, which is the
    // layout implied by a row stride of width * channels bytes.
    unsigned char *pixel = img + ((size_t)global_Y * (size_t)width + (size_t)global_X) * (size_t)channels;
    for (int c = 0; c < channels; ++c)
    {
        if (c == 0)
            pixel[c] = (unsigned char)color;
        else if (c == 3)
            pixel[c] = max; // fully opaque
        else
            pixel[c] = 0;
    }
}
|
||||
|
||||
/*
 * Renders a 1024x1024 RGBA image on the GPU, reports the combined kernel and
 * device-to-host copy time, and saves the result as mandelbrot.png.
 *
 * Returns 0 on success and EXIT_FAILURE if host allocation or writing the PNG
 * fails. CUDA API failures terminate the process through checkCudaErrors /
 * getLastCudaError.
 */
int main(void)
{
    const int width = 1024;
    const int height = 1024;
    const int channels = 4;

    // Do the multiplication in size_t so larger images cannot overflow int.
    const size_t data_size = (size_t)width * height * channels * sizeof(unsigned char);
    unsigned char *img_d = NULL;
    unsigned char *img_h = NULL;

    // Allocate the host buffer first so a failure needs no device cleanup.
    img_h = (unsigned char *)malloc(data_size);
    if (img_h == NULL)
    {
        fprintf(stderr, "Failed to allocate %zu bytes of host memory\n", data_size);
        return EXIT_FAILURE;
    }

    // Allocate memory on the GPU and clear it, so any byte the kernel leaves
    // untouched still has a defined value when the image is saved.
    checkCudaErrors(cudaMalloc(&img_d, data_size));
    checkCudaErrors(cudaMemset(img_d, 0, data_size));

    // One thread per pixel; round the grid up so it covers the whole image.
    dim3 blockSize(BLOCK_SIZE, BLOCK_SIZE, 1);
    dim3 gridSize((width + blockSize.x - 1) / blockSize.x, (height + blockSize.y - 1) / blockSize.y, 1);

    // Create the CUDA events used for timing.
    cudaEvent_t start, stop;
    checkCudaErrors(cudaEventCreate(&start));
    checkCudaErrors(cudaEventCreate(&stop));

    // Start timing.
    checkCudaErrors(cudaEventRecord(start));

    // Launch the kernel on the GPU. A launch does not return an error code,
    // so a bad configuration has to be picked up explicitly afterwards.
    printGPU<<<gridSize, blockSize>>>(img_d, width, height, channels);
    getLastCudaError("printGPU launch failed");

    // Copy the image to the host.
    checkCudaErrors(cudaMemcpy(img_h, img_d, data_size, cudaMemcpyDeviceToHost));

    // Stop timing.
    checkCudaErrors(cudaEventRecord(stop));

    // Wait for the stop event, since events are recorded asynchronously.
    checkCudaErrors(cudaEventSynchronize(stop));

    float milliseconds = 0;
    checkCudaErrors(cudaEventElapsedTime(&milliseconds, start, stop));
    printf("Kernel & Memcpy Execution time is: %0.3f milliseconds \n", milliseconds);

    // Save the picture; stbi_write_png returns 0 on failure.
    int exit_code = EXIT_SUCCESS;
    if (!stbi_write_png("mandelbrot.png", width, height, channels, img_h, width * channels))
    {
        fprintf(stderr, "Failed to write mandelbrot.png\n");
        exit_code = EXIT_FAILURE;
    }

    // Release the image buffers and the timing events.
    free(img_h);
    checkCudaErrors(cudaFree(img_d));
    checkCudaErrors(cudaEventDestroy(start));
    checkCudaErrors(cudaEventDestroy(stop));

    return exit_code;
}
|
|
@ -0,0 +1,80 @@
|
|||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// These are CUDA Helper functions for initialization and error checking
|
||||
|
||||
#ifndef COMMON_HELPER_CUDA_H_
|
||||
#define COMMON_HELPER_CUDA_H_
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// Larger of a and b. Every use of an argument is parenthesised so that
// expressions containing operators expand correctly. Each argument may be
// evaluated twice, so avoid arguments with side effects.
#ifndef MAX
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif

// Smaller of a and b. Same parenthesisation and double-evaluation caveat as MAX.
#ifndef MIN
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif
|
||||
|
||||
#define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__)
|
||||
#define getLastCudaError(msg) __getLastCudaError(msg, __FILE__, __LINE__)
|
||||
|
||||
// Aborts the program with a diagnostic if a CUDA runtime call failed.
//
// Normally used through the checkCudaErrors(val) macro, which supplies the
// stringified call as `func` together with the call site's file and line.
//
//   result - status returned by the CUDA call; cudaSuccess means no action.
//   func   - text of the call that was checked (printed in the message).
//   file   - source file of the call site.
//   line   - source line of the call site.
//
// Declared inline because this definition lives in a header: without it,
// including the header from more than one translation unit produces a
// multiple-definition link error.
inline void check(cudaError_t result, char const *const func, const char *const file,
                  int const line)
{
    if (result != cudaSuccess)
    {
        fprintf(stderr, "CUDA error at %s:%d code=%d (%s) \"%s\" \n", file, line, (int)result, cudaGetErrorName(result), func);
        exit(EXIT_FAILURE);
    }
}
|
||||
|
||||
inline void __getLastCudaError(const char *errorMessage, const char *file,
|
||||
const int line)
|
||||
{
|
||||
cudaError_t err = cudaGetLastError();
|
||||
|
||||
if (cudaSuccess != err)
|
||||
{
|
||||
fprintf(stderr,
|
||||
"%s(%i) : getLastCudaError() CUDA error :"
|
||||
" %s : (%d) %s.\n",
|
||||
file, line, errorMessage, (int)(err),
|
||||
cudaGetErrorString(err));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
// end of CUDA Helper Functions
|
||||
|
||||
#endif // COMMON_HELPER_CUDA_H_
|
|
@ -0,0 +1,63 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
|
||||
#define STB_IMAGE_WRITE_IMPLEMENTATION
|
||||
#include "stb_image_write.h"
|
||||
|
||||
void mandelbrotCPU(unsigned char *image, int height, int width) {
|
||||
float x0, y0, x, y, xtemp;
|
||||
int i, j;
|
||||
int color;
|
||||
int iter;
|
||||
int max_iteration = 1000; //max stevilo iteracij
|
||||
unsigned char max = 255; //max vrednost barvnega kanala
|
||||
|
||||
//za vsak piksel v sliki
|
||||
for (i = 0; i < height; i++)
|
||||
for (j = 0; j < width; j++)
|
||||
{
|
||||
x0 = (float)j / width * (float)3.5 - (float)2.5; //zacetna vrednost
|
||||
y0 = (float)i / height * (float)2.0 - (float)1.0;
|
||||
x = 0;
|
||||
y = 0;
|
||||
iter = 0;
|
||||
//ponavljamo, dokler ne izpolnemo enega izmed pogojev
|
||||
while ((x*x + y * y <= 4) && (iter < max_iteration))
|
||||
{
|
||||
xtemp = x * x - y * y + x0;
|
||||
y = 2 * x*y + y0;
|
||||
x = xtemp;
|
||||
iter++;
|
||||
}
|
||||
//izracunamo barvo (magic: http://linas.org/art-gallery/escape/smooth.html)
|
||||
color = 1.0 + iter - log(log(sqrt(x*x + y * y))) / log(2.0);
|
||||
color = (8 * max * color) / max_iteration;
|
||||
if (color > max)
|
||||
color = max;
|
||||
//zapisemo barvo RGBA
|
||||
image[4 * i*width + 4 * j + 0] = color; //Red
|
||||
image[4 * i*width + 4 * j + 1] = 0; // Green
|
||||
image[4 * i*width + 4 * j + 2] = 0; // Blue
|
||||
image[4 * i*width + 4 * j + 3] = 255; // Alpha
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
 * Renders the Mandelbrot set on the CPU and saves it as mandelbrot.png.
 *
 * Returns 0 on success and EXIT_FAILURE if the image buffer cannot be
 * allocated or the PNG cannot be written.
 */
int main(void)
{
    /* set the image size */
    int height = 3840;
    int width = 2160;
    int cpp = 4; /* number of colour channels per pixel */
    int written;

    /* reserve space for the image (RGBA); size computed in size_t */
    unsigned char *image = (unsigned char *)malloc((size_t)height * width * cpp * sizeof(unsigned char));
    if (image == NULL)
    {
        fprintf(stderr, "Failed to allocate the image buffer\n");
        return EXIT_FAILURE;
    }

    mandelbrotCPU(image, height, width);

    /* save the image; stbi_write_png returns 0 on failure */
    written = stbi_write_png("mandelbrot.png", width, height, cpp, image, width * cpp);

    free(image);

    if (!written)
    {
        fprintf(stderr, "Failed to write mandelbrot.png\n");
        return EXIT_FAILURE;
    }
    return 0;
}
|
Binary file not shown.
After Width: | Height: | Size: 995 KiB |
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue