Big Data

CUDA 교육

원찬식 2014. 10. 28. 15:55

#include <stdio.h>
#include <stdlib.h>
#define NN 10
//__device__ float a[10];   // global memory

/*
 * Squares the first `size` elements of `input` in place.
 *
 * input : device pointer to at least `size` floats.
 * size  : number of elements to process; zero or negative means no work.
 *
 * Launch layout: 1D grid of 1D blocks (only the .x components are used).
 * Uses no shared memory. Each thread starts at its global index and advances
 * by the total number of launched threads, so every element in [0, size) is
 * handled exactly once whether the launch has more or fewer threads than
 * elements.
 */
__global__
void function ( float* input, int size ){
        // 64-bit index and stride so neither the product nor the increment can wrap.
        const long long stride = (long long)gridDim.x * blockDim.x;

        for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x;
             i < size;
             i += stride){
                // The per-index bound keeps surplus threads from touching memory past `size`.
                input[i] = input[i]*input[i];
        }
}

/*
 * Host driver: fills an NN-element host array, uploads it to the GPU, runs
 * `function` over it, downloads the result and prints the CUDA status string.
 *
 * Returns 0 when every step succeeded and 1 otherwise.
 */
int main()
{
        float *a = NULL;        // host (CPU) buffer
        float *b_dev = NULL;    // device (GPU) buffer
        const size_t memSize = sizeof(float) * NN;

        // Launch shape: 4 threads per block, enough blocks to cover NN elements.
        const int threadsPerBlock = 4;
        const int blocks = (NN + threadsPerBlock - 1) / threadsPerBlock;

        // Size the device buffer to the launch (blocks * threadsPerBlock floats)
        // so every launched thread index maps inside the allocation.
        const size_t devSize = sizeof(float) * (size_t)blocks * threadsPerBlock;

        cudaError_t err = cudaSuccess;

        a = (float*) malloc( memSize ); // float a[NN]
        if (a == NULL){
                fprintf(stderr, "host malloc failed\n");
                return 1;
        }

        // Give the input defined values; malloc returns uninitialised memory.
        for (int k = 0; k < NN; ++k){
                a[k] = (float)k;
        }

        // cudaMalloc hands back the device pointer through its first argument.
        err = cudaMalloc( (void**)&b_dev, devSize );

        if (err == cudaSuccess){
                // Zero the whole buffer so the padding past NN holds defined values.
                err = cudaMemset( b_dev, 0, devSize );
        }

        if (err == cudaSuccess){
                err = cudaMemcpy( b_dev, a, memSize, cudaMemcpyHostToDevice);   // Upload
        }

        if (err == cudaSuccess){
                function <<<blocks, threadsPerBlock>>> ( b_dev, NN );
                // A launch returns no status; configuration errors show up here.
                err = cudaGetLastError();
        }

        if (err == cudaSuccess){
                // Blocking copy: waits for the kernel and reports its execution errors.
                err = cudaMemcpy (a, b_dev, memSize, cudaMemcpyDeviceToHost);   // Download
        }

        printf("ERR MASSAGE :\n%s\n", cudaGetErrorString( err ));

        // Both calls accept NULL, so cleanup is safe on every path.
        cudaFree( b_dev );
        free( a );

        return (err == cudaSuccess) ? 0 : 1;
}