float a;
float b;
for(int i = 0; i < centers_width; i++){
a = Centers[idnx * centers_width + i];
b = Input[idny * input_width + i];
sum = sum + pow( a - b , 2);
}
Output[idnx + idny * output_width] = sqrt(sum);
}
}
// Host-side launcher for the EuclidianDistance kernel.
//
// Builds a 2D launch configuration of 16x16-thread blocks whose grid covers
// output_width columns (x) and output_height rows (y), rounding up in each
// dimension, and forwards every argument unchanged to the kernel.
//
// The launch uses the default stream; this function neither synchronizes
// nor checks for launch errors, so callers that need either must do it
// themselves.
//
// NOTE(review): Output, Input and Centers are presumably device pointers --
// confirm against the callers.
extern "C" void KernelEuclidianDistance(cudafloat *Output, int output_height, int output_width, cudafloat *Input, int input_width, cudafloat *Centers, int centers_width)
{
    // An empty output needs no work. Launching anyway would request a grid
    // dimension of zero, which is rejected as an invalid configuration and
    // leaves an unchecked error behind in the runtime's last-error state.
    if (output_width <= 0 || output_height <= 0) {
        return;
    }

    const int blockSize = 16;

    // Ceil-division written as (n - 1) / b + 1: equivalent to
    // n / b + (n % b != 0) for positive n, and cannot overflow.
    const int wBlocks = (output_width - 1) / blockSize + 1;
    const int hBlocks = (output_height - 1) / blockSize + 1;

    const dim3 grid(wBlocks, hBlocks);
    const dim3 threads(blockSize, blockSize);

    EuclidianDistance<<<grid, threads>>>(Output, output_height, output_width, Input, input_width, Centers, centers_width);
}
KERNEL FindMinKernel(cudafloat *Output, int output_height, int output_width, float *min_array, int* min_idx, cudafloat* Targets){