branches/wezzy/linbox/algorithms/openclkernels/kernel_muladd_modulus_sp.cl
r4072 r4090 1 1 /* 2 * kernel_modulus_dp.cl 2 * kernel_modulus_sp.cl 3 3 * 4 4 * Created on: Jul 5, 2011 … … 7 7 8 8 #define BLOCK_SIZE 16 9 #pragma OPENCL EXTENSION cl_khr_fp64 : enable 10 9 11 10 __kernel void matrix_mul_kernel(__global float* D, float alpha, __global float* A, __global float* B, … … 60 59 61 60 //Scale Dsub by alpha 62 Dsub = fmod((alpha * Dsub), mod); 61 Dsub = alpha * Dsub; 62 Dsub = fmod(Dsub, mod); 63 63 64 if(beta != 0.0){ 65 //Add C scaled by beta to Dsub 66 float Csub = fmod((beta * C[d + ty * width_B + tx]), mod); 67 Dsub = fmod((Dsub + Csub), mod); 68 } 64 //Scale Csub by beta 65 float Csub = C[d + ty * width_B + tx]; 66 Csub = beta * Csub; 67 Csub = fmod(Csub, mod); 68 69 //Add Dsub and Csub 70 Dsub = Dsub + Csub; 71 Dsub = fmod(Dsub, mod); 69 72 70 73 //Add the sum to the appropriate spot
