 Timestamp:
 12/04/11 22:08:32 (3 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

branches/wezzy/linbox/algorithms/openclkernels/kernel_muladd_partial_16_dp.cl
r4072 r4090 9 9 #pragma OPENCL EXTENSION cl_khr_fp64 : enable 10 10 11 __kernel void matrix_mul_kernel(__global double* C, __global double* A, __global double* B,12 int width_A, int width_B, double mod){11 __kernel void matrix_mul_kernel(__global double* D, double alpha, __global double* A, __global double* B, 12 double beta, __global double* C, int width_A, int width_B, double mod){ 13 13 //Get Workgroup ID 14 14 int bx = get_group_id(0); … … 34 34 //Temporary storage for result 35 35 double Dsub = 0; 36 37 //Setup count for modulus every 32 iterations.38 int m = 0;39 36 40 37 //Loop over all the submatrices of A and B required to compute … … 62 59 63 60 //Scale Dsub by alpha 64 Dsub = fmod((alpha * Dsub), mod); 61 Dsub = alpha * Dsub; 62 Dsub = fmod(Dsub, mod); 65 63 66 if(beta != 0.0){ 67 //Add C scaled by beta to Dsub 68 double Csub = fmod((beta * C[d + ty * width_B + tx]), mod); 69 Dsub = fmod((Dsub + Csub), mod); 70 } 64 //Scale Csub by beta 65 double Csub = C[d + ty * width_B + tx]; 66 Csub = beta * Csub; 67 Csub = fmod(Csub, mod); 68 69 //Add Dsub and Csub 70 Dsub = Dsub + Csub; 71 Dsub = fmod(Dsub, mod); 71 72 72 73 //Add the sum to the appropriate spot
Note: See TracChangeset
for help on using the changeset viewer.