Ignore:
Timestamp:
12/04/11 22:08:32 (3 years ago)
Author:
wezowicz
Message:

Fixed memory leak in opencl-domain-memory.inl.
Add OpenCL specialization to muladd() along with 8 more kernels.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/wezzy/linbox/algorithms/opencl-kernels/kernel_muladd_partial_16_dp.cl

    r4072 r4090  
    99#pragma OPENCL EXTENSION cl_khr_fp64 : enable 
    1010 
    11 __kernel void matrix_mul_kernel(__global double* C, __global double* A, __global double* B, 
    12                 int width_A, int width_B, double mod){ 
     11__kernel void matrix_mul_kernel(__global double* D, double alpha, __global double* A, __global double* B, 
     12                double beta, __global double* C, int width_A, int width_B, double mod){ 
    1313        //Get Workgroup ID 
    1414        int bx = get_group_id(0); 
     
    3434        //Temporary storage for result 
    3535        double Dsub = 0; 
    36  
    37         //Setup count for modulus every 32 iterations. 
    38         int m = 0; 
    3936 
    4037        //Loop over all the sub-matrices of A and B required to compute 
     
    6259         
    6360        //Scale Dsub by alpha 
    64         Dsub = fmod((alpha * Dsub), mod); 
     61        Dsub = alpha * Dsub; 
     62        Dsub = fmod(Dsub, mod); 
    6563         
    66         if(beta != 0.0){ 
    67                 //Add C scaled by beta to Dsub 
    68                 double Csub = fmod((beta * C[d + ty * width_B + tx]), mod); 
    69                 Dsub = fmod((Dsub + Csub), mod); 
    70         } 
     64        //Scale Csub by beta 
     65        double Csub = C[d + ty * width_B + tx]; 
     66        Csub = beta * Csub; 
     67        Csub = fmod(Csub, mod); 
     68         
     69        //Add Dsub and Csub 
     70        Dsub = Dsub + Csub; 
     71        Dsub = fmod(Dsub, mod); 
    7172         
    7273        //Add the sum to the appropriate spot 
Note: See TracChangeset for help on using the changeset viewer.