Changeset 38 for include/fflas-ffpack/fflas_ftrmm_src.inl
- Timestamp: 08/28/07 09:37:54 (1 year ago)
- Files: 1 modified
  - include/fflas-ffpack/fflas_ftrmm_src.inl (modified) (6 diffs)
Legend:
- Unmodified
- Added
- Removed
include/fflas-ffpack/fflas_ftrmm_src.inl
r36 r38 29 29 #define __FFLAS__Aupdate __FFLAS__Atriang + nsplit * __FFLAS__Arowinc 30 30 #define __FFLAS__Arest A + nbblocsplit * nsplit * (lda+1) 31 #define __FFLAS__B updateB + (nbblocsplit - (i+1)) * nsplit * ldb32 #define __FFLAS__B recB + (nbblocsplit - i) * nsplit * ldb31 #define __FFLAS__Brec B + (nbblocsplit - (i+1)) * nsplit * ldb 32 #define __FFLAS__Bupdate B + (nbblocsplit - i) * nsplit * ldb 33 33 #define __FFLAS__Brest B + nbblocsplit * nsplit * ldb 34 #define __FFLAS__A1 A + nsplit* (lda + 1)35 #define __FFLAS__A2 A + nsplit* __FFLAS__Arowinc34 #define __FFLAS__A1 A + (nsplit) * (lda + 1) 35 #define __FFLAS__A2 A + (nsplit) * __FFLAS__Arowinc 36 36 #define __FFLAS__A3 A 37 #define __FFLAS__B1 B + nsplit * ldb37 #define __FFLAS__B1 B + (nsplit) * ldb 38 38 #define __FFLAS__B2 B 39 39 #else 40 #define __FFLAS__Atriang A + i * nsplit* (lda + 1)41 #define __FFLAS__Aupdate A + ( i+1) * nsplit* __FFLAS__Acolinc40 #define __FFLAS__Atriang A + (nrestsplit + i * nsplit) * (lda + 1) 41 #define __FFLAS__Aupdate A + (nrestsplit + i * nsplit) * __FFLAS__Acolinc 42 42 #define __FFLAS__Arest A 43 #define __FFLAS__B updateB + (nrestsplit + i * nsplit) * ldb44 #define __FFLAS__B rec B + MAX(0, int (nrestsplit) + (i-1) * nsplit) * ldb43 #define __FFLAS__Brec B + (nrestsplit + i * nsplit) * ldb 44 #define __FFLAS__Bupdate B 45 45 #define __FFLAS__Brest B 46 46 #define __FFLAS__A1 A 47 #define __FFLAS__A2 A + nsplit* __FFLAS__Acolinc48 #define __FFLAS__A3 A + nsplit* (lda + 1)49 #define __FFLAS__B1 B 50 #define __FFLAS__B2 B + nsplit* ldb47 #define __FFLAS__A2 A + (M-nsplit) * __FFLAS__Acolinc 48 #define __FFLAS__A3 A + (M-nsplit) * (lda + 1) 49 #define __FFLAS__B1 B 50 #define __FFLAS__B2 B + (M-nsplit) * ldb 51 51 #endif 52 52 #else … … 67 67 #define __FFLAS__Aupdate __FFLAS__Atriang + nsplit * __FFLAS__Acolinc 68 68 #define __FFLAS__Arest A + nbblocsplit * nsplit * (lda+1) 69 #define __FFLAS__B updateB + (nbblocsplit - (i+1)) * nsplit70 #define __FFLAS__B recB + 
(nbblocsplit - i) * nsplit69 #define __FFLAS__Brec B + (nbblocsplit - (i+1)) * nsplit 70 #define __FFLAS__Bupdate B + (nbblocsplit - i) * nsplit 71 71 #define __FFLAS__Brest B + nbblocsplit * nsplit 72 #define __FFLAS__A1 A + nsplit* (lda + 1)73 #define __FFLAS__A2 A + nsplit* __FFLAS__Acolinc72 #define __FFLAS__A1 A + (nsplit) * (lda + 1) 73 #define __FFLAS__A2 A + (nsplit) * __FFLAS__Acolinc 74 74 #define __FFLAS__A3 A 75 75 #define __FFLAS__B1 B + nsplit 76 #define __FFLAS__B2 B 77 #else 78 #define __FFLAS__Atriang A + i * nsplit * (lda + 1)79 #define __FFLAS__Aupdate A + ( i+1) * nsplit * __FFLAS__Arowinc76 #define __FFLAS__B2 B 77 #else 78 #define __FFLAS__Atriang A + (nrestsplit + i * nsplit) * (lda + 1) 79 #define __FFLAS__Aupdate A + (nrestsplit + i * nsplit) * __FFLAS__Arowinc 80 80 #define __FFLAS__Arest A 81 #define __FFLAS__B updateB + (nrestsplit + i * nsplit)82 #define __FFLAS__B rec B + MAX(0, int (nrestsplit) + (i-1) * nsplit)81 #define __FFLAS__Brec B + (nrestsplit + i * nsplit) 82 #define __FFLAS__Bupdate B 83 83 #define __FFLAS__Brest B 84 84 #define __FFLAS__A1 A 85 #define __FFLAS__A2 A + nsplit* __FFLAS__Arowinc86 #define __FFLAS__A3 A + nsplit* (lda + 1)87 #define __FFLAS__B1 B 88 #define __FFLAS__B2 B + nsplit85 #define __FFLAS__A2 A + (N-nsplit) * __FFLAS__Arowinc 86 #define __FFLAS__A3 A + (N-nsplit) * (lda + 1) 87 #define __FFLAS__B1 B 88 #define __FFLAS__B2 B + N-nsplit 89 89 #endif 90 90 #endif … … 159 159 F.neg(Mone, one); 160 160 161 static __FFLAS__DOMAIN D;162 //size_t nblas = TRSMBound<Field> (F);163 164 //size_t nbblocsblas = __FFLAS__Na / nblas;165 //size_t nrestblas = __FFLAS__Na % nblas;166 161 size_t nsplit = DotProdBound (F, 0, one, 167 162 #ifdef __FFLAS__DOUBLE … … 171 166 #endif 172 167 ); 173 //ndel = (ndel / nblas)*nblas; 174 //size_t nsplit = ndel;//MIN (ndel, (nbblocsblas+1) / 2 * nblas); 168 175 169 size_t nbblocsplit = (__FFLAS__Na-1) / nsplit; 176 170 size_t nrestsplit = ((__FFLAS__Na-1) % nsplit) +1; 177 171 178 
//std::cout<<"nblas, ndel, nsplit, nbblocsplit, nrestsplit = "<<nblas<<" "<<ndel<<" " 179 //<<nsplit<<" "<<nbblocsplit<<" "<<nrestsplit<<std::endl; 180 181 if (nrestsplit){ 182 //std::cerr<<"nblas nrestsplit, M, N = "<<nblas<<" "<<nrestsplit<<" "<<M<<" "<<N<<std::endl; 172 if (nrestsplit) 183 173 this->delayed (F, __FFLAS__Mbrest, __FFLAS__Nbrest, 184 174 __FFLAS__Arest, lda, __FFLAS__Brest, ldb); 185 } 186 175 187 176 for ( size_t i = 0; i < nbblocsplit; ++i) { 188 177 189 // cerr<<"M,N,K = "<<M<<" "<<(N-(i+1)*nsplit)<<" "<<nsplit<<endl;190 178 #ifdef __FFLAS__RIGHT 191 179 fgemm (F, FflasNoTrans, Mjoin (Fflas, __FFLAS__TRANS), … … 197 185 __FFLAS__Aupdate, lda, __FFLAS__Brec, ldb, one, __FFLAS__Bupdate, ldb); 198 186 #endif 187 199 188 this->delayed (F, __FFLAS__Mb, __FFLAS__Nb, 200 189 __FFLAS__Atriang, lda, __FFLAS__Brec, ldb); … … 222 211 F.neg(Mone,one); 223 212 if (__FFLAS__Na == 1) 213 #ifdef __FFLAS__NONUNIT 224 214 fscal(F, __FFLAS__Bdim, *A, B, __FFLAS__Bnorminc); 225 226 else { // __FFLAS__Na > 1 215 #else 216 ; 217 #endif 218 219 else { // __FFLAS__Na > 1 227 220 size_t nsplit = __FFLAS__Na >> 1; 228 this->operator() (F, __FFLAS__Mb, __FFLAS__Nb, __FFLAS__A1, lda, __FFLAS__B1, ldb); 229 // cerr<<"delay, Nup = "<<delay<<" "<<Nup<<endl; 230 //cerr<<"M,N,K = "<<M<<" "<<Ndown<<" "<<Nup<<endl; 231 221 this->operator() (F, __FFLAS__Mb2, __FFLAS__Nb2, __FFLAS__A1, lda, __FFLAS__B1, ldb); 222 232 223 #ifdef __FFLAS__RIGHT 233 224 fgemm (F, FflasNoTrans , Mjoin (Fflas, __FFLAS__TRANS), 234 225 __FFLAS__Mb2, __FFLAS__Nb2, nsplit, one, 235 __FFLAS__B 1, ldb, __FFLAS__A2, lda, one, __FFLAS__B2, ldb);226 __FFLAS__B2, ldb, __FFLAS__A2, lda, one, __FFLAS__B1, ldb); 236 227 #else 237 228 fgemm (F, Mjoin (Fflas, __FFLAS__TRANS), FflasNoTrans, 238 229 __FFLAS__Mb2, __FFLAS__Nb2, nsplit, one, 239 __FFLAS__A2, lda, __FFLAS__B 1, ldb, one, __FFLAS__B2, ldb);240 #endif 241 this->operator() (F, __FFLAS__Mb 2, __FFLAS__Nb2, __FFLAS__A3, lda, __FFLAS__B2, ldb);230 __FFLAS__A2, 
lda, __FFLAS__B2, ldb, one, __FFLAS__B1, ldb); 231 #endif 232 this->operator() (F, __FFLAS__Mb, __FFLAS__Nb, __FFLAS__A3, lda, __FFLAS__B2, ldb); 242 233 } 243 234 }
