Changeset 18
- Timestamp:
- 04/28/07 20:58:32 (2 years ago)
- Location:
- include
- Files:
-
- 10 modified
-
config-blas.h (modified) (8 diffs)
-
fflas-ffpack/fflas.h (modified) (10 diffs)
-
fflas-ffpack/fflas_bounds.inl (modified) (4 diffs)
-
fflas-ffpack/fflas_fcopy.inl (modified) (1 diff)
-
fflas-ffpack/fflas_fgemm.inl (modified) (12 diffs)
-
fflas-ffpack/fflas_fgemv.inl (modified) (5 diffs)
-
fflas-ffpack/fflas_ftrmm.inl (modified) (24 diffs)
-
fflas-ffpack/fflas_ftrsm.inl (modified) (6 diffs)
-
fflas-ffpack/modular-balanced.h (modified) (1 diff)
-
fflas-ffpack/unparametric.h (modified) (3 diffs)
Legend:
- Unmodified
- Added
- Removed
-
include/config-blas.h
r1 r18 1 1 /* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ 2 /* linbox/algorithms/lifting-container-base.h2 /* config-blas.h 3 3 * Copyright (C) 2005 Pascal Giorgi 4 * 4 * 2007 Clement Pernet 5 5 * Written by Pascal Giorgi <pgiorgi@uwaterloo.ca> 6 6 * … … 51 51 // level 2 routines 52 52 void dgemv_ (const char*, const int*, const int*, const double*, const double*, const int*, const double*, const int*, const double*, double*, const int*); 53 void sgemv_ (const char*, const int*, const int*, const float*, const float*, const int*, const float*, const int*, const float*, float*, const int*); 53 54 void dger_ (const int*, const int*, const double*, const double*, const int*, const double*, const int*, double*, const int*); 54 55 55 56 // level 3 routines 56 57 void dtrsm_ (const char*, const char*, const char*, const char*, const int*, const int*, const double*, const double*, const int*, double*, const int*); 58 void strsm_ (const char*, const char*, const char*, const char*, const int*, const int*, const float*, const float*, const int*, float*, const int*); 57 59 void dtrmm_ (const char*, const char*, const char*, const char*, const int*, const int*, const double*, const double*, const int*, double*, const int*); 60 void strmm_ (const char*, const char*, const char*, const char*, const int*, const int*, const float*, const float*, const int*, float*, const int*); 61 void sgemm_ (const char*, const char*, const int*, const int*, const int*, const float*, const float*, const int*, const float*, const int*, const float*, float*, const int*); 58 62 void dgemm_ (const char*, const char*, const int*, const int*, const int*, const double*, const double*, const int*, const double*, const int*, const double*, double*, const int*); 59 63 } … … 128 132 dgemv_ ( EXT_BLAS_TRANSPOSE(TransA), &M, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY); 129 133 } 134 void cblas_sgemv(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const float alpha, 135 const float *A, const int lda, const float *X, const int incX, const float beta, float *Y, const int incY) 136 { 137 if (Order == CblasRowMajor) 138 sgemv_ ( EXT_BLAS_TRANSPOSE_tr(TransA), &N, &M, &alpha, A, &lda, X, &incX, &beta, Y, &incY); 139 else 140 sgemv_ ( EXT_BLAS_TRANSPOSE(TransA), &M, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY); 141 } 130 142 131 143 void cblas_dger(const enum CBLAS_ORDER Order, const int M, const int N, const double alpha, const double *X, const int incX, … … 151 163 dtrsm_ ( EXT_BLAS_SIDE(Side), EXT_BLAS_UPLO(Uplo), EXT_BLAS_TRANSPOSE(TransA), EXT_BLAS_DIAG(Diag), &M, &N, &alpha, A, &lda, B, &ldb); 152 164 } 165 void cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, 166 const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, const float *A, const int lda, 167 float *B, const int ldb) 168 { 169 if (Order == CblasRowMajor) 170 strsm_ ( EXT_BLAS_SIDE_tr(Side), EXT_BLAS_UPLO_tr(Uplo), EXT_BLAS_TRANSPOSE(TransA), EXT_BLAS_DIAG(Diag), &N, &M, &alpha, A, &lda, B, &ldb); 171 else 172 strsm_ ( EXT_BLAS_SIDE(Side), EXT_BLAS_UPLO(Uplo), EXT_BLAS_TRANSPOSE(TransA), EXT_BLAS_DIAG(Diag), &M, &N, &alpha, A, &lda, B, &ldb); 173 } 153 174 154 175 void cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, … … 160 181 else 161 182 dtrmm_ ( EXT_BLAS_SIDE(Side), EXT_BLAS_UPLO(Uplo), EXT_BLAS_TRANSPOSE(TransA), EXT_BLAS_DIAG(Diag), &M, &N, &alpha, A, &lda, B, &ldb); 183 } 184 void cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, 185 const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, const float *A, const int lda, 186 float *B, const int ldb) 187 { 188 if (Order == CblasRowMajor) 189 strmm_ ( EXT_BLAS_SIDE_tr(Side), EXT_BLAS_UPLO_tr(Uplo), EXT_BLAS_TRANSPOSE(TransA), EXT_BLAS_DIAG(Diag), &N, &M, &alpha, A, &lda, B, &ldb); 190 else 191 strmm_ ( EXT_BLAS_SIDE(Side), EXT_BLAS_UPLO(Uplo), EXT_BLAS_TRANSPOSE(TransA), EXT_BLAS_DIAG(Diag), &M, &N, &alpha, A, &lda, B, &ldb); 162 192 } 163 193 … … 170 200 else 171 201 dgemm_ ( EXT_BLAS_TRANSPOSE(TransA), EXT_BLAS_TRANSPOSE(TransB), &M, &N, &K, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); 202 } 203 void cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, 204 const int K, const float alpha, const float *A, const int lda, const float *B, const int ldb, 205 const float beta, float *C, const int ldc) 206 { 207 if (Order == CblasRowMajor) 208 sgemm_ ( EXT_BLAS_TRANSPOSE(TransB), EXT_BLAS_TRANSPOSE(TransA), &N, &M, &K, &alpha, B, &ldb, A, &lda, &beta, C, &ldc); 209 else 210 sgemm_ ( EXT_BLAS_TRANSPOSE(TransA), EXT_BLAS_TRANSPOSE(TransB), &M, &N, &K, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); 172 211 } 173 212 … … 226 265 void cblas_dgemv(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const double alpha, 227 266 const double *A, const int lda, const double *X, const int incX, const double beta, double *Y, const int incY); 267 268 void cblas_sgemv(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const float alpha, 269 const float *A, const int lda, const float *X, const int incX, const float beta, float *Y, const int incY); 228 270 229 271 void cblas_dger(const enum CBLAS_ORDER Order, const int M, const int N, const double alpha, const double *X, const int incX, … … 237 279 double *B, const int ldb); 238 280 281 void cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, 282 const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, const float *A, const int lda, 283 float *B, const int ldb); 284 239 285 void cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, 240 286 const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, const double *A, const int lda, 241 287 double *B, const int ldb); 242 288 289 void cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, 290 const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, const float *A, const int lda, 291 float *B, const int ldb); 292 243 293 void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, 244 294 const int K, const double alpha, const double *A, const int lda, const double *B, const int ldb, 245 295 const double beta, double *C, const int ldc) ; 296 void cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, 297 const int K, const float alpha, const float *A, const int lda, const float *B, const int ldb, 298 const float beta, float *C, const int ldc) ; 246 299 247 300 // LAPACK routines -
include/fflas-ffpack/fflas.h
r14 r18 38 38 39 39 #define DOUBLE_MANTISSA 53 40 #define FLOAT_MANTISSA 24 40 41 41 42 class FFLAS { … … 47 48 enum FFLAS_SIDE { FflasLeft=141, FflasRight = 142 }; 48 49 50 typedef UnparametricField<float> FloatDomain; 51 49 52 typedef UnparametricField<double> DoubleDomain; 53 54 55 50 56 51 57 //--------------------------------------------------------------------- … … 224 230 */ 225 231 template<class Field> 226 static typename Field::Element* fgemm (const Field& F, 227 const enum FFLAS_TRANSPOSE ta, 228 const enum FFLAS_TRANSPOSE tb, 229 const size_t m, 230 const size_t n, 231 const size_t k, 232 const typename Field::Element alpha, 233 const typename Field::Element* A, 234 const size_t lda, 235 const typename Field::Element* B, 236 const size_t ldb, 237 const typename Field::Element beta, 238 typename Field::Element* C, 239 const size_t ldc){ 232 static typename Field::Element* 233 fgemm (const Field& F, 234 const enum FFLAS_TRANSPOSE ta, 235 const enum FFLAS_TRANSPOSE tb, 236 const size_t m, 237 const size_t n, 238 const size_t k, 239 const typename Field::Element alpha, 240 const typename Field::Element* A, 241 const size_t lda, 242 const typename Field::Element* B, 243 const size_t ldb, 244 const typename Field::Element beta, 245 typename Field::Element* C, 246 const size_t ldc){ 240 247 size_t ws =0; 241 248 if ( (ta==FflasNoTrans) && (tb==FflasNoTrans)) { … … 305 312 } 306 313 } 314 //--------------------------------------------------------------------- 315 // Finite Field matrix => float matrix 316 //--------------------------------------------------------------------- 317 template<class Field> 318 static void MatF2MatFl (const Field& F, 319 FloatDomain::Element* S, const size_t lds, 320 const typename Field::Element* E, 321 const size_t lde,const size_t m, const size_t n){ 322 323 const typename Field::Element* Ei = E; 324 FloatDomain::Element *Si=S; 325 size_t j; 326 for (; Ei < E+lde*m; Ei+=lde, Si += lds) 327 for ( j=0; j<n; ++j){ 328 F.convert(*(Si+j),*(Ei+j)); 329 } 330 } 307 331 308 332 //--------------------------------------------------------------------- … … 324 348 F.convert(*(Si+j),*(Ei+j)); 325 349 } 350 351 //--------------------------------------------------------------------- 352 // Finite Field matrix => float matrix 353 // Special design for upper-triangular matrices 354 //--------------------------------------------------------------------- 355 template<class Field> 356 static void MatF2MatFl_Triangular (const Field& F, 357 typename FloatDomain::Element* S, const size_t lds, 358 const typename Field::Element* const E, 359 const size_t lde, 360 const size_t m, const size_t n){ 361 362 const typename Field::Element* Ei = E; 363 typename FloatDomain::Element* Si = S; 364 size_t i=0, j; 365 for ( ; i<m;++i, Ei+=lde, Si+=lds) 366 for ( j=i; j<n;++j) 367 F.convert(*(Si+j),*(Ei+j)); 368 } 326 369 327 370 //--------------------------------------------------------------------- … … 336 379 typename Field::Element* Si = S; 337 380 const DoubleDomain::Element* Ei =E; 381 size_t j; 382 for ( ; Si < S+m*lds; Si += lds, Ei+= lde){ 383 for ( j=0; j<n;++j) 384 F.init( *(Si+j), *(Ei+j) ); 385 } 386 } 387 388 //--------------------------------------------------------------------- 389 // float matrix => Finite Field matrix 390 //--------------------------------------------------------------------- 391 template<class Field> 392 static void MatFl2MatF (const Field& F, 393 typename Field::Element* S, const size_t lds, 394 const typename FloatDomain::Element* E, const size_t lde, 395 const size_t m, const size_t n){ 396 397 typename Field::Element* Si = S; 398 const FloatDomain::Element* Ei =E; 338 399 size_t j; 339 400 for ( ; Si < S+m*lds; Si += lds, Ei+= lde){ … … 358 419 const typename Field::Element& beta); 359 420 421 template <class Field> 422 static size_t DotProdBoundCompute (const Field& F, const size_t w, 423 const typename Field::Element& beta); 424 425 template <class Element> 426 class callDotProdBoundCompute; 427 360 428 /** @brief Bound for the delayed modulus triangular system solving 361 429 * … … 369 437 template <class Field> 370 438 static size_t TRSMBound (const Field& F); 371 439 440 template <class Element> 441 class callTRSMBound; 372 442 373 443 template <class Field> … … 430 500 typename Field::Element * C, const size_t ldc, 431 501 const size_t kmax, const size_t w); 432 template<bool AreEq> 502 503 504 template<class Element> 433 505 class callWinoMain; 434 506 435 template< bool AreEq>507 template<class Element> 436 508 class callClassicMatmul; 437 509 438 template< bool AreEq>510 template<class Element> 439 511 class callFsquare; 440 512 441 template< bool AreEq>513 template<class Element> 442 514 class callMatVectProd; 443 515 … … 464 536 typename Field::Element * B, const size_t ldb, 465 537 const size_t nmax); 466 template< bool AreEq>538 template<class Element> 467 539 class callFtrsmLeftLowNoTrans; 468 540 469 template< bool AreEq>541 template<class Element> 470 542 class callFtrsmRightUpNoTrans; 471 543 472 template< bool AreEq>544 template<class Element> 473 545 class callFtrmmLeftUpNoTrans; 474 546 475 template< bool AreEq>547 template<class Element> 476 548 class callFtrmmLeftUpTrans; 477 549 478 template< bool AreEq>550 template<class Element> 479 551 class callFtrmmLeftLowNoTrans; 480 552 481 template< bool AreEq>553 template<class Element> 482 554 class callFtrmmLeftLowTrans; 483 555 484 template< bool AreEq>556 template<class Element> 485 557 class callFtrmmRightUpNoTrans; 486 558 487 template< bool AreEq>559 template<class Element> 488 560 class callFtrmmRightUpTrans; 489 561 490 template< bool AreEq>562 template<class Element> 491 563 class callFtrmmRightLowNoTrans; 492 564 493 template< bool AreEq>565 template<class Element> 494 566 class callFtrmmRightLowTrans; 495 567 -
include/fflas-ffpack/fflas_bounds.inl
r8 r18 22 22 //--------------------------------------------------------------------- 23 23 template <class Field> 24 inline size_t DotProdBoundCompute (const Field& F, const size_t w, 25 const typename Field::Element& beta) 26 { 27 typename Field::Element mone; 28 static FFLAS_INT_TYPE p; 29 F.characteristic(p); 30 F.init (mone, -1.0); 31 size_t kmax; 32 if (p == 0) 33 kmax = 2; 34 else 35 if (w > 0) { 36 size_t ex=1; 37 for (size_t i=0; i < w; ++i) ex *= 3; 38 //FFLAS_INT_TYPE c = (p-1)*(ex)/2; //bound for a centered representation 39 long long c = (p-1)*(1+ex)/2; 40 kmax = lround(( double(1ULL << DOUBLE_MANTISSA) /double(c*c) + 1)*(1 << w)); 41 if (kmax == ( 1ULL << w)) 42 kmax = 2; 43 } 44 else{ 45 long long c = p-1; 46 long long cplt=0; 47 if (!F.isZero (beta)) 48 if (F.isOne (beta) || F.areEqual (beta, mone)) 49 cplt = c; 50 else cplt = c*c; 51 kmax = lround(( double((1ULL << DOUBLE_MANTISSA) - cplt)) /double(c*c)); 52 if (kmax < 2) 53 kmax = 2; 54 } 55 return MIN(kmax,1ULL<<31); 56 } 24 inline size_t FFLAS::DotProdBoundCompute (const Field& F, const size_t w, 25 const typename Field::Element& beta){ 26 return callDotProdBoundCompute<typename Field::Element>() (F, w, beta); 27 } 28 29 template<class Element> 30 class FFLAS::callDotProdBoundCompute { 31 public: 32 template <class Field> 33 size_t operator () (const Field& F, const size_t w, 34 const typename Field::Element& beta) 35 { 36 typename Field::Element mone; 37 static FFLAS_INT_TYPE p; 38 F.characteristic(p); 39 F.init (mone, -1.0); 40 size_t kmax; 41 if (p == 0) 42 kmax = 2; 43 else 44 if (w > 0) { 45 size_t ex=1; 46 for (size_t i=0; i < w; ++i) ex *= 3; 47 //FFLAS_INT_TYPE c = (p-1)*(ex)/2; //bound for a centered representation 48 long long c = (p-1)*(1+ex)/2; 49 kmax = lround(( double(1ULL << DOUBLE_MANTISSA) /double(c*c) + 1)*(1 << w)); 50 if (kmax == ( 1ULL << w)) 51 kmax = 2; 52 } 53 else{ 54 long long c = p-1; 55 long long cplt=0; 56 if (!F.isZero (beta)) 57 if (F.isOne (beta) || F.areEqual (beta, mone)) 58 cplt = c; 59 else cplt = c*c; 60 kmax = lround(( double((1ULL << DOUBLE_MANTISSA) - cplt)) /double(c*c)); 61 if (kmax < 2) 62 kmax = 2; 63 } 64 return MIN(kmax,1ULL<<31); 65 } 66 }; 67 57 68 template <> 58 inline size_t DotProdBoundCompute (const Modular<double>& F, const size_t w, 59 const double& beta) 60 { 61 double mone; 62 static FFLAS_INT_TYPE p; 63 F.characteristic(p); 64 F.init (mone, -1.0); 65 size_t kmax; 66 if (p == 0) 67 kmax = 2; 68 else 69 if (w > 0) { 70 size_t ex=1; 71 for (size_t i=0; i < w; ++i) ex *= 3; 72 long long c; 69 class FFLAS::callDotProdBoundCompute<double> { 70 public: 71 template <class Field> 72 size_t operator() (const Field& F, const size_t w, 73 const double& beta) 74 { 75 double mone; 76 static FFLAS_INT_TYPE p; 77 F.characteristic(p); 78 F.init (mone, -1.0); 79 size_t kmax; 80 if (p == 0) 81 kmax = 2; 82 else 83 if (w > 0) { 84 size_t ex=1; 85 for (size_t i=0; i < w; ++i) ex *= 3; 86 long long c; 73 87 #ifndef _LINBOX_CONFIG_H 74 if (F.balanced) 75 c = (p-1)*(ex)/2; // balanced representation 76 else 77 #endif 78 c = (p-1)*(1+ex)/2; // positive representation 79 kmax = lround(double(1ULL << DOUBLE_MANTISSA) /(double(c*c) + 1)*(1ULL << w)); 80 if (kmax == ( 1ULL << w)) 81 kmax = 2; 82 } 83 else{ 84 long long c = p-1; 85 long long cplt=0; 86 if (!F.isZero (beta)) 87 if (F.isOne (beta) || F.areEqual (beta, mone)) 88 cplt = c; 89 else cplt = c*c; 90 kmax = lround( double((1ULL << DOUBLE_MANTISSA) - cplt) /(double(c*c))); 91 if (kmax < 2) 92 kmax = 2; 93 } 94 return (size_t) MIN(kmax,1ULL<<31); 95 } 88 if (F.balanced) 89 c = (p-1)*(ex)/2; // balanced representation 90 else 91 #endif 92 c = (p-1)*(1+ex)/2; // positive representation 93 kmax = lround(double(1ULL << DOUBLE_MANTISSA) /(double(c*c) + 1)*(1ULL << w)); 94 if (kmax == ( 1ULL << w)) 95 kmax = 2; 96 } 97 else{ 98 long long c = p-1; 99 long long cplt=0; 100 if (!F.isZero (beta)) 101 if (F.isOne (beta) || F.areEqual (beta, mone)) 102 cplt = c; 103 else cplt = c*c; 104 kmax = lround( double((1ULL << DOUBLE_MANTISSA) - cplt) /(double(c*c))); 105 if (kmax < 2) 106 kmax = 2; 107 } 108 return (size_t) MIN(kmax,1ULL<<31); 109 } 110 }; 111 112 template <> 113 class FFLAS::callDotProdBoundCompute<float> { 114 public: 115 template <class Field> 116 size_t operator () (const Field& F, const size_t w, 117 const float& beta) 118 { 119 float mone,one; 120 static FFLAS_INT_TYPE p; 121 F.characteristic(p); 122 F.init (one, 1.0F); 123 F.neg(mone,one); 124 size_t kmax; 125 if (p == 0) 126 kmax = 2; 127 else 128 if (w > 0) { 129 size_t ex=1; 130 for (size_t i=0; i < w; ++i) ex *= 3; 131 long long c; 132 #ifndef _LINBOX_CONFIG_H 133 if (F.balanced) 134 c = (p-1)*(ex)/2; // balanced representation 135 else 136 #endif 137 c = (p-1)*(1+ex)/2; // positive representation 138 kmax = lround(float(1ULL << FLOAT_MANTISSA) /(float(c*c) + 1)*(1ULL << w)); 139 if (kmax == ( 1ULL << w)) 140 kmax = 2; 141 } 142 else{ 143 long long c = p-1; 144 long long cplt=0; 145 if (!F.isZero (beta)) 146 if (F.isOne (beta) || F.areEqual (beta, mone)) 147 cplt = c; 148 else cplt = c*c; 149 kmax = lround( float((1ULL << FLOAT_MANTISSA) - cplt) /(float(c*c))); 150 if (kmax < 2) 151 kmax = 2; 152 } 153 return (size_t) MIN(kmax,1ULL<<31); 154 } 155 }; 96 156 97 157 template < class Field > …
