Changeset 21
- Timestamp:
- 05/01/07 19:23:34 (2 years ago)
- Files:
-
- 1 added
- 12 modified
-
include/fflas-ffpack/fflas.h (modified) (35 diffs)
-
include/fflas-ffpack/fflas_bounds.inl (modified) (5 diffs)
-
include/fflas-ffpack/fflas_fgemm.inl (modified) (49 diffs)
-
include/fflas-ffpack/fflas_fgemv.inl (modified) (13 diffs)
-
include/fflas-ffpack/fflas_ftrmm.inl (modified) (34 diffs)
-
include/fflas-ffpack/fflas_ftrsm.inl (modified) (15 diffs)
-
include/fflas-ffpack/fflas_ftrsv.inl (modified) (1 diff)
-
include/fflas-ffpack/ffpack.h (modified) (7 diffs)
-
include/fflas-ffpack/ffpack_charpoly.inl (modified) (1 diff)
-
include/fflas-ffpack/ffpack_ludivine.inl (modified) (7 diffs)
-
include/fflas-ffpack/ffpack_minpoly.inl (modified) (1 diff)
-
include/fflas-ffpack/modular-int.h (added)
-
tests/test-fgemv.C (modified) (3 diffs)
Legend:
- Unmodified
- Added
- Removed
-
include/fflas-ffpack/fflas.h
r18 r21 32 32 33 33 #ifndef __LINBOX_STRASSEN_OPTIMIZATION 34 #define WINOTHRESHOLD 75034 #define WINOTHRESHOLD 400 35 35 #else 36 36 #define WINOTHRESHOLD __LINBOX_WINOTHRESHOLD 37 37 #endif 38 38 39 // Thresholds determining which floating point representation to use, 40 // depending on the cardinality of the finite field. This is only used when 41 // the element representation is not a floating point type. 42 #define FLOAT_DOUBLE_THRESHOLD_0 430 43 #define FLOAT_DOUBLE_THRESHOLD_1 350 44 #define FLOAT_DOUBLE_THRESHOLD_2 175 45 39 46 #define DOUBLE_MANTISSA 53 40 47 #define FLOAT_MANTISSA 24 … … 47 54 enum FFLAS_DIAG { FflasNonUnit=131, FflasUnit=132 }; 48 55 enum FFLAS_SIDE { FflasLeft=141, FflasRight = 142 }; 49 56 57 /* Determine the type of the element representation for Matrix Mult kernel 58 * FflasDouble: to use the double precision BLAS 59 * FflasFloat: to use the single precison BLAS 60 * FflasFloat: for any other domain, that can not be converted to floating point integers 61 */ 62 enum FFLAS_BASE { FflasDouble = 151, FflasFloat = 152, FflasGeneric = 153}; 63 64 /* Representations of Z with floating point elements*/ 50 65 typedef UnparametricField<float> FloatDomain; 51 52 66 typedef UnparametricField<double> DoubleDomain; 53 67 … … 132 146 template<class Field> 133 147 static void 134 fgemv (const Field& F, const enumFFLAS_TRANSPOSE TransA,148 fgemv (const Field& F, const FFLAS_TRANSPOSE TransA, 135 149 const size_t M, const size_t N, 136 150 const typename Field::Element alpha, … … 161 175 template<class Field> 162 176 static void 163 ftrsv (const Field& F, const enumFFLAS_UPLO Uplo,164 const enum FFLAS_TRANSPOSE TransA, const enumFFLAS_DIAG Diag,177 ftrsv (const Field& F, const FFLAS_UPLO Uplo, 178 const FFLAS_TRANSPOSE TransA, const FFLAS_DIAG Diag, 165 179 const size_t N,const typename Field::Element * A, const size_t lda, 166 180 typename Field::Element * X, int incX); … … 177 191 template<class Field> 178 192 static void 179 ftrsm (const Field& F, const enumFFLAS_SIDE Side,180 const enumFFLAS_UPLO Uplo,181 const enumFFLAS_TRANSPOSE TransA,182 const enumFFLAS_DIAG Diag,193 ftrsm (const Field& F, const FFLAS_SIDE Side, 194 const FFLAS_UPLO Uplo, 195 const FFLAS_TRANSPOSE TransA, 196 const FFLAS_DIAG Diag, 183 197 const size_t M, const size_t N, 184 198 const typename Field::Element alpha, … … 193 207 template<class Field> 194 208 static void 195 ftrmm (const Field& F, const enumFFLAS_SIDE Side,196 const enumFFLAS_UPLO Uplo,197 const enumFFLAS_TRANSPOSE TransA,198 const enumFFLAS_DIAG Diag,209 ftrmm (const Field& F, const FFLAS_SIDE Side, 210 const FFLAS_UPLO Uplo, 211 const FFLAS_TRANSPOSE TransA, 212 const FFLAS_DIAG Diag, 199 213 const size_t M, const size_t N, 200 214 const typename Field::Element alpha, … … 211 225 static typename Field::Element* 212 226 fgemm( const Field& F, 213 const enumFFLAS_TRANSPOSE ta,214 const enumFFLAS_TRANSPOSE tb,227 const FFLAS_TRANSPOSE ta, 228 const FFLAS_TRANSPOSE tb, 215 229 const size_t m, 216 230 const size_t n, … … 221 235 const typename Field::Element beta, 222 236 typename Field::Element* C, const size_t ldc, 223 const size_t wl); 237 const size_t w){ 238 239 if (!(m && n && k)) return C; 240 241 FFLAS_BASE base = BaseCompute (F, w); 242 243 WinoMain (F, ta, tb, m, n, k, alpha, A, lda, B, ldb, beta, 244 C, ldc, DotProdBound (F, w, beta, base), w, base); 245 return C; 246 }; 224 247 225 248 /** @brief Field GEneral Matrix Multiply … … 232 255 static typename Field::Element* 233 256 fgemm (const Field& F, 234 const enumFFLAS_TRANSPOSE ta,235 const enumFFLAS_TRANSPOSE tb,257 const FFLAS_TRANSPOSE ta, 258 const FFLAS_TRANSPOSE tb, 236 259 const size_t m, 237 260 const size_t n, … … 245 268 typename Field::Element* C, 246 269 const size_t ldc){ 247 size_t ws =0; 248 if ( (ta==FflasNoTrans) && (tb==FflasNoTrans)) { 249 size_t kt = MIN(MIN(k,m),n); 250 while (kt >= WINOTHRESHOLD){ 251 ws++; 252 kt/=2; 253 } 254 } 255 return fgemm(F, ta, tb, m, n, k, alpha, A, lda, B, ldb, 256 beta, C, ldc, ws); 270 271 if (!(m && n && k)) return C; 272 273 size_t w, kmax=0; 274 FFLAS_BASE base; 275 276 setMatMulParam<typename Field::Element> ()(F, MIN(MIN(m,n),k), beta, 277 w, base, kmax); 278 279 WinoMain (F, ta, tb, m, n, k, alpha, A, lda, B, ldb, beta, 280 C, ldc, kmax, w, base); 281 return C; 257 282 } 258 283 … … 265 290 template<class Field> 266 291 static typename Field::Element* fsquare (const Field& F, 267 const enumFFLAS_TRANSPOSE ta,292 const FFLAS_TRANSPOSE ta, 268 293 const size_t n, 269 294 const typename Field::Element alpha, … … 417 442 template <class Field> 418 443 static size_t DotProdBound (const Field& F, const size_t w, 419 const typename Field::Element& beta); 444 const typename Field::Element& beta, 445 const FFLAS_BASE base); 420 446 421 447 template <class Field> 422 448 static size_t DotProdBoundCompute (const Field& F, const size_t w, 423 const typename Field::Element& beta); 424 425 template <class Element> 426 class callDotProdBoundCompute; 449 const typename Field::Element& beta, 450 const FFLAS_BASE base); 451 452 453 template <class Field> 454 static FFLAS_BASE BaseCompute (const Field& F, const size_t w); 455 456 static size_t WinoSteps (const size_t m); 457 458 // template <class Element> 459 // class callDotProdBoundCompute; 427 460 428 461 /** @brief Bound for the delayed modulus triangular system solving … … 441 474 class callTRSMBound; 442 475 476 /** @brief Set the optimal parameters for the Matrix Multiplication 477 */ 478 template <class Element> 479 class setMatMulParam; 480 443 481 template <class Field> 444 482 static void DynamicPealing( const Field& F, 445 const enumFFLAS_TRANSPOSE ta,446 const enumFFLAS_TRANSPOSE tb,483 const FFLAS_TRANSPOSE ta, 484 const FFLAS_TRANSPOSE tb, 447 485 const size_t m, const size_t n, const size_t k, 448 486 const typename Field::Element alpha, … … 455 493 template<class Field> 456 494 static void MatVectProd (const Field& F, 457 const enumFFLAS_TRANSPOSE TransA,495 const FFLAS_TRANSPOSE TransA, 458 496 const size_t M, const size_t N, 459 497 const typename Field::Element alpha, … … 465 503 template <class Field> 466 504 static void ClassicMatmul(const Field& F, 467 const enumFFLAS_TRANSPOSE ta,468 const enumFFLAS_TRANSPOSE tb,505 const FFLAS_TRANSPOSE ta, 506 const FFLAS_TRANSPOSE tb, 469 507 const size_t m, const size_t n, const size_t k, 470 508 const typename Field::Element alpha, … … 473 511 const typename Field::Element beta, 474 512 typename Field::Element * C, const size_t ldc, 475 const size_t kmax );513 const size_t kmax, const FFLAS_BASE base ); 476 514 477 515 // Winograd Multiplication alpha.A(n*k) * B(k*m) + beta . C(n*m) … … 479 517 template<class Field> 480 518 static void WinoCalc (const Field& F, 481 const enumFFLAS_TRANSPOSE ta,482 const enumFFLAS_TRANSPOSE tb,519 const FFLAS_TRANSPOSE ta, 520 const FFLAS_TRANSPOSE tb, 483 521 const size_t mr, const size_t nr,const size_t kr, 484 522 const typename Field::Element alpha, … … 487 525 const typename Field::Element beta, 488 526 typename Field::Element * C, const size_t ldc, 489 const size_t kmax, const size_t w );527 const size_t kmax, const size_t w, const FFLAS_BASE base); 490 528 491 529 template<class Field> 492 530 static void WinoMain (const Field& F, 493 const enumFFLAS_TRANSPOSE ta,494 const enumFFLAS_TRANSPOSE tb,531 const FFLAS_TRANSPOSE ta, 532 const FFLAS_TRANSPOSE tb, 495 533 const size_t m, const size_t n, const size_t k, 496 534 const typename Field::Element alpha, … … 499 537 const typename Field::Element beta, 500 538 typename Field::Element * C, const size_t ldc, 501 const size_t kmax, const size_t w );539 const size_t kmax, const size_t w, const FFLAS_BASE base); 502 540 503 541 … … 516 554 // Specialized routines for ftrsm 517 555 template<class Field> 518 static void ftrsmLeftUpNoTrans (const Field& F, const enumFFLAS_DIAG Diag,556 static void ftrsmLeftUpNoTrans (const Field& F, const FFLAS_DIAG Diag, 519 557 const size_t M, const size_t N, 520 558 const typename Field::Element alpha, … … 523 561 524 562 template<class Field> 525 static void ftrsmLeftUpTrans (const Field& F, const enumFFLAS_DIAG Diag,563 static void ftrsmLeftUpTrans (const Field& F, const FFLAS_DIAG Diag, 526 564 const size_t M, const size_t N, 527 565 const typename Field::Element alpha, … … 530 568 531 569 template<class Field> 532 static void ftrsmLeftLowNoTrans (const Field& F, const enumFFLAS_DIAG Diag,570 static void ftrsmLeftLowNoTrans (const Field& F, const FFLAS_DIAG Diag, 533 571 const size_t M, const size_t N, 534 572 const typename Field::Element alpha, … … 567 605 568 606 template<class Field> 569 static void ftrsmLeftLowTrans (const Field& F, const enumFFLAS_DIAG Diag,607 static void ftrsmLeftLowTrans (const Field& F, const FFLAS_DIAG Diag, 570 608 const size_t M, const size_t N, 571 609 const typename Field::Element alpha, … … 574 612 575 613 template<class Field> 576 static void ftrsmRightUpNoTrans (const Field& F, const enumFFLAS_DIAG Diag,614 static void ftrsmRightUpNoTrans (const Field& F, const FFLAS_DIAG Diag, 577 615 const size_t M, const size_t N, 578 616 const typename Field::Element alpha, … … 582 620 583 621 template<class Field> 584 static void ftrsmRightUpTrans (const Field& F, const enumFFLAS_DIAG Diag,622 static void ftrsmRightUpTrans (const Field& F, const FFLAS_DIAG Diag, 585 623 const size_t M, const size_t N, 586 624 const typename Field::Element alpha, … … 589 627 590 628 template<class Field> 591 static void ftrsmRightLowNoTrans (const Field& F, const enumFFLAS_DIAG Diag,629 static void ftrsmRightLowNoTrans (const Field& F, const FFLAS_DIAG Diag, 592 630 const size_t M, const size_t N, 593 631 const typename Field::Element alpha, … … 596 634 597 635 template<class Field> 598 static void ftrsmRightLowTrans (const Field& F, const enumFFLAS_DIAG Diag,636 static void ftrsmRightLowTrans (const Field& F, const FFLAS_DIAG Diag, 599 637 const size_t M, const size_t N, 600 638 const typename Field::Element alpha, … … 604 642 // Specialized routines for ftrmm 605 643 template<class Field> 606 static void ftrmmLeftUpNoTrans (const Field& F, const enumFFLAS_DIAG Diag,644 static void ftrmmLeftUpNoTrans (const Field& F, const FFLAS_DIAG Diag, 607 645 const size_t M, const size_t N, 608 646 const typename Field::Element * A, const size_t lda, … … 611 649 612 650 template<class Field> 613 static void ftrmmLeftUpTrans (const Field& F, const enumFFLAS_DIAG Diag,651 static void ftrmmLeftUpTrans (const Field& F, const FFLAS_DIAG Diag, 614 652 const size_t M, const size_t N, 615 653 const typename Field::Element * A, const size_t lda, … … 618 656 619 657 template<class Field> 620 static void ftrmmLeftLowNoTrans (const Field& F, const enumFFLAS_DIAG Diag,658 static void ftrmmLeftLowNoTrans (const Field& F, const FFLAS_DIAG Diag, 621 659 const size_t M, const size_t N, 622 660 const typename Field::Element * A, const size_t lda, … … 625 663 626 664 template<class Field> 627 static void ftrmmLeftLowTrans (const Field& F, const enumFFLAS_DIAG Diag,665 static void ftrmmLeftLowTrans (const Field& F, const FFLAS_DIAG Diag, 628 666 const size_t M, const size_t N, 629 667 const typename Field::Element * A, const size_t lda, … … 632 670 633 671 template<class Field> 634 static void ftrmmRightUpNoTrans (const Field& F, const enumFFLAS_DIAG Diag,672 static void ftrmmRightUpNoTrans (const Field& F, const FFLAS_DIAG Diag, 635 673 const size_t M, const size_t N, 636 674 const typename Field::Element * A, const size_t lda, … … 639 677 640 678 template<class Field> 641 static void ftrmmRightUpTrans (const Field& F, const enumFFLAS_DIAG Diag,679 static void ftrmmRightUpTrans (const Field& F, const FFLAS_DIAG Diag, 642 680 const size_t M, const size_t N, 643 681 const typename Field::Element * A, const size_t lda, … … 646 684 647 685 template<class Field> 648 static void ftrmmRightLowNoTrans (const Field& F, const enumFFLAS_DIAG Diag,686 static void ftrmmRightLowNoTrans (const Field& F, const FFLAS_DIAG Diag, 649 687 const size_t M, const size_t N, 650 688 const typename Field::Element * A, const size_t lda, … … 652 690 const size_t nmax); 653 691 template<class Field> 654 static void ftrmmRightLowTrans (const Field& F, const enumFFLAS_DIAG Diag,692 static void ftrmmRightLowTrans (const Field& F, const FFLAS_DIAG Diag, 655 693 const size_t M, const size_t N, 656 694 const typename Field::Element * A, const size_t lda, -
include/fflas-ffpack/fflas_bounds.inl
r18 r21 19 19 // Computes the maximal dimension k so that the product A*B+beta.C over Z, 20 20 // where A is m*k and B is k*n can be performed correctly with w Winograd 21 // recursion levels on the 53 bits of doublemantissa21 // recursion levels on the number of bits of the floating point mantissa 22 22 //--------------------------------------------------------------------- 23 23 template <class Field> 24 24 inline size_t FFLAS::DotProdBoundCompute (const Field& F, const size_t w, 25 const typename Field::Element& beta){ 26 return callDotProdBoundCompute<typename Field::Element>() (F, w, beta); 27 } 28 29 template<class Element> 30 class FFLAS::callDotProdBoundCompute { 31 public: 32 template <class Field> 33 size_t operator () (const Field& F, const size_t w, 34 const typename Field::Element& beta) 35 { 36 typename Field::Element mone; 37 static FFLAS_INT_TYPE p; 38 F.characteristic(p); 39 F.init (mone, -1.0); 40 size_t kmax; 41 if (p == 0) 42 kmax = 2; 43 else 44 if (w > 0) { 45 size_t ex=1; 46 for (size_t i=0; i < w; ++i) ex *= 3; 47 //FFLAS_INT_TYPE c = (p-1)*(ex)/2; //bound for a centered representation 48 long long c = (p-1)*(1+ex)/2; 49 kmax = lround(( double(1ULL << DOUBLE_MANTISSA) /double(c*c) + 1)*(1 << w)); 50 if (kmax == ( 1ULL << w)) 51 kmax = 2; 52 } 53 else{ 54 long long c = p-1; 55 long long cplt=0; 56 if (!F.isZero (beta)) 57 if (F.isOne (beta) || F.areEqual (beta, mone)) 58 cplt = c; 59 else cplt = c*c; 60 kmax = lround(( double((1ULL << DOUBLE_MANTISSA) - cplt)) /double(c*c)); 61 if (kmax < 2) 62 kmax = 2; 63 } 64 return MIN(kmax,1ULL<<31); 65 } 66 }; 67 68 template <> 69 class FFLAS::callDotProdBoundCompute<double> { 70 public: 71 template <class Field> 72 size_t operator() (const Field& F, const size_t w, 73 const double& beta) 74 { 75 double mone; 76 static FFLAS_INT_TYPE p; 77 F.characteristic(p); 78 F.init (mone, -1.0); 79 size_t kmax; 80 if (p == 0) 81 kmax = 2; 82 else 83 if (w > 0) { 84 size_t ex=1; 85 for (size_t i=0; i < w; ++i) ex *= 3; 86 long long c; 25 const typename Field::Element& beta, 26 const FFLAS_BASE base){ 27 28 typename Field::Element mone; 29 static FFLAS_INT_TYPE p; 30 F.characteristic(p); 31 F.init (mone, -1.0); 32 size_t kmax; 33 34 unsigned long mantissa = (base == FflasDouble)? DOUBLE_MANTISSA : FLOAT_MANTISSA; 35 36 if (p == 0) 37 kmax = 2; 38 else 39 if (w > 0) { 40 size_t ex=1; 41 for (size_t i=0; i < w; ++i) ex *= 3; 42 //FFLAS_INT_TYPE c = (p-1)*(ex)/2; //bound for a centered representation 43 long long c; 87 44 #ifndef _LINBOX_CONFIG_H 88 if (F.balanced)89 c = (p-1)*(ex)/2; // balanced representation90 else45 if (F.balanced) 46 c = (p-1)*(ex)/2; // balanced representation 47 else 91 48 #endif 92 c = (p-1)*(1+ex)/2; // positive representation 93 kmax = lround(double(1ULL << DOUBLE_MANTISSA) /(double(c*c) + 1)*(1ULL << w)); 94 if (kmax == ( 1ULL << w)) 95 kmax = 2; 96 } 97 else{ 98 long long c = p-1; 99 long long cplt=0; 100 if (!F.isZero (beta)) 101 if (F.isOne (beta) || F.areEqual (beta, mone)) 102 cplt = c; 103 else cplt = c*c; 104 kmax = lround( double((1ULL << DOUBLE_MANTISSA) - cplt) /(double(c*c))); 105 if (kmax < 2) 106 kmax = 2; 107 } 108 return (size_t) MIN(kmax,1ULL<<31); 109 } 110 }; 111 112 template <> 113 class FFLAS::callDotProdBoundCompute<float> { 114 public: 115 template <class Field> 116 size_t operator () (const Field& F, const size_t w, 117 const float& beta) 118 { 119 float mone,one; 120 static FFLAS_INT_TYPE p; 121 F.characteristic(p); 122 F.init (one, 1.0F); 123 F.neg(mone,one); 124 size_t kmax; 125 if (p == 0) 126 kmax = 2; 127 else 128 if (w > 0) { 129 size_t ex=1; 130 for (size_t i=0; i < w; ++i) &n
