Changeset 26
- Timestamp:
- 07/06/07 18:58:19 (1 year ago)
- Files:
-
- 4 added
- 7 modified
-
include/fflas-ffpack/fflas_fgemm.inl (modified) (3 diffs)
-
include/fflas-ffpack/ffpack.h (modified) (5 diffs)
-
include/fflas-ffpack/modular-balanced.h (modified) (1 diff)
-
tests/test-colechelon.C (added)
-
tests/test-fgemm.C (modified) (2 diffs)
-
tests/test-lqup.C (modified) (8 diffs)
-
tests/test-redcolechelon.C (added)
-
tests/test-redechelon.C (modified) (1 diff)
-
tests/test-redrowechelon.C (added)
-
tests/test-rowechelon.C (added)
-
tests/testeur_lqup.C (modified) (5 diffs)
Legend:
- Unmodified
- Added
- Removed
-
include/fflas-ffpack/fflas_fgemm.inl
r25 r26 1142 1142 if (w <= 0) 1143 1143 callClassicMatmul<double> () (F, ta, tb, m, n, k, 1144 alpha, A, lda, B, ldb, beta, C, ldc, kmax,base);1144 alpha, A, lda, B, ldb, beta, C, ldc, kmax,base); 1145 1145 else { 1146 if (k < kmax) { // switch on double 1147 // Temporary double matrices 1146 if (k < kmax) { // switch on delayed modulus 1148 1147 DoubleDomain::Element _alpha, _beta; 1149 1148 … … 1169 1168 WinoMain (DoubleDomain(), ta, tb, m, n, k, 1170 1169 _alpha, A, lda, B, ldb, _beta, C, ldc, kmax, w,base); 1171 // Conversion double = > GFq1170 // Modular reduction 1172 1171 for (double * Ci = C; Ci != C+m*ldc; Ci+=ldc) 1173 1172 for (size_t j = 0; j < n; ++j) … … 1180 1179 F.mulin (* (Ci + j), alpha); 1181 1180 } 1182 // Temporary double matrices destruction1183 1181 } 1184 1182 else{ -
include/fflas-ffpack/ffpack.h
r25 r26 464 464 * Compute the product UL of the upper, resp lower triangular matrices U and L 465 465 * stored one above the other in the square matrix A. 466 * The matrix U is supposed to beunit diagonal466 * Diag == Unit if the matrix U is unit diagonal 467 467 * 468 468 */ 469 469 template<class Field> 470 470 static void 471 ftrtrm (const Field& F, const size_t N, typename Field::Element * A, const size_t lda){ 471 ftrtrm (const Field& F, const FFLAS_DIAG diag, const size_t N, 472 typename Field::Element * A, const size_t lda){ 472 473 473 474 typename Field::Element one; 474 475 F.init(one,1.0); 475 476 if (N == 1)476 477 if (N == 1) 477 478 return; 478 479 size_t N1 = N/2; 479 480 size_t N2 = N-N1; 480 481 481 ftrtrm (F, N1, A, lda);482 ftrtrm (F, diag, N1, A, lda); 482 483 483 484 fgemm (F, FflasNoTrans, FflasNoTrans, N1, N1, N2, one, 484 485 A+N1, lda, A+N1*lda, lda, one, A, lda); 485 486 486 ftrmm (F, FflasRight, FflasLower, FflasNoTrans, FflasNonUnit, N1, N2, one, A + N1*(lda+1), lda, A + N1, lda);487 488 ftrmm (F, FflasLeft, FflasUpper, FflasNoTrans, FflasUnit, N2, N1, one, A + N1*(lda+1), lda, A + N1*lda, lda);489 490 ftrtrm (F, N2, A + N1*(lda+1), lda);487 ftrmm (F, FflasRight, FflasLower, FflasNoTrans, (diag == FflasUnit) ? FflasNonUnit : FflasUnit, N1, N2, one, A + N1*(lda+1), lda, A + N1, lda); 488 489 ftrmm (F, FflasLeft, FflasUpper, FflasNoTrans, diag, N2, N1, one, A + N1*(lda+1), lda, A + N1*lda, lda); 490 491 ftrtrm (F, diag, N2, A + N1*(lda+1), lda); 491 492 492 493 } … … 496 497 * 497 498 * After the computation A = [ M \ V ] such that AU = C is a column echelon 498 * decomposition of A, with U = P^T [ V (+Ir)] and C = M //+ Q [ Ir ]499 * [ 0 In-r ] // [ 0 ]499 * decomposition of A, with U = P^T [ V + Ir ] and C = M //+ Q [ Ir ] 500 * [ 0 In-r ] // [ 0 ] 500 501 * Qt = Q^T 501 502 */ … … 514 515 t1.clear(); 515 516 t1.start(); 516 r = LUdivine (F, FflasUnit, M, N, A, lda, P, Qt);517 r = LUdivine (F, FflasUnit, FflasNoTrans, M, N, A, lda, P, Qt); 517 518 t1.stop(); 518 519 //cerr<<"LU --> "<<t1.usertime()<<endl; … … 526 527 ftrmm (F, FflasLeft, FflasUpper, FflasNoTrans, FflasUnit, r, N-r, 527 528 mone, A, lda, A+r, lda); 529 530 t2.stop(); 531 //cerr<<"U^-1 --> "<<t2.usertime()<<endl; 532 533 return r; 534 } 535 536 /** 537 * Compute the Row Echelon form of the input matrix in-place. 538 * 539 * After the computation A = [ L \ M ] such that L A = R is a column echelon 540 * decomposition of A, with L = [ L+Ir 0 ] P and R = M 541 * [ In-r ] 542 * Qt = Q^T 543 */ 544 template <class Field> 545 static size_t 546 RowEchelonForm (const Field& F, const size_t M, const size_t N, 547 typename Field::Element * A, const size_t lda, 548 size_t* P, size_t* Qt){ 549 550 typename Field::Element one, mone; 551 F.init (one, 1.0); 552 F.neg (mone, one); 553 size_t r; 554 555 Timer t1; 556 t1.clear(); 557 t1.start(); 558 r = LUdivine (F, FflasUnit, FflasTrans, M, N, A, lda, P, Qt); 559 t1.stop(); 560 //cerr<<"LU --> "<<t1.usertime()<<endl; 561 562 Timer t2; 563 t2.clear(); 564 t2.start(); 565 ftrtri (F, FflasLower, FflasUnit, r, A, lda); 566 567 568 ftrmm (F, FflasRight, FflasLower, FflasNoTrans, FflasUnit, M-r, r, 569 mone, A, lda, A+r*lda, lda); 528 570 529 571 t2.stop(); … … 573 615 one, A, lda, A+r*lda, lda); 574 616 575 ftrtrm (F, r, A, lda); 617 ftrtrm (F, FflasUnit, r, A, lda); 618 t1.stop(); 619 //cerr<<"U^-1L^-1 --> "<<t1.usertime()<<endl; 620 621 return r; 622 623 } 624 625 /** 626 * Compute the Reduced Row Echelon form of the input matrix in-place. 627 * 628 * After the computation A = [ V ] such that L A = R is a reduced row echelon 629 * [ M 0 ] 630 * decomposition of A, where L = [ V ] P^T and R = [ Ir M ] Q 631 * [ 0 In-r ] [ 0 ] 632 * Qt = Q^T 633 */ 634 template <class Field> 635 static size_t 636 ReducedRowEchelonForm (const Field& F, const size_t M, const size_t N, 637 typename Field::Element * A, const size_t lda, 638 size_t* P, size_t* Qt){ 639 640 typename Field::Element one, mone; 641 F.init (one, 1.0); 642 F.neg (mone, one); 643 size_t r; 644 645 r = RowEchelonForm (F, M, N, A, lda, P, Qt); 646 647 Timer t1; 648 t1.clear(); 649 t1.start(); 650 // M = M Q^T 651 for (int i=0; i<r; ++i){ 652 if ( Qt[i]> (size_t) i ){ 653 fswap( F, i+1, 654 A + Qt[i], lda, 655 A + i, lda ); 656 } 657 } 658 659 ftrtri (F, FflasUpper, FflasNonUnit, r, A, lda); 660 661 ftrmm (F, FflasLeft, FflasUpper, FflasNoTrans, FflasNonUnit, r, N-r, 662 one, A, lda, A+r, lda); 663 664 ftrtrm (F, FflasNonUnit, r, A, lda); 665 576 666 t1.stop(); 577 667 //cerr<<"U^-1L^-1 --> "<<t1.usertime()<<endl; -
include/fflas-ffpack/modular-balanced.h
r24 r26 111 111 112 112 inline Element& init(Element& x, double y =0) const { 113 double tmp =y;113 double tmp; 114 114 115 115 //tmp = floor (y + 0.5); -
tests/test-fgemm.C
r25 r26 7 7 //------------------------------------------------------------------------- 8 8 9 #define DEBUG 09 #define DEBUG 1 10 10 #define NEWWINO 11 11 #define TIME 1 … … 15 15 using namespace std; 16 16 17 #include "fflas-ffpack/modular- positive.h"17 #include "fflas-ffpack/modular-balanced.h" 18 18 #include "timer.h" 19 19 #include "Matio.h" -
tests/test-lqup.C
r25 r26 20 20 #include "Matio.h" 21 21 #include "timer.h" 22 #include "fflas-ffpack/modular-balanced.h"22 //#include "fflas-ffpack/modular-balanced.h" 23 23 #include "fflas-ffpack/modular-positive.h" 24 24 #include "fflas-ffpack/ffpack.h" … … 68 68 A = read_field(F,argv[2],&m,&n); 69 69 } 70 for (j=0;j< n;j++)70 for (j=0;j<maxP;j++) 71 71 P[j]=0; 72 for (j=0;j<m ;j++)72 for (j=0;j<maxQ;j++) 73 73 Q[j]=0; 74 74 tim.clear(); … … 79 79 timc+=tim; 80 80 } 81 write_field (F,cerr<<"Result = "<<endl, A, m,n,n);81 //write_field (F,cerr<<"Result = "<<endl, A, m,n,n); 82 82 83 83 cerr<<"P = ["; … … 116 116 } 117 117 118 write_field(F,cerr<<"L = "<<endl,L,m,m,m);119 write_field(F,cerr<<"U = "<<endl,U,m,n,n);118 // write_field(F,cerr<<"L = "<<endl,L,m,m,m); 119 // write_field(F,cerr<<"U = "<<endl,U,m,n,n); 120 120 FFPACK::applyP( F, FFLAS::FflasRight, FFLAS::FflasNoTrans, m,0,R, L, m, Q); 121 121 for ( int i=0; i<m; ++i ) 122 122 F.assign(*(L+i*(m+1)), one); 123 123 124 write_field(F,cerr<<"L = "<<endl,L,m,m,m);125 write_field(F,cerr<<"U = "<<endl,U,m,n,n);124 // write_field(F,cerr<<"L = "<<endl,L,m,m,m); 125 // write_field(F,cerr<<"U = "<<endl,U,m,n,n); 126 126 if (diag == FFLAS::FflasNonUnit) 127 127 for ( int i=0; i<R; ++i ) … … 134 134 } 135 135 } 136 write_field(F,cerr<<"L = "<<endl,L,m,m,m);137 write_field(F,cerr<<"U = "<<endl,U,m,n,n);136 // write_field(F,cerr<<"L = "<<endl,L,m,m,m); 137 // write_field(F,cerr<<"U = "<<endl,U,m,n,n); 138 138 139 139 FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans, m,0,R, U, n, P); … … 167 167 F.assign( *(U+i+j*n), zero); 168 168 } 169 write_field(F,cerr<<"L = "<<endl,L,m,n,n);170 write_field(F,cerr<<"U = "<<endl,U,n,n,n);169 // write_field(F,cerr<<"L = "<<endl,L,m,n,n); 170 // write_field(F,cerr<<"U = "<<endl,U,n,n,n); 171 171 172 172 FFPACK::applyP( F, FFLAS::FflasLeft, FFLAS::FflasTrans, n,0,R, U, n, Q); … … 185 185 } 186 186 } 187 write_field(F,cerr<<"L = "<<endl,L,m,n,n);188 write_field(F,cerr<<"U = "<<endl,U,n,n,n);187 // write_field(F,cerr<<"L = "<<endl,L,m,n,n); 188 // write_field(F,cerr<<"U = "<<endl,U,n,n,n); 189 189 190 190 FFPACK::applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans, n,0,R, L, n, P); … … 199 199 fail=true; 200 200 201 write_field(F,cerr<<"X = "<<endl,X,m,n,n);202 write_field(F,cerr<<"B = "<<endl,B,m,n,n);201 // write_field(F,cerr<<"X = "<<endl,X,m,n,n); 202 // write_field(F,cerr<<"B = "<<endl,B,m,n,n); 203 203 delete[] B; 204 204 if (fail) -
tests/test-redechelon.C
r25 r26 8 8 9 9 //------------------------------------------------------------------------- 10 #define DEBUG 010 #define DEBUG 1 11 11 // Debug option 0: no debug 12 12 // 1: check A = LQUP -
tests/testeur_lqup.C
r25 r26 12 12 using namespace std; 13 13 //#include "fflas-ffpack/modular-int.h" 14 #include "fflas-ffpack/modular-positive.h"15 //#include "fflas-ffpack/modular-balanced.h"14 //#include "fflas-ffpack/modular-positive.h" 15 #include "fflas-ffpack/modular-balanced.h" 16 16 #include "timer.h" 17 17 #include "Matio.h" … … 82 82 P = new size_t[M]; 83 83 Q = new size_t[N]; 84 84 for (size_t i=0; i<M; ++i) P[i] = 0; 85 for (size_t i=0; i<N; ++i) Q[i] = 0; 85 86 } 86 87 else{ … … 90 91 P = new size_t[N]; 91 92 Q = new size_t[M]; 92 93 for (size_t i=0; i<N; ++i) P[i] = 0; 94 for (size_t i=0; i<M; ++i) Q[i] = 0; 93 95 } 94 96 … … 209 211 FFPACK::applyP( F, FFLAS::FflasLeft, FFLAS::FflasTrans, N,0,R, U, N, Q); 210 212 for (size_t i=0; i<N; ++i) 211 F.assign (*(U+i*(N+1)),one);213 F.assign (*(U+i*(N+1)),one); 212 214 if (diag == FFLAS::FflasNonUnit) 213 215 for ( size_t i=0; i<R; ++i ) … … 263 265 for (size_t i=0; i<M; ++i) 264 266 for (size_t j=0; j<N; ++j) 265 cerr<<i+1<<" "<<j+1<<" "<<((size_t) *(Abis+i*lda+j) )<<endl; 267 if (!(*(Abis+i*lda+j))) 268 cerr<<i+1<<" "<<j+1<<" "<<((size_t) *(Abis+i*lda+j) )<<endl; 266 269 cerr<<"0 0 0"<<endl<<endl; 267 270
