| 17 | | /* From M Abshoff */ |
| 18 | | #if defined(__sun) |
| 19 | | #define lround(x) my_lround(x) |
| 20 | | static long my_lround(double x) |
| 21 | | { |
| 22 | | return (long) ((x) >= 0 ? (x) + 0.5 : (x) - 0.5); |
| 23 | | } |
| 24 | | #endif |
| 25 | | |
| 26 | | //--------------------------------------------------------------------- |
| 27 | | // DotProdBound : |
| 28 | | // Computes the maximal dimension k so that the product A*B+beta.C over Z, |
| 29 | | // where A is m*k and B is k*n can be performed correctly with w Winograd |
| 30 | | // recursion levels on the number of bits of the floating point mantissa |
| 31 | | //--------------------------------------------------------------------- |
| 32 | | template <class Field> |
| 33 | | inline size_t FFLAS::DotProdBoundCompute (const Field& F, const size_t w, |
| 34 | | const typename Field::Element& beta, |
| 35 | | const FFLAS_BASE base){ |
| | 17 | /** |
| | 18 | * MatMulParameters |
| | 19 | * |
| | 20 | * \brief Computes the threshold parameters for the cascade |
| | 21 | * Matmul algorithm |
| | 22 | * |
| | 23 | * |
| | 24 | * \param F Finite Field/Ring of the computation. |
| | 25 | * \param k Common dimension of A and B, in the product A x B |
| | 26 | * \param bet Computing AB + beta C |
| | 27 | * \param delayedDim Returns the size of blocks that can be multiplied |
| | 28 | * over Z with no overflow |
| | 29 | * \param base Returns the type of BLAS representation to use |
| | 30 | * \param winoRecLevel Returns the number of recursion levels of |
| | 31 | * Strassen-Winograd's algorithm to perform |
| | 32 | * \param winoLevelProvided tells whether the user forced the number of |
| | 33 | * recursive level of Winograd's algorithm |
| | 34 | */ |
| | 35 | template <class Field> |
| | 36 | inline void FFLAS::MatMulParameters (const Field& F, |
| | 37 | const size_t k, |
| | 38 | const typename Field::Element& beta, |
| | 39 | size_t& delayedDim, |
| | 40 | FFLAS_BASE& base, |
| | 41 | size_t& winoRecLevel, |
| | 42 | bool winoLevelProvided) { |
| | 43 | |
| | 44 | // Strategy : determine Winograd's recursion first, then choose appropriate |
| | 45 | // floating point representation, and finally the blocking dimension. |
| | 46 | // Can be improved for some cases. |
| | 47 | |
| | 48 | if (!winoLevelProvided) |
| | 49 | winoRecLevel = WinoSteps (k); |
| | 50 | base = BaseCompute (F, winoRecLevel); |
| | 51 | delayedDim = DotProdBound (F, winoRecLevel, beta, base); |
| | 52 | |
| | 53 | size_t n = k; |
| | 54 | size_t winoDel = winoRecLevel; |
| | 55 | |
| | 56 | // Computes the delayedDim, only depending on the recursive levels |
| | 57 | // that must be performed over Z |
| | 58 | while (winoDel > 0 && delayedDim < n) { |
| | 59 | winoDel--; |
| | 60 | delayedDim = DotProdBound (F, winoDel, beta, base); |
| | 61 | n >>= 1; |
| | 62 | } |
| | 63 | delayedDim = MIN (n, delayedDim); |
| | 64 | } |
| | 65 | |
| | 66 | /** |
| | 67 | * DotProdBound |
| | 68 | * |
| | 69 | * \brief computes the maximal size for delaying the modular reduction |
| | 70 | * in a dotproduct |
| | 71 | * |
| | 72 | * This is the default version assuming a conversion to a positive modular representation |
| | 73 | * |
| | 74 | * \param F Finite Field/Ring of the computation |
| | 75 | * \param winoRecLevel Number of recusrive Strassen-Winograd levels (if any, 0 otherwise) |
| | 76 | * \param beta Computing AB + beta C |
| | 77 | * \param base Type of floating point representation for delayed modular computations |
| | 78 | * |
| | 79 | */ |
| | 80 | template <class Field> |
| | 81 | inline size_t FFLAS::DotProdBound (const Field& F, |
| | 82 | const size_t w, |
| | 83 | const typename Field::Element& beta, |
| | 84 | const FFLAS_BASE base) { |
| 45 | | if (p == 0) |
| 46 | | kmax = 2; |
| 47 | | else |
| 48 | | if (w > 0) { |
| 49 | | size_t ex=1; |
| 50 | | for (size_t i=0; i < w; ++i) ex *= 3; |
| 51 | | //FFLAS_INT_TYPE c = (p-1)*(ex)/2; //bound for a centered representation |
| 52 | | FFLAS_INT_TYPE c; |
| 53 | | #ifndef _LINBOX_LINBOX_CONFIG_H |
| 54 | | if (F.balanced) |
| 55 | | c = (p-1)*(ex)/2; // balanced representation |
| 56 | | else |
| 57 | | #endif |
| 58 | | c = (p-1)*(1+ex)/2; |
| 59 | | kmax = lround(( double(1ULL << mantissa) /double(c*c) + 1)*(1ULL << w)); |
| 60 | | if (kmax == ( 1ULL << w)) |
| 61 | | kmax = 2; |
| 62 | | } |
| 63 | | else{ |
| 64 | | FFLAS_INT_TYPE c = p-1; |
| 65 | | FFLAS_INT_TYPE cplt=0; |
| 66 | | if (!F.isZero (beta)) |
| 67 | | if (F.isOne (beta) || F.areEqual (beta, mone)) |
| 68 | | cplt = c; |
| 69 | | else cplt = c*c; |
| 70 | | kmax = lround(( double((1ULL << mantissa) - cplt)) /double(c*c)); |
| 71 | | if (kmax < 2) |
| 72 | | kmax = 2; |
| 73 | | } |
| 74 | | //cerr<<"kmax = "<<kmax<<endl; |
| 75 | | return (size_t) MIN(kmax,1ULL<<31); |
| 76 | | } |
| 77 | | |
| 78 | | |
| 79 | | |
| 80 | | template < class Field > |
| 81 | | inline size_t FFLAS::DotProdBound (const Field& F, const size_t w, |
| 82 | | const typename Field::Element& beta, |
| 83 | | const FFLAS_BASE base) |
| 84 | | { |
| 85 | | static Field G = F; |
| 86 | | static FFLAS_INT_TYPE pig; |
| 87 | | G.characteristic(pig); |
| 88 | | FFLAS_INT_TYPE pif; |
| 89 | | F.characteristic(pif); |
| 90 | | static typename Field::Element b = beta; |
| 91 | | static size_t w2 = w; |
| 92 | | static FFLAS_BASE base2 = base; |
| 93 | | static size_t kmax = DotProdBoundCompute (F, w, beta, base); |
| 94 | | if ((b != beta) || (pif != pig) || (w2 != w) || (base2!=base)) { |
| 95 | | G = F; |
| 96 | | w2 = w; |
| 97 | | b = beta; |
| 98 | | base2 = base; |
| 99 | | kmax = DotProdBoundCompute (F, w, beta, base); |
| 100 | | } |
| 101 | | return kmax; |
| 102 | | } |
| 103 | | |
| 104 | | //--------------------------------------------------------------------- |
| 105 | | // TRSMBound |
| 106 | | // Computes nmax s.t. (p-1)/2*(p^{nmax-1} + (p-2)^{nmax-1}) < 2^53 |
| 107 | | //--------------------------------------------------------------------- |
| 108 | | inline size_t bound_compute_double(const FFLAS_INT_TYPE pi) { |
| 109 | | |
| 110 | | FFLAS_INT_TYPE p=pi,p1=1,p2=1; |
| 111 | | size_t nmax = 1; |
| 112 | | FFLAS_INT_TYPE max = ( FFLAS_INT_TYPE( 1ULL<<(DOUBLE_MANTISSA+1) )/(p-1)); |
| | 160 | FFLAS_INT_TYPE pi; |
| | 161 | F.characteristic(pi); |
| | 162 | FFLAS_BASE base; |
| | 163 | switch (w) { |
| | 164 | case 0: base = (pi < FLOAT_DOUBLE_THRESHOLD_0)? FflasFloat : FflasDouble; |
| | 165 | break; |
| | 166 | case 1: base = (pi < FLOAT_DOUBLE_THRESHOLD_1)? FflasFloat : FflasDouble; |
| | 167 | break; |
| | 168 | case 2: base = (pi < FLOAT_DOUBLE_THRESHOLD_2)? FflasFloat : FflasDouble; |
| | 169 | break; |
| | 170 | default: base = FflasDouble; |
| | 171 | break; |
| | 172 | } |
| | 173 | return base; |
| | 174 | } |
| | 175 | |
| | 176 | |
| | 177 | /************************************************************************************* |
| | 178 | * Specializations for ModularPositive and ModularBalanced over double and float |
| | 179 | *************************************************************************************/ |
| | 180 | |
| | 181 | template <class Element> |
| | 182 | inline double computeFactor (const ModularBalanced<Element>& F, const size_t w){ |
| | 183 | FFLAS_INT_TYPE p; |
| | 184 | F.characteristic(p); |
| | 185 | size_t ex=1; |
| | 186 | for (size_t i=0; i < w; ++i) ex *= 3; |
| | 187 | return (p - 1) * ex / 2; |
| | 188 | } |
| | 189 | |
| | 190 | template <> |
| | 191 | inline FFLAS::FFLAS_BASE FFLAS::BaseCompute (const Modular<double>& F, |
| | 192 | const size_t w){ |
| | 193 | return FflasDouble; |
| | 194 | } |
| | 195 | |
| | 196 | template <> |
| | 197 | inline FFLAS::FFLAS_BASE FFLAS::BaseCompute (const Modular<float>& F, |
| | 198 | const size_t w){ |
| | 199 | return FflasFloat; |
| | 200 | } |
| | 201 | |
| | 202 | template <> |
| | 203 | inline FFLAS::FFLAS_BASE FFLAS::BaseCompute (const ModularBalanced<double>& F, |
| | 204 | const size_t w){ |
| | 205 | return FflasDouble; |
| | 206 | } |
| | 207 | |
| | 208 | template <> |
| | 209 | inline FFLAS::FFLAS_BASE FFLAS::BaseCompute (const ModularBalanced<float>& F, |
| | 210 | const size_t w){ |
| | 211 | return FflasFloat; |
| | 212 | } |
| | 213 | |
| | 214 | /** |
| | 215 | * TRSMBound |
| | 216 | * |
| | 217 | * \brief computes the maximal size for delaying the modular reduction |
| | 218 | * in a triangular system resolution |
| | 219 | * |
| | 220 | * This is the default version over an arbitrary field. |
| | 221 | * It is currently never used (the recursive algorithm is run until n=1 in this case) |
| | 222 | * |
| | 223 | * \param F Finite Field/Ring of the computation |
| | 224 | * |
| | 225 | */ |
| | 226 | template <class Field> |
| | 227 | inline size_t FFLAS::TRSMBound (const Field& F) { |
| | 228 | return 1; |
| | 229 | } |
| | 230 | |
| | 231 | /** |
| | 232 | * Specialization for positive modular representation over double |
| | 233 | * Computes nmax s.t. (p-1)/2*(p^{nmax-1} + (p-2)^{nmax-1}) < 2^53 |
| | 234 | * See [Dumas Giorgi Pernet 06, arXiv:cs/0601133] |
| | 235 | */ |
| | 236 | template<> |
| | 237 | inline size_t FFLAS::TRSMBound (const Modular<double>& F){ |
| | 238 | |
| | 239 | FFLAS_INT_TYPE pi; |
| | 240 | F.characteristic(pi); |
| | 241 | unsigned long long p = pi, p1 = 1, p2 = 1; |
| | 242 | size_t nmax = 0; |
| | 243 | unsigned long long max = ( (1ULL << (DOUBLE_MANTISSA + 1) ) / (p - 1)); |
| 180 | | class FFLAS::callTRSMBound<double> { |
| 181 | | public: |
| 182 | | template <class Field> |
| 183 | | size_t operator () (const Field& F) { |
| 184 | | FFLAS_INT_TYPE pi; |
| 185 | | F.characteristic(pi); |
| 186 | | static FFLAS_INT_TYPE p=pi; |
| 187 | | #ifdef _LINBOX_LINBOX_CONFIG_H |
| 188 | | static size_t nmax = bound_compute_double(pi); |
| 189 | | #else |
| 190 | | static size_t nmax = (F.balanced) ? bound_compute_double_balanced(pi) : bound_compute_double(pi); |
| 191 | | #endif |
| 192 | | if (p == pi) |
| 193 | | return nmax; |
| 194 | | else |
| 195 | | #ifdef _LINBOX_LINBOX_CONFIG_H |
| 196 | | return nmax= bound_compute_double (p=pi); //(F.balanced) ? bound_compute_balanced(p=pi) : bound_compute(p=pi); |
| 197 | | #else |
| 198 | | return nmax = (F.balanced) ? bound_compute_double_balanced(p=pi) : bound_compute_double(p=pi); |
| 199 | | #endif |
| 200 | | } |
| 201 | | }; |
| 202 | | |
| 203 | | template<> |
| 204 | | class FFLAS::callTRSMBound<float> { |
| 205 | | public: |
| 206 | | template <class Field> |
| 207 | | size_t operator () (const Field& F) { |
| 208 | | FFLAS_INT_TYPE pi; |
| 209 | | F.characteristic(pi); |
| 210 | | static FFLAS_INT_TYPE p=pi; |
| 211 | | #ifdef _LINBOX_LINBOX_CONFIG_H |
| 212 | | static size_t nmax = bound_compute_float(pi); |
| 213 | | #else |
| 214 | | static size_t nmax = (F.balanced) ? bound_compute_float_balanced(pi) : bound_compute_float(pi); |
| 215 | | #endif |
| 216 | | if (p == pi) |
| 217 | | return nmax; |
| 218 | | else |
| 219 | | #ifdef _LINBOX_LINBOX_CONFIG_H |
| 220 | | return nmax= bound_compute_float (p=pi); //(F.balanced) ? bound_compute_balanced(p=pi) : bound_compute(p=pi); |
| 221 | | #else |
| 222 | | return nmax = (F.balanced) ? bound_compute_float_balanced(p=pi) : bound_compute_float(p=pi); |
| 223 | | #endif |
| 224 | | } |
| 225 | | }; |
| 226 | | |
| 227 | | |
| 228 | | |
| 229 | | /** Automatic computation of the parameters for Matrix Multiplication |
| 230 | | * |
| 231 | | */ |
| 232 | | |
| 233 | | // To be tuned up: use statics to perform the computations only once for a given set of parameters |
| 234 | | template <class Element> |
| 235 | | class FFLAS::setMatMulParam { |
| 236 | | public: |
| 237 | | template <class Field> |
| 238 | | void operator() (const Field& F, const size_t m, |
| 239 | | const typename Field::Element& beta, |
| 240 | | size_t& w, FFLAS_BASE& base, size_t& kmax){ |
| 241 | | |
| 242 | | // Strategy : determine Winograd's recursion first, then choose appropriate |
| 243 | | // floating point representation, and finally the blocking dimension. |
| 244 | | // Can be improved for some cases. |
| 245 | | static size_t ms = m; |
| 246 | | static size_t ks = WinoSteps (m); |
| 247 | | |
| 248 | | if (m != ms) |
| 249 | | ks = WinoSteps (ms = m); |
| 250 | | |
| 251 | | w = ks; |
| 252 | | |
| 253 | | base = BaseCompute<typename Field::Element> ()(F, w); |
| 254 | | kmax = DotProdBound (F, w, beta, base); |
| 255 | | } |
| 256 | | }; |
| 257 | | |
| 258 | | template<> |
| 259 | | class FFLAS::setMatMulParam<double> { |
| 260 | | public: |
| 261 | | template <class Field> |
| 262 | | void operator () (const Field& F, const size_t m, |
| 263 | | const typename Field::Element& beta, |
| 264 | | size_t& w, FFLAS_BASE& base, size_t& kmax){ |
| 265 | | |
| 266 | | static size_t ms = m; |
| 267 | | static size_t ks = WinoSteps (m); |
| 268 | | |
| 269 | | if (m != ms) |
| 270 | | ks = WinoSteps (ms = m); |
| 271 | | |
| 272 | | w = ks; |
| 273 | | base = FflasDouble; |
| 274 | | kmax = DotProdBound (F, w, beta, base); |
| 275 | | } |
| 276 | | }; |
| 277 | | |
| 278 | | template<> |
| 279 | | class FFLAS::setMatMulParam<float> { |
| 280 | | public: |
| 281 | | template <class Field> |
| 282 | | void operator () (const Field& F, const size_t m, |
| 283 | | const typename Field::Element& beta, |
| 284 | | size_t& w, FFLAS_BASE& base, size_t& kmax){ |
| 285 | | |
| 286 | | static size_t ms = m; |
| 287 | | static size_t ks = WinoSteps (m); |
| 288 | | |
| 289 | | if (m != ms) |
| 290 | | ks = WinoSteps (ms = m); |
| 291 | | |
| 292 | | w = ks; |
| 293 | | base = FflasFloat; |
| 294 | | kmax = DotProdBound (F, w, beta, base); |
| 295 | | } |
| 296 | | }; |
| 297 | | |
| 298 | | inline size_t FFLAS::WinoSteps (const size_t m) { |
| 299 | | |
| 300 | | size_t w=0; |
| 301 | | size_t mt = m; |
| 302 | | while (mt >= WINOTHRESHOLD){ |
| 303 | | w++; |
| 304 | | mt/=2; |
| 305 | | } |
| 306 | | //cerr<<"Winostep = "<<w<<endl; |
| 307 | | return w; |
| 308 | | } |
| 309 | | |
| 310 | | |
| 311 | | template <class Element> |
| 312 | | class FFLAS::BaseCompute { |
| 313 | | public: |
| 314 | | template <class Field> |
| 315 | | FFLAS::FFLAS_BASE operator() (const Field& F, const size_t w){ |
| 316 | | |
| 317 | | FFLAS_INT_TYPE pi; |
| 318 | | F.characteristic(pi); |
| 319 | | FFLAS_BASE base; |
| 320 | | switch (w) { |
| 321 | | case 0: base = (pi < FLOAT_DOUBLE_THRESHOLD_0)? FflasFloat : FflasDouble; |
| 322 | | break; |
| 323 | | case 1: base = (pi < FLOAT_DOUBLE_THRESHOLD_1)? FflasFloat : FflasDouble; |
| 324 | | break; |
| 325 | | case 2: base = (pi < FLOAT_DOUBLE_THRESHOLD_2)? FflasFloat : FflasDouble; |
| 326 | | break; |
| 327 | | default: base = FflasDouble; |
| 328 | | break; |
| 329 | | } |
| 330 | | //cerr<<"base = "<<base<<endl; |
| 331 | | return base; |
| 332 | | // case 0: return (pi < FLOAT_DOUBLE_THRESHOLD_0)? FflasFloat : FflasDouble; |
| 333 | | // case 1: return (pi < FLOAT_DOUBLE_THRESHOLD_1)? FflasFloat : FflasDouble; |
| 334 | | // case 2: return (pi < FLOAT_DOUBLE_THRESHOLD_2)? FflasFloat : FflasDouble; |
| 335 | | // default: return FflasDouble; |
| 336 | | } |
| 337 | | }; |
| 338 | | |
| 339 | | template<> |
| 340 | | class FFLAS::BaseCompute<double> { |
| 341 | | public: |
| 342 | | template <class Field> |
| 343 | | FFLAS::FFLAS_BASE operator() (const Field& F, const size_t w){ |
| 344 | | return FflasDouble; |
| 345 | | } |
| 346 | | }; |
| 347 | | |
| 348 | | template<> |
| 349 | | class FFLAS::BaseCompute<float> { |
| 350 | | public: |
| 351 | | template <class Field> |
| 352 | | FFLAS::FFLAS_BASE operator() (const Field& F, const size_t w){ |
| 353 | | return FflasFloat; |
| 354 | | } |
| 355 | | }; |
| | 300 | inline size_t FFLAS::TRSMBound (const ModularBalanced<float>& F){ |
| | 301 | |
| | 302 | FFLAS_INT_TYPE pi; |
| | 303 | F.characteristic (pi); |
| | 304 | unsigned long long p = (pi + 1) / 2, p1 = 1; |
| | 305 | size_t nmax = 0; |
| | 306 | unsigned long long max = ((1ULL << (FLOAT_MANTISSA + 1)) / (pi - 1)); |
| | 307 | while (p1 < max){ |
| | 308 | p1 *= p; |
| | 309 | nmax++; |
| | 310 | } |
| | 311 | return nmax; |
| | 312 | |
| | 313 | } |
| | 314 | |