From 33ce8f39707cdcca9bf86588f559f691961e8bef Mon Sep 17 00:00:00 2001 From: Richard Kreckel Date: Sun, 15 Feb 2009 22:16:11 +0100 Subject: [PATCH] Prefer GMP's multiplication routine (if GMP version >= 4.0). Recent GMP multiplication routines (mpn_mul) are somewhat faster than CLN's. For huge operands, this is due to better tuned FFT. For medium-sized operands it's due to more algorithms (Toom 3-way). For small operands, I couldn't find a difference. Let's just use mpn_mul if it's available. For huge binsplit sums, this can halve the runtimes on amd64. --- src/base/digitseq/cl_DS_mul.cc | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/base/digitseq/cl_DS_mul.cc b/src/base/digitseq/cl_DS_mul.cc index c3d3b5e..6201e1a 100644 --- a/src/base/digitseq/cl_DS_mul.cc +++ b/src/base/digitseq/cl_DS_mul.cc @@ -456,7 +456,6 @@ namespace cln { #endif -// int cl_mul_algo = 0; void cl_UDS_mul (const uintD* sourceptr1, uintC len1, const uintD* sourceptr2, uintC len2, uintD* destptr) @@ -473,12 +472,9 @@ namespace cln { { mulu_loop_lsp(lsprefnext(sourceptr1),sourceptr2,destptr,len2); } else { -// if (cl_mul_algo > 0) -// mulu_fftcs(sourceptr1,len1,sourceptr2,len2,destptr); -// else -// if (cl_mul_algo > 0) -// mulu_nussbaumer(sourceptr1,len1,sourceptr2,len2,destptr); -// else +#if CL_USE_GMP && __GNU_MP__ >= 4 + mpn_mul(destptr,sourceptr2,len2,sourceptr1,len1); +#else if (len1 < cl_karatsuba_threshold) // Multiplikation nach Schulmethode mulu_2loop(sourceptr1,len1,sourceptr2,len2,destptr); @@ -493,10 +489,11 @@ namespace cln { //mulu_nussbaumer(sourceptr1,len1,sourceptr2,len2,destptr); //mulu_fft_modp3(sourceptr1,len1,sourceptr2,len2,destptr); mulu_fft_modm(sourceptr1,len1,sourceptr2,len2,destptr); +#endif #ifdef DEBUG_MUL_XXX { // Check the correctness of an other multiplication algorithm: CL_ALLOCA_STACK; - var uintD tmpprod_xxx = cl_alloc_array(uintD,len1+len2); + var uintD* tmpprod_xxx = cl_alloc_array(uintD,len1+len2); 
mulu_xxx(sourceptr1,len1,sourceptr2,len2,arrayLSDptr(tmpprod_xxx,len1+len2)); if (compare_loop_msp(destptr lspop (len1+len2),arrayMSDptr(tmpprod_xxx,len1+len2),len1+len2)) throw runtime_exception(); @@ -523,10 +520,14 @@ namespace cln { { if (len < cl_karatsuba_threshold) mulu_2loop_square(sourceptr,len,destptr); else +#if CL_USE_GMP && __GNU_MP__ >= 4 + mpn_mul(destptr,sourceptr,len,sourceptr,len); +#else if (!(len >= cl_fftm_threshold)) mulu_karatsuba_square(sourceptr,len,destptr); else mulu_fft_modm(sourceptr,len,sourceptr,len,destptr); +#endif } }