- src/float/lfloat/algebraic/cl_LF_sqrt.cc, src/base/digitseq/cl_DS_sqrt.cc:

Readjusted break-even points.
26 years ago · 1d18b76873
3 changed files with 27 additions and 4 deletions
--- a/7
+++ b/7
@@ -1,9 +1,14 @@
 2000-05-27  Richard Kreckel  <kreckel@ginac.de>
        * src/float/lfloat/algebraic/cl_LF_sqrt.cc,
          src/base/digitseq/cl_DS_sqrt.cc: Readjusted break-even points.
 2000-05-24  Richard Kreckel  <kreckel@ginac.de>
        * autoconf/config.*: Updated to new version from FSF
          (the new libtool wants this).
        * src/Makefile.in: added $(LDFLAGS) to link step.
        * src/base/digitseq/cl_2DS_div.cc, cl_2DS_recip.cc: Adjusted 
        * src/base/digitseq/cl_2DS_div.cc, cl_2DS_recip.cc: Readjusted 
          break-even points.
 2000-05-23  Bruno Haible  <haible@clisp.cons.org>
--- a/src/base/digitseq/cl_DS_sqrt.cc
+++ b/src/base/digitseq/cl_DS_sqrt.cc
@@ -14,6 +14,24 @@
 // We observe the following timings:
 // Time for square root of a_len = 2*N by b_len = N digits,
 // OS: Linux 2.2, intDsize==32,        OS: TRU64/4.0, intDsize==64,
 // Machine: P-III/450MHz               Machine: EV5/300MHz:
 //      N   standard  Newton           standard  Newton
 //      30   0.00002   0.00009          0.00011   0.00027
 //     100   0.00012   0.00052          0.00057   0.0017
 //     300   0.00087   0.0031           0.0037    0.0091
 //    1000   0.0089    0.020            0.037     0.069
 //    3000   0.087     0.11  <-(~3200)  0.30      0.28  <- (~2750)
 //   10000   1.27      0.55             3.5       1.3
 //   30000  12.7       1.35            31.1       3.4
 // Newton faster for 3200<N            Newton faster for 2750<N
 // When in doubt, prefer to choose the standard algorithm.
 #if CL_USE_GMP
  static inline cl_boolean cl_recipsqrt_suitable (uintL n)
  { return (cl_boolean)(n >= 3200); }
 #else
 // Use the old default values from CLN version <= 1.0.3 as a crude estimate.
 // Time for square root of a_len = 2*N by b_len = N digits,
 // on an i486 33 MHz running Linux:
 //      N   standard  Newton
 //      10    0.00022 0.00132
@@ -27,9 +45,9 @@
 //    5000   24.1    10.7
 //   10000   98      23.2
 //   -----> Newton faster for 1570 <= N <= 1790 and for N >= 2100.
 // When in doubt, prefer to choose the standard algorithm.
  static inline cl_boolean cl_recipsqrt_suitable (uintL n)
    { return (cl_boolean)(n >= 2100); }
  { return (cl_boolean)(n >= 2100); }
 #endif
 // Workaround gcc-2.7.0 bug on i386.
 #if defined(__GNUC__)
--- a/src/float/lfloat/algebraic/cl_LF_sqrt.cc
+++ b/src/float/lfloat/algebraic/cl_LF_sqrt.cc
@@ -64,7 +64,7 @@ const cl_LF sqrt (const cl_LF& x)
      var uintD* y_mantMSDptr = arrayMSDptr(TheLfloat(y)->data,len);
      // Take the square root:
 #ifndef CL_LF_PEDANTIC
      if (len > 1900) // Das ist etwa 10% bis 20% schneller (im Mittel 15%).
      if (len > 2900) // This is about 15% faster
        { // Kehrwert der Wurzel errechnen:
          var uintD* s_MSDptr;
          var uintD* s_LSDptr;