From 76ff0ad8c10d6f5c0f56287523cad2173d17c075 Mon Sep 17 00:00:00 2001 From: Richard Kreckel Date: Fri, 11 Jan 2008 16:50:15 +0000 Subject: [PATCH] Make some functions more memory efficient: * src/float/transcendental/cl_LF_tran.h (eval_rational_series): The evaluation of streamed rational series may profit from shift-counting Q, too. Introduce a template parameter to determine whether shift-counting is to be used or not. * src/float/transcendental/cl_LF_ratseries_pqb.cc: Introduce template parameter. * src/float/transcendental/cl_LF_ratseries_pqa.cc: Likewise. * src/float/transcendental/cl_LF_ratseries_pqab.cc: Likewise. * src/float/transcendental/cl_LF_ratseries_qa.cc: Likewise. * src/float/transcendental/cl_LF_ratseries_qab.cc: Likewise. * src/float/transcendental/cl_LF_ratseries_q.cc: Likewise, added overload for streamed expansion. * src/float/transcendental/cl_LF_ratseries_qb.cc: Likewise. * src/float/transcendental/cl_LF_ratseries_pq.cc: Introduce template parameter, added overload for streamed expansion using shift-counts. * src/float/transcendental/cl_LF_zeta3.cc: Adapt to above changes. * src/float/transcendental/cl_LF_pi.cc: Likewise. * src/float/transcendental/cl_LF_eulerconst.cc: Likewise. * src/float/transcendental/cl_LF_catalanconst.cc: Likewise. * src/float/transcendental/cl_LF_cossin_aux.cc: Likewise. * src/float/transcendental/cl_LF_coshsinh_aux.cc: Likewise. * src/float/transcendental/cl_LF_atanh_recip.cc: Use streamed series. * src/float/transcendental/cl_LF_atan_recip.cc: Likewise. * src/float/transcendental/cl_LF_exp1.cc: Likewise. * src/float/transcendental/cl_LF_exp_aux.cc: Likewise. * src/float/transcendental/cl_LF_ratseries.cc: Removed. --- ChangeLog | 30 +++ src/float/transcendental/cl_LF_atan_recip.cc | 42 ++-- src/float/transcendental/cl_LF_atanh_recip.cc | 37 ++-- .../transcendental/cl_LF_catalanconst.cc | 14 +- .../transcendental/cl_LF_coshsinh_aux.cc | 13 +- src/float/transcendental/cl_LF_cossin_aux.cc | 13 +- src/float/transcendental/cl_LF_eulerconst.cc | 30 +-- src/float/transcendental/cl_LF_exp1.cc | 35 ++-- src/float/transcendental/cl_LF_exp_aux.cc | 47 +++-- src/float/transcendental/cl_LF_pi.cc | 10 +- .../transcendental/cl_LF_ratseries_pq.cc | 186 ++++++++++++++---- .../transcendental/cl_LF_ratseries_pqa.cc | 84 ++++---- .../transcendental/cl_LF_ratseries_pqab.cc | 84 ++++---- .../transcendental/cl_LF_ratseries_pqb.cc | 81 ++++---- src/float/transcendental/cl_LF_ratseries_q.cc | 72 ++++++- .../transcendental/cl_LF_ratseries_qa.cc | 5 +- .../transcendental/cl_LF_ratseries_qab.cc | 5 +- .../transcendental/cl_LF_ratseries_qb.cc | 82 +++++++- src/float/transcendental/cl_LF_tran.h | 132 +++++++------ src/float/transcendental/cl_LF_zeta3.cc | 4 +- src/float/transcendental/cl_LF_zeta_int.cc | 4 +- 21 files changed, 664 insertions(+), 346 deletions(-) diff --git a/ChangeLog b/ChangeLog index 661f9e7..bd5292c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,33 @@ +2008-01-11 Richard B. Kreckel + + Make some functions more memory efficient: + * src/float/transcendental/cl_LF_tran.h (eval_rational_series): The + evaluation of streamed rational series may profit from shift-counting Q, + too. Introduce a template parameter to determine whether shift-counting + is to be used or not. + * src/float/transcendental/cl_LF_ratseries_pqb.cc: Introduce template + parameter. + * src/float/transcendental/cl_LF_ratseries_pqa.cc: Likewise. + * src/float/transcendental/cl_LF_ratseries_pqab.cc: Likewise. + * src/float/transcendental/cl_LF_ratseries_qa.cc: Likewise. + * src/float/transcendental/cl_LF_ratseries_qab.cc: Likewise. + * src/float/transcendental/cl_LF_ratseries_q.cc: Likewise, added + overload for streamed expansion. + * src/float/transcendental/cl_LF_ratseries_qb.cc: Likewise. + * src/float/transcendental/cl_LF_ratseries_pq.cc: Introduce template + parameter, added overload for streamed expansion using shift-counts. + * src/float/transcendental/cl_LF_zeta3.cc: Adapt to above changes. + * src/float/transcendental/cl_LF_pi.cc: Likewise. + * src/float/transcendental/cl_LF_eulerconst.cc: Likewise. + * src/float/transcendental/cl_LF_catalanconst.cc: Likewise. + * src/float/transcendental/cl_LF_cossin_aux.cc: Likewise. + * src/float/transcendental/cl_LF_coshsinh_aux.cc: Likewise. + * src/float/transcendental/cl_LF_atanh_recip.cc: Use streamed series. + * src/float/transcendental/cl_LF_atan_recip.cc: Likewise. + * src/float/transcendental/cl_LF_exp1.cc: Likewise. + * src/float/transcendental/cl_LF_exp_aux.cc: Likewise. + * src/float/transcendental/cl_LF_ratseries.cc: Removed. + 2008-01-06 Ralf Wildenhues Richard B. Kreckel diff --git a/src/float/transcendental/cl_LF_atan_recip.cc b/src/float/transcendental/cl_LF_atan_recip.cc index 832ac02..2b87d53 100644 --- a/src/float/transcendental/cl_LF_atan_recip.cc +++ b/src/float/transcendental/cl_LF_atan_recip.cc @@ -13,7 +13,6 @@ #include "cln/lfloat.h" #include "cl_LF.h" #include "cl_LF_tran.h" -#include "cl_alloca.h" #undef floor #include @@ -30,23 +29,30 @@ const cl_LF cl_atan_recip (cl_I m, uintC len) var uintC actuallen = len + 1; var cl_I m2 = m*m+1; var uintC N = (uintC)(0.69314718*intDsize*actuallen/::log(double_approx(m2))) + 1; - CL_ALLOCA_STACK; - var cl_I* pv = (cl_I*) cl_alloca(N*sizeof(cl_I)); - var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I)); - var uintC n; - new (&pv[0]) cl_I (m); - new (&qv[0]) cl_I (m2); - for (n = 1; n < N; n++) { - new (&pv[n]) cl_I (2*n); - new (&qv[n]) cl_I ((2*n+1)*m2); - } - var cl_pq_series series; - series.pv = pv; series.qv = qv; series.qsv = NULL; - var cl_LF result = eval_rational_series(N,series,actuallen); - for (n = 0; n < N; n++) { - pv[n].~cl_I(); - qv[n].~cl_I(); - } + struct rational_series_stream : cl_pq_series_stream { + var uintC n; + var cl_I m; + var cl_I m2; + static cl_pq_series_term computenext (cl_pq_series_stream& thisss) + { + var rational_series_stream& thiss = (rational_series_stream&)thisss; + var uintC n = thiss.n; + var cl_pq_series_term result; + if (n==0) { + result.p = thiss.m; + result.q = thiss.m2; + } else { + result.p = 2*n; + result.q = (2*n+1)*thiss.m2; + } + thiss.n = n+1; + return result; + } + rational_series_stream(const cl_I& m_, const cl_I& m2_) + : cl_pq_series_stream (rational_series_stream::computenext), + n(0), m(m_), m2(m2_) {} + } series(m,m2); + var cl_LF result = eval_rational_series(N,series,actuallen); return shorten(result,len); } // Bit complexity (N = len): O(log(N)^2*M(N)). diff --git a/src/float/transcendental/cl_LF_atanh_recip.cc b/src/float/transcendental/cl_LF_atanh_recip.cc index 113dfc0..84b4092 100644 --- a/src/float/transcendental/cl_LF_atanh_recip.cc +++ b/src/float/transcendental/cl_LF_atanh_recip.cc @@ -13,7 +13,6 @@ #include "cln/lfloat.h" #include "cl_LF.h" #include "cl_LF_tran.h" -#include "cl_alloca.h" #undef floor #include @@ -28,24 +27,26 @@ namespace cln { const cl_LF cl_atanh_recip (cl_I m, uintC len) { var uintC actuallen = len + 1; - var cl_I m2 = m*m; var uintC N = (uintC)(0.69314718*intDsize/2*actuallen/::log(double_approx(m))) + 1; - CL_ALLOCA_STACK; - var cl_I* bv = (cl_I*) cl_alloca(N*sizeof(cl_I)); - var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I)); - var uintC n; - for (n = 0; n < N; n++) { - new (&bv[n]) cl_I ((cl_I)(2*n+1)); - new (&qv[n]) cl_I (n==0 ? m : m2); - } - var cl_qb_series series; - series.bv = bv; - series.qv = qv; series.qsv = NULL; - var cl_LF result = eval_rational_series(N,series,actuallen); - for (n = 0; n < N; n++) { - bv[n].~cl_I(); - qv[n].~cl_I(); - } + struct rational_series_stream : cl_qb_series_stream { + var uintC n; + var cl_I m; + var cl_I m2; + static cl_qb_series_term computenext (cl_qb_series_stream& thisss) + { + var rational_series_stream& thiss = (rational_series_stream&)thisss; + var uintC n = thiss.n; + var cl_qb_series_term result; + result.b = 2*n+1; + result.q = (n==0 ? thiss.m : thiss.m2); + thiss.n = n+1; + return result; + } + rational_series_stream(const cl_I& m_) + : cl_qb_series_stream (rational_series_stream::computenext), + n(0), m(m_), m2(square(m_)) {} + } series(m); + var cl_LF result = eval_rational_series(N,series,actuallen); return shorten(result,len); } // Bit complexity (N = len): O(log(N)^2*M(N)). diff --git a/src/float/transcendental/cl_LF_catalanconst.cc b/src/float/transcendental/cl_LF_catalanconst.cc index 5f3f2ee..40db8e9 100644 --- a/src/float/transcendental/cl_LF_catalanconst.cc +++ b/src/float/transcendental/cl_LF_catalanconst.cc @@ -26,7 +26,7 @@ const cl_LF compute_catalanconst_ramanujan (uintC len) // Every summand gives 0.6 new decimal digits in precision. // The sum is best evaluated using fixed-point arithmetic, // so that the precision is reduced for the later summands. - var uintC actuallen = len + 2; // 2 Schutz-Digits + var uintC actuallen = len + 2; // 2 guard digits var sintC scale = intDsize*actuallen; var cl_I sum = 0; var cl_I n = 0; @@ -80,7 +80,7 @@ const cl_LF compute_catalanconst_ramanujan_fast (uintC len) // p(n) = n for n>0, q(n) = 2*(2*n+1) for n>0. var uintC N = (intDsize/2)*actuallen; // 4^-N <= 2^(-intDsize*actuallen). - var cl_LF fsum = eval_rational_series(N,series,actuallen,actuallen); + var cl_LF fsum = eval_rational_series(N,series,actuallen,actuallen); var cl_LF g = scale_float(The(cl_LF)(3*fsum) + The(cl_LF)(pi(actuallen)) @@ -93,7 +93,7 @@ const cl_LF compute_catalanconst_ramanujan_fast (uintC len) const cl_LF compute_catalanconst_expintegral1 (uintC len) { // We compute f(x) classically and g(x) using the partial sums of f(x). - var uintC actuallen = len+2; // 2 Schutz-Digits + var uintC actuallen = len+2; // 2 guard digits var uintC x = (uintC)(0.693148*intDsize*actuallen)+1; var uintC N = (uintC)(2.718281828*x); var cl_LF fterm = cl_I_to_LF(1,actuallen); @@ -123,7 +123,7 @@ const cl_LF compute_catalanconst_expintegral1 (uintC len) // the sums. const cl_LF compute_catalanconst_expintegral2 (uintC len) { - var uintC actuallen = len+2; // 2 Schutz-Digits + var uintC actuallen = len+2; // 2 guard digits var uintC x = (uintC)(0.693148*intDsize*actuallen)+1; var uintC N = (uintC)(2.718281828*x); CL_ALLOCA_STACK; @@ -154,7 +154,7 @@ const cl_LF compute_catalanconst_expintegral2 (uintC len) // Using Cohen-Villegas-Zagier acceleration, but without binary splitting. const cl_LF compute_catalanconst_cvz1 (uintC len) { - var uintC actuallen = len+2; // 2 Schutz-Digits + var uintC actuallen = len+2; // 2 guard digits var uintC N = (uintC)(0.39321985*intDsize*actuallen)+1; #if 0 var cl_LF fterm = cl_I_to_LF(2*(cl_I)N*(cl_I)N,actuallen); @@ -205,7 +205,7 @@ const cl_LF compute_catalanconst_cvz1 (uintC len) // Using Cohen-Villegas-Zagier acceleration, with binary splitting. const cl_LF compute_catalanconst_cvz2 (uintC len) { - var uintC actuallen = len+2; // 2 Schutz-Digits + var uintC actuallen = len+2; // 2 guard digits var uintC N = (uintC)(0.39321985*intDsize*actuallen)+1; CL_ALLOCA_STACK; var cl_pqd_series_term* args = (cl_pqd_series_term*) cl_alloca(N*sizeof(cl_pqd_series_term)); @@ -266,7 +266,7 @@ const cl_LF compute_catalanconst_lupas (uintC len) } series; var uintC actuallen = len + 2; // 2 guard digits var uintC N = (intDsize/2)*actuallen; - var cl_LF fsum = eval_rational_series(N,series,actuallen,actuallen); + var cl_LF fsum = eval_rational_series(N,series,actuallen,actuallen); var cl_LF g = fsum*cl_I_to_LF(19,actuallen)/cl_I_to_LF(18,actuallen); return shorten(g,len); } diff --git a/src/float/transcendental/cl_LF_coshsinh_aux.cc b/src/float/transcendental/cl_LF_coshsinh_aux.cc index 92f2e4c..fab71c5 100644 --- a/src/float/transcendental/cl_LF_coshsinh_aux.cc +++ b/src/float/transcendental/cl_LF_coshsinh_aux.cc @@ -52,8 +52,8 @@ const cl_LF_cosh_sinh_t cl_coshsinh_aux (const cl_I& p, uintE lq, uintC len) // a(n) = 1, b(n) = 1, // p(0) = p, p(n) = p^2 for n>0, // q(0) = 2^lq, q(n) = (2*n)*(2*n+1)*(2^lq)^2 for n>0. - var uintC actuallen = len+1; // 1 Schutz-Digit - // How many terms to we need for M bits of precision? N/2 terms suffice, + var uintC actuallen = len+1; // 1 guard digit + // How many terms do we need for M bits of precision? N/2 terms suffice, // provided that // 1/(2^(N*lp)*N!) < 2^-M // <== N*(log(N)-1)+N*lp*log(2) > M*log(2) @@ -73,7 +73,6 @@ const cl_LF_cosh_sinh_t cl_coshsinh_aux (const cl_I& p, uintE lq, uintC len) CL_ALLOCA_STACK; var cl_I* pv = (cl_I*) cl_alloca(N*sizeof(cl_I)); var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I)); - var uintC* qsv = (uintC*) cl_alloca(N*sizeof(uintC)); var uintC n; var cl_I p2 = square(p); var cl_LF sinhsum; @@ -85,8 +84,8 @@ const cl_LF_cosh_sinh_t cl_coshsinh_aux (const cl_I& p, uintE lq, uintC len) init1(cl_I, qv[n]) (((cl_I)n*(cl_I)(2*n+1)) << (2*lq+1)); } var cl_pq_series series; - series.pv = pv; series.qv = qv; series.qsv = qsv; - sinhsum = eval_rational_series(N,series,actuallen); + series.pv = pv; series.qv = qv; + sinhsum = eval_rational_series(N,series,actuallen); for (n = 0; n < N; n++) { pv[n].~cl_I(); qv[n].~cl_I(); @@ -102,8 +101,8 @@ const cl_LF_cosh_sinh_t cl_coshsinh_aux (const cl_I& p, uintE lq, uintC len) init1(cl_I, qv[n]) (((cl_I)n*(cl_I)(2*n-1)) << (2*lq+1)); } var cl_pq_series series; - series.pv = pv; series.qv = qv; series.qsv = qsv; - coshsum = eval_rational_series(N,series,actuallen); + series.pv = pv; series.qv = qv; + coshsum = eval_rational_series(N,series,actuallen); for (n = 0; n < N; n++) { pv[n].~cl_I(); qv[n].~cl_I(); diff --git a/src/float/transcendental/cl_LF_cossin_aux.cc b/src/float/transcendental/cl_LF_cossin_aux.cc index 86e630d..793bdcf 100644 --- a/src/float/transcendental/cl_LF_cossin_aux.cc +++ b/src/float/transcendental/cl_LF_cossin_aux.cc @@ -52,8 +52,8 @@ const cl_LF_cos_sin_t cl_cossin_aux (const cl_I& p, uintE lq, uintC len) // a(n) = 1, b(n) = 1, // p(0) = p, p(n) = -p^2 for n>0, // q(0) = 2^lq, q(n) = (2*n)*(2*n+1)*(2^lq)^2 for n>0. - var uintC actuallen = len+1; // 1 Schutz-Digit - // How many terms to we need for M bits of precision? N/2 terms suffice, + var uintC actuallen = len+1; // 1 guard digit + // How many terms do we need for M bits of precision? N/2 terms suffice, // provided that // 1/(2^(N*lp)*N!) < 2^-M // <== N*(log(N)-1)+N*lp*log(2) > M*log(2) @@ -73,7 +73,6 @@ const cl_LF_cos_sin_t cl_cossin_aux (const cl_I& p, uintE lq, uintC len) CL_ALLOCA_STACK; var cl_I* pv = (cl_I*) cl_alloca(N*sizeof(cl_I)); var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I)); - var uintC* qsv = (uintC*) cl_alloca(N*sizeof(uintC)); var uintC n; var cl_I p2 = -square(p); var cl_LF sinsum; @@ -85,8 +84,8 @@ const cl_LF_cos_sin_t cl_cossin_aux (const cl_I& p, uintE lq, uintC len) init1(cl_I, qv[n]) (((cl_I)n*(cl_I)(2*n+1)) << (2*lq+1)); } var cl_pq_series series; - series.pv = pv; series.qv = qv; series.qsv = qsv; - sinsum = eval_rational_series(N,series,actuallen); + series.pv = pv; series.qv = qv; + sinsum = eval_rational_series(N,series,actuallen); for (n = 0; n < N; n++) { pv[n].~cl_I(); qv[n].~cl_I(); @@ -102,8 +101,8 @@ const cl_LF_cos_sin_t cl_cossin_aux (const cl_I& p, uintE lq, uintC len) init1(cl_I, qv[n]) (((cl_I)n*(cl_I)(2*n-1)) << (2*lq+1)); } var cl_pq_series series; - series.pv = pv; series.qv = qv; series.qsv = qsv; - cossum = eval_rational_series(N,series,actuallen); + series.pv = pv; series.qv = qv; + cossum = eval_rational_series(N,series,actuallen); for (n = 0; n < N; n++) { pv[n].~cl_I(); qv[n].~cl_I(); diff --git a/src/float/transcendental/cl_LF_eulerconst.cc b/src/float/transcendental/cl_LF_eulerconst.cc index 7eff07f..0cc8bbf 100644 --- a/src/float/transcendental/cl_LF_eulerconst.cc +++ b/src/float/transcendental/cl_LF_eulerconst.cc @@ -71,7 +71,7 @@ const cl_LF compute_eulerconst_expintegral (uintC len) // If we computed this with floating-point numbers, we would have // to more than double the floating-point precision because of the large // extinction which takes place. But luckily we compute with integers. - var uintC actuallen = len+1; // 1 Schutz-Digit + var uintC actuallen = len+1; // 1 guard digit var uintC z = (uintC)(0.693148*intDsize*actuallen)+1; var uintC N = (uintC)(3.591121477*z); CL_ALLOCA_STACK; @@ -86,8 +86,8 @@ const cl_LF compute_eulerconst_expintegral (uintC len) } var cl_pqb_series series; series.bv = bv; - series.pv = pv; series.qv = qv; series.qsv = NULL; - var cl_LF fsum = eval_rational_series(N,series,actuallen); + series.pv = pv; series.qv = qv; + var cl_LF fsum = eval_rational_series(N,series,actuallen); for (n = 0; n < N; n++) { bv[n].~cl_I(); pv[n].~cl_I(); @@ -146,7 +146,7 @@ const cl_LF compute_eulerconst_expintegral1 (uintC len) // Finally we compute the sums of the series f(x) and g(x) with N terms // each. // We compute f(x) classically and g(x) using the partial sums of f(x). - var uintC actuallen = len+2; // 2 Schutz-Digits + var uintC actuallen = len+2; // 2 guard digits var uintC x = (uintC)(0.693148*intDsize*actuallen)+1; var uintC N = (uintC)(2.718281828*x); var cl_LF one = cl_I_to_LF(1,actuallen); @@ -186,7 +186,7 @@ const cl_LF compute_eulerconst_expintegral1 (uintC len) // the sums. const cl_LF compute_eulerconst_expintegral2 (uintC len) { - var uintC actuallen = len+2; // 2 Schutz-Digits + var uintC actuallen = len+2; // 2 guard digits var uintC x = (uintC)(0.693148*intDsize*actuallen)+1; var uintC N = (uintC)(2.718281828*x); CL_ALLOCA_STACK; @@ -276,7 +276,7 @@ const cl_LF compute_eulerconst_expintegral2 (uintC len) const cl_LF compute_eulerconst_besselintegral1 (uintC len) { // We compute f(x) classically and g(x) using the partial sums of f(x). - var uintC actuallen = len+1; // 1 Schutz-Digit + var uintC actuallen = len+1; // 1 guard digit var uintC sx = (uintC)(0.25*0.693148*intDsize*actuallen)+1; var uintC N = (uintC)(3.591121477*sx); var cl_I x = square((cl_I)sx); @@ -326,7 +326,7 @@ const cl_LF compute_eulerconst_besselintegral2 (uintC len) // WARNING: The memory used by this algorithm grown quadratically in N. // (Because HD_n grows like exp(n), hence HN_n grows like exp(n) as // well, and we store all HN_n values in an array!) - var uintC actuallen = len+1; // 1 Schutz-Digit + var uintC actuallen = len+1; // 1 guard digit var uintC sx = (uintC)(0.25*0.693148*intDsize*actuallen)+1; var uintC N = (uintC)(3.591121477*sx); var cl_I x = square((cl_I)sx); @@ -343,8 +343,8 @@ const cl_LF compute_eulerconst_besselintegral2 (uintC len) init1(cl_I, qv[n]) ((cl_I)n*(cl_I)n); } var cl_pq_series fseries; - fseries.pv = pv; fseries.qv = qv; fseries.qsv = NULL; - var cl_LF fsum = eval_rational_series(N,fseries,actuallen); + fseries.pv = pv; fseries.qv = qv; + var cl_LF fsum = eval_rational_series(N,fseries,actuallen); for (n = 0; n < N; n++) { pv[n].~cl_I(); qv[n].~cl_I(); @@ -365,8 +365,8 @@ const cl_LF compute_eulerconst_besselintegral2 (uintC len) } var cl_pqa_series gseries; gseries.av = av; - gseries.pv = pv; gseries.qv = qv; gseries.qsv = NULL; - var cl_LF gsum = eval_rational_series(N,gseries,actuallen); + gseries.pv = pv; gseries.qv = qv; + var cl_LF gsum = eval_rational_series(N,gseries,actuallen); for (n = 0; n < N; n++) { av[n].~cl_I(); pv[n].~cl_I(); @@ -408,7 +408,7 @@ struct cl_rational_series_for_g : cl_pqa_series_stream { }; const cl_LF compute_eulerconst_besselintegral3 (uintC len) { - var uintC actuallen = len+1; // 1 Schutz-Digit + var uintC actuallen = len+1; // 1 guard digit var uintC sx = (uintC)(0.25*0.693148*intDsize*actuallen)+1; var uintC N = (uintC)(3.591121477*sx); var cl_I x = square((cl_I)sx); @@ -424,15 +424,15 @@ const cl_LF compute_eulerconst_besselintegral3 (uintC len) init1(cl_I, qv[n]) ((cl_I)n*(cl_I)n); } var cl_pq_series fseries; - fseries.pv = pv; fseries.qv = qv; fseries.qsv = NULL; - var cl_LF fsum = eval_rational_series(N,fseries,actuallen); + fseries.pv = pv; fseries.qv = qv; + var cl_LF fsum = eval_rational_series(N,fseries,actuallen); for (n = 0; n < N; n++) { pv[n].~cl_I(); qv[n].~cl_I(); } // Evaluate g(x). var cl_rational_series_for_g gseries = cl_rational_series_for_g(x); - var cl_LF gsum = eval_rational_series(N,gseries,actuallen); + var cl_LF gsum = eval_rational_series(N,gseries,actuallen); var cl_LF result = gsum/fsum - ln(cl_I_to_LF(sx,actuallen)); return shorten(result,len); // verkürzen und fertig } diff --git a/src/float/transcendental/cl_LF_exp1.cc b/src/float/transcendental/cl_LF_exp1.cc index 0f55fe5..286fad1 100644 --- a/src/float/transcendental/cl_LF_exp1.cc +++ b/src/float/transcendental/cl_LF_exp1.cc @@ -13,7 +13,6 @@ #include "cl_LF_tran.h" #include "cl_LF.h" #include "cln/integer.h" -#include "cl_alloca.h" #undef floor #include @@ -26,8 +25,8 @@ const cl_LF compute_exp1 (uintC len) // Evaluate a sum(0 <= n < N, a(n)/b(n) * (p(0)...p(n))/(q(0)...q(n))) // with appropriate N, and // a(n) = 1, b(n) = 1, p(n) = 1, q(n) = n for n>0. - var uintC actuallen = len+1; // 1 Schutz-Digit - // How many terms to we need for M bits of precision? N terms suffice, + var uintC actuallen = len+1; // 1 guard digit + // How many terms do we need for M bits of precision? N terms suffice, // provided that // 1/N! < 2^-M // <== N*(log(N)-1) > M*log(2) @@ -42,20 +41,22 @@ const cl_LF compute_exp1 (uintC len) var uintC N1 = (uintC)(0.693147*intDsize*actuallen/(::log((double)N0)-1.0)); var uintC N2 = (uintC)(0.693148*intDsize*actuallen/(::log((double)N1)-1.0))+1; var uintC N = N2+2; - CL_ALLOCA_STACK; - var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I)); - var uintC* qsv = (uintC*) cl_alloca(N*sizeof(uintC)); - var uintC n; - for (n = 0; n < N; n++) { - init1(cl_I, qv[n]) (n==0 ? 1 : n); - } - var cl_q_series series; - series.qv = qv; - series.qsv = (len >= 1000 && len <= 10000 ? qsv : 0); // tiny speedup - var cl_LF fsum = eval_rational_series(N,series,actuallen); - for (n = 0; n < N; n++) { - qv[n].~cl_I(); - } + struct rational_series_stream : cl_q_series_stream { + var uintC n; + static cl_q_series_term computenext (cl_q_series_stream& thisss) + { + var rational_series_stream& thiss = (rational_series_stream&)thisss; + var uintC n = thiss.n; + var cl_q_series_term result; + result.q = (n==0 ? 1 : n); + thiss.n = n+1; + return result; + } + rational_series_stream() + : cl_q_series_stream (rational_series_stream::computenext), + n(0) {} + } series; + var cl_LF fsum = eval_rational_series(N,series,actuallen); return shorten(fsum,len); // verkürzen und fertig } // Bit complexity (N = len): O(log(N)*M(N)). diff --git a/src/float/transcendental/cl_LF_exp_aux.cc b/src/float/transcendental/cl_LF_exp_aux.cc index 8ac3297..bc25233 100644 --- a/src/float/transcendental/cl_LF_exp_aux.cc +++ b/src/float/transcendental/cl_LF_exp_aux.cc @@ -13,7 +13,6 @@ #include "cl_LF_tran.h" #include "cl_LF.h" #include "cln/integer.h" -#include "cl_alloca.h" #include "cln/exception.h" #undef floor @@ -39,8 +38,8 @@ const cl_LF cl_exp_aux (const cl_I& p, uintE lq, uintC len) // Evaluate a sum(0 <= n < N, a(n)/b(n) * (p(0)...p(n))/(q(0)...q(n))) // with appropriate N, and // a(n) = 1, b(n) = 1, p(n) = p for n>0, q(n) = n*2^lq for n>0. - var uintC actuallen = len+1; // 1 Schutz-Digit - // How many terms to we need for M bits of precision? N terms suffice, + var uintC actuallen = len+1; // 1 guard digit + // How many terms do we need for M bits of precision? N terms suffice, // provided that // 1/(2^(N*lp)*N!) < 2^-M // <== N*(log(N)-1)+N*lp*log(2) > M*log(2) @@ -56,24 +55,30 @@ const cl_LF cl_exp_aux (const cl_I& p, uintE lq, uintC len) var uintC N1 = (uintC)(0.693147*intDsize*actuallen/(::log((double)N0)-1.0+0.693148*lp)); var uintC N2 = (uintC)(0.693148*intDsize*actuallen/(::log((double)N1)-1.0+0.693147*lp))+1; var uintC N = N2+2; - CL_ALLOCA_STACK; - var cl_I* pv = (cl_I*) cl_alloca(N*sizeof(cl_I)); - var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I)); - var uintC* qsv = (uintC*) cl_alloca(N*sizeof(uintC)); - var uintC n; - init1(cl_I, pv[0]) (1); - init1(cl_I, qv[0]) (1); - for (n = 1; n < N; n++) { - init1(cl_I, pv[n]) (p); - init1(cl_I, qv[n]) ((cl_I)n << lq); - } - var cl_pq_series series; - series.pv = pv; series.qv = qv; series.qsv = qsv; - var cl_LF fsum = eval_rational_series(N,series,actuallen); - for (n = 0; n < N; n++) { - pv[n].~cl_I(); - qv[n].~cl_I(); - } + struct rational_series_stream : cl_pq_series_stream { + var uintC n; + var cl_I p; + var uintE lq; + static cl_pq_series_term computenext (cl_pq_series_stream& thisss) + { + var rational_series_stream& thiss = (rational_series_stream&)thisss; + var uintC n = thiss.n; + var cl_pq_series_term result; + if (n==0) { + result.p = 1; + result.q = 1; + } else { + result.p = thiss.p; + result.q = (cl_I)n << thiss.lq; + } + thiss.n = n+1; + return result; + } + rational_series_stream(const cl_I& p_, uintE lq_) + : cl_pq_series_stream (rational_series_stream::computenext), + n (0), p(p_), lq(lq_) {} + } series(p, lq); + var cl_LF fsum = eval_rational_series(N,series,actuallen); return shorten(fsum,len); // verkürzen und fertig }} // Bit complexity (N = len, and if p has length O(log N) and ql = O(log N)): diff --git a/src/float/transcendental/cl_LF_pi.cc b/src/float/transcendental/cl_LF_pi.cc index e82abee..248e75e 100644 --- a/src/float/transcendental/cl_LF_pi.cc +++ b/src/float/transcendental/cl_LF_pi.cc @@ -61,7 +61,7 @@ const cl_LF compute_pi_brent_salamin (uintC len) // ) // (/ (expt a 2) t) // ) - var uintC actuallen = len + 1; // 1 Schutz-Digit + var uintC actuallen = len + 1; // 1 guard digit var uintE uexp_limit = LF_exp_mid - intDsize*len; // Ein Long-Float ist genau dann betragsmäßig <2^-n, wenn // sein Exponent < LF_exp_mid-n = uexp_limit ist. @@ -111,7 +111,7 @@ const cl_LF compute_pi_brent_salamin_quartic (uintC len) // = 2^(k+1) * [wa_k^4 - ((wa_k^2+wb_k^2)/2)^2]. // Hence, // pi = AGM(1,1/sqrt(2))^2 * 1/(1/2 - sum(k even, 2^k*[....])). - var uintC actuallen = len + 1; // 1 Schutz-Digit + var uintC actuallen = len + 1; // 1 guard digit var uintE uexp_limit = LF_exp_mid - intDsize*len; var cl_LF one = cl_I_to_LF(1,actuallen); var cl_LF a = one; @@ -154,7 +154,7 @@ const cl_LF compute_pi_ramanujan_163 (uintC len) // in precision. // The sum is best evaluated using fixed-point arithmetic, // so that the precision is reduced for the later summands. - var uintC actuallen = len + 4; // 4 Schutz-Digits + var uintC actuallen = len + 4; // 4 guard digits var sintC scale = intDsize*actuallen; static const cl_I A = "163096908"; static const cl_I B = "6541681608"; @@ -216,7 +216,7 @@ const cl_LF compute_pi_ramanujan_163_fast (uintC len) : cl_pqa_series_stream (rational_series_stream::computenext), n (0) {} } series; - var uintC actuallen = len + 4; // 4 Schutz-Digits + var uintC actuallen = len + 4; // 4 guard digits static const cl_I A = "163096908"; static const cl_I B = "6541681608"; static const cl_I J1 = "10939058860032000"; // 72*abs(J) @@ -230,7 +230,7 @@ const cl_LF compute_pi_ramanujan_163_fast (uintC len) var uintC N = (n_slope*actuallen)/32 + 1; // N > intDsize*log(2)/log(|J|) * actuallen, hence // |J|^-N < 2^(-intDsize*actuallen). - var cl_LF fsum = eval_rational_series(N,series,actuallen,actuallen); + var cl_LF fsum = eval_rational_series(N,series,actuallen,actuallen); static const cl_I J3 = "262537412640768000"; // -1728*J var cl_LF pires = sqrt(cl_I_to_LF(J3,actuallen)) / fsum; return shorten(pires,len); // verkürzen und fertig diff --git a/src/float/transcendental/cl_LF_ratseries_pq.cc b/src/float/transcendental/cl_LF_ratseries_pq.cc index 2939900..30907d2 100644 --- a/src/float/transcendental/cl_LF_ratseries_pq.cc +++ b/src/float/transcendental/cl_LF_ratseries_pq.cc @@ -1,4 +1,4 @@ -// eval_rational_series(). +// eval_rational_series(). // General includes. #include "cl_sysdep.h" @@ -14,6 +14,7 @@ #include "cln/real.h" #include "cln/exception.h" #include "cl_LF.h" +#include "cl_alloca.h" namespace cln { @@ -86,8 +87,18 @@ static void eval_pq_series_aux (uintC N1, uintC N2, } } +template<> +const cl_LF eval_rational_series (uintC N, const cl_pq_series& args, uintC len) +{ + if (N==0) + return cl_I_to_LF(0,len); + var cl_I Q, T; + eval_pq_series_aux(0,N,args,NULL,&Q,&T); + return cl_I_to_LF(T,len) / cl_I_to_LF(Q,len); +} + static void eval_pqs_series_aux (uintC N1, uintC N2, - const cl_pq_series& args, + const cl_pq_series& args, const uintC* qsv, cl_I* P, cl_I* Q, uintC* QS, cl_I* T) { switch (N2 - N1) { @@ -96,15 +107,15 @@ static void eval_pqs_series_aux (uintC N1, uintC N2, case 1: if (P) { *P = args.pv[N1]; } *Q = args.qv[N1]; - *QS = args.qsv[N1]; + *QS = qsv[N1]; *T = args.pv[N1]; break; case 2: { var cl_I p01 = args.pv[N1] * args.pv[N1+1]; if (P) { *P = p01; } *Q = args.qv[N1] * args.qv[N1+1]; - *QS = args.qsv[N1] + args.qsv[N1+1]; - *T = ((args.qv[N1+1] * args.pv[N1]) << args.qsv[N1+1]) + *QS = qsv[N1] + qsv[N1+1]; + *T = ((args.qv[N1+1] * args.pv[N1]) << qsv[N1+1]) + p01; break; } @@ -114,9 +125,9 @@ static void eval_pqs_series_aux (uintC N1, uintC N2, if (P) { *P = p012; } var cl_I q12 = args.qv[N1+1] * args.qv[N1+2]; *Q = args.qv[N1] * q12; - *QS = args.qsv[N1] + args.qsv[N1+1] + args.qsv[N1+2]; - *T = ((q12 * args.pv[N1]) << (args.qsv[N1+1] + args.qsv[N1+2])) - + ((args.qv[N1+2] * p01) << args.qsv[N1+2]) + *QS = qsv[N1] + qsv[N1+1] + qsv[N1+2]; + *T = ((q12 * args.pv[N1]) << (qsv[N1+1] + qsv[N1+2])) + + ((args.qv[N1+2] * p01) << qsv[N1+2]) + p012; break; } @@ -128,10 +139,10 @@ static void eval_pqs_series_aux (uintC N1, uintC N2, var cl_I q23 = args.qv[N1+2] * args.qv[N1+3]; var cl_I q123 = args.qv[N1+1] * q23; *Q = args.qv[N1] * q123; - *QS = args.qsv[N1] + args.qsv[N1+1] + args.qsv[N1+2] + args.qsv[N1+3]; - *T = ((((((q123 * args.pv[N1]) << args.qsv[N1+1]) - + q23 * p01) << args.qsv[N1+2]) - + args.qv[N1+3] * p012) << args.qsv[N1+3]) + *QS = qsv[N1] + qsv[N1+1] + qsv[N1+2] + qsv[N1+3]; + *T = ((((((q123 * args.pv[N1]) << qsv[N1+1]) + + q23 * p01) << qsv[N1+2]) + + args.qv[N1+3] * p012) << qsv[N1+3]) + p0123; break; } @@ -140,11 +151,11 @@ static void eval_pqs_series_aux (uintC N1, uintC N2, // Compute left part. var cl_I LP, LQ, LT; var uintC LQS; - eval_pqs_series_aux(N1,Nm,args,&LP,&LQ,&LQS,<); + eval_pqs_series_aux(N1,Nm,args,qsv,&LP,&LQ,&LQS,<); // Compute right part. var cl_I RP, RQ, RT; var uintC RQS; - eval_pqs_series_aux(Nm,N2,args,(P?&RP:(cl_I*)0),&RQ,&RQS,&RT); + eval_pqs_series_aux(Nm,N2,args,qsv,(P?&RP:(cl_I*)0),&RQ,&RQS,&RT); // Put together partial results. if (P) { *P = LP*RP; } *Q = LQ*RQ; @@ -156,36 +167,25 @@ static void eval_pqs_series_aux (uintC N1, uintC N2, } } -const cl_LF eval_rational_series (uintC N, const cl_pq_series& args, uintC len) +template<> +const cl_LF eval_rational_series (uintC N, const cl_pq_series& args, uintC len) { if (N==0) return cl_I_to_LF(0,len); var cl_I Q, T; - if (!args.qsv) { - eval_pq_series_aux(0,N,args,NULL,&Q,&T); - return cl_I_to_LF(T,len) / cl_I_to_LF(Q,len); - } else { - // Precomputation of the shift counts: - // Split qv[n] into qv[n]*2^qsv[n]. - { - var cl_I* qp = args.qv; - var uintC* qsp = args.qsv; - for (var uintC n = 0; n < N; n++, qp++, qsp++) { - // Pull out maximal power of 2 out of *qp = args.qv[n]. - var uintC qs = 0; - if (!zerop(*qp)) { - qs = ord2(*qp); - if (qs > 0) - *qp = *qp >> qs; - } - *qsp = qs; - } - } - // Main computation. - var uintC QS; - eval_pqs_series_aux(0,N,args,NULL,&Q,&QS,&T); - return cl_I_to_LF(T,len) / scale_float(cl_I_to_LF(Q,len),QS); + // Precomputation of the shift counts: + // Split qv[n] into qv[n]*2^qsv[n]. + CL_ALLOCA_STACK; + var uintC* qsv = (uintC*) cl_alloca(N*sizeof(uintC)); + var cl_I* qp = args.qv; + var uintC* qsp = qsv; + for (var uintC n = 0; n < N; n++, qp++, qsp++) { + *qsp = pullout_shiftcount(*qp); } + // Main computation. + var uintC QS; + eval_pqs_series_aux(0,N,args,qsv,NULL,&Q,&QS,&T); + return cl_I_to_LF(T,len) / scale_float(cl_I_to_LF(Q,len),QS); } static void eval_pq_series_aux (uintC N1, uintC N2, @@ -262,7 +262,8 @@ static void eval_pq_series_aux (uintC N1, uintC N2, } } -const cl_LF eval_rational_series (uintC N, cl_pq_series_stream& args, uintC len) +template<> +const cl_LF eval_rational_series (uintC N, cl_pq_series_stream& args, uintC len) { if (N==0) return cl_I_to_LF(0,len); @@ -271,6 +272,108 @@ const cl_LF eval_rational_series (uintC N, cl_pq_series_stream& args, uintC len) return cl_I_to_LF(T,len) / cl_I_to_LF(Q,len); } +static void eval_pqs_series_aux (uintC N1, uintC N2, + cl_pq_series_stream& args, + cl_I* P, cl_I* Q, uintC* QS, cl_I* T) +{ + switch (N2 - N1) { + case 0: + throw runtime_exception(); break; + case 1: { + var cl_pq_series_term v0 = args.next(); // [N1] + var uintC qs0 = pullout_shiftcount(v0.q); + if (P) { *P = v0.p; } + *Q = v0.q; + *QS = qs0; + *T = v0.p; + break; + } + case 2: { + var cl_pq_series_term v0 = args.next(); // [N1] + var cl_pq_series_term v1 = args.next(); // [N1+1] + var uintC qs0 = pullout_shiftcount(v0.q); + var uintC qs1 = pullout_shiftcount(v1.q); + var cl_I p01 = v0.p * v1.p; + if (P) { *P = p01; } + *Q = v0.q * v1.q; + *QS = qs0 + qs1; + *T = ((v1.q * v0.p) << qs1) + + p01; + break; + } + case 3: { + var cl_pq_series_term v0 = args.next(); // [N1] + var cl_pq_series_term v1 = args.next(); // [N1+1] + var cl_pq_series_term v2 = args.next(); // [N1+2] + var uintC qs0 = pullout_shiftcount(v0.q); + var uintC qs1 = pullout_shiftcount(v1.q); + var uintC qs2 = pullout_shiftcount(v2.q); + var cl_I p01 = v0.p * v1.p; + var cl_I p012 = p01 * v2.p; + if (P) { *P = p012; } + var cl_I q12 = v1.q * v2.q; + *Q = v0.q * q12; + *QS = qs0 + qs1 + qs2; + *T = ((q12 * v0.p) << (qs1 + qs2)) + + ((v2.q * p01) << qs2) + + p012; + break; + } + case 4: { + var cl_pq_series_term v0 = args.next(); // [N1] + var cl_pq_series_term v1 = args.next(); // [N1+1] + var cl_pq_series_term v2 = args.next(); // [N1+2] + var cl_pq_series_term v3 = args.next(); // [N1+3] + var uintC qs0 = pullout_shiftcount(v0.q); + var uintC qs1 = pullout_shiftcount(v1.q); + var uintC qs2 = pullout_shiftcount(v2.q); + var uintC qs3 = pullout_shiftcount(v3.q); + var cl_I p01 = v0.p * v1.p; + var cl_I p012 = p01 * v2.p; + var cl_I p0123 = p012 * v3.p; + if (P) { *P = p0123; } + var cl_I q23 = v2.q * v3.q; + var cl_I q123 = v1.q * q23; + *Q = v0.q * q123; + *QS = qs0 + qs1 + qs2 + qs3; + *T = ((((((q123 * v0.p) << qs1) + + q23 * p01) << qs2) + + v3.q * p012) << qs3) + + p0123; + break; + } + default: { + var uintC Nm = (N1+N2)/2; // midpoint + // Compute left part. + var cl_I LP, LQ, LT; + var uintC LQS; + eval_pqs_series_aux(N1,Nm,args,&LP,&LQ,&LQS,<); + // Compute right part. + var cl_I RP, RQ, RT; + var uintC RQS; + eval_pqs_series_aux(Nm,N2,args,(P?&RP:(cl_I*)0),&RQ,&RQS,&RT); + // Put together partial results. + if (P) { *P = LP*RP; } + *Q = LQ*RQ; + *QS = LQS+RQS; + // S = LS + LP/LQ * RS, so T = RQ*LT + LP*RT. + *T = ((RQ*LT) << RQS) + LP*RT; + break; + } + } +} + +template<> +const cl_LF eval_rational_series (uintC N, cl_pq_series_stream& args, uintC len) +{ + if (N==0) + return cl_I_to_LF(0,len); + var cl_I Q, T; + var uintC QS; + eval_pqs_series_aux(0,N,args,NULL,&Q,&QS,&T); + return cl_I_to_LF(T,len) / scale_float(cl_I_to_LF(Q,len),QS); +} + static void eval_pq_series_aux (uintC N1, uintC N2, cl_pq_series_stream& args, cl_R* P, cl_R* Q, cl_R* T, @@ -351,7 +454,8 @@ static void eval_pq_series_aux (uintC N1, uintC N2, } } -const cl_LF eval_rational_series (uintC N, cl_pq_series_stream& args, uintC len, uintC trunclen) +template<> +const cl_LF eval_rational_series (uintC N, cl_pq_series_stream& args, uintC len, uintC trunclen) { if (N==0) return cl_I_to_LF(0,len); diff --git a/src/float/transcendental/cl_LF_ratseries_pqa.cc b/src/float/transcendental/cl_LF_ratseries_pqa.cc index f11e2a7..a9e5877 100644 --- a/src/float/transcendental/cl_LF_ratseries_pqa.cc +++ b/src/float/transcendental/cl_LF_ratseries_pqa.cc @@ -1,4 +1,4 @@ -// eval_rational_series(). +// eval_rational_series(). // General includes. #include "cl_sysdep.h" @@ -14,6 +14,7 @@ #include "cln/real.h" #include "cln/exception.h" #include "cl_LF.h" +#include "cl_alloca.h" namespace cln { @@ -86,8 +87,18 @@ static void eval_pqa_series_aux (uintC N1, uintC N2, } } +template<> +const cl_LF eval_rational_series (uintC N, const cl_pqa_series& args, uintC len) +{ + if (N==0) + return cl_I_to_LF(0,len); + var cl_I Q, T; + eval_pqa_series_aux(0,N,args,NULL,&Q,&T); + return cl_I_to_LF(T,len) / cl_I_to_LF(Q,len); +} + static void eval_pqsa_series_aux (uintC N1, uintC N2, - const cl_pqa_series& args, + const cl_pqa_series& args, const uintC* qsv, cl_I* P, cl_I* Q, uintC* QS, cl_I* T) { switch (N2 - N1) { @@ -96,15 +107,15 @@ static void eval_pqsa_series_aux (uintC N1, uintC N2, case 1: if (P) { *P = args.pv[N1]; } *Q = args.qv[N1]; - *QS = args.qsv[N1]; + *QS = qsv[N1]; *T = args.av[N1] * args.pv[N1]; break; case 2: { var cl_I p01 = args.pv[N1] * args.pv[N1+1]; if (P) { *P = p01; } *Q = args.qv[N1] * args.qv[N1+1]; - *QS = args.qsv[N1] + args.qsv[N1+1]; - *T = ((args.qv[N1+1] * args.av[N1] * args.pv[N1]) << args.qsv[N1+1]) + *QS = qsv[N1] + qsv[N1+1]; + *T = ((args.qv[N1+1] * args.av[N1] * args.pv[N1]) << qsv[N1+1]) + args.av[N1+1] * p01; break; } @@ -114,9 +125,9 @@ static void eval_pqsa_series_aux (uintC N1, uintC N2, if (P) { *P = p012; } var cl_I q12 = args.qv[N1+1] * args.qv[N1+2]; *Q = args.qv[N1] * q12; - *QS = args.qsv[N1] + args.qsv[N1+1] + args.qsv[N1+2]; - *T = ((q12 * args.av[N1] * args.pv[N1]) << (args.qsv[N1+1] + args.qsv[N1+2])) - + ((args.qv[N1+2] * args.av[N1+1] * p01) << args.qsv[N1+2]) + *QS = qsv[N1] + qsv[N1+1] + qsv[N1+2]; + *T = ((q12 * args.av[N1] * args.pv[N1]) << (qsv[N1+1] + qsv[N1+2])) + + ((args.qv[N1+2] * args.av[N1+1] * p01) << qsv[N1+2]) + args.av[N1+2] * p012; break; } @@ -128,10 +139,10 @@ static void eval_pqsa_series_aux (uintC N1, uintC N2, var cl_I q23 = args.qv[N1+2] * args.qv[N1+3]; var cl_I q123 = args.qv[N1+1] * q23; *Q = args.qv[N1] * q123; - *QS = args.qsv[N1] + args.qsv[N1+1] + args.qsv[N1+2] + args.qsv[N1+3]; - *T = ((((((q123 * args.av[N1] * args.pv[N1]) << args.qsv[N1+1]) - + q23 * args.av[N1+1] * p01) << args.qsv[N1+2]) - + args.qv[N1+3] * args.av[N1+2] * p012) << args.qsv[N1+3]) + *QS = qsv[N1] + qsv[N1+1] + qsv[N1+2] + qsv[N1+3]; + *T = ((((((q123 * args.av[N1] * args.pv[N1]) << qsv[N1+1]) + + q23 * args.av[N1+1] * p01) << qsv[N1+2]) + + args.qv[N1+3] * args.av[N1+2] * p012) << qsv[N1+3]) + args.av[N1+3] * p0123; break; } @@ -140,11 +151,11 @@ static void eval_pqsa_series_aux (uintC N1, uintC N2, // Compute left part. var cl_I LP, LQ, LT; var uintC LQS; - eval_pqsa_series_aux(N1,Nm,args,&LP,&LQ,&LQS,<); + eval_pqsa_series_aux(N1,Nm,args,qsv,&LP,&LQ,&LQS,<); // Compute right part. var cl_I RP, RQ, RT; var uintC RQS; - eval_pqsa_series_aux(Nm,N2,args,(P?&RP:(cl_I*)0),&RQ,&RQS,&RT); + eval_pqsa_series_aux(Nm,N2,args,qsv,(P?&RP:(cl_I*)0),&RQ,&RQS,&RT); // Put together partial results. if (P) { *P = LP*RP; } *Q = LQ*RQ; @@ -156,36 +167,25 @@ static void eval_pqsa_series_aux (uintC N1, uintC N2, } } -const cl_LF eval_rational_series (uintC N, const cl_pqa_series& args, uintC len) +template<> +const cl_LF eval_rational_series (uintC N, const cl_pqa_series& args, uintC len) { if (N==0) return cl_I_to_LF(0,len); var cl_I Q, T; - if (!args.qsv) { - eval_pqa_series_aux(0,N,args,NULL,&Q,&T); - return cl_I_to_LF(T,len) / cl_I_to_LF(Q,len); - } else { - // Precomputation of the shift counts: - // Split qv[n] into qv[n]*2^qsv[n]. - { - var cl_I* qp = args.qv; - var uintC* qsp = args.qsv; - for (var uintC n = 0; n < N; n++, qp++, qsp++) { - // Pull out maximal power of 2 out of *qp = args.qv[n]. - var uintC qs = 0; - if (!zerop(*qp)) { - qs = ord2(*qp); - if (qs > 0) - *qp = *qp >> qs; - } - *qsp = qs; - } - } - // Main computation. - var uintC QS; - eval_pqsa_series_aux(0,N,args,NULL,&Q,&QS,&T); - return cl_I_to_LF(T,len) / scale_float(cl_I_to_LF(Q,len),QS); + // Precomputation of the shift counts: + // Split qv[n] into qv[n]*2^qsv[n]. + CL_ALLOCA_STACK; + var uintC* qsv = (uintC*) cl_alloca(N*sizeof(uintC)); + var cl_I* qp = args.qv; + var uintC* qsp = qsv; + for (var uintC n = 0; n < N; n++, qp++, qsp++) { + *qsp = pullout_shiftcount(*qp); } + // Main computation. + var uintC QS; + eval_pqsa_series_aux(0,N,args,qsv,NULL,&Q,&QS,&T); + return cl_I_to_LF(T,len) / scale_float(cl_I_to_LF(Q,len),QS); } static void eval_pqa_series_aux (uintC N1, uintC N2, @@ -262,7 +262,8 @@ static void eval_pqa_series_aux (uintC N1, uintC N2, } } -const cl_LF eval_rational_series (uintC N, cl_pqa_series_stream& args, uintC len) +template<> +const cl_LF eval_rational_series (uintC N, cl_pqa_series_stream& args, uintC len) { if (N==0) return cl_I_to_LF(0,len); @@ -351,7 +352,8 @@ static void eval_pqa_series_aux (uintC N1, uintC N2, } } -const cl_LF eval_rational_series (uintC N, cl_pqa_series_stream& args, uintC len, uintC trunclen) +template<> +const cl_LF eval_rational_series (uintC N, cl_pqa_series_stream& args, uintC len, uintC trunclen) { if (N==0) return cl_I_to_LF(0,len); diff --git a/src/float/transcendental/cl_LF_ratseries_pqab.cc b/src/float/transcendental/cl_LF_ratseries_pqab.cc index 8c624fd..ce41b45 100644 --- a/src/float/transcendental/cl_LF_ratseries_pqab.cc +++ b/src/float/transcendental/cl_LF_ratseries_pqab.cc @@ -1,4 +1,4 @@ -// eval_rational_series(). +// eval_rational_series(). // General includes. #include "cl_sysdep.h" @@ -14,6 +14,7 @@ #include "cln/real.h" #include "cln/exception.h" #include "cl_LF.h" +#include "cl_alloca.h" namespace cln { @@ -94,8 +95,18 @@ static void eval_pqab_series_aux (uintC N1, uintC N2, } } +template<> +const cl_LF eval_rational_series (uintC N, const cl_pqab_series& args, uintC len) +{ + if (N==0) + return cl_I_to_LF(0,len); + var cl_I Q, B, T; + eval_pqab_series_aux(0,N,args,NULL,&Q,&B,&T); + return cl_I_to_LF(T,len) / cl_I_to_LF(B*Q,len); +} + static void eval_pqsab_series_aux (uintC N1, uintC N2, - const cl_pqab_series& args, + const cl_pqab_series& args, const uintC* qsv, cl_I* P, cl_I* Q, uintC* QS, cl_I* B, cl_I* T) { switch (N2 - N1) { @@ -104,7 +115,7 @@ static void eval_pqsab_series_aux (uintC N1, uintC N2, case 1: if (P) { *P = args.pv[N1]; } *Q = args.qv[N1]; - *QS = args.qsv[N1]; + *QS = qsv[N1]; *B = args.bv[N1]; *T = args.av[N1] * args.pv[N1]; break; @@ -112,9 +123,9 @@ static void eval_pqsab_series_aux (uintC N1, uintC N2, var cl_I p01 = args.pv[N1] * args.pv[N1+1]; if (P) { *P = p01; } *Q = args.qv[N1] * args.qv[N1+1]; - *QS = args.qsv[N1] + args.qsv[N1+1]; + *QS = qsv[N1] + qsv[N1+1]; *B = args.bv[N1] * args.bv[N1+1]; - *T = ((args.bv[N1+1] * args.qv[N1+1] * args.av[N1] * args.pv[N1]) << args.qsv[N1+1]) + *T = ((args.bv[N1+1] * args.qv[N1+1] * args.av[N1] * args.pv[N1]) << qsv[N1+1]) + args.bv[N1] * args.av[N1+1] * p01; break; } @@ -124,11 +135,11 @@ static void eval_pqsab_series_aux (uintC N1, uintC N2, if (P) { *P = p012; } var cl_I q12 = args.qv[N1+1] * args.qv[N1+2]; *Q = args.qv[N1] * q12; - *QS = args.qsv[N1] + args.qsv[N1+1] + args.qsv[N1+2]; + *QS = qsv[N1] + qsv[N1+1] + qsv[N1+2]; var cl_I b12 = args.bv[N1+1] * args.bv[N1+2]; *B = args.bv[N1] * b12; - *T = ((b12 * q12 * args.av[N1] * args.pv[N1]) << (args.qsv[N1+1] + args.qsv[N1+2])) - + args.bv[N1] * (((args.bv[N1+2] * args.qv[N1+2] * args.av[N1+1] * p01) << args.qsv[N1+2]) + *T = ((b12 * q12 * args.av[N1] * args.pv[N1]) << (qsv[N1+1] + qsv[N1+2])) + + args.bv[N1] * (((args.bv[N1+2] * args.qv[N1+2] * args.av[N1+1] * p01) << qsv[N1+2]) + args.bv[N1+1] * args.av[N1+2] * p012); break; } @@ -140,13 +151,13 @@ static void eval_pqsab_series_aux (uintC N1, uintC N2, var cl_I q23 = args.qv[N1+2] * args.qv[N1+3]; var cl_I q123 = args.qv[N1+1] * q23; *Q = args.qv[N1] * q123; - *QS = args.qsv[N1] + args.qsv[N1+1] + args.qsv[N1+2] + args.qsv[N1+3]; + *QS = qsv[N1] + qsv[N1+1] + qsv[N1+2] + qsv[N1+3]; var cl_I b01 = args.bv[N1] * args.bv[N1+1]; var cl_I b23 = args.bv[N1+2] * args.bv[N1+3]; *B = b01 * b23; - *T = ((b23 * (((args.bv[N1+1] * q123 * args.av[N1] * args.pv[N1]) << args.qsv[N1+1]) - + args.bv[N1] * q23 * args.av[N1+1] * p01)) << (args.qsv[N1+2] + args.qsv[N1+3])) - + b01 * (((args.bv[N1+3] * args.qv[N1+3] * args.av[N1+2] * p012) << args.qsv[N1+3]) + *T = ((b23 * (((args.bv[N1+1] * q123 * args.av[N1] * args.pv[N1]) << qsv[N1+1]) + + args.bv[N1] * q23 * args.av[N1+1] * p01)) << (qsv[N1+2] + qsv[N1+3])) + + b01 * (((args.bv[N1+3] * args.qv[N1+3] * args.av[N1+2] * p012) << qsv[N1+3]) + args.bv[N1+2] * args.av[N1+3] * p0123); break; } @@ -155,11 +166,11 @@ static void eval_pqsab_series_aux (uintC N1, uintC N2, // Compute left part. var cl_I LP, LQ, LB, LT; var uintC LQS; - eval_pqsab_series_aux(N1,Nm,args,&LP,&LQ,&LQS,&LB,<); + eval_pqsab_series_aux(N1,Nm,args,qsv,&LP,&LQ,&LQS,&LB,<); // Compute right part. var cl_I RP, RQ, RB, RT; var uintC RQS; - eval_pqsab_series_aux(Nm,N2,args,(P?&RP:(cl_I*)0),&RQ,&RQS,&RB,&RT); + eval_pqsab_series_aux(Nm,N2,args,qsv,(P?&RP:(cl_I*)0),&RQ,&RQS,&RB,&RT); // Put together partial results. if (P) { *P = LP*RP; } *Q = LQ*RQ; @@ -172,36 +183,25 @@ static void eval_pqsab_series_aux (uintC N1, uintC N2, } } -const cl_LF eval_rational_series (uintC N, const cl_pqab_series& args, uintC len) +template<> +const cl_LF eval_rational_series (uintC N, const cl_pqab_series& args, uintC len) { if (N==0) return cl_I_to_LF(0,len); var cl_I Q, B, T; - if (!args.qsv) { - eval_pqab_series_aux(0,N,args,NULL,&Q,&B,&T); - return cl_I_to_LF(T,len) / cl_I_to_LF(B*Q,len); - } else { - // Precomputation of the shift counts: - // Split qv[n] into qv[n]*2^qsv[n]. - { - var cl_I* qp = args.qv; - var uintC* qsp = args.qsv; - for (var uintC n = 0; n < N; n++, qp++, qsp++) { - // Pull out maximal power of 2 out of *qp = args.qv[n]. - var uintC qs = 0; - if (!zerop(*qp)) { - qs = ord2(*qp); - if (qs > 0) - *qp = *qp >> qs; - } - *qsp = qs; - } - } - // Main computation. - var uintC QS; - eval_pqsab_series_aux(0,N,args,NULL,&Q,&QS,&B,&T); - return cl_I_to_LF(T,len) / scale_float(cl_I_to_LF(B*Q,len),QS); + // Precomputation of the shift counts: + // Split qv[n] into qv[n]*2^qsv[n]. + CL_ALLOCA_STACK; + var uintC* qsv = (uintC*) cl_alloca(N*sizeof(uintC)); + var cl_I* qp = args.qv; + var uintC* qsp = qsv; + for (var uintC n = 0; n < N; n++, qp++, qsp++) { + *qsp = pullout_shiftcount(*qp); } + // Main computation. + var uintC QS; + eval_pqsab_series_aux(0,N,args,qsv,NULL,&Q,&QS,&B,&T); + return cl_I_to_LF(T,len) / scale_float(cl_I_to_LF(B*Q,len),QS); } static void eval_pqab_series_aux (uintC N1, uintC N2, @@ -286,7 +286,8 @@ static void eval_pqab_series_aux (uintC N1, uintC N2, } } -const cl_LF eval_rational_series (uintC N, cl_pqab_series_stream& args, uintC len) +template<> +const cl_LF eval_rational_series (uintC N, cl_pqab_series_stream& args, uintC len) { if (N==0) return cl_I_to_LF(0,len); @@ -384,7 +385,8 @@ static void eval_pqab_series_aux (uintC N1, uintC N2, } } -const cl_LF eval_rational_series (uintC N, cl_pqab_series_stream& args, uintC len, uintC trunclen) +template<> +const cl_LF eval_rational_series (uintC N, cl_pqab_series_stream& args, uintC len, uintC trunclen) { if (N==0) return cl_I_to_LF(0,len); diff --git a/src/float/transcendental/cl_LF_ratseries_pqb.cc b/src/float/transcendental/cl_LF_ratseries_pqb.cc index 84dbf79..a65c2f7 100644 --- a/src/float/transcendental/cl_LF_ratseries_pqb.cc +++ b/src/float/transcendental/cl_LF_ratseries_pqb.cc @@ -1,4 +1,4 @@ -// eval_rational_series(). +// eval_rational_series(). // General includes. #include "cl_sysdep.h" @@ -14,6 +14,7 @@ #include "cln/real.h" #include "cln/exception.h" #include "cl_LF.h" +#include "cl_alloca.h" namespace cln { @@ -94,8 +95,18 @@ static void eval_pqb_series_aux (uintC N1, uintC N2, } } +template<> +const cl_LF eval_rational_series (uintC N, const cl_pqb_series& args, uintC len) +{ + if (N==0) + return cl_I_to_LF(0,len); + var cl_I Q, B, T; + eval_pqb_series_aux(0,N,args,NULL,&Q,&B,&T); + return cl_I_to_LF(T,len) / cl_I_to_LF(B*Q,len); +} + static void eval_pqsb_series_aux (uintC N1, uintC N2, - const cl_pqb_series& args, + const cl_pqb_series& args, const uintC* qsv, cl_I* P, cl_I* Q, uintC* QS, cl_I* B, cl_I* T) { switch (N2 - N1) { @@ -104,7 +115,7 @@ static void eval_pqsb_series_aux (uintC N1, uintC N2, case 1: if (P) { *P = args.pv[N1]; } *Q = args.qv[N1]; - *QS = args.qsv[N1]; + *QS = qsv[N1]; *B = args.bv[N1]; *T = args.pv[N1]; break; @@ -112,9 +123,9 @@ static void eval_pqsb_series_aux (uintC N1, uintC N2, var cl_I p01 = args.pv[N1] * args.pv[N1+1]; if (P) { *P = p01; } *Q = args.qv[N1] * args.qv[N1+1]; - *QS = args.qsv[N1] + args.qsv[N1+1]; + *QS = qsv[N1] + qsv[N1+1]; *B = args.bv[N1] * args.bv[N1+1]; - *T = ((args.bv[N1+1] * args.qv[N1+1] * args.pv[N1]) << args.qsv[N1+1]) + *T = ((args.bv[N1+1] * args.qv[N1+1] * args.pv[N1]) << qsv[N1+1]) + args.bv[N1] * p01; break; } @@ -124,11 +135,11 @@ static void eval_pqsb_series_aux (uintC N1, uintC N2, if (P) { *P = p012; } var cl_I q12 = args.qv[N1+1] * args.qv[N1+2]; *Q = args.qv[N1] * q12; - *QS = args.qsv[N1] + args.qsv[N1+1] + args.qsv[N1+2]; + *QS = qsv[N1] + qsv[N1+1] + qsv[N1+2]; var cl_I b12 = args.bv[N1+1] * args.bv[N1+2]; *B = args.bv[N1] * b12; - *T = ((b12 * q12 * args.pv[N1]) << (args.qsv[N1+1] + args.qsv[N1+2])) - + args.bv[N1] * (((args.bv[N1+2] * args.qv[N1+2] * p01) << args.qsv[N1+2]) + *T = ((b12 * q12 * args.pv[N1]) << (qsv[N1+1] + qsv[N1+2])) + + args.bv[N1] * (((args.bv[N1+2] * args.qv[N1+2] * p01) << qsv[N1+2]) + args.bv[N1+1] * p012); break; } @@ -140,13 +151,13 @@ static void eval_pqsb_series_aux (uintC N1, uintC N2, var cl_I q23 = args.qv[N1+2] * args.qv[N1+3]; var cl_I q123 = args.qv[N1+1] * q23; *Q = args.qv[N1] * q123; - *QS = args.qsv[N1] + args.qsv[N1+1] + args.qsv[N1+2] + args.qsv[N1+3]; + *QS = qsv[N1] + qsv[N1+1] + qsv[N1+2] + qsv[N1+3]; var cl_I b01 = args.bv[N1] * args.bv[N1+1]; var cl_I b23 = args.bv[N1+2] * args.bv[N1+3]; *B = b01 * b23; - *T = ((b23 * (((args.bv[N1+1] * q123 * args.pv[N1]) << args.qsv[N1+1]) - + args.bv[N1] * q23 * p01)) << (args.qsv[N1+2] + args.qsv[N1+3])) - + b01 * (((args.bv[N1+3] * args.qv[N1+3] * p012) << args.qsv[N1+3]) + *T = ((b23 * (((args.bv[N1+1] * q123 * args.pv[N1]) << qsv[N1+1]) + + args.bv[N1] * q23 * p01)) << (qsv[N1+2] + qsv[N1+3])) + + b01 * (((args.bv[N1+3] * args.qv[N1+3] * p012) << qsv[N1+3]) + args.bv[N1+2] * p0123); break; } @@ -155,11 +166,11 @@ static void eval_pqsb_series_aux (uintC N1, uintC N2, // Compute left part. var cl_I LP, LQ, LB, LT; var uintC LQS; - eval_pqsb_series_aux(N1,Nm,args,&LP,&LQ,&LQS,&LB,<); + eval_pqsb_series_aux(N1,Nm,args,qsv,&LP,&LQ,&LQS,&LB,<); // Compute right part. var cl_I RP, RQ, RB, RT; var uintC RQS; - eval_pqsb_series_aux(Nm,N2,args,(P?&RP:(cl_I*)0),&RQ,&RQS,&RB,&RT); + eval_pqsb_series_aux(Nm,N2,args,qsv,(P?&RP:(cl_I*)0),&RQ,&RQS,&RB,&RT); // Put together partial results. if (P) { *P = LP*RP; } *Q = LQ*RQ; @@ -172,36 +183,25 @@ static void eval_pqsb_series_aux (uintC N1, uintC N2, } } -const cl_LF eval_rational_series (uintC N, const cl_pqb_series& args, uintC len) +template<> +const cl_LF eval_rational_series (uintC N, const cl_pqb_series& args, uintC len) { if (N==0) return cl_I_to_LF(0,len); var cl_I Q, B, T; - if (!args.qsv) { - eval_pqb_series_aux(0,N,args,NULL,&Q,&B,&T); - return cl_I_to_LF(T,len) / cl_I_to_LF(B*Q,len); - } else { - // Precomputation of the shift counts: - // Split qv[n] into qv[n]*2^qsv[n]. - { - var cl_I* qp = args.qv; - var uintC* qsp = args.qsv; - for (var uintC n = 0; n < N; n++, qp++, qsp++) { - // Pull out maximal power of 2 out of *qp = args.qv[n]. - var uintC qs = 0; - if (!zerop(*qp)) { - qs = ord2(*qp); - if (qs > 0) - *qp = *qp >> qs; - } - *qsp = qs; - } - } - // Main computation. - var uintC QS; - eval_pqsb_series_aux(0,N,args,NULL,&Q,&QS,&B,&T); - return cl_I_to_LF(T,len) / scale_float(cl_I_to_LF(B*Q,len),QS); + // Precomputation of the shift counts: + // Split qv[n] into qv[n]*2^qsv[n]. + CL_ALLOCA_STACK; + var uintC* qsv = (uintC*) cl_alloca(N*sizeof(uintC)); + var cl_I* qp = args.qv; + var uintC* qsp = qsv; + for (var uintC n = 0; n < N; n++, qp++, qsp++) { + *qsp = pullout_shiftcount(*qp); } + // Main computation. + var uintC QS; + eval_pqsb_series_aux(0,N,args,qsv,NULL,&Q,&QS,&B,&T); + return cl_I_to_LF(T,len) / scale_float(cl_I_to_LF(B*Q,len),QS); } static void eval_pqb_series_aux (uintC N1, uintC N2, @@ -286,7 +286,8 @@ static void eval_pqb_series_aux (uintC N1, uintC N2, } } -const cl_LF eval_rational_series (uintC N, cl_pqb_series_stream& args, uintC len) +template<> +const cl_LF eval_rational_series (uintC N, cl_pqb_series_stream& args, uintC len) { if (N==0) return cl_I_to_LF(0,len); diff --git a/src/float/transcendental/cl_LF_ratseries_q.cc b/src/float/transcendental/cl_LF_ratseries_q.cc index 6e93909..b0beeaf 100644 --- a/src/float/transcendental/cl_LF_ratseries_q.cc +++ b/src/float/transcendental/cl_LF_ratseries_q.cc @@ -1,4 +1,4 @@ -// eval_rational_series(). +// eval_rational_series(). // General includes. #include "cl_sysdep.h" @@ -74,7 +74,75 @@ static void eval_q_series_aux (uintC N1, uintC N2, } } -const cl_LF eval_rational_series (uintC N, const cl_q_series& args, uintC len) +template<> +const cl_LF eval_rational_series (uintC N, const cl_q_series& args, uintC len) +{ + if (N==0) + return cl_I_to_LF(0,len); + var cl_I Q, T; + eval_q_series_aux(0,N,args,&Q,&T); + return cl_I_to_LF(T,len) / cl_I_to_LF(Q,len); +} + +static void eval_q_series_aux (uintC N1, uintC N2, + cl_q_series_stream& args, + cl_I* Q, cl_I* T) +{ + switch (N2 - N1) { + case 0: + throw runtime_exception(); break; + case 1: { + var cl_q_series_term v0 = args.next(); // [N1] + *Q = v0.q; + *T = 1; + break; + } + case 2: { + var cl_q_series_term v0 = args.next(); // [N1] + var cl_q_series_term v1 = args.next(); // [N1+1] + *Q = v0.q * v1.q; + *T = v1.q + 1; + break; + } + case 3: { + var cl_q_series_term v0 = args.next(); // [N1] + var cl_q_series_term v1 = args.next(); // [N1+1] + var cl_q_series_term v2 = args.next(); // [N1+2] + var cl_I q12 = v1.q * v2.q; + *Q = v0.q * q12; + *T = q12 + v2.q + 1; + break; + } + case 4: { + var cl_q_series_term v0 = args.next(); // [N1] + var cl_q_series_term v1 = args.next(); // [N1+1] + var cl_q_series_term v2 = args.next(); // [N1+2] + var cl_q_series_term v3 = args.next(); // [N1+3] + var cl_I q23 = v2.q * v3.q; + var cl_I q123 = v1.q * q23; + *Q = v0.q * q123; + *T = q123 + q23 + v3.q + 1; + break; + } + default: { + var uintC Nm = (N1+N2)/2; // midpoint + // Compute left part. + var cl_I LQ, LT; + eval_q_series_aux(N1,Nm,args,&LQ,<); + // Compute right part. + var cl_I RQ, RT; + eval_q_series_aux(Nm,N2,args,&RQ,&RT); + // Put together partial results. + *Q = LQ*RQ; + // S = LS + 1/LQ * RS, so T = RQ*LT + RT. + *T = RQ*LT + RT; + break; + } + } +} + +template<> +const cl_LF eval_rational_series (uintC N, cl_q_series_stream& args, uintC len) { if (N==0) return cl_I_to_LF(0,len); diff --git a/src/float/transcendental/cl_LF_ratseries_qa.cc b/src/float/transcendental/cl_LF_ratseries_qa.cc index 306089e..e42ca3f 100644 --- a/src/float/transcendental/cl_LF_ratseries_qa.cc +++ b/src/float/transcendental/cl_LF_ratseries_qa.cc @@ -1,4 +1,4 @@ -// eval_rational_series(). +// eval_rational_series(). // General includes. #include "cl_sysdep.h" @@ -74,7 +74,8 @@ static void eval_qa_series_aux (uintC N1, uintC N2, } } -const cl_LF eval_rational_series (uintC N, const cl_qa_series& args, uintC len) +template<> +const cl_LF eval_rational_series (uintC N, const cl_qa_series& args, uintC len) { if (N==0) return cl_I_to_LF(0,len); diff --git a/src/float/transcendental/cl_LF_ratseries_qab.cc b/src/float/transcendental/cl_LF_ratseries_qab.cc index 8ea60c1..8d4e1a1 100644 --- a/src/float/transcendental/cl_LF_ratseries_qab.cc +++ b/src/float/transcendental/cl_LF_ratseries_qab.cc @@ -1,4 +1,4 @@ -// eval_rational_series(). +// eval_rational_series(). // General includes. #include "cl_sysdep.h" @@ -82,7 +82,8 @@ static void eval_qab_series_aux (uintC N1, uintC N2, } } -const cl_LF eval_rational_series (uintC N, const cl_qab_series& args, uintC len) +template<> +const cl_LF eval_rational_series (uintC N, const cl_qab_series& args, uintC len) { if (N==0) return cl_I_to_LF(0,len); diff --git a/src/float/transcendental/cl_LF_ratseries_qb.cc b/src/float/transcendental/cl_LF_ratseries_qb.cc index c006f09..10c79f9 100644 --- a/src/float/transcendental/cl_LF_ratseries_qb.cc +++ b/src/float/transcendental/cl_LF_ratseries_qb.cc @@ -1,4 +1,4 @@ -// eval_rational_series(). +// eval_rational_series(). // General includes. #include "cl_sysdep.h" @@ -82,7 +82,8 @@ static void eval_qb_series_aux (uintC N1, uintC N2, } } -const cl_LF eval_rational_series (uintC N, const cl_qb_series& args, uintC len) +template<> +const cl_LF eval_rational_series (uintC N, const cl_qb_series& args, uintC len) { if (N==0) return cl_I_to_LF(0,len); @@ -90,6 +91,83 @@ const cl_LF eval_rational_series (uintC N, const cl_qb_series& args, uintC len) eval_qb_series_aux(0,N,args,&Q,&B,&T); return cl_I_to_LF(T,len) / cl_I_to_LF(B*Q,len); } + +static void eval_qb_series_aux (uintC N1, uintC N2, + cl_qb_series_stream& args, + cl_I* Q, cl_I* B, cl_I* T) +{ + switch (N2 - N1) { + case 0: + throw runtime_exception(); break; + case 1: { + var cl_qb_series_term v0 = args.next(); // [N1] + *Q = v0.q; + *B = v0.b; + *T = 1; + break; + } + case 2: { + var cl_qb_series_term v0 = args.next(); // [N1] + var cl_qb_series_term v1 = args.next(); // [N1+1] + *Q = v0.q * v1.q; + *B = v0.b * v1.b; + *T = v1.b * v1.q + v0.b; + break; + } + case 3: { + var cl_qb_series_term v0 = args.next(); // [N1] + var cl_qb_series_term v1 = args.next(); // [N1+1] + var cl_qb_series_term v2 = args.next(); // [N1+2] + var cl_I q12 = v1.q * v2.q; + *Q = v0.q * q12; + var cl_I b12 = v1.b * v2.b; + *B = v0.b * b12; + *T = b12 * q12 + v0.b * (v2.b * v2.q + v1.b); + break; + } + case 4: { + var cl_qb_series_term v0 = args.next(); // [N1] + var cl_qb_series_term v1 = args.next(); // [N1+1] + var cl_qb_series_term v2 = args.next(); // [N1+2] + var cl_qb_series_term v3 = args.next(); // [N1+3] + var cl_I q23 = v2.q * v3.q; + var cl_I q123 = v1.q * q23; + *Q = v0.q * q123; + var cl_I b01 = v0.b * v1.b; + var cl_I b23 = v2.b * v3.b; + *B = b01 * b23; + *T = b23 * (v1.b * q123 + v0.b * q23) + + b01 * (v3.b * v3.q + v2.b); + break; + } + default: { + var uintC Nm = (N1+N2)/2; // midpoint + // Compute left part. + var cl_I LQ, LB, LT; + eval_qb_series_aux(N1,Nm,args,&LQ,&LB,<); + // Compute right part. + var cl_I RQ, RB, RT; + eval_qb_series_aux(Nm,N2,args,&RQ,&RB,&RT); + // Put together partial results. + *Q = LQ*RQ; + *B = LB*RB; + // S = LS + 1/LQ * RS, so T = RB*RQ*LT + LB*RT. + *T = RB*RQ*LT + LB*RT; + break; + } + } +} + +template<> +const cl_LF eval_rational_series (uintC N, cl_qb_series_stream& args, uintC len) +{ + if (N==0) + return cl_I_to_LF(0,len); + var cl_I Q, B, T; + eval_qb_series_aux(0,N,args,&Q,&B,&T); + return cl_I_to_LF(T,len) / cl_I_to_LF(B*Q,len); +} + // Bit complexity (if p(n), q(n), a(n), b(n) have length O(log(n))): // O(log(N)^2*M(N)). diff --git a/src/float/transcendental/cl_LF_tran.h b/src/float/transcendental/cl_LF_tran.h index 3484f9e..d5e902e 100644 --- a/src/float/transcendental/cl_LF_tran.h +++ b/src/float/transcendental/cl_LF_tran.h @@ -47,27 +47,20 @@ namespace cln { // the memory consumption but it also bears a potential for rounding errors. // A minimum trunclen that guarantees correctness must be evaluated on a // case-by-case basis. +// +// As a variation, it is sometimes advantageous to factor out from q[0..N-1] +// powers of two and put them back in again at the end of the computation by +// left-shifting. These are distinguished by a boolean template parameter. +// (Some combinations might not be implemented yet.) -struct cl_rational_series { - // To be set explicitly. - const cl_I* pv; - cl_I* qv; - const cl_I* av; - const cl_I* bv; - uintC* qsv; -}; -extern const cl_LF eval_rational_series (uintC N, const cl_rational_series& args, uintC len); - -// In each the special cases below, none of (a,b,p,q) can be NULL. But qs can -// still be given or NULL. +// In each of the special cases below, none of (a,b,p,q) can be NULL. struct cl_pqab_series { const cl_I* pv; cl_I* qv; const cl_I* av; const cl_I* bv; - uintC* qsv; }; struct cl_pqab_series_term { cl_I p; @@ -81,15 +74,14 @@ struct cl_pqab_series_stream { // Constructor. cl_pqab_series_stream (cl_pqab_series_term (*n)(cl_pqab_series_stream&)) : nextfn (n) {} }; -extern const cl_LF eval_rational_series (uintC N, const cl_pqab_series& args, uintC len); -extern const cl_LF eval_rational_series (uintC N, cl_pqab_series_stream& args, uintC len); -extern const cl_LF eval_rational_series (uintC N, cl_pqab_series_stream& args, uintC len, uintC trunclen); +template const cl_LF eval_rational_series (uintC N, const cl_pqab_series& args, uintC len); +template const cl_LF eval_rational_series (uintC N, cl_pqab_series_stream& args, uintC len); +template const cl_LF eval_rational_series (uintC N, cl_pqab_series_stream& args, uintC len, uintC trunclen); struct cl_pqb_series { const cl_I* pv; cl_I* qv; const cl_I* bv; - uintC* qsv; }; struct cl_pqb_series_term { cl_I p; @@ -102,15 +94,14 @@ struct cl_pqb_series_stream { // Constructor. cl_pqb_series_stream (cl_pqb_series_term (*n)(cl_pqb_series_stream&)) : nextfn (n) {} }; -extern const cl_LF eval_rational_series (uintC N, const cl_pqb_series& args, uintC len); -extern const cl_LF eval_rational_series (uintC N, cl_pqb_series_stream& args, uintC len); -extern const cl_LF eval_rational_series (uintC N, cl_pqb_series_stream& args, uintC len, uintC trunclen); +template const cl_LF eval_rational_series (uintC N, const cl_pqb_series& args, uintC len); +template const cl_LF eval_rational_series (uintC N, cl_pqb_series_stream& args, uintC len); +template const cl_LF eval_rational_series (uintC N, cl_pqb_series_stream& args, uintC len, uintC trunclen); struct cl_pqa_series { const cl_I* pv; cl_I* qv; const cl_I* av; - uintC* qsv; }; struct cl_pqa_series_term { cl_I p; @@ -123,14 +114,13 @@ struct cl_pqa_series_stream { // Constructor. cl_pqa_series_stream (cl_pqa_series_term (*n)(cl_pqa_series_stream&)) : nextfn (n) {} }; -extern const cl_LF eval_rational_series (uintC N, const cl_pqa_series& args, uintC len); -extern const cl_LF eval_rational_series (uintC N, cl_pqa_series_stream& args, uintC len); -extern const cl_LF eval_rational_series (uintC N, cl_pqa_series_stream& args, uintC len, uintC trunclen); +template const cl_LF eval_rational_series (uintC N, const cl_pqa_series& args, uintC len); +template const cl_LF eval_rational_series (uintC N, cl_pqa_series_stream& args, uintC len); +template const cl_LF eval_rational_series (uintC N, cl_pqa_series_stream& args, uintC len, uintC trunclen); struct cl_pq_series { const cl_I* pv; cl_I* qv; - uintC* qsv; }; struct cl_pq_series_term { cl_I p; @@ -142,81 +132,98 @@ struct cl_pq_series_stream { // Constructor. cl_pq_series_stream (cl_pq_series_term (*n)(cl_pq_series_stream&)) : nextfn (n) {} }; -extern const cl_LF eval_rational_series (uintC N, const cl_pq_series& args, uintC len); -extern const cl_LF eval_rational_series (uintC N, cl_pq_series_stream& args, uintC len); -extern const cl_LF eval_rational_series (uintC N, cl_pq_series_stream& args, uintC len, uintC trunclen); +template const cl_LF eval_rational_series (uintC N, const cl_pq_series& args, uintC len); +template const cl_LF eval_rational_series (uintC N, cl_pq_series_stream& args, uintC len); +template const cl_LF eval_rational_series (uintC N, cl_pq_series_stream& args, uintC len, uintC trunclen); struct cl_pab_series { const cl_I* pv; const cl_I* av; const cl_I* bv; }; -extern const cl_LF eval_rational_series (uintC N, const cl_pab_series& args, uintC len); +const cl_LF eval_rational_series (uintC N, const cl_pab_series& args, uintC len); struct cl_pb_series { const cl_I* pv; const cl_I* bv; }; -extern const cl_LF eval_rational_series (uintC N, const cl_pb_series& args, uintC len); +const cl_LF eval_rational_series (uintC N, const cl_pb_series& args, uintC len); struct cl_pa_series { const cl_I* pv; const cl_I* av; }; -extern const cl_LF eval_rational_series (uintC N, const cl_pa_series& args, uintC len); +const cl_LF eval_rational_series (uintC N, const cl_pa_series& args, uintC len); struct cl_p_series { const cl_I* pv; }; -extern const cl_LF eval_rational_series (uintC N, const cl_p_series& args, uintC len); +const cl_LF eval_rational_series (uintC N, const cl_p_series& args, uintC len); struct cl_qab_series { cl_I* qv; const cl_I* av; const cl_I* bv; - uintC* qsv; }; -extern const cl_LF eval_rational_series (uintC N, const cl_qab_series& args, uintC len); +template const cl_LF eval_rational_series (uintC N, const cl_qab_series& args, uintC len); struct cl_qb_series { cl_I* qv; const cl_I* bv; - uintC* qsv; }; -extern const cl_LF eval_rational_series (uintC N, const cl_qb_series& args, uintC len); +struct cl_qb_series_term { + cl_I q; + cl_I b; +}; +struct cl_qb_series_stream { + cl_qb_series_term (*nextfn)(cl_qb_series_stream&); + cl_qb_series_term next () { return nextfn(*this); } + // Constructor. + cl_qb_series_stream (cl_qb_series_term (*n)(cl_qb_series_stream&)) : nextfn (n) {} +}; +template const cl_LF eval_rational_series (uintC N, const cl_qb_series& args, uintC len); +template const cl_LF eval_rational_series (uintC N, cl_qb_series_stream& args, uintC len); struct cl_qa_series { cl_I* qv; const cl_I* av; - uintC* qsv; }; -extern const cl_LF eval_rational_series (uintC N, const cl_qa_series& args, uintC len); +template const cl_LF eval_rational_series (uintC N, const cl_qa_series& args, uintC len); struct cl_q_series { cl_I* qv; - uintC* qsv; }; -extern const cl_LF eval_rational_series (uintC N, const cl_q_series& args, uintC len); +struct cl_q_series_term { + cl_I q; +}; +struct cl_q_series_stream { + cl_q_series_term (*nextfn)(cl_q_series_stream&); + cl_q_series_term next () { return nextfn(*this); } + // Constructor. + cl_q_series_stream (cl_q_series_term (*n)(cl_q_series_stream&)) : nextfn (n) {} +}; +template const cl_LF eval_rational_series (uintC N, const cl_q_series& args, uintC len); +template const cl_LF eval_rational_series (uintC N, cl_q_series_stream& args, uintC len); struct cl_ab_series { const cl_I* av; const cl_I* bv; }; -extern const cl_LF eval_rational_series (uintC N, const cl_ab_series& args, uintC len); +const cl_LF eval_rational_series (uintC N, const cl_ab_series& args, uintC len); struct cl_b_series { const cl_I* bv; }; -extern const cl_LF eval_rational_series (uintC N, const cl_b_series& args, uintC len); +const cl_LF eval_rational_series (uintC N, const cl_b_series& args, uintC len); struct cl_a_series { const cl_I* av; }; -extern const cl_LF eval_rational_series (uintC N, const cl_a_series& args, uintC len); +const cl_LF eval_rational_series (uintC N, const cl_a_series& args, uintC len); struct cl__series { }; -extern const cl_LF eval_rational_series (uintC N, const cl__series& args, uintC len); +const cl_LF eval_rational_series (uintC N, const cl__series& args, uintC len); // [Generalization.] @@ -252,13 +259,13 @@ struct cl_pqcd_series_stream { // Constructor. cl_pqcd_series_stream( cl_pqcd_series_term (*n)(cl_pqcd_series_stream&)) : nextfn (n) {} }; -extern void eval_pqcd_series_aux (uintC N, cl_pqcd_series_term* args, cl_pqcd_series_result& Z, bool rightmost = true); -extern void eval_pqcd_series_aux (uintC N, cl_pqcd_series_stream& args, cl_pqcd_series_result& Z, bool rightmost = true); -extern void eval_pqcd_series_aux (uintC N, cl_pqcd_series_stream& args, cl_pqcd_series_result& Z, uintC trunclen, bool rightmost = true); +void eval_pqcd_series_aux (uintC N, cl_pqcd_series_term* args, cl_pqcd_series_result& Z, bool rightmost = true); +void eval_pqcd_series_aux (uintC N, cl_pqcd_series_stream& args, cl_pqcd_series_result& Z, bool rightmost = true); +void eval_pqcd_series_aux (uintC N, cl_pqcd_series_stream& args, cl_pqcd_series_result& Z, uintC trunclen, bool rightmost = true); // Ditto, but returns U/S. -extern const cl_LF eval_pqcd_series (uintC N, cl_pqcd_series_term* args, uintC len); -extern const cl_LF eval_pqcd_series (uintC N, cl_pqcd_series_stream& args, uintC len); -extern const cl_LF eval_pqcd_series (uintC N, cl_pqcd_series_stream& args, uintC len, uintC trunclen); +const cl_LF eval_pqcd_series (uintC N, cl_pqcd_series_term* args, uintC len); +const cl_LF eval_pqcd_series (uintC N, cl_pqcd_series_stream& args, uintC len); +const cl_LF eval_pqcd_series (uintC N, cl_pqcd_series_stream& args, uintC len, uintC trunclen); // [Special case c(n)=1.] // Subroutine: @@ -291,13 +298,26 @@ struct cl_pqd_series_stream { // Constructor. cl_pqd_series_stream( cl_pqd_series_term (*n)(cl_pqd_series_stream&)) : nextfn (n) {} }; -extern void eval_pqd_series_aux (uintC N, cl_pqd_series_term* args, cl_pqd_series_result& Z, bool rightmost = true); -extern void eval_pqd_series_aux (uintC N, cl_pqd_series_stream& args, cl_pqd_series_result& Z, bool rightmost = true); -extern void eval_pqd_series_aux (uintC N, cl_pqd_series_stream& args, cl_pqd_series_result& Z, uintC trunclen, bool rightmost = true); +void eval_pqd_series_aux (uintC N, cl_pqd_series_term* args, cl_pqd_series_result& Z, bool rightmost = true); +void eval_pqd_series_aux (uintC N, cl_pqd_series_stream& args, cl_pqd_series_result& Z, bool rightmost = true); +void eval_pqd_series_aux (uintC N, cl_pqd_series_stream& args, cl_pqd_series_result& Z, uintC trunclen, bool rightmost = true); // Ditto, but returns U/S. -extern const cl_LF eval_pqd_series (uintC N, cl_pqd_series_term* args, uintC len); -extern const cl_LF eval_pqd_series (uintC N, cl_pqd_series_stream& args, uintC len); -extern const cl_LF eval_pqd_series (uintC N, cl_pqd_series_stream& args, uintC len, uintC trunclen); +const cl_LF eval_pqd_series (uintC N, cl_pqd_series_term* args, uintC len); +const cl_LF eval_pqd_series (uintC N, cl_pqd_series_stream& args, uintC len); +const cl_LF eval_pqd_series (uintC N, cl_pqd_series_stream& args, uintC len, uintC trunclen); + +// Helper function to divide q by the largest s such that 2^s divides q, returns s. +inline uintC +pullout_shiftcount(cl_I& q) +{ + var uintC qs = 0; + if (!zerop(q)) { + qs = ord2(q); + if (qs > 0) + q = q >> qs; + } + return qs; +} // Helper function to convert integer of length > trunclen to long float of // length = trunclen. diff --git a/src/float/transcendental/cl_LF_zeta3.cc b/src/float/transcendental/cl_LF_zeta3.cc index 1c09773..0d71af1 100644 --- a/src/float/transcendental/cl_LF_zeta3.cc +++ b/src/float/transcendental/cl_LF_zeta3.cc @@ -59,10 +59,10 @@ const cl_LF zeta3 (uintC len) // with appropriate N, and // a(n) = 205*n^2+250*n+77, b(n) = 1, // p(0) = 1, p(n) = -n^5 for n>0, q(n) = 32*(2n+1)^5. - var uintC actuallen = len+2; // 2 Schutz-Digits + var uintC actuallen = len+2; // 2 guard digits var uintC N = ceiling(actuallen*intDsize,10); // 1024^-N <= 2^(-intDsize*actuallen). - var cl_LF sum = eval_rational_series(N,series,actuallen,actuallen); + var cl_LF sum = eval_rational_series(N,series,actuallen,actuallen); return scale_float(shorten(sum,len),-1); } // Bit complexity (N := len): O(log(N)^2*M(N)). diff --git a/src/float/transcendental/cl_LF_zeta_int.cc b/src/float/transcendental/cl_LF_zeta_int.cc index 433b7fc..9460c93 100644 --- a/src/float/transcendental/cl_LF_zeta_int.cc +++ b/src/float/transcendental/cl_LF_zeta_int.cc @@ -24,7 +24,7 @@ const cl_LF compute_zeta_exp (int s, uintC len) // zeta(s) = 1/(1-2^(1-s)) sum(n=0..infty, (-1)^n/(n+1)^s), // with convergence acceleration through exp(x), and evaluated // using the binary-splitting algorithm. - var uintC actuallen = len+2; // 2 Schutz-Digits + var uintC actuallen = len+2; // 2 guard digits var uintC x = (uintC)(0.693148*intDsize*actuallen)+1; var uintC N = (uintC)(2.718281828*x); CL_ALLOCA_STACK; @@ -59,7 +59,7 @@ const cl_LF compute_zeta_cvz1 (int s, uintC len) // Method: // zeta(s) = 1/(1-2^(1-s)) sum(n=0..infty, (-1)^n/(n+1)^s), // with Cohen-Villegas-Zagier convergence acceleration. - var uintC actuallen = len+2; // 2 Schutz-Digits + var uintC actuallen = len+2; // 2 guard digits var uintC N = (uintC)(0.39321985*intDsize*actuallen)+1; var cl_I fterm = 2*(cl_I)N*(cl_I)N; var cl_I fsum = fterm;