You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

281 lines
8.3 KiB

25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
  1. // Computation of artanh(1/m) (m integer) to high precision.
  2. #include "cln/integer.h"
  3. #include "cln/rational.h"
  4. #include "cln/real.h"
  5. #include "cln/complex.h"
  6. #include "cln/lfloat.h"
  7. #include "cl_LF.h"
  8. #include "cl_LF_tran.h"
  9. #include "cl_alloca.h"
  10. #include <cstdlib>
  11. #include <cstring>
  12. #include "cln/timing.h"
  13. #undef floor
  14. #include <cmath>
  15. #define floor cln_floor
  16. // Method 1: atanh(1/m) = sum(n=0..infty, 1/(2n+1) * 1/m^(2n+1))
  17. // Method 2: atanh(1/m) = sum(n=0..infty, (-4)^n*n!^2/(2n+1)! * m/(m^2-1)^(n+1))
  18. // a. Using long floats. [N^2]
  19. // b. Simulating long floats using integers. [N^2]
  20. // c. Using integers, no binary splitting. [N^2]
  21. // d. Using integers, with binary splitting. [FAST]
  22. // Method 3: general built-in algorithm. [FAST]
  23. // Method 4: atanh(x) = 1/2 ln((1+x)/(1-x)),
  24. // using the general built-in algorithm [FAST]
  25. // Method 1: atanh(1/m) = sum(n=0..infty, 1/(2n+1) * 1/m^(2n+1))
  26. const cl_LF atanh_recip_1a (cl_I m, uintC len)
  27. {
  28. var uintC actuallen = len + 1;
  29. var cl_LF eps = scale_float(cl_I_to_LF(1,actuallen),-intDsize*(sintL)actuallen);
  30. var cl_I m2 = m*m;
  31. var cl_LF fterm = cl_I_to_LF(1,actuallen)/m;
  32. var cl_LF fsum = fterm;
  33. for (var uintL n = 1; fterm >= eps; n++) {
  34. fterm = fterm/m2;
  35. fterm = cl_LF_shortenwith(fterm,eps);
  36. fsum = fsum + LF_to_LF(fterm/(2*n+1),actuallen);
  37. }
  38. return shorten(fsum,len);
  39. }
  40. const cl_LF atanh_recip_1b (cl_I m, uintC len)
  41. {
  42. var uintC actuallen = len + 1;
  43. var cl_I m2 = m*m;
  44. var cl_I fterm = floor1((cl_I)1 << (intDsize*actuallen), m);
  45. var cl_I fsum = fterm;
  46. for (var uintL n = 1; fterm > 0; n++) {
  47. fterm = floor1(fterm,m2);
  48. fsum = fsum + floor1(fterm,2*n+1);
  49. }
  50. return scale_float(cl_I_to_LF(fsum,len),-intDsize*(sintL)actuallen);
  51. }
  52. const cl_LF atanh_recip_1c (cl_I m, uintC len)
  53. {
  54. var uintC actuallen = len + 1;
  55. var cl_I m2 = m*m;
  56. var sintL N = (sintL)(0.69314718*intDsize/2*actuallen/log(double_approx(m))) + 1;
  57. var cl_I num = 0, den = 1; // "lazy rational number"
  58. for (sintL n = N-1; n>=0; n--) {
  59. // Multiply sum with 1/m^2:
  60. den = den * m2;
  61. // Add 1/(2n+1):
  62. num = num*(2*n+1) + den;
  63. den = den*(2*n+1);
  64. }
  65. den = den*m;
  66. var cl_LF result = cl_I_to_LF(num,actuallen)/cl_I_to_LF(den,actuallen);
  67. return shorten(result,len);
  68. }
  69. const cl_LF atanh_recip_1d (cl_I m, uintC len)
  70. {
  71. var uintC actuallen = len + 1;
  72. var cl_I m2 = m*m;
  73. var uintL N = (uintL)(0.69314718*intDsize/2*actuallen/log(double_approx(m))) + 1;
  74. CL_ALLOCA_STACK;
  75. var cl_I* bv = (cl_I*) cl_alloca(N*sizeof(cl_I));
  76. var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I));
  77. var uintL n;
  78. for (n = 0; n < N; n++) {
  79. new (&bv[n]) cl_I ((cl_I)(2*n+1));
  80. new (&qv[n]) cl_I (n==0 ? m : m2);
  81. }
  82. var cl_rational_series series;
  83. series.av = NULL; series.bv = bv;
  84. series.pv = NULL; series.qv = qv; series.qsv = NULL;
  85. var cl_LF result = eval_rational_series(N,series,actuallen);
  86. for (n = 0; n < N; n++) {
  87. bv[n].~cl_I();
  88. qv[n].~cl_I();
  89. }
  90. return shorten(result,len);
  91. }
  92. // Method 2: atanh(1/m) = sum(n=0..infty, (-4)^n*n!^2/(2n+1)! * m/(m^2-1)^(n+1))
  93. const cl_LF atanh_recip_2a (cl_I m, uintC len)
  94. {
  95. var uintC actuallen = len + 1;
  96. var cl_LF eps = scale_float(cl_I_to_LF(1,actuallen),-intDsize*(sintL)actuallen);
  97. var cl_I m2 = m*m-1;
  98. var cl_LF fterm = cl_I_to_LF(m,actuallen)/m2;
  99. var cl_LF fsum = fterm;
  100. for (var uintL n = 1; fterm >= eps; n++) {
  101. fterm = The(cl_LF)((2*n)*fterm)/((2*n+1)*m2);
  102. fterm = cl_LF_shortenwith(fterm,eps);
  103. if ((n % 2) == 0)
  104. fsum = fsum + LF_to_LF(fterm,actuallen);
  105. else
  106. fsum = fsum - LF_to_LF(fterm,actuallen);
  107. }
  108. return shorten(fsum,len);
  109. }
  110. const cl_LF atanh_recip_2b (cl_I m, uintC len)
  111. {
  112. var uintC actuallen = len + 1;
  113. var cl_I m2 = m*m-1;
  114. var cl_I fterm = floor1((cl_I)m << (intDsize*actuallen), m2);
  115. var cl_I fsum = fterm;
  116. for (var uintL n = 1; fterm > 0; n++) {
  117. fterm = floor1((2*n)*fterm,(2*n+1)*m2);
  118. if ((n % 2) == 0)
  119. fsum = fsum + fterm;
  120. else
  121. fsum = fsum - fterm;
  122. }
  123. return scale_float(cl_I_to_LF(fsum,len),-intDsize*(sintL)actuallen);
  124. }
  125. const cl_LF atanh_recip_2c (cl_I m, uintC len)
  126. {
  127. var uintC actuallen = len + 1;
  128. var cl_I m2 = m*m-1;
  129. var uintL N = (uintL)(0.69314718*intDsize*actuallen/log(double_approx(m2))) + 1;
  130. var cl_I num = 0, den = 1; // "lazy rational number"
  131. for (uintL n = N; n>0; n--) {
  132. // Multiply sum with -(2n)/(2n+1)(m^2+1):
  133. num = num * (2*n);
  134. den = - den * ((2*n+1)*m2);
  135. // Add 1:
  136. num = num + den;
  137. }
  138. num = num*m;
  139. den = den*m2;
  140. var cl_LF result = cl_I_to_LF(num,actuallen)/cl_I_to_LF(den,actuallen);
  141. return shorten(result,len);
  142. }
  143. const cl_LF atanh_recip_2d (cl_I m, uintC len)
  144. {
  145. var uintC actuallen = len + 1;
  146. var cl_I m2 = m*m-1;
  147. var uintL N = (uintL)(0.69314718*intDsize*actuallen/log(double_approx(m2))) + 1;
  148. CL_ALLOCA_STACK;
  149. var cl_I* pv = (cl_I*) cl_alloca(N*sizeof(cl_I));
  150. var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I));
  151. var uintL n;
  152. new (&pv[0]) cl_I (m);
  153. new (&qv[0]) cl_I (m2);
  154. for (n = 1; n < N; n++) {
  155. new (&pv[n]) cl_I (-(cl_I)(2*n));
  156. new (&qv[n]) cl_I ((2*n+1)*m2);
  157. }
  158. var cl_rational_series series;
  159. series.av = NULL; series.bv = NULL;
  160. series.pv = pv; series.qv = qv; series.qsv = NULL;
  161. var cl_LF result = eval_rational_series(N,series,actuallen);
  162. for (n = 0; n < N; n++) {
  163. pv[n].~cl_I();
  164. qv[n].~cl_I();
  165. }
  166. return shorten(result,len);
  167. }
  168. // Main program: Compute and display the timings.
  169. int main (int argc, char * argv[])
  170. {
  171. int repetitions = 1;
  172. if ((argc >= 3) && !strcmp(argv[1],"-r")) {
  173. repetitions = atoi(argv[2]);
  174. argc -= 2; argv += 2;
  175. }
  176. if (argc < 2)
  177. exit(1);
  178. cl_I m = (cl_I)argv[1];
  179. uintL len = atoi(argv[2]);
  180. cl_LF p;
  181. ln(cl_I_to_LF(1000,len+10)); // fill cache
  182. // Method 1.
  183. { CL_TIMING;
  184. for (int rep = repetitions; rep > 0; rep--)
  185. { p = atanh_recip_1a(m,len); }
  186. }
  187. cout << p << endl;
  188. { CL_TIMING;
  189. for (int rep = repetitions; rep > 0; rep--)
  190. { p = atanh_recip_1b(m,len); }
  191. }
  192. cout << p << endl;
  193. { CL_TIMING;
  194. for (int rep = repetitions; rep > 0; rep--)
  195. { p = atanh_recip_1c(m,len); }
  196. }
  197. cout << p << endl;
  198. { CL_TIMING;
  199. for (int rep = repetitions; rep > 0; rep--)
  200. { p = atanh_recip_1d(m,len); }
  201. }
  202. cout << p << endl;
  203. // Method 2.
  204. { CL_TIMING;
  205. for (int rep = repetitions; rep > 0; rep--)
  206. { p = atanh_recip_2a(m,len); }
  207. }
  208. cout << p << endl;
  209. { CL_TIMING;
  210. for (int rep = repetitions; rep > 0; rep--)
  211. { p = atanh_recip_2b(m,len); }
  212. }
  213. cout << p << endl;
  214. { CL_TIMING;
  215. for (int rep = repetitions; rep > 0; rep--)
  216. { p = atanh_recip_2c(m,len); }
  217. }
  218. cout << p << endl;
  219. { CL_TIMING;
  220. for (int rep = repetitions; rep > 0; rep--)
  221. { p = atanh_recip_2d(m,len); }
  222. }
  223. cout << p << endl;
  224. // Method 3.
  225. { CL_TIMING;
  226. for (int rep = repetitions; rep > 0; rep--)
  227. { p = The(cl_LF)(atanh(cl_RA_to_LF(1/(cl_RA)m,len))); }
  228. }
  229. cout << p << endl;
  230. // Method 4.
  231. { CL_TIMING;
  232. for (int rep = repetitions; rep > 0; rep--)
  233. { p = The(cl_LF)(scale_float(ln(cl_RA_to_LF((cl_RA)(m+1)/(cl_RA)(m-1),len)),-1)); }
  234. }
  235. cout << p << endl;
  236. }
  237. // Timings of the above algorithms, on an i486 33 MHz, running Linux.
  238. // m = 3 -> 1/2 ln(2)
  239. // N 1a 1b 1c 1d 2a 2b 2c 2d 3
  240. // 10 0.021 0.014 0.019 0.012 0.029 0.015 0.023 0.015 0.015
  241. // 25 0.060 0.041 0.073 0.041 0.082 0.046 0.086 0.051 0.066
  242. // 50 0.164 0.110 0.258 0.120 0.203 0.124 0.295 0.142
  243. // 100 0.49 0.35 1.05 0.37 0.60 0.35 1.19 0.42
  244. // 250 2.5 1.9 7.2 1.7 2.9 1.9 8.0 1.8
  245. // 500 10.1 7.2 33.4 5.5 10.7 7.3 36.5 5.9
  246. // 1000 38 30 145 16.1 39 29 158 16.8
  247. // 2500 231 188 976 53 237 186 1081 58
  248. // asymp. N^2 N^2 N^2 FAST N^2 N^2 N^2 FAST
  249. //
  250. // m = 9 -> 1/2 ln(5/4)
  251. // N 1a 1b 1c 1d 2a 2b 2c 2d 3 4
  252. // 10 0.0106 0.0072 0.0084 0.0061 0.0139 0.0073 0.0098 0.0073 0.0140 0.0211
  253. // 25 0.031 0.021 0.029 0.019 0.039 0.022 0.031 0.022 0.063 0.081
  254. // 50 0.083 0.057 0.091 0.056 0.098 0.058 0.098 0.060 0.232 0.212
  255. // 100 0.25 0.17 0.32 0.16 0.28 0.17 0.34 0.17 0.60 0.59
  256. // 250 1.28 0.94 2.11 0.77 1.40 0.91 2.18 0.76 2.76 2.76
  257. // 500 5.1 3.6 9.4 2.5 5.2 3.4 9.3 2.4 10.4 9.7
  258. // 1000 19.1 14.7 42 7.8 18.5 13.6 42 7.4 31 30
  259. // 2500 116 93 279 29.6 113 86 278 30.0 129 125
  260. // asymp. N^2 N^2 N^2 FAST N^2 N^2 N^2 FAST FAST FAST