You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

281 lines
8.3 KiB

25 years ago
  1. // Computation of artanh(1/m) (m integer) to high precision.
  2. #include "cl_integer.h"
  3. #include "cl_rational.h"
  4. #include "cl_real.h"
  5. #include "cl_complex.h"
  6. #include "cl_lfloat.h"
  7. #include "cl_LF.h"
  8. #include "cl_LF_tran.h"
  9. #include "cl_alloca.h"
  10. #include <stdlib.h>
  11. #include <string.h>
  12. #include "cl_timing.h"
  13. #undef floor
  14. #include <math.h>
  15. #define floor cln_floor
  16. // Method 1: atanh(1/m) = sum(n=0..infty, 1/(2n+1) * 1/m^(2n+1))
  17. // Method 2: atanh(1/m) = sum(n=0..infty, (-4)^n*n!^2/(2n+1)! * m/(m^2-1)^(n+1))
  18. // a. Using long floats. [N^2]
  19. // b. Simulating long floats using integers. [N^2]
  20. // c. Using integers, no binary splitting. [N^2]
  21. // d. Using integers, with binary splitting. [FAST]
  22. // Method 3: general built-in algorithm. [FAST]
  23. // Method 4: atanh(x) = 1/2 ln((1+x)/(1-x)),
  24. // using the general built-in algorithm [FAST]
  25. // Method 1: atanh(1/m) = sum(n=0..infty, 1/(2n+1) * 1/m^(2n+1))
  26. const cl_LF atanh_recip_1a (cl_I m, uintC len)
  27. {
  28. var uintC actuallen = len + 1;
  29. var cl_LF eps = scale_float(cl_I_to_LF(1,actuallen),-intDsize*(sintL)actuallen);
  30. var cl_I m2 = m*m;
  31. var cl_LF fterm = cl_I_to_LF(1,actuallen)/m;
  32. var cl_LF fsum = fterm;
  33. for (var uintL n = 1; fterm >= eps; n++) {
  34. fterm = fterm/m2;
  35. fterm = cl_LF_shortenwith(fterm,eps);
  36. fsum = fsum + LF_to_LF(fterm/(2*n+1),actuallen);
  37. }
  38. return shorten(fsum,len);
  39. }
  40. const cl_LF atanh_recip_1b (cl_I m, uintC len)
  41. {
  42. var uintC actuallen = len + 1;
  43. var cl_I m2 = m*m;
  44. var cl_I fterm = floor1((cl_I)1 << (intDsize*actuallen), m);
  45. var cl_I fsum = fterm;
  46. for (var uintL n = 1; fterm > 0; n++) {
  47. fterm = floor1(fterm,m2);
  48. fsum = fsum + floor1(fterm,2*n+1);
  49. }
  50. return scale_float(cl_I_to_LF(fsum,len),-intDsize*(sintL)actuallen);
  51. }
  52. const cl_LF atanh_recip_1c (cl_I m, uintC len)
  53. {
  54. var uintC actuallen = len + 1;
  55. var cl_I m2 = m*m;
  56. var sintL N = (sintL)(0.69314718*intDsize/2*actuallen/log(cl_double_approx(m))) + 1;
  57. var cl_I num = 0, den = 1; // "lazy rational number"
  58. for (sintL n = N-1; n>=0; n--) {
  59. // Multiply sum with 1/m^2:
  60. den = den * m2;
  61. // Add 1/(2n+1):
  62. num = num*(2*n+1) + den;
  63. den = den*(2*n+1);
  64. }
  65. den = den*m;
  66. var cl_LF result = cl_I_to_LF(num,actuallen)/cl_I_to_LF(den,actuallen);
  67. return shorten(result,len);
  68. }
  69. const cl_LF atanh_recip_1d (cl_I m, uintC len)
  70. {
  71. var uintC actuallen = len + 1;
  72. var cl_I m2 = m*m;
  73. var uintL N = (uintL)(0.69314718*intDsize/2*actuallen/log(cl_double_approx(m))) + 1;
  74. CL_ALLOCA_STACK;
  75. var cl_I* bv = (cl_I*) cl_alloca(N*sizeof(cl_I));
  76. var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I));
  77. var uintL n;
  78. for (n = 0; n < N; n++) {
  79. new (&bv[n]) cl_I ((cl_I)(2*n+1));
  80. new (&qv[n]) cl_I (n==0 ? m : m2);
  81. }
  82. var cl_rational_series series;
  83. series.av = NULL; series.bv = bv;
  84. series.pv = NULL; series.qv = qv; series.qsv = NULL;
  85. var cl_LF result = eval_rational_series(N,series,actuallen);
  86. for (n = 0; n < N; n++) {
  87. bv[n].~cl_I();
  88. qv[n].~cl_I();
  89. }
  90. return shorten(result,len);
  91. }
  92. // Method 2: atanh(1/m) = sum(n=0..infty, (-4)^n*n!^2/(2n+1)! * m/(m^2-1)^(n+1))
  93. const cl_LF atanh_recip_2a (cl_I m, uintC len)
  94. {
  95. var uintC actuallen = len + 1;
  96. var cl_LF eps = scale_float(cl_I_to_LF(1,actuallen),-intDsize*(sintL)actuallen);
  97. var cl_I m2 = m*m-1;
  98. var cl_LF fterm = cl_I_to_LF(m,actuallen)/m2;
  99. var cl_LF fsum = fterm;
  100. for (var uintL n = 1; fterm >= eps; n++) {
  101. fterm = The(cl_LF)((2*n)*fterm)/((2*n+1)*m2);
  102. fterm = cl_LF_shortenwith(fterm,eps);
  103. if ((n % 2) == 0)
  104. fsum = fsum + LF_to_LF(fterm,actuallen);
  105. else
  106. fsum = fsum - LF_to_LF(fterm,actuallen);
  107. }
  108. return shorten(fsum,len);
  109. }
  110. const cl_LF atanh_recip_2b (cl_I m, uintC len)
  111. {
  112. var uintC actuallen = len + 1;
  113. var cl_I m2 = m*m-1;
  114. var cl_I fterm = floor1((cl_I)m << (intDsize*actuallen), m2);
  115. var cl_I fsum = fterm;
  116. for (var uintL n = 1; fterm > 0; n++) {
  117. fterm = floor1((2*n)*fterm,(2*n+1)*m2);
  118. if ((n % 2) == 0)
  119. fsum = fsum + fterm;
  120. else
  121. fsum = fsum - fterm;
  122. }
  123. return scale_float(cl_I_to_LF(fsum,len),-intDsize*(sintL)actuallen);
  124. }
  125. const cl_LF atanh_recip_2c (cl_I m, uintC len)
  126. {
  127. var uintC actuallen = len + 1;
  128. var cl_I m2 = m*m-1;
  129. var uintL N = (uintL)(0.69314718*intDsize*actuallen/log(cl_double_approx(m2))) + 1;
  130. var cl_I num = 0, den = 1; // "lazy rational number"
  131. for (uintL n = N; n>0; n--) {
  132. // Multiply sum with -(2n)/(2n+1)(m^2+1):
  133. num = num * (2*n);
  134. den = - den * ((2*n+1)*m2);
  135. // Add 1:
  136. num = num + den;
  137. }
  138. num = num*m;
  139. den = den*m2;
  140. var cl_LF result = cl_I_to_LF(num,actuallen)/cl_I_to_LF(den,actuallen);
  141. return shorten(result,len);
  142. }
  143. const cl_LF atanh_recip_2d (cl_I m, uintC len)
  144. {
  145. var uintC actuallen = len + 1;
  146. var cl_I m2 = m*m-1;
  147. var uintL N = (uintL)(0.69314718*intDsize*actuallen/log(cl_double_approx(m2))) + 1;
  148. CL_ALLOCA_STACK;
  149. var cl_I* pv = (cl_I*) cl_alloca(N*sizeof(cl_I));
  150. var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I));
  151. var uintL n;
  152. new (&pv[0]) cl_I (m);
  153. new (&qv[0]) cl_I (m2);
  154. for (n = 1; n < N; n++) {
  155. new (&pv[n]) cl_I (-(cl_I)(2*n));
  156. new (&qv[n]) cl_I ((2*n+1)*m2);
  157. }
  158. var cl_rational_series series;
  159. series.av = NULL; series.bv = NULL;
  160. series.pv = pv; series.qv = qv; series.qsv = NULL;
  161. var cl_LF result = eval_rational_series(N,series,actuallen);
  162. for (n = 0; n < N; n++) {
  163. pv[n].~cl_I();
  164. qv[n].~cl_I();
  165. }
  166. return shorten(result,len);
  167. }
  168. // Main program: Compute and display the timings.
  169. int main (int argc, char * argv[])
  170. {
  171. int repetitions = 1;
  172. if ((argc >= 3) && !strcmp(argv[1],"-r")) {
  173. repetitions = atoi(argv[2]);
  174. argc -= 2; argv += 2;
  175. }
  176. if (argc < 2)
  177. exit(1);
  178. cl_I m = (cl_I)argv[1];
  179. uintL len = atoi(argv[2]);
  180. cl_LF p;
  181. ln(cl_I_to_LF(1000,len+10)); // fill cache
  182. // Method 1.
  183. { CL_TIMING;
  184. for (int rep = repetitions; rep > 0; rep--)
  185. { p = atanh_recip_1a(m,len); }
  186. }
  187. cout << p << endl;
  188. { CL_TIMING;
  189. for (int rep = repetitions; rep > 0; rep--)
  190. { p = atanh_recip_1b(m,len); }
  191. }
  192. cout << p << endl;
  193. { CL_TIMING;
  194. for (int rep = repetitions; rep > 0; rep--)
  195. { p = atanh_recip_1c(m,len); }
  196. }
  197. cout << p << endl;
  198. { CL_TIMING;
  199. for (int rep = repetitions; rep > 0; rep--)
  200. { p = atanh_recip_1d(m,len); }
  201. }
  202. cout << p << endl;
  203. // Method 2.
  204. { CL_TIMING;
  205. for (int rep = repetitions; rep > 0; rep--)
  206. { p = atanh_recip_2a(m,len); }
  207. }
  208. cout << p << endl;
  209. { CL_TIMING;
  210. for (int rep = repetitions; rep > 0; rep--)
  211. { p = atanh_recip_2b(m,len); }
  212. }
  213. cout << p << endl;
  214. { CL_TIMING;
  215. for (int rep = repetitions; rep > 0; rep--)
  216. { p = atanh_recip_2c(m,len); }
  217. }
  218. cout << p << endl;
  219. { CL_TIMING;
  220. for (int rep = repetitions; rep > 0; rep--)
  221. { p = atanh_recip_2d(m,len); }
  222. }
  223. cout << p << endl;
  224. // Method 3.
  225. { CL_TIMING;
  226. for (int rep = repetitions; rep > 0; rep--)
  227. { p = The(cl_LF)(atanh(cl_RA_to_LF(1/(cl_RA)m,len))); }
  228. }
  229. cout << p << endl;
  230. // Method 4.
  231. { CL_TIMING;
  232. for (int rep = repetitions; rep > 0; rep--)
  233. { p = The(cl_LF)(scale_float(ln(cl_RA_to_LF((cl_RA)(m+1)/(cl_RA)(m-1),len)),-1)); }
  234. }
  235. cout << p << endl;
  236. }
  237. // Timings of the above algorithms, on an i486 33 MHz, running Linux.
  238. // m = 3 -> 1/2 ln(2)
  239. // N 1a 1b 1c 1d 2a 2b 2c 2d 3
  240. // 10 0.021 0.014 0.019 0.012 0.029 0.015 0.023 0.015 0.015
  241. // 25 0.060 0.041 0.073 0.041 0.082 0.046 0.086 0.051 0.066
  242. // 50 0.164 0.110 0.258 0.120 0.203 0.124 0.295 0.142
  243. // 100 0.49 0.35 1.05 0.37 0.60 0.35 1.19 0.42
  244. // 250 2.5 1.9 7.2 1.7 2.9 1.9 8.0 1.8
  245. // 500 10.1 7.2 33.4 5.5 10.7 7.3 36.5 5.9
  246. // 1000 38 30 145 16.1 39 29 158 16.8
  247. // 2500 231 188 976 53 237 186 1081 58
  248. // asymp. N^2 N^2 N^2 FAST N^2 N^2 N^2 FAST
  249. //
  250. // m = 9 -> 1/2 ln(5/4)
  251. // N 1a 1b 1c 1d 2a 2b 2c 2d 3 4
  252. // 10 0.0106 0.0072 0.0084 0.0061 0.0139 0.0073 0.0098 0.0073 0.0140 0.0211
  253. // 25 0.031 0.021 0.029 0.019 0.039 0.022 0.031 0.022 0.063 0.081
  254. // 50 0.083 0.057 0.091 0.056 0.098 0.058 0.098 0.060 0.232 0.212
  255. // 100 0.25 0.17 0.32 0.16 0.28 0.17 0.34 0.17 0.60 0.59
  256. // 250 1.28 0.94 2.11 0.77 1.40 0.91 2.18 0.76 2.76 2.76
  257. // 500 5.1 3.6 9.4 2.5 5.2 3.4 9.3 2.4 10.4 9.7
  258. // 1000 19.1 14.7 42 7.8 18.5 13.6 42 7.4 31 30
  259. // 2500 116 93 279 29.6 113 86 278 30.0 129 125
  260. // asymp. N^2 N^2 N^2 FAST N^2 N^2 N^2 FAST FAST FAST