274 lines
7.9 KiB

25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
  1. // Computation of arctan(1/m) (m integer) to high precision.
  2. #include "cln/integer.h"
  3. #include "cln/rational.h"
  4. #include "cln/real.h"
  5. #include "cln/lfloat.h"
  6. #include "cl_LF.h"
  7. #include "cl_LF_tran.h"
  8. #include "cl_alloca.h"
  9. #include <stdlib.h>
  10. #include <string.h>
  11. #include "cln/timing.h"
  12. #undef floor
  13. #include <math.h>
  14. #define floor cln_floor
  15. using namespace cln;
  16. // Method 1: atan(1/m) = sum(n=0..infty, (-1)^n/(2n+1) * 1/m^(2n+1))
  17. // Method 2: atan(1/m) = sum(n=0..infty, 4^n*n!^2/(2n+1)! * m/(m^2+1)^(n+1))
  18. // a. Using long floats. [N^2]
  19. // b. Simulating long floats using integers. [N^2]
  20. // c. Using integers, no binary splitting. [N^2]
  21. // d. Using integers, with binary splitting. [FAST]
  22. // Method 3: general built-in algorithm. [FAST]
  23. // Method 1: atan(1/m) = sum(n=0..infty, (-1)^n/(2n+1) * 1/m^(2n+1))
  24. const cl_LF atan_recip_1a (cl_I m, uintC len)
  25. {
  26. var uintC actuallen = len + 1;
  27. var cl_LF eps = scale_float(cl_I_to_LF(1,actuallen),-intDsize*(sintL)actuallen);
  28. var cl_I m2 = m*m;
  29. var cl_LF fterm = cl_I_to_LF(1,actuallen)/m;
  30. var cl_LF fsum = fterm;
  31. for (var uintL n = 1; fterm >= eps; n++) {
  32. fterm = fterm/m2;
  33. fterm = cl_LF_shortenwith(fterm,eps);
  34. if ((n % 2) == 0)
  35. fsum = fsum + LF_to_LF(fterm/(2*n+1),actuallen);
  36. else
  37. fsum = fsum - LF_to_LF(fterm/(2*n+1),actuallen);
  38. }
  39. return shorten(fsum,len);
  40. }
  41. const cl_LF atan_recip_1b (cl_I m, uintC len)
  42. {
  43. var uintC actuallen = len + 1;
  44. var cl_I m2 = m*m;
  45. var cl_I fterm = floor1((cl_I)1 << (intDsize*actuallen), m);
  46. var cl_I fsum = fterm;
  47. for (var uintL n = 1; fterm > 0; n++) {
  48. fterm = floor1(fterm,m2);
  49. if ((n % 2) == 0)
  50. fsum = fsum + floor1(fterm,2*n+1);
  51. else
  52. fsum = fsum - floor1(fterm,2*n+1);
  53. }
  54. return scale_float(cl_I_to_LF(fsum,len),-intDsize*(sintL)actuallen);
  55. }
  56. const cl_LF atan_recip_1c (cl_I m, uintC len)
  57. {
  58. var uintC actuallen = len + 1;
  59. var cl_I m2 = m*m;
  60. var sintL N = (sintL)(0.69314718*intDsize/2*actuallen/log(double_approx(m))) + 1;
  61. var cl_I num = 0, den = 1; // "lazy rational number"
  62. for (sintL n = N-1; n>=0; n--) {
  63. // Multiply sum with 1/m^2:
  64. den = den * m2;
  65. // Add (-1)^n/(2n+1):
  66. if ((n % 2) == 0)
  67. num = num*(2*n+1) + den;
  68. else
  69. num = num*(2*n+1) - den;
  70. den = den*(2*n+1);
  71. }
  72. den = den*m;
  73. var cl_LF result = cl_I_to_LF(num,actuallen)/cl_I_to_LF(den,actuallen);
  74. return shorten(result,len);
  75. }
  76. const cl_LF atan_recip_1d (cl_I m, uintC len)
  77. {
  78. var uintC actuallen = len + 1;
  79. var cl_I m2 = m*m;
  80. var uintL N = (uintL)(0.69314718*intDsize/2*actuallen/log(double_approx(m))) + 1;
  81. CL_ALLOCA_STACK;
  82. var cl_I* bv = (cl_I*) cl_alloca(N*sizeof(cl_I));
  83. var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I));
  84. var uintL n;
  85. for (n = 0; n < N; n++) {
  86. new (&bv[n]) cl_I ((n % 2) == 0 ? (cl_I)(2*n+1) : -(cl_I)(2*n+1));
  87. new (&qv[n]) cl_I (n==0 ? m : m2);
  88. }
  89. var cl_rational_series series;
  90. series.av = NULL; series.bv = bv;
  91. series.pv = NULL; series.qv = qv; series.qsv = NULL;
  92. var cl_LF result = eval_rational_series(N,series,actuallen);
  93. for (n = 0; n < N; n++) {
  94. bv[n].~cl_I();
  95. qv[n].~cl_I();
  96. }
  97. return shorten(result,len);
  98. }
  99. // Method 2: atan(1/m) = sum(n=0..infty, 4^n*n!^2/(2n+1)! * m/(m^2+1)^(n+1))
  100. const cl_LF atan_recip_2a (cl_I m, uintC len)
  101. {
  102. var uintC actuallen = len + 1;
  103. var cl_LF eps = scale_float(cl_I_to_LF(1,actuallen),-intDsize*(sintL)actuallen);
  104. var cl_I m2 = m*m+1;
  105. var cl_LF fterm = cl_I_to_LF(m,actuallen)/m2;
  106. var cl_LF fsum = fterm;
  107. for (var uintL n = 1; fterm >= eps; n++) {
  108. fterm = The(cl_LF)((2*n)*fterm)/((2*n+1)*m2);
  109. fterm = cl_LF_shortenwith(fterm,eps);
  110. fsum = fsum + LF_to_LF(fterm,actuallen);
  111. }
  112. return shorten(fsum,len);
  113. }
  114. const cl_LF atan_recip_2b (cl_I m, uintC len)
  115. {
  116. var uintC actuallen = len + 1;
  117. var cl_I m2 = m*m+1;
  118. var cl_I fterm = floor1((cl_I)m << (intDsize*actuallen), m2);
  119. var cl_I fsum = fterm;
  120. for (var uintL n = 1; fterm > 0; n++) {
  121. fterm = floor1((2*n)*fterm,(2*n+1)*m2);
  122. fsum = fsum + fterm;
  123. }
  124. return scale_float(cl_I_to_LF(fsum,len),-intDsize*(sintL)actuallen);
  125. }
  126. const cl_LF atan_recip_2c (cl_I m, uintC len)
  127. {
  128. var uintC actuallen = len + 1;
  129. var cl_I m2 = m*m+1;
  130. var uintL N = (uintL)(0.69314718*intDsize*actuallen/log(double_approx(m2))) + 1;
  131. var cl_I num = 0, den = 1; // "lazy rational number"
  132. for (uintL n = N; n>0; n--) {
  133. // Multiply sum with (2n)/(2n+1)(m^2+1):
  134. num = num * (2*n);
  135. den = den * ((2*n+1)*m2);
  136. // Add 1:
  137. num = num + den;
  138. }
  139. num = num*m;
  140. den = den*m2;
  141. var cl_LF result = cl_I_to_LF(num,actuallen)/cl_I_to_LF(den,actuallen);
  142. return shorten(result,len);
  143. }
  144. const cl_LF atan_recip_2d (cl_I m, uintC len)
  145. {
  146. var uintC actuallen = len + 1;
  147. var cl_I m2 = m*m+1;
  148. var uintL N = (uintL)(0.69314718*intDsize*actuallen/log(double_approx(m2))) + 1;
  149. CL_ALLOCA_STACK;
  150. var cl_I* pv = (cl_I*) cl_alloca(N*sizeof(cl_I));
  151. var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I));
  152. var uintL n;
  153. new (&pv[0]) cl_I (m);
  154. new (&qv[0]) cl_I (m2);
  155. for (n = 1; n < N; n++) {
  156. new (&pv[n]) cl_I (2*n);
  157. new (&qv[n]) cl_I ((2*n+1)*m2);
  158. }
  159. var cl_rational_series series;
  160. series.av = NULL; series.bv = NULL;
  161. series.pv = pv; series.qv = qv; series.qsv = NULL;
  162. var cl_LF result = eval_rational_series(N,series,actuallen);
  163. for (n = 0; n < N; n++) {
  164. pv[n].~cl_I();
  165. qv[n].~cl_I();
  166. }
  167. return shorten(result,len);
  168. }
  169. // Main program: Compute and display the timings.
  170. int main (int argc, char * argv[])
  171. {
  172. int repetitions = 1;
  173. if ((argc >= 3) && !strcmp(argv[1],"-r")) {
  174. repetitions = atoi(argv[2]);
  175. argc -= 2; argv += 2;
  176. }
  177. if (argc < 2)
  178. exit(1);
  179. cl_I m = (cl_I)argv[1];
  180. uintL len = atoi(argv[2]);
  181. cl_LF p;
  182. ln(cl_I_to_LF(1000,len+10)); // fill cache
  183. // Method 1.
  184. { CL_TIMING;
  185. for (int rep = repetitions; rep > 0; rep--)
  186. { p = atan_recip_1a(m,len); }
  187. }
  188. cout << p << endl;
  189. { CL_TIMING;
  190. for (int rep = repetitions; rep > 0; rep--)
  191. { p = atan_recip_1b(m,len); }
  192. }
  193. cout << p << endl;
  194. { CL_TIMING;
  195. for (int rep = repetitions; rep > 0; rep--)
  196. { p = atan_recip_1c(m,len); }
  197. }
  198. cout << p << endl;
  199. { CL_TIMING;
  200. for (int rep = repetitions; rep > 0; rep--)
  201. { p = atan_recip_1d(m,len); }
  202. }
  203. cout << p << endl;
  204. // Method 2.
  205. { CL_TIMING;
  206. for (int rep = repetitions; rep > 0; rep--)
  207. { p = atan_recip_2a(m,len); }
  208. }
  209. cout << p << endl;
  210. { CL_TIMING;
  211. for (int rep = repetitions; rep > 0; rep--)
  212. { p = atan_recip_2b(m,len); }
  213. }
  214. cout << p << endl;
  215. { CL_TIMING;
  216. for (int rep = repetitions; rep > 0; rep--)
  217. { p = atan_recip_2c(m,len); }
  218. }
  219. cout << p << endl;
  220. { CL_TIMING;
  221. for (int rep = repetitions; rep > 0; rep--)
  222. { p = atan_recip_2d(m,len); }
  223. }
  224. cout << p << endl;
  225. // Method 3.
  226. { CL_TIMING;
  227. for (int rep = repetitions; rep > 0; rep--)
  228. { p = The(cl_LF)(atan(cl_RA_to_LF(1/(cl_RA)m,len))); }
  229. }
  230. cout << p << endl;
  231. }
  232. // Timings of the above algorithms, on an i486 33 MHz, running Linux.
  233. // m = 390112. (For J�rg Arndt's formula (4.15).)
  234. // N 1a 1b 1c 1d 2a 2b 2c 2d 3
  235. // 10 0.0027 0.0018 0.0019 0.0019 0.0032 0.0022 0.0019 0.0019 0.0042
  236. // 25 0.0085 0.0061 0.0058 0.0061 0.0095 0.0069 0.0056 0.0061 0.028
  237. // 50 0.024 0.018 0.017 0.017 0.026 0.020 0.016 0.017 0.149
  238. // 100 0.075 0.061 0.057 0.054 0.079 0.065 0.052 0.052 0.71
  239. // 250 0.41 0.33 0.32 0.26 0.42 0.36 0.28 0.24 3.66
  240. // 500 1.57 1.31 1.22 0.88 1.57 1.36 1.10 0.83 13.7
  241. // 1000 6.08 5.14 4.56 2.76 6.12 5.36 4.06 2.58 45.5
  242. // 2500 36.5 32.2 25.8 10.2 38.4 33.6 22.2 9.1 191
  243. // 5000
  244. // 10000
  245. // asymp. N^2 N^2 N^2 FAST N^2 N^2 N^2 FAST FAST
  246. //
  247. // m = 319. (For J�rg Arndt's formula (4.7).)
  248. // N 1a 1b 1c 1d 2a 2b 2c 2d 3
  249. // 1000 6.06 4.40 9.17 3.82 5.29 3.90 7.50 3.53 50.3
  250. //
  251. // m = 18. (For J�rg Arndt's formula (4.4).)
  252. // N 1a 1b 1c 1d 2a 2b 2c 2d 3
  253. // 1000 11.8 9.0 22.3 6.0 10.2 7.7 17.1 5.7 54.3