You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

274 lines
7.9 KiB

25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
25 years ago
  1. // Computation of arctan(1/m) (m integer) to high precision.
  2. #include "cln/integer.h"
  3. #include "cln/rational.h"
  4. #include "cln/real.h"
  5. #include "cln/lfloat.h"
  6. #include "cl_LF.h"
  7. #include "cl_LF_tran.h"
  8. #include "cl_alloca.h"
  9. #include <stdlib.h>
  10. #include <string.h>
  11. #include "cln/timing.h"
  12. #undef floor
  13. #include <math.h>
  14. #define floor cln_floor
  15. using namespace cln;
  16. // Method 1: atan(1/m) = sum(n=0..infty, (-1)^n/(2n+1) * 1/m^(2n+1))
  17. // Method 2: atan(1/m) = sum(n=0..infty, 4^n*n!^2/(2n+1)! * m/(m^2+1)^(n+1))
  18. // a. Using long floats. [N^2]
  19. // b. Simulating long floats using integers. [N^2]
  20. // c. Using integers, no binary splitting. [N^2]
  21. // d. Using integers, with binary splitting. [FAST]
  22. // Method 3: general built-in algorithm. [FAST]
  23. // Method 1: atan(1/m) = sum(n=0..infty, (-1)^n/(2n+1) * 1/m^(2n+1))
  24. const cl_LF atan_recip_1a (cl_I m, uintC len)
  25. {
  26. var uintC actuallen = len + 1;
  27. var cl_LF eps = scale_float(cl_I_to_LF(1,actuallen),-intDsize*(sintL)actuallen);
  28. var cl_I m2 = m*m;
  29. var cl_LF fterm = cl_I_to_LF(1,actuallen)/m;
  30. var cl_LF fsum = fterm;
  31. for (var uintL n = 1; fterm >= eps; n++) {
  32. fterm = fterm/m2;
  33. fterm = cl_LF_shortenwith(fterm,eps);
  34. if ((n % 2) == 0)
  35. fsum = fsum + LF_to_LF(fterm/(2*n+1),actuallen);
  36. else
  37. fsum = fsum - LF_to_LF(fterm/(2*n+1),actuallen);
  38. }
  39. return shorten(fsum,len);
  40. }
  41. const cl_LF atan_recip_1b (cl_I m, uintC len)
  42. {
  43. var uintC actuallen = len + 1;
  44. var cl_I m2 = m*m;
  45. var cl_I fterm = floor1((cl_I)1 << (intDsize*actuallen), m);
  46. var cl_I fsum = fterm;
  47. for (var uintL n = 1; fterm > 0; n++) {
  48. fterm = floor1(fterm,m2);
  49. if ((n % 2) == 0)
  50. fsum = fsum + floor1(fterm,2*n+1);
  51. else
  52. fsum = fsum - floor1(fterm,2*n+1);
  53. }
  54. return scale_float(cl_I_to_LF(fsum,len),-intDsize*(sintL)actuallen);
  55. }
  56. const cl_LF atan_recip_1c (cl_I m, uintC len)
  57. {
  58. var uintC actuallen = len + 1;
  59. var cl_I m2 = m*m;
  60. var sintL N = (sintL)(0.69314718*intDsize/2*actuallen/log(double_approx(m))) + 1;
  61. var cl_I num = 0, den = 1; // "lazy rational number"
  62. for (sintL n = N-1; n>=0; n--) {
  63. // Multiply sum with 1/m^2:
  64. den = den * m2;
  65. // Add (-1)^n/(2n+1):
  66. if ((n % 2) == 0)
  67. num = num*(2*n+1) + den;
  68. else
  69. num = num*(2*n+1) - den;
  70. den = den*(2*n+1);
  71. }
  72. den = den*m;
  73. var cl_LF result = cl_I_to_LF(num,actuallen)/cl_I_to_LF(den,actuallen);
  74. return shorten(result,len);
  75. }
  76. const cl_LF atan_recip_1d (cl_I m, uintC len)
  77. {
  78. var uintC actuallen = len + 1;
  79. var cl_I m2 = m*m;
  80. var uintL N = (uintL)(0.69314718*intDsize/2*actuallen/log(double_approx(m))) + 1;
  81. CL_ALLOCA_STACK;
  82. var cl_I* bv = (cl_I*) cl_alloca(N*sizeof(cl_I));
  83. var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I));
  84. var uintL n;
  85. for (n = 0; n < N; n++) {
  86. new (&bv[n]) cl_I ((n % 2) == 0 ? (cl_I)(2*n+1) : -(cl_I)(2*n+1));
  87. new (&qv[n]) cl_I (n==0 ? m : m2);
  88. }
  89. var cl_rational_series series;
  90. series.av = NULL; series.bv = bv;
  91. series.pv = NULL; series.qv = qv; series.qsv = NULL;
  92. var cl_LF result = eval_rational_series(N,series,actuallen);
  93. for (n = 0; n < N; n++) {
  94. bv[n].~cl_I();
  95. qv[n].~cl_I();
  96. }
  97. return shorten(result,len);
  98. }
  99. // Method 2: atan(1/m) = sum(n=0..infty, 4^n*n!^2/(2n+1)! * m/(m^2+1)^(n+1))
  100. const cl_LF atan_recip_2a (cl_I m, uintC len)
  101. {
  102. var uintC actuallen = len + 1;
  103. var cl_LF eps = scale_float(cl_I_to_LF(1,actuallen),-intDsize*(sintL)actuallen);
  104. var cl_I m2 = m*m+1;
  105. var cl_LF fterm = cl_I_to_LF(m,actuallen)/m2;
  106. var cl_LF fsum = fterm;
  107. for (var uintL n = 1; fterm >= eps; n++) {
  108. fterm = The(cl_LF)((2*n)*fterm)/((2*n+1)*m2);
  109. fterm = cl_LF_shortenwith(fterm,eps);
  110. fsum = fsum + LF_to_LF(fterm,actuallen);
  111. }
  112. return shorten(fsum,len);
  113. }
  114. const cl_LF atan_recip_2b (cl_I m, uintC len)
  115. {
  116. var uintC actuallen = len + 1;
  117. var cl_I m2 = m*m+1;
  118. var cl_I fterm = floor1((cl_I)m << (intDsize*actuallen), m2);
  119. var cl_I fsum = fterm;
  120. for (var uintL n = 1; fterm > 0; n++) {
  121. fterm = floor1((2*n)*fterm,(2*n+1)*m2);
  122. fsum = fsum + fterm;
  123. }
  124. return scale_float(cl_I_to_LF(fsum,len),-intDsize*(sintL)actuallen);
  125. }
  126. const cl_LF atan_recip_2c (cl_I m, uintC len)
  127. {
  128. var uintC actuallen = len + 1;
  129. var cl_I m2 = m*m+1;
  130. var uintL N = (uintL)(0.69314718*intDsize*actuallen/log(double_approx(m2))) + 1;
  131. var cl_I num = 0, den = 1; // "lazy rational number"
  132. for (uintL n = N; n>0; n--) {
  133. // Multiply sum with (2n)/(2n+1)(m^2+1):
  134. num = num * (2*n);
  135. den = den * ((2*n+1)*m2);
  136. // Add 1:
  137. num = num + den;
  138. }
  139. num = num*m;
  140. den = den*m2;
  141. var cl_LF result = cl_I_to_LF(num,actuallen)/cl_I_to_LF(den,actuallen);
  142. return shorten(result,len);
  143. }
  144. const cl_LF atan_recip_2d (cl_I m, uintC len)
  145. {
  146. var uintC actuallen = len + 1;
  147. var cl_I m2 = m*m+1;
  148. var uintL N = (uintL)(0.69314718*intDsize*actuallen/log(double_approx(m2))) + 1;
  149. CL_ALLOCA_STACK;
  150. var cl_I* pv = (cl_I*) cl_alloca(N*sizeof(cl_I));
  151. var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I));
  152. var uintL n;
  153. new (&pv[0]) cl_I (m);
  154. new (&qv[0]) cl_I (m2);
  155. for (n = 1; n < N; n++) {
  156. new (&pv[n]) cl_I (2*n);
  157. new (&qv[n]) cl_I ((2*n+1)*m2);
  158. }
  159. var cl_rational_series series;
  160. series.av = NULL; series.bv = NULL;
  161. series.pv = pv; series.qv = qv; series.qsv = NULL;
  162. var cl_LF result = eval_rational_series(N,series,actuallen);
  163. for (n = 0; n < N; n++) {
  164. pv[n].~cl_I();
  165. qv[n].~cl_I();
  166. }
  167. return shorten(result,len);
  168. }
  169. // Main program: Compute and display the timings.
  170. int main (int argc, char * argv[])
  171. {
  172. int repetitions = 1;
  173. if ((argc >= 3) && !strcmp(argv[1],"-r")) {
  174. repetitions = atoi(argv[2]);
  175. argc -= 2; argv += 2;
  176. }
  177. if (argc < 2)
  178. exit(1);
  179. cl_I m = (cl_I)argv[1];
  180. uintL len = atoi(argv[2]);
  181. cl_LF p;
  182. ln(cl_I_to_LF(1000,len+10)); // fill cache
  183. // Method 1.
  184. { CL_TIMING;
  185. for (int rep = repetitions; rep > 0; rep--)
  186. { p = atan_recip_1a(m,len); }
  187. }
  188. cout << p << endl;
  189. { CL_TIMING;
  190. for (int rep = repetitions; rep > 0; rep--)
  191. { p = atan_recip_1b(m,len); }
  192. }
  193. cout << p << endl;
  194. { CL_TIMING;
  195. for (int rep = repetitions; rep > 0; rep--)
  196. { p = atan_recip_1c(m,len); }
  197. }
  198. cout << p << endl;
  199. { CL_TIMING;
  200. for (int rep = repetitions; rep > 0; rep--)
  201. { p = atan_recip_1d(m,len); }
  202. }
  203. cout << p << endl;
  204. // Method 2.
  205. { CL_TIMING;
  206. for (int rep = repetitions; rep > 0; rep--)
  207. { p = atan_recip_2a(m,len); }
  208. }
  209. cout << p << endl;
  210. { CL_TIMING;
  211. for (int rep = repetitions; rep > 0; rep--)
  212. { p = atan_recip_2b(m,len); }
  213. }
  214. cout << p << endl;
  215. { CL_TIMING;
  216. for (int rep = repetitions; rep > 0; rep--)
  217. { p = atan_recip_2c(m,len); }
  218. }
  219. cout << p << endl;
  220. { CL_TIMING;
  221. for (int rep = repetitions; rep > 0; rep--)
  222. { p = atan_recip_2d(m,len); }
  223. }
  224. cout << p << endl;
  225. // Method 3.
  226. { CL_TIMING;
  227. for (int rep = repetitions; rep > 0; rep--)
  228. { p = The(cl_LF)(atan(cl_RA_to_LF(1/(cl_RA)m,len))); }
  229. }
  230. cout << p << endl;
  231. }
  232. // Timings of the above algorithms, on an i486 33 MHz, running Linux.
  233. // m = 390112. (For J�rg Arndt's formula (4.15).)
  234. // N 1a 1b 1c 1d 2a 2b 2c 2d 3
  235. // 10 0.0027 0.0018 0.0019 0.0019 0.0032 0.0022 0.0019 0.0019 0.0042
  236. // 25 0.0085 0.0061 0.0058 0.0061 0.0095 0.0069 0.0056 0.0061 0.028
  237. // 50 0.024 0.018 0.017 0.017 0.026 0.020 0.016 0.017 0.149
  238. // 100 0.075 0.061 0.057 0.054 0.079 0.065 0.052 0.052 0.71
  239. // 250 0.41 0.33 0.32 0.26 0.42 0.36 0.28 0.24 3.66
  240. // 500 1.57 1.31 1.22 0.88 1.57 1.36 1.10 0.83 13.7
  241. // 1000 6.08 5.14 4.56 2.76 6.12 5.36 4.06 2.58 45.5
  242. // 2500 36.5 32.2 25.8 10.2 38.4 33.6 22.2 9.1 191
  243. // 5000
  244. // 10000
  245. // asymp. N^2 N^2 N^2 FAST N^2 N^2 N^2 FAST FAST
  246. //
  247. // m = 319. (For J�rg Arndt's formula (4.7).)
  248. // N 1a 1b 1c 1d 2a 2b 2c 2d 3
  249. // 1000 6.06 4.40 9.17 3.82 5.29 3.90 7.50 3.53 50.3
  250. //
  251. // m = 18. (For J�rg Arndt's formula (4.4).)
  252. // N 1a 1b 1c 1d 2a 2b 2c 2d 3
  253. // 1000 11.8 9.0 22.3 6.0 10.2 7.7 17.1 5.7 54.3