You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

273 lines
7.9 KiB

25 years ago
  1. // Computation of arctan(1/m) (m integer) to high precision.
  2. #include "cl_integer.h"
  3. #include "cl_rational.h"
  4. #include "cl_real.h"
  5. #include "cl_lfloat.h"
  6. #include "cl_LF.h"
  7. #include "cl_LF_tran.h"
  8. #include "cl_alloca.h"
  9. #include <stdlib.h>
  10. #include <string.h>
  11. #include "cl_timing.h"
  12. #undef floor
  13. #include <math.h>
  14. #define floor cln_floor
  15. // Method 1: atan(1/m) = sum(n=0..infty, (-1)^n/(2n+1) * 1/m^(2n+1))
  16. // Method 2: atan(1/m) = sum(n=0..infty, 4^n*n!^2/(2n+1)! * m/(m^2+1)^(n+1))
  17. // a. Using long floats. [N^2]
  18. // b. Simulating long floats using integers. [N^2]
  19. // c. Using integers, no binary splitting. [N^2]
  20. // d. Using integers, with binary splitting. [FAST]
  21. // Method 3: general built-in algorithm. [FAST]
  22. // Method 1: atan(1/m) = sum(n=0..infty, (-1)^n/(2n+1) * 1/m^(2n+1))
  23. const cl_LF atan_recip_1a (cl_I m, uintC len)
  24. {
  25. var uintC actuallen = len + 1;
  26. var cl_LF eps = scale_float(cl_I_to_LF(1,actuallen),-intDsize*(sintL)actuallen);
  27. var cl_I m2 = m*m;
  28. var cl_LF fterm = cl_I_to_LF(1,actuallen)/m;
  29. var cl_LF fsum = fterm;
  30. for (var uintL n = 1; fterm >= eps; n++) {
  31. fterm = fterm/m2;
  32. fterm = cl_LF_shortenwith(fterm,eps);
  33. if ((n % 2) == 0)
  34. fsum = fsum + LF_to_LF(fterm/(2*n+1),actuallen);
  35. else
  36. fsum = fsum - LF_to_LF(fterm/(2*n+1),actuallen);
  37. }
  38. return shorten(fsum,len);
  39. }
  40. const cl_LF atan_recip_1b (cl_I m, uintC len)
  41. {
  42. var uintC actuallen = len + 1;
  43. var cl_I m2 = m*m;
  44. var cl_I fterm = floor1((cl_I)1 << (intDsize*actuallen), m);
  45. var cl_I fsum = fterm;
  46. for (var uintL n = 1; fterm > 0; n++) {
  47. fterm = floor1(fterm,m2);
  48. if ((n % 2) == 0)
  49. fsum = fsum + floor1(fterm,2*n+1);
  50. else
  51. fsum = fsum - floor1(fterm,2*n+1);
  52. }
  53. return scale_float(cl_I_to_LF(fsum,len),-intDsize*(sintL)actuallen);
  54. }
  55. const cl_LF atan_recip_1c (cl_I m, uintC len)
  56. {
  57. var uintC actuallen = len + 1;
  58. var cl_I m2 = m*m;
  59. var sintL N = (sintL)(0.69314718*intDsize/2*actuallen/log(cl_double_approx(m))) + 1;
  60. var cl_I num = 0, den = 1; // "lazy rational number"
  61. for (sintL n = N-1; n>=0; n--) {
  62. // Multiply sum with 1/m^2:
  63. den = den * m2;
  64. // Add (-1)^n/(2n+1):
  65. if ((n % 2) == 0)
  66. num = num*(2*n+1) + den;
  67. else
  68. num = num*(2*n+1) - den;
  69. den = den*(2*n+1);
  70. }
  71. den = den*m;
  72. var cl_LF result = cl_I_to_LF(num,actuallen)/cl_I_to_LF(den,actuallen);
  73. return shorten(result,len);
  74. }
  75. const cl_LF atan_recip_1d (cl_I m, uintC len)
  76. {
  77. var uintC actuallen = len + 1;
  78. var cl_I m2 = m*m;
  79. var uintL N = (uintL)(0.69314718*intDsize/2*actuallen/log(cl_double_approx(m))) + 1;
  80. CL_ALLOCA_STACK;
  81. var cl_I* bv = (cl_I*) cl_alloca(N*sizeof(cl_I));
  82. var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I));
  83. var uintL n;
  84. for (n = 0; n < N; n++) {
  85. new (&bv[n]) cl_I ((n % 2) == 0 ? (cl_I)(2*n+1) : -(cl_I)(2*n+1));
  86. new (&qv[n]) cl_I (n==0 ? m : m2);
  87. }
  88. var cl_rational_series series;
  89. series.av = NULL; series.bv = bv;
  90. series.pv = NULL; series.qv = qv; series.qsv = NULL;
  91. var cl_LF result = eval_rational_series(N,series,actuallen);
  92. for (n = 0; n < N; n++) {
  93. bv[n].~cl_I();
  94. qv[n].~cl_I();
  95. }
  96. return shorten(result,len);
  97. }
  98. // Method 2: atan(1/m) = sum(n=0..infty, 4^n*n!^2/(2n+1)! * m/(m^2+1)^(n+1))
  99. const cl_LF atan_recip_2a (cl_I m, uintC len)
  100. {
  101. var uintC actuallen = len + 1;
  102. var cl_LF eps = scale_float(cl_I_to_LF(1,actuallen),-intDsize*(sintL)actuallen);
  103. var cl_I m2 = m*m+1;
  104. var cl_LF fterm = cl_I_to_LF(m,actuallen)/m2;
  105. var cl_LF fsum = fterm;
  106. for (var uintL n = 1; fterm >= eps; n++) {
  107. fterm = The(cl_LF)((2*n)*fterm)/((2*n+1)*m2);
  108. fterm = cl_LF_shortenwith(fterm,eps);
  109. fsum = fsum + LF_to_LF(fterm,actuallen);
  110. }
  111. return shorten(fsum,len);
  112. }
  113. const cl_LF atan_recip_2b (cl_I m, uintC len)
  114. {
  115. var uintC actuallen = len + 1;
  116. var cl_I m2 = m*m+1;
  117. var cl_I fterm = floor1((cl_I)m << (intDsize*actuallen), m2);
  118. var cl_I fsum = fterm;
  119. for (var uintL n = 1; fterm > 0; n++) {
  120. fterm = floor1((2*n)*fterm,(2*n+1)*m2);
  121. fsum = fsum + fterm;
  122. }
  123. return scale_float(cl_I_to_LF(fsum,len),-intDsize*(sintL)actuallen);
  124. }
  125. const cl_LF atan_recip_2c (cl_I m, uintC len)
  126. {
  127. var uintC actuallen = len + 1;
  128. var cl_I m2 = m*m+1;
  129. var uintL N = (uintL)(0.69314718*intDsize*actuallen/log(cl_double_approx(m2))) + 1;
  130. var cl_I num = 0, den = 1; // "lazy rational number"
  131. for (uintL n = N; n>0; n--) {
  132. // Multiply sum with (2n)/(2n+1)(m^2+1):
  133. num = num * (2*n);
  134. den = den * ((2*n+1)*m2);
  135. // Add 1:
  136. num = num + den;
  137. }
  138. num = num*m;
  139. den = den*m2;
  140. var cl_LF result = cl_I_to_LF(num,actuallen)/cl_I_to_LF(den,actuallen);
  141. return shorten(result,len);
  142. }
  143. const cl_LF atan_recip_2d (cl_I m, uintC len)
  144. {
  145. var uintC actuallen = len + 1;
  146. var cl_I m2 = m*m+1;
  147. var uintL N = (uintL)(0.69314718*intDsize*actuallen/log(cl_double_approx(m2))) + 1;
  148. CL_ALLOCA_STACK;
  149. var cl_I* pv = (cl_I*) cl_alloca(N*sizeof(cl_I));
  150. var cl_I* qv = (cl_I*) cl_alloca(N*sizeof(cl_I));
  151. var uintL n;
  152. new (&pv[0]) cl_I (m);
  153. new (&qv[0]) cl_I (m2);
  154. for (n = 1; n < N; n++) {
  155. new (&pv[n]) cl_I (2*n);
  156. new (&qv[n]) cl_I ((2*n+1)*m2);
  157. }
  158. var cl_rational_series series;
  159. series.av = NULL; series.bv = NULL;
  160. series.pv = pv; series.qv = qv; series.qsv = NULL;
  161. var cl_LF result = eval_rational_series(N,series,actuallen);
  162. for (n = 0; n < N; n++) {
  163. pv[n].~cl_I();
  164. qv[n].~cl_I();
  165. }
  166. return shorten(result,len);
  167. }
  168. // Main program: Compute and display the timings.
  169. int main (int argc, char * argv[])
  170. {
  171. int repetitions = 1;
  172. if ((argc >= 3) && !strcmp(argv[1],"-r")) {
  173. repetitions = atoi(argv[2]);
  174. argc -= 2; argv += 2;
  175. }
  176. if (argc < 2)
  177. exit(1);
  178. cl_I m = (cl_I)argv[1];
  179. uintL len = atoi(argv[2]);
  180. cl_LF p;
  181. ln(cl_I_to_LF(1000,len+10)); // fill cache
  182. // Method 1.
  183. { CL_TIMING;
  184. for (int rep = repetitions; rep > 0; rep--)
  185. { p = atan_recip_1a(m,len); }
  186. }
  187. cout << p << endl;
  188. { CL_TIMING;
  189. for (int rep = repetitions; rep > 0; rep--)
  190. { p = atan_recip_1b(m,len); }
  191. }
  192. cout << p << endl;
  193. { CL_TIMING;
  194. for (int rep = repetitions; rep > 0; rep--)
  195. { p = atan_recip_1c(m,len); }
  196. }
  197. cout << p << endl;
  198. { CL_TIMING;
  199. for (int rep = repetitions; rep > 0; rep--)
  200. { p = atan_recip_1d(m,len); }
  201. }
  202. cout << p << endl;
  203. // Method 2.
  204. { CL_TIMING;
  205. for (int rep = repetitions; rep > 0; rep--)
  206. { p = atan_recip_2a(m,len); }
  207. }
  208. cout << p << endl;
  209. { CL_TIMING;
  210. for (int rep = repetitions; rep > 0; rep--)
  211. { p = atan_recip_2b(m,len); }
  212. }
  213. cout << p << endl;
  214. { CL_TIMING;
  215. for (int rep = repetitions; rep > 0; rep--)
  216. { p = atan_recip_2c(m,len); }
  217. }
  218. cout << p << endl;
  219. { CL_TIMING;
  220. for (int rep = repetitions; rep > 0; rep--)
  221. { p = atan_recip_2d(m,len); }
  222. }
  223. cout << p << endl;
  224. // Method 3.
  225. { CL_TIMING;
  226. for (int rep = repetitions; rep > 0; rep--)
  227. { p = The(cl_LF)(atan(cl_RA_to_LF(1/(cl_RA)m,len))); }
  228. }
  229. cout << p << endl;
  230. }
  231. // Timings of the above algorithms, on an i486 33 MHz, running Linux.
  232. // m = 390112. (For J�rg Arndt's formula (4.15).)
  233. // N 1a 1b 1c 1d 2a 2b 2c 2d 3
  234. // 10 0.0027 0.0018 0.0019 0.0019 0.0032 0.0022 0.0019 0.0019 0.0042
  235. // 25 0.0085 0.0061 0.0058 0.0061 0.0095 0.0069 0.0056 0.0061 0.028
  236. // 50 0.024 0.018 0.017 0.017 0.026 0.020 0.016 0.017 0.149
  237. // 100 0.075 0.061 0.057 0.054 0.079 0.065 0.052 0.052 0.71
  238. // 250 0.41 0.33 0.32 0.26 0.42 0.36 0.28 0.24 3.66
  239. // 500 1.57 1.31 1.22 0.88 1.57 1.36 1.10 0.83 13.7
  240. // 1000 6.08 5.14 4.56 2.76 6.12 5.36 4.06 2.58 45.5
  241. // 2500 36.5 32.2 25.8 10.2 38.4 33.6 22.2 9.1 191
  242. // 5000
  243. // 10000
  244. // asymp. N^2 N^2 N^2 FAST N^2 N^2 N^2 FAST FAST
  245. //
  246. // m = 319. (For J�rg Arndt's formula (4.7).)
  247. // N 1a 1b 1c 1d 2a 2b 2c 2d 3
  248. // 1000 6.06 4.40 9.17 3.82 5.29 3.90 7.50 3.53 50.3
  249. //
  250. // m = 18. (For J�rg Arndt's formula (4.4).)
  251. // N 1a 1b 1c 1d 2a 2b 2c 2d 3
  252. // 1000 11.8 9.0 22.3 6.0 10.2 7.7 17.1 5.7 54.3