From 3480230ed3f9464b0367b41232b6e8a09da7d384 Mon Sep 17 00:00:00 2001 From: Richard Kreckel Date: Wed, 2 Nov 2005 23:13:39 +0000 Subject: [PATCH] * src/integer/conv/cl_I_from_digits.cc: Made input of all numbers in non-power-of-two base much faster. * tests/test_I_io.cc: New file... * tests/Makefile.in, tests/test_I.cc: ...used here. --- ChangeLog | 9 +- src/integer/conv/cl_I_from_digits.cc | 284 +++++++++++++++++---------- tests/Makefile.in | 2 +- tests/test_I.cc | 2 + tests/test_I_io.cc | 17 ++ 5 files changed, 204 insertions(+), 110 deletions(-) create mode 100644 tests/test_I_io.cc diff --git a/ChangeLog b/ChangeLog index 6621673..18e8e19 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,11 @@ -2004-10-22 Richard B. Kreckel +2005-11-02 Richard B. Kreckel + + * src/integer/conv/cl_I_from_digits.cc: Made input of all numbers in + non-power-of-two base much faster. + * tests/test_I_io.cc: New file... + * tests/Makefile.in, tests/test_I.cc: ...used here. + +2005-10-22 Richard B. Kreckel * Version 1.1.10 released. diff --git a/src/integer/conv/cl_I_from_digits.cc b/src/integer/conv/cl_I_from_digits.cc index 1f49bbd..7e101ac 100644 --- a/src/integer/conv/cl_I_from_digits.cc +++ b/src/integer/conv/cl_I_from_digits.cc @@ -13,116 +13,184 @@ namespace cln { +static const cl_I digits_to_I_base2 (const char * MSBptr, uintL len, uintD base) +{ + // base is a power of two: write the digits from least significant + // to most significant into the result NUDS. Result needs + // 1+ceiling(len*log(base)/(intDsize*log(2))) or some more digits + CL_ALLOCA_STACK; + var uintD* erg_MSDptr; + var uintC erg_len; + var uintD* erg_LSDptr; + var int b = (base==2 ? 1 : base==4 ? 2 : base==8 ? 3 : base==16 ? 4 : /*base==32*/ 5); + num_stack_alloc(1+(len*b)/intDsize,,erg_LSDptr=); + erg_MSDptr = erg_LSDptr; erg_len = 0; + var uintD d = 0; // resulting digit + var int ch_where = 0; // position of ch inside d + while (len > 0) { + var uintB ch = *(const uintB *)(MSBptr+len-1); // next character + if (ch!='.') { // skip decimal point + // Compute value of ch ('0'-'9','A'-'Z','a'-'z'): + ch = ch - '0'; + if (ch > '9'-'0') { // not a digit? + ch = ch+'0'-'A'+10; + if (ch > 'Z'-'A'+10) {// not an uppercase letter? + ch = ch+'A'-'a'; // must be lowercase! + } + } + d = d | (uintD)ch<= intDsize) { + // d is ready to be written into the NUDS: + lsprefnext(erg_MSDptr) = d; + ch_where = ch_where-intDsize; + d = (uintD)ch >> b-ch_where; // carry + erg_len++; + } + } + len--; + } + if (d != 0) { // is there anything left over? + lsprefnext(erg_MSDptr) = d; + ++erg_len; + } + return NUDS_to_I(erg_MSDptr,erg_len); +} + +// For each base b in [2..36], power_table[b-2] contains the largest exponent e +// such that b^e<2^intDsize, i.e. floor(log(2^intDsize-1,b)). +static const uintC power_table [36-2+1] = { +#if (intDsize==8) + /* base 2..7 */ 7, 5, 3, 3, 3, 2, + /* base 8..15 */ 2, 2, 2, 2, 2, 2, 2, 2, + /* base 16..23 */ 1, 1, 1, 1, 1, 1, 1, 1, + /* base 24..31 */ 1, 1, 1, 1, 1, 1, 1, 1, + /* base 32..36 */ 1, 1, 1, 1, 1 +#endif +#if (intDsize==16) + /* base 2..7 */ 15, 10, 7, 6, 6, 5, + /* base 8..15 */ 5, 5, 4, 4, 4, 4, 4, 4, + /* base 16..23 */ 3, 3, 3, 3, 3, 3, 3, 3, + /* base 24..31 */ 3, 3, 3, 3, 3, 3, 3, 3, + /* base 32..36 */ 3, 3, 3, 3, 3 +#endif +#if (intDsize==32) + /* base 2..7 */ 31, 20, 15, 13, 12, 11, + /* base 8..15 */ 10, 10, 9, 9, 8, 8, 8, 8, + /* base 16..23 */ 7, 7, 7, 7, 7, 7, 7, 7, + /* base 24..31 */ 6, 6, 6, 6, 6, 6, 6, 6, + /* base 32..36 */ 6, 6, 6, 6, 6 +#endif +#if (intDsize==64) + /* base 2..7 */ 63, 40, 31, 27, 24, 22, + /* base 8..15 */ 21, 20, 19, 18, 17, 17, 16, 16, + /* base 16..23 */ 15, 15, 15, 15, 14, 14, 14, 14, + /* base 24..31 */ 13, 13, 13, 13, 13, 13, 13, 12, + /* base 32..36 */ 12, 12, 12, 12, 12 +#endif +}; + +static const cl_I digits_to_I_baseN (const char * MSBptr, uintL len, uintD base) +{ + // base is not a power of two: Add digits one by one. Result nees + // 1+ceiling(len*log(base)/(intDsize*log(2))) or some more digits. + CL_ALLOCA_STACK; + var uintD* erg_MSDptr; + var uintC erg_len; + var uintD* erg_LSDptr; + var uintL need = 1+floor(len,intDsize*256); // > len/(intDsize*256) >=0 + switch (base) { // multiply need with ceiling(256*log(base)/log(2)): + case 2: need = 256*need; break; + case 3: need = 406*need; break; + case 4: need = 512*need; break; + case 5: need = 595*need; break; + case 6: need = 662*need; break; + case 7: need = 719*need; break; + case 8: need = 768*need; break; + case 9: need = 812*need; break; + case 10: need = 851*need; break; + case 11: need = 886*need; break; + case 12: need = 918*need; break; + case 13: need = 948*need; break; + case 14: need = 975*need; break; + case 15: need = 1001*need; break; + case 16: need = 1024*need; break; + case 17: need = 1047*need; break; + case 18: need = 1068*need; break; + case 19: need = 1088*need; break; + case 20: need = 1107*need; break; + case 21: need = 1125*need; break; + case 22: need = 1142*need; break; + case 23: need = 1159*need; break; + case 24: need = 1174*need; break; + case 25: need = 1189*need; break; + case 26: need = 1204*need; break; + case 27: need = 1218*need; break; + case 28: need = 1231*need; break; + case 29: need = 1244*need; break; + case 30: need = 1257*need; break; + case 31: need = 1269*need; break; + case 32: need = 1280*need; break; + case 33: need = 1292*need; break; + case 34: need = 1303*need; break; + case 35: need = 1314*need; break; + case 36: need = 1324*need; break; + default: NOTREACHED + } + // Now we have need >= len*log(base)/(intDsize*log(2)). + need += 1; + // Add digits one by one: + num_stack_alloc(need,,erg_LSDptr=); + // erg_MSDptr/erg_len/erg_LSDptr is a NUDS, erg_len < need. + erg_MSDptr = erg_LSDptr; erg_len = 0; + while (len > 0) { + var uintD newdigit = 0; + var uintC chx = 0; + var uintD factor = 1; + while (chx < power_table[base-2] && len > 0) { + var uintB ch = *(const uintB *)MSBptr; MSBptr++; // next character + if (ch!='.') { // skip decimal point + // Compute value of ('0'-'9','A'-'Z','a'-'z'): + ch = ch-'0'; + if (ch > '9'-'0') { // not a digit? + ch = ch+'0'-'A'+10; + if (ch > 'Z'-'A'+10) {// not an uppercase letter? + ch = ch+'A'-'a'; // must be lowercase! + } + } + factor = factor*base; + newdigit = base*newdigit+ch; + chx++; + } + len--; + } + var uintD carry = mulusmall_loop_lsp(factor,erg_LSDptr,erg_len,newdigit); + if (carry!=0) { + // need to extend NUDS: + lsprefnext(erg_MSDptr) = carry; + erg_len++; + } + } + return NUDS_to_I(erg_MSDptr,erg_len); +} + const cl_I digits_to_I (const char * MSBptr, uintL len, uintD base) { - CL_ALLOCA_STACK; - var uintD* erg_MSDptr; - var uintC erg_len; - var uintD* erg_LSDptr; - if ((base & (base-1)) == 0) { - // Fast path for powers of two: write the digits from least - // significant to most significant into the result NUDS. - var int b = (base==2 ? 1 : base==4 ? 2 : base==8 ? 3 : base==16 ? 4 : /*base==32*/ 5); - num_stack_alloc(1+(len*b)/intDsize,,erg_LSDptr=); - erg_MSDptr = erg_LSDptr; erg_len = 0; - var uintD d = 0; // resulting digit - var int ch_where = 0; // position of ch inside d - while (len > 0) - { var uintB ch = *(const uintB *)(MSBptr+len-1); // next character - if (!(ch=='.')) // skip decimal point - { // Compute value of ch ('0'-'9','A'-'Z','a'-'z'): - ch = ch - '0'; - if (ch > '9'-'0') // not a digit? - { ch = ch+'0'-'A'+10; - if (ch > 'Z'-'A'+10) // not an uppercase letter? - { ch = ch+'A'-'a'; } // must be lowercase! - } - d = d | (uintD)ch<= intDsize) { - // d is ready to be written into the NUDS: - lsprefnext(erg_MSDptr) = d; - ch_where = ch_where-intDsize; - d = (uintD)ch >> b-ch_where; // carry - erg_len++; - } - } - len--; - } - if (d != 0) { // is there anything left over? - lsprefnext(erg_MSDptr) = d; - ++erg_len; - } - } else { - // Slow path: Add digits one by one for non-powers of two. - // Platz fürs Ergebnis: - // 1+ceiling(len*log(base)/(intDsize*log(2))) or some more digits - var uintL need = 1+floor(len,intDsize*256); // > len/(intDsize*256) >=0 - switch (base) // multiply need with ceiling(256*log(base)/log(2)): - { - //case 2: need = 256*need; break; - case 3: need = 406*need; break; - //case 4: need = 512*need; break; - case 5: need = 595*need; break; - case 6: need = 662*need; break; - case 7: need = 719*need; break; - //case 8: need = 768*need; break; - case 9: need = 812*need; break; - case 10: need = 851*need; break; - case 11: need = 886*need; break; - case 12: need = 918*need; break; - case 13: need = 948*need; break; - case 14: need = 975*need; break; - case 15: need = 1001*need; break; - //case 16: need = 1024*need; break; - case 17: need = 1047*need; break; - case 18: need = 1068*need; break; - case 19: need = 1088*need; break; - case 20: need = 1107*need; break; - case 21: need = 1125*need; break; - case 22: need = 1142*need; break; - case 23: need = 1159*need; break; - case 24: need = 1174*need; break; - case 25: need = 1189*need; break; - case 26: need = 1204*need; break; - case 27: need = 1218*need; break; - case 28: need = 1231*need; break; - case 29: need = 1244*need; break; - case 30: need = 1257*need; break; - case 31: need = 1269*need; break; - //case 32: need = 1280*need; break; - case 33: need = 1292*need; break; - case 34: need = 1303*need; break; - case 35: need = 1314*need; break; - case 36: need = 1324*need; break; - default: NOTREACHED - } - // Now we have need >= len*log(base)/(intDsize*log(2)). - need += 1; - // Add digits one by one: - num_stack_alloc(need,,erg_LSDptr=); - erg_MSDptr = erg_LSDptr; erg_len = 0; - while (len > 0) - { // erg_MSDptr/erg_len/erg_LSDptr is a NUDS, erg_len < need. - var uintB ch = *(const uintB *)MSBptr; MSBptr++; // next character - if (!(ch=='.')) // skip decimal point - { // Compute value of ('0'-'9','A'-'Z','a'-'z'): - ch = ch - '0'; - if (ch > '9'-'0') // not a digit? - { ch = ch+'0'-'A'+10; - if (ch > 'Z'-'A'+10) // not an uppercase letter? - { ch = ch+'A'-'a'; } // must be lowercase! - } - // multiply erg with base and add ch: - {var uintD carry = mulusmall_loop_lsp(base,erg_LSDptr,erg_len,ch); - if (!(carry==0)) - // need to extend NUDS: - { lsprefnext(erg_MSDptr) = carry; erg_len++; } - }} - len--; - } - } - return NUDS_to_I(erg_MSDptr,erg_len); + if ((base & (base-1)) == 0) { + return digits_to_I_base2(MSBptr, len, base); + } else { + // This is quite insensitive to the breakeven point. + // On a 1GHz Athlon I get approximately: + // base 3: breakeven == 15000 + // base 10: breakeven == 5000 + // base 36: breakeven == 2000 + if (len>50000/base) + // Divide-and-conquer: + return digits_to_I(MSBptr,len/2,base)*expt_pos(base,len-len/2) + +digits_to_I(MSBptr+len/2,len-len/2,base); + else + return digits_to_I_baseN(MSBptr, len, base); + } } } // namespace cln diff --git a/tests/Makefile.in b/tests/Makefile.in index 18802f0..dc6bf80 100644 --- a/tests/Makefile.in +++ b/tests/Makefile.in @@ -56,7 +56,7 @@ MODULES_tests = tests \ test_I_gcd test_I_xgcd \ test_I_ash test_I_evenp test_I_oddp test_I_lognot test_I_logand test_I_logandc1 test_I_logandc2 test_I_logior test_I_logorc1 test_I_logorc2 test_I_logxor test_I_lognand test_I_lognor test_I_logeqv test_I_boole test_I_logbitp test_I_logtest test_I_ldb test_I_ldbtest test_I_mkf test_I_dpb test_I_dpf test_I_logcount test_I_ilength test_I_ord2 test_I_power2p \ test_I_isqrt test_I_sqrtp \ - test_I_GV \ + test_I_io test_I_GV \ test_MI \ test_MI_canonhom test_MI_plus test_MI_minus test_MI_mul test_MI_recip test_MI_div test_MI_expt \ test_nt \ diff --git a/tests/test_I.cc b/tests/test_I.cc index 6073620..8e5ea63 100644 --- a/tests/test_I.cc +++ b/tests/test_I.cc @@ -43,6 +43,7 @@ extern int test_I_power2p (int iterations); extern int test_I_isqrt (int iterations); extern int test_I_sqrtp (int iterations); // Miscellaneous. +extern int test_I_io (int iterations); extern int test_I_GV (int iterations); #define RUN(tester,iterations) \ @@ -96,6 +97,7 @@ int test_I (int iterations) RUN(test_I_isqrt,iterations); RUN(test_I_sqrtp,iterations); // Miscellaneous. + RUN(test_I_io,iterations); RUN(test_I_GV,iterations); return error; } diff --git a/tests/test_I_io.cc b/tests/test_I_io.cc new file mode 100644 index 0000000..cbc4b1e --- /dev/null +++ b/tests/test_I_io.cc @@ -0,0 +1,17 @@ +#include "test_I.h" +#include +#include + +int test_I_io (int iterations) +{ + int error = 0; + for (int i = iterations; i > 0; i--) { + cl_I a = testrandom_I(); + int base = iterations % (36-1) + 2; + cl_read_flags rflags = {syntax_integer, lsyntax_standard, base}; + stringstream buf; + print_integer(buf, base, a); + ASSERT1(a == read_integer(buf, rflags), a); + } + return error; +}