|
@ -171,7 +171,7 @@ GLABEL(mulu32_) |
|
|
MUL a3,ip,a3 // high section of result
|
|
|
MUL a3,ip,a3 // high section of result
|
|
|
ADDS a2,a2,a1 // add middle sections
|
|
|
ADDS a2,a2,a1 // add middle sections
|
|
|
// (can't use mla as we need carry)
|
|
|
// (can't use mla as we need carry)
|
|
|
ADDCS a3,a3,#&10000 // carry from above add
|
|
|
|
|
|
|
|
|
ADDCS a3,a3,#0x10000 // carry from above add
|
|
|
ADDS a1,a4,a2,LSL #16 // x is now bottom 32 bits of result
|
|
|
ADDS a1,a4,a2,LSL #16 // x is now bottom 32 bits of result
|
|
|
ADC a2,a3,a2,LSR #16 // hi is top 32 bits
|
|
|
ADC a2,a3,a2,LSR #16 // hi is top 32 bits
|
|
|
#endif
|
|
|
#endif
|
|
@ -257,7 +257,7 @@ GLABEL(divu_6432_3232_) |
|
|
STMFD sp!, {v1,v2,v3,v4,v5,v6,lr} |
|
|
STMFD sp!, {v1,v2,v3,v4,v5,v6,lr} |
|
|
MOV v2, a2 // = xlo
|
|
|
MOV v2, a2 // = xlo
|
|
|
MOV v1, a3 // = y
|
|
|
MOV v1, a3 // = y
|
|
|
CMP a3,#&10000 // y <= (uint32)(bit(16)-1)
|
|
|
|
|
|
|
|
|
CMP a3,#0x10000 // y <= (uint32)(bit(16)-1)
|
|
|
BCS divu_6432_3232_l1 |
|
|
BCS divu_6432_3232_l1 |
|
|
MOV a2, v2, LSR #16 |
|
|
MOV a2, v2, LSR #16 |
|
|
ORR a1, a2, a1, ASL #16 // = highlow32(low16(xhi),high16(xlo))
|
|
|
ORR a1, a2, a1, ASL #16 // = highlow32(low16(xhi),high16(xlo))
|
|
@ -299,7 +299,7 @@ LABEL(divu_6432_3232_l1) |
|
|
RSBNE a1, v3, #32 // { xhi = (xhi << s)
|
|
|
RSBNE a1, v3, #32 // { xhi = (xhi << s)
|
|
|
ORRNE a1, a2, v2, LSR a1 // | (xlo >> (32-s));
|
|
|
ORRNE a1, a2, v2, LSR a1 // | (xlo >> (32-s));
|
|
|
MOVNE v2, v2, ASL v3 // xlo = xlo << s; }
|
|
|
MOVNE v2, v2, ASL v3 // xlo = xlo << s; }
|
|
|
ADD a2, v1, #&10000 // y1_1 = high16(y)+1
|
|
|
|
|
|
|
|
|
ADD a2, v1, #0x10000 // y1_1 = high16(y)+1
|
|
|
MOVS v5, a2, LSR #16 // if (y1_1 = 0)
|
|
|
MOVS v5, a2, LSR #16 // if (y1_1 = 0)
|
|
|
MOVEQ v4, a1, ASL #16 // r16 = low16(xhi) * 2^16
|
|
|
MOVEQ v4, a1, ASL #16 // r16 = low16(xhi) * 2^16
|
|
|
MOVEQ a1, a1, LSR #16 // q1 = high16(xhi)
|
|
|
MOVEQ a1, a1, LSR #16 // q1 = high16(xhi)
|
|
@ -1878,7 +1878,7 @@ LABEL(mulu32_64_vregs) |
|
|
MUL v2,v1,v2 // high section of result
|
|
|
MUL v2,v1,v2 // high section of result
|
|
|
ADDS ip,ip,v3 // add middle sections
|
|
|
ADDS ip,ip,v3 // add middle sections
|
|
|
// (can't use mla as we need carry)
|
|
|
// (can't use mla as we need carry)
|
|
|
ADDCS v2,v2,#&10000 // carry from above add
|
|
|
|
|
|
|
|
|
ADDCS v2,v2,#0x10000 // carry from above add
|
|
|
ADDS v1,v4,ip,LSL #16 // x is now bottom 32 bits of result
|
|
|
ADDS v1,v4,ip,LSL #16 // x is now bottom 32 bits of result
|
|
|
ADC ip,v2,ip,LSR #16 // hi is top 32 bits
|
|
|
ADC ip,v2,ip,LSR #16 // hi is top 32 bits
|
|
|
MOVS pc,lr |
|
|
MOVS pc,lr |
|
@ -3351,7 +3351,7 @@ LABEL(mulu32_64_vregs) |
|
|
MUL v2,v1,v2 // high section of result
|
|
|
MUL v2,v1,v2 // high section of result
|
|
|
ADDS ip,ip,v3 // add middle sections
|
|
|
ADDS ip,ip,v3 // add middle sections
|
|
|
// (can't use mla as we need carry)
|
|
|
// (can't use mla as we need carry)
|
|
|
ADDCS v2,v2,#&10000 // carry from above add
|
|
|
|
|
|
|
|
|
ADDCS v2,v2,#0x10000 // carry from above add
|
|
|
ADDS v1,v4,ip,LSL #16 // x is now bottom 32 bits of result
|
|
|
ADDS v1,v4,ip,LSL #16 // x is now bottom 32 bits of result
|
|
|
ADC ip,v2,ip,LSR #16 // hi is top 32 bits
|
|
|
ADC ip,v2,ip,LSR #16 // hi is top 32 bits
|
|
|
MOVS pc,lr |
|
|
MOVS pc,lr |
|
|