oscar64/include/fixmath.c

912 lines
9.3 KiB
C

#include "fixmath.h"
unsigned long lmul16u(unsigned x, unsigned y)
{
__asm
{
lda #0
sta accu + 2
sta accu + 3
ldx #16
L1: lsr x + 1
ror x
bcc W1
clc
lda accu + 2
adc y
sta accu + 2
lda accu + 3
adc y + 1
sta accu + 3
W1:
ror accu + 3
ror accu + 2
ror accu + 1
ror accu
dex
bne L1
}
}
long lmul16s(int x, int y)
{
__asm
{
bit y + 1
bpl W0
sec
lda #0
sbc y
sta y
lda #0
sbc y + 1
sta y + 1
sec
lda #0
sbc x
sta x
lda #0
sbc x + 1
sta x + 1
W0:
ldx #15
lda #0
sta accu + 2
L1: lsr x + 1
ror x
bcc W1
tay
clc
lda accu + 2
adc y
sta accu + 2
tya
adc y + 1
W1:
ror
ror accu + 2
ror accu + 1
ror accu
dex
bne L1
lsr x
bcc W2
tay
sec
lda accu + 2
sbc y
sta accu + 2
tya
sbc y + 1
sec
W2:
ror
ror accu + 2
ror accu + 1
ror accu
sta accu + 3
}
}
inline int lmul12f4s(int x, int y)
{
return (int)(lmul16s(x, y) >> 4);
}
inline int lmul8f8s(int x, int y)
{
return (int)(lmul16s(x, y) >> 8);
}
inline unsigned long lsqr4f12s(int x)
{
if (x < 0) x = -x;
return lmul16u(x, x);
}
int lmul4f12s(int x, int y)
{
__asm
{
sec
lda x
ror
sta accu
lda #0
sta accu + 1
bcc W4
L2:
tay
clc
lda accu + 1
adc y
sta accu + 1
tya
adc y + 1
W4:
ror
ror accu + 1
lsr accu
bcc W4
bne L2
ldx x + 1
stx accu
ldx #7
lsr accu
L1:
bcc W1
tay
clc
lda accu + 1
adc y
sta accu + 1
tya
adc y + 1
W1:
ror
ror accu + 1
ror accu
dex
bne L1
bcc W2
tay
// sec ; we know it is set here
lda accu + 1
sbc y
sta accu + 1
tya
sbc y + 1
W2:
ror
ror accu + 1
ror accu
bit y + 1
bpl W3
tax
sec
lda accu + 1
sbc x
sta accu + 1
txa
sbc x + 1
W3:
lsr
ror accu + 1
ror accu
lsr
ror accu + 1
ror accu
lsr
ror accu + 1
ror accu
lsr
ror accu + 1
ror accu
}
}
unsigned ldiv16u(unsigned long x, unsigned y)
{
__asm
{
lda #0
sta accu
sta accu + 1
ldx #17
L1:
sec
lda x + 2
sbc y
tay
lda x + 3
sbc y + 1
bcc W1
sta x + 3
sty x + 2
W1:
rol accu
rol accu + 1
asl x
rol x + 1
rol x + 2
rol x + 3
dex
beq E1
bcc L1
lda x + 2
sbc y
sta x + 2
lda x + 3
sbc y + 1
sta x + 3
sec
bcs W1
E1:
}
}
int ldiv16s(long x, int y)
{
if (x < 0)
{
if (y < 0)
return ldiv16u(-x, - y);
else
return -ldiv16u(-x, y);
}
else if (y < 0)
return -ldiv16u(x, -y);
else
return ldiv16u(x, y);
}
inline int ldiv12f4s(int x, int y)
{
return (int)(ldiv16s((long)x << 4, y));
}
inline int ldiv8f8s(int x, int y)
{
return (int)(ldiv16s((long)x << 8, y));
}
inline int ldiv4f12s(int x, int y)
{
return (int)(ldiv16s((long)x << 12, y));
}
unsigned lmuldiv16u(unsigned a, unsigned b, unsigned c)
{
__asm
{
lda #0
sta __tmp + 2
sta __tmp + 3
lda a
sec
T1:
ldy #8
L1:
ror
bcc W1
tax
clc
lda __tmp + 2
adc b
sta __tmp + 2
lda __tmp + 3
adc b + 1
sta __tmp + 3
txa
W1:
ror __tmp + 3
ror __tmp + 2
dey
bne L1
ror
bcc T2
sta __tmp + 0
lda a + 1
clc
bcc T1
T2:
sec
L3:
sta __tmp + 1
ldx #8
L2:
rol __tmp + 1
rol __tmp + 2
rol __tmp + 3
bcc W3
lda __tmp + 2
sbc c
tay
lda __tmp + 3
sbc c + 1
sec
bcs W4
W3:
sec
lda __tmp + 2
sbc c
tay
lda __tmp + 3
sbc c + 1
bcc W2
W4:
sta __tmp + 3
sty __tmp + 2
W2:
dex
bne L2
lda __tmp + 1
rol
bcc T3
sta accu + 1
lda __tmp + 0
clc
bcc L3
T3:
sta accu
}
}
int lmuldiv16s(int a, int b, int c)
{
bool sign = false;
if (a < 0)
{
a = -a;
sign = !sign;
}
if (b < 0)
{
b = -b;
sign = !sign;
}
if (c < 0)
{
c = -c;
sign = !sign;
}
__asm
{
lda #0
sta __tmp + 2
sta __tmp + 3
lda a
sec
T1:
ldy #8
L1:
ror
bcc W1
tax
clc
lda __tmp + 2
adc b
sta __tmp + 2
lda __tmp + 3
adc b + 1
sta __tmp + 3
txa
W1:
ror __tmp + 3
ror __tmp + 2
dey
bne L1
ror
bcc T2
sta __tmp + 0
lda a + 1
clc
bcc T1
T2:
sec
L3:
sta __tmp + 1
ldx #8
L2:
rol __tmp + 1
rol __tmp + 2
rol __tmp + 3
bcc W3
lda __tmp + 2
sbc c
tay
lda __tmp + 3
sbc c + 1
sec
bcs W4
W3:
sec
lda __tmp + 2
sbc c
tay
lda __tmp + 3
sbc c + 1
bcc W2
W4:
sta __tmp + 3
sty __tmp + 2
W2:
dex
bne L2
lda __tmp + 1
rol
bcc T3
sta accu + 1
lda __tmp + 0
clc
bcc L3
T3:
sta accu
lda sign
beq E1
sec
lda #0
sbc accu
sta accu
lda #0
sbc accu + 1
sta accu + 1
E1:
}
}
unsigned lmuldiv16by8(unsigned a, char b, char c)
{
__asm {
lda #0
sta accu + 0
sta accu + 1
sta accu + 2
sta accu + 3
lda b
beq z1
lda c
l1:
asl
bcs e1
cmp b
bcs e2
asl a + 0
rol a + 1
jmp l1
e2:
clc
e1:
ror
sta c
ldx #16
l2:
lda b
sec
sbc c
bcc w1
sta b
clc
lda accu + 2
adc a
sta accu + 2
lda accu + 3
adc a + 1
sta accu + 3
bcc * + 8
inc accu + 0
bne * + 4
inc accu + 1
w1:
asl accu + 2
rol accu + 3
rol accu + 0
rol accu + 1
asl b
bcc w2
lda b
sbc c
sta b
clc
lda accu + 2
adc a
sta accu + 2
lda accu + 3
adc a + 1
sta accu + 3
bcc * + 8
inc accu + 0
bne * + 4
inc accu + 1
w2:
dex
bne l2
z1:
}
}
unsigned lmuldiv8by8(char a, char b, char c)
{
__asm {
ldy #0
sty accu + 0
sty accu + 1
sty accu + 2
lda a
beq z1
lda c
beq z1
ldx #8
lda b
beq z1
cmp c
bcc w0
l1:
lsr
ror accu + 1
inx
cmp c
bcs l1
bcc wa
l2:
asl accu + 2
rol accu + 0
wa:
rol accu + 1
w0:
rol
bcc w2
sbc c
bcc w3
w2:
tay
sec
sbc c
bcc w1
w3:
tay
clc
lda accu + 2
adc a
sta accu + 2
bcc w1
inc accu + 0
bne w1
inc accu + 1
w1:
tya
dex
bne l2
z1:
}
}
int lmuldiv16sby8(int a, char b, char c)
{
if (a < 0)
return -(int)lmuldiv16by8(-a, b, c);
else
return lmuldiv16by8(a, b, c);
}
unsigned usqrt(unsigned n)
{
unsigned p, q, r, h;
p = 0;
r = n;
#assign q 0x4000
#repeat
{
h = p | q;
p >>= 1;
if (r >= h)
{
p |= q;
r -= h;
}
}
#assign q q >> 2
#until q == 0
#undef q
return p;
}
unsigned long lmul16f16(unsigned long x, unsigned long y)
{
unsigned long hh = lmul16u(x >> 16, y >> 16);
unsigned long lh = lmul16u(x, y >> 16);
unsigned long hl = lmul16u(x >> 16, y);
unsigned long ll = lmul16u(x, y);
if (ll & 0x8000)
lh++;
ll >>= 16;
ll |= hh << 16;
ll += lh;
ll += hl;
return ll;
}
#if 1
long lmul16f16s(long x, long y)
{
__asm
{
lda #0
// fractional
sta __tmp + 0
sta __tmp + 1
// result
sta __accu + 0
sta __accu + 1
sta __accu + 2
sta __accu + 3
lda x + 0
ora x + 1
ora y + 0
ora y + 1
bne w0b
l2:
lsr x + 2
bcc ws1
clc
lda y + 2
adc __accu + 2
sta __accu + 2
lda y + 3
adc __accu + 3
sta __accu + 3
ws1:
lsr x + 3
bcc ws2
clc
lda y + 2
adc __accu + 3
sta __accu + 3
ws2:
asl y + 2
rol y + 3
lda x + 2
ora x + 3
bne l2
rts
w0b:
lda y + 3
and #$80
beq w0
lda #$ff
w0:
// overflow
sta __tmp + 2
sta __tmp + 3
lda x + 3
bpl w0a
sec
lda #0
sbc y + 0
sta __accu + 2
lda #0
sbc y + 1
sta __accu + 3
w0a:
ldx #8
l1:
lsr x + 0
bcc w1
clc
lda y + 0
adc __tmp + 0
sta __tmp + 0
lda y + 1
adc __tmp + 1
sta __tmp + 1
lda y + 2
adc __accu + 0
sta __accu + 0
lda y + 3
adc __accu + 1
sta __accu + 1
lda __tmp + 2
adc __accu + 2
sta __accu + 2
lda __tmp + 3
adc __accu + 3
sta __accu + 3
w1:
lsr x + 1
bcc w2
clc
lda y + 0
adc __tmp + 1
sta __tmp + 1
lda y + 1
adc __accu + 0
sta __accu + 0
lda y + 2
adc __accu + 1
sta __accu + 1
lda y + 3
adc __accu + 2
sta __accu + 2
lda __tmp + 2
adc __accu + 3
sta __accu + 3
w2:
lsr x + 2
bcc w3
clc
lda y + 0
adc __accu + 0
sta __accu + 0
lda y + 1
adc __accu + 1
sta __accu + 1
lda y + 2
adc __accu + 2
sta __accu + 2
lda y + 3
adc __accu + 3
sta __accu + 3
w3:
lsr x + 3
bcc w4
clc
lda y + 0
adc __accu + 1
sta __accu + 1
lda y + 1
adc __accu + 2
sta __accu + 2
lda y + 2
adc __accu + 3
sta __accu + 3
w4:
asl y + 0
rol y + 1
rol y + 2
rol y + 3
rol __tmp + 2
rol __tmp + 3
dex
beq w5
jmp l1
w5:
}
}
#else
__native long lmul16f16s(long x, long y)
{
unsigned lox = x;
int hix = x >> 16;
unsigned loy = y;
int hiy = y >> 16;
long r = (long)(hix * hiy) << 16;
if (lox)
{
r += lmul16u(lox, hiy);
if (hiy < 0)
r -= (unsigned long)lox << 16;
}
if (loy)
{
r += lmul16u(loy, hix);
if (hix < 0)
r -= (unsigned long)loy << 16;
}
if (lox && loy)
{
r += lmul16u(lox, loy) >> 16;
}
return r;
}
#endif
__native unsigned long ldiv16f16(unsigned long x, unsigned long y)
{
unsigned long k = x >> 16, d = 0;
x <<= 16;
for(char i=0; i<32; i++)
{
d <<= 1;
k <<= 1;
k |= (x >> 31);
x <<= 1;
if (k >= y)
{
k -= y;
d |= 1;
}
}
return d;
}
__native long ldiv16f16s(long x, long y)
{
bool sign = false;
if (x < 0)
{
x = -x;
sign = true;
}
if (y < 0)
{
y = -y;
sign = !sign;
}
x = ldiv16f16(x, y);
if (sign)
return -x;
else
return x;
}