Optimize call to the 16 by 8 multiply routine (mul16by8)
This commit is contained in:
parent
99f674c866
commit
0dd6a0655c
|
@ -553,19 +553,17 @@ W1: asl accu
|
||||||
|
|
||||||
__asm mul16by8
|
__asm mul16by8
|
||||||
{
|
{
|
||||||
lda #0
|
ldy #0
|
||||||
sta tmp + 2
|
sty tmp + 3
|
||||||
sta tmp + 3
|
|
||||||
|
|
||||||
lda tmp
|
|
||||||
lsr
|
lsr
|
||||||
bcc L2
|
bcc L2
|
||||||
L1:
|
L1:
|
||||||
tax
|
tax
|
||||||
clc
|
clc
|
||||||
lda tmp + 2
|
tya
|
||||||
adc accu
|
adc accu
|
||||||
sta tmp + 2
|
tay
|
||||||
lda tmp + 3
|
lda tmp + 3
|
||||||
adc accu + 1
|
adc accu + 1
|
||||||
sta tmp + 3
|
sta tmp + 3
|
||||||
|
@ -576,6 +574,8 @@ L2:
|
||||||
lsr
|
lsr
|
||||||
bcs L1
|
bcs L1
|
||||||
bne L2
|
bne L2
|
||||||
|
|
||||||
|
sty tmp + 2
|
||||||
rts
|
rts
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -42,6 +42,21 @@ void bm_clr(Bitmap * bm, int x, int y)
|
||||||
bm->data[bm->cwidth * (y & ~7) + (x & ~7) + (y & 7)] &= ~(0x80 >> (x & 7));
|
bm->data[bm->cwidth * (y & ~7) + (x & ~7) + (y & 7)] &= ~(0x80 >> (x & 7));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Test a single pixel of a bitmap.
//
// The byte holding the pixel is found at
//   cwidth * (y & ~7) + (x & ~7) + (y & 7)
// inside bm->data, and the pixel is the bit 0x80 >> (x & 7) of that byte
// (presumably an 8x8 character cell layout - confirm against Bitmap).
// No range check is performed on x or y.
//
// Returns true when the addressed bit is set, false otherwise.
bool bm_get(Bitmap * bm, int x, int y)
{
	// Bit inside the byte, leftmost pixel is the most significant bit
	int	bit = 0x80 >> (x & 7);

	if (bm->data[bm->cwidth * (y & ~7) + (x & ~7) + (y & 7)] & bit)
		return true;

	return false;
}
|
||||||
|
|
||||||
|
// Set or clear a single pixel of a bitmap.
//
// Addresses the same byte and bit as bm_set / bm_clr:
//   byte  bm->data + cwidth * (y & ~7) + (x & ~7) + (y & 7)
//   bit   0x80 >> (x & 7)
// The bit is set when c is true and cleared when c is false.
// No range check is performed on x or y.
void bm_put(Bitmap * bm, int x, int y, bool c)
{
	// Bit inside the byte, leftmost pixel is the most significant bit
	char	mask = 0x80 >> (x & 7);
	char *	cell = bm->data + bm->cwidth * (y & ~7) + (x & ~7) + (y & 7);

	if (!c)
		*cell &= ~mask;
	else
		*cell |= mask;
}
|
||||||
|
|
||||||
char NineShadesOfGrey[9][8] = {
|
char NineShadesOfGrey[9][8] = {
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // 0
|
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // 0
|
||||||
{0x22, 0x00, 0x88, 0x00, 0x22, 0x00, 0x88, 0x00}, // 8
|
{0x22, 0x00, 0x88, 0x00, 0x22, 0x00, 0x88, 0x00}, // 8
|
||||||
|
@ -1273,3 +1288,30 @@ int bm_put_chars_clipped(Bitmap * bm, ClipRect * clip, int x, int y, const char
|
||||||
|
|
||||||
return tw;
|
return tw;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Copy a w by h pixel area into dbm at (dx, dy), sampling sbm through an
// affine mapping.
//
// The source position is tracked in 16.16 fixed point, starting at (sx, sy).
// The four step values are added shifted left by eight, so a step of 256
// advances the source position by exactly one pixel:
//   dxx, dyx   source x / y step for each destination pixel in a row
//   dxy, dyy   source x / y step for each destination row
//
// A destination pixel is only written when the sampled source position lies
// inside 0 .. sbm->width - 1 by 0 .. 8 * sbm->cheight - 1, otherwise it is
// left untouched.
//
// NOTE(review): clip is not consulted and the destination coordinates are
// not range checked, the caller has to keep the target area inside dbm.
//
// Always returns 0. The function previously ran off its end without a
// return statement, so the result was indeterminate for any caller using it.
int bm_transform(Bitmap * dbm, ClipRect * clip, int dx, int dy, int w, int h, Bitmap * sbm, int sx, int sy, int dxx, int dxy, int dyx, int dyy)
{
	long	lsx = (long)sx << 16, lsy = (long)sy << 16;

	// Loop invariant steps in 16.16 fixed point
	long	stepxx = (long)dxx << 8, stepyx = (long)dyx << 8;
	long	stepxy = (long)dxy << 8, stepyy = (long)dyy << 8;

	for(int y=0; y<h; y++)
	{
		// Source position at the start of this destination row
		long	rsx = lsx, rsy = lsy;

		for(int x=0; x<w; x++)
		{
			int	ix = (int)(rsx >> 16);
			int	iy = (int)(rsy >> 16);

			// Skip samples that fall outside the source bitmap
			if (ix >= 0 && iy >= 0 && ix < sbm->width && iy < 8 * sbm->cheight)
			{
				bm_put(dbm, dx + x, dy + y, bm_get(sbm, ix, iy));
			}

			rsx += stepxx;
			rsy += stepyx;
		}

		lsx += stepxy;
		lsy += stepyy;
	}

	return 0;
}
|
||||||
|
|
|
@ -76,6 +76,8 @@ void bm_polygon_nc_fill(Bitmap * bm, ClipRect * clip, int * x, int * y, char num
|
||||||
|
|
||||||
inline void bm_set(Bitmap * bm, int x, int y);
|
inline void bm_set(Bitmap * bm, int x, int y);
|
||||||
inline void bm_clr(Bitmap * bm, int x, int y);
|
inline void bm_clr(Bitmap * bm, int x, int y);
|
||||||
|
inline bool bm_get(Bitmap * bm, int x, int y);
|
||||||
|
inline void bm_put(Bitmap * bm, int x, int y, bool c);
|
||||||
|
|
||||||
|
|
||||||
void bm_line(Bitmap * bm, int x0, int y0, int x1, int y1, char pattern);
|
void bm_line(Bitmap * bm, int x0, int y0, int x1, int y1, char pattern);
|
||||||
|
@ -112,6 +114,9 @@ int bm_put_chars(Bitmap * bm, int x, int y, const char * str, char len, BlitOp o
|
||||||
|
|
||||||
int bm_put_chars_clipped(Bitmap * bm, ClipRect * clip, int x, int y, const char * str, char len, BlitOp op);
|
int bm_put_chars_clipped(Bitmap * bm, ClipRect * clip, int x, int y, const char * str, char len, BlitOp op);
|
||||||
|
|
||||||
|
|
||||||
|
int bm_transform(Bitmap * dbm, ClipRect * clip, int dx, int dy, int w, int h, Bitmap * sbm, int sx, int sy, int dxx, int dxy, int dyx, int dyy);
|
||||||
|
|
||||||
#pragma compile("bitmap.c")
|
#pragma compile("bitmap.c")
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -943,6 +943,18 @@ void ValueSet::UpdateValue(InterInstruction * ins, const GrowingInstructionPtrAr
|
||||||
|
|
||||||
UpdateValue(ins, tvalue, aliasedLocals, aliasedParams, staticVars);
|
UpdateValue(ins, tvalue, aliasedLocals, aliasedParams, staticVars);
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
else if (ins->mOperator == IA_MUL && tvalue[ins->mSrc[0].mTemp]->mConst.mIntConst == -1)
|
||||||
|
{
|
||||||
|
ins->mCode = IC_UNARY_OPERATOR;
|
||||||
|
ins->mOperator = IA_NEG;
|
||||||
|
ins->mSrc[0] = ins->mSrc[1];
|
||||||
|
ins->mSrc[1].mTemp = -1;
|
||||||
|
ins->mSrc[1].mType = IT_NONE;
|
||||||
|
|
||||||
|
UpdateValue(ins, tvalue, aliasedLocals, aliasedParams, staticVars);
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -972,6 +984,17 @@ void ValueSet::UpdateValue(InterInstruction * ins, const GrowingInstructionPtrAr
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
else if (ins->mOperator == IA_MUL && tvalue[ins->mSrc[1].mTemp]->mConst.mIntConst == -1)
|
||||||
|
{
|
||||||
|
ins->mCode = IC_UNARY_OPERATOR;
|
||||||
|
ins->mOperator = IA_NEG;
|
||||||
|
ins->mSrc[1].mTemp = -1;
|
||||||
|
ins->mSrc[1].mType = IT_NONE;
|
||||||
|
|
||||||
|
UpdateValue(ins, tvalue, aliasedLocals, aliasedParams, staticVars);
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
else if (ins->mOperator == IA_SUB && tvalue[ins->mSrc[1].mTemp]->mConst.mIntConst == 0)
|
else if (ins->mOperator == IA_SUB && tvalue[ins->mSrc[1].mTemp]->mConst.mIntConst == 0)
|
||||||
{
|
{
|
||||||
ins->mCode = IC_UNARY_OPERATOR;
|
ins->mCode = IC_UNARY_OPERATOR;
|
||||||
|
|
|
@ -165,6 +165,9 @@ bool NativeCodeInstruction::IsUsedResultInstructions(NumberSet& requiredTemps)
|
||||||
for (int i = 0; i < 4; i++)
|
for (int i = 0; i < 4; i++)
|
||||||
requiredTemps += mParam + i;
|
requiredTemps += mParam + i;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (mFlags & NCIF_USE_CPU_REG_A)
|
||||||
|
requiredTemps += CPU_REG_A;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -2199,6 +2202,11 @@ void NativeCodeInstruction::FilterRegUsage(NumberSet& requiredTemps, NumberSet&
|
||||||
requiredTemps += mParam + i;
|
requiredTemps += mParam + i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (mFlags & NCIF_USE_CPU_REG_A)
|
||||||
|
{
|
||||||
|
if (!providedTemps[CPU_REG_A])
|
||||||
|
requiredTemps += CPU_REG_A;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -4794,10 +4802,10 @@ int NativeCodeBasicBlock::ShortMultiply(InterCodeProcedure* proc, NativeCodeProc
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, mul));
|
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, mul));
|
||||||
mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 0));
|
// mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 0));
|
||||||
|
|
||||||
NativeCodeGenerator::Runtime& rt(nproc->mGenerator->ResolveRuntime(Ident::Unique("mul16by8")));
|
NativeCodeGenerator::Runtime& rt(nproc->mGenerator->ResolveRuntime(Ident::Unique("mul16by8")));
|
||||||
mIns.Push(NativeCodeInstruction(ASMIT_JSR, ASMIM_ABSOLUTE, rt.mOffset, rt.mLinkerObject, NCIF_RUNTIME));
|
mIns.Push(NativeCodeInstruction(ASMIT_JSR, ASMIM_ABSOLUTE, rt.mOffset, rt.mLinkerObject, NCIF_RUNTIME | NCIF_USE_CPU_REG_A));
|
||||||
}
|
}
|
||||||
|
|
||||||
return BC_REG_WORK + 2;
|
return BC_REG_WORK + 2;
|
||||||
|
@ -5980,15 +5988,15 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sins0)
|
if (sins0)
|
||||||
LoadValueToReg(proc, sins0, BC_REG_WORK, nullptr, nullptr);
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[0].mTemp]));
|
LoadValueToReg(proc, sins0, BC_REG_WORK, nullptr, nullptr);
|
||||||
mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 0));
|
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_WORK + 0));
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[0].mTemp]));
|
||||||
|
|
||||||
NativeCodeGenerator::Runtime& frt(nproc->mGenerator->ResolveRuntime(Ident::Unique("mul16by8")));
|
NativeCodeGenerator::Runtime& frt(nproc->mGenerator->ResolveRuntime(Ident::Unique("mul16by8")));
|
||||||
mIns.Push(NativeCodeInstruction(ASMIT_JSR, ASMIM_ABSOLUTE, frt.mOffset, frt.mLinkerObject, NCIF_RUNTIME));
|
mIns.Push(NativeCodeInstruction(ASMIT_JSR, ASMIM_ABSOLUTE, frt.mOffset, frt.mLinkerObject, NCIF_RUNTIME | NCIF_USE_CPU_REG_A));
|
||||||
reg = BC_REG_WORK + 2;
|
reg = BC_REG_WORK + 2;
|
||||||
}
|
}
|
||||||
else if (ins->mOperator == IA_MUL && ins->mSrc[1].IsUByte())
|
else if (ins->mOperator == IA_MUL && ins->mSrc[1].IsUByte())
|
||||||
|
@ -6004,15 +6012,15 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sins1)
|
if (sins1)
|
||||||
LoadValueToReg(proc, sins1, BC_REG_WORK, nullptr, nullptr);
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[1].mTemp]));
|
LoadValueToReg(proc, sins1, BC_REG_WORK, nullptr, nullptr);
|
||||||
mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 0));
|
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_WORK + 0));
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[1].mTemp]));
|
||||||
|
|
||||||
NativeCodeGenerator::Runtime& frt(nproc->mGenerator->ResolveRuntime(Ident::Unique("mul16by8")));
|
NativeCodeGenerator::Runtime& frt(nproc->mGenerator->ResolveRuntime(Ident::Unique("mul16by8")));
|
||||||
mIns.Push(NativeCodeInstruction(ASMIT_JSR, ASMIM_ABSOLUTE, frt.mOffset, frt.mLinkerObject, NCIF_RUNTIME));
|
mIns.Push(NativeCodeInstruction(ASMIT_JSR, ASMIM_ABSOLUTE, frt.mOffset, frt.mLinkerObject, NCIF_RUNTIME | NCIF_USE_CPU_REG_A));
|
||||||
reg = BC_REG_WORK + 2;
|
reg = BC_REG_WORK + 2;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|
Loading…
Reference in New Issue