Optimize call to 8 by 16 mul

This commit is contained in:
drmortalwombat 2021-12-23 20:27:45 +01:00
parent 99f674c866
commit 0dd6a0655c
5 changed files with 96 additions and 18 deletions

View File

@ -553,19 +553,17 @@ W1: asl accu
__asm mul16by8
{
lda #0
sta tmp + 2
sta tmp + 3
ldy #0
sty tmp + 3
lda tmp
lsr
bcc L2
L1:
tax
clc
lda tmp + 2
tya
adc accu
sta tmp + 2
tay
lda tmp + 3
adc accu + 1
sta tmp + 3
@ -576,6 +574,8 @@ L2:
lsr
bcs L1
bne L2
sty tmp + 2
rts
}

View File

@ -42,6 +42,21 @@ void bm_clr(Bitmap * bm, int x, int y)
bm->data[bm->cwidth * (y & ~7) + (x & ~7) + (y & 7)] &= ~(0x80 >> (x & 7));
}
// Test a single pixel: the result is true when the bit belonging to (x, y)
// is set in the bitmap data, false otherwise.
bool bm_get(Bitmap * bm, int x, int y)
{
	// Same byte addressing as bm_clr: row block (y & ~7), column block (x & ~7), line inside the block (y & 7)
	char cell = bm->data[bm->cwidth * (y & ~7) + (x & ~7) + (y & 7)];

	// 0x80 is shifted right by x & 7, so x & 7 == 0 selects the top bit of the byte
	int mask = 0x80 >> (x & 7);

	return (cell & mask) != 0;
}
// Write a single pixel: the bit belonging to (x, y) is set when c is true
// and cleared when c is false. All other bits of the byte are preserved.
void bm_put(Bitmap * bm, int x, int y, bool c)
{
	// Byte that holds the pixel, addressed the same way as in bm_get
	char * cell = bm->data + bm->cwidth * (y & ~7) + (x & ~7) + (y & 7);

	// Bit inside that byte; x & 7 == 0 maps to 0x80
	char bit = 0x80 >> (x & 7);

	if (!c)
	{
		*cell &= ~bit;
		return;
	}

	*cell |= bit;
}
char NineShadesOfGrey[9][8] = {
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // 0
{0x22, 0x00, 0x88, 0x00, 0x22, 0x00, 0x88, 0x00}, // 8
@ -1273,3 +1288,30 @@ int bm_put_chars_clipped(Bitmap * bm, ClipRect * clip, int x, int y, const char
return tw;
}
// Fill a w x h rectangle of dbm, starting at (dx, dy), with pixels sampled
// from sbm through a linear mapping of the destination coordinates.
//
// The source position is tracked in 16.16 fixed point and starts at (sx, sy).
//   dxx, dyx : added to the source x / y for each step along a destination row
//   dxy, dyy : added to the source x / y for each step to the next destination row
// All four steps are in 1/256 pixel units, so 256 advances one whole source pixel.
//
// Samples that fall outside the source bitmap (x outside 0..width-1, y outside
// 0..8*cheight-1) leave the destination pixel untouched.
//
// NOTE(review): clip is accepted but never consulted, destination writes are
// not clipped here - the caller has to keep the target rectangle inside dbm.
//
// Returns 0. The function used to run off its end without returning a value,
// which is undefined for any caller that reads the result.
int bm_transform(Bitmap * dbm, ClipRect * clip, int dx, int dy, int w, int h, Bitmap * sbm, int sx, int sy, int dxx, int dxy, int dyx, int dyy)
{
	// Scale by multiplication instead of a left shift: the steps (and start
	// coordinates) may be negative, and left shifting a negative value is undefined.
	// The steps are loop invariant, so they are computed once up front.
	const long stepXX = (long)dxx * 256, stepYX = (long)dyx * 256;
	const long stepXY = (long)dxy * 256, stepYY = (long)dyy * 256;

	// Source position at the start of the current destination row
	long lsx = (long)sx * 65536L, lsy = (long)sy * 65536L;

	for (int y = 0; y < h; y++)
	{
		// Source position of the current destination pixel
		long rsx = lsx, rsy = lsy;

		for (int x = 0; x < w; x++)
		{
			// Integer part of the fixed point source position
			int ix = (int)(rsx >> 16);
			int iy = (int)(rsy >> 16);

			if (ix >= 0 && iy >= 0 && ix < sbm->width && iy < 8 * sbm->cheight)
			{
				bm_put(dbm, dx + x, dy + y, bm_get(sbm, ix, iy));
			}

			rsx += stepXX;
			rsy += stepYX;
		}

		lsx += stepXY;
		lsy += stepYY;
	}

	return 0;
}

View File

@ -76,6 +76,8 @@ void bm_polygon_nc_fill(Bitmap * bm, ClipRect * clip, int * x, int * y, char num
// Single pixel access, each call addresses the pixel at (x, y) of bm.

// NOTE(review): body not visible here - presumably sets the pixel, confirm in bitmap.c
inline void bm_set(Bitmap * bm, int x, int y);
// Clear the bit of the pixel
inline void bm_clr(Bitmap * bm, int x, int y);
// Return true when the bit of the pixel is set
inline bool bm_get(Bitmap * bm, int x, int y);
// Set the bit of the pixel when c is true, clear it when c is false
inline void bm_put(Bitmap * bm, int x, int y, bool c);
void bm_line(Bitmap * bm, int x0, int y0, int x1, int y1, char pattern);
@ -112,6 +114,9 @@ int bm_put_chars(Bitmap * bm, int x, int y, const char * str, char len, BlitOp o
int bm_put_chars_clipped(Bitmap * bm, ClipRect * clip, int x, int y, const char * str, char len, BlitOp op);
// Fill a w x h rectangle of dbm at (dx, dy) with pixels sampled from sbm.
// Sampling starts at source position (sx, sy); dxx / dyx are added to the
// source x / y per destination pixel along a row, dxy / dyy per destination
// row, all in 1/256 pixel units. Samples outside sbm are skipped.
// NOTE(review): clip is not consulted by the implementation, and the meaning
// of the int result is not documented - confirm before relying on either.
int bm_transform(Bitmap * dbm, ClipRect * clip, int dx, int dy, int w, int h, Bitmap * sbm, int sx, int sy, int dxx, int dxy, int dyx, int dyy);
#pragma compile("bitmap.c")
#endif

View File

@ -943,6 +943,18 @@ void ValueSet::UpdateValue(InterInstruction * ins, const GrowingInstructionPtrAr
UpdateValue(ins, tvalue, aliasedLocals, aliasedParams, staticVars);
return;
}
else if (ins->mOperator == IA_MUL && tvalue[ins->mSrc[0].mTemp]->mConst.mIntConst == -1)
{
ins->mCode = IC_UNARY_OPERATOR;
ins->mOperator = IA_NEG;
ins->mSrc[0] = ins->mSrc[1];
ins->mSrc[1].mTemp = -1;
ins->mSrc[1].mType = IT_NONE;
UpdateValue(ins, tvalue, aliasedLocals, aliasedParams, staticVars);
return;
}
}
@ -972,6 +984,17 @@ void ValueSet::UpdateValue(InterInstruction * ins, const GrowingInstructionPtrAr
return;
}
else if (ins->mOperator == IA_MUL && tvalue[ins->mSrc[1].mTemp]->mConst.mIntConst == -1)
{
ins->mCode = IC_UNARY_OPERATOR;
ins->mOperator = IA_NEG;
ins->mSrc[1].mTemp = -1;
ins->mSrc[1].mType = IT_NONE;
UpdateValue(ins, tvalue, aliasedLocals, aliasedParams, staticVars);
return;
}
else if (ins->mOperator == IA_SUB && tvalue[ins->mSrc[1].mTemp]->mConst.mIntConst == 0)
{
ins->mCode = IC_UNARY_OPERATOR;

View File

@ -165,6 +165,9 @@ bool NativeCodeInstruction::IsUsedResultInstructions(NumberSet& requiredTemps)
for (int i = 0; i < 4; i++)
requiredTemps += mParam + i;
}
if (mFlags & NCIF_USE_CPU_REG_A)
requiredTemps += CPU_REG_A;
}
else
{
@ -2199,6 +2202,11 @@ void NativeCodeInstruction::FilterRegUsage(NumberSet& requiredTemps, NumberSet&
requiredTemps += mParam + i;
}
}
if (mFlags & NCIF_USE_CPU_REG_A)
{
if (!providedTemps[CPU_REG_A])
requiredTemps += CPU_REG_A;
}
}
else
{
@ -4794,10 +4802,10 @@ int NativeCodeBasicBlock::ShortMultiply(InterCodeProcedure* proc, NativeCodeProc
else
{
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, mul));
mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 0));
// mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 0));
NativeCodeGenerator::Runtime& rt(nproc->mGenerator->ResolveRuntime(Ident::Unique("mul16by8")));
mIns.Push(NativeCodeInstruction(ASMIT_JSR, ASMIM_ABSOLUTE, rt.mOffset, rt.mLinkerObject, NCIF_RUNTIME));
mIns.Push(NativeCodeInstruction(ASMIT_JSR, ASMIM_ABSOLUTE, rt.mOffset, rt.mLinkerObject, NCIF_RUNTIME | NCIF_USE_CPU_REG_A));
}
return BC_REG_WORK + 2;
@ -5980,15 +5988,15 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p
}
if (sins0)
LoadValueToReg(proc, sins0, BC_REG_WORK, nullptr, nullptr);
else
{
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[0].mTemp]));
mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 0));
LoadValueToReg(proc, sins0, BC_REG_WORK, nullptr, nullptr);
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_WORK + 0));
}
else
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[0].mTemp]));
NativeCodeGenerator::Runtime& frt(nproc->mGenerator->ResolveRuntime(Ident::Unique("mul16by8")));
mIns.Push(NativeCodeInstruction(ASMIT_JSR, ASMIM_ABSOLUTE, frt.mOffset, frt.mLinkerObject, NCIF_RUNTIME));
mIns.Push(NativeCodeInstruction(ASMIT_JSR, ASMIM_ABSOLUTE, frt.mOffset, frt.mLinkerObject, NCIF_RUNTIME | NCIF_USE_CPU_REG_A));
reg = BC_REG_WORK + 2;
}
else if (ins->mOperator == IA_MUL && ins->mSrc[1].IsUByte())
@ -6004,15 +6012,15 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p
}
if (sins1)
LoadValueToReg(proc, sins1, BC_REG_WORK, nullptr, nullptr);
else
{
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[1].mTemp]));
mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 0));
LoadValueToReg(proc, sins1, BC_REG_WORK, nullptr, nullptr);
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_WORK + 0));
}
else
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[1].mTemp]));
NativeCodeGenerator::Runtime& frt(nproc->mGenerator->ResolveRuntime(Ident::Unique("mul16by8")));
mIns.Push(NativeCodeInstruction(ASMIT_JSR, ASMIM_ABSOLUTE, frt.mOffset, frt.mLinkerObject, NCIF_RUNTIME));
mIns.Push(NativeCodeInstruction(ASMIT_JSR, ASMIM_ABSOLUTE, frt.mOffset, frt.mLinkerObject, NCIF_RUNTIME | NCIF_USE_CPU_REG_A));
reg = BC_REG_WORK + 2;
}
else