Optimize more short multiplies in native code

This commit is contained in:
drmortalwombat 2022-01-02 12:44:37 +01:00
parent 2d0f20cadc
commit 272b7b08df
2 changed files with 193 additions and 30 deletions

View File

@ -3658,6 +3658,81 @@ void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(void)
else else
vr.mMaxState = vr.mMinState = IntegerValueRange::S_UNBOUND; vr.mMaxState = vr.mMinState = IntegerValueRange::S_UNBOUND;
break; break;
case IA_SHR:
	// Logical (unsigned) shift right.  A result range is only derived when the
	// shift amount is not held in a temp (mTemp < 0) and is read from mIntConst.
	if (ins->mSrc[0].mTemp < 0)
	{
		// Start from the range currently known for the shifted operand
		vr = mLocalValueRange[ins->mSrc[1].mTemp];

		if (ins->mSrc[0].mIntConst > 0)
		{
			// The operand bounds can only be shifted directly when the lower bound
			// is known and its VALUE is non negative.  Testing the state enum here
			// instead of the value would let a negative minimum be reinterpreted
			// as a huge unsigned number and produce min > max.
			if (vr.mMinState == IntegerValueRange::S_BOUND && vr.mMinValue >= 0)
			{
				switch (ins->mSrc[1].mType)
				{
				case IT_INT16:
					vr.mMaxValue = (unsigned short)(vr.mMaxValue) >> ins->mSrc[0].mIntConst;
					vr.mMinValue = (unsigned short)(vr.mMinValue) >> ins->mSrc[0].mIntConst;
					break;
				case IT_INT8:
					vr.mMaxValue = (unsigned char)(vr.mMaxValue) >> ins->mSrc[0].mIntConst;
					vr.mMinValue = (unsigned char)(vr.mMinValue) >> ins->mSrc[0].mIntConst;
					break;
				case IT_INT32:
					vr.mMaxValue = (unsigned)(vr.mMaxValue) >> ins->mSrc[0].mIntConst;
					vr.mMinValue = (unsigned)(vr.mMinValue) >> ins->mSrc[0].mIntConst;
					break;
				}
			}
			else
			{
				// Possibly negative or unknown operand: fall back to the full
				// unsigned range of the operand type shifted by the constant.
				switch (ins->mSrc[1].mType)
				{
				case IT_INT16:
					vr.mMaxValue = 65535 >> ins->mSrc[0].mIntConst;
					vr.mMinValue = 0;
					break;
				case IT_INT8:
					vr.mMaxValue = 255 >> ins->mSrc[0].mIntConst;
					vr.mMinValue = 0;
					break;
				case IT_INT32:
					// Largest 32 bit value is 0xffffffff, matching 65535 and 255 above
					vr.mMaxValue = 0xffffffffULL >> ins->mSrc[0].mIntConst;
					vr.mMinValue = 0;
					break;
				}
			}
		}
	}
	else
		vr.mMaxState = vr.mMinState = IntegerValueRange::S_UNBOUND;
	break;
case IA_SAR:
	// Arithmetic (sign preserving) shift right.  A result range is only derived
	// when the shift amount is not held in a temp (mTemp < 0) and is read from
	// mIntConst.
	if (ins->mSrc[0].mTemp < 0)
	{
		// Start from the range currently known for the shifted operand
		vr = mLocalValueRange[ins->mSrc[1].mTemp];

		if (ins->mSrc[0].mIntConst > 0)
		{
			// Shift both bounds as signed values of the operand width
			switch (ins->mSrc[1].mType)
			{
			case IT_INT16:
				vr.mMaxValue = (short)(vr.mMaxValue) >> ins->mSrc[0].mIntConst;
				vr.mMinValue = (short)(vr.mMinValue) >> ins->mSrc[0].mIntConst;
				break;
			case IT_INT8:
				// Plain char has implementation defined signedness, so the
				// signed type is spelled out to keep this a sign preserving shift
				// on every host compiler.
				vr.mMaxValue = (signed char)(vr.mMaxValue) >> ins->mSrc[0].mIntConst;
				vr.mMinValue = (signed char)(vr.mMinValue) >> ins->mSrc[0].mIntConst;
				break;
			case IT_INT32:
				vr.mMaxValue = (int)(vr.mMaxValue) >> ins->mSrc[0].mIntConst;
				vr.mMinValue = (int)(vr.mMinValue) >> ins->mSrc[0].mIntConst;
				break;
			}
		}
	}
	else
		vr.mMaxState = vr.mMinState = IntegerValueRange::S_UNBOUND;
	break;
case IA_AND: case IA_AND:
if (ins->mSrc[0].mTemp < 0) if (ins->mSrc[0].mTemp < 0)
{ {

View File

@ -4760,37 +4760,58 @@ int NativeCodeBasicBlock::ShortMultiply(InterCodeProcedure* proc, NativeCodeProc
ShiftRegisterLeft(proc, BC_REG_ACCU, lshift); ShiftRegisterLeft(proc, BC_REG_ACCU, lshift);
return BC_REG_ACCU; return BC_REG_ACCU;
case 3: case 3:
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0)); if (ins->mSrc[index].IsUByte() && ins->mSrc[index].mRange.mMaxValue <= 85)
mIns.Push(NativeCodeInstruction(ASMIT_ASL, ASMIM_IMPLIED)); {
mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 4)); mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0));
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1)); mIns.Push(NativeCodeInstruction(ASMIT_ASL, ASMIM_IMPLIED));
mIns.Push(NativeCodeInstruction(ASMIT_ROL, ASMIM_IMPLIED)); mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0));
mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 5)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0));
mIns.Push(NativeCodeInstruction(ASMIT_CLC, ASMIM_IMPLIED)); }
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_WORK + 4)); else
mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0)); {
mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0)); mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0));
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_WORK + 5)); mIns.Push(NativeCodeInstruction(ASMIT_ASL, ASMIM_IMPLIED));
mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 4));
mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1)); mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1));
mIns.Push(NativeCodeInstruction(ASMIT_ROL, ASMIM_IMPLIED));
mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 5));
mIns.Push(NativeCodeInstruction(ASMIT_CLC, ASMIM_IMPLIED));
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_WORK + 4));
mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0));
mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0));
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_WORK + 5));
mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1));
mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1));
}
ShiftRegisterLeft(proc, BC_REG_ACCU, lshift); ShiftRegisterLeft(proc, BC_REG_ACCU, lshift);
return BC_REG_ACCU; return BC_REG_ACCU;
case 5: case 5:
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0)); if (ins->mSrc[index].IsUByte() && ins->mSrc[index].mRange.mMaxValue <= 51)
mIns.Push(NativeCodeInstruction(ASMIT_ASL, ASMIM_IMPLIED)); {
mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 4)); mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0));
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1)); mIns.Push(NativeCodeInstruction(ASMIT_ASL, ASMIM_IMPLIED));
mIns.Push(NativeCodeInstruction(ASMIT_ROL, ASMIM_IMPLIED)); mIns.Push(NativeCodeInstruction(ASMIT_ASL, ASMIM_IMPLIED));
mIns.Push(NativeCodeInstruction(ASMIT_ASL, ASMIM_ZERO_PAGE, BC_REG_WORK + 4)); mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0));
mIns.Push(NativeCodeInstruction(ASMIT_ROL, ASMIM_IMPLIED)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0));
mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 5)); }
mIns.Push(NativeCodeInstruction(ASMIT_CLC, ASMIM_IMPLIED)); else
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_WORK + 4)); {
mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0)); mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0));
mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0)); mIns.Push(NativeCodeInstruction(ASMIT_ASL, ASMIM_IMPLIED));
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_WORK + 5)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 4));
mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1)); mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1));
mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1)); mIns.Push(NativeCodeInstruction(ASMIT_ROL, ASMIM_IMPLIED));
mIns.Push(NativeCodeInstruction(ASMIT_ASL, ASMIM_ZERO_PAGE, BC_REG_WORK + 4));
mIns.Push(NativeCodeInstruction(ASMIT_ROL, ASMIM_IMPLIED));
mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 5));
mIns.Push(NativeCodeInstruction(ASMIT_CLC, ASMIM_IMPLIED));
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_WORK + 4));
mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0));
mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0));
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_WORK + 5));
mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1));
mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1));
}
ShiftRegisterLeft(proc, BC_REG_ACCU, lshift); ShiftRegisterLeft(proc, BC_REG_ACCU, lshift);
return BC_REG_ACCU; return BC_REG_ACCU;
case 7: case 7:
@ -6309,12 +6330,22 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, treg + 1)); mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, treg + 1));
mIns.Push(NativeCodeInstruction(ASMIT_ASL, ASMIM_ZERO_PAGE, treg)); mIns.Push(NativeCodeInstruction(ASMIT_ASL, ASMIM_ZERO_PAGE, treg));
} }
mIns.Push(NativeCodeInstruction(ASMIT_ROL, ASMIM_IMPLIED));
int check = 0xffff;
if (ins->mSrc[1].IsUByte())
check = ins->mSrc[1].mRange.mMaxValue;
check <<= 1;
if (check >= 0x100)
mIns.Push(NativeCodeInstruction(ASMIT_ROL, ASMIM_IMPLIED));
for (int i = 1; i < shift; i++) for (int i = 1; i < shift; i++)
{ {
mIns.Push(NativeCodeInstruction(ASMIT_ASL, ASMIM_ZERO_PAGE, treg)); mIns.Push(NativeCodeInstruction(ASMIT_ASL, ASMIM_ZERO_PAGE, treg));
mIns.Push(NativeCodeInstruction(ASMIT_ROL, ASMIM_IMPLIED)); check <<= 1;
if (check >= 0x100)
mIns.Push(NativeCodeInstruction(ASMIT_ROL, ASMIM_IMPLIED));
} }
mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg + 1)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg + 1));
} }
} }
@ -10877,6 +10908,7 @@ bool NativeCodeBasicBlock::OptimizeInnerLoops(NativeCodeProcedure* proc)
return changed; return changed;
} }
// Size reduction violating various assumptions such as no branches in basic blocks // Size reduction violating various assumptions such as no branches in basic blocks
// must be last step before actual assembly // must be last step before actual assembly
@ -11153,6 +11185,43 @@ void NativeCodeBasicBlock::BlockSizeReduction(void)
yimm = ximm = false; yimm = ximm = false;
} }
#endif #endif
// Remove a leading CLC/SEC when every predecessor enters this block through a
// carry branch that already guarantees the matching carry state.
bool carrySet = true, carryClear = true;

// Without any predecessor nothing is known about the carry flag.  The loop
// below would not run and leave both flags set, so rule this case out first.
if (mEntryBlocks.Size() == 0)
	carrySet = carryClear = false;

for (int i = 0; i < mEntryBlocks.Size(); i++)
{
	if (mEntryBlocks[i]->mBranch == ASMIT_BCC)
	{
		// BCC takes the true jump with carry clear, the false jump with carry set
		if (mEntryBlocks[i]->mTrueJump == this)
			carrySet = false;
		else
			carryClear = false;
	}
	else if (mEntryBlocks[i]->mBranch == ASMIT_BCS)
	{
		// BCS takes the true jump with carry set, the false jump with carry clear
		if (mEntryBlocks[i]->mTrueJump == this)
			carryClear = false;
		else
			carrySet = false;
	}
	else
	{
		// Any other kind of predecessor leaves the carry state unknown
		carryClear = carrySet = false;
	}
}

if (carryClear || carrySet)
{
	// Locate the first instruction of this block that changes the carry flag
	int i = 0;
	while (i < mIns.Size() && !mIns[i].ChangesCarry())
		i++;

	if (i < mIns.Size())
	{
		// It is redundant if it only re-establishes the state known on entry
		if (mIns[i].mType == ASMIT_CLC && carryClear)
			mIns.Remove(i);
		else if (mIns[i].mType == ASMIT_SEC && carrySet)
			mIns.Remove(i);
	}
}
if (mTrueJump) if (mTrueJump)
mTrueJump->BlockSizeReduction(); mTrueJump->BlockSizeReduction();
if (mFalseJump) if (mFalseJump)
@ -12957,6 +13026,25 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(int pass)
mIns[i + 2].mAddress = mIns[i + 0].mAddress; mIns[i + 2].mAddress = mIns[i + 0].mAddress;
progress = true; progress = true;
} }
else if (
	// Pattern: CLC; LDA zp; ADC #1 or #2; STA zp (same address); TAY
	// with neither A nor the carry live after the TAY.
	mIns[i + 0].mType == ASMIT_CLC &&
	mIns[i + 1].mType == ASMIT_LDA && mIns[i + 1].mMode == ASMIM_ZERO_PAGE &&
	mIns[i + 2].mType == ASMIT_ADC && mIns[i + 2].mMode == ASMIM_IMMEDIATE && (mIns[i + 2].mAddress == 1 || mIns[i + 2].mAddress == 2) &&
	mIns[i + 3].mType == ASMIT_STA && mIns[i + 3].mMode == ASMIM_ZERO_PAGE && mIns[i + 1].mAddress == mIns[i + 3].mAddress &&
	mIns[i + 4].mType == ASMIT_TAY && !(mIns[i + 4].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_C)))
{
	// Rewrite to LDY zp; INY (twice for #2); STY zp.  Y now carries the value
	// through the whole sequence, so it is marked live on each rewritten
	// instruction; liveness bits are kept in mLive, the field the match above reads.
	mIns[i + 0].mType = ASMIT_NOP;
	mIns[i + 1].mType = ASMIT_LDY; mIns[i + 1].mLive |= LIVE_CPU_REG_Y;
	mIns[i + 2].mType = ASMIT_INY; mIns[i + 2].mMode = ASMIM_IMPLIED; mIns[i + 2].mLive |= LIVE_CPU_REG_Y;
	// Must be an assignment: the store has to write Y, A no longer holds the sum
	mIns[i + 3].mType = ASMIT_STY; mIns[i + 3].mLive |= LIVE_CPU_REG_Y;
	mIns[i + 4].mType = ASMIT_NOP;

	// mAddress still holds the former immediate operand of the ADC
	if (mIns[i + 2].mAddress == 2)
	{
		// Adding two needs a second INY in front of the store
		mIns.Insert(i + 3, mIns[i + 2]);
	}

	progress = true;
}
#if 1 #if 1
else if ( else if (
mIns[i + 0].mType == ASMIT_LDY && mIns[i + 0].mMode == ASMIM_IMMEDIATE && mIns[i + 0].mAddress == 0 && mIns[i + 0].mType == ASMIT_LDY && mIns[i + 0].mMode == ASMIM_IMMEDIATE && mIns[i + 0].mAddress == 0 &&