Strength reduction for simple native code loops with y register

This commit is contained in:
drmortalwombat 2021-12-20 20:44:10 +01:00
parent 893b6f2294
commit b9c477976a
2 changed files with 200 additions and 45 deletions

View File

@ -4425,6 +4425,32 @@ static bool CanBypass(const InterInstruction* lins, const InterInstruction* bins
return true;
}
// Tells whether instruction `lins` may be moved upwards across `bins`,
// i.e. placed in front of an instruction that currently precedes it.
//
// The move is rejected when:
//   - `lins` writes a temp that `bins` also writes, or that `bins` reads
//     through any of its operands;
//   - `bins` writes a temp that `lins` reads through any of its operands;
//   - `bins` is a frame push/pop and `lins` is a constant of pointer type
//     whose constant refers to IM_FRAME memory.
//
// A temp index below zero is treated as "no temp", so the corresponding
// dependency checks are skipped.
static bool CanBypassUp(const InterInstruction* lins, const InterInstruction* bins)
{
	const int movedDst = lins->mDst.mTemp;
	const int passedDst = bins->mDst.mTemp;

	// Result of the moved instruction must not collide with anything the
	// passed instruction produces or consumes.
	if (movedDst >= 0)
	{
		if (passedDst == movedDst)
			return false;

		for (int k = 0; k < bins->mNumOperands; k++)
		{
			if (bins->mSrc[k].mTemp == movedDst)
				return false;
		}
	}

	// The moved instruction must not consume what the passed one produces.
	if (passedDst >= 0)
	{
		for (int k = 0; k < lins->mNumOperands; k++)
		{
			if (lins->mSrc[k].mTemp == passedDst)
				return false;
		}
	}

	// Frame relative pointer constants stay on their side of a frame push/pop.
	if ((bins->mCode == IC_PUSH_FRAME || bins->mCode == IC_POP_FRAME) &&
		lins->mCode == IC_CONSTANT &&
		lins->mDst.mType == IT_POINTER &&
		lins->mConst.mMemory == IM_FRAME)
	{
		return false;
	}

	return true;
}
static bool IsChained(const InterInstruction* ins, const InterInstruction* nins)
{
if (ins->mDst.mTemp >= 0)
@ -4443,27 +4469,39 @@ static bool CanBypassStore(const InterInstruction * sins, const InterInstruction
return false;
InterMemory sm = IM_NONE, bm = IM_NONE;
int bi = -1, si = -1;
int bi = -1, si = -1, bt = -1, st = -1, bo = 0, so = 0, bz = 1, sz = 1;
if (sins->mCode == IC_LOAD)
{
sm = sins->mSrc[0].mMemory;
si = sins->mSrc[0].mVarIndex;
st = sins->mSrc[0].mTemp;
so = sins->mSrc[0].mIntConst;
sz = InterTypeSize[sins->mDst.mType];
}
else if (sins->mCode == IC_LEA || sins->mCode == IC_STORE)
{
sm = sins->mSrc[1].mMemory;
si = sins->mSrc[1].mVarIndex;
st = sins->mSrc[1].mTemp;
so = sins->mSrc[1].mIntConst;
sz = InterTypeSize[sins->mSrc[0].mType];
}
if (bins->mCode == IC_LOAD)
{
bm = bins->mSrc[0].mMemory;
bi = bins->mSrc[0].mVarIndex;
st = sins->mSrc[0].mTemp;
bo = sins->mSrc[0].mIntConst;
bz = InterTypeSize[sins->mDst.mType];
}
else if (bins->mCode == IC_LEA || bins->mCode == IC_STORE)
{
bm = bins->mSrc[1].mMemory;
bi = bins->mSrc[1].mVarIndex;
bt = sins->mSrc[1].mTemp;
bo = sins->mSrc[1].mIntConst;
bz = InterTypeSize[sins->mSrc[1].mType];
}
// Check ambiguity
@ -4481,6 +4519,10 @@ static bool CanBypassStore(const InterInstruction * sins, const InterInstruction
else
return false;
}
else if (sm == IM_INDIRECT && bm == IM_INDIRECT && st == bt)
{
return so + sz <= bz || bo + bz <= so;
}
else
return false;
}
@ -4727,6 +4769,10 @@ void InterCodeBasicBlock::SingleBlockLoopOptimisation(const NumberSet& aliasedPa
{
ins->mInvariant = false;
}
else if (ins->mSrc[0].mMemory == IM_LOCAL && hasCall)
{
ins->mInvariant = false;
}
else
{
for (int j = 0; j < mInstructions.Size(); j++)
@ -4961,10 +5007,54 @@ void InterCodeBasicBlock::PeepholeOptimization(void)
if (i != j)
mInstructions[j] = ins;
}
else if (mInstructions[i]->mCode == IC_BINARY_OPERATOR && mInstructions[i]->mSrc[0].mTemp >= 0 && mInstructions[i]->mSrc[0].mFinal && mInstructions[i]->mSrc[1].mTemp >= 0 && mInstructions[i]->mSrc[1].mFinal)
{
InterInstruction* ins(mInstructions[i]);
int j = i;
while (j > 0 && CanBypassUp(ins, mInstructions[j - 1]))
{
mInstructions[j] = mInstructions[j - 1];
j--;
}
if (i != j)
mInstructions[j] = ins;
}
i++;
}
i = limit;
while (i >= 0)
{
// move non indirect loads down
if (mInstructions[i]->mCode == IC_LOAD && (mInstructions[i]->mSrc[0].mMemory != IM_INDIRECT || mInstructions[i]->mDst.mType != IT_INT8))
{
InterInstruction* ins(mInstructions[i]);
int j = i;
while (j < limit && CanBypassLoad(ins, mInstructions[j + 1]))
{
mInstructions[j] = mInstructions[j + 1];
j++;
}
if (i != j)
mInstructions[j] = ins;
}
else if (mInstructions[i]->mCode == IC_LEA && mInstructions[i]->mSrc[0].mTemp == -1)
{
InterInstruction* ins(mInstructions[i]);
int j = i;
while (j < limit && CanBypass(ins, mInstructions[j + 1]))
{
mInstructions[j] = mInstructions[j + 1];
j++;
}
if (i != j)
mInstructions[j] = ins;
}
i--;
}
bool changed;
do
{

View File

@ -9290,26 +9290,29 @@ void NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc
while (ai < lblock->mIns.Size() && !lblock->mIns[ai].ChangesAccu())
ai++;
if (lblock->mIns[ai].mType == ASMIT_LDA && lblock->mIns[ai].mMode == ASMIM_IMMEDIATE)
if (ai < lblock->mIns.Size())
{
int i = ai + 1;
while (i < lblock->mIns.Size() && !lblock->mIns[i].ChangesAccu())
i++;
if (i == lblock->mIns.Size())
if (lblock->mIns[ai].mType == ASMIT_LDA && lblock->mIns[ai].mMode == ASMIM_IMMEDIATE)
{
mIns.Push(lblock->mIns[ai]);
lblock->mIns.Remove(ai);
int i = ai + 1;
while (i < lblock->mIns.Size() && !lblock->mIns[i].ChangesAccu())
i++;
if (i == lblock->mIns.Size())
{
mIns.Push(lblock->mIns[ai]);
lblock->mIns.Remove(ai);
}
}
}
else if (lblock->mIns[ai].mType == ASMIT_LDA && lblock->mIns[ai].mMode == ASMIM_ZERO_PAGE)
{
int i = ai + 1;
while (i < lblock->mIns.Size() && !lblock->mIns[i].ChangesAccu() && !lblock->mIns[i].ChangesZeroPage(lblock->mIns[0].mAddress))
i++;
if (i == lblock->mIns.Size())
else if (lblock->mIns[ai].mType == ASMIT_LDA && lblock->mIns[ai].mMode == ASMIM_ZERO_PAGE)
{
mIns.Push(lblock->mIns[ai]);
lblock->mIns.Remove(ai);
int i = ai + 1;
while (i < lblock->mIns.Size() && !lblock->mIns[i].ChangesAccu() && !lblock->mIns[i].ChangesZeroPage(lblock->mIns[ai].mAddress))
i++;
if (i == lblock->mIns.Size())
{
mIns.Push(lblock->mIns[ai]);
lblock->mIns.Remove(ai);
}
}
}
@ -9317,26 +9320,64 @@ void NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc
while (ai < lblock->mIns.Size() && !lblock->mIns[ai].ChangesYReg())
ai++;
if (lblock->mIns[ai].mType == ASMIT_LDY && lblock->mIns[ai].mMode == ASMIM_IMMEDIATE)
if (ai < lblock->mIns.Size())
{
int i = ai + 1;
while (i < lblock->mIns.Size() && !lblock->mIns[i].ChangesYReg())
i++;
if (i == lblock->mIns.Size())
if (lblock->mIns[ai].mType == ASMIT_LDY && lblock->mIns[ai].mMode == ASMIM_IMMEDIATE)
{
mIns.Push(lblock->mIns[ai]);
lblock->mIns.Remove(ai);
int i = ai + 1;
while (i < lblock->mIns.Size() && !lblock->mIns[i].ChangesYReg())
i++;
if (i == lblock->mIns.Size())
{
mIns.Push(lblock->mIns[ai]);
lblock->mIns.Remove(ai);
}
}
}
else if (lblock->mIns[ai].mType == ASMIT_LDY && lblock->mIns[ai].mMode == ASMIM_ZERO_PAGE)
{
int i = ai + 1;
while (i < lblock->mIns.Size() && !lblock->mIns[i].ChangesYReg() && !lblock->mIns[i].ChangesZeroPage(lblock->mIns[0].mAddress))
i++;
if (i == lblock->mIns.Size())
else if (lblock->mIns[ai].mType == ASMIT_LDY && lblock->mIns[ai].mMode == ASMIM_ZERO_PAGE)
{
mIns.Push(lblock->mIns[ai]);
lblock->mIns.Remove(ai);
int i = 0;
while (i < lblock->mIns.Size() && (i == ai || !lblock->mIns[i].ChangesYReg()))
i++;
if (i == lblock->mIns.Size())
{
int addr = lblock->mIns[ai].mAddress;
i = 0;
while (i < lblock->mIns.Size() &&
(lblock->mIns[i].mMode != ASMIM_ZERO_PAGE || lblock->mIns[i].mAddress != addr ||
lblock->mIns[i].mType == ASMIT_LDA || lblock->mIns[i].mType == ASMIT_STA || lblock->mIns[i].mType == ASMIT_INC || lblock->mIns[i].mType == ASMIT_DEC || lblock->mIns[i].mType == ASMIT_LDY))
i++;
if (i == lblock->mIns.Size())
{
mIns.Push(NativeCodeInstruction(ASMIT_LDY, ASMIM_ZERO_PAGE, addr));
lblock->mFalseJump->mIns.Push(NativeCodeInstruction(ASMIT_STY, ASMIM_ZERO_PAGE, addr));
for (int i = 0; i < lblock->mIns.Size(); i++)
{
if (lblock->mIns[i].mMode == ASMIM_ZERO_PAGE && lblock->mIns[i].mAddress == addr)
{
if (lblock->mIns[i].mType == ASMIT_LDA)
{
lblock->mIns[i].mType = ASMIT_TYA; lblock->mIns[i].mMode = ASMIM_IMPLIED;
}
else if (lblock->mIns[i].mType == ASMIT_STA)
{
lblock->mIns[i].mType = ASMIT_TAY; lblock->mIns[i].mMode = ASMIM_IMPLIED;
}
else if (lblock->mIns[i].mType == ASMIT_LDY)
{
lblock->mIns[i].mType = ASMIT_NOP; lblock->mIns[i].mMode = ASMIM_IMPLIED;
}
else if (lblock->mIns[i].mType == ASMIT_INC)
{
lblock->mIns[i].mType = ASMIT_INY; lblock->mIns[i].mMode = ASMIM_IMPLIED;
}
else if (lblock->mIns[i].mType == ASMIT_DEC)
{
lblock->mIns[i].mType = ASMIT_DEY; lblock->mIns[i].mMode = ASMIM_IMPLIED;
}
}
}
}
}
}
}
}
@ -9356,9 +9397,13 @@ bool NativeCodeBasicBlock::SimpleLoopReversal(NativeCodeProcedure* proc)
if (lb->mIns[lbs-1].mType == ASMIT_CPX)
{
if (lb->mIns[lbs-2].mType == ASMIT_INX && mIns.Last().mType == ASMIT_LDX && mIns.Last().mMode == ASMIM_IMMEDIATE && mIns.Last().mAddress == 0)
int li = mIns.Size() - 1;
while (li >= 0 && !mIns[li].ChangesXReg())
li--;
if (li >= 0 && lb->mIns[lbs-2].mType == ASMIT_INX && mIns[li].mType == ASMIT_LDX && mIns[li].mMode == ASMIM_IMMEDIATE)
{
if (lb->mIns[lbs - 1].mMode == ASMIM_ZERO_PAGE)
if (lb->mIns[lbs - 1].mMode == ASMIM_ZERO_PAGE && mIns[li].mAddress == 0)
{
int a = lb->mIns[lbs - 1].mAddress;
@ -9367,14 +9412,30 @@ bool NativeCodeBasicBlock::SimpleLoopReversal(NativeCodeProcedure* proc)
i++;
if (i + 2 == lbs)
{
mIns[mIns.Size() - 1].mMode = ASMIM_ZERO_PAGE;
mIns[mIns.Size() - 1].mAddress = a;
mIns[li].mMode = ASMIM_ZERO_PAGE;
mIns[li].mAddress = a;
lb->mIns[lbs - 2].mType = ASMIT_DEX;
lb->mIns[lbs - 1].mType = ASMIT_NOP; lb->mIns[lbs - 1].mMode = ASMIM_IMPLIED;
lb->mBranch = ASMIT_BNE;
}
}
else if (lb->mIns[lbs - 1].mMode == ASMIM_IMMEDIATE)
{
int a = lb->mIns[lbs - 1].mAddress - mIns[li].mAddress;
int i = 0;
while (i + 2 < lbs && !lb->mIns[i].RequiresXReg())
i++;
if (i + 2 == lbs)
{
mIns[li].mAddress = a;
lb->mIns[lbs - 2].mType = ASMIT_DEX;
lb->mIns[lbs - 1].mType = ASMIT_NOP; lb->mIns[lbs - 1].mMode = ASMIM_IMPLIED;
lb->mBranch = ASMIT_BNE;
}
}
}
}
else if (lb->mIns[lbs - 1].mType == ASMIT_CPY)
{
@ -12917,14 +12978,18 @@ void NativeCodeProcedure::Optimize(void)
changed = true;
#endif
#if 1
ResetVisited();
if (mEntryBlock->OptimizeSimpleLoop(this))
changed = true;
if (step > 0)
{
ResetVisited();
if (mEntryBlock->OptimizeSimpleLoop(this))
changed = true;
ResetVisited();
if (mEntryBlock->SimpleLoopReversal(this))
changed = true;
ResetVisited();
if (mEntryBlock->SimpleLoopReversal(this))
changed = true;
}
ResetVisited();
if (mEntryBlock->MergeBasicBlocks())
@ -13194,7 +13259,7 @@ void NativeCodeProcedure::CompileInterBlock(InterCodeProcedure* iproc, InterCode
}
else if (i + 2 < iblock->mInstructions.Size() &&
InterTypeSize[ins->mDst.mType] >= 2 &&
iblock->mInstructions[i + 1]->mCode == IC_LOAD && InterTypeSize[iblock->mInstructions[i + 1]->mDst.mType] == 2 &&
iblock->mInstructions[i + 1]->mCode == IC_LOAD && InterTypeSize[iblock->mInstructions[i + 1]->mDst.mType] >= 2 &&
iblock->mInstructions[i + 1]->mDst.mTemp != ins->mDst.mTemp &&
iblock->mInstructions[i + 2]->mCode == IC_BINARY_OPERATOR &&
iblock->mInstructions[i + 2]->mSrc[0].mTemp == iblock->mInstructions[i + 1]->mDst.mTemp && iblock->mInstructions[i + 2]->mSrc[0].mFinal &&
@ -13205,7 +13270,7 @@ void NativeCodeProcedure::CompileInterBlock(InterCodeProcedure* iproc, InterCode
}
else if (i + 2 < iblock->mInstructions.Size() &&
InterTypeSize[ins->mDst.mType] >= 2 &&
iblock->mInstructions[i + 1]->mCode == IC_LOAD && InterTypeSize[iblock->mInstructions[i + 1]->mDst.mType] == 2 &&
iblock->mInstructions[i + 1]->mCode == IC_LOAD && InterTypeSize[iblock->mInstructions[i + 1]->mDst.mType] >= 2 &&
iblock->mInstructions[i + 1]->mDst.mTemp != ins->mDst.mTemp &&
iblock->mInstructions[i + 2]->mCode == IC_BINARY_OPERATOR &&
iblock->mInstructions[i + 2]->mSrc[1].mTemp == iblock->mInstructions[i + 1]->mDst.mTemp && iblock->mInstructions[i + 2]->mSrc[1].mFinal &&