Fix XY register propagation for incoming fast parameters

This commit is contained in:
drmortalwombat 2023-02-10 18:10:41 +01:00
parent 5852ca5aea
commit acbd70a84f
3 changed files with 173 additions and 3 deletions

View File

@ -10944,6 +10944,118 @@ void InterCodeBasicBlock::SingleBlockLoopUnrolling(void)
} }
} }
void InterCodeBasicBlock::PushMoveOutOfLoop(void)
{
if (!mVisited)
{
mVisited = true;
if (mTrueJump && mFalseJump)
{
InterCodeBasicBlock* eblock = nullptr, * lblock = nullptr;
if (mTrueJump->mLoopHead)
{
lblock = mTrueJump;
eblock = mFalseJump;
}
else if (mFalseJump->mLoopHead)
{
lblock = mFalseJump;
eblock = mTrueJump;
}
if (eblock)
{
int i = 0;
while (i < mInstructions.Size())
{
InterInstruction* mins = mInstructions[i];
if (mins->mCode == IC_LOAD_TEMPORARY && !mins->mSrc[0].mFinal)
{
if (!lblock->mEntryRequiredTemps[mins->mDst.mTemp] && eblock->mEntryRequiredTemps[mins->mDst.mTemp] && !eblock->mExitRequiredTemps[mins->mDst.mTemp])
{
int offset = 0;
int j = i + 1;
bool fail = false;
while (j < mInstructions.Size() && !fail)
{
InterInstruction* cins = mInstructions[j];
if (cins->ReferencesTemp(mins->mDst.mTemp))
fail = true;
else if (cins->mDst.mTemp == mins->mSrc[0].mTemp)
{
if (cins->mCode == IC_LEA && cins->mSrc[1].mTemp == mins->mSrc[0].mTemp && cins->mSrc[0].mTemp < 0)
offset += cins->mSrc[0].mIntConst;
else
fail = true;
}
j++;
}
if (!fail)
{
int j = 0;
while (j < eblock->mInstructions.Size() && !fail)
{
InterInstruction* cins = eblock->mInstructions[j];
if (cins->ReferencesTemp(mins->mDst.mTemp))
{
if (cins->mCode == IC_LEA && cins->mSrc[1].mTemp == mins->mDst.mTemp && cins->mSrc[0].mTemp < 0)
{
if (cins->mSrc[1].mFinal)
break;
}
else
fail = true;
}
if (cins->mDst.mTemp == mins->mSrc[0].mTemp)
fail = true;
j++;
}
if (!fail)
{
eblock->mEntryRequiredTemps += mins->mSrc[0].mTemp;
j = 0;
while (j < eblock->mInstructions.Size())
{
InterInstruction* cins = eblock->mInstructions[j];
if (cins->ReferencesTemp(mins->mDst.mTemp))
{
if (cins->mCode == IC_LEA && cins->mSrc[1].mTemp == mins->mDst.mTemp && cins->mSrc[0].mTemp < 0)
{
cins->mSrc[1].mTemp = mins->mSrc[0].mTemp;
cins->mSrc[0].mIntConst -= offset;
if (cins->mSrc[1].mFinal)
break;
}
}
j++;
}
}
}
}
}
i++;
}
}
}
if (mTrueJump)
mTrueJump->PushMoveOutOfLoop();
if (mFalseJump)
mFalseJump->PushMoveOutOfLoop();
}
}
bool InterCodeBasicBlock::SingleBlockLoopPointerSplit(int& spareTemps) bool InterCodeBasicBlock::SingleBlockLoopPointerSplit(int& spareTemps)
{ {
@ -12649,6 +12761,19 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati
mInstructions[i + 1]->mSrc[1].mMemory = IM_INDIRECT; mInstructions[i + 1]->mSrc[1].mMemory = IM_INDIRECT;
changed = true; changed = true;
} }
#endif
#if 1
else if (
mInstructions[i + 0]->mCode == IC_LEA && mInstructions[i + 0]->mSrc[0].mTemp < 0 &&
mInstructions[i + 1]->mCode == IC_LEA && mInstructions[i + 1]->mSrc[0].mTemp < 0 &&
mInstructions[i + 1]->mSrc[1].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[1].mFinal)
{
mInstructions[i + 0]->mDst = mInstructions[i + 1]->mDst;
mInstructions[i + 0]->mSrc[0].mIntConst += mInstructions[i + 1]->mSrc[0].mIntConst;
mInstructions[i + 1]->mCode = IC_NONE; mInstructions[i + 1]->mNumOperands = 0;
changed = true;
}
#endif #endif
else if ( else if (
mInstructions[i + 0]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 0]->mOperator == IA_ADD && mInstructions[i + 0]->mSrc[1].mTemp < 0 && mInstructions[i + 0]->mSrc[0].mType == IT_INT16 && mInstructions[i + 0]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 0]->mOperator == IA_ADD && mInstructions[i + 0]->mSrc[1].mTemp < 0 && mInstructions[i + 0]->mSrc[0].mType == IT_INT16 &&
@ -14635,13 +14760,20 @@ void InterCodeProcedure::Close(void)
TempForwarding(); TempForwarding();
} while (GlobalConstantPropagation()); } while (GlobalConstantPropagation());
PeepholeOptimization();
PeepholeOptimization();
TempForwarding(); TempForwarding();
RemoveUnusedInstructions(); RemoveUnusedInstructions();
DisassembleDebug("Global Constant Prop 1"); DisassembleDebug("Global Constant Prop 1");
BuildDataFlowSets();
ResetVisited();
mEntryBlock->PushMoveOutOfLoop();
BuildDataFlowSets();
DisassembleDebug("PushMoveOutOfLoop");
#endif #endif
#if 1 #if 1

View File

@ -504,6 +504,7 @@ public:
bool CollectLoopBody(InterCodeBasicBlock* head, GrowingArray<InterCodeBasicBlock*> & body); bool CollectLoopBody(InterCodeBasicBlock* head, GrowingArray<InterCodeBasicBlock*> & body);
void CollectLoopPath(const GrowingArray<InterCodeBasicBlock*>& body, GrowingArray<InterCodeBasicBlock*>& path); void CollectLoopPath(const GrowingArray<InterCodeBasicBlock*>& body, GrowingArray<InterCodeBasicBlock*>& path);
void InnerLoopOptimization(const NumberSet& aliasedParams); void InnerLoopOptimization(const NumberSet& aliasedParams);
void PushMoveOutOfLoop(void);
InterCodeBasicBlock* BuildLoopPrefix(InterCodeProcedure * proc); InterCodeBasicBlock* BuildLoopPrefix(InterCodeProcedure * proc);
void BuildLoopSuffix(InterCodeProcedure* proc); void BuildLoopSuffix(InterCodeProcedure* proc);

View File

@ -17151,6 +17151,22 @@ bool NativeCodeBasicBlock::JoinTailCodeSequences(NativeCodeProcedure* proc, bool
changed = true; changed = true;
} }
} }
if (mIns[0].mType == ASMIT_LDY && mIns[0].mMode == ASMIM_ZERO_PAGE)
{
if (lblock->mIns[ls - 2].mType == ASMIT_STY && lblock->mIns[ls - 2].mMode == ASMIM_ZERO_PAGE && lblock->mIns[ls - 2].mAddress == mIns[0].mAddress && lblock->mIns[ls - 1].mType == ASMIT_CPY)
{
pblock = AddDominatorBlock(proc, pblock);
pblock->mIns.Push(mIns[0]);
mIns.Remove(0);
pblock->mExitRequiredRegs += CPU_REG_Y;
lblock->mExitRequiredRegs += CPU_REG_Y;
mEntryRequiredRegs += CPU_REG_Y;
mExitRequiredRegs += CPU_REG_Y;
}
}
} }
} }
#endif #endif
@ -18855,6 +18871,9 @@ bool NativeCodeBasicBlock::CheckCrossBlockXFloodExit(const NativeCodeBasicBlock*
mPatched = true; mPatched = true;
if (mEntryBlocks.Size() == 0)
return false;
for (int i = 0; i < mEntryBlocks.Size(); i++) for (int i = 0; i < mEntryBlocks.Size(); i++)
if (!mEntryBlocks[i]->CheckCrossBlockXFloodExit(block, reg, rvalid)) if (!mEntryBlocks[i]->CheckCrossBlockXFloodExit(block, reg, rvalid))
return false; return false;
@ -19111,6 +19130,9 @@ bool NativeCodeBasicBlock::CheckCrossBlockYFloodExit(const NativeCodeBasicBlock*
mPatched = true; mPatched = true;
if (mEntryBlocks.Size() == 0)
return false;
for (int i = 0; i < mEntryBlocks.Size(); i++) for (int i = 0; i < mEntryBlocks.Size(); i++)
if (!mEntryBlocks[i]->CheckCrossBlockYFloodExit(block, reg, false)) if (!mEntryBlocks[i]->CheckCrossBlockYFloodExit(block, reg, false))
return false; return false;
@ -31924,6 +31946,20 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
progress = true; progress = true;
} }
else if (
mIns[i + 0].mType == ASMIT_STA && !(mIns[i + 0].mLive & LIVE_CPU_REG_A) &&
!mIns[i + 1].ReferencesAccu() && !mIns[i + 0].MayBeSameAddress(mIns[i + 1]) &&
!mIns[i + 2].ReferencesAccu() && !mIns[i + 0].MayBeSameAddress(mIns[i + 2]) &&
mIns[i + 3].IsShift() && mIns[i + 3].SameEffectiveAddress(mIns[i + 0]))
{
NativeCodeInstruction ins = mIns[i + 0];
mIns[i + 0] = mIns[i + 1]; mIns[i + 0].mLive |= LIVE_CPU_REG_A;
mIns[i + 1] = mIns[i + 2]; mIns[i + 1].mLive |= LIVE_CPU_REG_A;
mIns[i + 2] = mIns[i + 3]; mIns[i + 2].mMode = ASMIM_IMPLIED; mIns[i + 2].mLive |= LIVE_CPU_REG_A;
mIns[i + 3] = ins; mIns[i + 3].mLive |= mIns[i + 2].mLive;
progress = true;
}
else if ( else if (
mIns[i + 0].IsShift() && (mIns[i + 0].mMode == ASMIM_ZERO_PAGE || mIns[i + 0].mMode == ASMIM_ABSOLUTE) && mIns[i + 0].IsShift() && (mIns[i + 0].mMode == ASMIM_ZERO_PAGE || mIns[i + 0].mMode == ASMIM_ABSOLUTE) &&
mIns[i + 3].mType == ASMIT_LDA && mIns[i + 3].SameEffectiveAddress(mIns[i + 0]) && !(mIns[i + 3].mLive & LIVE_MEM) && mIns[i + 3].mType == ASMIT_LDA && mIns[i + 3].SameEffectiveAddress(mIns[i + 0]) && !(mIns[i + 3].mLive & LIVE_MEM) &&
@ -34890,7 +34926,7 @@ void NativeCodeProcedure::RebuildEntry(void)
void NativeCodeProcedure::Optimize(void) void NativeCodeProcedure::Optimize(void)
{ {
CheckFunc = !strcmp(mInterProc->mIdent->mString, "malloc"); CheckFunc = !strcmp(mInterProc->mIdent->mString, "tile_draw_p");
#if 1 #if 1
int step = 0; int step = 0;
@ -35380,6 +35416,7 @@ void NativeCodeProcedure::Optimize(void)
mEntryBlock->CheckBlocks(); mEntryBlock->CheckBlocks();
#endif #endif
if (step == 7) if (step == 7)
{ {
ResetVisited(); ResetVisited();
@ -35401,6 +35438,7 @@ void NativeCodeProcedure::Optimize(void)
} }
#endif #endif
#if 1 #if 1
if (step == 7) if (step == 7)
{ {
@ -35442,7 +35480,6 @@ void NativeCodeProcedure::Optimize(void)
mGenerator->mErrors->Error(mInterProc->mLocation, EWARN_OPTIMIZER_LOCKED, "Optimizer locked in infinite loop", mInterProc->mIdent); mGenerator->mErrors->Error(mInterProc->mLocation, EWARN_OPTIMIZER_LOCKED, "Optimizer locked in infinite loop", mInterProc->mIdent);
} }
#if 1 #if 1
if (!changed && step < 9) if (!changed && step < 9)
{ {