From acbd70a84f73165a4f7ad32691b38251c476a65e Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Fri, 10 Feb 2023 18:10:41 +0100 Subject: [PATCH] Fix XY register propagation for incoming fast parameters --- oscar64/InterCode.cpp | 134 +++++++++++++++++++++++++++++++- oscar64/InterCode.h | 1 + oscar64/NativeCodeGenerator.cpp | 41 +++++++++- 3 files changed, 173 insertions(+), 3 deletions(-) diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index 2fc7bc3..3dd9e05 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -10944,6 +10944,118 @@ void InterCodeBasicBlock::SingleBlockLoopUnrolling(void) } } +void InterCodeBasicBlock::PushMoveOutOfLoop(void) +{ + if (!mVisited) + { + mVisited = true; + + if (mTrueJump && mFalseJump) + { + InterCodeBasicBlock* eblock = nullptr, * lblock = nullptr; + + if (mTrueJump->mLoopHead) + { + lblock = mTrueJump; + eblock = mFalseJump; + } + else if (mFalseJump->mLoopHead) + { + lblock = mFalseJump; + eblock = mTrueJump; + } + + if (eblock) + { + int i = 0; + while (i < mInstructions.Size()) + { + InterInstruction* mins = mInstructions[i]; + if (mins->mCode == IC_LOAD_TEMPORARY && !mins->mSrc[0].mFinal) + { + if (!lblock->mEntryRequiredTemps[mins->mDst.mTemp] && eblock->mEntryRequiredTemps[mins->mDst.mTemp] && !eblock->mExitRequiredTemps[mins->mDst.mTemp]) + { + int offset = 0; + int j = i + 1; + bool fail = false; + + while (j < mInstructions.Size() && !fail) + { + InterInstruction* cins = mInstructions[j]; + if (cins->ReferencesTemp(mins->mDst.mTemp)) + fail = true; + else if (cins->mDst.mTemp == mins->mSrc[0].mTemp) + { + if (cins->mCode == IC_LEA && cins->mSrc[1].mTemp == mins->mSrc[0].mTemp && cins->mSrc[0].mTemp < 0) + offset += cins->mSrc[0].mIntConst; + else + fail = true; + } + j++; + } + + if (!fail) + { + int j = 0; + while (j < eblock->mInstructions.Size() && !fail) + { + InterInstruction* cins = eblock->mInstructions[j]; + + if
(cins->ReferencesTemp(mins->mDst.mTemp)) + { + if (cins->mCode == IC_LEA && cins->mSrc[1].mTemp == mins->mDst.mTemp && cins->mSrc[0].mTemp < 0) + { + if (cins->mSrc[1].mFinal) + break; + } + else + fail = true; + } + + if (cins->mDst.mTemp == mins->mSrc[0].mTemp) + fail = true; + + j++; + } + + if (!fail) + { + eblock->mEntryRequiredTemps += mins->mSrc[0].mTemp; + + j = 0; + while (j < eblock->mInstructions.Size()) + { + InterInstruction* cins = eblock->mInstructions[j]; + if (cins->ReferencesTemp(mins->mDst.mTemp)) + { + if (cins->mCode == IC_LEA && cins->mSrc[1].mTemp == mins->mDst.mTemp && cins->mSrc[0].mTemp < 0) + { + cins->mSrc[1].mTemp = mins->mSrc[0].mTemp; + cins->mSrc[0].mIntConst -= offset; + + if (cins->mSrc[1].mFinal) + break; + } + } + j++; + } + } + } + } + } + + i++; + } + } + } + + if (mTrueJump) + mTrueJump->PushMoveOutOfLoop(); + if (mFalseJump) + mFalseJump->PushMoveOutOfLoop(); + } +} + bool InterCodeBasicBlock::SingleBlockLoopPointerSplit(int& spareTemps) { @@ -12649,6 +12761,19 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati mInstructions[i + 1]->mSrc[1].mMemory = IM_INDIRECT; changed = true; } +#endif +#if 1 + else if ( + mInstructions[i + 0]->mCode == IC_LEA && mInstructions[i + 0]->mSrc[0].mTemp < 0 && + mInstructions[i + 1]->mCode == IC_LEA && mInstructions[i + 1]->mSrc[0].mTemp < 0 && + mInstructions[i + 1]->mSrc[1].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[1].mFinal) + { + mInstructions[i + 0]->mDst = mInstructions[i + 1]->mDst; + mInstructions[i + 0]->mSrc[0].mIntConst += mInstructions[i + 1]->mSrc[0].mIntConst; + + mInstructions[i + 1]->mCode = IC_NONE; mInstructions[i + 1]->mNumOperands = 0; + changed = true; + } #endif else if ( mInstructions[i + 0]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 0]->mOperator == IA_ADD && mInstructions[i + 0]->mSrc[1].mTemp < 0 && mInstructions[i + 0]->mSrc[0].mType == IT_INT16 && @@ -14635,13 +14760,20 @@ void 
InterCodeProcedure::Close(void) TempForwarding(); } while (GlobalConstantPropagation()); - PeepholeOptimization(); + PeepholeOptimization(); TempForwarding(); RemoveUnusedInstructions(); DisassembleDebug("Global Constant Prop 1"); + BuildDataFlowSets(); + ResetVisited(); + mEntryBlock->PushMoveOutOfLoop(); + BuildDataFlowSets(); + + DisassembleDebug("PushMoveOutOfLoop"); + #endif #if 1 diff --git a/oscar64/InterCode.h b/oscar64/InterCode.h index 4969af8..357c764 100644 --- a/oscar64/InterCode.h +++ b/oscar64/InterCode.h @@ -504,6 +504,7 @@ public: bool CollectLoopBody(InterCodeBasicBlock* head, GrowingArray & body); void CollectLoopPath(const GrowingArray& body, GrowingArray& path); void InnerLoopOptimization(const NumberSet& aliasedParams); + void PushMoveOutOfLoop(void); InterCodeBasicBlock* BuildLoopPrefix(InterCodeProcedure * proc); void BuildLoopSuffix(InterCodeProcedure* proc); diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index 599b63b..b8cf4c5 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -17151,6 +17151,22 @@ bool NativeCodeBasicBlock::JoinTailCodeSequences(NativeCodeProcedure* proc, bool changed = true; } } + + if (mIns[0].mType == ASMIT_LDY && mIns[0].mMode == ASMIM_ZERO_PAGE) + { + if (lblock->mIns[ls - 2].mType == ASMIT_STY && lblock->mIns[ls - 2].mMode == ASMIM_ZERO_PAGE && lblock->mIns[ls - 2].mAddress == mIns[0].mAddress && lblock->mIns[ls - 1].mType == ASMIT_CPY) + { + pblock = AddDominatorBlock(proc, pblock); + + pblock->mIns.Push(mIns[0]); + mIns.Remove(0); + + pblock->mExitRequiredRegs += CPU_REG_Y; + lblock->mExitRequiredRegs += CPU_REG_Y; + mEntryRequiredRegs += CPU_REG_Y; + mExitRequiredRegs += CPU_REG_Y; + } + } } } #endif @@ -18855,6 +18871,9 @@ bool NativeCodeBasicBlock::CheckCrossBlockXFloodExit(const NativeCodeBasicBlock* mPatched = true; + if (mEntryBlocks.Size() == 0) + return false; + for (int i = 0; i < mEntryBlocks.Size(); i++) if 
(!mEntryBlocks[i]->CheckCrossBlockXFloodExit(block, reg, rvalid)) return false; @@ -19111,6 +19130,9 @@ bool NativeCodeBasicBlock::CheckCrossBlockYFloodExit(const NativeCodeBasicBlock* mPatched = true; + if (mEntryBlocks.Size() == 0) + return false; + for (int i = 0; i < mEntryBlocks.Size(); i++) if (!mEntryBlocks[i]->CheckCrossBlockYFloodExit(block, reg, false)) return false; @@ -31924,6 +31946,20 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass progress = true; } + else if ( + mIns[i + 0].mType == ASMIT_STA && !(mIns[i + 0].mLive & LIVE_CPU_REG_A) && + !mIns[i + 1].ReferencesAccu() && !mIns[i + 0].MayBeSameAddress(mIns[i + 1]) && + !mIns[i + 2].ReferencesAccu() && !mIns[i + 0].MayBeSameAddress(mIns[i + 2]) && + mIns[i + 3].IsShift() && mIns[i + 3].SameEffectiveAddress(mIns[i + 0])) + { + NativeCodeInstruction ins = mIns[i + 0]; + mIns[i + 0] = mIns[i + 1]; mIns[i + 0].mLive |= LIVE_CPU_REG_A; + mIns[i + 1] = mIns[i + 2]; mIns[i + 1].mLive |= LIVE_CPU_REG_A; + mIns[i + 2] = mIns[i + 3]; mIns[i + 2].mMode = ASMIM_IMPLIED; mIns[i + 2].mLive |= LIVE_CPU_REG_A; + mIns[i + 3] = ins; mIns[i + 3].mLive |= mIns[i + 2].mLive; + + progress = true; + } else if ( mIns[i + 0].IsShift() && (mIns[i + 0].mMode == ASMIM_ZERO_PAGE || mIns[i + 0].mMode == ASMIM_ABSOLUTE) && mIns[i + 3].mType == ASMIT_LDA && mIns[i + 3].SameEffectiveAddress(mIns[i + 0]) && !(mIns[i + 3].mLive & LIVE_MEM) && @@ -34890,7 +34926,7 @@ void NativeCodeProcedure::RebuildEntry(void) void NativeCodeProcedure::Optimize(void) { - CheckFunc = !strcmp(mInterProc->mIdent->mString, "malloc"); + CheckFunc = !strcmp(mInterProc->mIdent->mString, "tile_draw_p"); #if 1 int step = 0; @@ -35380,6 +35416,7 @@ void NativeCodeProcedure::Optimize(void) mEntryBlock->CheckBlocks(); #endif + if (step == 7) { ResetVisited(); @@ -35401,6 +35438,7 @@ void NativeCodeProcedure::Optimize(void) } #endif + #if 1 if (step == 7) { @@ -35442,7 +35480,6 @@ void NativeCodeProcedure::Optimize(void) 
mGenerator->mErrors->Error(mInterProc->mLocation, EWARN_OPTIMIZER_LOCKED, "Optimizer locked in infinite loop", mInterProc->mIdent); } - #if 1 if (!changed && step < 9) {