From 514cf59398a47210fd888a48c9f26023ebabe950 Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Sat, 31 May 2025 19:44:33 +0200 Subject: [PATCH] More short loop reversal --- oscar64/InterCode.cpp | 107 +++++++++++++++++++++++++++++--- oscar64/InterCode.h | 1 + oscar64/NativeCodeGenerator.cpp | 71 +++++++++++++++++++-- 3 files changed, 164 insertions(+), 15 deletions(-) diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index 80d1875..a0b4e2c 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -7566,6 +7566,54 @@ bool InterCodeBasicBlock::EarlyBranchElimination(const GrowingInstructionPtrArra return changed; } +bool InterCodeBasicBlock::PropagateConstCompareResults(void) +{ + bool changed = false; + + if (!mVisited) + { + mVisited = true; + + int sz = mInstructions.Size(); + if (sz >= 2 && mFalseJump && + mInstructions[sz - 1]->mCode == IC_BRANCH && + mInstructions[sz - 2]->mCode == IC_RELATIONAL_OPERATOR && (mInstructions[sz - 2]->mOperator == IA_CMPEQ || mInstructions[sz - 2]->mOperator == IA_CMPNE) && + mInstructions[sz - 1]->mSrc[0].mTemp == mInstructions[sz - 2]->mDst.mTemp) + { + InterCodeBasicBlock* cblock = mTrueJump; + if (mInstructions[sz - 2]->mOperator == IA_CMPNE) + cblock = mFalseJump; + + if (cblock->mNumEntries == 1) + { + if (mInstructions[sz - 2]->mSrc[1].mTemp < 0 && mInstructions[sz - 2]->mSrc[0].mTemp >= 0 && cblock->mEntryRequiredTemps[mInstructions[sz - 2]->mSrc[0].mTemp]) + { + InterInstruction* cins = new InterInstruction(mInstructions[sz - 2]->mLocation, IC_CONSTANT); + cins->mDst = mInstructions[sz - 2]->mSrc[0]; + cins->mConst = mInstructions[sz - 2]->mSrc[1]; + cblock->mInstructions.Insert(0, cins); + changed = true; + } + else if (mInstructions[sz - 2]->mSrc[0].mTemp < 0 && mInstructions[sz - 2]->mSrc[1].mTemp >= 0 && cblock->mEntryRequiredTemps[mInstructions[sz - 2]->mSrc[1].mTemp]) + { + InterInstruction* cins = new InterInstruction(mInstructions[sz - 2]->mLocation, IC_CONSTANT); + cins->mDst = mInstructions[sz - 2]->mSrc[1]; + cins->mConst = mInstructions[sz - 2]->mSrc[0]; + cblock->mInstructions.Insert(0, cins); + changed = true; + } + } + } + + if (mTrueJump && mTrueJump->PropagateConstCompareResults()) + changed = true; + if (mFalseJump && mFalseJump->PropagateConstCompareResults()) + changed = true; + } + + return changed; +} + bool InterCodeBasicBlock::ForwardConstTemps(const GrowingInstructionPtrArray& ctemps) { bool changed = false; @@ -7576,26 +7624,55 @@ bool InterCodeBasicBlock::ForwardConstTemps(const GrowingInstructionPtrArray& ct { mVisited = true; - GrowingInstructionPtrArray temps(ctemps); - if (mEntryBlocks.Size() > 1) - temps.Clear(); + GrowingInstructionPtrArray ltemps(ctemps); + + if (mLoopHead) + { + if (mNumEntries == 2 && (mTrueJump == this || mFalseJump == this)) + { + for (i = 0; i < mInstructions.Size(); i++) + if (mInstructions[i]->mDst.mTemp >= 0) + ltemps[mInstructions[i]->mDst.mTemp] = nullptr; + } + else + ltemps.Clear(); + } + else if (mNumEntries > 0) + { + if (mNumEntered > 0) + { + for (int i = 0; i < ltemps.Size(); i++) + { + if (mMergeTValues[i] != ltemps[i]) + ltemps[i] = nullptr; + } + } + + mNumEntered++; + + if (mNumEntered < mNumEntries) + { + mMergeTValues = ltemps; + return false; + } + } for (i = 0; i < mInstructions.Size(); i++) { - if (mInstructions[i]->PropagateConstTemps(temps)) + if (mInstructions[i]->PropagateConstTemps(ltemps)) changed = true; if (mInstructions[i]->mDst.mTemp >= 0) { if (mInstructions[i]->mCode == IC_CONSTANT) - temps[mInstructions[i]->mDst.mTemp] = mInstructions[i]; + ltemps[mInstructions[i]->mDst.mTemp] = mInstructions[i]; else - temps[mInstructions[i]->mDst.mTemp] = nullptr; + ltemps[mInstructions[i]->mDst.mTemp] = nullptr; } } - if (mTrueJump && mTrueJump->ForwardConstTemps(temps)) + if (mTrueJump && mTrueJump->ForwardConstTemps(ltemps)) changed = true; - if (mFalseJump && mFalseJump->ForwardConstTemps(temps)) + if (mFalseJump && mFalseJump->ForwardConstTemps(ltemps)) changed = true; } @@ -24000,7 +24077,7 @@ void InterCodeProcedure::Close(void) { GrowingTypeArray tstack(IT_NONE); - CheckFunc = !strcmp(mIdent->mString, "main"); + CheckFunc = !strcmp(mIdent->mString, "equipment_init_display"); CheckCase = false; mEntryBlock = mBlocks[0]; @@ -24934,6 +25011,9 @@ void InterCodeProcedure::Close(void) BuildDataFlowSets(); + ResetVisited(); + mEntryBlock->PropagateConstCompareResults(); + GrowingInstructionPtrArray ptemps(nullptr); ptemps.SetSize(mTemporaries.Size()); ResetVisited(); @@ -24952,6 +25032,15 @@ void InterCodeProcedure::Close(void) DisassembleDebug("EliminateDoubleLoopCounter"); + BuildDataFlowSets(); + + ResetVisited(); + mEntryBlock->PropagateConstCompareResults(); + + ptemps.SetSize(mTemporaries.Size(), true); + ResetVisited(); + mEntryBlock->ForwardConstTemps(ptemps); + ResetVisited(); mEntryBlock->SingleLoopCountZeroCheck(); diff --git a/oscar64/InterCode.h b/oscar64/InterCode.h index 52a3f2a..449215a 100644 --- a/oscar64/InterCode.h +++ b/oscar64/InterCode.h @@ -448,6 +448,7 @@ public: void CollectConstTemps(GrowingInstructionPtrArray& ctemps, NumberSet& assignedTemps); bool PropagateConstTemps(const GrowingInstructionPtrArray& ctemps); bool ForwardConstTemps(const GrowingInstructionPtrArray& ctemps); + bool PropagateConstCompareResults(void); bool EarlyBranchElimination(const GrowingInstructionPtrArray& ctemps); diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index e19f688..67d6681 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -40837,7 +40837,9 @@ bool NativeCodeBasicBlock::SimpleLoopReversal(NativeCodeProcedure* proc) int finalx = lb->mIns[lbs - 1].mAddress; int a = lb->mIns[lbs - 1].mAddress - mIns[li].mAddress; - if (lbs == 3 && lb->mIns[0].mType == ASMIT_STA && lb->mIns[0].mMode == ASMIM_ABSOLUTE_X && lb->mIns[0].mLinkerObject && a < 128) + if (lbs == 3 && lb->mIns[0].mType == ASMIT_STA && lb->mIns[0].mMode == ASMIM_ABSOLUTE_X && + ((lb->mIns[0].mLinkerObject && a < 128) || + (!lb->mIns[0].mLinkerObject && !(lb->mIns[0].mFlags & NCIF_VOLATILE) && a <= 40))) { lb->mIns[0].mAddress += mIns[li].mAddress; mIns[li].mAddress = a - 1; @@ -40875,11 +40877,11 @@ bool NativeCodeBasicBlock::SimpleLoopReversal(NativeCodeProcedure* proc) while (li >= 0 && !mIns[li].ReferencesYReg()) li--; - if (li >= 0 && lb->mIns[lbs - 2].mType == ASMIT_INY && mIns[li].mType == ASMIT_LDY && mIns[li].mMode == ASMIM_IMMEDIATE && mIns[li].mAddress == 0) + if (li >= 0 && lb->mIns[lbs - 2].mType == ASMIT_INY && mIns[li].mType == ASMIT_LDY && mIns[li].mMode == ASMIM_IMMEDIATE) { if (lb->mIns[lbs - 1].mMode == ASMIM_ZERO_PAGE) { - int a = lb->mIns[lbs - 1].mAddress; + int a = lb->mIns[lbs - 1].mAddress - mIns[li].mAddress; int i = 0; while (i + 2 < lbs && !(lb->mIns[i].RequiresYReg() || lb->mIns[i].ChangesZeroPage(a))) @@ -40895,7 +40897,6 @@ bool NativeCodeBasicBlock::SimpleLoopReversal(NativeCodeProcedure* proc) changed = true; CheckLive(); - } } else if (lb->mIns[lbs - 1].mMode == ASMIM_IMMEDIATE) @@ -40903,7 +40904,9 @@ bool NativeCodeBasicBlock::SimpleLoopReversal(NativeCodeProcedure* proc) int finaly = lb->mIns[lbs - 1].mAddress; int a = lb->mIns[lbs - 1].mAddress - mIns[li].mAddress; - if (lbs == 3 && lb->mIns[0].mType == ASMIT_STA && lb->mIns[0].mMode == ASMIM_ABSOLUTE_Y && lb->mIns[0].mLinkerObject && a < 128) + if (lbs == 3 && lb->mIns[0].mType == ASMIT_STA && lb->mIns[0].mMode == ASMIM_ABSOLUTE_Y && + ((lb->mIns[0].mLinkerObject && a < 128) || + (!lb->mIns[0].mLinkerObject && !(lb->mIns[0].mFlags & NCIF_VOLATILE) && a <= 40))) { lb->mIns[0].mAddress += mIns[li].mAddress; mIns[li].mAddress = a - 1; @@ -40914,7 +40917,29 @@ bool NativeCodeBasicBlock::SimpleLoopReversal(NativeCodeProcedure* proc) changed = true; CheckLive(); + + } + else if (lbs == 3 && lb->mIns[0].mType == ASMIT_STA && lb->mIns[0].mMode == ASMIM_INDIRECT_Y && !(lb->mIns[0].mFlags & NCIF_VOLATILE) && a <= 40 && mIns[li].mAddress == 0) + { + mIns[li].mAddress = a - 1; + lb->mIns[1].mType = ASMIT_DEY; lb->mIns[1].mLive |= LIVE_CPU_REG_Z; + lb->mIns[2].mType = ASMIT_NOP; lb->mIns[2].mMode = ASMIM_IMPLIED; + lb->mBranch = ASMIT_BPL; + eb->mIns.Insert(0, NativeCodeInstruction(lb->mIns[lbs - 1].mIns, ASMIT_LDY, ASMIM_IMMEDIATE, finaly)); + changed = true; + CheckLive(); + } + else if (lbs == 3 && lb->mIns[0].mType == ASMIT_STA && lb->mIns[0].mMode == ASMIM_INDIRECT_Y && !(lb->mIns[0].mFlags & NCIF_VOLATILE) && a <= 40 && mIns[li].mAddress == 1) + { + mIns[li].mAddress = a - 1; + lb->mIns[1].mType = ASMIT_DEY; lb->mIns[1].mLive |= LIVE_CPU_REG_Z; + lb->mIns[2].mType = ASMIT_NOP; lb->mIns[2].mMode = ASMIM_IMPLIED; + lb->mBranch = ASMIT_BNE; + eb->mIns.Insert(0, NativeCodeInstruction(lb->mIns[lbs - 1].mIns, ASMIT_LDY, ASMIM_IMMEDIATE, finaly)); + changed = true; + + CheckLive(); } else { @@ -47001,6 +47026,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizerIterate2(int i, int pass) mIns[i].mAddress = (mIns[i].mAddress << 1) & 255; mIns[i].mLive |= LIVE_CPU_REG_A; + mIns[i + 1].mLive |= mIns[i].mLive & LIVE_CPU_REG_Z; return true; } @@ -47012,6 +47038,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizerIterate2(int i, int pass) mIns[i].mAddress = (mIns[i].mAddress >> 1) & 255; mIns[i].mLive |= LIVE_CPU_REG_A; + mIns[i + 1].mLive |= mIns[i].mLive & LIVE_CPU_REG_Z; return true; } @@ -48222,7 +48249,19 @@ bool NativeCodeBasicBlock::PeepHoleOptimizerIterate3(int i, int pass) mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED; return true; } - +#if 1 + if ( + pass >= 6 && + mIns[i + 0].mType == ASMIT_ASL && mIns[i + 0].mMode == ASMIM_IMPLIED && + mIns[i + 1].mType == ASMIT_STA && + mIns[i + 2].mType == ASMIT_INC && mIns[i + 2].SameEffectiveAddress(mIns[i + 1])) + { + mIns[i + 0].mType = ASMIT_SEC; mIns[i + 0].mLive |= LIVE_CPU_REG_C; + mIns[i + 1].mType = ASMIT_ROL; mIns[i + 1].mMode = ASMIM_IMPLIED; mIns[i + 1].mLive |= LIVE_CPU_REG_A; + mIns[i + 2].mType = ASMIT_STA; + return true; + } +#endif if ( mIns[i + 0].mType == ASMIT_STA && !(mIns[i + 0].mFlags & NCIF_VOLATILE) && mIns[i + 2].mType == ASMIT_STA && mIns[i + 0].SameEffectiveAddress(mIns[i + 2]) && @@ -50329,6 +50368,22 @@ bool NativeCodeBasicBlock::PeepHoleOptimizerIterate4(int i, int pass) } #endif + if (pass >= 6 && + mIns[i + 0].mType == ASMIT_LDA && mIns[i + 0].mMode == ASMIM_ZERO_PAGE && + mIns[i + 1].IsShift() && mIns[i + 1].mMode == ASMIM_IMPLIED && + mIns[i + 2].IsShift() && mIns[i + 2].mMode == ASMIM_IMPLIED && + mIns[i + 3].mType == ASMIT_STA && mIns[i + 3].SameEffectiveAddress(mIns[i + 0]) && !(mIns[i + 3].mLive & LIVE_CPU_REG_A)) + { + + mIns[i + 1].CopyMode(mIns[i + 0]); + mIns[i + 2].CopyMode(mIns[i + 0]); + + mIns[i + 0].mType = ASMIT_NOP; mIns[i + 0].mMode = ASMIM_IMPLIED; + mIns[i + 3].mType = ASMIT_NOP; mIns[i + 3].mMode = ASMIM_IMPLIED; + + return true; + } + if ( mIns[i + 0].mType == ASMIT_STA && !(mIns[i + 0].mLive & LIVE_CPU_REG_A) && !mIns[i + 1].ReferencesAccu() && !mIns[i + 0].MayBeSameAddress(mIns[i + 1]) && @@ -53114,6 +53169,10 @@ bool NativeCodeBasicBlock::PeepHoleOptimizerIterate(int pass) } } +#if _DEBUG + NativeCodeInstruction h0(mIns[i + 0]); + NativeCodeInstruction h1(mIns[i + 1 < mIns.Size() ? i + 1 : 0]); +#endif if (i + 1 < mIns.Size() && PeepHoleOptimizerIterate2(i, pass)) progress = true; CheckLive(); #if _DEBUG