From 37776dfaee6aad8451fb8b46ebec2ae5f7fe6b45 Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Sun, 5 May 2024 13:14:23 +0200 Subject: [PATCH] Remove more conditional branches from loops --- oscar64/InterCode.cpp | 68 ++++++++++++++- oscar64/NativeCodeGenerator.cpp | 141 ++++++++++++++++++++++++++++++-- oscar64/NativeCodeGenerator.h | 1 + 3 files changed, 199 insertions(+), 11 deletions(-) diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index c2e2638..5c77ad9 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -15670,6 +15670,70 @@ bool InterCodeBasicBlock::MoveConditionOutOfLoop(void) } } } + else if (mEntryBlocks.Size() == 2 && mInstructions.Size() == 1 && mInstructions[0]->mCode == IC_BRANCH && mInstructions[0]->mSrc[0].mTemp >= 0) + { + InterCodeBasicBlock* tail, * post; + + if (mEntryBlocks[0] == mLoopPrefix) + tail = mEntryBlocks[1]; + else + tail = mEntryBlocks[0]; + + if (mTrueJump == tail || mFalseJump == tail) + { + if (tail->mTrueJump == this) + post = tail->mFalseJump; + else + post = tail->mTrueJump; + + if (post && post->mNumEntries == 1) + { + GrowingArray lbody(nullptr); + + if (tail->CollectSingleHeadLoopBody(this, tail, lbody)) + { + int tz = tail->mInstructions.Size(); + int ct = mInstructions[0]->mSrc[0].mTemp; + + int i = 0; + while (i < lbody.Size() && !lbody[i]->IsTempModified(ct)) + i++; + + if (i == lbody.Size()) + { + i = 0; + while (i < tz && !IsObservable(tail->mInstructions[i]->mCode) && (tail->mInstructions[i]->mDst.mTemp < 0 || !post->mEntryRequiredTemps[tail->mInstructions[i]->mDst.mTemp])) + i++; + + if (i == tz) + { + InterInstruction * ins = mLoopPrefix->mInstructions.Pop(); + mLoopPrefix->mInstructions.Push(mInstructions.Pop()); + mInstructions.Push(ins); + + tail->mEntryBlocks.RemoveAll(this); + tail->mNumEntries--; + + post->mEntryBlocks.Push(mLoopPrefix); + post->mNumEntries++; + + if (mTrueJump == tail) + { + mTrueJump = mFalseJump; + mLoopPrefix->mTrueJump = post; + mLoopPrefix->mFalseJump = this; + } + else + mLoopPrefix->mFalseJump = post; + mFalseJump = nullptr; + + return true; + } + } + } + } + } + } } if (mTrueJump && mTrueJump->MoveConditionOutOfLoop()) @@ -20570,7 +20634,7 @@ void InterCodeProcedure::Close(void) { GrowingTypeArray tstack(IT_NONE); - CheckFunc = !strcmp(mIdent->mString, "interpret_program"); + CheckFunc = !strcmp(mIdent->mString, "ffill"); CheckCase = false; mEntryBlock = mBlocks[0]; @@ -22334,7 +22398,7 @@ void InterCodeProcedure::Disassemble(FILE* file) void InterCodeProcedure::Disassemble(const char* name, bool dumpSets) { -#if 0 +#if 1 #ifdef _WIN32 FILE* file; static bool initial = true; diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index f99ece9..3a8ad9b 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -24735,6 +24735,36 @@ bool NativeCodeBasicBlock::CrossBlockYAliasProgpagation(const int* yalias) } +void NativeCodeBasicBlock::BypassAccuLoadStoreXY(void) +{ + if (!mVisited) + { + mVisited = true; + + for (int i = 0; i + 1 < mIns.Size(); i++) + { + if (mIns[i].mType == ASMIT_LDA && mIns[i + 1].mType == ASMIT_STA && !(mIns[i + 1].mLive & LIVE_CPU_REG_A)) + { + if (!(mIns[i].mLive & LIVE_CPU_REG_X) && HasAsmInstructionMode(ASMIT_LDX, mIns[i].mMode) && HasAsmInstructionMode(ASMIT_STX, mIns[i + 1].mMode)) + { + mIns[i].mType = ASMIT_LDX; mIns[i].mLive |= LIVE_CPU_REG_X; + mIns[i + 1].mType = ASMIT_STX; + } + else if (!(mIns[i].mLive & LIVE_CPU_REG_Y) && HasAsmInstructionMode(ASMIT_LDY, mIns[i].mMode) && HasAsmInstructionMode(ASMIT_STY, mIns[i + 1].mMode)) + { + mIns[i].mType = ASMIT_LDY; mIns[i].mLive |= LIVE_CPU_REG_Y; + mIns[i + 1].mType = ASMIT_STY; + } + } + } + + if (mTrueJump) mTrueJump->BypassAccuLoadStoreXY(); + if (mFalseJump) mFalseJump->BypassAccuLoadStoreXY(); + + } +} + + bool NativeCodeBasicBlock::CrossBlockXYShortcut(void) { bool changed = false; @@ -31541,6 +31571,16 @@ bool NativeCodeBasicBlock::MoveLoadAddImmStoreAbsXUp(int at) if (top < at) { + int live = 0; + if (top > 0) + live = mIns[top - 1].mLive; + else if (mIns[top].RequiresYReg()) + live |= LIVE_CPU_REG_Y; + mIns[at + 0].mLive |= live; + mIns[at + 1].mLive |= live; + mIns[at + 2].mLive |= live; + mIns[at + 3].mLive |= live; + mIns.Insert(top, mIns[at + 3]); mIns.Remove(at + 4); mIns.Insert(top, mIns[at + 3]); mIns.Remove(at + 4); mIns.Insert(top, mIns[at + 3]); mIns.Remove(at + 4); @@ -35317,6 +35357,8 @@ bool NativeCodeBasicBlock::SimpleLoopReversal(NativeCodeProcedure* proc) mTrueJump->mBranch == ASMIT_BCC && !mExitRequiredRegs[CPU_REG_C]) { NativeCodeBasicBlock* lb = mTrueJump; + NativeCodeBasicBlock* eb = mTrueJump->mFalseJump; + int lbs = lb->mIns.Size(); if (lb->mIns[lbs-1].mType == ASMIT_CPX) @@ -35341,6 +35383,7 @@ bool NativeCodeBasicBlock::SimpleLoopReversal(NativeCodeProcedure* proc) lb->mIns[lbs - 2].mType = ASMIT_DEX; lb->mIns[lbs - 2].mLive |= LIVE_CPU_REG_Z; lb->mIns[lbs - 1].mType = ASMIT_NOP; lb->mIns[lbs - 1].mMode = ASMIM_IMPLIED; lb->mBranch = ASMIT_BNE; + eb->mIns.Insert(0, NativeCodeInstruction(mIns[lbs - 1].mIns, ASMIT_LDX, mIns[li])); changed = true; CheckLive(); @@ -35349,6 +35392,7 @@ bool NativeCodeBasicBlock::SimpleLoopReversal(NativeCodeProcedure* proc) } else if (lb->mIns[lbs - 1].mMode == ASMIM_IMMEDIATE) { + int finalx = lb->mIns[lbs - 1].mAddress; int a = lb->mIns[lbs - 1].mAddress - mIns[li].mAddress; if (lbs == 3 && lb->mIns[0].mType == ASMIT_STA && lb->mIns[0].mMode == ASMIM_ABSOLUTE_X && lb->mIns[0].mLinkerObject && a < 128) @@ -35358,6 +35402,7 @@ bool NativeCodeBasicBlock::SimpleLoopReversal(NativeCodeProcedure* proc) lb->mIns[1].mType = ASMIT_DEX; lb->mIns[1].mLive |= LIVE_CPU_REG_Z; lb->mIns[2].mType = ASMIT_NOP; lb->mIns[2].mMode = ASMIM_IMPLIED; lb->mBranch = ASMIT_BPL; + eb->mIns.Insert(0, NativeCodeInstruction(mIns[lbs - 1].mIns, ASMIT_LDX, ASMIM_IMMEDIATE, finalx)); changed = true; } else @@ -35371,6 +35416,7 @@ bool NativeCodeBasicBlock::SimpleLoopReversal(NativeCodeProcedure* proc) lb->mIns[lbs - 2].mType = ASMIT_DEX; lb->mIns[lbs - 2].mLive |= LIVE_CPU_REG_Z; lb->mIns[lbs - 1].mType = ASMIT_NOP; lb->mIns[lbs - 1].mMode = ASMIM_IMPLIED; lb->mBranch = ASMIT_BNE; + eb->mIns.Insert(0, NativeCodeInstruction(mIns[lbs - 1].mIns, ASMIT_LDX, ASMIM_IMMEDIATE, finalx)); changed = true; CheckLive(); @@ -35403,6 +35449,7 @@ bool NativeCodeBasicBlock::SimpleLoopReversal(NativeCodeProcedure* proc) lb->mIns[lbs - 2].mType = ASMIT_DEY; lb->mIns[lbs - 2].mLive |= LIVE_CPU_REG_Z; lb->mIns[lbs - 1].mType = ASMIT_NOP; lb->mIns[lbs - 1].mMode = ASMIM_IMPLIED; lb->mBranch = ASMIT_BNE; + eb->mIns.Insert(0, NativeCodeInstruction(mIns[lbs - 1].mIns, ASMIT_LDY, mIns[li])); changed = true; CheckLive(); @@ -35411,6 +35458,7 @@ bool NativeCodeBasicBlock::SimpleLoopReversal(NativeCodeProcedure* proc) } else if (lb->mIns[lbs - 1].mMode == ASMIM_IMMEDIATE) { + int finaly = lb->mIns[lbs - 1].mAddress; int a = lb->mIns[lbs - 1].mAddress - mIns[li].mAddress; if (lbs == 3 && lb->mIns[0].mType == ASMIT_STA && lb->mIns[0].mMode == ASMIM_ABSOLUTE_Y && lb->mIns[0].mLinkerObject && a < 128) @@ -35420,6 +35468,7 @@ bool NativeCodeBasicBlock::SimpleLoopReversal(NativeCodeProcedure* proc) lb->mIns[1].mType = ASMIT_DEY; lb->mIns[1].mLive |= LIVE_CPU_REG_Z; lb->mIns[2].mType = ASMIT_NOP; lb->mIns[2].mMode = ASMIM_IMPLIED; lb->mBranch = ASMIT_BPL; + eb->mIns.Insert(0, NativeCodeInstruction(mIns[lbs - 1].mIns, ASMIT_LDY, ASMIM_IMMEDIATE, finaly)); changed = true; CheckLive(); @@ -35436,6 +35485,7 @@ bool NativeCodeBasicBlock::SimpleLoopReversal(NativeCodeProcedure* proc) lb->mIns[lbs - 2].mType = ASMIT_DEY; lb->mIns[lbs - 2].mLive |= LIVE_CPU_REG_Z; lb->mIns[lbs - 1].mType = ASMIT_NOP; lb->mIns[lbs - 1].mMode = ASMIM_IMPLIED; lb->mBranch = ASMIT_BNE; + eb->mIns.Insert(0, NativeCodeInstruction(mIns[lbs - 1].mIns, ASMIT_LDY, ASMIM_IMMEDIATE, finaly)); changed = true; CheckLive(); @@ -39907,7 +39957,9 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass { if ((mIns[i].mType == ASMIT_INY || mIns[i].mType == ASMIT_DEY) && !mIns[i + 1].ChangesYReg() && !(mIns[i + 1].mLive & LIVE_CPU_REG_Z)) { - if (!mIns[i + 1].RequiresYReg()) + if (i + 3 == mIns.Size() && mIns[i + 1].mType == ASMIT_INX && mIns[i + 2].mType == ASMIT_CPX) + ; // Avoid bypassing loop iterator + else if (!mIns[i + 1].RequiresYReg()) { NativeCodeInstruction pins = mIns[i]; mIns[i] = mIns[i + 1]; @@ -39930,7 +39982,9 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass } else if ((mIns[i].mType == ASMIT_INX || mIns[i].mType == ASMIT_DEX) && !mIns[i + 1].ChangesXReg() && !(mIns[i + 1].mLive & LIVE_CPU_REG_Z)) { - if (!mIns[i + 1].RequiresXReg()) + if (i + 3 == mIns.Size() && mIns[i + 1].mType == ASMIT_INY && mIns[i + 2].mType == ASMIT_CPY) + ; // Avoid bypassing loop iterator + else if (!mIns[i + 1].RequiresXReg()) { NativeCodeInstruction pins = mIns[i]; mIns[i] = mIns[i + 1]; @@ -41207,6 +41261,14 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[i + 0].mLive |= mIns[i + 1].mLive; progress = true; } + else if ( + mIns[i + 0].mType == ASMIT_LDX && (mIns[i + 0].mMode == ASMIM_IMMEDIATE || mIns[i + 0].mMode == ASMIM_ZERO_PAGE || mIns[i + 0].mMode == ASMIM_ABSOLUTE || mIns[i + 0].mMode == ASMIM_ABSOLUTE_Y) && + mIns[i + 1].mType == ASMIT_TXA && !(mIns[i + 1].mLive & LIVE_CPU_REG_A)) + { + mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; + mIns[i + 0].mLive |= mIns[i + 1].mLive; + progress = true; + } else if ( mIns[i + 0].mType == ASMIT_TXA && mIns[i + 1].mType == ASMIT_CMP && (mIns[i + 1].mMode == ASMIM_IMMEDIATE || mIns[i + 1].mMode == ASMIM_ZERO_PAGE || mIns[i + 1].mMode == ASMIM_ABSOLUTE)) @@ -41892,6 +41954,28 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED; progress = true; } + else if ( + mIns[i + 0].mType == ASMIT_TAX && + mIns[i + 1].mType == ASMIT_INX && + mIns[i + 2].mType == ASMIT_TXA && !(mIns[i + 2].mLive & (LIVE_CPU_REG_X | LIVE_CPU_REG_C))) + { + mIns[i + 0].mType = ASMIT_CLC; + mIns[i + 0].mLive |= LIVE_CPU_REG_C | LIVE_CPU_REG_A; + mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; + mIns[i + 2].mType = ASMIT_ADC; mIns[i + 2].mMode = ASMIM_IMMEDIATE; mIns[i + 2].mAddress = 1; + progress = true; + } + else if ( + mIns[i + 0].mType == ASMIT_TAX && + mIns[i + 1].mType == ASMIT_DEX && + mIns[i + 2].mType == ASMIT_TXA && !(mIns[i + 2].mLive & (LIVE_CPU_REG_X | LIVE_CPU_REG_C))) + { + mIns[i + 0].mType = ASMIT_SEC; + mIns[i + 0].mLive |= LIVE_CPU_REG_C | LIVE_CPU_REG_A; + mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; + mIns[i + 2].mType = ASMIT_SBC; mIns[i + 2].mMode = ASMIM_IMMEDIATE; mIns[i + 2].mAddress = 1; + progress = true; + } else if ( mIns[i + 0].mType == ASMIT_TAX && !mIns[i + 1].ChangesXReg() && !mIns[i + 1].ChangesAccu() && @@ -46421,6 +46505,19 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass changed = true; } } + +#if 1 + if (sz >= 3 && (mBranch == ASMIT_BCC || mBranch == ASMIT_BCS) && + mIns[sz - 1].mType == ASMIT_INY && mIns[sz - 2].mType == ASMIT_CPX && mIns[sz - 3].mType == ASMIT_INX && + !(mIns[sz - 1].mLive & LIVE_CPU_REG_Z)) + { + mIns[sz - 1] = mIns[sz - 2]; + mIns[sz - 2] = mIns[sz - 3]; + mIns[sz - 3].mType = ASMIT_INY; + changed = true; + } +#endif + #if 1 if (sz > 0 && mFalseJump && mIns[sz - 1].mType == ASMIT_CMP && mIns[sz - 1].mMode == ASMIM_IMMEDIATE) { @@ -48879,6 +48976,15 @@ void NativeCodeProcedure::Optimize(void) } } #endif +#if 1 + if (step == 14 && cnt == 0) + { + ResetVisited(); + mEntryBlock->BypassAccuLoadStoreXY(); + changed = true; + } +#endif + #if _DEBUG ResetVisited(); mEntryBlock->CheckAsmCode(); @@ -48898,7 +49004,7 @@ void NativeCodeProcedure::Optimize(void) } #if 1 - if (!changed && step < 13) + if (!changed && step < 14) { ResetIndexFlipped(); @@ -50070,23 +50176,40 @@ void NativeCodeGenerator::BuildFunctionProxies(void) code.Push(uint8(ins.mAddress)); break; case ASMIM_ABSOLUTE: - code.Push(uint8(AsmInsOpcodes[ASMIT_LDA][ASMIM_ABSOLUTE])); - if (ins.mLinkerObject) + if (ins.mLinkerObject && (ins.mLinkerObject->mFlags & LOBJF_ZEROPAGE)) { + code.Push(uint8(AsmInsOpcodes[ASMIT_LDA][ASMIM_ZERO_PAGE])); LinkerReference rl; rl.mObject = ncp->mProxyObject; rl.mOffset = code.Size(); + rl.mRefObject = ins.mLinkerObject; rl.mRefOffset = ins.mAddress; - rl.mFlags = LREF_LOWBYTE | LREF_HIGHBYTE; + rl.mFlags = LREF_LOWBYTE; + ncp->mProxyObject->AddReference(rl); code.Push(0); - code.Push(0); } else { - code.Push(uint8(ins.mAddress & 0xff)); - code.Push(uint8(ins.mAddress >> 8)); + code.Push(uint8(AsmInsOpcodes[ASMIT_LDA][ASMIM_ABSOLUTE])); + if (ins.mLinkerObject) + { + LinkerReference rl; + rl.mObject = ncp->mProxyObject; + rl.mOffset = code.Size(); + rl.mRefObject = ins.mLinkerObject; + rl.mRefOffset = ins.mAddress; + rl.mFlags = LREF_LOWBYTE | LREF_HIGHBYTE; + ncp->mProxyObject->AddReference(rl); + code.Push(0); + code.Push(0); + } + else + { + code.Push(uint8(ins.mAddress & 0xff)); + code.Push(uint8(ins.mAddress >> 8)); + } } break; } diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index c6ddbc2..116ebde 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -575,6 +575,7 @@ public: void DoCrossBlockYShortcut(int addr); bool CrossBlockXYShortcut(void); + void BypassAccuLoadStoreXY(void); bool CrossBlockYAliasProgpagation(const int * yalias);