From e013142cb18d646ec98aa718cdc4fcf1c12c97fe Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Sun, 30 Jul 2023 11:24:19 +0200 Subject: [PATCH] Cross block register optimizations --- oscar64/InterCode.cpp | 53 ++++++++ oscar64/InterCode.h | 1 + oscar64/NativeCodeGenerator.cpp | 229 +++++++++++++++++++++++++------- oscar64/NativeCodeGenerator.h | 2 + 4 files changed, 234 insertions(+), 51 deletions(-) diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index 69ae93b..2c4dbd7 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -6376,6 +6376,47 @@ static int64 BuildLowerBitsMask(int64 v) return v; } +void InterCodeBasicBlock::MarkIntegerRangeBoundUp(int temp, int64 value, GrowingIntegerValueRangeArray& range) +{ /* Limits range[temp] to [value, value], then scans this block's instructions from last to first and gives the matching exact bound to temps linked to temp by IC_LOAD_TEMPORARY, compensating for constant add/subtract seen on the way. */ + range[temp].SetLimit(value, value); + + for (int i = mInstructions.Size() - 1; i >= 0; i--) + { + InterInstruction* ins(mInstructions[i]); + + if (ins->mDst.mTemp == temp) + { /* this instruction writes the temp being tracked */ + if (ins->mCode == IC_BINARY_OPERATOR && ins->mSrc[1].mTemp == temp && ins->mSrc[0].mTemp < 0) + { /* temp is combined with a constant operand (mSrc[0] has no temp) and written back to itself */ + switch (ins->mOperator) + { + case IA_ADD: /* value before the add is smaller by the constant */ + value -= ins->mSrc[0].mIntConst; + break; + case IA_SUB: /* assumes the result is mSrc[1] - mSrc[0], so the constant is added back - TODO confirm operand order */ + value += ins->mSrc[0].mIntConst; + break; + default: /* other operators are not traced back: stop scanning */ + return; + } + } + else if (ins->mCode == IC_LOAD_TEMPORARY) + { /* temp was copied from mSrc[0] */ + if (!IsTempModifiedInRange(i + 1, mInstructions.Size(), ins->mSrc[0].mTemp)) /* presumably tests for a write in instructions [i + 1, Size()) - confirm against the helper */ + range[ins->mSrc[0].mTemp].SetLimit(value, value); + temp = ins->mSrc[0].mTemp; /* the backward scan continues on the copy source */ + } + else /* temp is defined by some other instruction: nothing more to derive */ + return; + } + else if (ins->mCode == IC_LOAD_TEMPORARY && ins->mSrc[0].mTemp == temp) + { /* temp is copied into mDst here: the copy gets the same bound under the same IsTempModifiedInRange check */ + if (!IsTempModifiedInRange(i + 1, mInstructions.Size(), ins->mDst.mTemp)) + range[ins->mDst.mTemp].SetLimit(value, value); + } + } +} + void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(const GrowingVariableArray& localVars, const GrowingVariableArray& paramVars) { mLocalValueRange = mEntryValueRange; @@ -7328,34 +7369,46 @@ void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(const GrowingVariableArray case IA_CMPEQ: if (s0 < 
0) { + MarkIntegerRangeBoundUp(s1, mInstructions[sz - 2]->mSrc[0].mIntConst, mTrueValueRange); +#if 0 mTrueValueRange[s1].mMinState = IntegerValueRange::S_BOUND; mTrueValueRange[s1].mMinValue = mInstructions[sz - 2]->mSrc[0].mIntConst; mTrueValueRange[s1].mMaxState = IntegerValueRange::S_BOUND; mTrueValueRange[s1].mMaxValue = mInstructions[sz - 2]->mSrc[0].mIntConst; +#endif } else if (s1 < 0) { + MarkIntegerRangeBoundUp(s0, mInstructions[sz - 2]->mSrc[1].mIntConst, mTrueValueRange); +#if 0 mTrueValueRange[s0].mMinState = IntegerValueRange::S_BOUND; mTrueValueRange[s0].mMinValue = mInstructions[sz - 2]->mSrc[1].mIntConst; mTrueValueRange[s0].mMaxState = IntegerValueRange::S_BOUND; mTrueValueRange[s0].mMaxValue = mInstructions[sz - 2]->mSrc[1].mIntConst; +#endif } break; case IA_CMPNE: if (s0 < 0) { + MarkIntegerRangeBoundUp(s1, mInstructions[sz - 2]->mSrc[0].mIntConst, mFalseValueRange); +#if 0 mFalseValueRange[s1].mMinState = IntegerValueRange::S_BOUND; mFalseValueRange[s1].mMinValue = mInstructions[sz - 2]->mSrc[0].mIntConst; mFalseValueRange[s1].mMaxState = IntegerValueRange::S_BOUND; mFalseValueRange[s1].mMaxValue = mInstructions[sz - 2]->mSrc[0].mIntConst; +#endif } else if (s1 < 0) { + MarkIntegerRangeBoundUp(s0, mInstructions[sz - 2]->mSrc[1].mIntConst, mFalseValueRange); +#if 0 mFalseValueRange[s0].mMinState = IntegerValueRange::S_BOUND; mFalseValueRange[s0].mMinValue = mInstructions[sz - 2]->mSrc[1].mIntConst; mFalseValueRange[s0].mMaxState = IntegerValueRange::S_BOUND; mFalseValueRange[s0].mMaxValue = mInstructions[sz - 2]->mSrc[1].mIntConst; +#endif } break; #endif diff --git a/oscar64/InterCode.h b/oscar64/InterCode.h index 75845db..260e406 100644 --- a/oscar64/InterCode.h +++ b/oscar64/InterCode.h @@ -440,6 +440,7 @@ public: void UpdateLocalIntegerRangeSets(const GrowingVariableArray& localVars, const GrowingVariableArray& paramVars); bool BuildGlobalIntegerRangeSets(bool initial, const GrowingVariableArray& localVars, const GrowingVariableArray& 
paramVars); void SimplifyIntegerRangeRelops(void); + void MarkIntegerRangeBoundUp(int temp, int64 value, GrowingIntegerValueRangeArray& range); bool CombineIndirectAddressing(void); diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index 5047cc1..2b8683c 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -20706,6 +20706,100 @@ bool NativeCodeBasicBlock::CanCrossBlockYShortcut(int addr) return false; } +bool NativeCodeBasicBlock::CrossBlockRegisterAlias(bool sameAX, bool sameAY) +{ /* Processes this block once (guarded by mVisited) and then recurses into mTrueJump and mFalseJump. sameAX / sameAY are set on the A<->X and A<->Y transfer instructions and cleared when an instruction reports a write through ChangesAccu / ChangesXReg / ChangesYReg; while a flag is set, CMP, CPX and CPY may be retargeted to the other register of the pair. Returns true if any instruction type was changed here or in a block reached from here. */ + bool changed = false; + if (!mVisited) + { + mVisited = true; + + if (mNumEntries > 1) + { /* more than one entry: the flags passed in by the caller are dropped */ + sameAX = false; + sameAY = false; + } + + bool direct = false; /* set by TYA/TXA and never cleared in this block; gates the CPX/CPY -> CMP rewrites below */ + for (int i = 0; i < mIns.Size(); i++) + { + NativeCodeInstruction& ins(mIns[i]); + switch (ins.mType) + { + case ASMIT_TAY: /* Y receives a copy of A */ + sameAY = true; + break; + case ASMIT_TAX: /* X receives a copy of A */ + sameAX = true; + break; + case ASMIT_TYA: /* A is overwritten from Y, so only the A/Y flag survives */ + sameAY = true; + sameAX = false; + direct = true; + break; + case ASMIT_TXA: /* A is overwritten from X, so only the A/X flag survives */ + sameAY = false; + sameAX = true; + direct = true; + break; + case ASMIT_CMP: + if (!(ins.mLive & LIVE_CPU_REG_A)) + { /* A is not live after this compare: switch to CPY or CPX when the flag is set, that register is live and HasAsmInstructionMode accepts the mode */ + if (sameAY && (ins.mLive & LIVE_CPU_REG_Y) && HasAsmInstructionMode(ASMIT_CPY, ins.mMode)) + { + ins.mType = ASMIT_CPY; + changed = true; + } + else if (sameAX && (ins.mLive & LIVE_CPU_REG_X) && HasAsmInstructionMode(ASMIT_CPX, ins.mMode)) + { + ins.mType = ASMIT_CPX; + changed = true; + } + } + break; + case ASMIT_CPX: + if (!direct && !(ins.mLive & LIVE_CPU_REG_X)) + { /* X is not live after this compare and no TYA/TXA was seen in this block: compare through A if it is flagged and live */ + if (sameAX && (ins.mLive & LIVE_CPU_REG_A)) + { + ins.mType = ASMIT_CMP; + changed = true; + } + } + break; + case ASMIT_CPY: + if (!direct && !(ins.mLive & LIVE_CPU_REG_Y)) + { /* same rewrite as the CPX case, for the A/Y pair */ + if (sameAY && (ins.mLive & LIVE_CPU_REG_A)) + { + ins.mType = ASMIT_CMP; + changed = true; + } + } + break; + default: /* any other instruction: clear the flags of the registers it reports as changed */ + if (ins.ChangesAccu()) + { + sameAY = false; + sameAX = false; + } + if (ins.ChangesXReg()) + sameAX = false; + if (ins.ChangesYReg()) + sameAY = false; + break; + } + } + + + if (mTrueJump && 
mTrueJump->CrossBlockRegisterAlias(sameAX, sameAY)) + changed = true; /* the flags as they stand at the end of this block are passed to both successors */ + if (mFalseJump && mFalseJump->CrossBlockRegisterAlias(sameAX, sameAY)) + changed = true; + } + + return changed; +} + bool NativeCodeBasicBlock::CrossBlockYAliasProgpagation(const int* yalias, int yoffset) { bool changed = false; @@ -31540,13 +31634,23 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc) { if (zxreg[ins.mAddress] >= 0) { - zxreg[ins.mAddress] += 3; - xskew++; + if (ins.mLive & LIVE_CPU_REG_X) + zxreg[ins.mAddress] = -1; + else + { + zxreg[ins.mAddress] += 3; + xskew++; + } } if (zyreg[ins.mAddress] >= 0) { - zyreg[ins.mAddress] += 3; - yskew++; + if (ins.mLive & LIVE_CPU_REG_Y) + zyreg[ins.mAddress] = -1; + else + { + zyreg[ins.mAddress] += 3; + yskew++; + } } zareg[ins.mAddress] = -1; @@ -31557,13 +31661,23 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc) { if (zxreg[ins.mAddress] >= 0) { - zxreg[ins.mAddress] += 3; - xskew--; + if (ins.mLive & LIVE_CPU_REG_X) + zxreg[ins.mAddress] = -1; + else + { + zxreg[ins.mAddress] += 3; + xskew--; + } } if (zyreg[ins.mAddress] >= 0) { - zyreg[ins.mAddress] += 3; - yskew--; + if (ins.mLive & LIVE_CPU_REG_Y) + zyreg[ins.mAddress] = -1; + else + { + zyreg[ins.mAddress] += 3; + yskew--; + } } zareg[ins.mAddress] = -1; @@ -31762,47 +31876,6 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc) block->mExitRequiredRegs += CPU_REG_A; } - if (block->mTrueJump && !lblocks.Contains(block->mTrueJump)) - { - block->mTrueJump = block->BuildSingleEntry(proc, block->mTrueJump); - if (areg >= 0 && block->mTrueJump->mEntryRequiredRegs[areg]) - { - if (areg < 256) - block->mTrueJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STA, ASMIM_ZERO_PAGE, areg)); - block->mTrueJump->mEntryRequiredRegs += CPU_REG_A; - } - if (yreg >= 0 && block->mTrueJump->mEntryRequiredRegs[yreg]) - { - block->mTrueJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STY, 
ASMIM_ZERO_PAGE, yreg)); - block->mTrueJump->mEntryRequiredRegs += CPU_REG_Y; - } - if (xreg >= 0 && block->mTrueJump->mEntryRequiredRegs[xreg]) - { - block->mTrueJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STX, ASMIM_ZERO_PAGE, xreg)); - block->mTrueJump->mEntryRequiredRegs += CPU_REG_X; - } - } - if (block->mFalseJump && !lblocks.Contains(block->mFalseJump)) - { - block->mFalseJump = block->BuildSingleEntry(proc, block->mFalseJump); - if (areg >= 0 && block->mFalseJump->mEntryRequiredRegs[areg]) - { - if (areg < 256) - block->mFalseJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STA, ASMIM_ZERO_PAGE, areg)); - block->mFalseJump->mEntryRequiredRegs += CPU_REG_A; - } - if (yreg >= 0 && block->mFalseJump->mEntryRequiredRegs[yreg]) - { - block->mFalseJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STY, ASMIM_ZERO_PAGE, yreg)); - block->mFalseJump->mEntryRequiredRegs += CPU_REG_Y; - } - if (xreg >= 0 && block->mFalseJump->mEntryRequiredRegs[xreg]) - { - block->mFalseJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STX, ASMIM_ZERO_PAGE, xreg)); - block->mFalseJump->mEntryRequiredRegs += CPU_REG_X; - } - } - for (int j = 0; j < block->mEntryBlocks.Size(); j++) { if (!lblocks.Contains(block->mEntryBlocks[j])) @@ -31831,7 +31904,7 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc) } } - int yoffset = 0, xoffset = 0; + int yoffset = 0, xoffset = 0, xskew = 0, yskew = 0; for (int j = 0; j < block->mIns.Size(); j++) { @@ -31846,6 +31919,7 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc) ins.mType = ASMIT_NOP; ins.mMode = ASMIM_IMPLIED; yoffset = 0; + yskew = 0; } break; case ASMIT_LDX: @@ -31855,6 +31929,7 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc) ins.mType = ASMIT_NOP; ins.mMode = ASMIM_IMPLIED; xoffset = 0; + xskew = 0; } break; case ASMIT_INC: @@ -31862,11 +31937,13 @@ bool 
NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc) { ins.mType = ASMIT_INY; ins.mMode = ASMIM_IMPLIED; + yskew += 1; } else if (ins.mMode == ASMIM_ZERO_PAGE && ins.mAddress == xreg) { ins.mType = ASMIT_INX; ins.mMode = ASMIM_IMPLIED; + xskew += 1; } break; case ASMIT_DEC: @@ -31874,11 +31951,13 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc) { ins.mType = ASMIT_DEY; ins.mMode = ASMIM_IMPLIED; + yskew -= 1; } else if (ins.mMode == ASMIM_ZERO_PAGE && ins.mAddress == xreg) { ins.mType = ASMIT_DEX; ins.mMode = ASMIM_IMPLIED; + xskew -= 1; } break; @@ -31995,6 +32074,47 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc) } + if (block->mTrueJump && !lblocks.Contains(block->mTrueJump)) + { + block->mTrueJump = block->BuildSingleEntry(proc, block->mTrueJump); + if (areg >= 0 && block->mTrueJump->mEntryRequiredRegs[areg]) + { + if (areg < 256) + block->mTrueJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STA, ASMIM_ZERO_PAGE, areg)); + block->mTrueJump->mEntryRequiredRegs += CPU_REG_A; + } + if (yreg >= 0 && block->mTrueJump->mEntryRequiredRegs[yreg]) + { + block->mTrueJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STY, ASMIM_ZERO_PAGE, yreg)); + block->mTrueJump->mEntryRequiredRegs += CPU_REG_Y; + } + if (xreg >= 0 && block->mTrueJump->mEntryRequiredRegs[xreg]) + { + block->mTrueJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STX, ASMIM_ZERO_PAGE, xreg)); + block->mTrueJump->mEntryRequiredRegs += CPU_REG_X; + } + } + if (block->mFalseJump && !lblocks.Contains(block->mFalseJump)) + { + block->mFalseJump = block->BuildSingleEntry(proc, block->mFalseJump); + if (areg >= 0 && block->mFalseJump->mEntryRequiredRegs[areg]) + { + if (areg < 256) + block->mFalseJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STA, ASMIM_ZERO_PAGE, areg)); + block->mFalseJump->mEntryRequiredRegs += CPU_REG_A; + } + if (yreg >= 0 && 
block->mFalseJump->mEntryRequiredRegs[yreg]) + { + block->mFalseJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STY, ASMIM_ZERO_PAGE, yreg)); + block->mFalseJump->mEntryRequiredRegs += CPU_REG_Y; + } + if (xreg >= 0 && block->mFalseJump->mEntryRequiredRegs[xreg]) + { + block->mFalseJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STX, ASMIM_ZERO_PAGE, xreg)); + block->mFalseJump->mEntryRequiredRegs += CPU_REG_X; + } + } + block->CheckLive(); } @@ -40551,7 +40671,7 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc) { mInterProc = proc; - CheckFunc = !strcmp(mInterProc->mIdent->mString, "test"); + CheckFunc = !strcmp(mInterProc->mIdent->mString, "enemies_find"); int nblocks = proc->mBlocks.Size(); tblocks = new NativeCodeBasicBlock * [nblocks]; @@ -41631,6 +41751,13 @@ void NativeCodeProcedure::Optimize(void) } #endif + if (step == 7) + { + ResetVisited(); + if (mEntryBlock->CrossBlockRegisterAlias(false, false)) + changed = true; + } + if (step == 8) { ResetVisited(); diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index ff5d34f..e103579 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -525,6 +525,8 @@ public: bool CrossBlockYAliasProgpagation(const int * yalias, int yoffset); + bool CrossBlockRegisterAlias(bool sameAX, bool sameAY); + bool BypassRegisterConditionBlock(void); bool FoldLoopEntry(void);