From 153c0ef4fa3e5563038b66b3a5b50230900c2673 Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Wed, 22 Feb 2023 22:43:35 +0100 Subject: [PATCH] Short loop optimizations --- oscar64/InterCode.cpp | 57 ++++++- oscar64/NativeCodeGenerator.cpp | 276 ++++++++++++++++++++++++++++++-- oscar64/NativeCodeGenerator.h | 5 +- 3 files changed, 324 insertions(+), 14 deletions(-) diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index 45ac76e..e17852c 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -711,6 +711,22 @@ static int64 ConstantFolding(InterOperator oper, InterType type, int64 val1, int } } +static int64 ConstantRelationalPointerFolding(InterOperator oper, const InterOperand& op1, const InterOperand& op2) +{ + if (op1.mMemory == op2.mMemory) + { + if (op1.mMemory == IM_ABSOLUTE) + return ConstantFolding(oper, IT_INT16, op1.mIntConst, op2.mIntConst); + else if (op1.mMemory != IM_INDIRECT && op1.mVarIndex == op2.mVarIndex) + return ConstantFolding(oper, IT_INT16, op1.mIntConst, op2.mIntConst); + } + + if (oper == IA_CMPNE) + return 1; + else + return 0; +} + static int64 ConstantRelationalFolding(InterOperator oper, double val1, double val2) { switch (oper) @@ -3030,6 +3046,29 @@ bool InterInstruction::PropagateConstTemps(const GrowingInstructionPtrArray& cte } } break; + case IC_RELATIONAL_OPERATOR: + { + bool changed = false; + + for (int i = 0; i < 2; i++) + { + if (mSrc[i].mTemp >= 0 && ctemps[mSrc[i].mTemp]) + { + InterType t = mSrc[i].mType; + InterInstruction* ains = ctemps[mSrc[i].mTemp]; + mSrc[i] = ains->mConst; + mSrc[i].mType = t; + changed = true; + } + } + + if (changed) + { + this->ConstantFolding(); + return true; + } + } break; + case IC_CONVERSION_OPERATOR: case IC_UNARY_OPERATOR: { @@ -3628,7 +3667,9 @@ bool InterInstruction::ConstantFolding(void) if (mSrc[0].mTemp < 0 && mSrc[1].mTemp < 0) { mCode = IC_CONSTANT; - if (IsIntegerType(mSrc[0].mType)) + if (mSrc[0].mType == IT_POINTER) + mConst.mIntConst = ::ConstantRelationalPointerFolding(mOperator, mSrc[1], mSrc[0]); + else if (IsIntegerType(mSrc[0].mType)) mConst.mIntConst = ::ConstantFolding(mOperator, mSrc[0].mType, mSrc[1].mIntConst, mSrc[0].mIntConst); else mConst.mIntConst = ConstantRelationalFolding(mOperator, mSrc[1].mFloatConst, mSrc[0].mFloatConst); @@ -11213,7 +11254,7 @@ void InterCodeBasicBlock::SingleBlockLoopUnrolling(void) if (nins > 3 && nins < 20) { if (mInstructions[nins - 1]->mCode == IC_BRANCH && - mInstructions[nins - 2]->mCode == IC_RELATIONAL_OPERATOR && mInstructions[nins - 2]->mOperator == IA_CMPLU && mInstructions[nins - 2]->mDst.mTemp == mInstructions[nins - 1]->mSrc[0].mTemp && + mInstructions[nins - 2]->mCode == IC_RELATIONAL_OPERATOR && (mInstructions[nins - 2]->mOperator == IA_CMPLU || mInstructions[nins - 2]->mOperator == IA_CMPLEU) && mInstructions[nins - 2]->mDst.mTemp == mInstructions[nins - 1]->mSrc[0].mTemp && mInstructions[nins - 2]->mSrc[0].mTemp < 0 && mInstructions[nins - 3]->mCode == IC_BINARY_OPERATOR && mInstructions[nins - 3]->mOperator == IA_ADD && mInstructions[nins - 3]->mDst.mTemp == mInstructions[nins - 2]->mSrc[1].mTemp) { @@ -11232,6 +11273,9 @@ void InterCodeBasicBlock::SingleBlockLoopUnrolling(void) { int start = mDominator->mTrueValueRange[ireg].mMinValue; int end = mInstructions[nins - 2]->mSrc[0].mIntConst; + if (mInstructions[nins - 2]->mOperator == IA_CMPLEU) + end++; + int step = mInstructions[nins - 3]->mSrc[0].mTemp < 0 ? mInstructions[nins - 3]->mSrc[0].mIntConst : mInstructions[nins - 3]->mSrc[1].mIntConst; int count = (end - start) / step; @@ -14993,6 +15037,14 @@ void InterCodeProcedure::Close(void) PeepholeOptimization(); +#if 1 + BuildDataFlowSets(); + + do { + TempForwarding(); + } while (GlobalConstantPropagation()); +#endif + #if 1 RebuildIntegerRangeSet(); #endif @@ -15124,7 +15176,6 @@ void InterCodeProcedure::Close(void) PropagateConstOperationsUp(); - #if 1 do { diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index 5734d39..37ab119 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -13439,6 +13439,90 @@ bool NativeCodeBasicBlock::CanGlobalSwapXY(void) return true; } + +bool NativeCodeBasicBlock::IsSimpleSubExpression(int at) const +{ + if (at >= 2 && mIns[at - 2].mType == ASMIT_LDA && mIns[at - 2].mMode == ASMIM_ZERO_PAGE && mIns[at - 1].mMode == ASMIM_IMMEDIATE) + { + if (mIns[at - 1].mType == ASMIT_AND || mIns[at - 1].mType == ASMIT_ORA || mIns[at - 1].mType == ASMIT_EOR) + return true; + if (at >= 3) + { + if (mIns[at - 1].mType == ASMIT_ADC && mIns[at - 3].mType == ASMIT_CLC) + return true; + if (mIns[at - 1].mType == ASMIT_SBC && mIns[at - 3].mType == ASMIT_SEC) + return true; + } + } + + return false; +} + +bool NativeCodeBasicBlock::PropagateCommonSubExpression(void) +{ + bool changed = false; + + if (!mVisited) + { + mVisited = true; + + int cex[256]; + for (int i = 0; i < 256; i++) + cex[i] = -1; + + for (int i = 0; i < mIns.Size(); i++) + { + bool reset = false; + + NativeCodeInstruction& ins(mIns[i]); + if (ins.mType == ASMIT_STA && ins.mMode == ASMIM_ZERO_PAGE) + { + if (IsSimpleSubExpression(i)) + { + int si = cex[ins.mAddress]; + if (si >= 0 && mIns[si - 2].IsSame(mIns[i - 2]) && mIns[si - 1].IsSame(mIns[i - 1]) && !(ins.mLive & LIVE_CPU_REG_C)) + { + mIns[i].mType = ASMIT_LDA; + mIns[si].mLive |= LIVE_MEM; + mIns[i - 1].mType = ASMIT_NOP; mIns[i - 1].mMode = ASMIM_IMPLIED; + mIns[i - 2].mType = ASMIT_NOP; mIns[i - 2].mMode = ASMIM_IMPLIED; + changed = true; + } + else + cex[ins.mAddress] = i; + } + else + reset = true; + } + else if (ins.mMode == ASMIM_ZERO_PAGE && ins.ChangesAddress()) + reset = true; + else if (ins.mType == ASMIT_JSR) + { + for (int i = 0; i < 256; i++) + cex[i] = -1; + } + + if (reset) + { + cex[ins.mAddress] = -1; + for (int j = 0; j < 256; j++) + { + if (cex[j] >= 0 && mIns[cex[j] - 2].mAddress == ins.mAddress) + cex[j] = -1; + } + } + + } + + if (mTrueJump && mTrueJump->PropagateCommonSubExpression()) + changed = true; + if (mFalseJump && mFalseJump->PropagateCommonSubExpression()) + changed = true; + } + + return changed; +} + bool NativeCodeBasicBlock::GlobalSwapXY(void) { bool changed = false; @@ -17592,6 +17676,69 @@ bool NativeCodeBasicBlock::JoinTailCodeSequences(NativeCodeProcedure* proc, bool } } #endif +#if 1 + if (mIns.Size() >= 2 && mFalseJump) + { + int ns = mIns.Size(); + if ((mIns[ns - 2].mType == ASMIT_INY || mIns[ns - 2].mType == ASMIT_DEY) && mIns[ns - 1].mType == ASMIT_TYA) + { + if (mBranch == ASMIT_BEQ && !mFalseJump->mEntryRequiredRegs[CPU_REG_A]) + { + mTrueJump->mIns.Insert(0, NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, 0)); + mIns[ns - 1].mType = ASMIT_NOP; + mIns[ns - 2].mLive |= LIVE_CPU_REG_Z; + changed = true; + } + else if (mBranch == ASMIT_BNE && !mTrueJump->mEntryRequiredRegs[CPU_REG_A]) + { + mFalseJump->mIns.Insert(0, NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, 0)); + mIns[ns - 1].mType = ASMIT_NOP; + mIns[ns - 2].mLive |= LIVE_CPU_REG_Z; + changed = true; + } + } + else if ((mIns[ns - 2].mType == ASMIT_INX || mIns[ns - 2].mType == ASMIT_DEX) && mIns[ns - 1].mType == ASMIT_TXA) + { + if (mBranch == ASMIT_BEQ && !mFalseJump->mEntryRequiredRegs[CPU_REG_A]) + { + mTrueJump->mIns.Insert(0, NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, 0)); + mIns[ns - 1].mType = ASMIT_NOP; + mIns[ns - 2].mLive |= LIVE_CPU_REG_Z; + changed = true; + } + else if (mBranch == ASMIT_BNE && !mTrueJump->mEntryRequiredRegs[CPU_REG_A]) + { + mFalseJump->mIns.Insert(0, NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, 0)); + mIns[ns - 1].mType = ASMIT_NOP; + mIns[ns - 2].mLive |= LIVE_CPU_REG_Z; + changed = true; + } + } + } +#endif +#if 1 + if (mIns.Size() >= 2 && mFalseJump) + { + int ns = mIns.Size(); + + if (mIns[ns - 2].mType == ASMIT_LDA && mIns[ns - 1].mType == ASMIT_CMP && mIns[ns - 1].mMode == ASMIM_ZERO_PAGE && !(mIns[ns - 1].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_C))) + { + if (mFalseJump->mIns.Size() > 0 && mFalseJump->mIns[0].mType == ASMIT_LDA && mFalseJump->mIns[0].SameEffectiveAddress(mIns[ns - 1]) || + mTrueJump->mIns.Size() > 0 && mTrueJump->mIns[0].mType == ASMIT_LDA && mTrueJump->mIns[0].SameEffectiveAddress(mIns[ns - 1])) + { + NativeCodeInstruction ins = mIns[ns - 1]; + mIns[ns - 1].CopyMode(mIns[ns - 2]); + mIns[ns - 2].CopyMode(ins); + + if (mIns[ns - 1].RequiresXReg()) + mIns[ns - 2].mLive |= LIVE_CPU_REG_X; + if (mIns[ns - 1].RequiresYReg()) + mIns[ns - 2].mLive |= LIVE_CPU_REG_Y; + changed = true; + } + } + } +#endif #if 1 if (mIns.Size() >= 2) { @@ -18791,10 +18938,10 @@ bool NativeCodeBasicBlock::CheckForwardSumYPointer(const NativeCodeBasicBlock* b { if (!mPatched) { - mPatched = true; - if (at == 0) { + mPatched = true; + if (!mEntryRequiredRegs[reg] && !mEntryRequiredRegs[reg + 1]) return true; @@ -18815,6 +18962,8 @@ bool NativeCodeBasicBlock::CheckForwardSumYPointer(const NativeCodeBasicBlock* b return false; else if (ins.mMode == ASMIM_INDIRECT_Y && ins.mAddress == reg) { + if (iins.mMode == ASMIM_IMMEDIATE && yval + iins.mAddress > 255) + return false; if (yval < 0 || yval > 3) return false; else if (!(ins.mLive & LIVE_MEM)) @@ -18833,6 +18982,11 @@ bool NativeCodeBasicBlock::CheckForwardSumYPointer(const NativeCodeBasicBlock* b return false; yval = -1; } + else if (ins.mType == ASMIT_RTS) + { + if ((ins.mFlags & NCIF_LOWER) && base == BC_REG_ACCU) + return false; + } else if (ins.ChangesZeroPage(base) || ins.ChangesZeroPage(base + 1) || iins.MayBeChangedOnAddress(ins)) return false; else if (ins.ChangesYReg()) @@ -22525,7 +22679,7 @@ bool NativeCodeBasicBlock::BackwardReplaceZeroPage(int at, int from, int to, boo } -bool NativeCodeBasicBlock::Propagate16BitSum(void) +bool NativeCodeBasicBlock::Propagate16BitSum(const GrowingArray & cinfo) { bool changed = false; @@ -22534,6 +22688,8 @@ bool NativeCodeBasicBlock::Propagate16BitSum(void) mVisited = true; GrowingArray infos(NativeRegisterSum16Info{}); + if (mNumEntries == 1) + infos = cinfo; for (int i = 0; i < mIns.Size(); i++) { @@ -22597,6 +22753,18 @@ bool NativeCodeBasicBlock::Propagate16BitSum(void) info.mAddH->mAddress -= infos[j].mAddH->mAddress; changed = true; } + else if (info.mAddL->mMode == ASMIM_IMMEDIATE && info.mAddH->mMode == ASMIM_IMMEDIATE && + infos[j].mAddL->mMode == ASMIM_IMMEDIATE && infos[j].mAddH->mMode == ASMIM_IMMEDIATE) + { + if (info.mAddress == infos[j].mAddress) + { + info.mSrcL->mAddress = infos[j].mDstL->mAddress; + info.mSrcH->mAddress = infos[j].mDstH->mAddress; + info.mAddL->mType = ASMIT_NOP; info.mAddL->mMode = ASMIM_IMPLIED; + info.mAddH->mType = ASMIT_NOP; info.mAddH->mMode = ASMIM_IMPLIED; + changed = true; + } + } } } @@ -22627,9 +22795,9 @@ bool NativeCodeBasicBlock::Propagate16BitSum(void) } } - if (mTrueJump && mTrueJump->Propagate16BitSum()) + if (mTrueJump && mTrueJump->Propagate16BitSum(infos)) changed = true; - if (mFalseJump && mFalseJump->Propagate16BitSum()) + if (mFalseJump && mFalseJump->Propagate16BitSum(infos)) changed = true; } return changed; @@ -25734,7 +25902,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoop(NativeCodeProcedure * proc, bool f mTrueJump = this; } - if (sz == 2 && mTrueJump == this) + if (sz <= 3 && mTrueJump == this) { changed = OptimizeSimpleLoopInvariant(proc, nullptr, nullptr, full); @@ -31311,6 +31479,48 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[i + 2].mAddress += 2; progress = true; } +#if 1 + else if ( + mIns[i + 0].mType == ASMIT_TXA && !(mIns[i + 0].mLive & LIVE_CPU_REG_X) && + mIns[i + 1].mType == ASMIT_CLC && + mIns[i + 2].mType == ASMIT_ADC && mIns[i + 2].mMode == ASMIM_IMMEDIATE && mIns[i + 2].mAddress == 1 && !(mIns[i + 2].mLive & LIVE_CPU_REG_C)) + { + mIns[i + 0].mType = ASMIT_INX; mIns[i + 0].mLive |= LIVE_CPU_REG_X; + mIns[i + 1].mType = ASMIT_TXA; mIns[i + 1].mLive |= LIVE_CPU_REG_Z; + mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED; + progress = true; + } + else if ( + mIns[i + 0].mType == ASMIT_TXA && !(mIns[i + 0].mLive & LIVE_CPU_REG_X) && + mIns[i + 1].mType == ASMIT_SEC && + mIns[i + 2].mType == ASMIT_SBC && mIns[i + 2].mMode == ASMIM_IMMEDIATE && mIns[i + 2].mAddress == 1 && !(mIns[i + 2].mLive & LIVE_CPU_REG_C)) + { + mIns[i + 0].mType = ASMIT_DEX; mIns[i + 0].mLive |= LIVE_CPU_REG_X; + mIns[i + 1].mType = ASMIT_TXA; mIns[i + 1].mLive |= LIVE_CPU_REG_Z; + mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED; + progress = true; + } + else if ( + mIns[i + 0].mType == ASMIT_TYA && !(mIns[i + 0].mLive & LIVE_CPU_REG_Y) && + mIns[i + 1].mType == ASMIT_CLC && + mIns[i + 2].mType == ASMIT_ADC && mIns[i + 2].mMode == ASMIM_IMMEDIATE && mIns[i + 2].mAddress == 1 && !(mIns[i + 2].mLive & LIVE_CPU_REG_C)) + { + mIns[i + 0].mType = ASMIT_INY; mIns[i + 0].mLive |= LIVE_CPU_REG_Y; + mIns[i + 1].mType = ASMIT_TYA; mIns[i + 1].mLive |= LIVE_CPU_REG_Z; + mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED; + progress = true; + } + else if ( + mIns[i + 0].mType == ASMIT_TYA && !(mIns[i + 0].mLive & LIVE_CPU_REG_Y) && + mIns[i + 1].mType == ASMIT_SEC && + mIns[i + 2].mType == ASMIT_SBC && mIns[i + 2].mMode == ASMIM_IMMEDIATE && mIns[i + 2].mAddress == 1 && !(mIns[i + 2].mLive & LIVE_CPU_REG_C)) + { + mIns[i + 0].mType = ASMIT_DEY; mIns[i + 0].mLive |= LIVE_CPU_REG_Y; + mIns[i + 1].mType = ASMIT_TYA; mIns[i + 1].mLive |= LIVE_CPU_REG_Z; + mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED; + progress = true; + } +#endif else if ( mIns[i + 0].mType == ASMIT_CLC && mIns[i + 1].mType == ASMIT_ADC && mIns[i + 1].mMode == ASMIM_IMMEDIATE && mIns[i + 1].mAddress == 1 && @@ -33565,6 +33775,35 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass } } #endif +#if 1 + if (mIns[i + 0].mType == ASMIT_CLC && + mIns[i + 1].mType == ASMIT_LDA && mIns[i + 1].mMode == ASMIM_ZERO_PAGE && + mIns[i + 2].mType == ASMIT_ADC && mIns[i + 2].mMode == ASMIM_IMMEDIATE && + mIns[i + 3].mType == ASMIT_STA && mIns[i + 3].mMode == ASMIM_ZERO_PAGE && + mIns[i + 4].mType == ASMIT_LDA && mIns[i + 4].mMode == ASMIM_ZERO_PAGE && mIns[i + 4].mAddress == mIns[i + 1].mAddress + 1 && + mIns[i + 5].mType == ASMIT_ADC && mIns[i + 5].mMode == ASMIM_IMMEDIATE && + mIns[i + 6].mType == ASMIT_STA && mIns[i + 6].mMode == ASMIM_ZERO_PAGE && mIns[i + 6].mAddress == mIns[i + 3].mAddress + 1 && + mIns[i + 3].mAddress != BC_REG_STACK && + !(mIns[i + 6].mLive & LIVE_CPU_REG_A)) + { + int yval = RetrieveYValue(i); + proc->ResetPatched(); + + if (CheckForwardSumYPointer(this, mIns[i + 3].mAddress, mIns[i + 3].mAddress, mIns[i + 2], i + 7, yval)) + { + proc->ResetPatched(); + if (PatchForwardSumYPointer(this, mIns[i + 3].mAddress, mIns[i + 3].mAddress, mIns[i + 2], i + 7, yval)) + progress = true; + + mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED; + + if (mTrueJump) + mTrueJump->CheckLive(); + if (mFalseJump) + mFalseJump->CheckLive(); + } + } +#endif #if 1 if ( mIns[i + 0].mType == ASMIT_CLC && @@ -34517,7 +34756,17 @@ bool NativeCodeBasicBlock::CalculateOffset(int& total) else if (mTrueJump) { if (mTrueJump->mPlace != mPlace + 1) - total += JumpByteSize(mTrueJump, mTrueJump->mOffset - total); + { + if (!mTrueJump->mTrueJump && mTrueJump->mCode.Size() < 3) + { + total += mTrueJump->mCode.Size(); + for (int i = 0; i < mTrueJump->mCode.Size(); i++) + mCode.Push(mTrueJump->mCode[i]); + mTrueJump = nullptr; + } + else + total += JumpByteSize(mTrueJump, mTrueJump->mOffset - total); + } } if (mOffset + mSize != total) @@ -35332,7 +35581,7 @@ void NativeCodeProcedure::RebuildEntry(void) void NativeCodeProcedure::Optimize(void) { - CheckFunc = !strcmp(mInterProc->mIdent->mString, "disp_menu_color"); + CheckFunc = !strcmp(mInterProc->mIdent->mString, "digger_decide"); #if 1 int step = 0; @@ -35418,7 +35667,9 @@ void NativeCodeProcedure::Optimize(void) if (step == 0) { ResetVisited(); - if (mEntryBlock->Propagate16BitSum()) + GrowingArray cinfo({ 0 }); + + if (mEntryBlock->Propagate16BitSum(cinfo)) changed = true; } @@ -35458,7 +35709,6 @@ void NativeCodeProcedure::Optimize(void) mEntryBlock->CheckBlocks(); #endif - #if 1 ResetVisited(); if (mEntryBlock->PeepHoleOptimizer(this, step)) @@ -35510,6 +35760,12 @@ void NativeCodeProcedure::Optimize(void) changed = true; } + if (step == 4) + { + ResetVisited(); + if (mEntryBlock->PropagateCommonSubExpression()) + changed = true; + } #if 1 if (step > 0) { diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index 83fe731..f200743 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -426,7 +426,7 @@ public: bool BypassRegisterConditionBlock(void); bool Check16BitSum(int at, NativeRegisterSum16Info& info); - bool Propagate16BitSum(void); + bool Propagate16BitSum(const GrowingArray& cinfo); bool IsFinalZeroPageUse(const NativeCodeBasicBlock* block, int at, int from, int to, bool pair); bool ReplaceFinalZeroPageUse(NativeCodeProcedure* nproc); @@ -480,6 +480,9 @@ public: bool CanGlobalSwapXY(void); bool GlobalSwapXY(void); + bool IsSimpleSubExpression(int at) const; + bool PropagateCommonSubExpression(void); + bool ForwardAbsoluteLoadStores(void); bool CanForwardZPMove(int saddr, int daddr, int & index) const; bool Is16BitAddSubImmediate(int at, int& sreg, int &dreg, int& offset) const;