From 8ea991db818dd5a5898898b4739a24228f5ab777 Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Fri, 3 Nov 2023 20:16:23 +0100 Subject: [PATCH] Optimize div/mod unsigned pairs --- include/stdio.c | 10 +- oscar64/InterCode.cpp | 82 +++++++- oscar64/NativeCodeGenerator.cpp | 334 +++++++++++++++++++++++++++++++- oscar64/NativeCodeGenerator.h | 3 + 4 files changed, 415 insertions(+), 14 deletions(-) diff --git a/include/stdio.c b/include/stdio.c index 944dcf6..7ce8e64 100644 --- a/include/stdio.c +++ b/include/stdio.c @@ -279,10 +279,7 @@ int nformi(const sinfo * si, char * str, int v, bool s) while (u > 0) { int c = u % si->base; - if (c >= 10) - c += 'A' - 10; - else - c += '0'; + c += c >= 10 ? 'A' - 10 : '0'; buffer[--i] = c; u /= si->base; } @@ -340,10 +337,7 @@ int nforml(const sinfo * si, char * str, long v, bool s) while (u > 0) { int c = u % si->base; - if (c >= 10) - c += 'A' - 10; - else - c += '0'; + c += c >= 10 ? 'A' - 10 : '0'; buffer[--i] = c; u /= si->base; } diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index a11dc47..f6da5ef 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -8022,6 +8022,11 @@ void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(const GrowingVariableArray } } } + else if (s0 >= 0) + { + mTrueValueRange[s0].LimitMin(mInstructions[sz - 2]->mSrc[1].mIntConst + 1); + mFalseValueRange[s0].LimitMax(mInstructions[sz - 2]->mSrc[1].mIntConst); + } break; case IA_CMPLEU: if (s0 < 0) @@ -10827,6 +10832,10 @@ bool InterCodeBasicBlock::LoadStoreForwarding(const GrowingInstructionPtrArray& } #if 1 + int sz = mInstructions.Size() - 1; + if (sz > 2 && mInstructions[sz]->mCode == IC_BRANCH && mInstructions[sz - 1]->mCode == IC_RELATIONAL_OPERATOR) + sz--; + // move loads down as far as possible to avoid false aliasing for (int i = mInstructions.Size() - 2; i >= 0; i--) { @@ -10834,7 +10843,7 @@ bool InterCodeBasicBlock::LoadStoreForwarding(const 
GrowingInstructionPtrArray& if (ins->mCode == IC_LOAD) { int j = i; - while (j + 1 < mInstructions.Size() && CanSwapInstructions(ins, mInstructions[j + 1])) + while (j + 1 < sz && CanSwapInstructions(ins, mInstructions[j + 1])) { SwapInstructions(ins, mInstructions[j + 1]); mInstructions[j] = mInstructions[j + 1]; @@ -11904,9 +11913,30 @@ bool InterCodeBasicBlock::PushSinglePathResultInstructions(void) InterInstruction* ins(mInstructions[i]); int dtemp = ins->mDst.mTemp; - bool moved = false; + bool moved = false, pair = false; - if (dtemp >= 0 && !providedTemps[dtemp] && !requiredTemps[dtemp]) + if (i > 0 && ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_DIVU && i > 0 && + mInstructions[i - 1]->mCode == IC_BINARY_OPERATOR && mInstructions[i - 1]->mOperator == IA_MODU && + ins->mSrc[0].IsEqual(mInstructions[i - 1]->mSrc[0]) && + ins->mSrc[1].IsEqual(mInstructions[i - 1]->mSrc[1])) + pair = true; + else if (i > 0 && ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_MODU && i > 0 && + mInstructions[i - 1]->mCode == IC_BINARY_OPERATOR && mInstructions[i - 1]->mOperator == IA_DIVU && + ins->mSrc[0].IsEqual(mInstructions[i - 1]->mSrc[0]) && + ins->mSrc[1].IsEqual(mInstructions[i - 1]->mSrc[1])) + pair = true; + else if (i + 1 < mInstructions.Size() && ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_DIVU && + mInstructions[i + 1]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 1]->mOperator == IA_MODU && + ins->mSrc[0].IsEqual(mInstructions[i + 1]->mSrc[0]) && + ins->mSrc[1].IsEqual(mInstructions[i + 1]->mSrc[1])) + pair = true; + else if (i + 1 < mInstructions.Size() && ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_MODU && + mInstructions[i + 1]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 1]->mOperator == IA_DIVU && + ins->mSrc[0].IsEqual(mInstructions[i + 1]->mSrc[0]) && + ins->mSrc[1].IsEqual(mInstructions[i + 1]->mSrc[1])) + pair = true; + + if (!pair && dtemp >= 0 && !providedTemps[dtemp] && !requiredTemps[dtemp]) { int j 
= 0; while (j < ins->mNumOperands && (ins->mSrc[j].mTemp < 0 || !(providedTemps[ins->mSrc[j].mTemp] || IsTempModifiedOnPath(ins->mSrc[j].mTemp, i + 1)))) @@ -13000,7 +13030,7 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar } } else if (ai->mCode == IC_BINARY_OPERATOR && ai->mOperator == IA_ADD && ai->mSrc[0].mTemp < 0 && ai->mDst.mTemp == ai->mSrc[1].mTemp && ai->mSrc[0].mIntConst == 1 && IsIntegerType(ai->mDst.mType) && - ci->mCode == IC_RELATIONAL_OPERATOR && ci->mOperator == IA_CMPLU && ci->mSrc[0].mTemp >= 0 && ci->mSrc[0].IsUnsigned() && ci->mSrc[1].mTemp == ai->mDst.mTemp && + ci->mCode == IC_RELATIONAL_OPERATOR && ci->mOperator == IA_CMPLU && ci->mSrc[0].mTemp >= 0 && ci->mSrc[0].IsPositive() && ci->mSrc[1].mTemp == ai->mDst.mTemp && bi->mCode == IC_BRANCH && bi->mSrc[0].mTemp == ci->mDst.mTemp && !post->mEntryRequiredTemps[ai->mDst.mTemp] && !tail->IsTempReferencedInRange(0, tz - 3, ai->mDst.mTemp) && !tail->IsTempModifiedInRange(0, tz - 3, ci->mSrc[0].mTemp)) { @@ -16950,6 +16980,48 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati } while (changed); + // move div up to mod + int imod = -1, idiv = -1; + for (int i = 0; i < mInstructions.Size(); i++) + { + InterInstruction* ins = mInstructions[i]; + if (ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_MODU) + { + imod = -1; + if (idiv >= 0 && ins->mSrc[0].IsEqual(mInstructions[idiv]->mSrc[0]) && ins->mSrc[1].IsEqual(mInstructions[idiv]->mSrc[1])) + { + int j = i - 1; + while (j > idiv && CanSwapInstructions(mInstructions[j], ins)) + { + SwapInstructions(mInstructions[j], ins); + mInstructions[j + 1] = mInstructions[j]; + j--; + } + mInstructions[j + 1] = ins; + } + else + imod = i; + } + else if (ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_DIVU) + { + idiv = -1; + + if (imod >= 0 && ins->mSrc[0].IsEqual(mInstructions[imod]->mSrc[0]) && ins->mSrc[1].IsEqual(mInstructions[imod]->mSrc[1])) + { + int j = i - 1; + while (j 
> imod && CanSwapInstructions(mInstructions[j], ins)) + { + SwapInstructions(mInstructions[j + 0], ins); + mInstructions[j + 1] = mInstructions[j]; + j--; + } + mInstructions[j + 1] = ins; + } + else + idiv = i; + } + } + // Check case of cmp signed immediate if (mFalseJump && mInstructions.Size() > 3) { @@ -18436,7 +18508,7 @@ void InterCodeProcedure::Close(void) { GrowingTypeArray tstack(IT_NONE); - CheckFunc = !strcmp(mIdent->mString, "game_menu"); + CheckFunc = !strcmp(mIdent->mString, "nformi"); CheckCase = false; mEntryBlock = mBlocks[0]; diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index 7f6684b..b33c493 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -8352,6 +8352,109 @@ void NativeCodeBasicBlock::AddAsrSignedByte(InterCodeProcedure* proc, const Inte } +void NativeCodeBasicBlock::BinaryDivModPair(InterCodeProcedure* proc, NativeCodeProcedure* nproc, const InterInstruction* ins1, const InterInstruction* ins2) +{ + if (ins1->mSrc[1].mTemp < 0) + { + mIns.Push(NativeCodeInstruction(ins1, ASMIT_LDA, ASMIM_IMMEDIATE, ins1->mSrc[1].mIntConst & 0xff)); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0)); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_LDA, ASMIM_IMMEDIATE, (ins1->mSrc[1].mIntConst >> 8) & 0xff)); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1)); + if (ins1->mDst.mType == IT_INT32) + { + mIns.Push(NativeCodeInstruction(ins1, ASMIT_LDA, ASMIM_IMMEDIATE, (ins1->mSrc[1].mIntConst >> 16) & 0xff)); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 2)); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_LDA, ASMIM_IMMEDIATE, (ins1->mSrc[1].mIntConst >> 24) & 0xff)); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 3)); + } + } + else + { + mIns.Push(NativeCodeInstruction(ins1, ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + 
proc->mTempOffset[ins1->mSrc[1].mTemp])); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0)); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins1->mSrc[1].mTemp] + 1)); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1)); + if (ins1->mDst.mType == IT_INT32) + { + mIns.Push(NativeCodeInstruction(ins1, ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins1->mSrc[1].mTemp] + 2)); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 2)); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins1->mSrc[1].mTemp] + 3)); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 3)); + } + } + + if (ins1->mSrc[0].mTemp < 0) + { + mIns.Push(NativeCodeInstruction(ins1, ASMIT_LDA, ASMIM_IMMEDIATE, ins1->mSrc[0].mIntConst & 0xff)); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 0)); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_LDA, ASMIM_IMMEDIATE, (ins1->mSrc[0].mIntConst >> 8) & 0xff)); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 1)); + if (ins1->mDst.mType == IT_INT32) + { + mIns.Push(NativeCodeInstruction(ins1, ASMIT_LDA, ASMIM_IMMEDIATE, (ins1->mSrc[0].mIntConst >> 16) & 0xff)); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 2)); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_LDA, ASMIM_IMMEDIATE, (ins1->mSrc[0].mIntConst >> 24) & 0xff)); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 3)); + } + } + else + { + mIns.Push(NativeCodeInstruction(ins1, ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins1->mSrc[0].mTemp])); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 0)); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + 
proc->mTempOffset[ins1->mSrc[0].mTemp] + 1)); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 1)); + if (ins1->mDst.mType == IT_INT32) + { + mIns.Push(NativeCodeInstruction(ins1, ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins1->mSrc[0].mTemp] + 2)); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 2)); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins1->mSrc[0].mTemp] + 3)); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 3)); + } + } + + if (ins1->mDst.mType == IT_INT32) + { + NativeCodeGenerator::Runtime& frt(nproc->mGenerator->ResolveRuntime(Ident::Unique("divu32"))); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_JSR, ASMIM_ABSOLUTE, frt.mOffset, frt.mLinkerObject, NCIF_RUNTIME | NCIF_LOWER | NCIF_UPPER)); + } + else + { + NativeCodeGenerator::Runtime& frt(nproc->mGenerator->ResolveRuntime(Ident::Unique("divu16"))); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_JSR, ASMIM_ABSOLUTE, frt.mOffset, frt.mLinkerObject, NCIF_RUNTIME | NCIF_LOWER | NCIF_UPPER)); + } + + mIns.Push(NativeCodeInstruction(ins1, ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0)); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins1->mDst.mTemp])); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1)); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins1->mDst.mTemp] + 1)); + if (ins1->mDst.mType == IT_INT32) + { + mIns.Push(NativeCodeInstruction(ins1, ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 2)); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins1->mDst.mTemp] + 2)); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 3)); + mIns.Push(NativeCodeInstruction(ins1, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_TMP + 
proc->mTempOffset[ins1->mDst.mTemp] + 3)); + } + + if (ins2->mDst.mType == IT_INT32) + { + mIns.Push(NativeCodeInstruction(ins2, ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_WORK + 4)); + mIns.Push(NativeCodeInstruction(ins2, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins2->mDst.mTemp])); + mIns.Push(NativeCodeInstruction(ins2, ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_WORK + 5)); + mIns.Push(NativeCodeInstruction(ins2, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins2->mDst.mTemp] + 1)); + mIns.Push(NativeCodeInstruction(ins2, ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_WORK + 6)); + mIns.Push(NativeCodeInstruction(ins2, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins2->mDst.mTemp] + 2)); + mIns.Push(NativeCodeInstruction(ins2, ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_WORK + 7)); + mIns.Push(NativeCodeInstruction(ins2, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins2->mDst.mTemp] + 3)); + } + else + { + mIns.Push(NativeCodeInstruction(ins2, ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_WORK + 2)); + mIns.Push(NativeCodeInstruction(ins2, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins2->mDst.mTemp])); + mIns.Push(NativeCodeInstruction(ins2, ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_WORK + 3)); + mIns.Push(NativeCodeInstruction(ins2, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins2->mDst.mTemp] + 1)); + } +} + NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* proc, NativeCodeProcedure* nproc, const InterInstruction * ins, const InterInstruction * sins1, const InterInstruction * sins0) { int treg = BC_REG_TMP + proc->mTempOffset[ins->mDst.mTemp]; @@ -16107,6 +16210,116 @@ bool NativeCodeBasicBlock::Split16BitLoopCount(NativeCodeProcedure* proc) return changed; } +bool NativeCodeBasicBlock::LoopRegisterWrapAround(void) +{ + bool changed = false; + if (!mVisited) + { + mVisited = true; + + if (mLoopHead && mNumEntries == 2) + { + NativeCodeBasicBlock* eblock = nullptr, * cblock = nullptr, * bblock = 
nullptr; + + if (mEntryBlocks[0]->mFalseJump) + { + eblock = mEntryBlocks[0]; + bblock = mEntryBlocks[1]; + } + else if (mEntryBlocks[1]->mFalseJump) + { + eblock = mEntryBlocks[1]; + bblock = mEntryBlocks[0]; + } + + if (eblock && eblock != this && !bblock->mFalseJump) + { + if (eblock->mFalseJump == this) + cblock = eblock->mTrueJump; + else + cblock = eblock->mFalseJump; + + int i = eblock->mIns.Size() - 1; + while (i >= 0 && !eblock->mIns[i].ChangesXReg()) + i--; + if (i >= 0 && eblock->mIns[i].mType == ASMIT_LDX && eblock->mIns[i].mMode == ASMIM_ZERO_PAGE && !eblock->ChangesZeroPage(eblock->mIns[i].mAddress, i + 1, eblock->mIns.Size())) /* X must still mirror the zero page cell when eblock exits, otherwise the store in the loop head is not redundant on that path */ + { + int addr = eblock->mIns[i].mAddress; + + int j = 0; + while (j < mIns.Size() && !mIns[j].ReferencesXReg() && !mIns[j].ChangesZeroPage(addr) && !(mIns[j].mMode == ASMIM_ZERO_PAGE && mIns[j].mAddress == addr)) /* a direct read of addr ahead of the store would observe the hoisted value, so it ends the scan; NOTE(review): indirect or absolute aliases of addr are not detected here */ + j++; + if (j < mIns.Size()) + { + if (mIns[j].mType == ASMIT_STX && mIns[j].mMode == ASMIM_ZERO_PAGE && mIns[j].mAddress == addr) + { + bblock->mIns.Push(mIns[j]); + bblock->mExitRequiredRegs += addr; + mEntryRequiredRegs += addr; + mIns.Remove(j); + changed = true; + } + } + } + + i = eblock->mIns.Size() - 1; + while (i >= 0 && !eblock->mIns[i].ChangesYReg()) + i--; + if (i >= 0 && eblock->mIns[i].mType == ASMIT_LDY && eblock->mIns[i].mMode == ASMIM_ZERO_PAGE && !eblock->ChangesZeroPage(eblock->mIns[i].mAddress, i + 1, eblock->mIns.Size())) /* same staleness guard as for X */ + { + int addr = eblock->mIns[i].mAddress; + + int j = 0; + while (j < mIns.Size() && !mIns[j].ReferencesYReg() && !mIns[j].ChangesZeroPage(addr) && !(mIns[j].mMode == ASMIM_ZERO_PAGE && mIns[j].mAddress == addr)) /* stop at direct reads of addr, see X case */ + j++; + if (j < mIns.Size()) + { + if (mIns[j].mType == ASMIT_STY && mIns[j].mMode == ASMIM_ZERO_PAGE && mIns[j].mAddress == addr) + { + bblock->mIns.Push(mIns[j]); + bblock->mExitRequiredRegs += addr; + mEntryRequiredRegs += addr; + mIns.Remove(j); + changed = true; + } + } + } + + i = eblock->mIns.Size() - 1; + while (i >= 0 && !eblock->mIns[i].ChangesAccu()) + i--; + if (i >= 0 && eblock->mIns[i].mType == ASMIT_LDA && eblock->mIns[i].mMode == ASMIM_ZERO_PAGE && !eblock->ChangesZeroPage(eblock->mIns[i].mAddress, i + 1, eblock->mIns.Size())) /* same staleness guard as for X */ + { + int addr = eblock->mIns[i].mAddress; + + int j = 0; + while (j < mIns.Size() && !mIns[j].ReferencesAccu() && !mIns[j].ChangesZeroPage(addr) && !(mIns[j].mMode == ASMIM_ZERO_PAGE && mIns[j].mAddress == addr)) /* stop at direct reads of addr, see X case */ + j++; + if (j < mIns.Size()) + { + if (mIns[j].mType 
== ASMIT_STA && mIns[j].mMode == ASMIM_ZERO_PAGE && mIns[j].mAddress == addr) + { + bblock->mIns.Push(mIns[j]); + bblock->mExitRequiredRegs += addr; + mEntryRequiredRegs += addr; + mIns.Remove(j); + changed = true; + } + } + } + } + } + + if (mTrueJump && mTrueJump->LoopRegisterWrapAround()) + changed = true; + if (mFalseJump && mFalseJump->LoopRegisterWrapAround()) + changed = true; + } + + return changed; +} + + bool NativeCodeBasicBlock::EliminateDeadLoops(void) { bool changed = false; @@ -24541,6 +24754,69 @@ bool NativeCodeBasicBlock::FindGlobalAddressSumY(int at, int reg, bool direct, i return false; } +bool NativeCodeBasicBlock::FindSharedGlobalAddressSumY(int at, int reg, const NativeCodeInstruction*& ains, const NativeCodeInstruction*& iins) +{ + int j = at - 7; + while (j >= 0) + { + if (mIns[j + 0].mType == ASMIT_CLC && + mIns[j + 1].mType == ASMIT_LDA && mIns[j + 1].mMode == ASMIM_IMMEDIATE_ADDRESS && (mIns[j + 1].mFlags & NCIF_LOWER) && mIns[j + 1].mLinkerObject && + mIns[j + 2].mType == ASMIT_ADC && mIns[j + 2].mMode == ASMIM_ZERO_PAGE && + mIns[j + 3].mType == ASMIT_STA && mIns[j + 3].mMode == ASMIM_ZERO_PAGE && mIns[j + 3].mAddress == reg && + mIns[j + 4].mType == ASMIT_LDA && mIns[j + 4].mMode == ASMIM_IMMEDIATE_ADDRESS && (mIns[j + 4].mFlags & NCIF_UPPER) && mIns[j + 4].mLinkerObject == mIns[j + 1].mLinkerObject && + mIns[j + 5].mType == ASMIT_ADC && mIns[j + 5].mMode == ASMIM_IMMEDIATE && mIns[j + 5].mAddress == 0 && + mIns[j + 6].mType == ASMIT_STA && mIns[j + 6].mMode == ASMIM_ZERO_PAGE && mIns[j + 6].mAddress == reg + 1) + { + ains = &(mIns[j + 1]); + iins = &(mIns[j + 2]); + + int ireg = iins->mAddress; + if (reg == ireg) + return false; + + if (ChangesZeroPage(ireg, j + 7, at)) + return false; + + return true; + } + + if (mIns[j + 6].ChangesZeroPage(reg) || mIns[j + 6].ChangesZeroPage(reg + 1)) + return false; + + j--; + } + + if (mLoopHead) + return false; + + if (mEntryBlocks.Size() > 0) + { + while (j >= -6) + { + if (mIns[j + 
6].ChangesZeroPage(reg) || mIns[j + 6].ChangesZeroPage(reg + 1)) + return false; + j--; + } + + if (mEntryBlocks[0]->FindSharedGlobalAddressSumY(mEntryBlocks[0]->mIns.Size(), reg, ains, iins)) + { + if (ChangesZeroPage(iins->mAddress, 0, at)) + return false; + + const NativeCodeInstruction* ains0; + const NativeCodeInstruction* iins0; + + int i = 1; + while (i < mEntryBlocks.Size() && mEntryBlocks[i]->FindSharedGlobalAddressSumY(mEntryBlocks[i]->mIns.Size(), reg, ains0, iins0) && ains0->IsSame(*ains) && iins0->IsSame(*iins)) + i++; + if (i == mEntryBlocks.Size()) + return true; + } + } + + return false; +} + bool NativeCodeBasicBlock::JoinTAXARange(int from, int to) { int start = from; @@ -30656,7 +30932,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc exitBlock->mIns[1].mType = ASMIT_STX; } - mIns.Remove(i); mIns.Remove(i); + mIns.Remove(i + 1); mIns.Remove(0); mIns.Remove(0); CheckLive(); @@ -33336,6 +33612,7 @@ bool NativeCodeBasicBlock::OptimizeFindLoop(NativeCodeProcedure* proc) body->mExitRequiredRegs += CPU_REG_Y; mEntryRequiredRegs += CPU_REG_Y; mExitRequiredRegs += CPU_REG_Y; + succ->mEntryRequiredRegs += CPU_REG_Y; for (int i = 0; i < mIns.Size(); i++) mIns[i].mLive |= LIVE_CPU_REG_Y; @@ -36003,6 +36280,28 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass #endif } + else if (FindSharedGlobalAddressSumY(i, sreg, ains, iins)) + { +#if 1 + if (mIns[i + 0].mLive & LIVE_CPU_REG_Y) + { + InsertLoadYImmediate(mIns[i + 0].mIns, i + 1, 0); + // mIns.Insert(i + 1, NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, 0)); + // mIns[i + 1].mLive |= LIVE_CPU_REG_Y; + } + mIns.Insert(i + 0, NativeCodeInstruction(mIns[i + 0].mIns, ASMIT_LDY, *iins)); + mIns[i + 0].mLive |= LIVE_CPU_REG_Y | LIVE_MEM; + + mIns[i + 1].mMode = ASMIM_ABSOLUTE_Y; + mIns[i + 1].mLinkerObject = ains->mLinkerObject; + mIns[i + 1].mAddress = ains->mAddress; + mIns[i + 1].mFlags &= ~NCIF_YZERO; + progress = true; + + CheckLive(); 
+#endif + } + #if 1 if (mIns[i + 0].mMode == ASMIM_INDIRECT_Y && (mIns[i + 0].mFlags & NCIF_YZERO) && !(mIns[i + 0].mLive & LIVE_CPU_REG_X)) @@ -43024,6 +43323,11 @@ void NativeCodeProcedure::Optimize(void) changed = true; #endif +#if _DEBUG + ResetVisited(); + mEntryBlock->CheckBlocks(true); +#endif + #if 1 ResetVisited(); if (mEntryBlock->OptimizeFindLoop(this)) @@ -43286,6 +43590,14 @@ void NativeCodeProcedure::Optimize(void) changed = true; } + if (step == 8) + { + ResetVisited(); + if (mEntryBlock->LoopRegisterWrapAround()) + changed = true; + } + + if (step >= 9) { ResetVisited(); @@ -43820,6 +44132,26 @@ void NativeCodeProcedure::CompileInterBlock(InterCodeProcedure* iproc, InterCode block->AddAsrSignedByte(iproc, ins, iblock->mInstructions[i + 1]); i ++; } + else if (i + 1 < iblock->mInstructions.Size() && + ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_DIVU && + iblock->mInstructions[i + 1]->mCode == IC_BINARY_OPERATOR && iblock->mInstructions[i + 1]->mOperator == IA_MODU && + ins->mSrc[0].IsEqual(iblock->mInstructions[i + 1]->mSrc[0]) && + ins->mSrc[1].IsEqual(iblock->mInstructions[i + 1]->mSrc[1]) && + ins->mSrc[0].mTemp != ins->mDst.mTemp && ins->mSrc[1].mTemp != ins->mDst.mTemp) + { /* adjacent unsigned div and mod on identical operands: emit one shared runtime call; the first result must not overwrite an operand the second still reads */ + block->BinaryDivModPair(iproc, this, ins, iblock->mInstructions[i + 1]); + i++; + } + else if (i + 1 < iblock->mInstructions.Size() && + ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_MODU && + iblock->mInstructions[i + 1]->mCode == IC_BINARY_OPERATOR && iblock->mInstructions[i + 1]->mOperator == IA_DIVU && + ins->mSrc[0].IsEqual(iblock->mInstructions[i + 1]->mSrc[0]) && + ins->mSrc[1].IsEqual(iblock->mInstructions[i + 1]->mSrc[1]) && + ins->mSrc[0].mTemp != ins->mDst.mTemp && ins->mSrc[1].mTemp != ins->mDst.mTemp && ins->mDst.mTemp != iblock->mInstructions[i + 1]->mDst.mTemp) + { /* BinaryDivModPair stores the quotient first and the remainder last, so this mod-then-div order is only safe when the two destinations differ */ + block->BinaryDivModPair(iproc, this, iblock->mInstructions[i + 1], ins); + i++; + } else block = block->BinaryOperator(iproc, this, ins, nullptr, nullptr); break; diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index 07c27dd..6188094 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -343,6 +343,7 
@@ public: void LoadEffectiveAddress(InterCodeProcedure* proc, const InterInstruction * ins, const InterInstruction* sins1, const InterInstruction* sins0, bool addrvalid); void LoadStoreOpAbsolute2D(InterCodeProcedure* proc, const InterInstruction* lins1, const InterInstruction* lins2, const InterInstruction* mins); void SignExtendAddImmediate(InterCodeProcedure* proc, const InterInstruction* xins, const InterInstruction* ains); + void BinaryDivModPair(InterCodeProcedure* proc, NativeCodeProcedure* nproc, const InterInstruction* ins1, const InterInstruction* ins2); void NumericConversion(InterCodeProcedure* proc, NativeCodeProcedure* nproc, const InterInstruction * ins); NativeCodeBasicBlock * CopyValue(InterCodeProcedure* proc, const InterInstruction * ins, NativeCodeProcedure* nproc); @@ -429,6 +430,7 @@ public: bool FindGlobalAddress(int at, int reg, int& apos); bool FindGlobalAddressSumY(int at, int reg, bool direct, int& apos, const NativeCodeInstruction * & ains, const NativeCodeInstruction*& iins, uint32 & flags, int & addr); bool FindExternAddressSumY(int at, int reg, int& breg, int& ireg); + bool FindSharedGlobalAddressSumY(int at, int reg, const NativeCodeInstruction*& ains, const NativeCodeInstruction*& iins); bool FindPageStartAddress(int at, int reg, int& addr); bool FindBypassAddressSumY(int at, int reg, int& apos, int& breg); bool PatchBypassAddressSumY(int at, int reg, int apos, int breg); @@ -588,6 +590,7 @@ public: bool SimplifyLoopEnd(NativeCodeProcedure* proc); bool CrossBlockStoreLoadBypass(NativeCodeProcedure* proc); bool EliminateDeadLoops(void); + bool LoopRegisterWrapAround(void); bool CanBytepassLoad(const NativeCodeInstruction& ains, int from = 0) const; bool CanHoistStore(const NativeCodeInstruction& ains) const;