From 9662f5e69f31fe1a0debdd6f9e4023d067f94f74 Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Mon, 6 Mar 2023 20:34:02 +0100 Subject: [PATCH] Loop counter depend index optimizations for small xy loops --- include/crt.c | 1 + oscar64/NativeCodeGenerator.cpp | 135 ++++++++++++++++++++++++++++---- oscar64/NativeCodeGenerator.h | 2 +- 3 files changed, 122 insertions(+), 16 deletions(-) diff --git a/include/crt.c b/include/crt.c index afb91a7..0901e94 100644 --- a/include/crt.c +++ b/include/crt.c @@ -296,6 +296,7 @@ spexit: sta 0xff00 #elif defined(__PLUS4__) sta $ff3e +#endif #endif rts } diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index a1e5e3f..156d9e4 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -13981,13 +13981,20 @@ bool NativeCodeBasicBlock::MoveAccuTrainUp(int at, int end) CheckLive(); bool needXY = (mIns[end - 1].mLive & (LIVE_CPU_REG_X | LIVE_CPU_REG_Y)) != 0; + + int addr = mIns[at].mAddress; + if (mIns[at].mLive & LIVE_CPU_REG_C) + at--; int i = at; while (i > 0) { i--; - if (mIns[i].mType == ASMIT_STA && mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == mIns[at].mAddress) + if ((mIns[i].mType == ASMIT_STA || mIns[i].mType == ASMIT_LDA) && mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == addr) { + if (mIns[i].mType == ASMIT_LDA) + i++; + if (mIns[i].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_C | LIVE_CPU_REG_Z)) return false; @@ -14049,7 +14056,8 @@ bool NativeCodeBasicBlock::MoveAccuTrainsUp(void) wzero += mIns[i].mAddress; i++; } - else if (mIns[i].mType == ASMIT_LDA && mIns[i].mMode == ASMIM_ZERO_PAGE && wzero[mIns[i].mAddress] && !(mIns[i].mLive & LIVE_CPU_REG_C)) + else if (mIns[i].mType == ASMIT_LDA && mIns[i].mMode == ASMIM_ZERO_PAGE && wzero[mIns[i].mAddress] && + (!(mIns[i].mLive & LIVE_CPU_REG_C) || i > 0 && (mIns[i-1].mType == ASMIT_CLC || mIns[i-1].mType == ASMIT_SEC))) { int j = i; while (j < mIns.Size() && (mIns[j].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_C | LIVE_CPU_REG_Z)) && mIns[j].mType != ASMIT_JSR) @@ -14069,6 +14077,11 @@ bool NativeCodeBasicBlock::MoveAccuTrainsUp(void) else i++; } + else if (mIns[i].mType == ASMIT_LDA && mIns[i].mMode == ASMIM_ZERO_PAGE && i + 1 < mIns.Size() && mIns[i + 1].mType == ASMIT_STA && !(mIns[i + 1].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_C | LIVE_CPU_REG_Z))) + { + wzero += mIns[i].mAddress; + i++; + } else if (mIns[i].mType == ASMIT_JSR) { for (int j = 0; j < 4; j++) @@ -26571,6 +26584,72 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc } #endif +#if 1 + sz = mIns.Size(); + if (sz >= 2 && mIns[sz - 2].mType == ASMIT_INX && mIns[sz - 1].mType == ASMIT_CPX && !mEntryRequiredRegs[CPU_REG_Y] && !mExitRequiredRegs[CPU_REG_Y]) + { + int xtoy = -1; + + for (int i = 0; i < sz - 2; i++) + { + if (mIns[i].ChangesXReg()) + { + xtoy = -1; + break; + } + + if (mIns[i].ChangesYReg()) + { + if (xtoy >= 0) + { + xtoy = -1; + break; + } + + if (i >= 3 && + mIns[i - 0].mType == ASMIT_TAY && + mIns[i - 1].mType == ASMIT_ADC && + mIns[i - 2].mType == ASMIT_TXA && + mIns[i - 3].mType == ASMIT_CLC && + !(mIns[i].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_C | LIVE_CPU_REG_Z))) + xtoy = i - 3; + else + { + xtoy = -1; + break; + } + } + + + } + + if (xtoy >= 0) + { + int i = 0; + while (i < sz && !mIns[xtoy + 1].MayBeChangedOnAddress(mIns[i])) + i++; + if (i == sz) + { + if (!prevBlock) + return OptimizeSimpleLoopInvariant(proc, full); + + prevBlock->mIns.Push(mIns[xtoy + 0]); + prevBlock->mIns.Push(mIns[xtoy + 1]); + prevBlock->mIns.Push(mIns[xtoy + 2]); + prevBlock->mIns.Push(mIns[xtoy + 3]); + + mIns.Insert(sz - 2, NativeCodeInstruction(ASMIT_INY)); + + mIns.Remove(xtoy, 4); + for (int i = 0; i < mIns.Size(); i++) + mIns[i].mLive |= LIVE_CPU_REG_Y; + mEntryRequiredRegs += CPU_REG_Y; + mExitRequiredRegs += CPU_REG_Y; + } + } + } +#endif + CheckLive(); return changed; @@ -36065,7 +36144,7 @@ NativeCodeProcedure::~NativeCodeProcedure(void) } -void NativeCodeProcedure::CompressTemporaries(void) +void NativeCodeProcedure::CompressTemporaries(bool singles) { if (mInterProc->mTempSize > 0) { @@ -36115,22 +36194,43 @@ void NativeCodeProcedure::CompressTemporaries(void) int pos = spos; #if 1 - if (mInterProc->mLeafProcedure) +// if (mInterProc->mLeafProcedure) { - int k = 0; - while (k < usize && !collisionSet[k + BC_REG_ACCU][k + reg]) - k++; - if (k == usize) + if (singles && usize == 1) { - pos = BC_REG_ACCU; - for (int i = 0; i < 256; i++) + int k = 0; + while (k < 4 && collisionSet[k + BC_REG_ACCU][reg]) + k++; + if (k < 4) { - for (int j = 0; j < usize; j++) + pos = BC_REG_ACCU + k; + for (int i = 0; i < 256; i++) { - if (collisionSet[j + reg][i]) + if (collisionSet[reg][i]) { - collisionSet[j + BC_REG_ACCU] += i; - collisionSet[i] += j + BC_REG_ACCU; + collisionSet[pos] += i; + collisionSet[i] += pos; + } + } + } + } + else + { + int k = 0; + while (k < usize && !collisionSet[k + BC_REG_ACCU][k + reg]) + k++; + if (k == usize) + { + pos = BC_REG_ACCU; + for (int i = 0; i < 256; i++) + { + for (int j = 0; j < usize; j++) + { + if (collisionSet[j + reg][i]) + { + collisionSet[j + BC_REG_ACCU] += i; + collisionSet[i] += j + BC_REG_ACCU; + } } } } @@ -37366,7 +37466,12 @@ void NativeCodeProcedure::Optimize(void) #endif #if 1 - CompressTemporaries(); + BuildDataFlowSets(); + CompressTemporaries(false); + BuildDataFlowSets(); + CompressTemporaries(false); + BuildDataFlowSets(); + CompressTemporaries(true); #endif #if 1 diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index e4e7598..f3ce30e 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -580,7 +580,7 @@ class NativeCodeProcedure void CompileInterBlock(InterCodeProcedure* iproc, InterCodeBasicBlock* iblock, NativeCodeBasicBlock*block); bool MapFastParamsToTemps(void); - void CompressTemporaries(void); + void CompressTemporaries(bool singles); void BuildDataFlowSets(void); void ResetEntryBlocks(void);