From 2ac276458afd681f9765c90209fe15054ddfe867 Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Fri, 19 May 2023 21:25:29 +0200 Subject: [PATCH] Optimize loop invariants in single entry/exit loops --- README.md | 14 +- oscar64/Array.h | 12 ++ oscar64/InterCode.cpp | 14 +- oscar64/NativeCodeGenerator.cpp | 231 +++++++++++++++++++++++++++++++- oscar64/NativeCodeGenerator.h | 8 ++ 5 files changed, 272 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 95bd959..b3fa5a5 100644 --- a/README.md +++ b/README.md @@ -231,7 +231,7 @@ Imports the character data and compresses it using lzo compression. #embed ctm_tiles8 "cards.ctm" }; -Imports the tiles in 8 bit form and builds word constants +Imports the tiles in 8 bit form and builds byte constants const unsigned CardsTiles[] = { #embed ctm_tiles16 word "cards.ctm" @@ -239,6 +239,18 @@ Imports the tiles in 8 bit form and builds word constants Imports the tiles in 16 bit form and builds word constants + const char CardsMap[] = { + #embed ctm_map8 "cards.ctm" + }; + +Imports the map (screen) in 8 bit form and builds byte constants + + const unsigned CardsMap[] = { + #embed ctm_map16 word "cards.ctm" + }; + +Imports the map (screen) in 16 bit form and builds word constants + const char CardsAttrib1[] = { #embed ctm_attr1 "cards.ctm" }; diff --git a/oscar64/Array.h b/oscar64/Array.h index 1fdf8d7..83ec110 100644 --- a/oscar64/Array.h +++ b/oscar64/Array.h @@ -240,6 +240,18 @@ public: else return array[n]; } + const T getAt(int n) const + { + if (n >= size) return empty; + else return array[n]; + } + + void destroyAt(int n) + { + if (n < size) + array[n] = empty; + } + void Push(T t) { (*this)[size] = t; diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index 824afc7..1fd2a2b 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -3616,11 +3616,11 @@ static void DestroySourceValues(int temp, GrowingInstructionPtrArray& tvalue, Fa { j = 
tvalid.Element(i); - ins = tvalue[j]; + ins = tvalue.getAt(j); if (ins->UsesTemp(temp)) { - tvalue[j] = NULL; + tvalue.destroyAt(j); tvalid -= j; } else @@ -15661,7 +15661,7 @@ void InterCodeProcedure::Close(void) { GrowingTypeArray tstack(IT_NONE); - CheckFunc = !strcmp(mIdent->mString, "rirq_build1"); + CheckFunc = !strcmp(mIdent->mString, "main"); mEntryBlock = mBlocks[0]; @@ -15994,6 +15994,10 @@ void InterCodeProcedure::Close(void) ResetVisited(); mEntryBlock->FollowJumps(); + ResetEntryBlocks(); + ResetVisited(); + mEntryBlock->CollectEntryBlocks(nullptr); + DisassembleDebug("Followed Jumps 2"); RebuildIntegerRangeSet(); @@ -16990,8 +16994,8 @@ void InterCodeProcedure::Disassemble(const char* name, bool dumpSets) FILE* file; static bool initial = true; -// if (!CheckFunc) -// return; + if (!CheckFunc) + return; if (!initial) { diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index 0e43001..6a128b0 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -11342,6 +11342,9 @@ void NativeCodeBasicBlock::RelationalOperator(InterCodeProcedure* proc, const In NativeCodeBasicBlock* nblock = nproc->AllocateBlock(); NativeCodeBasicBlock* cblock = this; + if (op == IA_CMPLU && ins->mSrc[0].mTemp == -1 && ins->mSrc[1].IsUnsigned() && ins->mSrc[1].mRange.mMaxValue == ins->mSrc[0].mIntConst) + op = IA_CMPNE; + int li = 1, ri = 0; if (op == IA_CMPLEU || op == IA_CMPGU || op == IA_CMPLES || op == IA_CMPGS) { @@ -20815,6 +20818,76 @@ bool NativeCodeBasicBlock::PatchSingleUseGlobalLoad(const NativeCodeBasicBlock* return changed; } +bool NativeCodeBasicBlock::CheckForwardLowYPointer(const NativeCodeBasicBlock* block, int reg, int yreg, int at, int yval) +{ + // Checking only current block as first optimization step + + while (at < mIns.Size()) + { + NativeCodeInstruction& ins(mIns[at]); + + if (ins.mMode == ASMIM_ZERO_PAGE && (ins.mAddress == reg || ins.mAddress == yreg)) + return false; + else if (ins.mMode == 
ASMIM_INDIRECT_Y && ins.mAddress == reg) + { + if (yval != 0) + return false; + else if (!(ins.mLive & LIVE_MEM)) + return true; + } + + if (ins.mType == ASMIT_LDY && ins.mMode == ASMIM_IMMEDIATE) + yval = ins.mAddress; + else if (ins.mType == ASMIT_INY && yval >= 0) + yval = (yval + 1) & 255; + else if (ins.mType == ASMIT_DEY && yval >= 0) + yval = (yval - 1) & 255; + else if (ins.mType == ASMIT_JSR) + { + if (ins.UsesZeroPage(reg) || ins.ChangesZeroPage(yreg)) + return false; + yval = -1; + } + else if (ins.ChangesZeroPage(reg) || ins.ChangesZeroPage(yreg)) + return false; + else if (ins.ChangesYReg()) + yval = -1; + + at++; + } + + return false; +} + +bool NativeCodeBasicBlock::PatchForwardLowYPointer(const NativeCodeBasicBlock* block, int reg, int yreg, int at, int yval) +{ + bool changed = false; + + while (at < mIns.Size()) + { + NativeCodeInstruction& ins(mIns[at]); + + if (ins.mMode == ASMIM_INDIRECT_Y && ins.mAddress == reg) + { + bool done = !(ins.mLive & LIVE_MEM); + + if (ins.mLive & LIVE_CPU_REG_Y) + mIns.Insert(at + 1, NativeCodeInstruction(ins.mIns, ASMIT_LDY, ASMIM_IMMEDIATE, 0)); + mIns.Insert(at, NativeCodeInstruction(ins.mIns, ASMIT_LDY, ASMIM_ZERO_PAGE, yreg)); + at++; + + changed = true; + + if (done) + return changed; + } + + at++; + } + + return changed; +} + bool NativeCodeBasicBlock::CheckForwardSumYPointer(const NativeCodeBasicBlock* block, int reg, int base, const NativeCodeInstruction& iins, int at, int yval) { if (!mPatched) @@ -27398,6 +27471,116 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc return lblock->OptimizeSimpleLoopInvariant(proc, this, eblock, full); } +bool NativeCodeBasicBlock::OptimizeSingleEntryLoop(NativeCodeProcedure* proc) +{ + bool changed = false; + + if (!mVisited) + { + mVisited = true; + + if (mLoopHead && mEntryBlocks.Size() == 2) + { + NativeCodeBasicBlock* pblock, * eblock; + + if (mEntryBlocks[0]->mFalseJump) + { + pblock = mEntryBlocks[1]; + eblock = mEntryBlocks[0]; + } 
+ else + { + eblock = mEntryBlocks[1]; + pblock = mEntryBlocks[0]; + } + + if (!pblock->mFalseJump && eblock->IsDominatedBy(this)) + { + ExpandingArray<NativeCodeBasicBlock*> lblocks; + + proc->ResetPatched(); + mPatched = true; + + lblocks.Push(eblock); + eblock->mPatched = true; + + int i = 0; + while (i < lblocks.Size()) + { + NativeCodeBasicBlock* block = lblocks[i]; + for(int j=0; j<block->mEntryBlocks.Size(); j++) + { + NativeCodeBasicBlock* cblock = block->mEntryBlocks[j]; + if (!cblock->mPatched) + { + cblock->mPatched = true; + lblocks.Push(cblock); + } + } + i++; + } + + int aimm = -1; + + if (!pblock->mExitRequiredRegs[CPU_REG_A] || !pblock->mExitRequiredRegs[CPU_REG_X]) + { + for (int i = 0; i < mIns.Size(); i++) + { + if (mIns[i].mType == ASMIT_LDA && mIns[i].mMode == ASMIM_IMMEDIATE) + aimm = mIns[i].mAddress; + else if (mIns[i].mType == ASMIT_STA && mIns[i].mMode == ASMIM_ZERO_PAGE && aimm >= 0) + { + int reg = mIns[i].mAddress; + if (!ReferencedOnPath(this, 0, i, reg) && !ChangedOnPath(this, i + 1, mIns.Size(), reg)) + { + int k = 0; + while (k < lblocks.Size() && !(lblocks[k] != this && lblocks[k]->ChangesZeroPage(reg))) + k++; + if (k == lblocks.Size()) + { + if (!pblock->mExitRequiredRegs[CPU_REG_A]) + { + pblock->mIns.Push(NativeCodeInstruction(mIns[i].mIns, ASMIT_LDA, ASMIM_IMMEDIATE, aimm)); + pblock->mIns.Push(NativeCodeInstruction(mIns[i].mIns, ASMIT_STA, ASMIM_ZERO_PAGE, reg)); + } + else if (!pblock->mExitRequiredRegs[CPU_REG_X]) + { + pblock->mIns.Push(NativeCodeInstruction(mIns[i].mIns, ASMIT_LDX, ASMIM_IMMEDIATE, aimm)); + pblock->mIns.Push(NativeCodeInstruction(mIns[i].mIns, ASMIT_STX, ASMIM_ZERO_PAGE, reg)); + } + + mIns[i].mType = ASMIT_NOP; mIns[i].mMode = ASMIM_IMPLIED; + changed = true; + + mEntryRequiredRegs += reg; + mExitRequiredRegs += reg; + pblock->mExitRequiredRegs += reg; + + for (int i = 0; i < lblocks.Size(); i++) + { + lblocks[i]->mEntryRequiredRegs += reg; + lblocks[i]->mExitRequiredRegs += reg; + } + + } + } + } + else if (mIns[i].ChangesAccu()) + 
aimm = -1; + } + } + } + } + + if (mTrueJump && mTrueJump->OptimizeSingleEntryLoop(proc)) + changed = true; + if (mFalseJump && mFalseJump->OptimizeSingleEntryLoop(proc)) + changed = true; + } + + return changed; +} + bool NativeCodeBasicBlock::OptimizeLoopCarryOver(void) { bool changed = false; @@ -33445,6 +33628,16 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass progress = true; } #endif + else if ( + mIns[i + 0].mType == ASMIT_LSR && mIns[i + 0].mMode == ASMIM_IMPLIED && + mIns[i + 1].mType == ASMIT_ASL && mIns[i + 1].mMode == ASMIM_IMPLIED && !(mIns[i + 1].mLive & LIVE_CPU_REG_C)) + { + mIns[i + 0].mType = ASMIT_AND; + mIns[i + 0].mMode = ASMIM_IMMEDIATE; + mIns[i + 0].mAddress = 0xfe; + mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; + progress = true; + } else if ( mIns[i + 0].mType == ASMIT_ROL && mIns[i + 0].mMode == ASMIM_IMPLIED && mIns[i + 1].mType == ASMIT_LSR && mIns[i + 1].mMode == ASMIM_IMPLIED && !(mIns[i + 1].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z))) @@ -37406,6 +37599,34 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass } } #endif +#if 1 + if ( + mLoopHead && + mIns[i + 0].mType == ASMIT_LDA && mIns[i + 0].mMode == ASMIM_ZERO_PAGE && + mIns[i + 1].mType == ASMIT_STA && mIns[i + 1].mMode == ASMIM_ZERO_PAGE && mIns[i + 1].mAddress != mIns[i + 0].mAddress && + mIns[i + 2].mType == ASMIT_CLC && + mIns[i + 3].mType == ASMIT_LDA && + mIns[i + 4].mType == ASMIT_ADC && + mIns[i + 5].mType == ASMIT_STA && mIns[i + 5].mMode == ASMIM_ZERO_PAGE && mIns[i + 5].mAddress == mIns[i + 1].mAddress + 1) + { + int yval = RetrieveYValue(i); + proc->ResetPatched(); + if (CheckForwardLowYPointer(this, mIns[i + 1].mAddress, mIns[i + 0].mAddress, i + 6, yval)) + { + proc->ResetPatched(); + if (PatchForwardLowYPointer(this, mIns[i + 1].mAddress, mIns[i + 0].mAddress, i + 6, yval)) + progress = true; + + mIns[i + 0].mMode = ASMIM_IMMEDIATE; + mIns[i + 0].mAddress = 0; + + if 
(mTrueJump) + mTrueJump->CheckLive(); + if (mFalseJump) + mFalseJump->CheckLive(); + } + } +#endif #if 1 if ( @@ -39859,6 +40080,15 @@ void NativeCodeProcedure::Optimize(void) changed = true; } #endif +#if 1 + if (step == 9) + { + RebuildEntry(); + ResetVisited(); + if (mEntryBlock->OptimizeSingleEntryLoop(this)) + changed = true; + } +#endif #if 1 if (step == 2 && !changed) { @@ -39920,7 +40150,6 @@ void NativeCodeProcedure::Optimize(void) else cnt++; - } while (changed); diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index 8bd8cb1..dbca705 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -263,6 +263,7 @@ public: bool OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc, NativeCodeBasicBlock * prevBlock, NativeCodeBasicBlock* exitBlock, bool full); bool RemoveSimpleLoopUnusedIndex(void); bool OptimizeLoopCarryOver(void); + bool OptimizeSingleEntryLoop(NativeCodeProcedure* proc); bool OptimizeSimpleLoop(NativeCodeProcedure* proc, bool full); bool SimpleLoopReversal(NativeCodeProcedure* proc); @@ -571,6 +572,13 @@ public: bool CheckForwardSumYPointer(const NativeCodeBasicBlock* block, int reg, int base, const NativeCodeInstruction & iins, int at, int yval); bool PatchForwardSumYPointer(const NativeCodeBasicBlock* block, int reg, int base, const NativeCodeInstruction & iins, int at, int yval); + // reg : base register pair to replace LSB with zero + // yreg : index register + // at : start position in block + // yval: known y immediate value or -1 if not known + bool CheckForwardLowYPointer(const NativeCodeBasicBlock* block, int reg, int yreg, int at, int yval); + bool PatchForwardLowYPointer(const NativeCodeBasicBlock* block, int reg, int yreg, int at, int yval); + bool CrossBlock16BitFlood(NativeCodeProcedure* proc); bool CheckCrossBlock16BitFlood(const NativeCodeBasicBlock* block, int sreg, int dreg, int at, bool rvalid); bool CheckCrossBlock16BitFloodExit(const NativeCodeBasicBlock* block, int sreg, 
int dreg, bool rvalid);