From 2f4b2790f6d299eca04f1fdfa0578f87be5e0b20 Mon Sep 17 00:00:00 2001
From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com>
Date: Tue, 17 Jan 2023 08:02:36 +0100
Subject: [PATCH] Register condition block bypassing

---
Review notes (this area is not part of the commit message):

* NativeCodeBasicBlock::BypassRegisterConditionBlock (new) walks the block
  graph once per block via mVisited.  For a block with two successors where
  one successor ("cblock", exactly one entry, no false jump) continues into
  the other ("eblock", exactly two entries) it
    - moves the last LDA/LDY/LDX <zero page> of the head block to the start
      of eblock, provided cblock does not reference that register, neither
      the rest of the head block nor cblock changes the zero page location,
      and the load is not marked LIVE_CPU_REG_Z;
    - otherwise, for the accumulator only, moves a trailing TXA/TYA to the
      start of cblock when eblock does not require A on entry and X/Y is
      not changed afterwards in the head block.

* JoinTailCodeSequences: when a block with exactly two entry blocks starts
  with STA <zero page>, neither entry block has a false jump, and one entry
  block ends in an LDA for which SameEffectiveAddress() matches the store,
  the STA is appended to the other entry block and removed from this block.

* Fix applied during review: the guard of that new JoinTailCodeSequences
  block masked mIns[0].mMode with LIVE_CPU_REG_A | LIVE_CPU_REG_Z.  mMode is
  the addressing mode already compared against ASMIM_ZERO_PAGE in the same
  condition; every other liveness test in this patch masks mLive.  The test
  now reads mIns[0].mLive, so the store is only moved when A and Z are not
  live after it.  The post-image blob id on the NativeCodeGenerator.cpp
  index line therefore no longer matches the patched file.

* Whitespace-only lines and indentation were reconstructed from the hunk
  line counts; verify against the tree before applying.

 include/gfx/bitmap.c            |  38 ++++-
 include/gfx/mcbitmap.c          |   9 +-
 oscar64/NativeCodeGenerator.cpp | 245 ++++++++++++++++++++++++++++++++
 oscar64/NativeCodeGenerator.h   |  14 ++
 4 files changed, 302 insertions(+), 4 deletions(-)

diff --git a/include/gfx/bitmap.c b/include/gfx/bitmap.c
index 22ec70b..9125368 100644
--- a/include/gfx/bitmap.c
+++ b/include/gfx/bitmap.c
@@ -89,7 +89,7 @@ void bm_scan_fill(int left, int right, char * lp, int x0, int x1, char pat)
 {
 	__assume(left >= 0);
 	__assume(right >= 0);
-	
+
 	if (x0 < left)
 		x0 = left;
 	if (x1 > right)
@@ -119,6 +119,15 @@ void bm_scan_fill(int left, int right, char * lp, int x0, int x1, char pat)
 				dp += 256;
 				l -= 248;
 			}
+			else if (l >= 128)
+			{
+				#pragma unroll(full)
+				for(char i=0; i<15; i++)
+				{
+					dp[o] = pat;
+					o += 8;
+				}
+			}
 
 			while (o < (char)l)
 			{
@@ -132,6 +141,7 @@ void bm_scan_fill(int left, int right, char * lp, int x0, int x1, char pat)
 
 #pragma native(bm_scan_fill)
 
+#if 0
 unsigned bm_usqrt(unsigned n)
 {
 	unsigned p, q, r, h;
@@ -157,6 +167,32 @@ unsigned bm_usqrt(unsigned n)
 
 	return p;
 }
+#else
+unsigned bm_usqrt(unsigned n)
+{
+	unsigned p, q, r, h;
+
+	p = 0;
+	r = n;
+
+#assign q 0x4000
+#repeat
+	{
+		h = p | q;
+		p >>= 1;
+		if (r >= h)
+		{
+			p |= q;
+			r -= h;
+		}
+	}
+#assign q q >> 2
+#until q == 0
+#undef q
+
+	return p;
+}
+#endif
 
 #pragma native(bm_usqrt)
 
diff --git a/include/gfx/mcbitmap.c b/include/gfx/mcbitmap.c
index 55383ce..bf5f02f 100644
--- a/include/gfx/mcbitmap.c
+++ b/include/gfx/mcbitmap.c
@@ -204,16 +204,19 @@ void bmmc_circle_fill(const Bitmap * bm, const ClipRect * clip, int x, int y, ch
 	int stride = 8 * bm->cwidth - 8;
 
 	unsigned rr = r * r + r;
+	unsigned d = rr - (y0 - y) * (y0 - y);
+	int tt = 2 * (y0 - y) + 1;
 
 	for(char iy=y0; iy<(char)y1; iy++)
 	{
-		int d = (iy - y);
-
-		int t = bm_usqrt(rr - d * d);
+		int t = bm_usqrt(d);
 		bmmc_scan_fill(clip->left, clip->right, lp, x - t, x + t + 1, pat[iy & 7]);
 		lp ++;
 		if (!((int)lp & 7))
 			lp += stride;
+
+		d -= tt;
+		tt += 2;
 	}
 }
 
diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp
index 2449f46..f949f4e 100644
--- a/oscar64/NativeCodeGenerator.cpp
+++ b/oscar64/NativeCodeGenerator.cpp
@@ -16437,6 +16437,24 @@ bool NativeCodeBasicBlock::JoinTailCodeSequences(NativeCodeProcedure* proc, bool
 
 #endif
 
+		if (mIns.Size() >= 1 && mIns[0].mType == ASMIT_STA && mIns[0].mMode == ASMIM_ZERO_PAGE && !(mIns[0].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z)) && mEntryBlocks.Size() == 2)
+		{
+			if (!mEntryBlocks[0]->mFalseJump && !mEntryBlocks[1]->mFalseJump && mEntryBlocks[0]->mIns.Size() > 0 && mEntryBlocks[1]->mIns.Size() > 0)
+			{
+				if (mEntryBlocks[0]->mIns.Last().mType == ASMIT_LDA && mEntryBlocks[0]->mIns.Last().SameEffectiveAddress(mIns[0]))
+				{
+					mEntryBlocks[1]->mIns.Push(NativeCodeInstruction(ASMIT_STA, mIns[0]));
+					mIns.Remove(0);
+					changed = true;
+				}
+				else if (mEntryBlocks[1]->mIns.Last().mType == ASMIT_LDA && mEntryBlocks[1]->mIns.Last().SameEffectiveAddress(mIns[0]))
+				{
+					mEntryBlocks[0]->mIns.Push(NativeCodeInstruction(ASMIT_STA, mIns[0]));
+					mIns.Remove(0);
+					changed = true;
+				}
+			}
+		}
 #if 1
 		if (mFalseJump && mTrueJump->mIns.Size() > 0 && mFalseJump->mIns.Size() > 0 && mTrueJump->mNumEntries == 1 && mFalseJump->mNumEntries == 1 &&
 			mTrueJump->mIns[0].mType == ASMIT_LDA && mTrueJump->mIns[0].mMode == ASMIM_ZERO_PAGE && !(mTrueJump->mIns[0].mLive & LIVE_MEM) &&
@@ -17201,6 +17219,145 @@ bool NativeCodeBasicBlock::CrossBlockXYPreservation(void)
 	return changed;
 }
 
+bool NativeCodeBasicBlock::BypassRegisterConditionBlock(void)
+{
+	bool	changed = false;
+	if (!mVisited)
+	{
+		mVisited = true;
+
+		if (mTrueJump && mFalseJump && mIns.Size() > 0)
+		{
+			NativeCodeBasicBlock* eblock = nullptr, * cblock = nullptr;
+
+			if (mTrueJump->mTrueJump == mFalseJump && !mTrueJump->mFalseJump)
+			{
+				cblock = mTrueJump;
+				eblock = mFalseJump;
+			}
+			else if (mFalseJump->mTrueJump == mTrueJump && !mFalseJump->mFalseJump)
+			{
+				cblock = mFalseJump;
+				eblock = mTrueJump;
+			}
+
+			if (cblock && cblock->mNumEntries == 1 && eblock->mNumEntries == 2)
+			{
+#if 0
+				if (cblock->mIns.Size() >= 1 && eblock->mIns.Size() >= 1)
+				{
+					int csz = cblock->mIns.Size() - 1;
+
+					if (cblock->mIns[csz].mType == ASMIT_LDA && cblock->mIns[csz].mMode == ASMIM_ZERO_PAGE &&
+						eblock->mIns[0].mType == ASMIT_STA && eblock->mIns[0].mMode == ASMIM_ZERO_PAGE && eblock->mIns[0].mAddress == cblock->mIns[csz].mAddress && !(eblock->mIns[0].mLive & LIVE_CPU_REG_A))
+					{
+						mIns.Push(NativeCodeInstruction(ASMIT_STA, eblock->mIns[0]));
+						cblock->mIns.Remove(csz);
+						cblock->mExitRequiredRegs -= CPU_REG_A;
+						changed = true;
+					}
+				}
+#endif
+				if (mExitRequiredRegs[CPU_REG_A])
+				{
+					if (!cblock->ReferencesAccu())
+					{
+						int i = mIns.Size() - 1;
+						while (i >= 0 && !mIns[i].ReferencesAccu())
+							i--;
+						if (i >= 0 && mIns[i].mType == ASMIT_LDA && mIns[i].mMode == ASMIM_ZERO_PAGE && !(mIns[i].mLive & LIVE_CPU_REG_Z))
+						{
+							if (!ChangesZeroPage(mIns[i].mAddress, i + 1) && !cblock->ChangesZeroPage(mIns[i].mAddress))
+							{
+								eblock->mIns.Insert(0, NativeCodeInstruction(ASMIT_LDA, mIns[i]));
+								mIns.Remove(i);
+								changed = true;
+								mExitRequiredRegs -= CPU_REG_A;
+							}
+						}
+					}
+					else if (!eblock->mEntryRequiredRegs[CPU_REG_A])
+					{
+						int i = mIns.Size() - 1;
+						while (i >= 0 && !mIns[i].ReferencesAccu())
+							i--;
+						if (i >= 0 && mIns[i].mType == ASMIT_TXA && !(mIns[i].mLive & LIVE_CPU_REG_Z))
+						{
+							if (!ChangesXReg(i + 1))
+							{
+								cblock->mIns.Insert(0, NativeCodeInstruction(ASMIT_TXA));
+								mIns.Remove(i);
+								changed = true;
+								mExitRequiredRegs -= CPU_REG_A;
+								mExitRequiredRegs += CPU_REG_X;
+							}
+						}
+						else if (i >= 0 && mIns[i].mType == ASMIT_TYA && !(mIns[i].mLive & LIVE_CPU_REG_Z))
+						{
+							if (!ChangesYReg(i + 1))
+							{
+								cblock->mIns.Insert(0, NativeCodeInstruction(ASMIT_TYA));
+								mIns.Remove(i);
+								changed = true;
+								mExitRequiredRegs -= CPU_REG_A;
+								mExitRequiredRegs += CPU_REG_Y;
+							}
+						}
+					}
+				}
+
+				if (mExitRequiredRegs[CPU_REG_Y])
+				{
+					if (!cblock->ReferencesYReg())
+					{
+						int i = mIns.Size() - 1;
+						while (i >= 0 && !mIns[i].ReferencesYReg())
+							i--;
+						if (i >= 0 && mIns[i].mType == ASMIT_LDY && mIns[i].mMode == ASMIM_ZERO_PAGE && !(mIns[i].mLive & LIVE_CPU_REG_Z))
+						{
+							if (!ChangesZeroPage(mIns[i].mAddress, i + 1) && !cblock->ChangesZeroPage(mIns[i].mAddress))
+							{
+								eblock->mIns.Insert(0, NativeCodeInstruction(ASMIT_LDY, mIns[i]));
+								mIns.Remove(i);
+								changed = true;
+								mExitRequiredRegs -= CPU_REG_Y;
+							}
+						}
+					}
+				}
+
+				if (mExitRequiredRegs[CPU_REG_X])
+				{
+					if (!cblock->ReferencesXReg())
+					{
+						int i = mIns.Size() - 1;
+						while (i >= 0 && !mIns[i].ReferencesXReg())
+							i--;
+						if (i >= 0 && mIns[i].mType == ASMIT_LDX && mIns[i].mMode == ASMIM_ZERO_PAGE && !(mIns[i].mLive & LIVE_CPU_REG_Z))
+						{
+							if (!ChangesZeroPage(mIns[i].mAddress, i + 1) && !cblock->ChangesZeroPage(mIns[i].mAddress))
+							{
+								eblock->mIns.Insert(0, NativeCodeInstruction(ASMIT_LDX, mIns[i]));
+								mIns.Remove(i);
+								changed = true;
+								mExitRequiredRegs -= CPU_REG_X;
+							}
+						}
+					}
+				}
+			}
+		}
+
+
+		if (mTrueJump && mTrueJump->BypassRegisterConditionBlock())
+			changed = true;
+		if (mFalseJump && mFalseJump->BypassRegisterConditionBlock())
+			changed = true;
+	}
+
+	return changed;
+}
+
 
 bool NativeCodeBasicBlock::FindPageStartAddress(int at, int reg, int& addr)
 {
@@ -25902,6 +26059,80 @@ void NativeCodeBasicBlock::BlockSizeReduction(NativeCodeProcedure* proc, int xen
 	}
 }
 
+bool NativeCodeBasicBlock::ReferencesAccu(int from) const
+{
+	for (int i = from; i < mIns.Size(); i++)
+		if (mIns[i].ReferencesAccu())
+			return true;
+	return false;
+}
+
+bool NativeCodeBasicBlock::ReferencesYReg(int from) const
+{
+	for (int i = from; i < mIns.Size(); i++)
+		if (mIns[i].ReferencesYReg())
+			return true;
+	return false;
+}
+
+bool NativeCodeBasicBlock::ReferencesXReg(int from) const
+{
+	for (int i = from; i < mIns.Size(); i++)
+		if (mIns[i].ReferencesXReg())
+			return true;
+	return false;
+}
+
+bool NativeCodeBasicBlock::ChangesAccu(int from) const
+{
+	for (int i = from; i < mIns.Size(); i++)
+		if (mIns[i].ChangesAccu())
+			return true;
+	return false;
+}
+
+bool NativeCodeBasicBlock::ChangesYReg(int from) const
+{
+	for (int i = from; i < mIns.Size(); i++)
+		if (mIns[i].ChangesYReg())
+			return true;
+	return false;
+}
+
+bool NativeCodeBasicBlock::ChangesXReg(int from) const
+{
+	for (int i = from; i < mIns.Size(); i++)
+		if (mIns[i].ChangesXReg())
+			return true;
+	return false;
+}
+
+bool NativeCodeBasicBlock::ChangesZeroPage(int address, int from) const
+{
+	for (int i = from; i < mIns.Size(); i++)
+		if (mIns[i].ChangesZeroPage(address))
+			return true;
+	return false;
+}
+
+bool NativeCodeBasicBlock::UsesZeroPage(int address, int from) const
+{
+	for (int i = from; i < mIns.Size(); i++)
+		if (mIns[i].UsesZeroPage(address))
+			return true;
+	return false;
+}
+
+bool NativeCodeBasicBlock::ReferencesZeroPage(int address, int from) const
+{
+	for (int i = from; i < mIns.Size(); i++)
+		if (mIns[i].ReferencesZeroPage(address))
+			return true;
+	return false;
+}
+
+
+
 
 bool NativeCodeBasicBlock::RemoveNops(void)
 {
@@ -33002,9 +33233,17 @@ void NativeCodeProcedure::Optimize(void)
 			ResetVisited();
 			if (mEntryBlock->SimplifyLoopEnd(this))
 				changed = true;
+
 		}
 #endif
 
+		if (step >= 5)
+		{
+			ResetVisited();
+			if (mEntryBlock->BypassRegisterConditionBlock())
+				changed = true;
+		}
+
 		if (step == 7)
 		{
 			ResetVisited();
@@ -33104,6 +33343,12 @@ void NativeCodeProcedure::Optimize(void)
 			changed = mEntryBlock->PeepHoleOptimizer(this, 10);
 		}
 
+		if (!changed)
+		{
+			ResetVisited();
+			changed = mEntryBlock->JoinTailCodeSequences(this, true);
+		}
+
 	} while (changed);
 
 #endif
diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h
index d27d2ea..ffe2a67 100644
--- a/oscar64/NativeCodeGenerator.h
+++ b/oscar64/NativeCodeGenerator.h
@@ -199,6 +199,19 @@ public:
 
 	void ShortcutTailRecursion();
 
+	bool ReferencesAccu(int from = 0) const;
+	bool ReferencesYReg(int from = 0) const;
+	bool ReferencesXReg(int from = 0) const;
+
+	bool ChangesAccu(int from = 0) const;
+	bool ChangesYReg(int from = 0) const;
+	bool ChangesXReg(int from = 0) const;
+
+	bool ChangesZeroPage(int address, int from = 0) const;
+	bool UsesZeroPage(int address, int from = 0) const;
+	bool ReferencesZeroPage(int address, int from = 0) const;
+
+
 	bool RemoveNops(void);
 	bool PeepHoleOptimizer(NativeCodeProcedure* proc, int pass);
 	void BlockSizeReduction(NativeCodeProcedure* proc, int xenter, int yenter);
@@ -399,6 +412,7 @@ public:
 
 	bool CrossBlockXYShortcut(void);
 
+	bool BypassRegisterConditionBlock(void);
 
 	bool Check16BitSum(int at, NativeRegisterSum16Info& info);
 	bool Propagate16BitSum(void);