Register condition block bypassing

This commit is contained in:
drmortalwombat 2023-01-17 08:02:36 +01:00
parent 02db5b5eb1
commit 2f4b2790f6
4 changed files with 302 additions and 4 deletions

View File

@ -89,7 +89,7 @@ void bm_scan_fill(int left, int right, char * lp, int x0, int x1, char pat)
{ {
__assume(left >= 0); __assume(left >= 0);
__assume(right >= 0); __assume(right >= 0);
if (x0 < left) if (x0 < left)
x0 = left; x0 = left;
if (x1 > right) if (x1 > right)
@ -119,6 +119,15 @@ void bm_scan_fill(int left, int right, char * lp, int x0, int x1, char pat)
dp += 256; dp += 256;
l -= 248; l -= 248;
} }
else if (l >= 128)
{
#pragma unroll(full)
for(char i=0; i<15; i++)
{
dp[o] = pat;
o += 8;
}
}
while (o < (char)l) while (o < (char)l)
{ {
@ -132,6 +141,7 @@ void bm_scan_fill(int left, int right, char * lp, int x0, int x1, char pat)
#pragma native(bm_scan_fill) #pragma native(bm_scan_fill)
#if 0
unsigned bm_usqrt(unsigned n) unsigned bm_usqrt(unsigned n)
{ {
unsigned p, q, r, h; unsigned p, q, r, h;
@ -157,6 +167,32 @@ unsigned bm_usqrt(unsigned n)
return p; return p;
} }
#else
// Integer square root using the bitwise shift-and-subtract method.
//
// n: value to take the root of.
// Returns p, the accumulated root; r holds the running remainder, which
// starts at n and is reduced every time a candidate bit is accepted.
//
// The candidate bit q starts at 0x4000 and is shifted right by two per step
// until it becomes zero, i.e. it visits 0x4000, 0x1000, ..., 0x0001. With
// that starting bit the result can be at most 0xff.
//
// NOTE(review): #assign / #repeat / #until are not standard C preprocessor
// directives. Presumably the compiler expands the braced body once per value
// of q at compile time, which would make this a fully unrolled variant of
// the loop version above - confirm against the compiler documentation.
unsigned bm_usqrt(unsigned n)
{
unsigned p, q, r, h;
p = 0;
r = n;
// q is (re)defined as a preprocessor symbol from here until the #undef below.
#assign q 0x4000
#repeat
{
h = p | q;
p >>= 1;
if (r >= h)
{
p |= q;
r -= h;
}
}
#assign q q >> 2
#until q == 0
#undef q
return p;
}
#endif
#pragma native(bm_usqrt) #pragma native(bm_usqrt)

View File

@ -204,16 +204,19 @@ void bmmc_circle_fill(const Bitmap * bm, const ClipRect * clip, int x, int y, ch
int stride = 8 * bm->cwidth - 8; int stride = 8 * bm->cwidth - 8;
unsigned rr = r * r + r; unsigned rr = r * r + r;
unsigned d = rr - (y0 - y) * (y0 - y);
int tt = 2 * (y0 - y) + 1;
for(char iy=y0; iy<(char)y1; iy++) for(char iy=y0; iy<(char)y1; iy++)
{ {
int d = (iy - y); int t = bm_usqrt(d);
int t = bm_usqrt(rr - d * d);
bmmc_scan_fill(clip->left, clip->right, lp, x - t, x + t + 1, pat[iy & 7]); bmmc_scan_fill(clip->left, clip->right, lp, x - t, x + t + 1, pat[iy & 7]);
lp ++; lp ++;
if (!((int)lp & 7)) if (!((int)lp & 7))
lp += stride; lp += stride;
d -= tt;
tt += 2;
} }
} }

View File

@ -16437,6 +16437,24 @@ bool NativeCodeBasicBlock::JoinTailCodeSequences(NativeCodeProcedure* proc, bool
#endif #endif
if (mIns.Size() >= 1 && mIns[0].mType == ASMIT_STA && mIns[0].mMode == ASMIM_ZERO_PAGE && !(mIns[0].mMode & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z)) && mEntryBlocks.Size() == 2)
{
if (!mEntryBlocks[0]->mFalseJump && !mEntryBlocks[1]->mFalseJump && mEntryBlocks[0]->mIns.Size() > 0 && mEntryBlocks[1]->mIns.Size() > 0)
{
if (mEntryBlocks[0]->mIns.Last().mType == ASMIT_LDA && mEntryBlocks[0]->mIns.Last().SameEffectiveAddress(mIns[0]))
{
mEntryBlocks[1]->mIns.Push(NativeCodeInstruction(ASMIT_STA, mIns[0]));
mIns.Remove(0);
changed = true;
}
else if (mEntryBlocks[1]->mIns.Last().mType == ASMIT_LDA && mEntryBlocks[1]->mIns.Last().SameEffectiveAddress(mIns[0]))
{
mEntryBlocks[0]->mIns.Push(NativeCodeInstruction(ASMIT_STA, mIns[0]));
mIns.Remove(0);
changed = true;
}
}
}
#if 1 #if 1
if (mFalseJump && mTrueJump->mIns.Size() > 0 && mFalseJump->mIns.Size() > 0 && mTrueJump->mNumEntries == 1 && mFalseJump->mNumEntries == 1 && if (mFalseJump && mTrueJump->mIns.Size() > 0 && mFalseJump->mIns.Size() > 0 && mTrueJump->mNumEntries == 1 && mFalseJump->mNumEntries == 1 &&
mTrueJump->mIns[0].mType == ASMIT_LDA && mTrueJump->mIns[0].mMode == ASMIM_ZERO_PAGE && !(mTrueJump->mIns[0].mLive & LIVE_MEM) && mTrueJump->mIns[0].mType == ASMIT_LDA && mTrueJump->mIns[0].mMode == ASMIM_ZERO_PAGE && !(mTrueJump->mIns[0].mLive & LIVE_MEM) &&
@ -17201,6 +17219,145 @@ bool NativeCodeBasicBlock::CrossBlockXYPreservation(void)
return changed; return changed;
} }
// Moves a register load out of a block that ends in a two-way branch, so that
// the load no longer sits in front of a conditionally executed block.
//
// The shape that is matched (for the current block "this"):
//   - this block has both a true and a false successor,
//   - one successor (cblock) has exactly one successor of its own, and that
//     successor is the other successor of this block (eblock),
//   - cblock has one entry and eblock has two entries.
//
// For that shape, and separately for A, Y and X, the last instruction of this
// block that references the register is inspected and, if the guards below
// hold, re-created at the start of eblock (or of cblock for TXA / TYA) and
// removed from this block.
//
// The function recurses into both successors; mVisited makes sure each block
// is processed at most once per traversal.
//
// Returns true if this block or any block reached through the recursion was
// modified.
bool NativeCodeBasicBlock::BypassRegisterConditionBlock(void)
{
bool changed = false;
if (!mVisited)
{
mVisited = true;
if (mTrueJump && mFalseJump && mIns.Size() > 0)
{
// cblock: the successor that is only run conditionally and then continues
// into eblock; eblock: the successor reached on both paths.
NativeCodeBasicBlock* eblock = nullptr, * cblock = nullptr;
if (mTrueJump->mTrueJump == mFalseJump && !mTrueJump->mFalseJump)
{
cblock = mTrueJump;
eblock = mFalseJump;
}
else if (mFalseJump->mTrueJump == mTrueJump && !mFalseJump->mFalseJump)
{
cblock = mFalseJump;
eblock = mTrueJump;
}
// The entry counts ensure cblock is entered only from this block and
// eblock only from this block and one other predecessor.
if (cblock && cblock->mNumEntries == 1 && eblock->mNumEntries == 2)
{
// Disabled variant: would hoist a trailing "LDA zp" of cblock that feeds a
// leading "STA zp" of eblock into a store at the end of this block.
#if 0
if (cblock->mIns.Size() >= 1 && eblock->mIns.Size() >= 1)
{
int csz = cblock->mIns.Size() - 1;
if (cblock->mIns[csz].mType == ASMIT_LDA && cblock->mIns[csz].mMode == ASMIM_ZERO_PAGE &&
eblock->mIns[0].mType == ASMIT_STA && eblock->mIns[0].mMode == ASMIM_ZERO_PAGE && eblock->mIns[0].mAddress == cblock->mIns[csz].mAddress && !(eblock->mIns[0].mLive & LIVE_CPU_REG_A))
{
mIns.Push(NativeCodeInstruction(ASMIT_STA, eblock->mIns[0]));
cblock->mIns.Remove(csz);
cblock->mExitRequiredRegs -= CPU_REG_A;
changed = true;
}
}
#endif
// --- Accumulator ---
if (mExitRequiredRegs[CPU_REG_A])
{
// Case 1: cblock does not reference A at all, so the value is only
// needed from eblock onwards.
if (!cblock->ReferencesAccu())
{
// Find the last instruction in this block that references A.
int i = mIns.Size() - 1;
while (i >= 0 && !mIns[i].ReferencesAccu())
i--;
// Only a zero page LDA whose flag result is not live afterwards qualifies.
if (i >= 0 && mIns[i].mType == ASMIT_LDA && mIns[i].mMode == ASMIM_ZERO_PAGE && !(mIns[i].mLive & LIVE_CPU_REG_Z))
{
// The loaded zero page location must keep its value through the rest of
// this block and through cblock, otherwise the later load reads something else.
if (!ChangesZeroPage(mIns[i].mAddress, i + 1) && !cblock->ChangesZeroPage(mIns[i].mAddress))
{
// NOTE(review): only the liveness recorded on mIns[i] is checked; whether
// eblock needs the flags on entry is not tested here - confirm that a load
// at the start of eblock cannot clobber a flag value eblock relies on.
eblock->mIns.Insert(0, NativeCodeInstruction(ASMIT_LDA, mIns[i]));
mIns.Remove(i);
changed = true;
mExitRequiredRegs -= CPU_REG_A;
}
}
}
// Case 2: cblock references A but eblock does not need A on entry, so a
// register transfer into A is only needed on the cblock path.
else if (!eblock->mEntryRequiredRegs[CPU_REG_A])
{
int i = mIns.Size() - 1;
while (i >= 0 && !mIns[i].ReferencesAccu())
i--;
if (i >= 0 && mIns[i].mType == ASMIT_TXA && !(mIns[i].mLive & LIVE_CPU_REG_Z))
{
// X must still hold the same value at the end of this block.
if (!ChangesXReg(i + 1))
{
cblock->mIns.Insert(0, NativeCodeInstruction(ASMIT_TXA));
mIns.Remove(i);
changed = true;
// This block now hands over X instead of A.
// NOTE(review): the entry/exit register sets of cblock are not touched
// here - presumably they are recomputed by a later pass; verify.
mExitRequiredRegs -= CPU_REG_A;
mExitRequiredRegs += CPU_REG_X;
}
}
else if (i >= 0 && mIns[i].mType == ASMIT_TYA && !(mIns[i].mLive & LIVE_CPU_REG_Z))
{
// Same as the TXA case, with Y as the source register.
if (!ChangesYReg(i + 1))
{
cblock->mIns.Insert(0, NativeCodeInstruction(ASMIT_TYA));
mIns.Remove(i);
changed = true;
mExitRequiredRegs -= CPU_REG_A;
mExitRequiredRegs += CPU_REG_Y;
}
}
}
}
// --- Y register: same pattern as accumulator case 1, using LDY ---
if (mExitRequiredRegs[CPU_REG_Y])
{
if (!cblock->ReferencesYReg())
{
int i = mIns.Size() - 1;
while (i >= 0 && !mIns[i].ReferencesYReg())
i--;
if (i >= 0 && mIns[i].mType == ASMIT_LDY && mIns[i].mMode == ASMIM_ZERO_PAGE && !(mIns[i].mLive & LIVE_CPU_REG_Z))
{
if (!ChangesZeroPage(mIns[i].mAddress, i + 1) && !cblock->ChangesZeroPage(mIns[i].mAddress))
{
eblock->mIns.Insert(0, NativeCodeInstruction(ASMIT_LDY, mIns[i]));
mIns.Remove(i);
changed = true;
mExitRequiredRegs -= CPU_REG_Y;
}
}
}
}
// --- X register: same pattern as accumulator case 1, using LDX ---
if (mExitRequiredRegs[CPU_REG_X])
{
if (!cblock->ReferencesXReg())
{
int i = mIns.Size() - 1;
while (i >= 0 && !mIns[i].ReferencesXReg())
i--;
if (i >= 0 && mIns[i].mType == ASMIT_LDX && mIns[i].mMode == ASMIM_ZERO_PAGE && !(mIns[i].mLive & LIVE_CPU_REG_Z))
{
if (!ChangesZeroPage(mIns[i].mAddress, i + 1) && !cblock->ChangesZeroPage(mIns[i].mAddress))
{
eblock->mIns.Insert(0, NativeCodeInstruction(ASMIT_LDX, mIns[i]));
mIns.Remove(i);
changed = true;
mExitRequiredRegs -= CPU_REG_X;
}
}
}
}
}
}
// Continue the traversal through both successors.
if (mTrueJump && mTrueJump->BypassRegisterConditionBlock())
changed = true;
if (mFalseJump && mFalseJump->BypassRegisterConditionBlock())
changed = true;
}
return changed;
}
bool NativeCodeBasicBlock::FindPageStartAddress(int at, int reg, int& addr) bool NativeCodeBasicBlock::FindPageStartAddress(int at, int reg, int& addr)
{ {
@ -25902,6 +26059,80 @@ void NativeCodeBasicBlock::BlockSizeReduction(NativeCodeProcedure* proc, int xen
} }
} }
// Scans the instructions of this block starting at index `from` and returns
// true as soon as one of them reports ReferencesAccu(); false if none does.
bool NativeCodeBasicBlock::ReferencesAccu(int from) const
{
	int idx = from;
	while (idx < mIns.Size())
	{
		if (mIns[idx].ReferencesAccu())
		{
			return true;
		}
		idx++;
	}

	return false;
}
// Scans the instructions of this block starting at index `from` and returns
// true as soon as one of them reports ReferencesYReg(); false if none does.
bool NativeCodeBasicBlock::ReferencesYReg(int from) const
{
	int idx = from;
	while (idx < mIns.Size())
	{
		if (mIns[idx].ReferencesYReg())
		{
			return true;
		}
		idx++;
	}

	return false;
}
// Scans the instructions of this block starting at index `from` and returns
// true as soon as one of them reports ReferencesXReg(); false if none does.
bool NativeCodeBasicBlock::ReferencesXReg(int from) const
{
	int idx = from;
	while (idx < mIns.Size())
	{
		if (mIns[idx].ReferencesXReg())
		{
			return true;
		}
		idx++;
	}

	return false;
}
// Scans the instructions of this block starting at index `from` and returns
// true as soon as one of them reports ChangesAccu(); false if none does.
bool NativeCodeBasicBlock::ChangesAccu(int from) const
{
	int idx = from;
	while (idx < mIns.Size())
	{
		if (mIns[idx].ChangesAccu())
		{
			return true;
		}
		idx++;
	}

	return false;
}
// Scans the instructions of this block starting at index `from` and returns
// true as soon as one of them reports ChangesYReg(); false if none does.
bool NativeCodeBasicBlock::ChangesYReg(int from) const
{
	int idx = from;
	while (idx < mIns.Size())
	{
		if (mIns[idx].ChangesYReg())
		{
			return true;
		}
		idx++;
	}

	return false;
}
// Scans the instructions of this block starting at index `from` and returns
// true as soon as one of them reports ChangesXReg(); false if none does.
bool NativeCodeBasicBlock::ChangesXReg(int from) const
{
	int idx = from;
	while (idx < mIns.Size())
	{
		if (mIns[idx].ChangesXReg())
		{
			return true;
		}
		idx++;
	}

	return false;
}
// Scans the instructions of this block starting at index `from` and returns
// true as soon as one of them reports ChangesZeroPage(address); false if
// none does.
bool NativeCodeBasicBlock::ChangesZeroPage(int address, int from) const
{
	int idx = from;
	while (idx < mIns.Size())
	{
		if (mIns[idx].ChangesZeroPage(address))
		{
			return true;
		}
		idx++;
	}

	return false;
}
// Scans the instructions of this block starting at index `from` and returns
// true as soon as one of them reports UsesZeroPage(address); false if none
// does.
bool NativeCodeBasicBlock::UsesZeroPage(int address, int from) const
{
	int idx = from;
	while (idx < mIns.Size())
	{
		if (mIns[idx].UsesZeroPage(address))
		{
			return true;
		}
		idx++;
	}

	return false;
}
// Scans the instructions of this block starting at index `from` and returns
// true as soon as one of them reports ReferencesZeroPage(address); false if
// none does.
bool NativeCodeBasicBlock::ReferencesZeroPage(int address, int from) const
{
	int idx = from;
	while (idx < mIns.Size())
	{
		if (mIns[idx].ReferencesZeroPage(address))
		{
			return true;
		}
		idx++;
	}

	return false;
}
bool NativeCodeBasicBlock::RemoveNops(void) bool NativeCodeBasicBlock::RemoveNops(void)
{ {
@ -33002,9 +33233,17 @@ void NativeCodeProcedure::Optimize(void)
ResetVisited(); ResetVisited();
if (mEntryBlock->SimplifyLoopEnd(this)) if (mEntryBlock->SimplifyLoopEnd(this))
changed = true; changed = true;
} }
#endif #endif
if (step >= 5)
{
ResetVisited();
if (mEntryBlock->BypassRegisterConditionBlock())
changed = true;
}
if (step == 7) if (step == 7)
{ {
ResetVisited(); ResetVisited();
@ -33104,6 +33343,12 @@ void NativeCodeProcedure::Optimize(void)
changed = mEntryBlock->PeepHoleOptimizer(this, 10); changed = mEntryBlock->PeepHoleOptimizer(this, 10);
} }
if (!changed)
{
ResetVisited();
changed = mEntryBlock->JoinTailCodeSequences(this, true);
}
} while (changed); } while (changed);
#endif #endif

View File

@ -199,6 +199,19 @@ public:
void ShortcutTailRecursion(); void ShortcutTailRecursion();
// Block-level queries: each one scans the instructions of this block from
// index `from` (default 0, i.e. the whole block) to the end and returns true
// if any instruction answers the same-named per-instruction query with true.

// True if an instruction at or after `from` references the accumulator.
bool ReferencesAccu(int from = 0) const;
// True if an instruction at or after `from` references the Y register.
bool ReferencesYReg(int from = 0) const;
// True if an instruction at or after `from` references the X register.
bool ReferencesXReg(int from = 0) const;
// True if an instruction at or after `from` changes the accumulator.
bool ChangesAccu(int from = 0) const;
// True if an instruction at or after `from` changes the Y register.
bool ChangesYReg(int from = 0) const;
// True if an instruction at or after `from` changes the X register.
bool ChangesXReg(int from = 0) const;
// True if an instruction at or after `from` changes zero page location `address`.
bool ChangesZeroPage(int address, int from = 0) const;
// True if an instruction at or after `from` uses zero page location `address`.
bool UsesZeroPage(int address, int from = 0) const;
// True if an instruction at or after `from` references zero page location `address`.
bool ReferencesZeroPage(int address, int from = 0) const;
bool RemoveNops(void); bool RemoveNops(void);
bool PeepHoleOptimizer(NativeCodeProcedure* proc, int pass); bool PeepHoleOptimizer(NativeCodeProcedure* proc, int pass);
void BlockSizeReduction(NativeCodeProcedure* proc, int xenter, int yenter); void BlockSizeReduction(NativeCodeProcedure* proc, int xenter, int yenter);
@ -399,6 +412,7 @@ public:
bool CrossBlockXYShortcut(void); bool CrossBlockXYShortcut(void);
// Recursive pass over the block graph (guarded by mVisited) that relocates a
// trailing A/X/Y load of a branching block past a conditionally executed
// successor block; returns true if any block was modified.
bool BypassRegisterConditionBlock(void);
bool Check16BitSum(int at, NativeRegisterSum16Info& info); bool Check16BitSum(int at, NativeRegisterSum16Info& info);
bool Propagate16BitSum(void); bool Propagate16BitSum(void);