Improve int dependency in loop analysis

This commit is contained in:
drmortalwombat 2023-05-07 17:46:53 +02:00
parent 0639fdc008
commit 4aa6b1c47d
4 changed files with 172 additions and 3 deletions

View File

@ -10,7 +10,7 @@ volatile byte rirq_count;
byte rasterIRQRows[NUM_IRQS];
byte rasterIRQIndex[NUM_IRQS];
byte rasterIRQNext[NUM_IRQS];
byte rasterIRQNext[NUM_IRQS + 1];
byte rasterIRQLow[NUM_IRQS];
byte rasterIRQHigh[NUM_IRQS];
@ -255,6 +255,8 @@ ex2:
void rirq_build(RIRQCode * ic, byte size)
{
__assume(size < 26);
ic->size = size;
asm_im(ic->code + 0, ASM_LDY, 0);
@ -372,6 +374,7 @@ void rirq_init_kernal(void)
rasterIRQRows[i] = 255;
rasterIRQIndex[i] = i;
}
rasterIRQNext[NUM_IRQS] = 255;
__asm
{
@ -393,6 +396,7 @@ void rirq_init_io(void)
rasterIRQRows[i] = 255;
rasterIRQIndex[i] = i;
}
rasterIRQNext[NUM_IRQS] = 255;
__asm
{
@ -414,6 +418,7 @@ void rirq_init_memmap(void)
rasterIRQRows[i] = 255;
rasterIRQIndex[i] = i;
}
rasterIRQNext[NUM_IRQS] = 255;
__asm
{

View File

@ -6081,11 +6081,62 @@ void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(const GrowingVariableArray
bool singleLoop = CheckSingleBlockLimitedLoop(pblock, nloop);
#if 0
FastNumberSet dependTemps(mExitRequiredTemps.Size());
#endif
if (singleLoop)
{
#if 1
struct TempChain
{
int mBaseTemp;
int64 mOffset;
};
ExpandingArray<TempChain> tempChain;
tempChain.SetSize(mExitRequiredTemps.Size());
for (int i = 0; i < mExitRequiredTemps.Size(); i++)
{
tempChain[i].mBaseTemp = i;
tempChain[i].mOffset = 0;
}
for (int i = 0; i < sz; i++)
{
InterInstruction* ins(mInstructions[i]);
if (ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_ADD &&
ins->mSrc[1].mTemp >= 0 && ins->mSrc[0].mTemp < 0 && ins->mSrc[0].mIntConst > 0 &&
tempChain[ins->mSrc[1].mTemp].mBaseTemp >= 0)
{
tempChain[ins->mDst.mTemp].mBaseTemp = tempChain[ins->mSrc[1].mTemp].mBaseTemp;
tempChain[ins->mDst.mTemp].mOffset = tempChain[ins->mSrc[1].mTemp].mOffset + ins->mSrc[0].mIntConst;
}
else if (ins->mCode == IC_CONVERSION_OPERATOR && ins->mOperator == IA_EXT8TO16U && ins->mSrc[0].mTemp >= 0)
tempChain[ins->mDst.mTemp] = tempChain[ins->mSrc[0].mTemp];
else if (ins->mDst.mTemp >= 0)
{
tempChain[ins->mDst.mTemp].mBaseTemp = -1;
}
}
for (int i = 0; i < tempChain.Size(); i++)
{
if (tempChain[i].mBaseTemp == i)
{
IntegerValueRange& r(pblock->mTrueValueRange[i]);
if (r.IsConstant())
{
mLocalValueRange[i].LimitMax(r.mMinValue + (nloop - 1) * tempChain[i].mOffset);
}
}
}
#else
FastNumberSet changedTemps(mExitRequiredTemps.Size());
GrowingArray<int> expandedTemps(-1);
for (int i = 0; i < sz; i++)
{
@ -6100,12 +6151,31 @@ void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(const GrowingVariableArray
else
changedTemps += ins->mDst.mTemp;
}
else if (ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_ADD &&
ins->mSrc[1].mTemp >= 0 &&
ins->mDst.mTemp == expandedTemps[ins->mSrc[1].mTemp] && ins->mSrc[0].mTemp < 0 && ins->mSrc[0].mIntConst > 0)
{
if (dependTemps[ins->mDst.mTemp])
changedTemps += ins->mDst.mTemp;
else if (pblock->mTrueValueRange[ins->mDst.mTemp].IsConstant())
dependTemps += ins->mDst.mTemp;
else
changedTemps += ins->mDst.mTemp;
expandedTemps[ins->mSrc[1].mTemp] = -1;
}
else if (ins->mCode == IC_CONVERSION_OPERATOR && ins->mOperator == IA_EXT8TO16U && ins->mSrc[0].mTemp >= 0)
{
expandedTemps[ins->mDst.mTemp] = ins->mSrc[0].mTemp;
changedTemps += ins->mDst.mTemp;
}
else if (ins->mDst.mTemp >= 0)
{
expandedTemps[ins->mDst.mTemp] = -1;
changedTemps += ins->mDst.mTemp;
dependTemps -= ins->mDst.mTemp;
}
}
#endif
}
for (int i = 0; i < sz; i++)
@ -6290,12 +6360,14 @@ void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(const GrowingVariableArray
case IA_ADD:
if (ins->mSrc[0].mTemp < 0)
{
if (ins->mSrc[1].mTemp == ins->mDst.mTemp && dependTemps[ins->mDst.mTemp] && i + 3 != sz)
#if 0
if (/*ins->mSrc[1].mTemp == ins->mDst.mTemp &&*/ dependTemps[ins->mDst.mTemp] && i + 3 != sz)
{
int64 start = pblock->mTrueValueRange[ins->mDst.mTemp].mMinValue;
vr.SetLimit(start + ins->mSrc[0].mIntConst, start + nloop * ins->mSrc[0].mIntConst);
}
else
#endif
{
vr = mLocalValueRange[ins->mSrc[1].mTemp];
if (ins->mSrc[0].mIntConst > 0 && vr.mMaxState == IntegerValueRange::S_WEAK)
@ -11955,6 +12027,32 @@ bool InterCodeBasicBlock::CheckSingleBlockLimitedLoop(InterCodeBasicBlock*& pbl
nloop++;
nloop = (nloop + ains->mSrc[0].mIntConst - 1) / ains->mSrc[0].mIntConst;
return true;
}
}
else if (bins->mSrc[0].mTemp == cins->mDst.mTemp &&
cins->mSrc[1].mTemp == ains->mDst.mTemp &&
cins->mSrc[0].mRange.mMaxState == IntegerValueRange::S_BOUND &&
ains->mSrc[1].mTemp == ains->mDst.mTemp &&
ains->mSrc[0].mTemp < 0 &&
(cins->mOperator == IA_CMPLU || cins->mOperator == IA_CMPLEU) &&
cins->mSrc[0].mRange.mMaxValue < 255 &&
ains->mSrc[0].mRange.mMaxValue > 0)
{
int pi = pblock->mInstructions.Size() - 1;
while (pi >= 0 && pblock->mInstructions[pi]->mDst.mTemp != ains->mDst.mTemp)
pi--;
int i = 0;
while (i < nins - 3 && mInstructions[i]->mDst.mTemp != ains->mDst.mTemp)
i++;
if (i == nins - 3)
{
nloop = cins->mSrc[0].mRange.mMaxValue;
if (cins->mOperator == IA_CMPLEU)
nloop++;
nloop = (nloop + ains->mSrc[0].mIntConst - 1) / ains->mSrc[0].mIntConst;
return true;
}
}
@ -15555,7 +15653,7 @@ void InterCodeProcedure::Close(void)
{
GrowingTypeArray tstack(IT_NONE);
CheckFunc = !strcmp(mIdent->mString, "main");
CheckFunc = !strcmp(mIdent->mString, "rirq_build1");
mEntryBlock = mBlocks[0];

View File

@ -20148,6 +20148,64 @@ bool NativeCodeBasicBlock::CrossBlockXYPreservation(void)
return changed;
}
// Folds a redundant pre-test in front of a single-instruction self loop.
//
// If this block ends in an LDA followed by a conditional branch, and one of
// its two successors is a block that consists of exactly the same LDA (same
// effective address) and branches on an equivalent condition - looping back
// to itself on one edge and leaving through the same block this one would
// have reached on the other edge - then the test done here is repeated by
// the loop block anyway. In that case the trailing LDA is replaced by a NOP
// and the conditional branch by a jump (ASMIT_JMP) into the loop block; the
// entry link of the dropped successor is removed via RemEntryBlock.
//
// The walk recurses through mTrueJump/mFalseJump and uses mVisited so each
// block is handled once per pass (the caller resets the visited flags).
//
// Returns true if this block or any block reached from it was changed.
bool NativeCodeBasicBlock::FoldLoopEntry(void)
{
	bool	changed = false;

	if (!mVisited)
	{
		mVisited = true;

		if (mTrueJump && mFalseJump && mIns.Size() >= 1)
		{
			int	sz = mIns.Size();
			if (mIns[sz - 1].mType == ASMIT_LDA)
			{
				// Case 1: the loop block is the true successor
				if (mTrueJump->mIns.Size() == 1 && mTrueJump != this)
				{
					if (mTrueJump->mIns[0].mType == ASMIT_LDA && mTrueJump->mIns[0].SameEffectiveAddress(mIns[sz - 1]))
					{
						// Same condition: loop repeats on "taken", and leaves to our false successor otherwise.
						// Inverted condition: loop repeats on "not taken", and leaves to our false successor when taken.
						if ((mBranch == mTrueJump->mBranch && mFalseJump == mTrueJump->mFalseJump && mTrueJump == mTrueJump->mTrueJump) ||
							(mBranch == InvertBranchCondition(mTrueJump->mBranch) && mFalseJump == mTrueJump->mTrueJump && mTrueJump == mTrueJump->mFalseJump))
						{
							mIns[sz - 1].mType = ASMIT_NOP;
							mIns[sz - 1].mMode = ASMIM_IMPLIED;
							mBranch = ASMIT_JMP;
							mFalseJump->RemEntryBlock(this);
							mFalseJump = nullptr;
							changed = true;
						}
					}
				}

				// Case 2: the loop block is the false successor
				if (!changed && mFalseJump->mIns.Size() == 1 && mFalseJump != this)
				{
					if (mFalseJump->mIns[0].mType == ASMIT_LDA && mFalseJump->mIns[0].SameEffectiveAddress(mIns[sz - 1]))
					{
						// Same condition: loop repeats on "not taken", so its "taken" exit must be
						// the block we would have branched to ourselves (our true successor).
						// Inverted condition: loop repeats on "taken", and its "not taken" exit must be
						// our true successor.
						if ((mBranch == mFalseJump->mBranch && mFalseJump == mFalseJump->mFalseJump && mTrueJump == mFalseJump->mTrueJump) ||
							(mBranch == InvertBranchCondition(mFalseJump->mBranch) && mFalseJump == mFalseJump->mTrueJump && mTrueJump == mFalseJump->mFalseJump))
						{
							mIns[sz - 1].mType = ASMIT_NOP;
							mIns[sz - 1].mMode = ASMIM_IMPLIED;
							mBranch = ASMIT_JMP;
							mTrueJump->RemEntryBlock(this);
							mTrueJump = mFalseJump;
							mFalseJump = nullptr;
							changed = true;
						}
					}
				}
			}
		}

		if (mTrueJump && mTrueJump->FoldLoopEntry())
			changed = true;
		if (mFalseJump && mFalseJump->FoldLoopEntry())
			changed = true;
	}

	return changed;
}
bool NativeCodeBasicBlock::BypassRegisterConditionBlock(void)
{
bool changed = false;
@ -39237,6 +39295,13 @@ void NativeCodeProcedure::Optimize(void)
}
#endif
if (step == 5)
{
ResetVisited();
if (mEntryBlock->FoldLoopEntry())
changed = true;
}
#if 1
if (step < 7)
{

View File

@ -476,6 +476,7 @@ public:
bool CrossBlockYAliasProgpagation(const int * yalias, int yoffset);
bool BypassRegisterConditionBlock(void);
bool FoldLoopEntry(void);
bool Check16BitSum(int at, NativeRegisterSum16Info& info);
bool Propagate16BitSum(const ExpandingArray<NativeRegisterSum16Info>& cinfo);