Cross block register optimizations

This commit is contained in:
drmortalwombat 2023-07-30 11:24:19 +02:00
parent 0b1d42b7d5
commit e013142cb1
4 changed files with 234 additions and 51 deletions

View File

@ -6376,6 +6376,47 @@ static int64 BuildLowerBitsMask(int64 v)
return v;
}
void InterCodeBasicBlock::MarkIntegerRangeBoundUp(int temp, int64 value, GrowingIntegerValueRangeArray& range)
{
range[temp].SetLimit(value, value);
for (int i = mInstructions.Size() - 1; i >= 0; i--)
{
InterInstruction* ins(mInstructions[i]);
if (ins->mDst.mTemp == temp)
{
if (ins->mCode == IC_BINARY_OPERATOR && ins->mSrc[1].mTemp == temp && ins->mSrc[0].mTemp < 0)
{
switch (ins->mOperator)
{
case IA_ADD:
value -= ins->mSrc[0].mIntConst;
break;
case IA_SUB:
value += ins->mSrc[0].mIntConst;
break;
default:
return;
}
}
else if (ins->mCode == IC_LOAD_TEMPORARY)
{
if (!IsTempModifiedInRange(i + 1, mInstructions.Size(), ins->mSrc[0].mTemp))
range[ins->mSrc[0].mTemp].SetLimit(value, value);
temp = ins->mSrc[0].mTemp;
}
else
return;
}
else if (ins->mCode == IC_LOAD_TEMPORARY && ins->mSrc[0].mTemp == temp)
{
if (!IsTempModifiedInRange(i + 1, mInstructions.Size(), ins->mDst.mTemp))
range[ins->mDst.mTemp].SetLimit(value, value);
}
}
}
void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(const GrowingVariableArray& localVars, const GrowingVariableArray& paramVars)
{
mLocalValueRange = mEntryValueRange;
@ -7328,34 +7369,46 @@ void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(const GrowingVariableArray
case IA_CMPEQ:
if (s0 < 0)
{
MarkIntegerRangeBoundUp(s1, mInstructions[sz - 2]->mSrc[0].mIntConst, mTrueValueRange);
#if 0
mTrueValueRange[s1].mMinState = IntegerValueRange::S_BOUND;
mTrueValueRange[s1].mMinValue = mInstructions[sz - 2]->mSrc[0].mIntConst;
mTrueValueRange[s1].mMaxState = IntegerValueRange::S_BOUND;
mTrueValueRange[s1].mMaxValue = mInstructions[sz - 2]->mSrc[0].mIntConst;
#endif
}
else if (s1 < 0)
{
MarkIntegerRangeBoundUp(s0, mInstructions[sz - 2]->mSrc[1].mIntConst, mTrueValueRange);
#if 0
mTrueValueRange[s0].mMinState = IntegerValueRange::S_BOUND;
mTrueValueRange[s0].mMinValue = mInstructions[sz - 2]->mSrc[1].mIntConst;
mTrueValueRange[s0].mMaxState = IntegerValueRange::S_BOUND;
mTrueValueRange[s0].mMaxValue = mInstructions[sz - 2]->mSrc[1].mIntConst;
#endif
}
break;
case IA_CMPNE:
if (s0 < 0)
{
MarkIntegerRangeBoundUp(s1, mInstructions[sz - 2]->mSrc[0].mIntConst, mFalseValueRange);
#if 0
mFalseValueRange[s1].mMinState = IntegerValueRange::S_BOUND;
mFalseValueRange[s1].mMinValue = mInstructions[sz - 2]->mSrc[0].mIntConst;
mFalseValueRange[s1].mMaxState = IntegerValueRange::S_BOUND;
mFalseValueRange[s1].mMaxValue = mInstructions[sz - 2]->mSrc[0].mIntConst;
#endif
}
else if (s1 < 0)
{
MarkIntegerRangeBoundUp(s0, mInstructions[sz - 2]->mSrc[1].mIntConst, mFalseValueRange);
#if 0
mFalseValueRange[s0].mMinState = IntegerValueRange::S_BOUND;
mFalseValueRange[s0].mMinValue = mInstructions[sz - 2]->mSrc[1].mIntConst;
mFalseValueRange[s0].mMaxState = IntegerValueRange::S_BOUND;
mFalseValueRange[s0].mMaxValue = mInstructions[sz - 2]->mSrc[1].mIntConst;
#endif
}
break;
#endif

View File

@ -440,6 +440,7 @@ public:
void UpdateLocalIntegerRangeSets(const GrowingVariableArray& localVars, const GrowingVariableArray& paramVars);
bool BuildGlobalIntegerRangeSets(bool initial, const GrowingVariableArray& localVars, const GrowingVariableArray& paramVars);
void SimplifyIntegerRangeRelops(void);
void MarkIntegerRangeBoundUp(int temp, int64 value, GrowingIntegerValueRangeArray& range);
bool CombineIndirectAddressing(void);

View File

@ -20706,6 +20706,100 @@ bool NativeCodeBasicBlock::CanCrossBlockYShortcut(int addr)
return false;
}
bool NativeCodeBasicBlock::CrossBlockRegisterAlias(bool sameAX, bool sameAY)
{
bool changed = false;
if (!mVisited)
{
mVisited = true;
if (mNumEntries > 1)
{
sameAX = false;
sameAY = false;
}
bool direct = false;
for (int i = 0; i < mIns.Size(); i++)
{
NativeCodeInstruction& ins(mIns[i]);
switch (ins.mType)
{
case ASMIT_TAY:
sameAY = true;
break;
case ASMIT_TAX:
sameAX = true;
break;
case ASMIT_TYA:
sameAY = true;
sameAX = false;
direct = true;
break;
case ASMIT_TXA:
sameAY = false;
sameAX = true;
direct = true;
break;
case ASMIT_CMP:
if (!(ins.mLive & LIVE_CPU_REG_A))
{
if (sameAY && (ins.mLive & LIVE_CPU_REG_Y) && HasAsmInstructionMode(ASMIT_CPY, ins.mMode))
{
ins.mType = ASMIT_CPY;
changed = true;
}
else if (sameAX && (ins.mLive & LIVE_CPU_REG_X) && HasAsmInstructionMode(ASMIT_CPX, ins.mMode))
{
ins.mType = ASMIT_CPX;
changed = true;
}
}
break;
case ASMIT_CPX:
if (!direct && !(ins.mLive & LIVE_CPU_REG_X))
{
if (sameAX && (ins.mLive & LIVE_CPU_REG_A))
{
ins.mType = ASMIT_CMP;
changed = true;
}
}
break;
case ASMIT_CPY:
if (!direct && !(ins.mLive & LIVE_CPU_REG_Y))
{
if (sameAY && (ins.mLive & LIVE_CPU_REG_A))
{
ins.mType = ASMIT_CMP;
changed = true;
}
}
break;
default:
if (ins.ChangesAccu())
{
sameAY = false;
sameAX = false;
}
if (ins.ChangesXReg())
sameAX = false;
if (ins.ChangesYReg())
sameAY = false;
break;
}
}
if (mTrueJump && mTrueJump->CrossBlockRegisterAlias(sameAX, sameAY))
changed = true;
if (mFalseJump && mFalseJump->CrossBlockRegisterAlias(sameAX, sameAY))
changed = true;
}
return changed;
}
bool NativeCodeBasicBlock::CrossBlockYAliasProgpagation(const int* yalias, int yoffset)
{
bool changed = false;
@ -31540,13 +31634,23 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc)
{
if (zxreg[ins.mAddress] >= 0)
{
zxreg[ins.mAddress] += 3;
xskew++;
if (ins.mLive & LIVE_CPU_REG_X)
zxreg[ins.mAddress] = -1;
else
{
zxreg[ins.mAddress] += 3;
xskew++;
}
}
if (zyreg[ins.mAddress] >= 0)
{
zyreg[ins.mAddress] += 3;
yskew++;
if (ins.mLive & LIVE_CPU_REG_Y)
zyreg[ins.mAddress] = -1;
else
{
zyreg[ins.mAddress] += 3;
yskew++;
}
}
zareg[ins.mAddress] = -1;
@ -31557,13 +31661,23 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc)
{
if (zxreg[ins.mAddress] >= 0)
{
zxreg[ins.mAddress] += 3;
xskew--;
if (ins.mLive & LIVE_CPU_REG_X)
zxreg[ins.mAddress] = -1;
else
{
zxreg[ins.mAddress] += 3;
xskew--;
}
}
if (zyreg[ins.mAddress] >= 0)
{
zyreg[ins.mAddress] += 3;
yskew--;
if (ins.mLive & LIVE_CPU_REG_Y)
zyreg[ins.mAddress] = -1;
else
{
zyreg[ins.mAddress] += 3;
yskew--;
}
}
zareg[ins.mAddress] = -1;
@ -31762,47 +31876,6 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc)
block->mExitRequiredRegs += CPU_REG_A;
}
if (block->mTrueJump && !lblocks.Contains(block->mTrueJump))
{
block->mTrueJump = block->BuildSingleEntry(proc, block->mTrueJump);
if (areg >= 0 && block->mTrueJump->mEntryRequiredRegs[areg])
{
if (areg < 256)
block->mTrueJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STA, ASMIM_ZERO_PAGE, areg));
block->mTrueJump->mEntryRequiredRegs += CPU_REG_A;
}
if (yreg >= 0 && block->mTrueJump->mEntryRequiredRegs[yreg])
{
block->mTrueJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STY, ASMIM_ZERO_PAGE, yreg));
block->mTrueJump->mEntryRequiredRegs += CPU_REG_Y;
}
if (xreg >= 0 && block->mTrueJump->mEntryRequiredRegs[xreg])
{
block->mTrueJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STX, ASMIM_ZERO_PAGE, xreg));
block->mTrueJump->mEntryRequiredRegs += CPU_REG_X;
}
}
if (block->mFalseJump && !lblocks.Contains(block->mFalseJump))
{
block->mFalseJump = block->BuildSingleEntry(proc, block->mFalseJump);
if (areg >= 0 && block->mFalseJump->mEntryRequiredRegs[areg])
{
if (areg < 256)
block->mFalseJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STA, ASMIM_ZERO_PAGE, areg));
block->mFalseJump->mEntryRequiredRegs += CPU_REG_A;
}
if (yreg >= 0 && block->mFalseJump->mEntryRequiredRegs[yreg])
{
block->mFalseJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STY, ASMIM_ZERO_PAGE, yreg));
block->mFalseJump->mEntryRequiredRegs += CPU_REG_Y;
}
if (xreg >= 0 && block->mFalseJump->mEntryRequiredRegs[xreg])
{
block->mFalseJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STX, ASMIM_ZERO_PAGE, xreg));
block->mFalseJump->mEntryRequiredRegs += CPU_REG_X;
}
}
for (int j = 0; j < block->mEntryBlocks.Size(); j++)
{
if (!lblocks.Contains(block->mEntryBlocks[j]))
@ -31831,7 +31904,7 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc)
}
}
int yoffset = 0, xoffset = 0;
int yoffset = 0, xoffset = 0, xskew = 0, yskew = 0;
for (int j = 0; j < block->mIns.Size(); j++)
{
@ -31846,6 +31919,7 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc)
ins.mType = ASMIT_NOP;
ins.mMode = ASMIM_IMPLIED;
yoffset = 0;
yskew = 0;
}
break;
case ASMIT_LDX:
@ -31855,6 +31929,7 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc)
ins.mType = ASMIT_NOP;
ins.mMode = ASMIM_IMPLIED;
xoffset = 0;
xskew = 0;
}
break;
case ASMIT_INC:
@ -31862,11 +31937,13 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc)
{
ins.mType = ASMIT_INY;
ins.mMode = ASMIM_IMPLIED;
yskew += 1;
}
else if (ins.mMode == ASMIM_ZERO_PAGE && ins.mAddress == xreg)
{
ins.mType = ASMIT_INX;
ins.mMode = ASMIM_IMPLIED;
xskew += 1;
}
break;
case ASMIT_DEC:
@ -31874,11 +31951,13 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc)
{
ins.mType = ASMIT_DEY;
ins.mMode = ASMIM_IMPLIED;
yskew -= 1;
}
else if (ins.mMode == ASMIM_ZERO_PAGE && ins.mAddress == xreg)
{
ins.mType = ASMIT_DEX;
ins.mMode = ASMIM_IMPLIED;
xskew -= 1;
}
break;
@ -31995,6 +32074,47 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc)
}
if (block->mTrueJump && !lblocks.Contains(block->mTrueJump))
{
block->mTrueJump = block->BuildSingleEntry(proc, block->mTrueJump);
if (areg >= 0 && block->mTrueJump->mEntryRequiredRegs[areg])
{
if (areg < 256)
block->mTrueJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STA, ASMIM_ZERO_PAGE, areg));
block->mTrueJump->mEntryRequiredRegs += CPU_REG_A;
}
if (yreg >= 0 && block->mTrueJump->mEntryRequiredRegs[yreg])
{
block->mTrueJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STY, ASMIM_ZERO_PAGE, yreg));
block->mTrueJump->mEntryRequiredRegs += CPU_REG_Y;
}
if (xreg >= 0 && block->mTrueJump->mEntryRequiredRegs[xreg])
{
block->mTrueJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STX, ASMIM_ZERO_PAGE, xreg));
block->mTrueJump->mEntryRequiredRegs += CPU_REG_X;
}
}
if (block->mFalseJump && !lblocks.Contains(block->mFalseJump))
{
block->mFalseJump = block->BuildSingleEntry(proc, block->mFalseJump);
if (areg >= 0 && block->mFalseJump->mEntryRequiredRegs[areg])
{
if (areg < 256)
block->mFalseJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STA, ASMIM_ZERO_PAGE, areg));
block->mFalseJump->mEntryRequiredRegs += CPU_REG_A;
}
if (yreg >= 0 && block->mFalseJump->mEntryRequiredRegs[yreg])
{
block->mFalseJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STY, ASMIM_ZERO_PAGE, yreg));
block->mFalseJump->mEntryRequiredRegs += CPU_REG_Y;
}
if (xreg >= 0 && block->mFalseJump->mEntryRequiredRegs[xreg])
{
block->mFalseJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STX, ASMIM_ZERO_PAGE, xreg));
block->mFalseJump->mEntryRequiredRegs += CPU_REG_X;
}
}
block->CheckLive();
}
@ -40551,7 +40671,7 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc)
{
mInterProc = proc;
CheckFunc = !strcmp(mInterProc->mIdent->mString, "test");
CheckFunc = !strcmp(mInterProc->mIdent->mString, "enemies_find");
int nblocks = proc->mBlocks.Size();
tblocks = new NativeCodeBasicBlock * [nblocks];
@ -41631,6 +41751,13 @@ void NativeCodeProcedure::Optimize(void)
}
#endif
if (step == 7)
{
ResetVisited();
if (mEntryBlock->CrossBlockRegisterAlias(false, false))
changed = true;
}
if (step == 8)
{
ResetVisited();

View File

@ -525,6 +525,8 @@ public:
bool CrossBlockYAliasProgpagation(const int * yalias, int yoffset);
bool CrossBlockRegisterAlias(bool sameAX, bool sameAY);
bool BypassRegisterConditionBlock(void);
bool FoldLoopEntry(void);