Cross block register optimizations

This commit is contained in:
drmortalwombat 2023-07-30 11:24:19 +02:00
parent 0b1d42b7d5
commit e013142cb1
4 changed files with 234 additions and 51 deletions

View File

@ -6376,6 +6376,47 @@ static int64 BuildLowerBitsMask(int64 v)
return v; return v;
} }
// Pins "temp" to exactly "value" in "range" (both limits are set to value)
// and then scans this block's instructions from last to first, carrying the
// known value over to temporaries that are linked to the tracked one:
//
//  - a constant IA_ADD / IA_SUB that updates the tracked temporary in place
//    is undone, so "value" becomes what the temporary held before it,
//  - an IC_LOAD_TEMPORARY that writes the tracked temporary switches the
//    tracking over to its source,
//  - an IC_LOAD_TEMPORARY that reads the tracked temporary pins its
//    destination as well.
//
// A linked temporary is only pinned when IsTempModifiedInRange reports no
// modification of it between the instruction and the end of the block.
// The scan stops at the first write to the tracked temporary that is none
// of the handled forms.
//
//   temp  - temporary whose value is known
//   value - the known value
//   range - range set that receives the limits
void InterCodeBasicBlock::MarkIntegerRangeBoundUp(int temp, int64 value, GrowingIntegerValueRangeArray& range)
{
	range[temp].SetLimit(value, value);

	for (int pos = mInstructions.Size() - 1; pos >= 0; --pos)
	{
		InterInstruction* ins = mInstructions[pos];
		const bool isCopy = (ins->mCode == IC_LOAD_TEMPORARY);

		if (ins->mDst.mTemp != temp)
		{
			// Not a write to the tracked temporary; a copy out of it still
			// lets the destination inherit the current value.
			if (isCopy && ins->mSrc[0].mTemp == temp)
			{
				if (!IsTempModifiedInRange(pos + 1, mInstructions.Size(), ins->mDst.mTemp))
					range[ins->mDst.mTemp].SetLimit(value, value);
			}
			continue;
		}

		// The instruction writes the tracked temporary.
		if (ins->mCode == IC_BINARY_OPERATOR && ins->mSrc[1].mTemp == temp && ins->mSrc[0].mTemp < 0)
		{
			// In-place update with mSrc[0] carrying no temporary (mTemp < 0),
			// so its mIntConst is the operand: reverse it to get the value
			// that was held before this instruction.
			if (ins->mOperator == IA_ADD)
				value -= ins->mSrc[0].mIntConst;
			else if (ins->mOperator == IA_SUB)
				value += ins->mSrc[0].mIntConst;
			else
				return;
		}
		else if (isCopy)
		{
			// The tracked temporary was copied from another one; pin the
			// source if it stays untouched, and keep following the source
			// either way.
			if (!IsTempModifiedInRange(pos + 1, mInstructions.Size(), ins->mSrc[0].mTemp))
				range[ins->mSrc[0].mTemp].SetLimit(value, value);
			temp = ins->mSrc[0].mTemp;
		}
		else
		{
			// Any other definition: the earlier value is unknown.
			return;
		}
	}
}
void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(const GrowingVariableArray& localVars, const GrowingVariableArray& paramVars) void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(const GrowingVariableArray& localVars, const GrowingVariableArray& paramVars)
{ {
mLocalValueRange = mEntryValueRange; mLocalValueRange = mEntryValueRange;
@ -7328,34 +7369,46 @@ void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(const GrowingVariableArray
case IA_CMPEQ: case IA_CMPEQ:
if (s0 < 0) if (s0 < 0)
{ {
MarkIntegerRangeBoundUp(s1, mInstructions[sz - 2]->mSrc[0].mIntConst, mTrueValueRange);
#if 0
mTrueValueRange[s1].mMinState = IntegerValueRange::S_BOUND; mTrueValueRange[s1].mMinState = IntegerValueRange::S_BOUND;
mTrueValueRange[s1].mMinValue = mInstructions[sz - 2]->mSrc[0].mIntConst; mTrueValueRange[s1].mMinValue = mInstructions[sz - 2]->mSrc[0].mIntConst;
mTrueValueRange[s1].mMaxState = IntegerValueRange::S_BOUND; mTrueValueRange[s1].mMaxState = IntegerValueRange::S_BOUND;
mTrueValueRange[s1].mMaxValue = mInstructions[sz - 2]->mSrc[0].mIntConst; mTrueValueRange[s1].mMaxValue = mInstructions[sz - 2]->mSrc[0].mIntConst;
#endif
} }
else if (s1 < 0) else if (s1 < 0)
{ {
MarkIntegerRangeBoundUp(s0, mInstructions[sz - 2]->mSrc[1].mIntConst, mTrueValueRange);
#if 0
mTrueValueRange[s0].mMinState = IntegerValueRange::S_BOUND; mTrueValueRange[s0].mMinState = IntegerValueRange::S_BOUND;
mTrueValueRange[s0].mMinValue = mInstructions[sz - 2]->mSrc[1].mIntConst; mTrueValueRange[s0].mMinValue = mInstructions[sz - 2]->mSrc[1].mIntConst;
mTrueValueRange[s0].mMaxState = IntegerValueRange::S_BOUND; mTrueValueRange[s0].mMaxState = IntegerValueRange::S_BOUND;
mTrueValueRange[s0].mMaxValue = mInstructions[sz - 2]->mSrc[1].mIntConst; mTrueValueRange[s0].mMaxValue = mInstructions[sz - 2]->mSrc[1].mIntConst;
#endif
} }
break; break;
case IA_CMPNE: case IA_CMPNE:
if (s0 < 0) if (s0 < 0)
{ {
MarkIntegerRangeBoundUp(s1, mInstructions[sz - 2]->mSrc[0].mIntConst, mFalseValueRange);
#if 0
mFalseValueRange[s1].mMinState = IntegerValueRange::S_BOUND; mFalseValueRange[s1].mMinState = IntegerValueRange::S_BOUND;
mFalseValueRange[s1].mMinValue = mInstructions[sz - 2]->mSrc[0].mIntConst; mFalseValueRange[s1].mMinValue = mInstructions[sz - 2]->mSrc[0].mIntConst;
mFalseValueRange[s1].mMaxState = IntegerValueRange::S_BOUND; mFalseValueRange[s1].mMaxState = IntegerValueRange::S_BOUND;
mFalseValueRange[s1].mMaxValue = mInstructions[sz - 2]->mSrc[0].mIntConst; mFalseValueRange[s1].mMaxValue = mInstructions[sz - 2]->mSrc[0].mIntConst;
#endif
} }
else if (s1 < 0) else if (s1 < 0)
{ {
MarkIntegerRangeBoundUp(s0, mInstructions[sz - 2]->mSrc[1].mIntConst, mFalseValueRange);
#if 0
mFalseValueRange[s0].mMinState = IntegerValueRange::S_BOUND; mFalseValueRange[s0].mMinState = IntegerValueRange::S_BOUND;
mFalseValueRange[s0].mMinValue = mInstructions[sz - 2]->mSrc[1].mIntConst; mFalseValueRange[s0].mMinValue = mInstructions[sz - 2]->mSrc[1].mIntConst;
mFalseValueRange[s0].mMaxState = IntegerValueRange::S_BOUND; mFalseValueRange[s0].mMaxState = IntegerValueRange::S_BOUND;
mFalseValueRange[s0].mMaxValue = mInstructions[sz - 2]->mSrc[1].mIntConst; mFalseValueRange[s0].mMaxValue = mInstructions[sz - 2]->mSrc[1].mIntConst;
#endif
} }
break; break;
#endif #endif

View File

@ -440,6 +440,7 @@ public:
void UpdateLocalIntegerRangeSets(const GrowingVariableArray& localVars, const GrowingVariableArray& paramVars); void UpdateLocalIntegerRangeSets(const GrowingVariableArray& localVars, const GrowingVariableArray& paramVars);
bool BuildGlobalIntegerRangeSets(bool initial, const GrowingVariableArray& localVars, const GrowingVariableArray& paramVars); bool BuildGlobalIntegerRangeSets(bool initial, const GrowingVariableArray& localVars, const GrowingVariableArray& paramVars);
void SimplifyIntegerRangeRelops(void); void SimplifyIntegerRangeRelops(void);
// Limits range[temp] to exactly value and propagates that bound backwards through this block's instructions (temporary copies and constant add/sub of the tracked temporary).
void MarkIntegerRangeBoundUp(int temp, int64 value, GrowingIntegerValueRangeArray& range);
bool CombineIndirectAddressing(void); bool CombineIndirectAddressing(void);

View File

@ -20706,6 +20706,100 @@ bool NativeCodeBasicBlock::CanCrossBlockYShortcut(int addr)
return false; return false;
} }
bool NativeCodeBasicBlock::CrossBlockRegisterAlias(bool sameAX, bool sameAY)
{
bool changed = false;
if (!mVisited)
{
mVisited = true;
if (mNumEntries > 1)
{
sameAX = false;
sameAY = false;
}
bool direct = false;
for (int i = 0; i < mIns.Size(); i++)
{
NativeCodeInstruction& ins(mIns[i]);
switch (ins.mType)
{
case ASMIT_TAY:
sameAY = true;
break;
case ASMIT_TAX:
sameAX = true;
break;
case ASMIT_TYA:
sameAY = true;
sameAX = false;
direct = true;
break;
case ASMIT_TXA:
sameAY = false;
sameAX = true;
direct = true;
break;
case ASMIT_CMP:
if (!(ins.mLive & LIVE_CPU_REG_A))
{
if (sameAY && (ins.mLive & LIVE_CPU_REG_Y) && HasAsmInstructionMode(ASMIT_CPY, ins.mMode))
{
ins.mType = ASMIT_CPY;
changed = true;
}
else if (sameAX && (ins.mLive & LIVE_CPU_REG_X) && HasAsmInstructionMode(ASMIT_CPX, ins.mMode))
{
ins.mType = ASMIT_CPX;
changed = true;
}
}
break;
case ASMIT_CPX:
if (!direct && !(ins.mLive & LIVE_CPU_REG_X))
{
if (sameAX && (ins.mLive & LIVE_CPU_REG_A))
{
ins.mType = ASMIT_CMP;
changed = true;
}
}
break;
case ASMIT_CPY:
if (!direct && !(ins.mLive & LIVE_CPU_REG_Y))
{
if (sameAY && (ins.mLive & LIVE_CPU_REG_A))
{
ins.mType = ASMIT_CMP;
changed = true;
}
}
break;
default:
if (ins.ChangesAccu())
{
sameAY = false;
sameAX = false;
}
if (ins.ChangesXReg())
sameAX = false;
if (ins.ChangesYReg())
sameAY = false;
break;
}
}
if (mTrueJump && mTrueJump->CrossBlockRegisterAlias(sameAX, sameAY))
changed = true;
if (mFalseJump && mFalseJump->CrossBlockRegisterAlias(sameAX, sameAY))
changed = true;
}
return changed;
}
bool NativeCodeBasicBlock::CrossBlockYAliasProgpagation(const int* yalias, int yoffset) bool NativeCodeBasicBlock::CrossBlockYAliasProgpagation(const int* yalias, int yoffset)
{ {
bool changed = false; bool changed = false;
@ -31539,15 +31633,25 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc)
if (ins.mMode == ASMIM_ZERO_PAGE) if (ins.mMode == ASMIM_ZERO_PAGE)
{ {
if (zxreg[ins.mAddress] >= 0) if (zxreg[ins.mAddress] >= 0)
{
if (ins.mLive & LIVE_CPU_REG_X)
zxreg[ins.mAddress] = -1;
else
{ {
zxreg[ins.mAddress] += 3; zxreg[ins.mAddress] += 3;
xskew++; xskew++;
} }
}
if (zyreg[ins.mAddress] >= 0) if (zyreg[ins.mAddress] >= 0)
{
if (ins.mLive & LIVE_CPU_REG_Y)
zyreg[ins.mAddress] = -1;
else
{ {
zyreg[ins.mAddress] += 3; zyreg[ins.mAddress] += 3;
yskew++; yskew++;
} }
}
zareg[ins.mAddress] = -1; zareg[ins.mAddress] = -1;
} }
@ -31556,15 +31660,25 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc)
if (ins.mMode == ASMIM_ZERO_PAGE) if (ins.mMode == ASMIM_ZERO_PAGE)
{ {
if (zxreg[ins.mAddress] >= 0) if (zxreg[ins.mAddress] >= 0)
{
if (ins.mLive & LIVE_CPU_REG_X)
zxreg[ins.mAddress] = -1;
else
{ {
zxreg[ins.mAddress] += 3; zxreg[ins.mAddress] += 3;
xskew--; xskew--;
} }
}
if (zyreg[ins.mAddress] >= 0) if (zyreg[ins.mAddress] >= 0)
{
if (ins.mLive & LIVE_CPU_REG_Y)
zyreg[ins.mAddress] = -1;
else
{ {
zyreg[ins.mAddress] += 3; zyreg[ins.mAddress] += 3;
yskew--; yskew--;
} }
}
zareg[ins.mAddress] = -1; zareg[ins.mAddress] = -1;
} }
@ -31762,47 +31876,6 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc)
block->mExitRequiredRegs += CPU_REG_A; block->mExitRequiredRegs += CPU_REG_A;
} }
if (block->mTrueJump && !lblocks.Contains(block->mTrueJump))
{
block->mTrueJump = block->BuildSingleEntry(proc, block->mTrueJump);
if (areg >= 0 && block->mTrueJump->mEntryRequiredRegs[areg])
{
if (areg < 256)
block->mTrueJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STA, ASMIM_ZERO_PAGE, areg));
block->mTrueJump->mEntryRequiredRegs += CPU_REG_A;
}
if (yreg >= 0 && block->mTrueJump->mEntryRequiredRegs[yreg])
{
block->mTrueJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STY, ASMIM_ZERO_PAGE, yreg));
block->mTrueJump->mEntryRequiredRegs += CPU_REG_Y;
}
if (xreg >= 0 && block->mTrueJump->mEntryRequiredRegs[xreg])
{
block->mTrueJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STX, ASMIM_ZERO_PAGE, xreg));
block->mTrueJump->mEntryRequiredRegs += CPU_REG_X;
}
}
if (block->mFalseJump && !lblocks.Contains(block->mFalseJump))
{
block->mFalseJump = block->BuildSingleEntry(proc, block->mFalseJump);
if (areg >= 0 && block->mFalseJump->mEntryRequiredRegs[areg])
{
if (areg < 256)
block->mFalseJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STA, ASMIM_ZERO_PAGE, areg));
block->mFalseJump->mEntryRequiredRegs += CPU_REG_A;
}
if (yreg >= 0 && block->mFalseJump->mEntryRequiredRegs[yreg])
{
block->mFalseJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STY, ASMIM_ZERO_PAGE, yreg));
block->mFalseJump->mEntryRequiredRegs += CPU_REG_Y;
}
if (xreg >= 0 && block->mFalseJump->mEntryRequiredRegs[xreg])
{
block->mFalseJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STX, ASMIM_ZERO_PAGE, xreg));
block->mFalseJump->mEntryRequiredRegs += CPU_REG_X;
}
}
for (int j = 0; j < block->mEntryBlocks.Size(); j++) for (int j = 0; j < block->mEntryBlocks.Size(); j++)
{ {
if (!lblocks.Contains(block->mEntryBlocks[j])) if (!lblocks.Contains(block->mEntryBlocks[j]))
@ -31831,7 +31904,7 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc)
} }
} }
int yoffset = 0, xoffset = 0; int yoffset = 0, xoffset = 0, xskew = 0, yskew = 0;
for (int j = 0; j < block->mIns.Size(); j++) for (int j = 0; j < block->mIns.Size(); j++)
{ {
@ -31846,6 +31919,7 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc)
ins.mType = ASMIT_NOP; ins.mType = ASMIT_NOP;
ins.mMode = ASMIM_IMPLIED; ins.mMode = ASMIM_IMPLIED;
yoffset = 0; yoffset = 0;
yskew = 0;
} }
break; break;
case ASMIT_LDX: case ASMIT_LDX:
@ -31855,6 +31929,7 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc)
ins.mType = ASMIT_NOP; ins.mType = ASMIT_NOP;
ins.mMode = ASMIM_IMPLIED; ins.mMode = ASMIM_IMPLIED;
xoffset = 0; xoffset = 0;
xskew = 0;
} }
break; break;
case ASMIT_INC: case ASMIT_INC:
@ -31862,11 +31937,13 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc)
{ {
ins.mType = ASMIT_INY; ins.mType = ASMIT_INY;
ins.mMode = ASMIM_IMPLIED; ins.mMode = ASMIM_IMPLIED;
yskew += 1;
} }
else if (ins.mMode == ASMIM_ZERO_PAGE && ins.mAddress == xreg) else if (ins.mMode == ASMIM_ZERO_PAGE && ins.mAddress == xreg)
{ {
ins.mType = ASMIT_INX; ins.mType = ASMIT_INX;
ins.mMode = ASMIM_IMPLIED; ins.mMode = ASMIM_IMPLIED;
xskew += 1;
} }
break; break;
case ASMIT_DEC: case ASMIT_DEC:
@ -31874,11 +31951,13 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc)
{ {
ins.mType = ASMIT_DEY; ins.mType = ASMIT_DEY;
ins.mMode = ASMIM_IMPLIED; ins.mMode = ASMIM_IMPLIED;
yskew -= 1;
} }
else if (ins.mMode == ASMIM_ZERO_PAGE && ins.mAddress == xreg) else if (ins.mMode == ASMIM_ZERO_PAGE && ins.mAddress == xreg)
{ {
ins.mType = ASMIT_DEX; ins.mType = ASMIT_DEX;
ins.mMode = ASMIM_IMPLIED; ins.mMode = ASMIM_IMPLIED;
xskew -= 1;
} }
break; break;
@ -31995,6 +32074,47 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc)
} }
if (block->mTrueJump && !lblocks.Contains(block->mTrueJump))
{
block->mTrueJump = block->BuildSingleEntry(proc, block->mTrueJump);
if (areg >= 0 && block->mTrueJump->mEntryRequiredRegs[areg])
{
if (areg < 256)
block->mTrueJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STA, ASMIM_ZERO_PAGE, areg));
block->mTrueJump->mEntryRequiredRegs += CPU_REG_A;
}
if (yreg >= 0 && block->mTrueJump->mEntryRequiredRegs[yreg])
{
block->mTrueJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STY, ASMIM_ZERO_PAGE, yreg));
block->mTrueJump->mEntryRequiredRegs += CPU_REG_Y;
}
if (xreg >= 0 && block->mTrueJump->mEntryRequiredRegs[xreg])
{
block->mTrueJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STX, ASMIM_ZERO_PAGE, xreg));
block->mTrueJump->mEntryRequiredRegs += CPU_REG_X;
}
}
if (block->mFalseJump && !lblocks.Contains(block->mFalseJump))
{
block->mFalseJump = block->BuildSingleEntry(proc, block->mFalseJump);
if (areg >= 0 && block->mFalseJump->mEntryRequiredRegs[areg])
{
if (areg < 256)
block->mFalseJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STA, ASMIM_ZERO_PAGE, areg));
block->mFalseJump->mEntryRequiredRegs += CPU_REG_A;
}
if (yreg >= 0 && block->mFalseJump->mEntryRequiredRegs[yreg])
{
block->mFalseJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STY, ASMIM_ZERO_PAGE, yreg));
block->mFalseJump->mEntryRequiredRegs += CPU_REG_Y;
}
if (xreg >= 0 && block->mFalseJump->mEntryRequiredRegs[xreg])
{
block->mFalseJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STX, ASMIM_ZERO_PAGE, xreg));
block->mFalseJump->mEntryRequiredRegs += CPU_REG_X;
}
}
block->CheckLive(); block->CheckLive();
} }
@ -40551,7 +40671,7 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc)
{ {
mInterProc = proc; mInterProc = proc;
CheckFunc = !strcmp(mInterProc->mIdent->mString, "test"); CheckFunc = !strcmp(mInterProc->mIdent->mString, "enemies_find");
int nblocks = proc->mBlocks.Size(); int nblocks = proc->mBlocks.Size();
tblocks = new NativeCodeBasicBlock * [nblocks]; tblocks = new NativeCodeBasicBlock * [nblocks];
@ -41631,6 +41751,13 @@ void NativeCodeProcedure::Optimize(void)
} }
#endif #endif
if (step == 7)
{
ResetVisited();
if (mEntryBlock->CrossBlockRegisterAlias(false, false))
changed = true;
}
if (step == 8) if (step == 8)
{ {
ResetVisited(); ResetVisited();

View File

@ -525,6 +525,8 @@ public:
bool CrossBlockYAliasProgpagation(const int * yalias, int yoffset); bool CrossBlockYAliasProgpagation(const int * yalias, int yoffset);
// Visits this block and its successors once each, swapping CMP with CPX/CPY (and back) where the flags say A matches X or Y; returns true if any instruction was changed.
bool CrossBlockRegisterAlias(bool sameAX, bool sameAY);
bool BypassRegisterConditionBlock(void); bool BypassRegisterConditionBlock(void);
bool FoldLoopEntry(void); bool FoldLoopEntry(void);