Optimize indexing in loops

This commit is contained in:
drmortalwombat 2022-11-26 14:12:13 +01:00
parent bc058a1c80
commit 02e4d4bd1b
3 changed files with 138 additions and 13 deletions

View File

@ -2446,6 +2446,13 @@ InterOperand::InterOperand(void)
: mTemp(INVALID_TEMPORARY), mType(IT_NONE), mFinal(false), mIntConst(0), mFloatConst(0), mVarIndex(-1), mOperandSize(0), mLinkerObject(nullptr), mMemory(IM_NONE), mStride(1) : mTemp(INVALID_TEMPORARY), mType(IT_NONE), mFinal(false), mIntConst(0), mFloatConst(0), mVarIndex(-1), mOperandSize(0), mLinkerObject(nullptr), mMemory(IM_NONE), mStride(1)
{} {}
// Reports whether a known limit of this operand's value range lies outside 0..255.
// A limit only counts when its state is IntegerValueRange::S_BOUND; limits in any
// other state are ignored, so an operand with no bound limits yields false.
bool InterOperand::IsNotUByte(void) const
{
	// Known minimum below zero: the value may be negative.
	const bool minBelowZero = (mRange.mMinState == IntegerValueRange::S_BOUND) && (mRange.mMinValue < 0);
	if (minBelowZero)
		return true;

	// Known maximum of 256 or more: the value may not fit into eight bits.
	return (mRange.mMaxState == IntegerValueRange::S_BOUND) && (mRange.mMaxValue >= 256);
}
bool InterOperand::IsUByte(void) const bool InterOperand::IsUByte(void) const
{ {
return return
@ -5869,7 +5876,13 @@ void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(const GrowingVariableArray
case IA_OR: case IA_OR:
case IA_XOR: case IA_XOR:
if (ins->mSrc[0].mTemp < 0) if (ins->mSrc[0].IsUnsigned() && ins->mSrc[1].IsUnsigned())
{
vr.mMaxState = vr.mMinState = IntegerValueRange::S_BOUND;
vr.mMaxValue = BuildLowerBitsMask(ins->mSrc[1].mRange.mMaxValue) | BuildLowerBitsMask(ins->mSrc[0].mRange.mMaxValue);
vr.mMinValue = 0;
}
else if (ins->mSrc[0].mTemp < 0)
{ {
vr = mLocalValueRange[ins->mSrc[1].mTemp]; vr = mLocalValueRange[ins->mSrc[1].mTemp];
int64 v = vr.mMaxValue; int64 v = vr.mMaxValue;
@ -8461,7 +8474,7 @@ bool InterCodeBasicBlock::LoadStoreForwarding(const GrowingInstructionPtrArray&
} }
else if (ins->mCode == IC_COPY || ins->mCode == IC_STRCPY) else if (ins->mCode == IC_COPY || ins->mCode == IC_STRCPY)
flushMem = true; flushMem = true;
else if (ins->mCode == IC_LEA || ins->mCode == IC_UNARY_OPERATOR || ins->mCode == IC_BINARY_OPERATOR || ins->mCode == IC_RELATIONAL_OPERATOR) else if (ins->mCode == IC_LEA || ins->mCode == IC_UNARY_OPERATOR || ins->mCode == IC_BINARY_OPERATOR || ins->mCode == IC_RELATIONAL_OPERATOR || ins->mCode == IC_CONVERSION_OPERATOR)
{ {
int j = 0; int j = 0;
while (j < mLoadStoreInstructions.Size() && !SameInstruction(ins, mLoadStoreInstructions[j])) while (j < mLoadStoreInstructions.Size() && !SameInstruction(ins, mLoadStoreInstructions[j]))
@ -8758,6 +8771,40 @@ void InterCodeBasicBlock::MarkRelevantStatics(void)
} }
} }
// Returns true if any instruction of this block with an index in [from, to)
// has `temp` as its destination temporary (mDst.mTemp).
bool InterCodeBasicBlock::IsTempModifiedInRange(int from, int to, int temp)
{
	int idx = from;
	while (idx < to)
	{
		if (mInstructions[idx]->mDst.mTemp == temp)
			return true;
		idx++;
	}

	return false;
}
bool InterCodeBasicBlock::IsTempUsedInRange(int from, int to, int temp)
{
for (int i = from; i < to; i++)
{
InterInstruction* ins = mInstructions[i];
for (int j = 0; j < ins->mNumOperands; j++)
if (ins->mSrc[j].mTemp == temp)
return true;
}
return false;
}
bool InterCodeBasicBlock::IsTempReferencedInRange(int from, int to, int temp)
{
for (int i = from; i < to; i++)
{
InterInstruction* ins = mInstructions[i];
if (ins->mDst.mTemp == temp)
return true;
for (int j = 0; j < ins->mNumOperands; j++)
if (ins->mSrc[j].mTemp == temp)
return true;
}
return false;
}
bool InterCodeBasicBlock::CanMoveInstructionDown(int si, int ti) const bool InterCodeBasicBlock::CanMoveInstructionDown(int si, int ti) const
{ {
InterInstruction* ins = mInstructions[si]; InterInstruction* ins = mInstructions[si];
@ -10368,9 +10415,11 @@ bool InterCodeBasicBlock::SingleBlockLoopPointerSplit(int& spareTemps)
{ {
int nins = mInstructions.Size(); int nins = mInstructions.Size();
InterCodeBasicBlock* pblock = mEntryBlocks[0]; InterCodeBasicBlock* pblock = mEntryBlocks[0], *eblock = mFalseJump;
if (pblock == this) if (pblock == this)
pblock = mEntryBlocks[1]; pblock = mEntryBlocks[1];
if (eblock == this)
eblock = mTrueJump;
if (mInstructions[nins - 1]->mCode == IC_BRANCH && if (mInstructions[nins - 1]->mCode == IC_BRANCH &&
mInstructions[nins - 2]->mCode == IC_RELATIONAL_OPERATOR && mInstructions[nins - 2]->mCode == IC_RELATIONAL_OPERATOR &&
@ -10389,6 +10438,10 @@ bool InterCodeBasicBlock::SingleBlockLoopPointerSplit(int& spareTemps)
GrowingArray<InterInstructionPtr> tvalues(nullptr); GrowingArray<InterInstructionPtr> tvalues(nullptr);
tvalues.SetSize(mEntryRequiredTemps.Size() + 16); tvalues.SetSize(mEntryRequiredTemps.Size() + 16);
int pi = pblock->mInstructions.Size() - 1;
while (pi >= 0 && pblock->mInstructions[pi]->mDst.mTemp != ains->mDst.mTemp)
pi--;
int i = 0; int i = 0;
while (i < nins - 3 && mInstructions[i]->mDst.mTemp != ains->mDst.mTemp) while (i < nins - 3 && mInstructions[i]->mDst.mTemp != ains->mDst.mTemp)
i++; i++;
@ -10401,6 +10454,53 @@ bool InterCodeBasicBlock::SingleBlockLoopPointerSplit(int& spareTemps)
{ {
tvalues[lins->mDst.mTemp] = lins; tvalues[lins->mDst.mTemp] = lins;
} }
else if (lins->mCode == IC_LEA && lins->mSrc[0].mTemp < 0 && lins->mSrc[0].mIntConst == ains->mSrc[0].mIntConst && lins->mSrc[1].mTemp == lins->mDst.mTemp &&
pi >= 0 && pblock->mInstructions[pi]->mCode == IC_CONSTANT && ains->mSrc[1].IsUByte() && pblock->mInstructions[pi]->mConst.mIntConst == 0 &&
!IsTempReferencedInRange(i + 1, mInstructions.Size(), lins->mDst.mTemp) && !IsTempModifiedInRange(0, i, lins->mDst.mTemp) &&
!eblock->mEntryRequiredTemps[lins->mDst.mTemp])
{
if (spareTemps + 2 >= mEntryRequiredTemps.Size() + 16)
return true;
InterInstruction* nins = new InterInstruction(lins->mLocation, IC_LEA);
InterInstruction* cins = nullptr;
nins->mSrc[1] = lins->mSrc[1];
if (ains->mDst.mType == IT_INT16)
nins->mSrc[0] = ains->mSrc[1];
else
{
cins = new InterInstruction(lins->mLocation, IC_CONVERSION_OPERATOR);
cins->mOperator = IA_EXT8TO16U;
cins->mSrc[0] = ains->mSrc[1];
cins->mDst.mMemory = IM_INDIRECT;
cins->mDst.mTemp = spareTemps++;
cins->mDst.mType = IT_INT16;
nins->mSrc[0] = cins->mDst;
}
nins->mDst.mMemory = IM_INDIRECT;
nins->mDst.mTemp = spareTemps++;
nins->mDst.mType = IT_POINTER;
for (int j = 0; j < i; j++)
{
InterInstruction* tins = mInstructions[j];
for (int k = 0; k < tins->mNumOperands; k++)
{
if (tins->mSrc[k].mTemp == lins->mDst.mTemp)
tins->mSrc[k].mTemp = nins->mDst.mTemp;
}
}
mInstructions.Remove(i);
mInstructions.Insert(0, nins);
if (cins)
mInstructions.Insert(0, cins);
changed = true;
}
else if (lins->mCode == IC_STORE && lins->mSrc[1].mTemp >= 0 && lins->mSrc[1].mIntConst >= 32 && tvalues[lins->mSrc[1].mTemp]) else if (lins->mCode == IC_STORE && lins->mSrc[1].mTemp >= 0 && lins->mSrc[1].mIntConst >= 32 && tvalues[lins->mSrc[1].mTemp])
{ {
if (spareTemps + 2 >= mEntryRequiredTemps.Size() + 16) if (spareTemps + 2 >= mEntryRequiredTemps.Size() + 16)
@ -10740,6 +10840,14 @@ void InterCodeBasicBlock::SingleBlockLoopOptimisation(const NumberSet& aliasedPa
ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_SHL && IsIntegerType(ins->mDst.mType) && ins->mSrc[0].mTemp < 0 && (dep[ins->mSrc[1].mTemp] == DEP_INDEX || dep[ins->mSrc[1].mTemp] == DEP_INDEX_EXTENDED || dep[ins->mSrc[1].mTemp] == DEP_INDEX_DERIVED) || ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_SHL && IsIntegerType(ins->mDst.mType) && ins->mSrc[0].mTemp < 0 && (dep[ins->mSrc[1].mTemp] == DEP_INDEX || dep[ins->mSrc[1].mTemp] == DEP_INDEX_EXTENDED || dep[ins->mSrc[1].mTemp] == DEP_INDEX_DERIVED) ||
ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_ADD && IsIntegerType(ins->mDst.mType) && (ins->mSrc[0].mTemp < 0 || dep[ins->mSrc[0].mTemp] == DEP_UNKNOWN || dep[ins->mSrc[0].mTemp] == DEP_DEFINED) && dep[ins->mSrc[1].mTemp] == DEP_INDEX_DERIVED || ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_ADD && IsIntegerType(ins->mDst.mType) && (ins->mSrc[0].mTemp < 0 || dep[ins->mSrc[0].mTemp] == DEP_UNKNOWN || dep[ins->mSrc[0].mTemp] == DEP_DEFINED) && dep[ins->mSrc[1].mTemp] == DEP_INDEX_DERIVED ||
ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_ADD && IsIntegerType(ins->mDst.mType) && (ins->mSrc[1].mTemp < 0 || dep[ins->mSrc[1].mTemp] == DEP_UNKNOWN || dep[ins->mSrc[1].mTemp] == DEP_DEFINED) && dep[ins->mSrc[0].mTemp] == DEP_INDEX_DERIVED || ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_ADD && IsIntegerType(ins->mDst.mType) && (ins->mSrc[1].mTemp < 0 || dep[ins->mSrc[1].mTemp] == DEP_UNKNOWN || dep[ins->mSrc[1].mTemp] == DEP_DEFINED) && dep[ins->mSrc[0].mTemp] == DEP_INDEX_DERIVED ||
ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_ADD &&
IsIntegerType(ins->mDst.mType) &&
(ins->mSrc[0].mTemp >= 0 && ins->mSrc[0].IsNotUByte() && (dep[ins->mSrc[0].mTemp] == DEP_UNKNOWN || dep[ins->mSrc[0].mTemp] == DEP_DEFINED)) &&
(dep[ins->mSrc[1].mTemp] == DEP_INDEX || dep[ins->mSrc[1].mTemp] == DEP_INDEX_EXTENDED || dep[ins->mSrc[1].mTemp] == DEP_INDEX_DERIVED) ||
ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_ADD &&
IsIntegerType(ins->mDst.mType) &&
(ins->mSrc[1].mTemp >= 0 && ins->mSrc[1].IsNotUByte() && (dep[ins->mSrc[1].mTemp] == DEP_UNKNOWN || dep[ins->mSrc[1].mTemp] == DEP_DEFINED)) &&
(dep[ins->mSrc[0].mTemp] == DEP_INDEX || dep[ins->mSrc[0].mTemp] == DEP_INDEX_EXTENDED || dep[ins->mSrc[0].mTemp] == DEP_INDEX_DERIVED) ||
ins->mCode == IC_LEA && (ins->mSrc[1].mTemp < 0 || dep[ins->mSrc[1].mTemp] == DEP_UNKNOWN || dep[ins->mSrc[1].mTemp] == DEP_DEFINED) && dep[ins->mSrc[0].mTemp] == DEP_INDEX_DERIVED ) ins->mCode == IC_LEA && (ins->mSrc[1].mTemp < 0 || dep[ins->mSrc[1].mTemp] == DEP_UNKNOWN || dep[ins->mSrc[1].mTemp] == DEP_DEFINED) && dep[ins->mSrc[0].mTemp] == DEP_INDEX_DERIVED )
{ {
if (dep[ins->mDst.mTemp] != DEP_INDEX_DERIVED) if (dep[ins->mDst.mTemp] != DEP_INDEX_DERIVED)
@ -10868,7 +10976,7 @@ void InterCodeBasicBlock::SingleBlockLoopOptimisation(const NumberSet& aliasedPa
indexins.Push(ins); indexins.Push(ins);
} }
else if (ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_ADD && IsIntegerType(ins->mDst.mType) && (ins->mSrc[0].mTemp < 0 || dep[ins->mSrc[0].mTemp] == DEP_UNKNOWN || dep[ins->mSrc[0].mTemp] == DEP_DEFINED) && dep[ins->mSrc[1].mTemp] == DEP_INDEX_DERIVED) else if (ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_ADD && IsIntegerType(ins->mDst.mType) && (ins->mSrc[0].mTemp < 0 || dep[ins->mSrc[0].mTemp] == DEP_UNKNOWN || dep[ins->mSrc[0].mTemp] == DEP_DEFINED) && (dep[ins->mSrc[1].mTemp] == DEP_INDEX || dep[ins->mSrc[1].mTemp] == DEP_INDEX_EXTENDED || dep[ins->mSrc[1].mTemp] == DEP_INDEX_DERIVED))
{ {
indexStep[ins->mDst.mTemp] = indexStep[ins->mSrc[1].mTemp]; indexStep[ins->mDst.mTemp] = indexStep[ins->mSrc[1].mTemp];
indexBase[ins->mDst.mTemp] = 0; indexBase[ins->mDst.mTemp] = 0;
@ -10885,7 +10993,7 @@ void InterCodeBasicBlock::SingleBlockLoopOptimisation(const NumberSet& aliasedPa
indexins.Push(ains); indexins.Push(ains);
} }
else if (ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_ADD && IsIntegerType(ins->mDst.mType) && (ins->mSrc[1].mTemp < 0 || dep[ins->mSrc[1].mTemp] == DEP_UNKNOWN || dep[ins->mSrc[1].mTemp] == DEP_DEFINED) && dep[ins->mSrc[0].mTemp] == DEP_INDEX_DERIVED) else if (ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_ADD && IsIntegerType(ins->mDst.mType) && (ins->mSrc[1].mTemp < 0 || dep[ins->mSrc[1].mTemp] == DEP_UNKNOWN || dep[ins->mSrc[1].mTemp] == DEP_DEFINED) && (dep[ins->mSrc[0].mTemp] == DEP_INDEX || dep[ins->mSrc[0].mTemp] == DEP_INDEX_EXTENDED || dep[ins->mSrc[0].mTemp] == DEP_INDEX_DERIVED))
{ {
indexStep[ins->mDst.mTemp] = indexStep[ins->mSrc[0].mTemp]; indexStep[ins->mDst.mTemp] = indexStep[ins->mSrc[0].mTemp];
indexBase[ins->mDst.mTemp] = 0; indexBase[ins->mDst.mTemp] = 0;
@ -13587,9 +13695,18 @@ void InterCodeProcedure::Close(void)
mEntryBlock->CollectEntryBlocks(nullptr); mEntryBlock->CollectEntryBlocks(nullptr);
#endif #endif
#if 1
BuildTraces(false); BuildTraces(false);
ResetEntryBlocks();
ResetVisited();
mEntryBlock->CollectEntryBlocks(nullptr);
#if 1
SingleBlockLoopPointerSplit(activeSet);
MergeIndexedLoadStore();
#endif
#if 1
BuildLoopPrefix(); BuildLoopPrefix();
DisassembleDebug("added dominators"); DisassembleDebug("added dominators");

View File

@ -270,6 +270,8 @@ public:
bool IsSByte(void) const; bool IsSByte(void) const;
bool IsUnsigned(void) const; bool IsUnsigned(void) const;
// True when a bound (S_BOUND) limit of the operand's range lies outside 0..255:
// a minimum below 0 or a maximum of 256 or more.
bool IsNotUByte(void) const;
void Disassemble(FILE* file); void Disassemble(FILE* file);
}; };
@ -478,6 +480,10 @@ public:
bool CanMoveInstructionDown(int si, int ti) const; bool CanMoveInstructionDown(int si, int ti) const;
bool MergeCommonPathInstructions(void); bool MergeCommonPathInstructions(void);
// True if any instruction with index in [from, to) has `temp` as its destination (mDst).
bool IsTempModifiedInRange(int from, int to, int temp);
// True if any instruction with index in [from, to) has `temp` as a source operand (mSrc).
bool IsTempUsedInRange(int from, int to, int temp);
// True if any instruction with index in [from, to) has `temp` as destination or source operand.
bool IsTempReferencedInRange(int from, int to, int temp);
void CheckFinalLocal(void); void CheckFinalLocal(void);
void CheckFinal(void); void CheckFinal(void);

View File

@ -15541,7 +15541,7 @@ bool NativeCodeBasicBlock::JoinTailCodeSequences(NativeCodeProcedure* proc, bool
// //
int nins = mIns.Size(), tins = mTrueJump->mIns.Size(), fins = mFalseJump->mIns.Size(); int nins = mIns.Size(), tins = mTrueJump->mIns.Size(), fins = mFalseJump->mIns.Size();
if (nins > 1 && tins > 0 && fins > 0 && mFalseJump->mIns[0].mType == ASMIT_TXA && if (nins > 1 && tins > 0 && fins > 0 && mFalseJump->mIns[0].mType == ASMIT_TXA &&
!mIns[nins - 1].ChangesAccu() && !mFalseJump->mEntryRequiredRegs[CPU_REG_A]) mIns[nins - 2].mType == ASMIT_TAX && !mIns[nins - 1].ChangesAccu() && !mFalseJump->mEntryRequiredRegs[CPU_REG_A])
{ {
mTrueJump->mIns.Push(NativeCodeInstruction(ASMIT_TXA)); mTrueJump->mIns.Push(NativeCodeInstruction(ASMIT_TXA));
mFalseJump->mIns[0].mType = ASMIT_NOP; mFalseJump->mIns[0].mMode = ASMIM_IMPLIED; mFalseJump->mIns[0].mType = ASMIT_NOP; mFalseJump->mIns[0].mMode = ASMIM_IMPLIED;
@ -30990,7 +30990,7 @@ void NativeCodeProcedure::RebuildEntry(void)
void NativeCodeProcedure::Optimize(void) void NativeCodeProcedure::Optimize(void)
{ {
CheckFunc = !strcmp(mInterProc->mIdent->mString, "plants_animate"); CheckFunc = !strcmp(mInterProc->mIdent->mString, "menu_draw_color_line");
#if 1 #if 1
int step = 0; int step = 0;
@ -31028,6 +31028,7 @@ void NativeCodeProcedure::Optimize(void)
RebuildEntry(); RebuildEntry();
#if 1 #if 1
if (step > 3) if (step > 3)
{ {
@ -31061,7 +31062,6 @@ void NativeCodeProcedure::Optimize(void)
mEntryBlock->ReplaceFinalZeroPageUse(this); mEntryBlock->ReplaceFinalZeroPageUse(this);
} }
#endif #endif
#if 1 #if 1
do do
{ {
@ -31071,10 +31071,13 @@ void NativeCodeProcedure::Optimize(void)
ResetVisited(); ResetVisited();
changed = mEntryBlock->RemoveUnusedResultInstructions(); changed = mEntryBlock->RemoveUnusedResultInstructions();
ResetVisited(); ResetVisited();
NativeRegisterDataSet data; NativeRegisterDataSet data;
if (mEntryBlock->ValueForwarding(data, step > 0, step == 7)) if (mEntryBlock->ValueForwarding(data, step > 0, step == 7))
{
changed = true; changed = true;
}
else else
{ {
#if 1 #if 1
@ -31146,7 +31149,6 @@ void NativeCodeProcedure::Optimize(void)
changed = true; changed = true;
} }
#endif #endif
#if 1 #if 1
if (step > 0) if (step > 0)
{ {
@ -31192,7 +31194,6 @@ void NativeCodeProcedure::Optimize(void)
} }
#endif #endif
#if _DEBUG #if _DEBUG
ResetVisited(); ResetVisited();
mEntryBlock->CheckBlocks(true); mEntryBlock->CheckBlocks(true);
@ -31212,6 +31213,8 @@ void NativeCodeProcedure::Optimize(void)
} }
#endif #endif
#if _DEBUG #if _DEBUG
ResetVisited(); ResetVisited();
mEntryBlock->CheckBlocks(true); mEntryBlock->CheckBlocks(true);
@ -31358,7 +31361,6 @@ void NativeCodeProcedure::Optimize(void)
} }
#endif #endif
#if _DEBUG #if _DEBUG
ResetVisited(); ResetVisited();
mEntryBlock->CheckBlocks(); mEntryBlock->CheckBlocks();
@ -31374,7 +31376,6 @@ void NativeCodeProcedure::Optimize(void)
changed = true; changed = true;
#endif #endif
#if 1 #if 1
if (step >= 4) if (step >= 4)
{ {
@ -31487,6 +31488,7 @@ void NativeCodeProcedure::Optimize(void)
else else
cnt++; cnt++;
} while (changed); } while (changed);
#if 1 #if 1