Loop optimizations

This commit is contained in:
drmortalwombat 2023-06-08 20:27:11 +02:00
parent d054818aa3
commit e593e2affb
5 changed files with 226 additions and 39 deletions

View File

@ -653,9 +653,15 @@ Declaration* Declaration::ToStriped(Errors * errors)
if (mBase->mType == DT_TYPE_ARRAY)
{
ndec->mBase = mBase->Clone();
ndec->mStride = mSize / mBase->mSize;
ndec->mBase->mStride = 1;
ndec->mBase->mBase = mBase->mBase->ToStriped(mSize / mBase->mBase->mSize);
if (mBase->mSize)
{
ndec->mStride = mBase->mSize / mBase->mBase->mSize;
ndec->mBase->mStride = 1;
ndec->mBase->mBase = mBase->mBase->ToStriped(mSize / mBase->mBase->mSize);
}
else
errors->Error(ndec->mLocation, ERRR_STRIPE_REQUIRES_FIXED_SIZE_ARRAY, "__striped with zero size");
}
else
{

View File

@ -26,6 +26,7 @@ enum ErrorID
EWARN_LOOP_UNROLL_IGNORED,
EWARN_USE_OF_UNINITIALIZED_VARIABLE,
EWARN_MISSING_RETURN_STATEMENT,
EWARN_UNREACHABLE_CODE,
EERR_GENERIC = 3000,
EERR_FILE_NOT_FOUND,

View File

@ -1154,6 +1154,11 @@ static bool HasSideEffect(InterCode code)
return code == IC_CALL || code == IC_CALL_NATIVE || code == IC_ASSEMBLER;
}
// Classifies an intermediate opcode as "observable": calls (regular and
// native), inline assembler, stores, copies and string copies.
// Every other opcode yields false.
static bool IsObservable(InterCode code)
{
	switch (code)
	{
	case IC_CALL:
	case IC_CALL_NATIVE:
	case IC_ASSEMBLER:
	case IC_STORE:
	case IC_COPY:
	case IC_STRCPY:
		return true;
	default:
		return false;
	}
}
static bool IsMoveable(InterCode code)
{
if (HasSideEffect(code) || code == IC_COPY || code == IC_STRCPY || code == IC_STORE || code == IC_BRANCH || code == IC_POP_FRAME || code == IC_PUSH_FRAME)
@ -5893,6 +5898,7 @@ void InterCodeBasicBlock::SimplifyIntegerRangeRelops(void)
if (bins->mSrc[0].mIntConst)
{
mFalseJump->mNumEntries--;
mFalseJump->WarnUnreachable();
mFalseJump = nullptr;
bins->mCode = IC_JUMP;
bins->mSrc[0].mTemp = -1;
@ -5901,6 +5907,7 @@ void InterCodeBasicBlock::SimplifyIntegerRangeRelops(void)
else
{
mTrueJump->mNumEntries--;
mTrueJump->WarnUnreachable();
mTrueJump = mFalseJump;
mFalseJump = nullptr;
bins->mCode = IC_JUMP;
@ -5916,6 +5923,7 @@ void InterCodeBasicBlock::SimplifyIntegerRangeRelops(void)
if (mInstructions[sz - 2]->mConst.mIntConst)
{
mFalseJump->mNumEntries--;
mFalseJump->WarnUnreachable();
mFalseJump = nullptr;
bins->mCode = IC_JUMP;
bins->mSrc[0].mTemp = -1;
@ -5924,6 +5932,7 @@ void InterCodeBasicBlock::SimplifyIntegerRangeRelops(void)
else
{
mTrueJump->mNumEntries--;
mTrueJump->WarnUnreachable();
mTrueJump = mFalseJump;
mFalseJump = nullptr;
bins->mCode = IC_JUMP;
@ -9908,6 +9917,30 @@ void InterCodeBasicBlock::MarkRelevantStatics(void)
}
}
// Whole-block variant of IsInsModifiedInRange: checks the given instruction
// against every instruction currently held by this block.
bool InterCodeBasicBlock::IsInsModified(const InterInstruction* ins)
{
	const int count = mInstructions.Size();
	return IsInsModifiedInRange(0, count, ins);
}
// Returns true when any temporary touched by "ins" (its destination, or one
// of its mNumOperands sources) is reported as modified by
// IsTempModifiedInRange for the instruction index range from..to of this
// block. Operands without a temporary (mTemp < 0) are ignored.
bool InterCodeBasicBlock::IsInsModifiedInRange(int from, int to, const InterInstruction* ins)
{
	// Destination first, same order as the operand scan below relies on.
	const int dstTemp = ins->mDst.mTemp;
	if (dstTemp >= 0 && IsTempModifiedInRange(from, to, dstTemp))
		return true;

	int k = 0;
	while (k < ins->mNumOperands)
	{
		const int srcTemp = ins->mSrc[k].mTemp;
		if (srcTemp >= 0 && IsTempModifiedInRange(from, to, srcTemp))
			return true;
		k++;
	}

	return false;
}
// Whole-block variant of IsTempModifiedInRange: spans index 0 up to the
// current number of instructions in this block.
bool InterCodeBasicBlock::IsTempModified(int temp)
{
	const int count = mInstructions.Size();
	return IsTempModifiedInRange(0, count, temp);
}
bool InterCodeBasicBlock::IsTempModifiedInRange(int from, int to, int temp)
{
for (int i = from; i < to; i++)
@ -9928,6 +9961,11 @@ bool InterCodeBasicBlock::IsTempUsedInRange(int from, int to, int temp)
return false;
}
// Whole-block variant of IsTempReferencedInRange: spans index 0 up to the
// current number of instructions in this block.
bool InterCodeBasicBlock::IsTempReferenced(int temp)
{
	const int count = mInstructions.Size();
	return IsTempReferencedInRange(0, count, temp);
}
bool InterCodeBasicBlock::IsTempReferencedInRange(int from, int to, int temp)
{
for (int i = from; i < to; i++)
@ -11481,7 +11519,7 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar
if (tail->CollectSingleHeadLoopBody(this, tail, body))
{
int tz = tail->mInstructions.Size();
#if 1
if (tz > 2)
{
InterInstruction* ai = tail->mInstructions[tz - 3];
@ -11524,15 +11562,74 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar
}
}
}
}
else if (ai->mCode == IC_BINARY_OPERATOR && ai->mOperator == IA_ADD && ai->mSrc[0].mTemp < 0 && ai->mDst.mTemp == ai->mSrc[1].mTemp && ai->mSrc[0].mIntConst == 1 && IsIntegerType(ai->mDst.mType) &&
ci->mCode == IC_RELATIONAL_OPERATOR && ci->mOperator == IA_CMPLU && ci->mSrc[0].mTemp >= 0 && ci->mSrc[0].IsUnsigned() && ci->mSrc[1].mTemp == ai->mDst.mTemp &&
bi->mCode == IC_BRANCH && bi->mSrc[0].mTemp == ci->mDst.mTemp && !post->mEntryRequiredTemps[ai->mDst.mTemp] &&
!tail->IsTempReferencedInRange(0, tz - 3, ai->mDst.mTemp) && !tail->IsTempModifiedInRange(0, tz - 3, ci->mSrc[0].mTemp))
{
int i = 0;
while (i + 1 < body.Size() &&
!body[i]->IsTempReferenced(ai->mDst.mTemp) &&
!body[i]->IsTempModified(ci->mSrc[0].mTemp))
i++;
if (i + 1 == body.Size())
{
int64 num = ci->mSrc[0].mRange.mMaxValue;
InterInstruction* si = FindSourceInstruction(mLoopPrefix, ai->mDst.mTemp);
if (si && si->mCode == IC_CONSTANT && si->mSrc[0].mIntConst == 0)
{
InterInstruction* mins = new InterInstruction(si->mLocation, IC_LOAD_TEMPORARY);
mins->mSrc[0] = ci->mSrc[0];
mins->mDst = ai->mDst;
mLoopPrefix->mInstructions.Insert(mLoopPrefix->mInstructions.Size() - 1, mins);
ai->mOperator = IA_SUB;
ai->mSrc[0].mIntConst = 1;
ci->mOperator = IA_CMPGU;
ci->mSrc[0].mTemp = -1;
ci->mSrc[0].mIntConst = 0;
ai->mSrc[1].mRange.SetLimit(1, num);
ai->mDst.mRange.SetLimit(0, num - 1);
ci->mSrc[1].mRange.SetLimit(0, num - 1);
modified = true;
}
}
}
}
#endif
int i = 0;
while (i < mInstructions.Size())
{
InterInstruction* lins = mInstructions[i];
if (lins->mCode == IC_LOAD && lins->mSrc[0].mTemp < 0 && !tail->mExitRequiredTemps[lins->mDst.mTemp])
if (lins->mCode == IC_BINARY_OPERATOR || lins->mCode == IC_CONSTANT || lins->mCode == IC_UNARY_OPERATOR ||
lins->mCode == IC_CONVERSION_OPERATOR || lins->mCode == IC_SELECT ||
lins->mCode == IC_RELATIONAL_OPERATOR)
{
#if 1
if (CanMoveInstructionBeforeBlock(i) && !IsInsModifiedInRange(i + 1, mInstructions.Size(), lins) && !tail->IsInsModified(lins) && !lins->UsesTemp(lins->mDst.mTemp))
{
int j = 1;
while (j < body.Size() && !body[j]->IsInsModified(lins))
j++;
if (j == body.Size())
{
mLoopPrefix->mInstructions.Insert(mLoopPrefix->mInstructions.Size() - 1, lins);
mLoopPrefix->mExitRequiredTemps += lins->mDst.mTemp;
mEntryRequiredTemps += lins->mDst.mTemp;
mInstructions.Remove(i);
i--;
modified = true;
}
}
#endif
}
else if (lins->mCode == IC_LOAD && lins->mSrc[0].mTemp < 0 && !tail->mExitRequiredTemps[lins->mDst.mTemp])
{
if (CanMoveInstructionBeforeBlock(i))
{
@ -12171,6 +12268,43 @@ bool InterCodeBasicBlock::CheckSingleBlockLimitedLoop(InterCodeBasicBlock*& pbl
}
}
}
else if (mInstructions[nins - 1]->mCode == IC_BRANCH &&
mInstructions[nins - 2]->mCode == IC_RELATIONAL_OPERATOR &&
mInstructions[nins - 3]->mCode == IC_BINARY_OPERATOR && mInstructions[nins - 3]->mOperator == IA_SUB)
{
InterInstruction* ains = mInstructions[nins - 3];
InterInstruction* cins = mInstructions[nins - 2];
InterInstruction* bins = mInstructions[nins - 1];
if (bins->mSrc[0].mTemp == cins->mDst.mTemp &&
cins->mSrc[1].mTemp == ains->mDst.mTemp &&
cins->mSrc[0].mTemp < 0 &&
ains->mSrc[1].mTemp == ains->mDst.mTemp &&
ains->mSrc[0].mTemp < 0 &&
(cins->mOperator == IA_CMPGU || cins->mOperator == IA_CMPGEU) &&
ains->mSrc[0].mIntConst > 0)
{
int pi = pblock->mInstructions.Size() - 1;
while (pi >= 0 && pblock->mInstructions[pi]->mDst.mTemp != ains->mDst.mTemp)
pi--;
if (pi >= 0 && pblock->mInstructions[pi]->mCode == IC_CONSTANT)
{
int i = 0;
while (i < nins - 3 && mInstructions[i]->mDst.mTemp != ains->mDst.mTemp)
i++;
if (i == nins - 3)
{
nloop = pblock->mInstructions[pi]->mConst.mIntConst - cins->mSrc[0].mIntConst;
if (cins->mOperator == IA_CMPGEU)
nloop++;
nloop = (nloop + ains->mSrc[0].mIntConst - 1) / ains->mSrc[0].mIntConst;
return true;
}
}
}
}
else if (
mInstructions[nins - 1]->mCode == IC_BRANCH &&
mInstructions[nins - 2]->mCode == IC_BINARY_OPERATOR && mInstructions[nins - 2]->mOperator == IA_ADD)
@ -14981,6 +15115,21 @@ void InterCodeBasicBlock::Disassemble(FILE* file, bool dumpSets)
}
}
void InterCodeBasicBlock::WarnUnreachable(void)
{
if (mNumEntries == 0)
{
int i = 0;
while (i < mInstructions.Size() && !IsObservable(mInstructions[i]->mCode))
i++;
if (i < mInstructions.Size())
mProc->mModule->mErrors->Error(mInstructions[i]->mLocation, EWARN_UNREACHABLE_CODE, "Unreachable code");
}
}
InterCodeProcedure::InterCodeProcedure(InterCodeModule * mod, const Location & location, const Ident* ident, LinkerObject * linkerObject)
: mTemporaries(IT_NONE), mBlocks(nullptr), mLocation(location), mTempOffset(-1), mTempSizes(0),
mRenameTable(-1), mRenameUnionTable(-1), mGlobalRenameTable(-1),
@ -15832,7 +15981,7 @@ void InterCodeProcedure::Close(void)
{
GrowingTypeArray tstack(IT_NONE);
CheckFunc = !strcmp(mIdent->mString, "main");
CheckFunc = !strcmp(mIdent->mString, "sformat");
mEntryBlock = mBlocks[0];
@ -16234,6 +16383,9 @@ void InterCodeProcedure::Close(void)
BuildTraces(false);
#endif
SingleTailLoopOptimization(paramMemory);
BuildDataFlowSets();
#if 1
ExpandSelect();
@ -16405,37 +16557,8 @@ void InterCodeProcedure::Close(void)
#if 1
do {
changed = false;
BuildLoopPrefix();
DisassembleDebug("added dominators");
ResetEntryBlocks();
ResetVisited();
mEntryBlock->CollectEntryBlocks(nullptr);
BuildDataFlowSets();
ResetVisited();
changed = mEntryBlock->SingleTailLoopOptimization(mParamAliasedSet, mModule->mGlobalVars);
DisassembleDebug("SingleTailLoopOptimization");
if (changed)
{
TempForwarding();
RemoveUnusedInstructions();
RemoveUnusedStoreInstructions(paramMemory);
}
BuildTraces(false);
DisassembleDebug("Rebuilt traces");
} while (changed);
SingleTailLoopOptimization(paramMemory);
BuildDataFlowSets();
#endif
@ -16686,6 +16809,44 @@ void InterCodeProcedure::RemoveNonRelevantStatics(void)
}
}
// Procedure-level driver for the per-block SingleTailLoopOptimization pass.
// Each round rebuilds loop prefixes, entry blocks and data flow sets, runs the
// block pass starting at mEntryBlock, and rebuilds traces. When the block pass
// reports a change, temp forwarding and unused instruction/store removal run
// before the traces are rebuilt, and another round follows. The loop ends
// after the first round that reports no change.
//
// paramMemory is forwarded to RemoveUnusedStoreInstructions.
void InterCodeProcedure::SingleTailLoopOptimization(InterMemory paramMemory)
{
	for (;;)
	{
		BuildLoopPrefix();
		DisassembleDebug("added dominators");

		ResetEntryBlocks();
		ResetVisited();
		mEntryBlock->CollectEntryBlocks(nullptr);

		BuildDataFlowSets();

		ResetVisited();
		const bool optimized = mEntryBlock->SingleTailLoopOptimization(mParamAliasedSet, mModule->mGlobalVars);
		DisassembleDebug("SingleTailLoopOptimization");

		if (optimized)
		{
			// Clean up what the loop pass left behind before the next round.
			TempForwarding();
			RemoveUnusedInstructions();
			RemoveUnusedStoreInstructions(paramMemory);
		}

		BuildTraces(false);
		DisassembleDebug("Rebuilt traces");

		if (!optimized)
			break;
	}
}
void InterCodeProcedure::MapVariables(void)
{
ResetVisited();

View File

@ -438,6 +438,8 @@ public:
bool CombineIndirectAddressing(void);
void WarnUnreachable(void);
GrowingIntArray mEntryRenameTable;
GrowingIntArray mExitRenameTable;
@ -510,10 +512,15 @@ public:
int FindSameInstruction(const InterInstruction* ins) const;
bool MergeCommonPathInstructions(void);
bool IsTempModified(int temp);
bool IsTempModifiedInRange(int from, int to, int temp);
bool IsTempUsedInRange(int from, int to, int temp);
bool IsTempReferenced(int temp);
bool IsTempReferencedInRange(int from, int to, int temp);
bool IsInsModified(const InterInstruction* ins);
bool IsInsModifiedInRange(int from, int to, const InterInstruction* ins);
InterInstruction* FindTempOrigin(int temp) const;
void CheckFinalLocal(void);
@ -643,6 +650,7 @@ protected:
void PropagateConstOperationsUp(void);
void RebuildIntegerRangeSet(void);
void CombineIndirectAddressing(void);
void SingleTailLoopOptimization(InterMemory paramMemory);
void MergeBasicBlocks(void);
void CheckUsedDefinedTemps(void);

View File

@ -26097,6 +26097,7 @@ bool NativeCodeBasicBlock::MoveLoadImmStoreAbsoluteUp(int at)
{
while (j < at && mIns[j].mType == ASMIT_STA)
j++;
mIns[j - 1].mLive |= LIVE_CPU_REG_A;
NativeCodeInstruction sins = mIns[at + 1];
mIns.Remove(at + 1);
@ -26107,7 +26108,7 @@ bool NativeCodeBasicBlock::MoveLoadImmStoreAbsoluteUp(int at)
sins.mLive |= LIVE_CPU_REG_X;
if (sins.ReferencesYReg())
sins.mLive |= LIVE_CPU_REG_Y;
mIns.Insert(j, sins);
return true;
@ -28171,6 +28172,16 @@ bool NativeCodeBasicBlock::OptimizeLoopCarryOver(void)
mExitRequiredRegs += CPU_REG_Y;
changed = true;
}
else if (sz > 1 && hblock->mIns[0].mType == ASMIT_LDA && mIns[sz - 1].mType == ASMIT_CMP && mIns[sz - 2].mType == ASMIT_LDA && hblock->mIns[0].SameEffectiveAddress(mIns[sz - 2]) && !(hblock->mIns[0].mLive & LIVE_CPU_REG_Z))
{
pblock->mIns.Push(hblock->mIns[0]);
hblock->mIns.Remove(0);
pblock->mExitRequiredRegs += CPU_REG_A;
hblock->mEntryRequiredRegs += CPU_REG_A;
mExitRequiredRegs += CPU_REG_A;
changed = true;
}
}
}
}