Optimize short wait loop handling

This commit is contained in:
drmortalwombat 2024-01-28 19:09:59 +01:00
parent 91c907be7e
commit 80efe2351b
7 changed files with 321 additions and 22 deletions

View File

@ -34,8 +34,8 @@ byte vdc_reg_read(VDCRegister reg)
// Write the 16 bit value addr to the VDCR_ADDRH / VDCR_ADDRL register pair and
// then call vdc_reg(VDCR_DATA).
// NOTE(review): VDCR_ADDRH is written both before and after VDCR_ADDRL in this
// view. This looks like the old and the new position of a reordered line being
// shown together - confirm which write order is the intended one.
void vdc_mem_addr(unsigned addr)
{
// Upper part of the address (addr shifted right by eight bits)
vdc_reg_write(VDCR_ADDRH, addr >> 8);
// Lower part of the address; presumably truncated to a byte by the callee - TODO confirm
vdc_reg_write(VDCR_ADDRL, addr);
vdc_reg_write(VDCR_ADDRH, addr >> 8);
// Presumably selects the data register for the accesses that follow - TODO confirm
vdc_reg(VDCR_DATA);
}

View File

@ -3368,6 +3368,17 @@ void InterOperand::ForwardMem(const InterOperand& op)
}
// Make this operand refer to the same value as op: the temp, value range and
// constant values are copied from op, and the operand is no longer marked final.
void InterOperand::Forward(const InterOperand& op)
{
	// An IT_INT8 operand keeps its own type when op is IT_INT16 or IT_INT32,
	// in every other combination the type of op is taken over.
	const bool keepOwnType = mType == IT_INT8 && (op.mType == IT_INT16 || op.mType == IT_INT32);
	if (!keepOwnType)
		mType = op.mType;

	mTemp = op.mTemp;
	mRange = op.mRange;

	mIntConst = op.mIntConst;
	mFloatConst = op.mFloatConst;

	mFinal = false;
}
void InterOperand::ForwardTemp(const InterOperand& op)
{
mTemp = op.mTemp;
if (mType != IT_INT8 || op.mType != IT_INT16 && op.mType != IT_INT32)
@ -9699,13 +9710,13 @@ bool InterCodeBasicBlock::EliminateAliasValues(const GrowingInstructionPtrArray&
for (int j = 0; j < ins->mNumOperands; j++)
{
if (ins->mSrc[j].mTemp > 0 && lavalue[ins->mSrc[j].mTemp])
if (ins->mSrc[j].mTemp >= 0 && lavalue[ins->mSrc[j].mTemp])
{
InterInstruction* mins = lavalue[ins->mSrc[j].mTemp];
if (mExitRequiredTemps[mins->mDst.mTemp] && !mExitRequiredTemps[mins->mSrc[0].mTemp])
{
ins->mSrc[j].Forward(mins->mDst);
ins->mSrc[j].ForwardTemp(mins->mDst);
changed = true;
}
}
@ -10329,7 +10340,7 @@ bool InterCodeBasicBlock::SimplifyIntegerNumeric(const GrowingInstructionPtrArra
if (pins->mSrc[0].mTemp < 0 && ins->mSrc[1].mIntConst + pins->mSrc[0].mIntConst >= 0)
{
ins->mSrc[1].Forward(pins->mSrc[1]);
ins->mSrc[1].ForwardTemp(pins->mSrc[1]);
pins->mSrc[1].mFinal = false;
ins->mSrc[1].mIntConst += pins->mSrc[0].mIntConst;
changed = true;
@ -11277,6 +11288,7 @@ bool InterCodeBasicBlock::LoadStoreForwarding(const GrowingInstructionPtrArray&
flushMem = true;
else if (ins->mCode == IC_LEA || ins->mCode == IC_UNARY_OPERATOR || ins->mCode == IC_BINARY_OPERATOR || ins->mCode == IC_RELATIONAL_OPERATOR || ins->mCode == IC_CONVERSION_OPERATOR)
{
//
int j = 0;
while (j < mLoadStoreInstructions.Size() && !SameInstruction(ins, mLoadStoreInstructions[j]))
j++;
@ -13384,7 +13396,24 @@ InterCodeBasicBlock* InterCodeBasicBlock::BuildLoopPrefix(void)
bool InterCodeBasicBlock::CollectLoopBody(InterCodeBasicBlock* head, ExpandingArray<InterCodeBasicBlock*> & body)
{
if (mLoopHead)
{
#if 0
return this == head;
#else
if (this == head)
return true;
else if ((mTrueJump == this || mFalseJump == this) && mEntryBlocks.Size() == 2)
{
int j = 0;
while (j < mInstructions.Size() && (mInstructions[j]->mDst.mTemp < 0 || !mExitRequiredTemps[mInstructions[j]->mDst.mTemp]))
j++;
if (j != mInstructions.Size())
return false;
}
else
return false;
#endif
}
if (body.IndexOf(this) != -1)
return true;
@ -16941,7 +16970,7 @@ bool InterCodeBasicBlock::PeepholeReplaceOptimization(const GrowingVariableArray
mInstructions[i + 1]->mCode == IC_LEA && mInstructions[i + 1]->mSrc[0].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[0].mFinal
)
{
mInstructions[i + 1]->mSrc[0].Forward(mInstructions[i + 0]->mSrc[0]);
mInstructions[i + 1]->mSrc[0].ForwardTemp(mInstructions[i + 0]->mSrc[0]);
mInstructions[i + 0]->mCode = IC_NONE; mInstructions[i + 0]->mNumOperands = 0;
changed = true;
}
@ -16952,7 +16981,7 @@ bool InterCodeBasicBlock::PeepholeReplaceOptimization(const GrowingVariableArray
mInstructions[i + 1]->mCode == IC_LOAD && mInstructions[i + 1]->mSrc[0].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[0].mFinal
)
{
mInstructions[i + 1]->mSrc[0].Forward(mInstructions[i + 0]->mSrc[0]);
mInstructions[i + 1]->mSrc[0].ForwardTemp(mInstructions[i + 0]->mSrc[0]);
mInstructions[i + 0]->mCode = IC_NONE; mInstructions[i + 0]->mNumOperands = 0;
changed = true;
}
@ -19873,7 +19902,7 @@ void InterCodeProcedure::Close(void)
{
GrowingTypeArray tstack(IT_NONE);
CheckFunc = !strcmp(mIdent->mString, "test");
CheckFunc = !strcmp(mIdent->mString, "strcat");
CheckCase = false;
mEntryBlock = mBlocks[0];

View File

@ -274,6 +274,7 @@ public:
IntegerValueRange mRange;
void Forward(const InterOperand& op);
void ForwardTemp(const InterOperand& op);
void ForwardMem(const InterOperand& op);
InterOperand(void);

View File

@ -536,14 +536,93 @@ void Linker::ReferenceObject(LinkerObject* obj)
}
}
// Try to place lobj directly behind its prefix object, overlapping the last
// three bytes of the prefix (the prefix shrinks by three bytes and the flags of
// its suffix reference are cleared).
//
// Only applies when lobj has a prefix that is already placed. The spot behind
// the prefix is either the end of the used area (prefix is mLastObject) or the
// free chunk that records the prefix as its mLastObject.
//
// Returns true when lobj was placed and, if it has a suffix that is not placed
// yet, that suffix could be allocated as well. Returns false when there is no
// placed prefix, no spot behind it, lobj does not fit, or the suffix
// allocation failed.
bool LinkerRegion::AllocateAppend(Linker* linker, LinkerObject* lobj)
{
	LinkerObject* prefix = lobj->mPrefix;
	if (!prefix || !(prefix->mFlags & LOBJF_PLACED))
		return false;

	// Index of the free chunk behind the prefix, -1 when appending to the used area
	int chunk = -1;
	int start = 0, end = 0;

	if (prefix == mLastObject)
	{
		start = mStart + mUsed - 3;
		end = start + lobj->mSize;
		if (end > mEnd)
			return false;
	}
	else
	{
		for (chunk = 0; chunk < mFreeChunks.Size(); chunk++)
		{
			if (prefix == mFreeChunks[chunk].mLastObject)
				break;
		}
		if (chunk >= mFreeChunks.Size())
			return false;

		start = mFreeChunks[chunk].mStart - 3;
		end = start + lobj->mSize;
		if (end > mFreeChunks[chunk].mEnd)
			return false;
	}

	// Drop the last three bytes of the prefix together with its suffix reference
	prefix->mReferences[prefix->mSuffixReference]->mFlags = 0;
	prefix->mSize -= 3;

	lobj->mFlags |= LOBJF_PLACED;
	lobj->mAddress = start;
	lobj->mRefAddress = start + mReloc;
	lobj->mRegion = this;

	if (chunk < 0)
	{
		// Appended to the used area
		mUsed = end - mStart;
		mLastObject = lobj;
	}
	else if (end == mFreeChunks[chunk].mEnd)
	{
		// Chunk is filled completely
		mFreeChunks.Remove(chunk);
	}
	else
	{
		// Remainder of the chunk now follows lobj
		mFreeChunks[chunk].mStart = end;
		mFreeChunks[chunk].mLastObject = lobj;
	}

	// Continue the chain with the suffix if it still needs a place
	LinkerObject* suffix = lobj->mSuffix;
	if (suffix && !(suffix->mFlags & LOBJF_PLACED))
		return Allocate(linker, suffix, true);

	return true;
}
bool LinkerRegion::Allocate(Linker * linker, LinkerObject* lobj, bool merge)
{
if (merge && lobj->mPrefix && !(lobj->mPrefix->mFlags & LOBJF_PLACED))
if (merge && lobj->mPrefix)
{
if (!Allocate(linker, lobj->mPrefix, true))
return false;
if (!(lobj->mPrefix->mFlags & LOBJF_PLACED))
{
if (!Allocate(linker, lobj->mPrefix, true))
return false;
if (lobj->mFlags & LOBJF_PLACED)
if (lobj->mFlags & LOBJF_PLACED)
return true;
}
if (AllocateAppend(linker, lobj))
return true;
}
@ -587,7 +666,7 @@ bool LinkerRegion::Allocate(Linker * linker, LinkerObject* lobj, bool merge)
}
else
{
mFreeChunks.Insert(i + 1, FreeChunk{ end, mFreeChunks[i].mEnd, lobj } );
mFreeChunks.Insert(i + 1, FreeChunk{ end, mFreeChunks[i].mEnd, lobj });
mFreeChunks[i].mEnd = start;
}

View File

@ -98,6 +98,7 @@ public:
GrowingArray<FreeChunk> mFreeChunks;
LinkerObject * mLastObject;
bool AllocateAppend(Linker* linker, LinkerObject* obj);
bool Allocate(Linker * linker, LinkerObject* obj, bool merge);
void PlaceStackSection(LinkerSection* stackSection, LinkerSection* section);
};

View File

@ -30624,6 +30624,8 @@ bool NativeCodeBasicBlock::MoveTYADCStoreDown(int at)
bool NativeCodeBasicBlock::MoveLDSTXOutOfRange(int at)
{
bool vol = mIns[at + 1].mFlags & NCIF_VOLATILE;
int j = at + 2;
while (j < mIns.Size())
{
@ -30634,8 +30636,12 @@ bool NativeCodeBasicBlock::MoveLDSTXOutOfRange(int at)
}
else if (mIns[j].MayBeSameAddress(mIns[at + 1]))
return false;
else if (vol && mIns[j].ChangesGlobalMemory())
return false;
else if (mIns[at + 1].mMode == ASMIM_ABSOLUTE && (mIns[j].mFlags & NCIF_VOLATILE))
return false;
else if (mIns[j].ReferencesXReg())
return false;
if (mIns[j].mType == ASMIT_JSR)
return false;
@ -30652,6 +30658,38 @@ bool NativeCodeBasicBlock::MoveLDSTXOutOfRange(int at)
j++;
}
j = at;
while (j > 0)
{
j--;
if (!(mIns[j].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z)))
{
mIns.Insert(j + 1, NativeCodeInstruction(mIns[at + 1].mIns, ASMIT_STA, mIns[at + 1]));
mIns.Insert(j + 1, NativeCodeInstruction(mIns[at + 1].mIns, ASMIT_LDA, mIns[at + 1]));
mIns.Remove(at + 2, 2);
return true;
}
if (mIns[at + 1].mMode == ASMIM_ZERO_PAGE)
{
if (mIns[j].ReferencesZeroPage(mIns[at + 1].mAddress))
return false;
}
else if (mIns[j].MayBeSameAddress(mIns[at + 1]))
return false;
else if (vol && mIns[j].ChangesGlobalMemory())
return false;
else if (mIns[at + 1].mMode == ASMIM_ABSOLUTE && (mIns[j].mFlags & NCIF_VOLATILE))
return false;
else if (mIns[j].ReferencesXReg())
return false;
if (mIns[j].mType == ASMIT_JSR)
return false;
}
return false;
}
@ -34138,6 +34176,113 @@ bool NativeCodeBasicBlock::SimpleLoopReversal(NativeCodeProcedure* proc)
return changed;
}
// Move TAX / TAY instructions further down the instruction stream.
//
// Two rewrites are applied to this block, then the function recurses into
// mTrueJump and mFalseJump (each block is handled once, guarded by mVisited):
//
//  1. If this block is a loop consisting only of itself, with exactly one
//     other entry block, and it references neither A nor the index register,
//     a trailing TAX / TAY of the entry block is moved to the start of the
//     block behind the loop. A is marked as required/live through the loop.
//  2. Inside the block a TAX / TAY is moved behind the following instruction(s)
//     when they do not disturb A or the index register.
//
// Returns true if any instruction was moved or rewritten in this block or in
// any block reached from it.
bool NativeCodeBasicBlock::OptimizeXYSpilling(void)
{
	if (mVisited)
		return false;
	mVisited = true;

	bool changed = false;

	if (mLoopHead && (mTrueJump == this || mFalseJump == this) && mEntryBlocks.Size() == 2)
	{
		// The entry block that is not the loop itself
		NativeCodeBasicBlock* pblock = mEntryBlocks[0];
		if (pblock == this)
			pblock = mEntryBlocks[1];

		// The exit of the loop
		NativeCodeBasicBlock* nblock = mTrueJump;
		if (nblock == this)
			nblock = mFalseJump;

		int ps = pblock->mIns.Size();

		// nblock is null for an unconditional self loop (mTrueJump == this and
		// no mFalseJump); there is no exit block to move the transfer into, and
		// it must not be dereferenced.
		if (nblock && nblock->mEntryBlocks.Size() == 1 && !pblock->mFalseJump && ps > 0)
		{
			if (!ReferencesAccu())
			{
				if (!ReferencesXReg() && ps > 0 && pblock->mIns[ps - 1].mType == ASMIT_TAX)
				{
					nblock->mIns.Insert(0, NativeCodeInstruction(pblock->mIns[ps - 1].mIns, ASMIT_TAX));
					pblock->mIns.Remove(ps - 1);
					ps--;

					// A now has to survive the complete loop
					pblock->mExitRequiredRegs += CPU_REG_A;
					nblock->mEntryRequiredRegs += CPU_REG_A;
					mEntryRequiredRegs += CPU_REG_A;
					mExitRequiredRegs += CPU_REG_A;
					for (int i = 0; i < mIns.Size(); i++)
						mIns[i].mLive |= LIVE_CPU_REG_A;

					changed = true;
				}

				// ps was decremented if a TAX was removed above, so check it again
				if (!ReferencesYReg() && ps > 0 && pblock->mIns[ps - 1].mType == ASMIT_TAY)
				{
					nblock->mIns.Insert(0, NativeCodeInstruction(pblock->mIns[ps - 1].mIns, ASMIT_TAY));
					pblock->mIns.Remove(ps - 1);
					ps--;

					pblock->mExitRequiredRegs += CPU_REG_A;
					nblock->mEntryRequiredRegs += CPU_REG_A;
					mEntryRequiredRegs += CPU_REG_A;
					mExitRequiredRegs += CPU_REG_A;
					for (int i = 0; i < mIns.Size(); i++)
						mIns[i].mLive |= LIVE_CPU_REG_A;

					changed = true;
				}
			}
		}
	}

	// Every rewrite below inserts one instruction and removes one, so the size
	// of mIns does not change while iterating.
	for (int i = 0; i < mIns.Size(); i++)
	{
		if (i + 2 < mIns.Size())
		{
			// TAX, LDA #imm, STA abs (A and Z unused afterwards)
			//   -> LDX #imm, STX abs, TAX
			if (mIns[i + 0].mType == ASMIT_TAX &&
				mIns[i + 1].mType == ASMIT_LDA && mIns[i + 1].mMode == ASMIM_IMMEDIATE &&
				mIns[i + 2].mType == ASMIT_STA && mIns[i + 2].mMode == ASMIM_ABSOLUTE && !(mIns[i + 2].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z)))
			{
				mIns[i + 1].mType = ASMIT_LDX; mIns[i + 1].mLive |= LIVE_CPU_REG_A | LIVE_CPU_REG_X;
				mIns[i + 2].mType = ASMIT_STX; mIns[i + 2].mLive |= LIVE_CPU_REG_A;
				// Insert the new TAX first, then drop the old one at index i
				mIns.Insert(i + 3, NativeCodeInstruction(mIns[i + 0].mIns, ASMIT_TAX));
				mIns.Remove(i);
				changed = true;
			}
			// TAY, LDA #imm, STA abs (A and Z unused afterwards)
			//   -> LDY #imm, STY abs, TAY
			else if (mIns[i + 0].mType == ASMIT_TAY &&
				mIns[i + 1].mType == ASMIT_LDA && mIns[i + 1].mMode == ASMIM_IMMEDIATE &&
				mIns[i + 2].mType == ASMIT_STA && mIns[i + 2].mMode == ASMIM_ABSOLUTE && !(mIns[i + 2].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z)))
			{
				mIns[i + 1].mType = ASMIT_LDY; mIns[i + 1].mLive |= LIVE_CPU_REG_A | LIVE_CPU_REG_Y;
				mIns[i + 2].mType = ASMIT_STY; mIns[i + 2].mLive |= LIVE_CPU_REG_A;
				mIns.Insert(i + 3, NativeCodeInstruction(mIns[i + 0].mIns, ASMIT_TAY));
				mIns.Remove(i);
				changed = true;
			}
		}

		if (i + 1 < mIns.Size())
		{
			// Swap TAX with the next instruction when that instruction leaves A
			// alone, does not reference X, and Z is not live behind it
			if (mIns[i + 0].mType == ASMIT_TAX && !mIns[i + 1].ChangesAccu() && !mIns[i + 1].ReferencesXReg() && !(mIns[i + 1].mLive & LIVE_CPU_REG_Z))
			{
				mIns[i + 1].mLive |= LIVE_CPU_REG_A;
				mIns.Insert(i + 2, NativeCodeInstruction(mIns[i + 0].mIns, ASMIT_TAX));
				mIns.Remove(i);
				changed = true;
			}
			// Same for TAY and the Y register
			else if (mIns[i + 0].mType == ASMIT_TAY && !mIns[i + 1].ChangesAccu() && !mIns[i + 1].ReferencesYReg() && !(mIns[i + 1].mLive & LIVE_CPU_REG_Z))
			{
				mIns[i + 1].mLive |= LIVE_CPU_REG_A;
				mIns.Insert(i + 2, NativeCodeInstruction(mIns[i + 0].mIns, ASMIT_TAY));
				mIns.Remove(i);
				changed = true;
			}
		}
	}

	if (mTrueJump && mTrueJump->OptimizeXYSpilling())
		changed = true;
	if (mFalseJump && mFalseJump->OptimizeXYSpilling())
		changed = true;

	return changed;
}
bool NativeCodeBasicBlock::OptimizeXYSimpleLoop(void)
{
bool changed = false;
@ -35004,6 +35149,17 @@ bool NativeCodeBasicBlock::OptimizeInnerLoop(NativeCodeProcedure* proc, NativeCo
}
else if (!yindex && (block->mIns[i].mType == ASMIT_STY || block->mIns[i].mType == ASMIT_TYA || block->mIns[i].mMode == ASMIM_ABSOLUTE_Y || block->mIns[i].mMode == ASMIM_INDIRECT_Y))
yother = true;
else if (block->mIns[i].mType == ASMIT_LDX && block->mIns[i].mMode == ASMIM_ZERO_PAGE && block->mIns[i].mAddress == zreg && xother && !yother && !(block->mIns[i].mLive & LIVE_CPU_REG_Y))
{
if (i + 1 < bz && !(block->mIns[i + 1].mLive & (LIVE_CPU_REG_X | LIVE_CPU_REG_Y)) && block->mIns[i + 1].mMode == ASMIM_ABSOLUTE_X && HasAsmInstructionMode(block->mIns[i + 1].mType, ASMIM_ABSOLUTE_Y))
{
block->mIns[i].mType = ASMIT_LDY; block->mIns[i].mLive |= LIVE_CPU_REG_Y;
block->mIns[i + 1].mMode = ASMIM_ABSOLUTE_Y;
yindex = true;
}
else
yother = true;
}
else if (block->mIns[i].mType != ASMIT_LDA && block->mIns[i].mMode == ASMIM_ZERO_PAGE && block->mIns[i].mAddress == zreg)
yother = true;
@ -35207,14 +35363,21 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::CollectInnerLoop(NativeCodeBasicBloc
if (mTrueJump != head && mFalseJump != head)
{
if (mTrueJump)
mLoopTailBlock = mTrueJump->CollectInnerLoop(head, lblocks);
if (mLoopTailBlock && mFalseJump)
if (mFalseJump && mTrueJump == this && mEntryBlocks.Size() == 2 && mIns.Size() == 1 && (mIns[0].mType == ASMIT_BIT || mIns[0].mType == ASMIT_LDA && !(mIns[0].mLive & LIVE_CPU_REG_A)))
{
NativeCodeBasicBlock * tail = mFalseJump->CollectInnerLoop(head, lblocks);
if (tail != mLoopTailBlock)
mLoopTailBlock = nullptr;
mLoopTailBlock = mFalseJump->CollectInnerLoop(head, lblocks);
}
else
{
if (mTrueJump)
mLoopTailBlock = mTrueJump->CollectInnerLoop(head, lblocks);
if (mLoopTailBlock && mFalseJump)
{
NativeCodeBasicBlock* tail = mFalseJump->CollectInnerLoop(head, lblocks);
if (tail != mLoopTailBlock)
mLoopTailBlock = nullptr;
}
}
}
else
@ -37842,7 +38005,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
for (int i = mIns.Size() - 2 ; i >= 0; i--)
{
if (mIns[i].mType == ASMIT_LDX && (mIns[i].mMode == ASMIM_IMMEDIATE|| mIns[i].mMode == ASMIM_IMMEDIATE_ADDRESS) && mIns[i + 1].mType == ASMIT_STX && !(mIns[i + 1].mLive & LIVE_CPU_REG_X) && !(mIns[i + 1].mFlags & NCIF_VOLATILE))
if (mIns[i].mType == ASMIT_LDX && (mIns[i].mMode == ASMIM_IMMEDIATE|| mIns[i].mMode == ASMIM_IMMEDIATE_ADDRESS) && mIns[i + 1].mType == ASMIT_STX && !(mIns[i + 1].mLive & LIVE_CPU_REG_X))
{
if (MoveLDSTXOutOfRange(i))
changed = true;
@ -45479,7 +45642,7 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc)
{
mInterProc = proc;
CheckFunc = !strcmp(mInterProc->mIdent->mString, "krnio_setbnk");
CheckFunc = !strcmp(mInterProc->mIdent->mString, "vdc_hchar");
int nblocks = proc->mBlocks.Size();
tblocks = new NativeCodeBasicBlock * [nblocks];
@ -46704,6 +46867,29 @@ void NativeCodeProcedure::Optimize(void)
if (mEntryBlock->JoinXYCascade())
changed = true;
}
#if 1
if (step == 9 && cnt < 10)
{
#if _DEBUG
ResetVisited();
mEntryBlock->CheckBlocks();
#endif
ResetVisited();
while (mEntryBlock->OptimizeXYSpilling())
{
BuildDataFlowSets();
ResetVisited();
mEntryBlock->RemoveUnusedResultInstructions();
changed = true;
}
#if _DEBUG
ResetVisited();
mEntryBlock->CheckBlocks();
#endif
}
#endif
#if 1
if (step == 6)
@ -46719,6 +46905,7 @@ void NativeCodeProcedure::Optimize(void)
changed = true;
}
#endif
#if _DEBUG
ResetVisited();
mEntryBlock->CheckBlocks();

View File

@ -305,6 +305,8 @@ public:
bool OptimizeInnerLoop(NativeCodeProcedure* proc, NativeCodeBasicBlock* head, NativeCodeBasicBlock* tail, ExpandingArray<NativeCodeBasicBlock*>& blocks);
bool OptimizeXYSimpleLoop(void);
bool OptimizeXYSpilling(void);
bool OptimizeSelect(NativeCodeProcedure* proc);
bool OptimizeInnerLoops(NativeCodeProcedure* proc);