Various loop optimizations

This commit is contained in:
drmortalwombat 2023-12-16 21:03:09 +01:00
parent 6cf8466dfd
commit ae4b48c445
6 changed files with 611 additions and 14 deletions

View File

@ -23,6 +23,19 @@ void krnio_setnam(const char * name)
#pragma native(krnio_setnam)
// Hands a file name together with an explicit length to the routine at $ffbd,
// which the source labels "setnam". The length is supplied by the caller in
// len, it is not measured here.
//   name : pointer to the first character of the name
//   len  : number of characters to pass on
// NOTE(review): presumably $ffbd is the C64 KERNAL SETNAM entry, which expects
// the length in A and the name address in X (low byte) / Y (high byte), and
// "name + 1" addresses the second byte of the pointer - confirm against the
// KERNAL documentation.
void krnio_setnam_n(const char * name, char len)
{
__asm
{
lda len // A = length of the name
ldx name // X = first byte of the name pointer
ldy name + 1 // Y = following byte of the name pointer
jsr $ffbd // setnam
}
}
#pragma native(krnio_setnam_n)
bool krnio_open(char fnum, char device, char channel)
{
krnio_pstatus[fnum] = KRNIO_OK;

View File

@ -23,6 +23,8 @@ extern krnioerr krnio_pstatus[16];
void krnio_setnam(const char * name);
void krnio_setnam_n(const char * name, char len);
// open a kernal file/stream/io channel, returns true on success
bool krnio_open(char fnum, char device, char channel);

View File

@ -457,7 +457,7 @@ Expression* Expression::ConstantFold(Errors * errors, LinkerSection * dataSectio
if (mType == EX_PREFIX && mToken == TK_BANKOF && linker)
{
LinkerRegion* rgn;
if (mLeft->mDecValue->mSection && (rgn = linker->FindRegionOfSection(mLeft->mDecValue->mSection)))
if (mLeft->mDecValue && mLeft->mDecValue->mSection && (rgn = linker->FindRegionOfSection(mLeft->mDecValue->mSection)))
{
uint64 i = 0;
while (i < 64 && rgn->mCartridgeBanks != (1ULL << i))

View File

@ -585,6 +585,54 @@ bool InterCodeBasicBlock::DestroyingMem(const InterInstruction* lins, const Inte
return CollidingMem(sins->mSrc[1], sins->mSrc[0].mType, lins);
else if (sins->mCode == IC_COPY || sins->mCode == IC_STRCPY)
return CollidingMem(sins->mSrc[1], IT_NONE, lins);
else if (sins->mCode == IC_CALL || sins->mCode == IC_CALL_NATIVE)
{
if (sins->mSrc[0].mTemp < 0 && sins->mSrc[0].mLinkerObject)
{
InterCodeProcedure* proc = sins->mSrc[0].mLinkerObject->mProc;
if (proc)
{
int opmask = 0;
if (lins->mCode == IC_LOAD)
opmask = 1;
else if (lins->mCode == IC_STORE)
opmask = 2;
else if (lins->mCode == IC_COPY)
opmask = 3;
for (int k = 0; k < lins->mNumOperands; k++)
{
if ((1 << k) & opmask)
{
const InterOperand& op(lins->mSrc[k]);
if (op.mTemp >= 0)
{
if (proc->mStoresIndirect)
return true;
}
else if (op.mMemory == IM_FFRAME || op.mMemory == IM_FRAME)
return true;
else if (op.mMemory == IM_GLOBAL)
{
if (proc->ModifiesGlobal(op.mVarIndex))
return true;
}
else if (op.mMemory == IM_LOCAL && !mProc->mLocalVars[op.mVarIndex]->mAliased)
;
else if ((op.mMemory == IM_PARAM || op.mMemory == IM_FPARAM) && !mProc->mParamVars[op.mVarIndex]->mAliased)
;
else
return true;
}
}
return false;
}
}
return true;
}
else
return false;
}
@ -12785,6 +12833,37 @@ void InterCodeBasicBlock::BuildLoopSuffix(void)
{
mVisited = true;
if (mFalseJump)
{
if (mTrueJump->mLoopHead && mTrueJump->mNumEntries == 2 && mFalseJump->mNumEntries > 1)
{
InterCodeBasicBlock* suffix = new InterCodeBasicBlock(mProc);
suffix->mEntryRequiredTemps = mFalseJump->mEntryRequiredTemps;
suffix->mExitRequiredTemps = mFalseJump->mEntryRequiredTemps;
suffix->mLocalModifiedTemps.Reset(mExitRequiredTemps.Size());
InterInstruction* jins = new InterInstruction(mInstructions[0]->mLocation, IC_JUMP);
suffix->Append(jins);
suffix->Close(mFalseJump, nullptr);
mFalseJump = suffix;
suffix->mNumEntries = 1;
}
else if (mFalseJump->mLoopHead && mFalseJump->mNumEntries == 2 && mTrueJump->mNumEntries > 1)
{
InterCodeBasicBlock* suffix = new InterCodeBasicBlock(mProc);
suffix->mEntryRequiredTemps = mTrueJump->mEntryRequiredTemps;
suffix->mExitRequiredTemps = mTrueJump->mEntryRequiredTemps;
suffix->mLocalModifiedTemps.Reset(mExitRequiredTemps.Size());
InterInstruction* jins = new InterInstruction(mInstructions[0]->mLocation, IC_JUMP);
suffix->Append(jins);
suffix->Close(mTrueJump, nullptr);
mTrueJump = suffix;
suffix->mNumEntries = 1;
}
}
#if 0
if (mLoopHead && mNumEntries == 2 && mFalseJump)
{
if (mTrueJump == this && mFalseJump != this)
@ -12820,6 +12899,7 @@ void InterCodeBasicBlock::BuildLoopSuffix(void)
}
}
}
#endif
if (mTrueJump)
mTrueJump->BuildLoopSuffix();
@ -13043,6 +13123,136 @@ bool InterCodeBasicBlock::MoveLoopHeadCheckToTail(void)
return modified;
}
// Tells whether two blocks leave through the same conditional exit.
//
// This is the case when both blocks share their true and false successors and
// each of them ends in a relational operator whose result temp is consumed,
// as a final use, by the closing branch, with both relational operators
// reading equal sources.
bool SameExitCondition(InterCodeBasicBlock* b1, InterCodeBasicBlock* b2)
{
	// Different successors can never be the same exit
	if (b1->mTrueJump != b2->mTrueJump || b1->mFalseJump != b2->mFalseJump)
		return false;

	const int	count1 = b1->mInstructions.Size();
	const int	count2 = b2->mInstructions.Size();

	// Need at least a compare and a branch in each block
	if (count1 <= 1 || count2 <= 1)
		return false;

	InterInstruction* branch1 = b1->mInstructions[count1 - 1];
	InterInstruction* branch2 = b2->mInstructions[count2 - 1];

	if (branch1->mCode != IC_BRANCH || branch2->mCode != IC_BRANCH)
		return false;

	// First block: compare result feeds the branch and dies there
	InterInstruction* compare1 = b1->mInstructions[count1 - 2];
	if (compare1->mCode != IC_RELATIONAL_OPERATOR)
		return false;
	if (branch1->mSrc[0].mTemp != compare1->mDst.mTemp || !branch1->mSrc[0].mFinal)
		return false;

	// Second block: same shape
	InterInstruction* compare2 = b2->mInstructions[count2 - 2];
	if (compare2->mCode != IC_RELATIONAL_OPERATOR)
		return false;
	if (branch2->mSrc[0].mTemp != compare2->mDst.mTemp || !branch2->mSrc[0].mFinal)
		return false;

	// Finally both compares have to look at the same operands
	return compare1->IsEqualSource(compare2);
}
bool InterCodeBasicBlock::MergeLoopTails(void)
{
bool modified = false;
if (!mVisited)
{
mVisited = true;
if (mLoopHead && mEntryBlocks.Size() > 2)
{
int fi = 0;
while (fi < mEntryBlocks.Size() && mEntryBlocks[fi] == mLoopPrefix)
fi++;
int i = fi + 1;
while (i < mEntryBlocks.Size() && (mEntryBlocks[i] == mLoopPrefix || SameExitCondition(mEntryBlocks[i], mEntryBlocks[fi])))
i++;
if (i == mEntryBlocks.Size())
{
int n = 2;
bool match = true;
while (i == mEntryBlocks.Size() && n < mEntryBlocks[fi]->mInstructions.Size())
{
i = fi + 1;
while (i < mEntryBlocks.Size() && (mEntryBlocks[i] == mLoopPrefix ||
n < mEntryBlocks[i]->mInstructions.Size() &&
mEntryBlocks[i]->mInstructions[mEntryBlocks[i]->mInstructions.Size() - n - 1]->IsEqual(mEntryBlocks[fi]->mInstructions[mEntryBlocks[fi]->mInstructions.Size() - n - 1])))
i++;
if (i == mEntryBlocks.Size())
n++;
}
InterCodeBasicBlock* block = new InterCodeBasicBlock(mProc);
block->mTrueJump = mEntryBlocks[fi]->mTrueJump;
block->mFalseJump = mEntryBlocks[fi]->mFalseJump;
for (int i = 0; i < n; i++)
block->mInstructions.Push(mEntryBlocks[fi]->mInstructions[mEntryBlocks[fi]->mInstructions.Size() - n + i]);
InterInstruction* bins = mEntryBlocks[fi]->mInstructions.Last();
i = 0;
while (i < mEntryBlocks.Size())
{
if (mEntryBlocks[i] == mLoopPrefix)
i++;
else
{
mEntryBlocks[i]->mInstructions.SetSize(mEntryBlocks[i]->mInstructions.Size() - n);
mEntryBlocks[i]->mFalseJump = nullptr;
mEntryBlocks[i]->mTrueJump = block;
InterInstruction* jins = new InterInstruction(bins->mLocation, IC_JUMP);
mEntryBlocks[i]->mInstructions.Push(jins);
block->mEntryBlocks.Push(mEntryBlocks[i]);
block->mNumEntries++;
mEntryBlocks.Remove(i);
}
}
mNumEntries = 2;
mEntryBlocks.Push(block);
modified = true;
}
}
if (mTrueJump && mTrueJump->MergeLoopTails())
modified = true;
if (mFalseJump && mFalseJump->MergeLoopTails())
modified = true;
}
return modified;
}
// Checks that the instruction at index "at" of "block" is the single
// assignment of its destination temp as far as this block and the loop body
// are concerned.
//
// An instruction without a destination temp trivially qualifies. Otherwise
// the temp must not be referenced in the block ahead of the instruction, must
// not be modified in the block after it, and must not be modified by any of
// the body blocks from index one onwards (body[0] is not inspected).
bool IsSingleLoopAssign(int at, InterCodeBasicBlock* block, const GrowingArray<InterCodeBasicBlock*>& body)
{
	const int	temp = block->mInstructions[at]->mDst.mTemp;

	// Nothing assigned, nothing to conflict with
	if (temp < 0)
		return true;

	// Uses in front of the instruction or writes behind it rule it out
	if (block->IsTempReferencedInRange(0, at, temp) ||
		block->IsTempModifiedInRange(at + 1, block->mInstructions.Size(), temp))
		return false;

	// Look for another writer in the remaining body blocks
	int	k = 1;
	while (k < body.Size() && !body[k]->IsTempModified(temp))
		k++;

	return k >= body.Size();
}
// Tells whether temp "tmp" is left unmodified by every block in "body".
// A negative temp index is treated as invariant.
bool IsLoopInvariantTemp(int tmp, const GrowingArray<InterCodeBasicBlock*>& body)
{
	bool	invariant = true;

	if (tmp >= 0)
	{
		// Stop at the first block that writes the temp
		int	k = 0;
		while (invariant && k < body.Size())
		{
			if (body[k]->IsTempModified(tmp))
				invariant = false;
			k++;
		}
	}

	return invariant;
}
// True for the factors 1, 2, 4 and 8, false for every other value.
static bool IsSimpleFactor(int64 val)
{
	switch (val)
	{
	case 1:
	case 2:
	case 4:
	case 8:
		return true;
	default:
		return false;
	}
}
bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedParams, const GrowingVariableArray& staticVars)
{
bool modified = false;
@ -13072,6 +13282,7 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar
if (tail->CollectSingleHeadLoopBody(this, tail, body))
{
int tz = tail->mInstructions.Size();
#if 1
if (tz > 2)
{
@ -13159,6 +13370,29 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar
}
}
#endif
GrowingIntArray indexScale(0);
if (!modified)
{
int tz = tail->mInstructions.Size();
if (tz > 2)
{
InterInstruction* ai = tail->mInstructions[tz - 3];
if (ai->mCode == IC_BINARY_OPERATOR && ai->mOperator == IA_ADD && ai->mSrc[0].mTemp < 0 && ai->mDst.mTemp == ai->mSrc[1].mTemp && ai->mSrc[0].mIntConst > 0 && IsIntegerType(ai->mDst.mType) &&
!tail->IsTempModifiedInRange(tz - 1, tz, ai->mDst.mTemp) && !tail->IsTempModifiedInRange(0, tz - 3, ai->mDst.mTemp))
{
int i = 0;
while (i + 1 < body.Size() && !body[i]->IsTempModified(ai->mDst.mTemp))
i++;
if (i + 1 == body.Size())
{
indexScale[ai->mDst.mTemp] = (int)ai->mSrc[0].mIntConst;
}
}
}
}
bool hasStore = false;
for (int j = 0; j < body.Size(); j++)
{
@ -13176,6 +13410,87 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar
{
InterInstruction* lins = mInstructions[i];
if (lins->mCode == IC_BINARY_OPERATOR)
{
if (lins->mOperator == IA_MUL && lins->mSrc[0].mTemp < 0 && (lins->mDst.IsNotUByte() || !IsSimpleFactor(lins->mSrc[0].mIntConst)) && lins->mSrc[1].mTemp >= 0 && indexScale[lins->mSrc[1].mTemp] != 0 && IsSingleLoopAssign(i, this, body))
{
mLoopPrefix->mInstructions.Insert(mLoopPrefix->mInstructions.Size() - 1, lins);
mLoopPrefix->mExitRequiredTemps += lins->mDst.mTemp;
mEntryRequiredTemps += lins->mDst.mTemp;
tail->mExitRequiredTemps += lins->mDst.mTemp;
tail->mEntryRequiredTemps += lins->mDst.mTemp;
mInstructions.Remove(i);
InterInstruction* ains = new InterInstruction(lins->mLocation, IC_BINARY_OPERATOR);
ains->mOperator = IA_ADD;
ains->mDst = lins->mDst;
ains->mSrc[1] = lins->mDst;
ains->mSrc[0] = lins->mSrc[0];
ains->mSrc[0].mIntConst *= indexScale[lins->mSrc[1].mTemp];
tail->mInstructions.Insert(tail->mInstructions.Size() - 3, ains);
indexScale[ains->mDst.mTemp] = (int)ains->mSrc[0].mIntConst;
modified = true;
continue;
}
}
else if (lins->mCode == IC_CONVERSION_OPERATOR && lins->mOperator == IA_EXT8TO16U && i + 1 < mInstructions.Size() && indexScale[lins->mSrc[0].mTemp] != 0 && IsSingleLoopAssign(i, this, body))
{
InterInstruction* nins = mInstructions[i + 1];
if (nins->mCode == IC_BINARY_OPERATOR)
{
if (nins->mOperator == IA_MUL && nins->mSrc[0].mTemp < 0 && (nins->mDst.IsNotUByte() || !IsSimpleFactor(nins->mSrc[0].mIntConst)) && nins->mSrc[1].mTemp >= 0 && nins->mSrc[1].mFinal && nins->mDst.mTemp && IsSingleLoopAssign(i + 1, this, body))
{
mLoopPrefix->mInstructions.Insert(mLoopPrefix->mInstructions.Size() - 1, lins);
mLoopPrefix->mInstructions.Insert(mLoopPrefix->mInstructions.Size() - 1, nins);
mLoopPrefix->mExitRequiredTemps += nins->mDst.mTemp;
mEntryRequiredTemps += nins->mDst.mTemp;
tail->mExitRequiredTemps += nins->mDst.mTemp;
tail->mEntryRequiredTemps += nins->mDst.mTemp;
mInstructions.Remove(i);
mInstructions.Remove(i);
InterInstruction* ains = new InterInstruction(nins->mLocation, IC_BINARY_OPERATOR);
ains->mOperator = IA_ADD;
ains->mDst = nins->mDst;
ains->mSrc[1] = nins->mDst;
ains->mSrc[0] = nins->mSrc[0];
ains->mSrc[0].mIntConst *= indexScale[lins->mSrc[0].mTemp];
tail->mInstructions.Insert(tail->mInstructions.Size() - 3, ains);
indexScale[ains->mDst.mTemp] = (int)ains->mSrc[0].mIntConst;
modified = true;
continue;
}
}
}
else if (lins->mCode == IC_LEA)
{
if (lins->mSrc[0].mTemp >= 0 && lins->mSrc[0].IsNotUByte() && indexScale[lins->mSrc[0].mTemp] != 0 && IsSingleLoopAssign(i, this, body) && IsLoopInvariantTemp(lins->mSrc[1].mTemp, body))
{
mLoopPrefix->mInstructions.Insert(mLoopPrefix->mInstructions.Size() - 1, lins);
mLoopPrefix->mExitRequiredTemps += lins->mDst.mTemp;
mEntryRequiredTemps += lins->mDst.mTemp;
tail->mExitRequiredTemps += lins->mDst.mTemp;
tail->mEntryRequiredTemps += lins->mDst.mTemp;
mInstructions.Remove(i);
InterInstruction* ains = new InterInstruction(lins->mLocation, IC_LEA);
ains->mDst = lins->mDst;
ains->mSrc[1] = lins->mDst;
ains->mSrc[0].mTemp = -1;
ains->mSrc[0].mType = IT_INT16;
ains->mSrc[0].mIntConst = indexScale[lins->mSrc[0].mTemp];
tail->mInstructions.Insert(tail->mInstructions.Size() - 3, ains);
modified = true;
continue;
}
}
if (lins->mCode == IC_BINARY_OPERATOR || lins->mCode == IC_CONSTANT || lins->mCode == IC_UNARY_OPERATOR ||
lins->mCode == IC_CONVERSION_OPERATOR || lins->mCode == IC_SELECT ||
lins->mCode == IC_LEA ||
@ -16458,6 +16773,46 @@ bool InterCodeBasicBlock::PeepholeReplaceOptimization(const GrowingVariableArray
changed = true;
}
#endif
#if 1
if (i + 2 < mInstructions.Size() &&
mInstructions[i + 0]->mCode == IC_CONVERSION_OPERATOR && mInstructions[i + 0]->mOperator == IA_EXT8TO16U &&
mInstructions[i + 0]->mSrc[0].mTemp >= 0 &&
mInstructions[i + 1]->mDst.mTemp != mInstructions[i + 0]->mSrc[0].mTemp &&
mInstructions[i + 1]->mCode == IC_BINARY_OPERATOR && (mInstructions[i + 1]->mOperator == IA_ADD || mInstructions[i + 1]->mOperator == IA_SUB) &&
mInstructions[i + 1]->mSrc[0].mTemp < 0 &&
mInstructions[i + 1]->mSrc[1].mTemp == mInstructions[i + 0]->mDst.mTemp &&
mInstructions[i + 2]->mCode == IC_STORE &&
mInstructions[i + 2]->mSrc[0].mTemp == mInstructions[i + 1]->mDst.mTemp && mInstructions[i + 2]->mSrc[0].mFinal &&
mInstructions[i + 2]->mSrc[0].mType == IT_INT8)
{
mInstructions[i + 1]->mSrc[1] = mInstructions[i + 0]->mSrc[0];
mInstructions[i + 1]->mDst.mType = IT_INT8;
changed = true;
}
if (i + 2 < mInstructions.Size() &&
mInstructions[i + 0]->mCode == IC_CONVERSION_OPERATOR && mInstructions[i + 0]->mOperator == IA_EXT8TO16U &&
mInstructions[i + 0]->mSrc[0].mTemp >= 0 &&
mInstructions[i + 1]->mDst.mTemp != mInstructions[i + 0]->mSrc[0].mTemp &&
mInstructions[i + 1]->mCode == IC_BINARY_OPERATOR && (mInstructions[i + 1]->mOperator == IA_ADD || mInstructions[i + 1]->mOperator == IA_SUB) &&
mInstructions[i + 1]->mSrc[1].mTemp < 0 &&
mInstructions[i + 1]->mSrc[0].mTemp == mInstructions[i + 0]->mDst.mTemp &&
mInstructions[i + 2]->mCode == IC_STORE &&
mInstructions[i + 2]->mSrc[0].mTemp == mInstructions[i + 1]->mDst.mTemp && mInstructions[i + 2]->mSrc[0].mFinal &&
mInstructions[i + 2]->mSrc[0].mType == IT_INT8)
{
mInstructions[i + 1]->mSrc[0] = mInstructions[i + 0]->mSrc[0];
mInstructions[i + 1]->mDst.mType = IT_INT8;
changed = true;
}
#endif
#if 1
if (i + 2 < mInstructions.Size() &&
mInstructions[i + 0]->mCode == IC_BINARY_OPERATOR &&
@ -19410,6 +19765,21 @@ void InterCodeProcedure::Close(void)
PropagateConstOperationsUp();
#if 1
BuildLoopPrefix();
ResetEntryBlocks();
ResetVisited();
mEntryBlock->CollectEntryBlocks(nullptr);
DisassembleDebug("Pre MergeLoopTails");
ResetVisited();
if (mEntryBlock->MergeLoopTails())
{
BuildTraces(false);
BuildDataFlowSets();
}
DisassembleDebug("Post MergeLoopTails");
SingleTailLoopOptimization(paramMemory);
BuildDataFlowSets();

View File

@ -586,6 +586,7 @@ public:
bool CollectSingleHeadLoopBody(InterCodeBasicBlock* head, InterCodeBasicBlock* tail, GrowingArray<InterCodeBasicBlock*>& body);
bool SingleTailLoopOptimization(const NumberSet& aliasedParams, const GrowingVariableArray& staticVars);
bool MergeLoopTails(void);
InterCodeBasicBlock* BuildLoopPrefix(void);
void BuildLoopSuffix(void);

View File

@ -1890,6 +1890,13 @@ void NativeCodeInstruction::Simulate(NativeRegisterDataSet& data)
data.mRegs[CPU_REG_Z].mValue = t & 255;
data.mRegs[CPU_REG_Z].mMode = NRDM_IMMEDIATE;
}
else if (data.mRegs[reg].mMode == NRDM_IMMEDIATE)
{
data.mRegs[CPU_REG_Z].Reset();
data.mRegs[CPU_REG_C].mValue = data.mRegs[reg].mValue >= 128;
data.mRegs[CPU_REG_C].mMode = NRDM_IMMEDIATE;
data.mRegs[reg].Reset();
}
else
{
data.mRegs[reg].Reset();
@ -1915,6 +1922,13 @@ void NativeCodeInstruction::Simulate(NativeRegisterDataSet& data)
data.mRegs[CPU_REG_Z].mValue = t & 255;
data.mRegs[CPU_REG_Z].mMode = NRDM_IMMEDIATE;
}
else if (data.mRegs[reg].mMode == NRDM_IMMEDIATE)
{
data.mRegs[CPU_REG_Z].Reset();
data.mRegs[CPU_REG_C].mValue = data.mRegs[reg].mValue & 1;
data.mRegs[CPU_REG_C].mMode = NRDM_IMMEDIATE;
data.mRegs[reg].Reset();
}
else
{
data.mRegs[reg].Reset();
@ -6682,16 +6696,16 @@ bool NativeCodeBasicBlock::LoadOpStoreIndirectValue(InterCodeProcedure* proc, co
if (ram == ASMIM_INDIRECT_Y && wam == ASMIM_INDIRECT_Y && rareg == wareg && rindex == windex)
{
CheckFrameIndex(rins, rareg, rindex, size, BC_REG_ADDR);
CheckFrameIndex(rins, rareg, rindex, (size - 1) * rstride + 1, BC_REG_ADDR);
windex = rindex;
wareg = rareg;
}
else
{
if (ram == ASMIM_INDIRECT_Y)
CheckFrameIndex(rins, rareg, rindex, size, BC_REG_ADDR);
CheckFrameIndex(rins, rareg, rindex, (size - 1) * rstride + 1, BC_REG_ADDR);
if (wam == ASMIM_INDIRECT_Y)
CheckFrameIndex(wins, wareg, windex, size, BC_REG_ACCU);
CheckFrameIndex(wins, wareg, windex, (size - 1) * wstride + 1, BC_REG_ACCU);
}
switch (oins->mOperator)
@ -19010,6 +19024,63 @@ bool NativeCodeBasicBlock::ExpandADCToBranch(NativeCodeProcedure* proc)
}
}
if (mFalseJump && mIns.Size() >= 8 && (mBranch == ASMIT_BEQ || mBranch == ASMIT_BNE))
{
int sz = mIns.Size();
if (mIns[sz - 8].mType == ASMIT_SEC &&
mIns[sz - 7].mType == ASMIT_LDA &&
mIns[sz - 6].mType == ASMIT_SBC && mIns[sz - 6].mMode == ASMIM_IMMEDIATE && mIns[sz - 6].mAddress == 0x01 &&
mIns[sz - 5].mType == ASMIT_STA && mIns[sz - 5].SameEffectiveAddress(mIns[sz - 7]) &&
mIns[sz - 4].mType == ASMIT_LDA &&
mIns[sz - 3].mType == ASMIT_SBC && mIns[sz - 3].mMode == ASMIM_IMMEDIATE && mIns[sz - 3].mAddress == 0x00 &&
mIns[sz - 2].mType == ASMIT_STA && mIns[sz - 2].SameEffectiveAddress(mIns[sz - 4]) &&
mIns[sz - 1].mType == ASMIT_ORA && mIns[sz - 1].SameEffectiveAddress(mIns[sz - 7]) &&
HasAsmInstructionMode(ASMIT_DEC, mIns[sz - 5].mMode) &&
HasAsmInstructionMode(ASMIT_DEC, mIns[sz - 1].mMode) &&
!(mIns[sz - 1].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_C)))
{
changed = true;
NativeCodeBasicBlock* hiblock = proc->AllocateBlock();
NativeCodeBasicBlock* loblock = proc->AllocateBlock();
NativeCodeBasicBlock* orblock = proc->AllocateBlock();
NativeCodeBasicBlock* eqblock, * neblock;
if (mBranch == ASMIT_BEQ)
{
eqblock = mTrueJump;
neblock = mFalseJump;
}
else
{
neblock = mTrueJump;
eqblock = mFalseJump;
}
hiblock->mBranch = ASMIT_JMP;
hiblock->mTrueJump = loblock;
loblock->mBranch = ASMIT_BNE;
loblock->mTrueJump = neblock;
loblock->mFalseJump = orblock;
orblock->mBranch = ASMIT_BNE;
orblock->mTrueJump = neblock;
orblock->mFalseJump = eqblock;
mBranch = ASMIT_BNE;
mTrueJump = loblock;
mFalseJump = hiblock;
hiblock->mIns.Push(NativeCodeInstruction(mIns[sz - 4].mIns, ASMIT_DEC, mIns[sz - 4]));
loblock->mIns.Push(NativeCodeInstruction(mIns[sz - 7].mIns, ASMIT_DEC, mIns[sz - 7]));
orblock->mIns.Push(NativeCodeInstruction(mIns[sz - 4].mIns, ASMIT_LDA, mIns[sz - 4]));
mIns[sz - 8].mType = ASMIT_NOP;
mIns.SetSize(sz - 6);
}
}
if (mTrueJump && mTrueJump->ExpandADCToBranch(proc))
changed = true;
@ -22516,7 +22587,7 @@ bool NativeCodeBasicBlock::JoinTailCodeSequences(NativeCodeProcedure* proc, bool
mTrueJump->mIns[0].SameEffectiveAddress(mFalseJump->mIns[0]) && mTrueJump->mIns[1].SameEffectiveAddress(mFalseJump->mIns[1]) &&
HasAsmInstructionMode(ASMIT_LDX, mTrueJump->mIns[0].mMode) && HasAsmInstructionMode(ASMIT_STX, mTrueJump->mIns[1].mMode))
{
uint32 live = mIns[s - 1].mLive;
uint32 live = mIns[s - 1].mLive | LIVE_CPU_REG_A;
if (mIns[s - 1].RequiresYReg())
live |= LIVE_CPU_REG_Y;
if (s >= 2)
@ -24339,7 +24410,7 @@ bool NativeCodeBasicBlock::CheckForwardSumYPointer(const NativeCodeBasicBlock* b
}
else if (ins.mType == ASMIT_RTS)
{
if ((ins.mFlags & NCIF_LOWER) && base == BC_REG_ACCU)
if ((ins.mFlags & NCIF_LOWER) && (base == BC_REG_ACCU || reg == BC_REG_ACCU))
return false;
}
else if (ins.ChangesZeroPage(base) || ins.ChangesZeroPage(base + 1) || iins.MayBeChangedOnAddress(ins))
@ -30666,12 +30737,12 @@ bool NativeCodeBasicBlock::OffsetValueForwarding(const ValueNumberingDataSet& da
ins.mType = ASMIT_NOP; ins.mMode = ASMIM_IMPLIED;
changed = true;
}
else if (d == 1 && !(ins.mLive & LIVE_CPU_REG_Z))
else if (d == 1 && !(ins.mLive & (LIVE_CPU_REG_Z | LIVE_CPU_REG_C)))
{
ins.mType = ASMIT_INC;
changed = true;
}
else if (d == -1 && !(ins.mLive & LIVE_CPU_REG_Z))
else if (d == -1 && !(ins.mLive & (LIVE_CPU_REG_Z | LIVE_CPU_REG_C)))
{
ins.mType = ASMIT_DEC;
changed = true;
@ -33192,6 +33263,65 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc
}
#endif
#if 1
sz = mIns.Size();
if (sz >= 2 && (mBranch == ASMIT_BEQ && mFalseJump == this || mBranch == ASMIT_BNE && mTrueJump == this) &&
((mIns[sz - 2].mType == ASMIT_LDA && mIns[sz - 1].mType == ASMIT_CMP && !mEntryRequiredRegs[CPU_REG_A]) ||
(mIns[sz - 2].mType == ASMIT_LDX && mIns[sz - 1].mType == ASMIT_CPX && !mEntryRequiredRegs[CPU_REG_X]) ||
(mIns[sz - 2].mType == ASMIT_LDY && mIns[sz - 1].mType == ASMIT_CPY && !mEntryRequiredRegs[CPU_REG_Y])) &&
mIns[sz - 2].mMode == ASMIM_ZERO_PAGE && mIns[sz - 1].mMode == ASMIM_ZERO_PAGE)
{
if (ChangesZeroPage(mIns[sz - 1].mAddress) && !ChangesZeroPage(mIns[sz - 2].mAddress))
{
int a = mIns[sz - 1].mAddress;
mIns[sz - 1].mAddress = mIns[sz - 2].mAddress;
mIns[sz - 2].mAddress = a;
changed = true;
}
}
#endif
if (sz >= 2 && (mIns[0].mType == ASMIT_DEC || mIns[0].mType == ASMIT_INC) && mIns[0].mMode == ASMIM_ZERO_PAGE &&
mIns[1].mType == ASMIT_LDY && mIns[1].SameEffectiveAddress(mIns[0]))
{
int i = 2;
int inc = 0;
while (i < mIns.Size())
{
if (mIns[i].ChangesYReg())
{
if (mIns[i].mType == ASMIT_INY)
inc++;
else if (mIns[i].mType == ASMIT_DEY)
inc--;
else if (mIns[i].mType == ASMIT_LDY && mIns[i].SameEffectiveAddress(mIns[0]))
inc = 0;
else
break;
}
else if (mIns[i].ChangesZeroPage(mIns[0].mAddress))
break;
i++;
}
if (i == mIns.Size() && inc == 0)
{
if (!prevBlock)
return OptimizeSimpleLoopInvariant(proc, full);
prevBlock->mIns.Push(NativeCodeInstruction(mIns[1].mIns, ASMIT_LDY, mIns[1]));
prevBlock->mExitRequiredRegs += CPU_REG_Y;
mEntryRequiredRegs += CPU_REG_Y;
mExitRequiredRegs += CPU_REG_Y;
mIns[1].mType = ASMIT_STY;
mIns[0].mLive |= LIVE_CPU_REG_Y;
if (mIns[0].mType == ASMIT_DEC)
mIns[0].mType = ASMIT_DEY;
else
mIns[0].mType = ASMIT_INY;
mIns[0].mMode = ASMIM_IMPLIED;
changed = true;
}
}
if (mIns.Size() > 0 && (mIns.Last().mType == ASMIT_DEX || mIns.Last().mType == ASMIT_DEC || mIns.Last().mType == ASMIT_CPX))
{
@ -39210,9 +39340,14 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
#endif
CheckLive();
#if 1
if (i + 2 < mIns.Size())
{
NativeCodeInstruction i0 = mIns[i];
NativeCodeInstruction i1 = mIns[i + 1];
NativeCodeInstruction i2 = mIns[i + 2];
if (mIns[i].mType == ASMIT_LDA && mIns[i + 2].mType == ASMIT_LDA && (mIns[i + 1].mType == ASMIT_CLC || mIns[i + 1].mType == ASMIT_SEC))
{
mIns[i].mType = ASMIT_NOP; mIns[i].mMode = ASMIM_IMPLIED;
@ -39268,6 +39403,17 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED;
progress = true;
}
else if (
mIns[i + 0].mType == ASMIT_LDA &&
mIns[i + 1].mType == ASMIT_STA &&
mIns[i + 2].mType == ASMIT_LDA && !(mIns[i + 2].mFlags & NCIF_VOLATILE) &&
mIns[i + 0].SameEffectiveAddress(mIns[i + 2]))
{
mIns[i + 0].mLive |= mIns[i + 2].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z);
mIns[i + 1].mLive |= mIns[i + 2].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z);
mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED;
progress = true;
}
else if (
mIns[i + 0].mType == ASMIT_STA && mIns[i + 0].mMode == ASMIM_ZERO_PAGE &&
mIns[i + 2].mType == ASMIT_LDY && mIns[i + 2].mMode == ASMIM_ZERO_PAGE && mIns[i + 2].mAddress == mIns[i + 0].mAddress &&
@ -39334,6 +39480,8 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
{
mIns[i + 2] = mIns[i + 0];
mIns[i + 2].mLive |= LIVE_CPU_REG_Y | LIVE_CPU_REG_Z;
if (mIns[i + 0].RequiresCarry())
mIns[i + 1].mLive |= LIVE_CPU_REG_C;
mIns[i + 0].mType = ASMIT_NOP; mIns[i + 0].mMode = ASMIM_IMPLIED;
progress = true;
}
@ -40446,6 +40594,8 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
}
}
CheckLive();
if (
mIns[i + 0].mType == ASMIT_LDY && mIns[i + 0].mMode == ASMIM_IMMEDIATE &&
mIns[i + 1].mType == ASMIT_LDA &&
@ -40470,9 +40620,10 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
progress = true;
}
}
CheckLive();
}
#endif
CheckLive();
#if 1
if (i + 3 < mIns.Size())
{
@ -42105,7 +42256,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
HasAsmInstructionMode(ASMIT_INC, mIns[i + 0].mMode))
{
mIns[i + 4].mType = ASMIT_INC;
mIns[i + 2].mType = ASMIT_NOP; mIns[i + 3].mMode = ASMIM_IMPLIED;
mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED;
mIns[i + 3].mType = ASMIT_NOP; mIns[i + 3].mMode = ASMIM_IMPLIED;
progress = true;
@ -42119,12 +42270,28 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
HasAsmInstructionMode(ASMIT_DEC, mIns[i + 0].mMode))
{
mIns[i + 4].mType = ASMIT_DEC;
mIns[i + 2].mType = ASMIT_NOP; mIns[i + 3].mMode = ASMIM_IMPLIED;
mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED;
mIns[i + 3].mType = ASMIT_NOP; mIns[i + 3].mMode = ASMIM_IMPLIED;
progress = true;
}
#endif
else if (
pass > 8 &&
mIns[i + 0].mType == ASMIT_CLC &&
mIns[i + 1].mType == ASMIT_LDA &&
mIns[i + 2].mType == ASMIT_ADC && mIns[i + 2].mMode == ASMIM_IMMEDIATE && mIns[i + 2].mAddress == 1 &&
mIns[i + 3].mType == ASMIT_CLC &&
mIns[i + 4].mType == ASMIT_ADC &&
!(mIns[i + 4].mLive & LIVE_CPU_REG_C))
{
mIns[i + 3].mType = ASMIT_SEC;
mIns[i + 0].mType = ASMIT_NOP; mIns[i + 0].mMode = ASMIM_IMPLIED;
mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED;
progress = true;
}
#if 0
else if (
mIns[i + 0].mType == ASMIT_LDY && mIns[i + 0].mMode == ASMIM_ZERO_PAGE &&
@ -42831,6 +42998,34 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
}
}
#endif
#if 1
if (mIns[i + 0].mType == ASMIT_CLC &&
mIns[i + 1].mType == ASMIT_LDA && mIns[i + 1].mMode == ASMIM_ZERO_PAGE &&
mIns[i + 2].mType == ASMIT_ADC && (mIns[i + 2].mMode == ASMIM_ZERO_PAGE || mIns[i + 2].mMode == ASMIM_ABSOLUTE) &&
mIns[i + 3].mType == ASMIT_STA && mIns[i + 3].mMode == ASMIM_ZERO_PAGE && mIns[i + 3].mAddress != mIns[i + 2].mAddress &&
mIns[i + 4].mType == ASMIT_LDA && mIns[i + 4].mMode == ASMIM_ZERO_PAGE &&
mIns[i + 5].mType == ASMIT_ADC && mIns[i + 5].mMode == ASMIM_IMMEDIATE && mIns[i + 5].mAddress == 0 &&
mIns[i + 6].mType == ASMIT_STA && mIns[i + 6].mMode == ASMIM_ZERO_PAGE && mIns[i + 6].mAddress == mIns[i + 3].mAddress + 1 &&
!(mIns[i + 6].mLive & LIVE_CPU_REG_A))
{
int yval = RetrieveYValue(i);
proc->ResetPatched();
if (CheckForwardSumYPointer(this, mIns[i + 3].mAddress, mIns[i + 1].mAddress, mIns[i + 2], i + 7, yval, 3))
{
proc->ResetPatched();
if (PatchForwardSumYPointer(this, mIns[i + 3].mAddress, mIns[i + 3].mAddress, mIns[i + 2], i + 7, yval))
progress = true;
mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED;
mIns[i + 5].mType = ASMIT_NOP; mIns[i + 5].mMode = ASMIM_IMPLIED;
if (mTrueJump)
mTrueJump->CheckLive();
if (mFalseJump)
mFalseJump->CheckLive();
}
}
#endif
#if 1
if (
mIns[i + 0].mType == ASMIT_STA && (mIns[i + 0].mMode == ASMIM_ZERO_PAGE || mIns[i + 0].mMode == ASMIM_ABSOLUTE) &&
@ -44528,7 +44723,7 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc)
{
mInterProc = proc;
CheckFunc = !strcmp(mInterProc->mIdent->mString, "Enemy::StepTurn");
CheckFunc = !strcmp(mInterProc->mIdent->mString, "mh_size");
int nblocks = proc->mBlocks.Size();
tblocks = new NativeCodeBasicBlock * [nblocks];
@ -44985,6 +45180,8 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc)
void NativeCodeProcedure::Assemble(void)
{
CheckFunc = !strcmp(mInterProc->mIdent->mString, "mh_size");
if (mInterProc->mCompilerOptions & COPT_OPTIMIZE_MERGE_CALLS)
{
ResetVisited();
@ -45138,6 +45335,8 @@ void NativeCodeProcedure::Optimize(void)
int cnt = 0;
bool swappedXY = false;
CheckCase = false;
#if _DEBUG
ResetVisited();
mEntryBlock->CheckBlocks();
@ -45327,6 +45526,7 @@ void NativeCodeProcedure::Optimize(void)
if (mEntryBlock->PeepHoleOptimizer(this, step))
changed = true;
#endif
if (step == 2)
{
@ -45614,7 +45814,7 @@ void NativeCodeProcedure::Optimize(void)
for (int i = 0; i < 256; i++)
if (xregs[i] > xregs[j])
j = i;
if (xregs[j] > 0)
if (xregs[j] > 2)
{
ResetVisited();
mEntryBlock->GlobalRegisterXMap(j);
@ -45632,7 +45832,7 @@ void NativeCodeProcedure::Optimize(void)
for (int i = 0; i < 256; i++)
if (yregs[i] > yregs[j])
j = i;
if (yregs[j] > 0)
if (yregs[j] > 2)
{
ResetVisited();
mEntryBlock->GlobalRegisterYMap(j);
@ -46077,6 +46277,17 @@ void NativeCodeProcedure::Optimize(void)
ResetVisited();
mEntryBlock->RemoveUnusedResultInstructions();
#if 1
ResetVisited();
data.Reset();
mEntryBlock->BuildEntryDataSet(data);
CheckCase = true;
ResetVisited();
if (mEntryBlock->ApplyEntryDataSet())
changed = true;
#endif
#if 1
ResetVisited();
mEntryBlock->BlockSizeReduction(this, -1, -1);