Remove dangling tail recursion rts after jmp

This commit is contained in:
drmortalwombat 2022-07-03 13:13:32 +02:00
parent bcc59a9afb
commit eb55b2f55d
4 changed files with 38 additions and 3 deletions

View File

@ -194,7 +194,7 @@ The compiler can be provided with additional information using the built in func
### Loop unrolling
Loop unrolling on 6502 is hard to decide for the compiler. Memory is usually scarce, so it only does it in realy obvious cases (and in less obbious cases for O3). On the other hand unrolling is required to get good performance in e.g. scrolling code. Therefore the compiler offers an unrolling pragma, that can be used to specifiy the amount of unrolling either as a number or "full" for complete.
Loop unrolling on 6502 is hard to decide for the compiler. Memory is usually scarce, so it only does it in really obvious cases (and in less obvious cases for O3). On the other hand, unrolling is required to get good performance in e.g. scrolling code. Therefore the compiler offers an unrolling pragma that can be used to specify the amount of unrolling, either as a number or "full" for complete unrolling.
The following code scrolls the screen to the left, and completely unrolls the inner vertical loop.

View File

@ -10781,6 +10781,19 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati
mInstructions[i + 0]->mNumOperands = 0;
changed = true;
}
else if (
mInstructions[i + 0]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 0]->mOperator == IA_MUL && mInstructions[i + 0]->mSrc[0].mTemp < 0 &&
mInstructions[i + 1]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 1]->mOperator == IA_SHL && mInstructions[i + 1]->mSrc[0].mTemp < 0 &&
mInstructions[i + 1]->mSrc[1].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[1].mFinal &&
(mInstructions[i + 0]->mSrc[0].mIntConst << mInstructions[i + 1]->mSrc[0].mIntConst) < 65536)
{
mInstructions[i + 1]->mSrc[0].mIntConst = mInstructions[i + 0]->mSrc[0].mIntConst << mInstructions[i + 1]->mSrc[0].mIntConst;
mInstructions[i + 1]->mSrc[1] = mInstructions[i + 0]->mSrc[1];
mInstructions[i + 1]->mOperator = IA_MUL;
mInstructions[i + 0]->mCode = IC_NONE;
mInstructions[i + 0]->mNumOperands = 0;
changed = true;
}
#if 1
else if (
mInstructions[i + 0]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 0]->mOperator == IA_OR && mInstructions[i + 0]->mSrc[0].mTemp < 0 &&

View File

@ -25240,6 +25240,23 @@ bool NativeCodeBasicBlock::CalculateOffset(int& total)
return changed;
}
void NativeCodeBasicBlock::ShortcutTailRecursion()
{
if (!mVisited)
{
mVisited = true;
if (!mFalseJump && mTrueJump && mTrueJump->mIns.Size() == 1 && mTrueJump->mIns[0].mType == ASMIT_RTS && mIns.Size() > 0 && mIns.Last().IsSimpleJSR())
{
this->mCode[this->mCode.Size() - 3] = 0x4c;
mTrueJump->mNumEntries--;
mTrueJump = nullptr;
}
if (mTrueJump) mTrueJump->ShortcutTailRecursion();
if (mFalseJump) mFalseJump->ShortcutTailRecursion();
}
}
void NativeCodeBasicBlock::CopyCode(NativeCodeProcedure * proc, uint8* target)
{
int i;
@ -25822,7 +25839,10 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc)
mEntryBlock->Assemble();
NativeCodeBasicBlock* lentryBlock = mEntryBlock->BypassEmptyBlocks();
mEntryBlock = mEntryBlock->BypassEmptyBlocks();
ResetVisited();
mEntryBlock->ShortcutTailRecursion();
proc->mLinkerObject->mType = LOT_NATIVE_CODE;
@ -25831,7 +25851,7 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc)
int total;
total = 0;
lentryBlock->BuildPlacement(placement);
mEntryBlock->BuildPlacement(placement);
for (int i = 0; i < placement.Size(); i++)
placement[i]->InitialOffset(total);

View File

@ -182,6 +182,8 @@ public:
void Assemble(void);
void Close(NativeCodeBasicBlock* trueJump, NativeCodeBasicBlock* falseJump, AsmInsType branch);
// Recursively visits this block and its successors; where a block ends in a
// simple JSR and its only successor is a lone RTS, the assembled call is
// patched into a jump (opcode 0x4c) and the link to the RTS block is removed.
void ShortcutTailRecursion();
bool RemoveNops(void);
bool PeepHoleOptimizer(NativeCodeProcedure* proc, int pass);
void BlockSizeReduction(NativeCodeProcedure* proc);