From eb55b2f55df7f54de0fe7d34af6399992afd5c5a Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Sun, 3 Jul 2022 13:13:32 +0200 Subject: [PATCH] Remove dangling tail recursion rts after jmp --- README.md | 2 +- oscar64/InterCode.cpp | 13 +++++++++++++ oscar64/NativeCodeGenerator.cpp | 24 ++++++++++++++++++++++-- oscar64/NativeCodeGenerator.h | 2 ++ 4 files changed, 38 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 999d1b7..c264e8d 100644 --- a/README.md +++ b/README.md @@ -194,7 +194,7 @@ The compiler can be provided with additional information using the built in func ### Loop unrolling -Loop unrolling on 6502 is hard to decide for the compiler. Memory is usually scarce, so it only does it in realy obvious cases (and in less obbious cases for O3). On the other hand unrolling is required to get good performance in e.g. scrolling code. Therefore the compiler offers an unrolling pragma, that can be used to specifiy the amount of unrolling either as a number or "full" for complete. +Loop unrolling on 6502 is hard to decide for the compiler. Memory is usually scarce, so it only does it in really obvious cases (and in less obvious cases for O3). On the other hand unrolling is required to get good performance in e.g. scrolling code. Therefore the compiler offers an unrolling pragma, that can be used to specify the amount of unrolling either as a number or "full" for complete. The following code scrolls the screen to the left, and completely unrolls the inner vertical loop. 
diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index a3750cb..be5251b 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -10781,6 +10781,19 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati mInstructions[i + 0]->mNumOperands = 0; changed = true; } + else if ( + mInstructions[i + 0]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 0]->mOperator == IA_MUL && mInstructions[i + 0]->mSrc[0].mTemp < 0 && + mInstructions[i + 1]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 1]->mOperator == IA_SHL && mInstructions[i + 1]->mSrc[0].mTemp < 0 && + mInstructions[i + 1]->mSrc[1].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[1].mFinal && + (mInstructions[i + 0]->mSrc[0].mIntConst << mInstructions[i + 1]->mSrc[0].mIntConst) < 65536) + { + mInstructions[i + 1]->mSrc[0].mIntConst = mInstructions[i + 0]->mSrc[0].mIntConst << mInstructions[i + 1]->mSrc[0].mIntConst; + mInstructions[i + 1]->mSrc[1] = mInstructions[i + 0]->mSrc[1]; + mInstructions[i + 1]->mOperator = IA_MUL; + mInstructions[i + 0]->mCode = IC_NONE; + mInstructions[i + 0]->mNumOperands = 0; + changed = true; + } #if 1 else if ( mInstructions[i + 0]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 0]->mOperator == IA_OR && mInstructions[i + 0]->mSrc[0].mTemp < 0 && diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index 43dfbd2..44d161f 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -25240,6 +25240,23 @@ bool NativeCodeBasicBlock::CalculateOffset(int& total) return changed; } +void NativeCodeBasicBlock::ShortcutTailRecursion() +{ + if (!mVisited) + { + mVisited = true; + if (!mFalseJump && mTrueJump && mTrueJump->mIns.Size() == 1 && mTrueJump->mIns[0].mType == ASMIT_RTS && mIns.Size() > 0 && mIns.Last().IsSimpleJSR()) + { + this->mCode[this->mCode.Size() - 3] = 0x4c; + mTrueJump->mNumEntries--; + mTrueJump = nullptr; + } + + if (mTrueJump) 
mTrueJump->ShortcutTailRecursion(); + if (mFalseJump) mFalseJump->ShortcutTailRecursion(); + } +} + void NativeCodeBasicBlock::CopyCode(NativeCodeProcedure * proc, uint8* target) { int i; @@ -25822,7 +25839,10 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc) mEntryBlock->Assemble(); - NativeCodeBasicBlock* lentryBlock = mEntryBlock->BypassEmptyBlocks(); + mEntryBlock = mEntryBlock->BypassEmptyBlocks(); + + ResetVisited(); + mEntryBlock->ShortcutTailRecursion(); proc->mLinkerObject->mType = LOT_NATIVE_CODE; @@ -25831,7 +25851,7 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc) int total; total = 0; - lentryBlock->BuildPlacement(placement); + mEntryBlock->BuildPlacement(placement); for (int i = 0; i < placement.Size(); i++) placement[i]->InitialOffset(total); diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index d55c170..2ce2c56 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -182,6 +182,8 @@ public: void Assemble(void); void Close(NativeCodeBasicBlock* trueJump, NativeCodeBasicBlock* falseJump, AsmInsType branch); + void ShortcutTailRecursion(); + bool RemoveNops(void); bool PeepHoleOptimizer(NativeCodeProcedure* proc, int pass); void BlockSizeReduction(NativeCodeProcedure* proc);