From cb5f5f421f413579c041da2fc9d0a34be5c07a29 Mon Sep 17 00:00:00 2001
From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com>
Date: Tue, 9 Nov 2021 22:24:29 +0100
Subject: [PATCH] Optimize single conditional path value evaluation

---
 autotest/bitshifttest.c         | 100 ++++++++++++++++++++++++++
 oscar64/InterCode.cpp           | 124 +++++++++++++++++++++++++++++---
 oscar64/InterCode.h             |   2 +
 oscar64/NativeCodeGenerator.cpp |  43 +++++++----
 4 files changed, 244 insertions(+), 25 deletions(-)

diff --git a/autotest/bitshifttest.c b/autotest/bitshifttest.c
index 1a68374..52a58fd 100644
--- a/autotest/bitshifttest.c
+++ b/autotest/bitshifttest.c
@@ -74,6 +74,85 @@ unsigned shr8n(int n)
 
 #pragma native(shr8n)
 
+
+void shl8xb(unsigned char xu, signed char xi)
+{
+	unsigned char ua[16];
+	signed char ia[16];
+#assign s 0
+#repeat
+	ua[s] = xu << s;
+	ia[s] = xi << s;
+#assign s s + 1
+#until s == 16
+
+	for(int i=0; i<16; i++)
+	{
+		assert(ua[i] == (unsigned char)(xu << i));
+		assert(ia[i] == (signed char)(xi << i));
+	}
+}
+
+void shr8xb(unsigned char xu, signed char xi)
+{
+	unsigned char ua[16];
+	signed char ia[16];
+#assign s 0
+#repeat
+	ua[s] = xu >> s;
+	ia[s] = xi >> s;
+#assign s s + 1
+#until s == 16
+
+	for(int i=0; i<16; i++)
+	{
+		assert(ua[i] == (unsigned char)(xu >> i));
+		assert(ia[i] == (signed char)(xi >> i));
+	}
+}
+
+void shl8xn(unsigned char xu, signed char xi)
+{
+	unsigned char ua[16];
+	signed char ia[16];
+#assign s 0
+#repeat
+	ua[s] = xu << s;
+	ia[s] = xi << s;
+#assign s s + 1
+#until s == 16
+
+	for(int i=0; i<16; i++)
+	{
+		assert(ua[i] == (unsigned char)(xu << i));
+		assert(ia[i] == (signed char)(xi << i));
+	}
+}
+
+void shr8xn(unsigned char xu, signed char xi)
+{
+	unsigned char ua[16];
+	signed char ia[16];
+#assign s 0
+#repeat
+	ua[s] = xu >> s;
+	ia[s] = xi >> s;
+#assign s s + 1
+#until s == 16
+
+	for(int i=0; i<16; i++)
+	{
+		assert(ua[i] == (unsigned char)(xu >> i));
+		assert(ia[i] == (signed char)(xi >> i));
+	}
+}
+
+#pragma native(shl8xn)
+#pragma native(shr8xn)
+
+
+
+
 void shl16b(unsigned xu, int xi)
 {
 	unsigned ua[16];
@@ -248,6 +327,27 @@ int main(void)
 		assert(shr8b(i) == shr8n(i));
 	}
 
+
+	shl8xb(0x00, 0x00);
+	shl8xb(0xff, 0xff);
+	shl8xb(0x34, 0x34);
+	shl8xb(0xdc, 0xdc);
+
+	shr8xb(0x00, 0x00);
+	shr8xb(0xff, 0xff);
+	shr8xb(0x34, 0x34);
+	shr8xb(0xdc, 0xdc);
+
+	shl8xn(0x00, 0x00);
+	shl8xn(0xff, 0xff);
+	shl8xn(0x34, 0x34);
+	shl8xn(0xdc, 0xdc);
+
+	shr8xn(0x00, 0x00);
+	shr8xn(0xff, 0xff);
+	shr8xn(0x34, 0x34);
+	shr8xn(0xdc, 0xdc);
+
 	shl16b(0x0000, 0x0000);
 	shl16b(0xffff, 0xffff);
 	shl16b(0x1234, 0x1234);
diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp
index 7266d56..e9bf8c3 100644
--- a/oscar64/InterCode.cpp
+++ b/oscar64/InterCode.cpp
@@ -1736,6 +1736,17 @@ bool HasSideEffect(InterCode code)
 	return code == IC_CALL || code == IC_CALL_NATIVE || code == IC_ASSEMBLER;
 }
 
+bool IsMoveable(InterCode code)
+{
+	if (HasSideEffect(code) || code == IC_COPY || code == IC_STRCPY || code == IC_STORE || code == IC_BRANCH || code == IC_POP_FRAME || code == IC_PUSH_FRAME)
+		return false;
+	if (code == IC_RETURN || code == IC_RETURN_STRUCT || code == IC_RETURN_VALUE)
+		return false;
+
+	return true;
+}
+
+
 bool InterInstruction::RemoveUnusedResultInstructions(InterInstruction* pre, NumberSet& requiredTemps)
 {
 	bool changed = false;
@@ -4096,6 +4107,92 @@ void InterCodeBasicBlock::MarkRelevantStatics(void)
 	}
 }
 
+bool InterCodeBasicBlock::PushSinglePathResultInstructions(void)
+{
+	// Sinks instructions whose result is required on entry by only one successor of a
+	// two-way branch into that successor. Returns true if any instruction was moved.
+	bool changed = false;
+
+	if (!mVisited)
+	{
+		mVisited = true;
+
+		if (mTrueJump && mFalseJump)
+		{
+			NumberSet trueExitRequiredTemps(mTrueJump->mEntryRequiredTemps), falseExitRequiredTems(mFalseJump->mEntryRequiredTemps);
+			NumberSet providedTemps(mExitRequiredTemps.Size()), requiredTemps(mExitRequiredTemps.Size());
+			// hadStore: a later instruction in this block may have written memory, so loads must stay.
+			bool hadStore = false;
+			// Scan backwards so the definitions and uses that follow each instruction are already known.
+			int i = mInstructions.Size();
+			while (i > 0)
+			{
+				i--;
+				InterInstruction* ins(mInstructions[i]);
+
+				bool moved = false;
+				// Candidate: its result is neither redefined nor consumed later in this block.
+				if (ins->mDst.mTemp >= 0 && !providedTemps[ins->mDst.mTemp] && !requiredTemps[ins->mDst.mTemp])
+				{
+					int j = 0;
+					while (j < ins->mNumOperands && !(ins->mSrc[j].mTemp >= 0 && providedTemps[ins->mSrc[j].mTemp]))
+						j++;
+					// j == mNumOperands means no operand temp is redefined by a later instruction.
+					if (j == ins->mNumOperands && IsMoveable(ins->mCode) && (ins->mCode != IC_LOAD || !hadStore))
+					{
+						if (mTrueJump->mNumEntries == 1 && trueExitRequiredTemps[ins->mDst.mTemp] && !falseExitRequiredTems[ins->mDst.mTemp])
+						{
+							for (int j = 0; j < ins->mNumOperands; j++)
+							{
+								if (ins->mSrc[j].mTemp >= 0)
+									trueExitRequiredTemps += ins->mSrc[j].mTemp;
+							}
+							mTrueJump->mInstructions.Insert(0, ins);
+							mInstructions.Remove(i);
+							moved = true;
+							changed = true;
+						}
+						else if (mFalseJump->mNumEntries == 1 && !trueExitRequiredTemps[ins->mDst.mTemp] && falseExitRequiredTems[ins->mDst.mTemp])
+						{
+							for (int j = 0; j < ins->mNumOperands; j++)
+							{
+								if (ins->mSrc[j].mTemp >= 0)
+									falseExitRequiredTems += ins->mSrc[j].mTemp;
+							}
+							mFalseJump->mInstructions.Insert(0, ins);
+							mInstructions.Remove(i);
+							moved = true;
+							changed = true;
+						}
+					}
+
+					providedTemps += ins->mDst.mTemp;
+				}
+
+				if (!moved)
+				{
+					for (int j = 0; j < ins->mNumOperands; j++)
+					{
+						if (ins->mSrc[j].mTemp >= 0)
+							requiredTemps += ins->mSrc[j].mTemp;
+					}
+				}
+				// Stores and copies write memory too, so an earlier load must not be sunk past them.
+				if (HasSideEffect(ins->mCode) || ins->mCode == IC_STORE || ins->mCode == IC_COPY || ins->mCode == IC_STRCPY)
+					hadStore = true;
+
+			}
+		}
+
+		if (mTrueJump && mTrueJump->PushSinglePathResultInstructions())
+			changed = true;
+		if (mFalseJump && mFalseJump->PushSinglePathResultInstructions())
+			changed = true;
+	}
+
+	return changed;
+}
+
 void InterCodeBasicBlock::RemoveNonRelevantStatics(void)
 {
 	int i;
@@ -4403,16 +4500,6 @@ InterCodeBasicBlock* InterCodeBasicBlock::PropagateDominator(InterCodeProcedure*
 	return mDominator ? mDominator : this;
 }
 
-bool IsMoveable(InterCode code)
-{
-	if (HasSideEffect(code) || code == IC_COPY || code == IC_STRCPY || code == IC_STORE || code == IC_BRANCH || code == IC_POP_FRAME || code == IC_PUSH_FRAME)
-		return false;
-	if (code == IC_RETURN || code == IC_RETURN_STRUCT || code == IC_RETURN_VALUE)
-		return false;
-
-	return true;
-}
-
 void InterCodeBasicBlock::SingleBlockLoopOptimisation(const NumberSet& aliasedParams)
 {
 	if (!mVisited)
@@ -5466,6 +5553,23 @@ void InterCodeProcedure::Close(void)
 
 	DisassembleDebug("Peephole optimized");
 
+	bool changed = false;
+	do
+	{
+		BuildDataFlowSets();
+
+		ResetVisited();
+		changed = mEntryBlock->PushSinglePathResultInstructions();
+
+	} while (changed);
+
+	BuildDataFlowSets();
+
+	TempForwarding();
+	RemoveUnusedInstructions();
+
+	DisassembleDebug("Moved single path instructions");
+
 	FastNumberSet activeSet(numTemps);
 
 	//
diff --git a/oscar64/InterCode.h b/oscar64/InterCode.h
index c9e8df9..4d9a6d3 100644
--- a/oscar64/InterCode.h
+++ b/oscar64/InterCode.h
@@ -502,6 +502,8 @@ public:
 	void MarkRelevantStatics(void);
 	void RemoveNonRelevantStatics(void);
 
+	bool PushSinglePathResultInstructions(void);
+
 	void PeepholeOptimization(void);
 	void SingleBlockLoopOptimisation(const NumberSet& aliasedParams);
 
diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp
index 3da1d6b..eb21a00 100644
--- a/oscar64/NativeCodeGenerator.cpp
+++ b/oscar64/NativeCodeGenerator.cpp
@@ -5732,6 +5732,9 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p
 				}
 				else
 				{
+					NativeCodeBasicBlock* lblock = nproc->AllocateBlock();
+					NativeCodeBasicBlock* eblock = nproc->AllocateBlock();
+
 					mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[0].mTemp]));
 					mIns.Push(NativeCodeInstruction(ASMIT_AND, ASMIM_IMMEDIATE, 0x0f));
 					mIns.Push(NativeCodeInstruction(ASMIT_TAX, ASMIM_IMPLIED));
@@ -5754,15 +5757,19 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p
 					}
 
 					mIns.Push(NativeCodeInstruction(ASMIT_CPX, ASMIM_IMMEDIATE, 0x00));
-					mIns.Push(NativeCodeInstruction(ASMIT_BEQ, ASMIM_RELATIVE, 2 + 1 + 1 + 2));
+					this->Close(lblock, eblock, ASMIT_BNE);
 
-					mIns.Push(NativeCodeInstruction(ASMIT_LSR, ASMIM_IMPLIED));
-					mIns.Push(NativeCodeInstruction(ASMIT_ROR, ASMIM_ZERO_PAGE, treg));
-					mIns.Push(NativeCodeInstruction(ASMIT_DEX, ASMIM_IMPLIED));
-					mIns.Push(NativeCodeInstruction(ASMIT_BNE, ASMIM_RELATIVE, -(2 + 1 + 1 + 2)));
+					lblock->mIns.Push(NativeCodeInstruction(ASMIT_LSR, ASMIM_IMPLIED));
+					lblock->mIns.Push(NativeCodeInstruction(ASMIT_ROR, ASMIM_ZERO_PAGE, treg + 0));
+					lblock->mIns.Push(NativeCodeInstruction(ASMIT_DEX, ASMIM_IMPLIED));
+					lblock->Close(lblock, eblock, ASMIT_BNE);
 
-					mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg + 1));
+					eblock->mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg + 1));
+					return eblock;
 				}
+
+
+
 			}	break;
 			case IA_SAR:
 			{
@@ -5858,11 +5865,13 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p
 
 						for (int i = 0; i < shift; i++)
 						{
-							mIns.Push(NativeCodeInstruction(ASMIT_CMP, ASMIM_IMMEDIATE, 0x80));
-							mIns.Push(NativeCodeInstruction(ASMIT_ROR, ASMIM_IMPLIED));
+							mIns.Push(NativeCodeInstruction(ASMIT_LSR, ASMIM_IMPLIED));
 							mIns.Push(NativeCodeInstruction(ASMIT_ROR, ASMIM_ZERO_PAGE, treg));
 						}
 
+						mIns.Push(NativeCodeInstruction(ASMIT_EOR, ASMIM_IMMEDIATE, 0x80 >> shift)); // (A ^ m) - m with m = 0x80 >> shift sign-extends the logically shifted high byte
+						mIns.Push(NativeCodeInstruction(ASMIT_SEC, ASMIM_IMPLIED));
+						mIns.Push(NativeCodeInstruction(ASMIT_SBC, ASMIM_IMMEDIATE, 0x80 >> shift));
 						mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg + 1));
 					}
 				}
@@ -5896,6 +5905,9 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p
 				}
 				else
 				{
+					NativeCodeBasicBlock* lblock = nproc->AllocateBlock();
+					NativeCodeBasicBlock* eblock = nproc->AllocateBlock();
+
 					mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[0].mTemp]));
 					mIns.Push(NativeCodeInstruction(ASMIT_AND, ASMIM_IMMEDIATE, 0x0f));
 					mIns.Push(NativeCodeInstruction(ASMIT_TAX, ASMIM_IMPLIED));
@@ -5918,15 +5930,16 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p
 					}
 
 					mIns.Push(NativeCodeInstruction(ASMIT_CPX, ASMIM_IMMEDIATE, 0x00));
-					mIns.Push(NativeCodeInstruction(ASMIT_BEQ, ASMIM_RELATIVE, 2 + 1 + 2 + 1 + 2));
+					this->Close(lblock, eblock, ASMIT_BNE);
 
-					mIns.Push(NativeCodeInstruction(ASMIT_CMP, ASMIM_IMMEDIATE, 0x80));
-					mIns.Push(NativeCodeInstruction(ASMIT_ROR, ASMIM_IMPLIED));
-					mIns.Push(NativeCodeInstruction(ASMIT_ROR, ASMIM_ZERO_PAGE, treg));
-					mIns.Push(NativeCodeInstruction(ASMIT_DEX, ASMIM_IMPLIED));
-					mIns.Push(NativeCodeInstruction(ASMIT_BNE, ASMIM_RELATIVE, -(2 + 1 + 2 + 1 + 2)));
+					lblock->mIns.Push(NativeCodeInstruction(ASMIT_CMP, ASMIM_IMMEDIATE, 0x80));
+					lblock->mIns.Push(NativeCodeInstruction(ASMIT_ROR, ASMIM_IMPLIED));
+					lblock->mIns.Push(NativeCodeInstruction(ASMIT_ROR, ASMIM_ZERO_PAGE, treg + 0));
+					lblock->mIns.Push(NativeCodeInstruction(ASMIT_DEX, ASMIM_IMPLIED));
+					lblock->Close(lblock, eblock, ASMIT_BNE);
 
-					mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg + 1));
+					eblock->mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg + 1));
+					return eblock;
 				}
 			}
 			break;