From bffef3e9ddcfdbb0e986caa574780e629a50677c Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Sun, 26 Sep 2021 13:14:56 +0200 Subject: [PATCH] Basic block tail merging --- autotest/floatmultest.c | 4 +- oscar64/ByteCodeGenerator.cpp | 169 ++++++++++++++++++++++++++++++-- oscar64/ByteCodeGenerator.h | 10 ++ oscar64/InterCode.cpp | 20 +++- oscar64/NativeCodeGenerator.cpp | 113 ++++++++++++++++++++- oscar64/NativeCodeGenerator.h | 9 ++ 6 files changed, 312 insertions(+), 13 deletions(-) diff --git a/autotest/floatmultest.c b/autotest/floatmultest.c index f95cad7..35a1809 100644 --- a/autotest/floatmultest.c +++ b/autotest/floatmultest.c @@ -12,7 +12,7 @@ int main(void) for(i=0; i<50; i++) { -// printf("%d %f %f %f\n", i, i * c, a, i * c - a); + printf("%d %f %f %f\n", i, i * c, a, i * c - a); assert(i * c == a); a += c; } @@ -21,7 +21,7 @@ int main(void) for(i=1; i<50; i++) { -// printf("%d %f %f %f\n", i, i * d, a, fabs(i * d - a) / i); + printf("%d %f %f %f\n", i, i * d, a, fabs(i * d - a) / i); assert(fabs(i * d - a) < i * 1.0e-6); a += d; } diff --git a/oscar64/ByteCodeGenerator.cpp b/oscar64/ByteCodeGenerator.cpp index 9d16d80..a180508 100644 --- a/oscar64/ByteCodeGenerator.cpp +++ b/oscar64/ByteCodeGenerator.cpp @@ -105,6 +105,32 @@ bool ByteCodeInstruction::IsCommutative(void) const return false; } +bool ByteCodeInstruction::IsLocalStore(void) const +{ + return mCode >= BC_STORE_LOCAL_8 && mCode <= BC_STORE_LOCAL_32; +} + +bool ByteCodeInstruction::IsLocalLoad(void) const +{ + return mCode >= BC_LOAD_LOCAL_8 && mCode <= BC_LOAD_LOCAL_32 || mCode == BC_COPY; +} + +bool ByteCodeInstruction::IsLocalAccess(void) const +{ + return IsLocalStore() || IsLocalLoad(); +} + + +bool ByteCodeInstruction::IsSame(const ByteCodeInstruction& ins) const +{ + if (mCode == ins.mCode && mValue == ins.mValue && mRegister == ins.mRegister && mLinkerObject == ins.mLinkerObject) + { + return true; + } + + return false; +} + bool ByteCodeInstruction::StoresRegister(uint32 reg) const { if (mRegister == reg) @@ -529,7 +555,7 @@ int ByteCodeBasicBlock::PutBranch(ByteCodeGenerator* generator, ByteCode code, i } ByteCodeBasicBlock::ByteCodeBasicBlock(void) - : mRelocations({ 0 }), mIns(ByteCodeInstruction(BC_NOP)) + : mRelocations({ 0 }), mIns(ByteCodeInstruction(BC_NOP)), mEntryBlocks(nullptr) { mTrueJump = mFalseJump = NULL; mTrueLink = mFalseLink = NULL; @@ -2530,6 +2556,78 @@ void ByteCodeBasicBlock::Compile(InterCodeProcedure* iproc, ByteCodeProcedure* p this->Close(proc->CompileBlock(iproc, sblock->mTrueJump), nullptr, BC_JUMPS); } +void ByteCodeBasicBlock::CollectEntryBlocks(ByteCodeBasicBlock* block) +{ + if (block) + mEntryBlocks.Push(block); + + if (!mVisited) + { + mVisited = true; + + if (mTrueJump) + mTrueJump->CollectEntryBlocks(this); + if (mFalseJump) + mFalseJump->CollectEntryBlocks(this); + } +} + +bool ByteCodeBasicBlock::SameTail(ByteCodeInstruction& ins) +{ + if (mIns.Size() > 0) + return mIns[mIns.Size() - 1].IsSame(ins); + else + return false; +} + +bool ByteCodeBasicBlock::JoinTailCodeSequences(void) +{ + bool changed = false; + + if (!mVisited) + { + mVisited = true; + + if (mEntryBlocks.Size() > 1) + { + int i = 0; + while (i < mEntryBlocks.Size() && mEntryBlocks[i]->mBranch == BC_JUMPS) + i++; + if (i == mEntryBlocks.Size()) + { + ByteCodeBasicBlock* eb = mEntryBlocks[0]; + + while (eb->mIns.Size() > 0) + { + ByteCodeInstruction& ins(eb->mIns[eb->mIns.Size() - 1]); + i = 1; + while (i < mEntryBlocks.Size() && mEntryBlocks[i]->SameTail(ins)) + i++; + if (i == mEntryBlocks.Size()) + { + mIns.Insert(0, ins); + for (int i = 0; i < mEntryBlocks.Size(); i++) + { + ByteCodeBasicBlock* b = mEntryBlocks[i]; + b->mIns.SetSize(b->mIns.Size() - 1); + } + changed = true; + } + else + break; + } + } + } + + if (mTrueJump && mTrueJump->JoinTailCodeSequences()) + changed = true; + if (mFalseJump && mFalseJump->JoinTailCodeSequences()) + changed = true; + } + + return changed; +} + bool ByteCodeBasicBlock::PeepHoleOptimizer(void) { @@ -2582,6 +2680,7 @@ bool ByteCodeBasicBlock::PeepHoleOptimizer(void) mIns[i + 2].mCode = BC_NOP; if (mIns[i + 2].mRegisterFinal) mIns[i].mCode = BC_NOP; + progress = true; } else if (mIns[i].mCode == BC_STORE_REG_32 && !mIns[i + 1].ChangesAccu() && mIns[i + 1].mRegister != mIns[i].mRegister && @@ -2590,6 +2689,7 @@ bool ByteCodeBasicBlock::PeepHoleOptimizer(void) mIns[i + 2].mCode = BC_NOP; if (mIns[i + 2].mRegisterFinal) mIns[i].mCode = BC_NOP; + progress = true; } else if (mIns[i].mCode == BC_STORE_REG_16 && !mIns[i + 1].ChangesAccu() && mIns[i + 1].mRegister != mIns[i].mRegister && @@ -2597,6 +2697,7 @@ bool ByteCodeBasicBlock::PeepHoleOptimizer(void) { mIns[i].mCode = BC_NOP; mIns[i + 2].mRegister = BC_REG_ACCU; + progress = true; } else if (mIns[i].mCode == BC_STORE_REG_16 && !mIns[i + 1].ChangesAddr() && mIns[i + 1].mRegister != mIns[i].mRegister && @@ -2605,6 +2706,7 @@ bool ByteCodeBasicBlock::PeepHoleOptimizer(void) mIns[i].mCode = BC_ADDR_REG; mIns[i].mRegister = BC_REG_ACCU; mIns[i + 2].mCode = BC_NOP; + progress = true; } else if ( mIns[i + 2].mCode == BC_BINOP_ADDR_16 && @@ -2614,6 +2716,7 @@ bool ByteCodeBasicBlock::PeepHoleOptimizer(void) mIns[i + 0].mRegister = BC_REG_ACCU; mIns[i + 1].mCode = mIns[i + 2].mCode; mIns[i + 2].mCode = BC_NOP; + progress = true; } else if ( mIns[i + 2].mCode == BC_BINOP_ADDR_16 && @@ -2623,6 +2726,7 @@ bool ByteCodeBasicBlock::PeepHoleOptimizer(void) mIns[i + 0].mCode = BC_NOP;; mIns[i + 1].mCode = mIns[i + 2].mCode; mIns[i + 2].mCode = BC_NOP; + progress = true; } else if (mIns[i].mCode == BC_STORE_REG_32 && mIns[i + 1].mCode == BC_LOAD_REG_32 && mIns[i + 1].mRegister != mIns[i + 2].mRegister && @@ -2633,6 +2737,7 @@ bool ByteCodeBasicBlock::PeepHoleOptimizer(void) mIns[i + 1].mCode = BC_NOP; mIns[i + 2].mRegister = mIns[i + 1].mRegister; mIns[i + 2].mRegisterFinal = mIns[i + 1].mRegisterFinal; + progress = true; } else if (mIns[i].mCode == BC_STORE_REG_16 && mIns[i + 1].mCode == BC_LOAD_REG_16 && mIns[i + 1].mRegister != mIns[i + 2].mRegister && @@ -2643,6 +2748,7 @@ bool ByteCodeBasicBlock::PeepHoleOptimizer(void) mIns[i + 1].mCode = BC_NOP; mIns[i + 2].mRegister = mIns[i + 1].mRegister; mIns[i + 2].mRegisterFinal = mIns[i + 1].mRegisterFinal; + progress = true; } else if (mIns[i + 0].mCode == BC_STORE_REG_32 && mIns[i + 2].mCode == BC_BINOP_CMP_F32 && mIns[i + 2].mRegister == mIns[i + 0].mRegister && mIns[i + 2].mRegisterFinal && @@ -2651,24 +2757,34 @@ bool ByteCodeBasicBlock::PeepHoleOptimizer(void) mIns[i + 1].mRegister = mIns[i + 0].mRegister; mIns[i + 0].mCode = BC_NOP; mBranch = TransposeBranchCondition(mBranch); + progress = true; } else if (mIns[i + 0].mCode == BC_LOAD_REG_16 && mIns[i + 1].mCode == BC_STORE_REG_16 && mIns[i + 2].mCode == BC_LOAD_REG_16 && mIns[i + 0].mRegister == mIns[i + 2].mRegister) { mIns[i + 2].mCode = BC_NOP; + progress = true; } else if (mIns[i + 0].mCode == BC_CONST_16 && mIns[i + 2].mCode == BC_CONST_16 && mIns[i + 0].mRegister == mIns[i + 2].mRegister && mIns[i + 0].mValue == mIns[i + 2].mValue && !mIns[i + 1].ChangesRegister(mIns[i + 0].mRegister)) { mIns[i + 2].mCode = BC_NOP; + progress = true; } else if (mIns[i + 0].mCode >= BC_SET_EQ && mIns[i + 0].mCode <= BC_SET_LE && mIns[i + 1].mCode == BC_STORE_REG_8 && mIns[i + 2].mCode == BC_LOAD_REG_8 && mIns[i + 1].mRegister == mIns[i + 2].mRegister) { mIns[i + 2].mCode = BC_NOP; + progress = true; + } + else if (mIns[i + 0].IsLocalStore() && mIns[i + 2].IsSame(mIns[i + 0]) && !mIns[i + 1].IsLocalAccess() && mIns[i + 1].mCode != BC_JSR) + { + mIns[i + 0].mCode = BC_NOP; + progress = true; } } + if (i + 1 < mIns.Size()) { if (mIns[i].mCode == BC_STORE_REG_16 && mIns[i + 1].mCode == BC_LOAD_REG_16 && mIns[i].mRegister == mIns[i + 1].mRegister) @@ -2756,12 +2872,30 @@ bool ByteCodeBasicBlock::PeepHoleOptimizer(void) mIns[i + 1].mCode = BC_NOP; progress = true; } + else if (mIns[i].mCode == BC_BINOP_ADDI_16 && mIns[i + 1].mCode == BC_BINOP_ADDI_16 && mIns[i].mRegister == mIns[i + 1].mRegister) + { + mIns[i + 1].mValue += mIns[i].mValue; + mIns[i].mCode = BC_NOP; + progress = true; + } + else if (mIns[i].mCode == BC_BINOP_ADDI_16 && mIns[i].mValue == 0) + { + mIns[i].mCode = BC_NOP; + progress = true; + } } if ((mIns[i].mCode == BC_LOAD_REG_16 || mIns[i].mCode == BC_STORE_REG_16 || mIns[i].mCode == BC_LOAD_REG_32 || mIns[i].mCode == BC_STORE_REG_32) && accuTemp == mIns[i].mRegister) + { mIns[i].mCode = BC_NOP; + progress = true; + } + if (mIns[i].mCode == BC_ADDR_REG && mIns[i].mRegister == addrTemp) + { mIns[i].mCode = BC_NOP; + progress = true; + } if (mIns[i].ChangesAccu()) accuTemp = -1; @@ -3072,6 +3206,32 @@ void ByteCodeProcedure::Compile(ByteCodeGenerator* generator, InterCodeProcedure exitBlock = new ByteCodeBasicBlock(); mBlocks.Push(exitBlock); + entryBlock->Compile(proc, this, proc->mBlocks[0]); + +#if 1 + bool progress = false; + + do { + + progress = false; + + ResetVisited(); + progress = entryBlock->PeepHoleOptimizer(); + + ResetVisited(); + for (int i = 0; i < mBlocks.Size(); i++) + mBlocks[i]->mEntryBlocks.SetSize(0); + entryBlock->CollectEntryBlocks(nullptr); + + ResetVisited(); + if (entryBlock->JoinTailCodeSequences()) + progress = true; + + } while (progress); +#endif + + entryBlock->Assemble(generator); + if (!proc->mLeafProcedure) { exitBlock->PutCode(generator, BC_POP_FRAME); @@ -3079,13 +3239,6 @@ void ByteCodeProcedure::Compile(ByteCodeGenerator* generator, InterCodeProcedure } exitBlock->PutCode(generator, BC_RETURN); exitBlock->PutByte(tempSave); exitBlock->PutWord(proc->mLocalSize + 2 + tempSave); - entryBlock->Compile(proc, this, proc->mBlocks[0]); - - bool progress = false; - ResetVisited(); - progress = entryBlock->PeepHoleOptimizer(); - - entryBlock->Assemble(generator); int total; diff --git a/oscar64/ByteCodeGenerator.h b/oscar64/ByteCodeGenerator.h index 295b5bd..8903210 100644 --- a/oscar64/ByteCodeGenerator.h +++ b/oscar64/ByteCodeGenerator.h @@ -178,8 +178,12 @@ public: bool ChangesRegister(uint32 reg) const; bool LoadsRegister(uint32 reg) const; bool StoresRegister(uint32 reg) const; + bool IsLocalStore(void) const; + bool IsLocalLoad(void) const; + bool IsLocalAccess(void) const; bool IsCommutative(void) const; + bool IsSame(const ByteCodeInstruction& ins) const; bool ValueForwarding(ByteCodeInstruction*& accuIns, ByteCodeInstruction*& addrIns); }; @@ -196,6 +200,7 @@ public: GrowingArray mIns; GrowingArray mRelocations; + GrowingArray mEntryBlocks; int mOffset, mSize; bool mPlaced, mCopied, mKnownShortBranch, mBypassed, mAssembled, mVisited; @@ -236,6 +241,11 @@ public: void BinaryIntOperator(InterCodeProcedure* proc, const InterInstruction * ins, ByteCode code); void NumericConversion(InterCodeProcedure* proc, const InterInstruction * ins); + void CollectEntryBlocks(ByteCodeBasicBlock * block); + + bool JoinTailCodeSequences(void); + bool SameTail(ByteCodeInstruction& ins); + bool PeepHoleOptimizer(void); }; diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index 2e98f3f..1909b05 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -3243,9 +3243,25 @@ void InterCodeBasicBlock::PeepholeOptimization(void) { mVisited = true; + // Remove none instructions + + int j = 0; + for (i = 0; i < mInstructions.Size(); i++) + { + if (mInstructions[i]->mCode != IC_NONE) + { + mInstructions[j++] = mInstructions[i]; + } + } + mInstructions.SetSize(j); + // shorten lifespan - int i = mInstructions.Size() - 2; + int limit = mInstructions.Size() - 1; + if (limit >= 2 && mInstructions[limit]->mCode == IC_BRANCH) + limit -= 2; + + int i = limit; while (i >= 0) { @@ -3254,7 +3270,7 @@ void InterCodeBasicBlock::PeepholeOptimization(void) { InterInstruction * ins(mInstructions[i]); int j = i; - while (j + 2 < mInstructions.Size() && CanBypassLoad(ins, mInstructions[j + 1])) + while (j < limit && CanBypassLoad(ins, mInstructions[j + 1])) { mInstructions[j] = mInstructions[j + 1]; j++; diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index 55f6ecf..cac624c 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -495,6 +495,35 @@ bool NativeCodeInstruction::IsCommutative(void) const } +bool NativeCodeInstruction::IsSame(const NativeCodeInstruction& ins) const +{ + if (mType == ins.mType && mMode == ins.mMode) + { + switch (mMode) + { + case ASMIM_IMPLIED: + return true; + case ASMIM_IMMEDIATE: + case ASMIM_ZERO_PAGE: + case ASMIM_ZERO_PAGE_X: + case ASMIM_ZERO_PAGE_Y: + case ASMIM_INDIRECT_X: + case ASMIM_INDIRECT_Y: + return ins.mAddress == mAddress; + case ASMIM_IMMEDIATE_ADDRESS: + return (ins.mLinkerObject == mLinkerObject && ins.mAddress == mAddress && ins.mFlags == mFlags); + case ASMIM_ABSOLUTE: + case ASMIM_ABSOLUTE_X: + case ASMIM_ABSOLUTE_Y: + return (ins.mLinkerObject == mLinkerObject && ins.mAddress == mAddress); + default: + return false; + } + } + else + return false; +} + bool NativeCodeInstruction::SameEffectiveAddress(const NativeCodeInstruction& ins) const { if (mMode != ins.mMode) @@ -4124,6 +4153,79 @@ bool NativeCodeBasicBlock::MergeBasicBlocks(void) return changed; } +void NativeCodeBasicBlock::CollectEntryBlocks(NativeCodeBasicBlock* block) +{ + if (block) + mEntryBlocks.Push(block); + + if (!mVisited) + { + mVisited = true; + + if (mTrueJump) + mTrueJump->CollectEntryBlocks(this); + if (mFalseJump) + mFalseJump->CollectEntryBlocks(this); + } +} + +bool NativeCodeBasicBlock::SameTail(const NativeCodeInstruction& ins) const +{ + if (mIns.Size() > 0) + return mIns[mIns.Size() - 1].IsSame(ins); + else + return false; +} + +bool NativeCodeBasicBlock::JoinTailCodeSequences(void) +{ + bool changed = false; + + if (!mVisited) + { + mVisited = true; + + if (mEntryBlocks.Size() > 1) + { + int i = 0; + while (i < mEntryBlocks.Size() && mEntryBlocks[i]->mBranch == ASMIT_JMP) + i++; + if (i == mEntryBlocks.Size()) + { + NativeCodeBasicBlock* eb = mEntryBlocks[0]; + + while (eb->mIns.Size() > 0) + { + NativeCodeInstruction& ins(eb->mIns[eb->mIns.Size() - 1]); + i = 1; + while (i < mEntryBlocks.Size() && mEntryBlocks[i]->SameTail(ins)) + i++; + if (i == mEntryBlocks.Size()) + { + mIns.Insert(0, ins); + for (int i = 0; i < mEntryBlocks.Size(); i++) + { + NativeCodeBasicBlock* b = mEntryBlocks[i]; + b->mIns.SetSize(b->mIns.Size() - 1); + } + changed = true; + } + else + break; + } + } + } + + if (mTrueJump && mTrueJump->JoinTailCodeSequences()) + changed = true; + if (mFalseJump && mFalseJump->JoinTailCodeSequences()) + changed = true; + } + + return changed; +} + + bool NativeCodeBasicBlock::FindGlobalAddress(int at, int reg, int& apos) { int j = at - 4; @@ -4986,7 +5088,7 @@ void NativeCodeBasicBlock::CalculateOffset(int& total) } NativeCodeBasicBlock::NativeCodeBasicBlock(void) - : mIns(NativeCodeInstruction(ASMIT_INV, ASMIM_IMPLIED)), mRelocations({ 0 }) + : mIns(NativeCodeInstruction(ASMIT_INV, ASMIM_IMPLIED)), mRelocations({ 0 }), mEntryBlocks(nullptr) { mTrueJump = mFalseJump = NULL; mOffset = 0x7fffffff; @@ -5175,6 +5277,15 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc) if (entryBlock->MergeBasicBlocks()) changed = true; + ResetVisited(); + for (int i = 0; i < mBlocks.Size(); i++) + mBlocks[i]->mEntryBlocks.SetSize(0); + entryBlock->CollectEntryBlocks(nullptr); + + ResetVisited(); + if (entryBlock->JoinTailCodeSequences()) + changed = true; + } while (changed); #endif diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index 83cfffb..f99be0c 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -48,12 +48,14 @@ public: bool IsUsedResultInstructions(NumberSet& requiredTemps); bool ValueForwarding(NativeRegisterDataSet& data); + bool LoadsAccu(void) const; bool ChangesAccuAndFlag(void) const; bool ChangesAddress(void) const; bool ChangesAccu(void) const; bool RequiresAccu(void) const; bool SameEffectiveAddress(const NativeCodeInstruction& ins) const; + bool IsSame(const NativeCodeInstruction& ins) const; bool IsCommutative(void) const; }; @@ -72,6 +74,8 @@ public: GrowingArray mIns; GrowingArray mRelocations; + GrowingArray mEntryBlocks; + int mOffset, mSize, mNumEntries; bool mPlaced, mCopied, mKnownShortBranch, mBypassed, mAssembled, mNoFrame, mVisited; @@ -132,6 +136,11 @@ public: bool ValueForwarding(const NativeRegisterDataSet& data); + void CollectEntryBlocks(NativeCodeBasicBlock* block); + + bool JoinTailCodeSequences(void); + bool SameTail(const NativeCodeInstruction& ins) const; + }; class NativeCodeProcedure