From fefa2e7d9a930de2e5b6a73858f2d11c4cef1ed8 Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Sat, 11 Dec 2021 23:04:38 +0100 Subject: [PATCH] Optimize jump to rts and simple loop invariants in native code generator --- oscar64/NativeCodeGenerator.cpp | 119 ++++++++++++++++++++++++-------- oscar64/NativeCodeGenerator.h | 5 +- 2 files changed, 95 insertions(+), 29 deletions(-) diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index b3eed0d..f4451cd 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -2523,22 +2523,39 @@ static AsmInsType TransposeBranchCondition(AsmInsType code) } -int NativeCodeBasicBlock::PutJump(NativeCodeProcedure* proc, int offset) +int NativeCodeBasicBlock::PutJump(NativeCodeProcedure* proc, NativeCodeBasicBlock* target) { - PutByte(0x4c); + if (target->mIns.Size() == 1 && target->mIns[0].mType == ASMIT_RTS) + { + PutByte(0x60); + return 1; + } + else + { + PutByte(0x4c); - LinkerReference rl; - rl.mObject = nullptr; - rl.mOffset = mCode.Size(); - rl.mFlags = LREF_LOWBYTE | LREF_HIGHBYTE; - rl.mRefObject = nullptr; - rl.mRefOffset = mOffset + mCode.Size() + offset - 1; - mRelocations.Push(rl); + LinkerReference rl; + rl.mObject = nullptr; + rl.mOffset = mCode.Size(); + rl.mFlags = LREF_LOWBYTE | LREF_HIGHBYTE; + rl.mRefObject = nullptr; + rl.mRefOffset = target->mOffset; + mRelocations.Push(rl); - PutWord(0); - return 3; + PutWord(0); + return 3; + } } +int NativeCodeBasicBlock::JumpByteSize(NativeCodeBasicBlock* target) +{ + if (target->mIns.Size() == 1 && target->mIns[0].mType == ASMIT_RTS) + return 1; + else + return 3; +} + + int NativeCodeBasicBlock::PutBranch(NativeCodeProcedure* proc, AsmInsType code, int offset) { if (offset >= -126 && offset <= 129) @@ -4443,6 +4460,8 @@ void NativeCodeBasicBlock::ShiftRegisterLeft(InterCodeProcedure* proc, int reg, int NativeCodeBasicBlock::ShortMultiply(InterCodeProcedure* proc, NativeCodeProcedure* nproc, const InterInstruction * ins, const InterInstruction* sins, int index, int mul) { + mul &= 0xffff; + if (sins) LoadValueToReg(proc, sins, BC_REG_ACCU, nullptr, nullptr); else @@ -4573,11 +4592,24 @@ int NativeCodeBasicBlock::ShortMultiply(InterCodeProcedure* proc, NativeCodeProc #endif #endif default: - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, mul)); - mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 0)); + if (mul & 0xff00) + { + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, mul & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 0)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, mul >> 8)); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 1)); - NativeCodeGenerator::Runtime& rt(nproc->mGenerator->ResolveRuntime(Ident::Unique("mul16by8"))); - mIns.Push(NativeCodeInstruction(ASMIT_JSR, ASMIM_ABSOLUTE, rt.mOffset, rt.mLinkerObject, NCIF_RUNTIME)); + NativeCodeGenerator::Runtime& frt(nproc->mGenerator->ResolveRuntime(Ident::Unique("mul16"))); + mIns.Push(NativeCodeInstruction(ASMIT_JSR, ASMIM_ABSOLUTE, frt.mOffset, frt.mLinkerObject, NCIF_RUNTIME)); + } + else + { + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, mul)); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 0)); + + NativeCodeGenerator::Runtime& rt(nproc->mGenerator->ResolveRuntime(Ident::Unique("mul16by8"))); + mIns.Push(NativeCodeInstruction(ASMIT_JSR, ASMIM_ABSOLUTE, rt.mOffset, rt.mLinkerObject, NCIF_RUNTIME)); + } return BC_REG_WORK + 2; } @@ -5531,13 +5563,13 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p { int reg = BC_REG_ACCU; - if (ins->mOperator == IA_MUL && ins->mSrc[1].mTemp < 0 && (ins->mSrc[1].mIntConst & ~0xff) == 0) + if (ins->mOperator == IA_MUL && ins->mSrc[1].mTemp < 0) { - reg = ShortMultiply(proc, nproc, ins, sins0, 0, ins->mSrc[1].mIntConst & 0xff); + reg = ShortMultiply(proc, nproc, ins, sins0, 0, ins->mSrc[1].mIntConst); } - else if (ins->mOperator == IA_MUL && ins->mSrc[0].mTemp < 0 && (ins->mSrc[0].mIntConst & ~0xff) == 0) + else if (ins->mOperator == IA_MUL && ins->mSrc[0].mTemp < 0) { - reg = ShortMultiply(proc, nproc, ins, sins1, 1, ins->mSrc[0].mIntConst & 0xff); + reg = ShortMultiply(proc, nproc, ins, sins1, 1, ins->mSrc[0].mIntConst); } else { @@ -8860,6 +8892,32 @@ bool NativeCodeBasicBlock::ValueForwarding(const NativeRegisterDataSet& data, bo return changed; } +void NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc, NativeCodeBasicBlock* lblock) +{ + if (lblock->mIns[0].mType == ASMIT_LDA && lblock->mIns[0].mMode == ASMIM_IMMEDIATE) + { + int i = 1; + while (i < lblock->mIns.Size() && !lblock->mIns[i].ChangesAccu()) + i++; + if (i == lblock->mIns.Size()) + { + mIns.Push(lblock->mIns[0]); + lblock->mIns.Remove(0); + } + } + else if (lblock->mIns[0].mType == ASMIT_LDA && lblock->mIns[0].mMode == ASMIM_ZERO_PAGE) + { + int i = 1; + while (i < lblock->mIns.Size() && !lblock->mIns[i].ChangesAccu() && !lblock->mIns[i].ChangesZeroPage(lblock->mIns[0].mAddress)) + i++; + if (i == lblock->mIns.Size()) + { + mIns.Push(lblock->mIns[0]); + lblock->mIns.Remove(0); + } + } +} + bool NativeCodeBasicBlock::OptimizeSimpleLoop(NativeCodeProcedure * proc) { if (!mVisited) @@ -8953,6 +9011,8 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoop(NativeCodeProcedure * proc) mTrueJump = lblock; mFalseJump = nullptr; + OptimizeSimpleLoopInvariant(proc, lblock); + changed = true; assert(mIns.Size() == 0 || mIns[0].mType != ASMIT_INV); @@ -8985,6 +9045,8 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoop(NativeCodeProcedure * proc) mTrueJump = lblock; mFalseJump = nullptr; + OptimizeSimpleLoopInvariant(proc, lblock); + changed = true; assert(mIns.Size() == 0 || mIns[0].mType != ASMIT_INV); @@ -9041,6 +9103,8 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoop(NativeCodeProcedure * proc) mTrueJump = lblock; mFalseJump = nullptr; + OptimizeSimpleLoopInvariant(proc, lblock); + changed = true; assert(mIns.Size() == 0 || mIns[0].mType != ASMIT_INV); @@ -9121,6 +9185,8 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoop(NativeCodeProcedure * proc) mTrueJump = lblock; mFalseJump = nullptr; + OptimizeSimpleLoopInvariant(proc, lblock); + changed = true; assert(mIns.Size() == 0 || mIns[0].mType != ASMIT_INV); @@ -9153,6 +9219,8 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoop(NativeCodeProcedure * proc) mTrueJump = lblock; mFalseJump = nullptr; + OptimizeSimpleLoopInvariant(proc, lblock); + changed = true; assert(mIns.Size() == 0 || mIns[0].mType != ASMIT_INV); @@ -11369,11 +11437,6 @@ static int BranchByteSize(int from, int to) return 5; } -static int JumpByteSize(int from, int to) -{ - return 3; -} - NativeCodeBasicBlock* NativeCodeBasicBlock::BypassEmptyBlocks(void) { if (mBypassed) @@ -11480,13 +11543,13 @@ bool NativeCodeBasicBlock::CalculateOffset(int& total) else { total += BranchByteSize(total, mTrueJump->mOffset); - total += JumpByteSize(total, mFalseJump->mOffset); + total += JumpByteSize(mFalseJump); } } else if (mTrueJump) { if (mTrueJump->mPlace != mPlace + 1) - total += JumpByteSize(total, mTrueJump->mOffset); + total += JumpByteSize(mTrueJump); } if (mOffset + mSize != total) @@ -11515,13 +11578,13 @@ void NativeCodeBasicBlock::CopyCode(NativeCodeProcedure * proc, uint8* target) else { end += PutBranch(proc, mBranch, mTrueJump->mOffset - end); - end += PutJump(proc, mFalseJump->mOffset - end); + end += PutJump(proc, mFalseJump); } } else if (mTrueJump) { if (mTrueJump->mPlace != mPlace + 1) - end += PutJump(proc, mTrueJump->mOffset - end); + end += PutJump(proc, mTrueJump); } assert(end == next); diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index 410d539..355bb2a 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -113,7 +113,8 @@ public: NativeRegisterDataSet mDataSet, mNDataSet; int PutBranch(NativeCodeProcedure* proc, AsmInsType code, int offset); - int PutJump(NativeCodeProcedure* proc, int offset); + int PutJump(NativeCodeProcedure* proc, NativeCodeBasicBlock* target); + int JumpByteSize(NativeCodeBasicBlock * target); NativeCodeBasicBlock* BypassEmptyBlocks(void); @@ -128,6 +129,8 @@ public: bool RemoveNops(void); bool PeepHoleOptimizer(int pass); void BlockSizeReduction(void); + + void OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc, NativeCodeBasicBlock * lblock); bool OptimizeSimpleLoop(NativeCodeProcedure* proc); bool OptimizeInnerLoop(NativeCodeProcedure* proc, NativeCodeBasicBlock* head, NativeCodeBasicBlock* tail, GrowingArray& blocks);