From 80efe2351bf996bb215697dcdcfc7a19d8e1f197 Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Sun, 28 Jan 2024 19:09:59 +0100 Subject: [PATCH] Optimize short wait loop handling --- include/c128/vdc.c | 2 +- oscar64/InterCode.cpp | 41 ++++++- oscar64/InterCode.h | 1 + oscar64/Linker.cpp | 91 +++++++++++++- oscar64/Linker.h | 1 + oscar64/NativeCodeGenerator.cpp | 205 ++++++++++++++++++++++++++++++-- oscar64/NativeCodeGenerator.h | 2 + 7 files changed, 321 insertions(+), 22 deletions(-) diff --git a/include/c128/vdc.c b/include/c128/vdc.c index ccfdec1..3cdc437 100644 --- a/include/c128/vdc.c +++ b/include/c128/vdc.c @@ -34,8 +34,8 @@ byte vdc_reg_read(VDCRegister reg) void vdc_mem_addr(unsigned addr) { - vdc_reg_write(VDCR_ADDRH, addr >> 8); vdc_reg_write(VDCR_ADDRL, addr); + vdc_reg_write(VDCR_ADDRH, addr >> 8); vdc_reg(VDCR_DATA); } diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index 941dd13..f31d45e 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -3368,6 +3368,17 @@ void InterOperand::ForwardMem(const InterOperand& op) } void InterOperand::Forward(const InterOperand& op) +{ + mTemp = op.mTemp; + if (mType != IT_INT8 || op.mType != IT_INT16 && op.mType != IT_INT32) + mType = op.mType; + mRange = op.mRange; + mIntConst = op.mIntConst; + mFloatConst = op.mFloatConst; + mFinal = false; +} + +void InterOperand::ForwardTemp(const InterOperand& op) { mTemp = op.mTemp; if (mType != IT_INT8 || op.mType != IT_INT16 && op.mType != IT_INT32) @@ -9699,13 +9710,13 @@ bool InterCodeBasicBlock::EliminateAliasValues(const GrowingInstructionPtrArray& for (int j = 0; j < ins->mNumOperands; j++) { - if (ins->mSrc[j].mTemp > 0 && lavalue[ins->mSrc[j].mTemp]) + if (ins->mSrc[j].mTemp >= 0 && lavalue[ins->mSrc[j].mTemp]) { InterInstruction* mins = lavalue[ins->mSrc[j].mTemp]; if (mExitRequiredTemps[mins->mDst.mTemp] && !mExitRequiredTemps[mins->mSrc[0].mTemp]) { - ins->mSrc[j].Forward(mins->mDst); + ins->mSrc[j].ForwardTemp(mins->mDst); changed = true; } } @@ -10329,7 +10340,7 @@ bool InterCodeBasicBlock::SimplifyIntegerNumeric(const GrowingInstructionPtrArra if (pins->mSrc[0].mTemp < 0 && ins->mSrc[1].mIntConst + pins->mSrc[0].mIntConst >= 0) { - ins->mSrc[1].Forward(pins->mSrc[1]); + ins->mSrc[1].ForwardTemp(pins->mSrc[1]); pins->mSrc[1].mFinal = false; ins->mSrc[1].mIntConst += pins->mSrc[0].mIntConst; changed = true; @@ -11277,6 +11288,7 @@ bool InterCodeBasicBlock::LoadStoreForwarding(const GrowingInstructionPtrArray& flushMem = true; else if (ins->mCode == IC_LEA || ins->mCode == IC_UNARY_OPERATOR || ins->mCode == IC_BINARY_OPERATOR || ins->mCode == IC_RELATIONAL_OPERATOR || ins->mCode == IC_CONVERSION_OPERATOR) { + // int j = 0; while (j < mLoadStoreInstructions.Size() && !SameInstruction(ins, mLoadStoreInstructions[j])) j++; @@ -13384,7 +13396,24 @@ InterCodeBasicBlock* InterCodeBasicBlock::BuildLoopPrefix(void) bool InterCodeBasicBlock::CollectLoopBody(InterCodeBasicBlock* head, ExpandingArray & body) { if (mLoopHead) + { +#if 0 return this == head; +#else + if (this == head) + return true; + else if ((mTrueJump == this || mFalseJump == this) && mEntryBlocks.Size() == 2) + { + int j = 0; + while (j < mInstructions.Size() && (mInstructions[j]->mDst.mTemp < 0 || !mExitRequiredTemps[mInstructions[j]->mDst.mTemp])) + j++; + if (j != mInstructions.Size()) + return false; + } + else + return false; +#endif + } if (body.IndexOf(this) != -1) return true; @@ -16941,7 +16970,7 @@ bool InterCodeBasicBlock::PeepholeReplaceOptimization(const GrowingVariableArray mInstructions[i + 1]->mCode == IC_LEA && mInstructions[i + 1]->mSrc[0].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[0].mFinal ) { - mInstructions[i + 1]->mSrc[0].Forward(mInstructions[i + 0]->mSrc[0]); + mInstructions[i + 1]->mSrc[0].ForwardTemp(mInstructions[i + 0]->mSrc[0]); mInstructions[i + 0]->mCode = IC_NONE; mInstructions[i + 0]->mNumOperands = 0; changed = true; } @@ -16952,7 +16981,7 @@ bool InterCodeBasicBlock::PeepholeReplaceOptimization(const GrowingVariableArray mInstructions[i + 1]->mCode == IC_LOAD && mInstructions[i + 1]->mSrc[0].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[0].mFinal ) { - mInstructions[i + 1]->mSrc[0].Forward(mInstructions[i + 0]->mSrc[0]); + mInstructions[i + 1]->mSrc[0].ForwardTemp(mInstructions[i + 0]->mSrc[0]); mInstructions[i + 0]->mCode = IC_NONE; mInstructions[i + 0]->mNumOperands = 0; changed = true; } @@ -19873,7 +19902,7 @@ void InterCodeProcedure::Close(void) { GrowingTypeArray tstack(IT_NONE); - CheckFunc = !strcmp(mIdent->mString, "test"); + CheckFunc = !strcmp(mIdent->mString, "strcat"); CheckCase = false; mEntryBlock = mBlocks[0]; diff --git a/oscar64/InterCode.h b/oscar64/InterCode.h index 053ed34..2aa1c2a 100644 --- a/oscar64/InterCode.h +++ b/oscar64/InterCode.h @@ -274,6 +274,7 @@ public: IntegerValueRange mRange; void Forward(const InterOperand& op); + void ForwardTemp(const InterOperand& op); void ForwardMem(const InterOperand& op); InterOperand(void); diff --git a/oscar64/Linker.cpp b/oscar64/Linker.cpp index a5fb477..8ba675f 100644 --- a/oscar64/Linker.cpp +++ b/oscar64/Linker.cpp @@ -536,16 +536,95 @@ void Linker::ReferenceObject(LinkerObject* obj) } } +bool LinkerRegion::AllocateAppend(Linker* linker, LinkerObject* lobj) +{ + if (lobj->mPrefix && (lobj->mPrefix->mFlags & LOBJF_PLACED)) + { + if (lobj->mPrefix == mLastObject) + { + int start = mStart + mUsed - 3; + int end = start + lobj->mSize; + + if (end <= mEnd) + { + lobj->mPrefix->mReferences[lobj->mPrefix->mSuffixReference]->mFlags = 0; + lobj->mPrefix->mSize -= 3; + + lobj->mFlags |= LOBJF_PLACED; + lobj->mAddress = start; + lobj->mRefAddress = start + mReloc; + lobj->mRegion = this; + mUsed = end - mStart; + + mLastObject = lobj; + + if (lobj->mSuffix && !(lobj->mSuffix->mFlags & LOBJF_PLACED)) + { + if (!Allocate(linker, lobj->mSuffix, true)) + return false; + } + + return true; + } + } + else + { + int i = 0; + while (i < mFreeChunks.Size() && lobj->mPrefix != mFreeChunks[i].mLastObject) + i++; + if (i < mFreeChunks.Size()) + { + int start = mFreeChunks[i].mStart - 3; + int end = start + lobj->mSize; + + if (end <= mFreeChunks[i].mEnd) + { + lobj->mPrefix->mReferences[lobj->mPrefix->mSuffixReference]->mFlags = 0; + lobj->mPrefix->mSize -= 3; + + lobj->mFlags |= LOBJF_PLACED; + lobj->mAddress = start; + lobj->mRefAddress = start + mReloc; + lobj->mRegion = this; + + if (end == mFreeChunks[i].mEnd) + mFreeChunks.Remove(i); + else + { + mFreeChunks[i].mStart = end; + mFreeChunks[i].mLastObject = lobj; + } + + if (lobj->mSuffix && !(lobj->mSuffix->mFlags & LOBJF_PLACED)) + { + if (!Allocate(linker, lobj->mSuffix, true)) + return false; + } + + return true; + } + } + } + } + return false; +} + bool LinkerRegion::Allocate(Linker * linker, LinkerObject* lobj, bool merge) { - if (merge && lobj->mPrefix && !(lobj->mPrefix->mFlags & LOBJF_PLACED)) + if (merge && lobj->mPrefix) { - if (!Allocate(linker, lobj->mPrefix, true)) - return false; + if (!(lobj->mPrefix->mFlags & LOBJF_PLACED)) + { + if (!Allocate(linker, lobj->mPrefix, true)) + return false; - if (lobj->mFlags & LOBJF_PLACED) + if (lobj->mFlags & LOBJF_PLACED) + return true; + } + + if (AllocateAppend(linker, lobj)) return true; - } + } int i = 0; while (i < mFreeChunks.Size()) @@ -587,7 +666,7 @@ bool LinkerRegion::Allocate(Linker * linker, LinkerObject* lobj, bool merge) } else { - mFreeChunks.Insert(i + 1, FreeChunk{ end, mFreeChunks[i].mEnd, lobj } ); + mFreeChunks.Insert(i + 1, FreeChunk{ end, mFreeChunks[i].mEnd, lobj }); mFreeChunks[i].mEnd = start; } diff --git a/oscar64/Linker.h b/oscar64/Linker.h index f5e09cd..76fd234 100644 --- a/oscar64/Linker.h +++ b/oscar64/Linker.h @@ -98,6 +98,7 @@ public: GrowingArray mFreeChunks; LinkerObject * mLastObject; + bool AllocateAppend(Linker* linker, LinkerObject* obj); bool Allocate(Linker * linker, LinkerObject* obj, bool merge); void PlaceStackSection(LinkerSection* stackSection, LinkerSection* section); }; diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index 54b0e2f..e0de622 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -30624,6 +30624,8 @@ bool NativeCodeBasicBlock::MoveTYADCStoreDown(int at) bool NativeCodeBasicBlock::MoveLDSTXOutOfRange(int at) { + bool vol = mIns[at + 1].mFlags & NCIF_VOLATILE; + int j = at + 2; while (j < mIns.Size()) { @@ -30634,8 +30636,12 @@ bool NativeCodeBasicBlock::MoveLDSTXOutOfRange(int at) } else if (mIns[j].MayBeSameAddress(mIns[at + 1])) return false; + else if (vol && mIns[j].ChangesGlobalMemory()) + return false; else if (mIns[at + 1].mMode == ASMIM_ABSOLUTE && (mIns[j].mFlags & NCIF_VOLATILE)) return false; + else if (mIns[j].ReferencesXReg()) + return false; if (mIns[j].mType == ASMIT_JSR) return false; @@ -30652,6 +30658,38 @@ bool NativeCodeBasicBlock::MoveLDSTXOutOfRange(int at) j++; } + j = at; + while (j > 0) + { + j--; + if (!(mIns[j].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z))) + { + mIns.Insert(j + 1, NativeCodeInstruction(mIns[at + 1].mIns, ASMIT_STA, mIns[at + 1])); + mIns.Insert(j + 1, NativeCodeInstruction(mIns[at + 1].mIns, ASMIT_LDA, mIns[at + 1])); + mIns.Remove(at + 2, 2); + + return true; + } + + if (mIns[at + 1].mMode == ASMIM_ZERO_PAGE) + { + if (mIns[j].ReferencesZeroPage(mIns[at + 1].mAddress)) + return false; + } + else if (mIns[j].MayBeSameAddress(mIns[at + 1])) + return false; + else if (vol && mIns[j].ChangesGlobalMemory()) + return false; + else if (mIns[at + 1].mMode == ASMIM_ABSOLUTE && (mIns[j].mFlags & NCIF_VOLATILE)) + return false; + else if (mIns[j].ReferencesXReg()) + return false; + + if (mIns[j].mType == ASMIT_JSR) + return false; + + } + return false; } @@ -34138,6 +34176,113 @@ bool NativeCodeBasicBlock::SimpleLoopReversal(NativeCodeProcedure* proc) return changed; } +bool NativeCodeBasicBlock::OptimizeXYSpilling(void) +{ + bool changed = false; + + if (!mVisited) + { + mVisited = true; + + if (mLoopHead && (mTrueJump == this || mFalseJump == this) && mEntryBlocks.Size() == 2) + { + NativeCodeBasicBlock* pblock = mEntryBlocks[0]; + if (pblock == this) + pblock = mEntryBlocks[1]; + NativeCodeBasicBlock* nblock = mTrueJump; + if (nblock == this) + nblock = mFalseJump; + + int ps = pblock->mIns.Size(); + if (nblock->mEntryBlocks.Size() == 1 && !pblock->mFalseJump && ps > 0) + { + if (!ReferencesAccu()) + { + if (!ReferencesXReg() && pblock->mIns.Size() > 0 && pblock->mIns[ps - 1].mType == ASMIT_TAX) + { + nblock->mIns.Insert(0, NativeCodeInstruction(pblock->mIns[ps - 1].mIns, ASMIT_TAX)); + pblock->mIns.Remove(ps - 1); + ps--; + pblock->mExitRequiredRegs += CPU_REG_A; + nblock->mEntryRequiredRegs += CPU_REG_A; + mEntryRequiredRegs += CPU_REG_A; + mExitRequiredRegs += CPU_REG_A; + for (int i = 0; i < mIns.Size(); i++) + mIns[i].mLive |= LIVE_CPU_REG_A; + changed = true; + } + + if (!ReferencesYReg() && pblock->mIns.Size() > 0 && pblock->mIns[ps - 1].mType == ASMIT_TAY) + { + nblock->mIns.Insert(0, NativeCodeInstruction(pblock->mIns[ps - 1].mIns, ASMIT_TAY)); + pblock->mIns.Remove(ps - 1); + ps--; + pblock->mExitRequiredRegs += CPU_REG_A; + nblock->mEntryRequiredRegs += CPU_REG_A; + mEntryRequiredRegs += CPU_REG_A; + mExitRequiredRegs += CPU_REG_A; + for (int i = 0; i < mIns.Size(); i++) + mIns[i].mLive |= LIVE_CPU_REG_A; + changed = true; + } + } + } + } + + for (int i = 0; i < mIns.Size(); i++) + { + if (i + 2 < mIns.Size()) + { + if (mIns[i + 0].mType == ASMIT_TAX && + mIns[i + 1].mType == ASMIT_LDA && mIns[i + 1].mMode == ASMIM_IMMEDIATE && + mIns[i + 2].mType == ASMIT_STA && mIns[i + 2].mMode == ASMIM_ABSOLUTE && !(mIns[i + 2].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z))) + { + mIns[i + 1].mType = ASMIT_LDX; mIns[i + 1].mLive |= LIVE_CPU_REG_A | LIVE_CPU_REG_X; + mIns[i + 2].mType = ASMIT_STX; mIns[i + 2].mLive |= LIVE_CPU_REG_A; + mIns.Insert(i + 3, NativeCodeInstruction(mIns[i + 0].mIns, ASMIT_TAX)); + mIns.Remove(i); + changed = true; + } + else if (mIns[i + 0].mType == ASMIT_TAY && + mIns[i + 1].mType == ASMIT_LDA && mIns[i + 1].mMode == ASMIM_IMMEDIATE && + mIns[i + 2].mType == ASMIT_STA && mIns[i + 2].mMode == ASMIM_ABSOLUTE && !(mIns[i + 2].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z))) + { + mIns[i + 1].mType = ASMIT_LDY; mIns[i + 1].mLive |= LIVE_CPU_REG_A | LIVE_CPU_REG_Y; + mIns[i + 2].mType = ASMIT_STY; mIns[i + 2].mLive |= LIVE_CPU_REG_A; + mIns.Insert(i + 3, NativeCodeInstruction(mIns[i + 0].mIns, ASMIT_TAY)); + mIns.Remove(i); + changed = true; + } + } + + if (i + 1 < mIns.Size()) + { + if (mIns[i + 0].mType == ASMIT_TAX && !mIns[i + 1].ChangesAccu() && !mIns[i + 1].ReferencesXReg() && !(mIns[i + 1].mLive & LIVE_CPU_REG_Z)) + { + mIns[i + 1].mLive |= LIVE_CPU_REG_A; + mIns.Insert(i + 2, NativeCodeInstruction(mIns[i + 0].mIns, ASMIT_TAX)); + mIns.Remove(i); + changed = true; + } + else if (mIns[i + 0].mType == ASMIT_TAY && !mIns[i + 1].ChangesAccu() && !mIns[i + 1].ReferencesYReg() && !(mIns[i + 1].mLive & LIVE_CPU_REG_Z)) + { + mIns[i + 1].mLive |= LIVE_CPU_REG_A; + mIns.Insert(i + 2, NativeCodeInstruction(mIns[i + 0].mIns, ASMIT_TAY)); + mIns.Remove(i); + changed = true; + } + } + } + + if (mTrueJump && mTrueJump->OptimizeXYSpilling()) + changed = true; + if (mFalseJump && mFalseJump->OptimizeXYSpilling()) + changed = true; + } + + return changed; +} + bool NativeCodeBasicBlock::OptimizeXYSimpleLoop(void) { bool changed = false; @@ -35004,6 +35149,17 @@ bool NativeCodeBasicBlock::OptimizeInnerLoop(NativeCodeProcedure* proc, NativeCo } else if (!yindex && (block->mIns[i].mType == ASMIT_STY || block->mIns[i].mType == ASMIT_TYA || block->mIns[i].mMode == ASMIM_ABSOLUTE_Y || block->mIns[i].mMode == ASMIM_INDIRECT_Y)) yother = true; + else if (block->mIns[i].mType == ASMIT_LDX && block->mIns[i].mMode == ASMIM_ZERO_PAGE && block->mIns[i].mAddress == zreg && xother && !yother && !(block->mIns[i].mLive & LIVE_CPU_REG_Y)) + { + if (i + 1 < bz && !(block->mIns[i + 1].mLive & (LIVE_CPU_REG_X | LIVE_CPU_REG_Y)) && block->mIns[i + 1].mMode == ASMIM_ABSOLUTE_X && HasAsmInstructionMode(block->mIns[i + 1].mType, ASMIM_ABSOLUTE_Y)) + { + block->mIns[i].mType = ASMIT_LDY; block->mIns[i].mLive |= LIVE_CPU_REG_Y; + block->mIns[i + 1].mMode = ASMIM_ABSOLUTE_Y; + yindex = true; + } + else + yother = true; + } else if (block->mIns[i].mType != ASMIT_LDA && block->mIns[i].mMode == ASMIM_ZERO_PAGE && block->mIns[i].mAddress == zreg) yother = true; @@ -35207,14 +35363,21 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::CollectInnerLoop(NativeCodeBasicBloc if (mTrueJump != head && mFalseJump != head) { - if (mTrueJump) - mLoopTailBlock = mTrueJump->CollectInnerLoop(head, lblocks); - - if (mLoopTailBlock && mFalseJump) + if (mFalseJump && mTrueJump == this && mEntryBlocks.Size() == 2 && mIns.Size() == 1 && (mIns[0].mType == ASMIT_BIT || mIns[0].mType == ASMIT_LDA && !(mIns[0].mLive & LIVE_CPU_REG_A))) { - NativeCodeBasicBlock * tail = mFalseJump->CollectInnerLoop(head, lblocks); - if (tail != mLoopTailBlock) - mLoopTailBlock = nullptr; + mLoopTailBlock = mFalseJump->CollectInnerLoop(head, lblocks); + } + else + { + if (mTrueJump) + mLoopTailBlock = mTrueJump->CollectInnerLoop(head, lblocks); + + if (mLoopTailBlock && mFalseJump) + { + NativeCodeBasicBlock* tail = mFalseJump->CollectInnerLoop(head, lblocks); + if (tail != mLoopTailBlock) + mLoopTailBlock = nullptr; + } } } else @@ -37842,7 +38005,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass for (int i = mIns.Size() - 2 ; i >= 0; i--) { - if (mIns[i].mType == ASMIT_LDX && (mIns[i].mMode == ASMIM_IMMEDIATE|| mIns[i].mMode == ASMIM_IMMEDIATE_ADDRESS) && mIns[i + 1].mType == ASMIT_STX && !(mIns[i + 1].mLive & LIVE_CPU_REG_X) && !(mIns[i + 1].mFlags & NCIF_VOLATILE)) + if (mIns[i].mType == ASMIT_LDX && (mIns[i].mMode == ASMIM_IMMEDIATE|| mIns[i].mMode == ASMIM_IMMEDIATE_ADDRESS) && mIns[i + 1].mType == ASMIT_STX && !(mIns[i + 1].mLive & LIVE_CPU_REG_X)) { if (MoveLDSTXOutOfRange(i)) changed = true; @@ -45479,7 +45642,7 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc) { mInterProc = proc; - CheckFunc = !strcmp(mInterProc->mIdent->mString, "krnio_setbnk"); + CheckFunc = !strcmp(mInterProc->mIdent->mString, "vdc_hchar"); int nblocks = proc->mBlocks.Size(); tblocks = new NativeCodeBasicBlock * [nblocks]; @@ -46704,6 +46867,29 @@ void NativeCodeProcedure::Optimize(void) if (mEntryBlock->JoinXYCascade()) changed = true; } +#if 1 + if (step == 9 && cnt < 10) + { +#if _DEBUG + ResetVisited(); + mEntryBlock->CheckBlocks(); +#endif + ResetVisited(); + while (mEntryBlock->OptimizeXYSpilling()) + { + BuildDataFlowSets(); + ResetVisited(); + mEntryBlock->RemoveUnusedResultInstructions(); + + changed = true; + } + +#if _DEBUG + ResetVisited(); + mEntryBlock->CheckBlocks(); +#endif + } +#endif #if 1 if (step == 6) @@ -46719,6 +46905,7 @@ void NativeCodeProcedure::Optimize(void) changed = true; } #endif + #if _DEBUG ResetVisited(); mEntryBlock->CheckBlocks(); diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index bb677fd..4f13974 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -305,6 +305,8 @@ public: bool OptimizeInnerLoop(NativeCodeProcedure* proc, NativeCodeBasicBlock* head, NativeCodeBasicBlock* tail, ExpandingArray& blocks); bool OptimizeXYSimpleLoop(void); + bool OptimizeXYSpilling(void); + bool OptimizeSelect(NativeCodeProcedure* proc); bool OptimizeInnerLoops(NativeCodeProcedure* proc);