From 8175fae67aae0c1dad53db528b2026a0646e8fb8 Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Sat, 7 Jun 2025 19:35:40 +0200 Subject: [PATCH] Add complex loop value forwarding, tweak sidfx state machine --- include/audio/sidfx.c | 140 +++++++++++++++++--------------- oscar64/InterCode.cpp | 46 +++++++++++ oscar64/InterCode.h | 1 + oscar64/NativeCodeGenerator.cpp | 113 ++++++++++++++++++++++---- oscar64/NativeCodeGenerator.h | 6 +- 5 files changed, 219 insertions(+), 87 deletions(-) diff --git a/include/audio/sidfx.c b/include/audio/sidfx.c index 0dcb631..18109e4 100644 --- a/include/audio/sidfx.c +++ b/include/audio/sidfx.c @@ -10,7 +10,7 @@ enum SIDFXState SIDFX_WAIT }; -static struct SIDFXChannel +__striped static struct SIDFXChannel { const SIDFX * volatile com; byte delay, priority; @@ -27,6 +27,7 @@ void sidfx_init(void) channels[i].com = nullptr; channels[i].state = SIDFX_IDLE; channels[i].priority = 0; + channels[i].delay = 1; } } @@ -52,6 +53,7 @@ void sidfx_play(byte chn, const SIDFX * fx, byte cnt) return; channels[chn].state = SIDFX_IDLE; + channels[chn].delay = 1; channels[chn].com = fx; channels[chn].cnt = cnt - 1; @@ -64,71 +66,92 @@ void sidfx_stop(byte chn) { channels[chn].com = nullptr; if (channels[chn].state != SIDFX_IDLE) + { channels[chn].state = SIDFX_RESET_0; + channels[chn].delay = 1; + } } inline void sidfx_loop_ch(byte ch) { - switch (channels[ch].state) + if (channels[ch].state) { - case SIDFX_IDLE: - break; - case SIDFX_RESET_0: - sid.voices[ch].ctrl = 0; - sid.voices[ch].attdec = 0; - sid.voices[ch].susrel = 0; - channels[ch].state = SIDFX_READY; - break; - case SIDFX_RESET_1: - sid.voices[ch].ctrl = SID_CTRL_TEST; - channels[ch].state = SIDFX_READY; - break; - case SIDFX_READY: + const SIDFX * com = channels[ch].com; + + channels[ch].delay--; + if (channels[ch].delay) + { + if (com->dfreq) { - const SIDFX * com = channels[ch].com; + channels[ch].freq += com->dfreq; + sid.voices[ch].freq = channels[ch].freq; + } + if (com->dpwm) + { + channels[ch].pwm += com->dpwm; + sid.voices[ch].pwm = channels[ch].pwm; + } + } + + while (!channels[ch].delay) + { + switch (channels[ch].state) + { + case SIDFX_IDLE: + channels[ch].delay = 1; + break; + case SIDFX_RESET_0: + sid.voices[ch].ctrl = 0; + sid.voices[ch].attdec = 0; + sid.voices[ch].susrel = 0; if (com) + channels[ch].state = SIDFX_READY; + else + channels[ch].state = SIDFX_IDLE; + channels[ch].delay = 1; + break; + case SIDFX_RESET_1: + sid.voices[ch].ctrl = SID_CTRL_TEST; + channels[ch].state = SIDFX_READY; + break; + case SIDFX_READY: + channels[ch].freq = com->freq; + channels[ch].pwm = com->pwm; + + sid.voices[ch].freq = com->freq; + sid.voices[ch].pwm = com->pwm; + sid.voices[ch].attdec = com->attdec; + sid.voices[ch].susrel = com->susrel; + sid.voices[ch].ctrl = com->ctrl; + + if (com->ctrl & SID_CTRL_GATE) { - channels[ch].freq = com->freq; - channels[ch].pwm = com->pwm; - - sid.voices[ch].freq = com->freq; - sid.voices[ch].pwm = com->pwm; - sid.voices[ch].attdec = com->attdec; - sid.voices[ch].susrel = com->susrel; - sid.voices[ch].ctrl = com->ctrl; - channels[ch].delay = com->time1; channels[ch].state = SIDFX_PLAY; } else - channels[ch].state = SIDFX_IDLE; - } - break; - case SIDFX_PLAY: - { - const SIDFX * com = channels[ch].com; - if (com->dfreq) { - channels[ch].freq += com->dfreq; - sid.voices[ch].freq = channels[ch].freq; + channels[ch].delay = com->time0; + channels[ch].state = SIDFX_PLAY; } - if (com->dpwm) - { - channels[ch].pwm += com->dpwm; - sid.voices[ch].pwm = channels[ch].pwm; - } - - if (channels[ch].delay) - channels[ch].delay--; - else if (com->time0) + break; + case SIDFX_PLAY: + if (com->time0) { sid.voices[ch].ctrl = com->ctrl & ~SID_CTRL_GATE; - channels[ch].delay = com->time0; + channels[ch].delay = com->time0 - 1; channels[ch].state = SIDFX_WAIT; } else if (channels[ch].cnt) { + char sr = com->susrel & 0xf0; com++; + char ctrl = com->ctrl; + if (com->attdec == 0 && (ctrl & SID_CTRL_GATE) && (com->susrel & 0xf0) > sr) + { + sid.voices[ch].ctrl = ctrl & ~SID_CTRL_GATE; + sid.voices[ch].ctrl = ctrl | SID_CTRL_GATE; + } channels[ch].cnt--; channels[ch].com = com; channels[ch].priority = com->priority; @@ -137,33 +160,17 @@ inline void sidfx_loop_ch(byte ch) else { channels[ch].com = nullptr; - channels[ch].state = SIDFX_RESET_0; + channels[ch].state = SIDFX_RESET_0; } - } - break; - case SIDFX_WAIT: - { - const SIDFX * com = channels[ch].com; - if (com->dfreq) - { - channels[ch].freq += com->dfreq; - sid.voices[ch].freq = channels[ch].freq; - } - if (com->dpwm) - { - channels[ch].pwm += com->dpwm; - sid.voices[ch].pwm = channels[ch].pwm; - } - - if (channels[ch].delay) - channels[ch].delay--; - else if (channels[ch].cnt) + break; + case SIDFX_WAIT: + if (channels[ch].cnt) { com++; channels[ch].cnt--; channels[ch].com = com; channels[ch].priority = com->priority; - if (com->time1) + if (com->ctrl & SID_CTRL_GATE) channels[ch].state = SIDFX_RESET_0; else channels[ch].state = SIDFX_READY; @@ -173,8 +180,9 @@ inline void sidfx_loop_ch(byte ch) channels[ch].com = nullptr; channels[ch].state = SIDFX_RESET_0; } + break; } - break; + } } } diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index 5c18971..1741ddb 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -15638,6 +15638,48 @@ bool SameExitCondition(InterCodeBasicBlock* b1, InterCodeBasicBlock* b2) return false; } +InterCodeBasicBlock* InterCodeBasicBlock::CheckIsSimpleIntRangeBranch(const GrowingIntegerValueRangeArray& irange) +{ + int sz = mInstructions.Size(); + if (irange.Size() > 0 && + mTrueJump && mFalseJump && mInstructions.Size() == 2 && + mInstructions[sz - 1]->mCode == IC_BRANCH && + mInstructions[sz - 2]->mCode == IC_RELATIONAL_OPERATOR && + IsScalarType(mInstructions[sz - 2]->mSrc[0].mType) && + IsScalarType(mInstructions[sz - 2]->mSrc[1].mType) && + mInstructions[sz - 1]->mSrc[0].mTemp == mInstructions[sz - 2]->mDst.mTemp && mInstructions[sz - 1]->mSrc[0].mFinal) + { + const InterInstruction* cins = mInstructions[sz - 2]; + if ((cins->mSrc[0].mTemp < 0 || irange[cins->mSrc[0].mTemp].IsBound()) && + (cins->mSrc[1].mTemp < 0 || irange[cins->mSrc[1].mTemp].IsBound())) + { + IntegerValueRange r0, r1; + if (cins->mSrc[0].mTemp < 0) + r0.SetConstant(cins->mSrc[0].mIntConst); + else + r0 = irange[cins->mSrc[0].mTemp]; + if (cins->mSrc[1].mTemp < 0) + r1.SetConstant(cins->mSrc[1].mIntConst); + else + r1 = irange[cins->mSrc[1].mTemp]; + + switch (mInstructions[sz - 2]->mOperator) + { + case IA_CMPEQ: + if (r0.mMinValue > r1.mMaxValue || r0.mMaxValue < r1.mMinValue) + return mFalseJump; + break; + case IA_CMPNE: + if (r0.mMinValue > r1.mMaxValue || r0.mMaxValue < r1.mMinValue) + return mTrueJump; + break; + } + } + } + + return nullptr; +} + InterCodeBasicBlock* InterCodeBasicBlock::CheckIsConstBranch(const GrowingInstructionPtrArray& cins) { if (!mFalseJump || mInstructions.Size() < 1 || mInstructions[mInstructions.Size() - 1]->mCode != IC_BRANCH) @@ -15911,6 +15953,8 @@ bool InterCodeBasicBlock::ShortcutConstBranches(const GrowingInstructionPtrArray if (mTrueJump) { InterCodeBasicBlock* tblock = mTrueJump->CheckIsConstBranch(tins); + if (!tblock) tblock = mTrueJump->CheckIsSimpleIntRangeBranch(mTrueValueRange); + if (tblock) { mTrueJump->mEntryBlocks.RemoveAll(this); @@ -15925,6 +15969,8 @@ bool InterCodeBasicBlock::ShortcutConstBranches(const GrowingInstructionPtrArray if (mFalseJump) { InterCodeBasicBlock* tblock = mFalseJump->CheckIsConstBranch(tins); + if (!tblock) tblock = mFalseJump->CheckIsSimpleIntRangeBranch(mFalseValueRange); + if (tblock) { mFalseJump->mEntryBlocks.RemoveAll(this); diff --git a/oscar64/InterCode.h b/oscar64/InterCode.h index 449215a..2ae4c76 100644 --- a/oscar64/InterCode.h +++ b/oscar64/InterCode.h @@ -656,6 +656,7 @@ public: bool ChangeTrueJump(InterCodeBasicBlock* block); bool ChangeFalseJump(InterCodeBasicBlock* block); + InterCodeBasicBlock* CheckIsSimpleIntRangeBranch(const GrowingIntegerValueRangeArray & irange); InterCodeBasicBlock* CheckIsConstBranch(const GrowingInstructionPtrArray& cins); bool ShortcutConstBranches(const GrowingInstructionPtrArray& cins); bool ShortcutDuplicateBranches(void); diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index e0b1652..858ed3e 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -32239,7 +32239,40 @@ bool NativeCodeBasicBlock::JoinTAYARange(int from, int to) return true; } } +#if 1 + if (to + 2 < mIns.Size() && + !(mIns[from].mLive & LIVE_CPU_REG_A) && + mIns[to + 1].mType == ASMIT_LDY && + mIns[to + 2].mType == ASMIT_STA && !(mIns[to + 2].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z))) + { + bool usesX = mIns[to + 1].ReferencesXReg() || mIns[to + 2].ReferencesXReg(); + uint32 vflag = (mIns[to + 1].mFlags | mIns[to + 2].mFlags) & NCIF_VOLATILE; + int i = to - 1; + while (i > from && + !mIns[i].MayBeChangedOnAddress(mIns[to + 2]) && + !mIns[to + 1].MayBeChangedOnAddress(mIns[i]) && + !mIns[i].ReferencesYReg() && + !(mIns[i].mFlags & vflag) && + !(usesX && mIns[i].ChangesXReg())) + { + i--; + } + if (i == from) + { + NativeCodeInstruction i0 = mIns[to + 1]; + NativeCodeInstruction i1 = mIns[to + 2]; + i0.mLive |= mIns[from].mLive; + i1.mLive |= mIns[from].mLive; + + mIns.Remove(to, 3); + mIns.Remove(from); + mIns.Insert(from, i0); + mIns.Insert(from + 1, i1); + return true; + } + } +#endif return false; } @@ -37623,7 +37656,7 @@ bool NativeCodeBasicBlock::GlobalValueForwarding(NativeCodeProcedure* proc, bool mDataSet.Intersect(mEntryBlocks[i]->mFDataSet); } } - + for (int i = 0; i < mIns.Size(); i++) { AsmInsType carryop; @@ -37797,6 +37830,45 @@ bool NativeCodeBasicBlock::ValueForwarding(NativeCodeProcedure* proc, const Nati ResetModifiedDataSet(mNDataSet); lblock->ResetModifiedDataSet(mNDataSet); } +#if 1 + else if (final) + { + ExpandingArray lblocks; + + if (CollectSingleEntryGenericLoop(lblocks)) + { + mNDataSet.mRegs[CPU_REG_C].Reset(); + mNDataSet.mRegs[CPU_REG_Z].Reset(); + for (int i = 0; i < lblocks.Size(); i++) + lblocks[i]->ResetModifiedDataSet(mNDataSet); + } + else + mNDataSet.Reset(); + } +#endif + else + mNDataSet.Reset(); + } + else if (global && final) + { + ExpandingArray lblocks; + + if (CollectSingleEntryGenericLoop(lblocks)) + { + int k = 0; + for (int i = 0; i < mEntryBlocks.Size(); i++) + if (lblocks.Contains(mEntryBlocks[i])) + k++; + if (k + 1 == mEntryBlocks.Size()) + { + mNDataSet.mRegs[CPU_REG_C].Reset(); + mNDataSet.mRegs[CPU_REG_Z].Reset(); + for (int i = 0; i < lblocks.Size(); i++) + lblocks[i]->ResetModifiedDataSet(mNDataSet); + } + else + mNDataSet.Reset(); + } else mNDataSet.Reset(); } @@ -37836,6 +37908,7 @@ bool NativeCodeBasicBlock::ValueForwarding(NativeCodeProcedure* proc, const Nati #if 1 if (mIns[i].mMode == ASMIM_INDIRECT_Y && HasAsmInstructionMode(mIns[i].mType, ASMIM_ABSOLUTE_Y) && + !(mIns[i].mType == ASMIT_CMP && (mIns[i].mLive & LIVE_CPU_REG_Y)) && mNDataSet.mRegs[CPU_REG_Y].mMode == NRDM_IMMEDIATE && mNDataSet.mRegs[mIns[i].mAddress + 1].mMode == NRDM_IMMEDIATE_ADDRESS && (mNDataSet.mRegs[mIns[i].mAddress + 1].mFlags & NCIF_UPPER) && mNDataSet.mRegs[mIns[i].mAddress].mMode != NRDM_IMMEDIATE_ADDRESS) @@ -37846,8 +37919,11 @@ bool NativeCodeBasicBlock::ValueForwarding(NativeCodeProcedure* proc, const Nati mIns[i + 1].mLinkerObject = mNDataSet.mRegs[reg + 1].mLinkerObject; mIns[i + 1].mAddress = (mNDataSet.mRegs[reg + 1].mValue & 0xff00) + mNDataSet.mRegs[CPU_REG_Y].mValue; mIns[i + 1].mFlags &= ~NCIF_LOWER; - mIns.Insert(i + 2, NativeCodeInstruction(mIns[i].mIns, ASMIT_LDY, ASMIM_IMMEDIATE, mNDataSet.mRegs[CPU_REG_Y].mValue)); - mIns.Insert(i + 3, NativeCodeInstruction(mIns[i].mIns, ASMIT_ORA, ASMIM_IMMEDIATE, 0)); + if (mIns[i + 1].mLive & LIVE_CPU_REG_Y) + { + mIns.Insert(i + 2, NativeCodeInstruction(mIns[i].mIns, ASMIT_LDY, ASMIM_IMMEDIATE, mNDataSet.mRegs[CPU_REG_Y].mValue)); + mIns.Insert(i + 3, NativeCodeInstruction(mIns[i].mIns, ASMIT_ORA, ASMIM_IMMEDIATE, 0)); + } changed = true; } #endif @@ -38963,6 +39039,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc if (!prevBlock) return OptimizeSimpleLoopInvariant(proc, full); + mIns[i].mLive |= mIns[0].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_X | LIVE_CPU_REG_Y); prevBlock->mIns.Push(mIns[i]); prevBlock->mExitRequiredRegs += CPU_REG_C; for (int j = 0; j < i; j++) @@ -42831,7 +42908,7 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BuildSingleExit(NativeCodeProcedure* } -bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc) +bool NativeCodeBasicBlock::OptimizeGenericLoop(void) { bool changed = false; @@ -42841,8 +42918,8 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc) { ExpandingArray lblocks; - proc->ResetPatched(); - if (CollectSingleEntryGenericLoop(proc, lblocks)) + mProc->ResetPatched(); + if (CollectSingleEntryGenericLoop(lblocks)) { int yreg = -1, xreg = -1, areg = -1; int zyreg[NUM_REGS], zxreg[NUM_REGS], zareg[NUM_REGS]; @@ -43213,7 +43290,7 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc) { if (!lblocks.Contains(block->mEntryBlocks[j])) { - block->mEntryBlocks[j] = block->BuildSingleExit(proc, block->mEntryBlocks[j]); + block->mEntryBlocks[j] = block->BuildSingleExit(mProc, block->mEntryBlocks[j]); if (areg >= 0 && mEntryRequiredRegs[areg]) { if (areg == CPU_REG_X) @@ -43417,7 +43494,7 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc) if (block->mTrueJump && !lblocks.Contains(block->mTrueJump)) { - block->mTrueJump = block->BuildSingleEntry(proc, block->mTrueJump); + block->mTrueJump = block->BuildSingleEntry(mProc, block->mTrueJump); if (areg >= 0 && block->mTrueJump->mEntryRequiredRegs[areg]) { if (areg < 256) @@ -43440,7 +43517,7 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc) } if (block->mFalseJump && !lblocks.Contains(block->mFalseJump)) { - block->mFalseJump = block->BuildSingleEntry(proc, block->mFalseJump); + block->mFalseJump = block->BuildSingleEntry(mProc, block->mFalseJump); if (areg >= 0 && block->mFalseJump->mEntryRequiredRegs[areg]) { if (areg < 256) @@ -43474,9 +43551,9 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(NativeCodeProcedure* proc) mVisited = true; - if (mTrueJump && mTrueJump->OptimizeGenericLoop(proc)) + if (mTrueJump && mTrueJump->OptimizeGenericLoop()) changed = true; - if (mFalseJump && mFalseJump->OptimizeGenericLoop(proc)) + if (mFalseJump && mFalseJump->OptimizeGenericLoop()) changed = true; } @@ -43495,14 +43572,14 @@ void NativeCodeBasicBlock::CollectReachable(ExpandingArray& lblocks) +bool NativeCodeBasicBlock::CollectGenericLoop(ExpandingArray& lblocks) { ExpandingArray rblocks; - proc->ResetPatched(); + mProc->ResetPatched(); CollectReachable(rblocks); - proc->ResetPatched(); + mProc->ResetPatched(); bool changed; do @@ -43528,9 +43605,9 @@ bool NativeCodeBasicBlock::CollectGenericLoop(NativeCodeProcedure* proc, Expandi return lblocks.Size() > 0; } -bool NativeCodeBasicBlock::CollectSingleEntryGenericLoop(NativeCodeProcedure* proc, ExpandingArray& lblocks) +bool NativeCodeBasicBlock::CollectSingleEntryGenericLoop(ExpandingArray& lblocks) { - if (CollectGenericLoop(proc, lblocks)) + if (CollectGenericLoop(lblocks)) { for (int i = 0; i < lblocks.Size(); i++) { @@ -55568,7 +55645,7 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc) mInterProc->mLinkerObject->mNativeProc = this; - CheckFunc = !strcmp(mIdent->mString, "sprite_mobs_update_slide"); + CheckFunc = !strcmp(mIdent->mString, "main"); int nblocks = proc->mBlocks.Size(); tblocks = new NativeCodeBasicBlock * [nblocks]; @@ -56819,7 +56896,7 @@ void NativeCodeProcedure::Optimize(void) CheckBlocks(true); ResetVisited(); - if (!changed && mEntryBlock->OptimizeGenericLoop(this)) + if (!changed && mEntryBlock->OptimizeGenericLoop()) changed = true; } #endif diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index 62a43f9..93b492e 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -373,9 +373,9 @@ public: bool OptimizeInnerLoops(NativeCodeProcedure* proc); NativeCodeBasicBlock* CollectInnerLoop(NativeCodeBasicBlock* head, ExpandingArray& lblocks); - bool OptimizeGenericLoop(NativeCodeProcedure* proc); - bool CollectGenericLoop(NativeCodeProcedure* proc, ExpandingArray& lblocks); - bool CollectSingleEntryGenericLoop(NativeCodeProcedure* proc, ExpandingArray& lblocks); + bool OptimizeGenericLoop(void); + bool CollectGenericLoop(ExpandingArray& lblocks); + bool CollectSingleEntryGenericLoop(ExpandingArray& lblocks); void CollectReachable(ExpandingArray& lblock); bool OptimizeFindLoop(NativeCodeProcedure* proc);