diff --git a/include/c64/rasterirq.c b/include/c64/rasterirq.c index 74f1606..5ccc8c5 100644 --- a/include/c64/rasterirq.c +++ b/include/c64/rasterirq.c @@ -10,7 +10,7 @@ volatile byte rirq_count; byte rasterIRQRows[NUM_IRQS]; byte rasterIRQIndex[NUM_IRQS]; -byte rasterIRQNext[NUM_IRQS]; +byte rasterIRQNext[NUM_IRQS + 1]; byte rasterIRQLow[NUM_IRQS]; byte rasterIRQHigh[NUM_IRQS]; @@ -255,6 +255,8 @@ ex2: void rirq_build(RIRQCode * ic, byte size) { + __assume(size < 26); + ic->size = size; asm_im(ic->code + 0, ASM_LDY, 0); @@ -372,6 +374,7 @@ void rirq_init_kernal(void) rasterIRQRows[i] = 255; rasterIRQIndex[i] = i; } + rasterIRQNext[NUM_IRQS] = 255; __asm { @@ -393,6 +396,7 @@ void rirq_init_io(void) rasterIRQRows[i] = 255; rasterIRQIndex[i] = i; } + rasterIRQNext[NUM_IRQS] = 255; __asm { @@ -414,6 +418,7 @@ void rirq_init_memmap(void) rasterIRQRows[i] = 255; rasterIRQIndex[i] = i; } + rasterIRQNext[NUM_IRQS] = 255; __asm { diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index 0aff1e1..c67799d 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -6081,11 +6081,62 @@ void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(const GrowingVariableArray bool singleLoop = CheckSingleBlockLimitedLoop(pblock, nloop); +#if 0 FastNumberSet dependTemps(mExitRequiredTemps.Size()); +#endif if (singleLoop) { +#if 1 + struct TempChain + { + int mBaseTemp; + int64 mOffset; + }; + + ExpandingArray tempChain; + tempChain.SetSize(mExitRequiredTemps.Size()); + + for (int i = 0; i < mExitRequiredTemps.Size(); i++) + { + tempChain[i].mBaseTemp = i; + tempChain[i].mOffset = 0; + } + + for (int i = 0; i < sz; i++) + { + InterInstruction* ins(mInstructions[i]); + if (ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_ADD && + ins->mSrc[1].mTemp >= 0 && ins->mSrc[0].mTemp < 0 && ins->mSrc[0].mIntConst > 0 && + tempChain[ins->mSrc[1].mTemp].mBaseTemp >= 0) + { + tempChain[ins->mDst.mTemp].mBaseTemp = tempChain[ins->mSrc[1].mTemp].mBaseTemp; + 
tempChain[ins->mDst.mTemp].mOffset = tempChain[ins->mSrc[1].mTemp].mOffset + ins->mSrc[0].mIntConst; + } + else if (ins->mCode == IC_CONVERSION_OPERATOR && ins->mOperator == IA_EXT8TO16U && ins->mSrc[0].mTemp >= 0) + tempChain[ins->mDst.mTemp] = tempChain[ins->mSrc[0].mTemp]; + else if (ins->mDst.mTemp >= 0) + { + tempChain[ins->mDst.mTemp].mBaseTemp = -1; + } + } + + for (int i = 0; i < tempChain.Size(); i++) + { + if (tempChain[i].mBaseTemp == i) + { + IntegerValueRange& r(pblock->mTrueValueRange[i]); + if (r.IsConstant()) + { + mLocalValueRange[i].LimitMax(r.mMinValue + (nloop - 1) * tempChain[i].mOffset); + } + } + } + + +#else FastNumberSet changedTemps(mExitRequiredTemps.Size()); + GrowingArray expandedTemps(-1); for (int i = 0; i < sz; i++) { @@ -6100,12 +6151,31 @@ void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(const GrowingVariableArray else changedTemps += ins->mDst.mTemp; } + else if (ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_ADD && + ins->mSrc[1].mTemp >= 0 && + ins->mDst.mTemp == expandedTemps[ins->mSrc[1].mTemp] && ins->mSrc[0].mTemp < 0 && ins->mSrc[0].mIntConst > 0) + { + if (dependTemps[ins->mDst.mTemp]) + changedTemps += ins->mDst.mTemp; + else if (pblock->mTrueValueRange[ins->mDst.mTemp].IsConstant()) + dependTemps += ins->mDst.mTemp; + else + changedTemps += ins->mDst.mTemp; + expandedTemps[ins->mSrc[1].mTemp] = -1; + } + else if (ins->mCode == IC_CONVERSION_OPERATOR && ins->mOperator == IA_EXT8TO16U && ins->mSrc[0].mTemp >= 0) + { + expandedTemps[ins->mDst.mTemp] = ins->mSrc[0].mTemp; + changedTemps += ins->mDst.mTemp; + } else if (ins->mDst.mTemp >= 0) { + expandedTemps[ins->mDst.mTemp] = -1; changedTemps += ins->mDst.mTemp; dependTemps -= ins->mDst.mTemp; } } +#endif } for (int i = 0; i < sz; i++) @@ -6290,12 +6360,14 @@ void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(const GrowingVariableArray case IA_ADD: if (ins->mSrc[0].mTemp < 0) { - if (ins->mSrc[1].mTemp == ins->mDst.mTemp && dependTemps[ins->mDst.mTemp] && 
i + 3 != sz) +#if 0 + if (/*ins->mSrc[1].mTemp == ins->mDst.mTemp &&*/ dependTemps[ins->mDst.mTemp] && i + 3 != sz) { int64 start = pblock->mTrueValueRange[ins->mDst.mTemp].mMinValue; vr.SetLimit(start + ins->mSrc[0].mIntConst, start + nloop * ins->mSrc[0].mIntConst); } else +#endif { vr = mLocalValueRange[ins->mSrc[1].mTemp]; if (ins->mSrc[0].mIntConst > 0 && vr.mMaxState == IntegerValueRange::S_WEAK) @@ -11955,6 +12027,32 @@ bool InterCodeBasicBlock::CheckSingleBlockLimitedLoop(InterCodeBasicBlock*& pbl nloop++; nloop = (nloop + ains->mSrc[0].mIntConst - 1) / ains->mSrc[0].mIntConst; + return true; + } + } + else if (bins->mSrc[0].mTemp == cins->mDst.mTemp && + cins->mSrc[1].mTemp == ains->mDst.mTemp && + cins->mSrc[0].mRange.mMaxState == IntegerValueRange::S_BOUND && + ains->mSrc[1].mTemp == ains->mDst.mTemp && + ains->mSrc[0].mTemp < 0 && + (cins->mOperator == IA_CMPLU || cins->mOperator == IA_CMPLEU) && + cins->mSrc[0].mRange.mMaxValue < 255 && + ains->mSrc[0].mRange.mMaxValue > 0) + { + int pi = pblock->mInstructions.Size() - 1; + while (pi >= 0 && pblock->mInstructions[pi]->mDst.mTemp != ains->mDst.mTemp) + pi--; + + int i = 0; + while (i < nins - 3 && mInstructions[i]->mDst.mTemp != ains->mDst.mTemp) + i++; + if (i == nins - 3) + { + nloop = cins->mSrc[0].mRange.mMaxValue; + if (cins->mOperator == IA_CMPLEU) + nloop++; + nloop = (nloop + ains->mSrc[0].mIntConst - 1) / ains->mSrc[0].mIntConst; + return true; } } @@ -15555,7 +15653,7 @@ void InterCodeProcedure::Close(void) { GrowingTypeArray tstack(IT_NONE); - CheckFunc = !strcmp(mIdent->mString, "main"); + CheckFunc = !strcmp(mIdent->mString, "rirq_build1"); mEntryBlock = mBlocks[0]; diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index 3a6c098..cf16a16 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -20148,6 +20148,64 @@ bool NativeCodeBasicBlock::CrossBlockXYPreservation(void) return changed; } +bool 
NativeCodeBasicBlock::FoldLoopEntry(void) /* Walks the blocks reachable from this one (each visited once, guarded by mVisited). If this block ends in an LDA followed by a conditional branch, and one successor is a single-instruction block that repeats the same LDA, branches on the same (or the inverted) condition, loops back to itself and leaves towards this block's other successor, then the test here is redundant: the LDA is turned into a NOP and this block jumps unconditionally into that successor. Returns true if any visited block was changed. */ +{ + bool changed = false; + if (!mVisited) + { + mVisited = true; + + if (mTrueJump && mFalseJump && mIns.Size() >= 1) + { + int sz = mIns.Size(); + + if (mIns[sz - 1].mType == ASMIT_LDA) + { /* Case 1: the true successor is the one-instruction loop block */ + if (mTrueJump->mIns.Size() == 1 && mTrueJump != this) + { + if (mTrueJump->mIns[0].mType == ASMIT_LDA && mTrueJump->mIns[0].SameEffectiveAddress(mIns[sz - 1])) + { /* same condition: loop block repeats on true and exits on false to our false successor; inverted condition: it repeats on false and exits on true to our false successor */ + if (mBranch == mTrueJump->mBranch && mFalseJump == mTrueJump->mFalseJump && mTrueJump == mTrueJump->mTrueJump || + mBranch == InvertBranchCondition(mTrueJump->mBranch) && mFalseJump == mTrueJump->mTrueJump && mTrueJump == mTrueJump->mFalseJump) + { + mIns[sz - 1].mType = ASMIT_NOP; + mIns[sz - 1].mMode = ASMIM_IMPLIED; + mBranch = ASMIT_JMP; + mFalseJump->RemEntryBlock(this); /* this block no longer enters the false successor directly */ + mFalseJump = nullptr; + changed = true; + } + } + } /* Case 2: the false successor is the one-instruction loop block; !changed also protects against dereferencing the mFalseJump cleared by case 1 */ + if (!changed && mFalseJump->mIns.Size() == 1 && mFalseJump != this) + { + if (mFalseJump->mIns[0].mType == ASMIT_LDA && mFalseJump->mIns[0].SameEffectiveAddress(mIns[sz - 1])) + { /* same condition: loop block repeats on false and must exit on true to our true successor, so the check has to look at mFalseJump's true edge, mirroring case 1 */ + if (mBranch == mFalseJump->mBranch && mFalseJump == mFalseJump->mFalseJump && mTrueJump == mFalseJump->mTrueJump || + mBranch == InvertBranchCondition(mFalseJump->mBranch) && mFalseJump == mFalseJump->mTrueJump && mTrueJump == mFalseJump->mFalseJump) + { + mIns[sz - 1].mType = ASMIT_NOP; + mIns[sz - 1].mMode = ASMIM_IMPLIED; + mBranch = ASMIT_JMP; + mTrueJump->RemEntryBlock(this); /* this block no longer enters the old true successor directly */ + mTrueJump = mFalseJump; /* the unconditional jump target is stored in mTrueJump */ + mFalseJump = nullptr; + changed = true; + } + } + } + } + } + /* continue the walk through whatever successors remain after a possible fold */ + if (mTrueJump && mTrueJump->FoldLoopEntry()) + changed = true; + if (mFalseJump && mFalseJump->FoldLoopEntry()) + changed = true; + } + + return changed; +} + bool NativeCodeBasicBlock::BypassRegisterConditionBlock(void) { bool changed = false; @@ -39237,6 +39295,13 @@ void NativeCodeProcedure::Optimize(void) } #endif + if (step == 5) + { + ResetVisited(); + if (mEntryBlock->FoldLoopEntry()) + changed = true; + } + #if 1 if (step < 7) { diff --git a/oscar64/NativeCodeGenerator.h 
b/oscar64/NativeCodeGenerator.h index 2c06441..8bd8cb1 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -476,6 +476,7 @@ public: bool CrossBlockYAliasProgpagation(const int * yalias, int yoffset); bool BypassRegisterConditionBlock(void); + bool FoldLoopEntry(void); bool Check16BitSum(int at, NativeRegisterSum16Info& info); bool Propagate16BitSum(const ExpandingArray& cinfo);