From ae4b48c44556666562bee3be1e4729273991427a Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Sat, 16 Dec 2023 21:03:09 +0100 Subject: [PATCH] Various loop optimizations --- include/c64/kernalio.c | 13 ++ include/c64/kernalio.h | 2 + oscar64/Declaration.cpp | 2 +- oscar64/InterCode.cpp | 370 ++++++++++++++++++++++++++++++++ oscar64/InterCode.h | 1 + oscar64/NativeCodeGenerator.cpp | 237 ++++++++++++++++++-- 6 files changed, 611 insertions(+), 14 deletions(-) diff --git a/include/c64/kernalio.c b/include/c64/kernalio.c index 22bb5cd..d33eb51 100644 --- a/include/c64/kernalio.c +++ b/include/c64/kernalio.c @@ -23,6 +23,19 @@ void krnio_setnam(const char * name) #pragma native(krnio_setnam) +void krnio_setnam_n(const char * name, char len) +{ + __asm + { + lda len + ldx name + ldy name + 1 + jsr $ffbd // setnam + } +} + +#pragma native(krnio_setnam_n) + bool krnio_open(char fnum, char device, char channel) { krnio_pstatus[fnum] = KRNIO_OK; diff --git a/include/c64/kernalio.h b/include/c64/kernalio.h index 721074e..ca2cfc2 100644 --- a/include/c64/kernalio.h +++ b/include/c64/kernalio.h @@ -23,6 +23,8 @@ extern krnioerr krnio_pstatus[16]; void krnio_setnam(const char * name); +void krnio_setnam_n(const char * name, char len); + // open a kernal file/stream/io channel, returns true on success bool krnio_open(char fnum, char device, char channel); diff --git a/oscar64/Declaration.cpp b/oscar64/Declaration.cpp index 9f3fdf6..09d00d4 100644 --- a/oscar64/Declaration.cpp +++ b/oscar64/Declaration.cpp @@ -457,7 +457,7 @@ Expression* Expression::ConstantFold(Errors * errors, LinkerSection * dataSectio if (mType == EX_PREFIX && mToken == TK_BANKOF && linker) { LinkerRegion* rgn; - if (mLeft->mDecValue->mSection && (rgn = linker->FindRegionOfSection(mLeft->mDecValue->mSection))) + if (mLeft->mDecValue && mLeft->mDecValue->mSection && (rgn = linker->FindRegionOfSection(mLeft->mDecValue->mSection))) { uint64 i = 0; while (i < 64 && rgn->mCartridgeBanks != (1ULL << i)) diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index d3dd0fc..6001fb4 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -585,6 +585,54 @@ bool InterCodeBasicBlock::DestroyingMem(const InterInstruction* lins, const Inte return CollidingMem(sins->mSrc[1], sins->mSrc[0].mType, lins); else if (sins->mCode == IC_COPY || sins->mCode == IC_STRCPY) return CollidingMem(sins->mSrc[1], IT_NONE, lins); + else if (sins->mCode == IC_CALL || sins->mCode == IC_CALL_NATIVE) + { + if (sins->mSrc[0].mTemp < 0 && sins->mSrc[0].mLinkerObject) + { + InterCodeProcedure* proc = sins->mSrc[0].mLinkerObject->mProc; + if (proc) + { + int opmask = 0; + if (lins->mCode == IC_LOAD) + opmask = 1; + else if (lins->mCode == IC_STORE) + opmask = 2; + else if (lins->mCode == IC_COPY) + opmask = 3; + + for (int k = 0; k < lins->mNumOperands; k++) + { + if ((1 << k) & opmask) + { + const InterOperand& op(lins->mSrc[k]); + + if (op.mTemp >= 0) + { + if (proc->mStoresIndirect) + return true; + } + else if (op.mMemory == IM_FFRAME || op.mMemory == IM_FRAME) + return true; + else if (op.mMemory == IM_GLOBAL) + { + if (proc->ModifiesGlobal(op.mVarIndex)) + return true; + } + else if (op.mMemory == IM_LOCAL && !mProc->mLocalVars[op.mVarIndex]->mAliased) + ; + else if ((op.mMemory == IM_PARAM || op.mMemory == IM_FPARAM) && !mProc->mParamVars[op.mVarIndex]->mAliased) + ; + else + return true; + } + } + + return false; + } + } + + return true; + } else return false; } @@ -12785,6 +12833,37 @@ void InterCodeBasicBlock::BuildLoopSuffix(void) { mVisited = true; + if (mFalseJump) + { + if (mTrueJump->mLoopHead && mTrueJump->mNumEntries == 2 && mFalseJump->mNumEntries > 1) + { + InterCodeBasicBlock* suffix = new InterCodeBasicBlock(mProc); + + suffix->mEntryRequiredTemps = mFalseJump->mEntryRequiredTemps; + suffix->mExitRequiredTemps = mFalseJump->mEntryRequiredTemps; + suffix->mLocalModifiedTemps.Reset(mExitRequiredTemps.Size()); + + InterInstruction* jins = new InterInstruction(mInstructions[0]->mLocation, IC_JUMP); + suffix->Append(jins); + suffix->Close(mFalseJump, nullptr); + mFalseJump = suffix; + suffix->mNumEntries = 1; + } + else if (mFalseJump->mLoopHead && mFalseJump->mNumEntries == 2 && mTrueJump->mNumEntries > 1) + { + InterCodeBasicBlock* suffix = new InterCodeBasicBlock(mProc); + suffix->mEntryRequiredTemps = mTrueJump->mEntryRequiredTemps; + suffix->mExitRequiredTemps = mTrueJump->mEntryRequiredTemps; + suffix->mLocalModifiedTemps.Reset(mExitRequiredTemps.Size()); + + InterInstruction* jins = new InterInstruction(mInstructions[0]->mLocation, IC_JUMP); + suffix->Append(jins); + suffix->Close(mTrueJump, nullptr); + mTrueJump = suffix; + suffix->mNumEntries = 1; + } + } +#if 0 if (mLoopHead && mNumEntries == 2 && mFalseJump) { if (mTrueJump == this && mFalseJump != this) @@ -12820,6 +12899,7 @@ void InterCodeBasicBlock::BuildLoopSuffix(void) } } } +#endif if (mTrueJump) mTrueJump->BuildLoopSuffix(); @@ -13043,6 +13123,136 @@ bool InterCodeBasicBlock::MoveLoopHeadCheckToTail(void) return modified; } +bool SameExitCondition(InterCodeBasicBlock* b1, InterCodeBasicBlock* b2) +{ + if (b1->mTrueJump == b2->mTrueJump && b1->mFalseJump == b2->mFalseJump) + { + int n1 = b1->mInstructions.Size(), n2 = b2->mInstructions.Size(); + if (n1 > 1 && n2 > 1) + { + if (b1->mInstructions[n1 - 1]->mCode == IC_BRANCH && b2->mInstructions[n2 - 1]->mCode == IC_BRANCH && + b1->mInstructions[n1 - 2]->mCode == IC_RELATIONAL_OPERATOR && + b1->mInstructions[n1 - 1]->mSrc[0].mTemp == b1->mInstructions[n1 - 2]->mDst.mTemp && + b1->mInstructions[n1 - 1]->mSrc[0].mFinal && + b2->mInstructions[n2 - 2]->mCode == IC_RELATIONAL_OPERATOR && + b2->mInstructions[n2 - 1]->mSrc[0].mTemp == b2->mInstructions[n2 - 2]->mDst.mTemp && + b2->mInstructions[n2 - 1]->mSrc[0].mFinal && + b1->mInstructions[n1 - 2]->IsEqualSource(b2->mInstructions[n2 - 2])) + { + return true; + } + } + } + + return false; +} + +bool InterCodeBasicBlock::MergeLoopTails(void) +{ + bool modified = false; + + if (!mVisited) + { + mVisited = true; + + if (mLoopHead && mEntryBlocks.Size() > 2) + { + int fi = 0; + while (fi < mEntryBlocks.Size() && mEntryBlocks[fi] == mLoopPrefix) + fi++; + + int i = fi + 1; + while (i < mEntryBlocks.Size() && (mEntryBlocks[i] == mLoopPrefix || SameExitCondition(mEntryBlocks[i], mEntryBlocks[fi]))) + i++; + if (i == mEntryBlocks.Size()) + { + int n = 2; + bool match = true; + while (i == mEntryBlocks.Size() && n < mEntryBlocks[fi]->mInstructions.Size()) + { + i = fi + 1; + while (i < mEntryBlocks.Size() && (mEntryBlocks[i] == mLoopPrefix || + n < mEntryBlocks[i]->mInstructions.Size() && + mEntryBlocks[i]->mInstructions[mEntryBlocks[i]->mInstructions.Size() - n - 1]->IsEqual(mEntryBlocks[fi]->mInstructions[mEntryBlocks[fi]->mInstructions.Size() - n - 1]))) + i++; + + if (i == mEntryBlocks.Size()) + n++; + } + + InterCodeBasicBlock* block = new InterCodeBasicBlock(mProc); + block->mTrueJump = mEntryBlocks[fi]->mTrueJump; + block->mFalseJump = mEntryBlocks[fi]->mFalseJump; + for (int i = 0; i < n; i++) + block->mInstructions.Push(mEntryBlocks[fi]->mInstructions[mEntryBlocks[fi]->mInstructions.Size() - n + i]); + + InterInstruction* bins = mEntryBlocks[fi]->mInstructions.Last(); + + i = 0; + while (i < mEntryBlocks.Size()) + { + if (mEntryBlocks[i] == mLoopPrefix) + i++; + else + { + mEntryBlocks[i]->mInstructions.SetSize(mEntryBlocks[i]->mInstructions.Size() - n); + mEntryBlocks[i]->mFalseJump = nullptr; + mEntryBlocks[i]->mTrueJump = block; + InterInstruction* jins = new InterInstruction(bins->mLocation, IC_JUMP); + mEntryBlocks[i]->mInstructions.Push(jins); + block->mEntryBlocks.Push(mEntryBlocks[i]); + block->mNumEntries++; + mEntryBlocks.Remove(i); + } + } + + mNumEntries = 2; + mEntryBlocks.Push(block); + + modified = true; + } + } + + if (mTrueJump && mTrueJump->MergeLoopTails()) + modified = true; + if (mFalseJump && mFalseJump->MergeLoopTails()) + modified = true; + } + + return modified; +} + +bool IsSingleLoopAssign(int at, InterCodeBasicBlock* block, const GrowingArray& body) +{ + InterInstruction* ai = block->mInstructions[at]; + if (ai->mDst.mTemp < 0) + return true; + if (block->IsTempReferencedInRange(0, at, ai->mDst.mTemp)) + return false; + if (block->IsTempModifiedInRange(at + 1, block->mInstructions.Size(), ai->mDst.mTemp)) + return false; + for (int i = 1; i < body.Size(); i++) + if (body[i]->IsTempModified(ai->mDst.mTemp)) + return false; + return true; +} + +bool IsLoopInvariantTemp(int tmp, const GrowingArray& body) +{ + if (tmp < 0) + return true; + + for (int i = 0; i < body.Size(); i++) + if (body[i]->IsTempModified(tmp)) + return false; + return true; +} + +static bool IsSimpleFactor(int64 val) +{ + return (val == 1 || val == 2 || val == 4 || val == 8); +} + bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedParams, const GrowingVariableArray& staticVars) { bool modified = false; @@ -13072,6 +13282,7 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar if (tail->CollectSingleHeadLoopBody(this, tail, body)) { int tz = tail->mInstructions.Size(); + #if 1 if (tz > 2) { @@ -13159,6 +13370,29 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar } } #endif + + GrowingIntArray indexScale(0); + + if (!modified) + { + int tz = tail->mInstructions.Size(); + if (tz > 2) + { + InterInstruction* ai = tail->mInstructions[tz - 3]; + if (ai->mCode == IC_BINARY_OPERATOR && ai->mOperator == IA_ADD && ai->mSrc[0].mTemp < 0 && ai->mDst.mTemp == ai->mSrc[1].mTemp && ai->mSrc[0].mIntConst > 0 && IsIntegerType(ai->mDst.mType) && + !tail->IsTempModifiedInRange(tz - 1, tz, ai->mDst.mTemp) && !tail->IsTempModifiedInRange(0, tz - 3, ai->mDst.mTemp)) + { + int i = 0; + while (i + 1 < body.Size() && !body[i]->IsTempModified(ai->mDst.mTemp)) + i++; + if (i + 1 == body.Size()) + { + indexScale[ai->mDst.mTemp] = (int)ai->mSrc[0].mIntConst; + } + } + } + } + bool hasStore = false; for (int j = 0; j < body.Size(); j++) { @@ -13176,6 +13410,87 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar { InterInstruction* lins = mInstructions[i]; + if (lins->mCode == IC_BINARY_OPERATOR) + { + if (lins->mOperator == IA_MUL && lins->mSrc[0].mTemp < 0 && (lins->mDst.IsNotUByte() || !IsSimpleFactor(lins->mSrc[0].mIntConst)) && lins->mSrc[1].mTemp >= 0 && indexScale[lins->mSrc[1].mTemp] != 0 && IsSingleLoopAssign(i, this, body)) + { + mLoopPrefix->mInstructions.Insert(mLoopPrefix->mInstructions.Size() - 1, lins); + mLoopPrefix->mExitRequiredTemps += lins->mDst.mTemp; + mEntryRequiredTemps += lins->mDst.mTemp; + tail->mExitRequiredTemps += lins->mDst.mTemp; + tail->mEntryRequiredTemps += lins->mDst.mTemp; + mInstructions.Remove(i); + + InterInstruction* ains = new InterInstruction(lins->mLocation, IC_BINARY_OPERATOR); + ains->mOperator = IA_ADD; + ains->mDst = lins->mDst; + ains->mSrc[1] = lins->mDst; + ains->mSrc[0] = lins->mSrc[0]; + ains->mSrc[0].mIntConst *= indexScale[lins->mSrc[1].mTemp]; + tail->mInstructions.Insert(tail->mInstructions.Size() - 3, ains); + + indexScale[ains->mDst.mTemp] = (int)ains->mSrc[0].mIntConst; + + modified = true; + continue; + } + } + else if (lins->mCode == IC_CONVERSION_OPERATOR && lins->mOperator == IA_EXT8TO16U && i + 1 < mInstructions.Size() && indexScale[lins->mSrc[0].mTemp] != 0 && IsSingleLoopAssign(i, this, body)) + { + InterInstruction* nins = mInstructions[i + 1]; + + if (nins->mCode == IC_BINARY_OPERATOR) + { + if (nins->mOperator == IA_MUL && nins->mSrc[0].mTemp < 0 && (nins->mDst.IsNotUByte() || !IsSimpleFactor(nins->mSrc[0].mIntConst)) && nins->mSrc[1].mTemp >= 0 && nins->mSrc[1].mFinal && nins->mDst.mTemp && IsSingleLoopAssign(i + 1, this, body)) + { + mLoopPrefix->mInstructions.Insert(mLoopPrefix->mInstructions.Size() - 1, lins); + mLoopPrefix->mInstructions.Insert(mLoopPrefix->mInstructions.Size() - 1, nins); + mLoopPrefix->mExitRequiredTemps += nins->mDst.mTemp; + mEntryRequiredTemps += nins->mDst.mTemp; + tail->mExitRequiredTemps += nins->mDst.mTemp; + tail->mEntryRequiredTemps += nins->mDst.mTemp; + mInstructions.Remove(i); + mInstructions.Remove(i); + + InterInstruction* ains = new InterInstruction(nins->mLocation, IC_BINARY_OPERATOR); + ains->mOperator = IA_ADD; + ains->mDst = nins->mDst; + ains->mSrc[1] = nins->mDst; + ains->mSrc[0] = nins->mSrc[0]; + ains->mSrc[0].mIntConst *= indexScale[lins->mSrc[0].mTemp]; + tail->mInstructions.Insert(tail->mInstructions.Size() - 3, ains); + + indexScale[ains->mDst.mTemp] = (int)ains->mSrc[0].mIntConst; + + modified = true; + continue; + } + } + } + else if (lins->mCode == IC_LEA) + { + if (lins->mSrc[0].mTemp >= 0 && lins->mSrc[0].IsNotUByte() && indexScale[lins->mSrc[0].mTemp] != 0 && IsSingleLoopAssign(i, this, body) && IsLoopInvariantTemp(lins->mSrc[1].mTemp, body)) + { + mLoopPrefix->mInstructions.Insert(mLoopPrefix->mInstructions.Size() - 1, lins); + mLoopPrefix->mExitRequiredTemps += lins->mDst.mTemp; + mEntryRequiredTemps += lins->mDst.mTemp; + tail->mExitRequiredTemps += lins->mDst.mTemp; + tail->mEntryRequiredTemps += lins->mDst.mTemp; + mInstructions.Remove(i); + + InterInstruction* ains = new InterInstruction(lins->mLocation, IC_LEA); + ains->mDst = lins->mDst; + ains->mSrc[1] = lins->mDst; + ains->mSrc[0].mTemp = -1; + ains->mSrc[0].mType = IT_INT16; + ains->mSrc[0].mIntConst = indexScale[lins->mSrc[0].mTemp]; + tail->mInstructions.Insert(tail->mInstructions.Size() - 3, ains); + + modified = true; + continue; + } + } + if (lins->mCode == IC_BINARY_OPERATOR || lins->mCode == IC_CONSTANT || lins->mCode == IC_UNARY_OPERATOR || lins->mCode == IC_CONVERSION_OPERATOR || lins->mCode == IC_SELECT || lins->mCode == IC_LEA || @@ -16458,6 +16773,46 @@ bool InterCodeBasicBlock::PeepholeReplaceOptimization(const GrowingVariableArray changed = true; } #endif + +#if 1 + if (i + 2 < mInstructions.Size() && + mInstructions[i + 0]->mCode == IC_CONVERSION_OPERATOR && mInstructions[i + 0]->mOperator == IA_EXT8TO16U && + mInstructions[i + 0]->mSrc[0].mTemp >= 0 && + mInstructions[i + 1]->mDst.mTemp != mInstructions[i + 0]->mSrc[0].mTemp && + + mInstructions[i + 1]->mCode == IC_BINARY_OPERATOR && (mInstructions[i + 1]->mOperator == IA_ADD || mInstructions[i + 1]->mOperator == IA_SUB) && + mInstructions[i + 1]->mSrc[0].mTemp < 0 && + mInstructions[i + 1]->mSrc[1].mTemp == mInstructions[i + 0]->mDst.mTemp && + + mInstructions[i + 2]->mCode == IC_STORE && + mInstructions[i + 2]->mSrc[0].mTemp == mInstructions[i + 1]->mDst.mTemp && mInstructions[i + 2]->mSrc[0].mFinal && + mInstructions[i + 2]->mSrc[0].mType == IT_INT8) + { + mInstructions[i + 1]->mSrc[1] = mInstructions[i + 0]->mSrc[0]; + mInstructions[i + 1]->mDst.mType = IT_INT8; + + changed = true; + } + if (i + 2 < mInstructions.Size() && + mInstructions[i + 0]->mCode == IC_CONVERSION_OPERATOR && mInstructions[i + 0]->mOperator == IA_EXT8TO16U && + mInstructions[i + 0]->mSrc[0].mTemp >= 0 && + mInstructions[i + 1]->mDst.mTemp != mInstructions[i + 0]->mSrc[0].mTemp && + + mInstructions[i + 1]->mCode == IC_BINARY_OPERATOR && (mInstructions[i + 1]->mOperator == IA_ADD || mInstructions[i + 1]->mOperator == IA_SUB) && + mInstructions[i + 1]->mSrc[1].mTemp < 0 && + mInstructions[i + 1]->mSrc[0].mTemp == mInstructions[i + 0]->mDst.mTemp && + + mInstructions[i + 2]->mCode == IC_STORE && + mInstructions[i + 2]->mSrc[0].mTemp == mInstructions[i + 1]->mDst.mTemp && mInstructions[i + 2]->mSrc[0].mFinal && + mInstructions[i + 2]->mSrc[0].mType == IT_INT8) + { + mInstructions[i + 1]->mSrc[0] = mInstructions[i + 0]->mSrc[0]; + mInstructions[i + 1]->mDst.mType = IT_INT8; + + changed = true; + } +#endif + #if 1 if (i + 2 < mInstructions.Size() && mInstructions[i + 0]->mCode == IC_BINARY_OPERATOR && @@ -19410,6 +19765,21 @@ void InterCodeProcedure::Close(void) PropagateConstOperationsUp(); #if 1 + BuildLoopPrefix(); + ResetEntryBlocks(); + ResetVisited(); + mEntryBlock->CollectEntryBlocks(nullptr); + + DisassembleDebug("Pre MergeLoopTails"); + + ResetVisited(); + if (mEntryBlock->MergeLoopTails()) + { + BuildTraces(false); + BuildDataFlowSets(); + } + + DisassembleDebug("Post MergeLoopTails"); SingleTailLoopOptimization(paramMemory); BuildDataFlowSets(); diff --git a/oscar64/InterCode.h b/oscar64/InterCode.h index cd2d3b9..96f02b6 100644 --- a/oscar64/InterCode.h +++ b/oscar64/InterCode.h @@ -586,6 +586,7 @@ public: bool CollectSingleHeadLoopBody(InterCodeBasicBlock* head, InterCodeBasicBlock* tail, GrowingArray& body); bool SingleTailLoopOptimization(const NumberSet& aliasedParams, const GrowingVariableArray& staticVars); + bool MergeLoopTails(void); InterCodeBasicBlock* BuildLoopPrefix(void); void BuildLoopSuffix(void); diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index 7290d5a..2c332c1 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -1890,6 +1890,13 @@ void NativeCodeInstruction::Simulate(NativeRegisterDataSet& data) data.mRegs[CPU_REG_Z].mValue = t & 255; data.mRegs[CPU_REG_Z].mMode = NRDM_IMMEDIATE; } + else if (data.mRegs[reg].mMode == NRDM_IMMEDIATE) + { + data.mRegs[CPU_REG_Z].Reset(); + data.mRegs[CPU_REG_C].mValue = data.mRegs[reg].mValue >= 128; + data.mRegs[CPU_REG_C].mMode = NRDM_IMMEDIATE; + data.mRegs[reg].Reset(); + } else { data.mRegs[reg].Reset(); @@ -1915,6 +1922,13 @@ void NativeCodeInstruction::Simulate(NativeRegisterDataSet& data) data.mRegs[CPU_REG_Z].mValue = t & 255; data.mRegs[CPU_REG_Z].mMode = NRDM_IMMEDIATE; } + else if (data.mRegs[reg].mMode == NRDM_IMMEDIATE) + { + data.mRegs[CPU_REG_Z].Reset(); + data.mRegs[CPU_REG_C].mValue = data.mRegs[reg].mValue & 1; + data.mRegs[CPU_REG_C].mMode = NRDM_IMMEDIATE; + data.mRegs[reg].Reset(); + } else { data.mRegs[reg].Reset(); @@ -6682,16 +6696,16 @@ bool NativeCodeBasicBlock::LoadOpStoreIndirectValue(InterCodeProcedure* proc, co if (ram == ASMIM_INDIRECT_Y && wam == ASMIM_INDIRECT_Y && rareg == wareg && rindex == windex) { - CheckFrameIndex(rins, rareg, rindex, size, BC_REG_ADDR); + CheckFrameIndex(rins, rareg, rindex, (size - 1) * rstride + 1, BC_REG_ADDR); windex = rindex; wareg = rareg; } else { if (ram == ASMIM_INDIRECT_Y) - CheckFrameIndex(rins, rareg, rindex, size, BC_REG_ADDR); + CheckFrameIndex(rins, rareg, rindex, (size - 1) * rstride + 1, BC_REG_ADDR); if (wam == ASMIM_INDIRECT_Y) - CheckFrameIndex(wins, wareg, windex, size, BC_REG_ACCU); + CheckFrameIndex(wins, wareg, windex, (size - 1) * wstride + 1, BC_REG_ACCU); } switch (oins->mOperator) @@ -19010,6 +19024,63 @@ bool NativeCodeBasicBlock::ExpandADCToBranch(NativeCodeProcedure* proc) } } + if (mFalseJump && mIns.Size() >= 8 && (mBranch == ASMIT_BEQ || mBranch == ASMIT_BNE)) + { + int sz = mIns.Size(); + + if (mIns[sz - 8].mType == ASMIT_SEC && + mIns[sz - 7].mType == ASMIT_LDA && + mIns[sz - 6].mType == ASMIT_SBC && mIns[sz - 6].mMode == ASMIM_IMMEDIATE && mIns[sz - 6].mAddress == 0x01 && + mIns[sz - 5].mType == ASMIT_STA && mIns[sz - 5].SameEffectiveAddress(mIns[sz - 7]) && + mIns[sz - 4].mType == ASMIT_LDA && + mIns[sz - 3].mType == ASMIT_SBC && mIns[sz - 3].mMode == ASMIM_IMMEDIATE && mIns[sz - 3].mAddress == 0x00 && + mIns[sz - 2].mType == ASMIT_STA && mIns[sz - 2].SameEffectiveAddress(mIns[sz - 4]) && + mIns[sz - 1].mType == ASMIT_ORA && mIns[sz - 1].SameEffectiveAddress(mIns[sz - 7]) && + HasAsmInstructionMode(ASMIT_DEC, mIns[sz - 5].mMode) && + HasAsmInstructionMode(ASMIT_DEC, mIns[sz - 1].mMode) && + !(mIns[sz - 1].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_C))) + { + changed = true; + + NativeCodeBasicBlock* hiblock = proc->AllocateBlock(); + NativeCodeBasicBlock* loblock = proc->AllocateBlock(); + NativeCodeBasicBlock* orblock = proc->AllocateBlock(); + + NativeCodeBasicBlock* eqblock, * neblock; + if (mBranch == ASMIT_BEQ) + { + eqblock = mTrueJump; + neblock = mFalseJump; + } + else + { + neblock = mTrueJump; + eqblock = mFalseJump; + } + + hiblock->mBranch = ASMIT_JMP; + hiblock->mTrueJump = loblock; + + loblock->mBranch = ASMIT_BNE; + loblock->mTrueJump = neblock; + loblock->mFalseJump = orblock; + + orblock->mBranch = ASMIT_BNE; + orblock->mTrueJump = neblock; + orblock->mFalseJump = eqblock; + + mBranch = ASMIT_BNE; + mTrueJump = loblock; + mFalseJump = hiblock; + + hiblock->mIns.Push(NativeCodeInstruction(mIns[sz - 4].mIns, ASMIT_DEC, mIns[sz - 4])); + loblock->mIns.Push(NativeCodeInstruction(mIns[sz - 7].mIns, ASMIT_DEC, mIns[sz - 7])); + orblock->mIns.Push(NativeCodeInstruction(mIns[sz - 4].mIns, ASMIT_LDA, mIns[sz - 4])); + + mIns[sz - 8].mType = ASMIT_NOP; + mIns.SetSize(sz - 6); + } + } if (mTrueJump && mTrueJump->ExpandADCToBranch(proc)) changed = true; @@ -22516,7 +22587,7 @@ bool NativeCodeBasicBlock::JoinTailCodeSequences(NativeCodeProcedure* proc, bool mTrueJump->mIns[0].SameEffectiveAddress(mFalseJump->mIns[0]) && mTrueJump->mIns[1].SameEffectiveAddress(mFalseJump->mIns[1]) && HasAsmInstructionMode(ASMIT_LDX, mTrueJump->mIns[0].mMode) && HasAsmInstructionMode(ASMIT_STX, mTrueJump->mIns[1].mMode)) { - uint32 live = mIns[s - 1].mLive; + uint32 live = mIns[s - 1].mLive | LIVE_CPU_REG_A; if (mIns[s - 1].RequiresYReg()) live |= LIVE_CPU_REG_Y; if (s >= 2) @@ -24339,7 +24410,7 @@ bool NativeCodeBasicBlock::CheckForwardSumYPointer(const NativeCodeBasicBlock* b } else if (ins.mType == ASMIT_RTS) { - if ((ins.mFlags & NCIF_LOWER) && base == BC_REG_ACCU) + if ((ins.mFlags & NCIF_LOWER) && (base == BC_REG_ACCU || reg == BC_REG_ACCU)) return false; } else if (ins.ChangesZeroPage(base) || ins.ChangesZeroPage(base + 1) || iins.MayBeChangedOnAddress(ins)) @@ -30666,12 +30737,12 @@ bool NativeCodeBasicBlock::OffsetValueForwarding(const ValueNumberingDataSet& da ins.mType = ASMIT_NOP; ins.mMode = ASMIM_IMPLIED; changed = true; } - else if (d == 1 && !(ins.mLive & LIVE_CPU_REG_Z)) + else if (d == 1 && !(ins.mLive & (LIVE_CPU_REG_Z | LIVE_CPU_REG_C))) { ins.mType = ASMIT_INC; changed = true; } - else if (d == -1 && !(ins.mLive & LIVE_CPU_REG_Z)) + else if (d == -1 && !(ins.mLive & (LIVE_CPU_REG_Z | LIVE_CPU_REG_C))) { ins.mType = ASMIT_DEC; changed = true; @@ -33192,6 +33263,65 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc } #endif +#if 1 + sz = mIns.Size(); + if (sz >= 2 && (mBranch == ASMIT_BEQ && mFalseJump == this || mBranch == ASMIT_BNE && mTrueJump == this) && + ((mIns[sz - 2].mType == ASMIT_LDA && mIns[sz - 1].mType == ASMIT_CMP && !mEntryRequiredRegs[CPU_REG_A]) || + (mIns[sz - 2].mType == ASMIT_LDX && mIns[sz - 1].mType == ASMIT_CPX && !mEntryRequiredRegs[CPU_REG_X]) || + (mIns[sz - 2].mType == ASMIT_LDY && mIns[sz - 1].mType == ASMIT_CPY && !mEntryRequiredRegs[CPU_REG_Y])) && + mIns[sz - 2].mMode == ASMIM_ZERO_PAGE && mIns[sz - 1].mMode == ASMIM_ZERO_PAGE) + { + if (ChangesZeroPage(mIns[sz - 1].mAddress) && !ChangesZeroPage(mIns[sz - 2].mAddress)) + { + int a = mIns[sz - 1].mAddress; + mIns[sz - 1].mAddress = mIns[sz - 2].mAddress; + mIns[sz - 2].mAddress = a; + changed = true; + } + } +#endif + + if (sz >= 2 && (mIns[0].mType == ASMIT_DEC || mIns[0].mType == ASMIT_INC) && mIns[0].mMode == ASMIM_ZERO_PAGE && + mIns[1].mType == ASMIT_LDY && mIns[1].SameEffectiveAddress(mIns[0])) + { + int i = 2; + int inc = 0; + while (i < mIns.Size()) + { + if (mIns[i].ChangesYReg()) + { + if (mIns[i].mType == ASMIT_INY) + inc++; + else if (mIns[i].mType == ASMIT_DEY) + inc--; + else if (mIns[i].mType == ASMIT_LDY && mIns[i].SameEffectiveAddress(mIns[0])) + inc = 0; + else + break; + } + else if (mIns[i].ChangesZeroPage(mIns[0].mAddress)) + break; + i++; + } + + if (i == mIns.Size() && inc == 0) + { + if (!prevBlock) + return OptimizeSimpleLoopInvariant(proc, full); + prevBlock->mIns.Push(NativeCodeInstruction(mIns[1].mIns, ASMIT_LDY, mIns[1])); + prevBlock->mExitRequiredRegs += CPU_REG_Y; + mEntryRequiredRegs += CPU_REG_Y; + mExitRequiredRegs += CPU_REG_Y; + mIns[1].mType = ASMIT_STY; + mIns[0].mLive |= LIVE_CPU_REG_Y; + if (mIns[0].mType == ASMIT_DEC) + mIns[0].mType = ASMIT_DEY; + else + mIns[0].mType = ASMIT_INY; + mIns[0].mMode = ASMIM_IMPLIED; + changed = true; + } + } if (mIns.Size() > 0 && (mIns.Last().mType == ASMIT_DEX || mIns.Last().mType == ASMIT_DEC || mIns.Last().mType == ASMIT_CPX)) { @@ -39210,9 +39340,14 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass #endif CheckLive(); + #if 1 if (i + 2 < mIns.Size()) { + NativeCodeInstruction i0 = mIns[i]; + NativeCodeInstruction i1 = mIns[i + 1]; + NativeCodeInstruction i2 = mIns[i + 2]; + if (mIns[i].mType == ASMIT_LDA && mIns[i + 2].mType == ASMIT_LDA && (mIns[i + 1].mType == ASMIT_CLC || mIns[i + 1].mType == ASMIT_SEC)) { mIns[i].mType = ASMIT_NOP; mIns[i].mMode = ASMIM_IMPLIED; @@ -39268,6 +39403,17 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED; progress = true; } + else if ( + mIns[i + 0].mType == ASMIT_LDA && + mIns[i + 1].mType == ASMIT_STA && + mIns[i + 2].mType == ASMIT_LDA && !(mIns[i + 2].mFlags & NCIF_VOLATILE) && + mIns[i + 0].SameEffectiveAddress(mIns[i + 2])) + { + mIns[i + 0].mLive |= mIns[i + 2].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z); + mIns[i + 1].mLive |= mIns[i + 2].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z); + mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED; + progress = true; + } else if ( mIns[i + 0].mType == ASMIT_STA && mIns[i + 0].mMode == ASMIM_ZERO_PAGE && mIns[i + 2].mType == ASMIT_LDY && mIns[i + 2].mMode == ASMIM_ZERO_PAGE && mIns[i + 2].mAddress == mIns[i + 0].mAddress && @@ -39334,6 +39480,8 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass { mIns[i + 2] = mIns[i + 0]; mIns[i + 2].mLive |= LIVE_CPU_REG_Y | LIVE_CPU_REG_Z; + if (mIns[i + 0].RequiresCarry()) + mIns[i + 1].mLive |= LIVE_CPU_REG_C; mIns[i + 0].mType = ASMIT_NOP; mIns[i + 0].mMode = ASMIM_IMPLIED; progress = true; } @@ -40446,6 +40594,8 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass } } + CheckLive(); + if ( mIns[i + 0].mType == ASMIT_LDY && mIns[i + 0].mMode == ASMIM_IMMEDIATE && mIns[i + 1].mType == ASMIT_LDA && @@ -40470,9 +40620,10 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass progress = true; } } + + CheckLive(); } #endif - CheckLive(); #if 1 if (i + 3 < mIns.Size()) { @@ -42105,7 +42256,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass HasAsmInstructionMode(ASMIT_INC, mIns[i + 0].mMode)) { mIns[i + 4].mType = ASMIT_INC; - mIns[i + 2].mType = ASMIT_NOP; mIns[i + 3].mMode = ASMIM_IMPLIED; + mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED; mIns[i + 3].mType = ASMIT_NOP; mIns[i + 3].mMode = ASMIM_IMPLIED; progress = true; @@ -42119,12 +42270,28 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass HasAsmInstructionMode(ASMIT_DEC, mIns[i + 0].mMode)) { mIns[i + 4].mType = ASMIT_DEC; - mIns[i + 2].mType = ASMIT_NOP; mIns[i + 3].mMode = ASMIM_IMPLIED; + mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED; mIns[i + 3].mType = ASMIT_NOP; mIns[i + 3].mMode = ASMIM_IMPLIED; progress = true; } #endif + else if ( + pass > 8 && + mIns[i + 0].mType == ASMIT_CLC && + mIns[i + 1].mType == ASMIT_LDA && + mIns[i + 2].mType == ASMIT_ADC && mIns[i + 2].mMode == ASMIM_IMMEDIATE && mIns[i + 2].mAddress == 1 && + mIns[i + 3].mType == ASMIT_CLC && + mIns[i + 4].mType == ASMIT_ADC && + !(mIns[i + 4].mLive & LIVE_CPU_REG_C)) + { + mIns[i + 3].mType = ASMIT_SEC; + mIns[i + 0].mType = ASMIT_NOP; mIns[i + 0].mMode = ASMIM_IMPLIED; + mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED; + + progress = true; + } + #if 0 else if ( mIns[i + 0].mType == ASMIT_LDY && mIns[i + 0].mMode == ASMIM_ZERO_PAGE && @@ -42831,6 +42998,34 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass } } #endif +#if 1 + if (mIns[i + 0].mType == ASMIT_CLC && + mIns[i + 1].mType == ASMIT_LDA && mIns[i + 1].mMode == ASMIM_ZERO_PAGE && + mIns[i + 2].mType == ASMIT_ADC && (mIns[i + 2].mMode == ASMIM_ZERO_PAGE || mIns[i + 2].mMode == ASMIM_ABSOLUTE) && + mIns[i + 3].mType == ASMIT_STA && mIns[i + 3].mMode == ASMIM_ZERO_PAGE && mIns[i + 3].mAddress != mIns[i + 2].mAddress && + mIns[i + 4].mType == ASMIT_LDA && mIns[i + 4].mMode == ASMIM_ZERO_PAGE && + mIns[i + 5].mType == ASMIT_ADC && mIns[i + 5].mMode == ASMIM_IMMEDIATE && mIns[i + 5].mAddress == 0 && + mIns[i + 6].mType == ASMIT_STA && mIns[i + 6].mMode == ASMIM_ZERO_PAGE && mIns[i + 6].mAddress == mIns[i + 3].mAddress + 1 && + !(mIns[i + 6].mLive & LIVE_CPU_REG_A)) + { + int yval = RetrieveYValue(i); + proc->ResetPatched(); + if (CheckForwardSumYPointer(this, mIns[i + 3].mAddress, mIns[i + 1].mAddress, mIns[i + 2], i + 7, yval, 3)) + { + proc->ResetPatched(); + if (PatchForwardSumYPointer(this, mIns[i + 3].mAddress, mIns[i + 3].mAddress, mIns[i + 2], i + 7, yval)) + progress = true; + + mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED; + mIns[i + 5].mType = ASMIT_NOP; mIns[i + 5].mMode = ASMIM_IMPLIED; + + if (mTrueJump) + mTrueJump->CheckLive(); + if (mFalseJump) + mFalseJump->CheckLive(); + } + } +#endif #if 1 if ( mIns[i + 0].mType == ASMIT_STA && (mIns[i + 0].mMode == ASMIM_ZERO_PAGE || mIns[i + 0].mMode == ASMIM_ABSOLUTE) && @@ -44528,7 +44723,7 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc) { mInterProc = proc; - CheckFunc = !strcmp(mInterProc->mIdent->mString, "Enemy::StepTurn"); + CheckFunc = !strcmp(mInterProc->mIdent->mString, "mh_size"); int nblocks = proc->mBlocks.Size(); tblocks = new NativeCodeBasicBlock * [nblocks]; @@ -44985,6 +45180,8 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc) void NativeCodeProcedure::Assemble(void) { + CheckFunc = !strcmp(mInterProc->mIdent->mString, "mh_size"); + if (mInterProc->mCompilerOptions & COPT_OPTIMIZE_MERGE_CALLS) { ResetVisited(); @@ -45138,6 +45335,8 @@ void NativeCodeProcedure::Optimize(void) int cnt = 0; bool swappedXY = false; + CheckCase = false; + #if _DEBUG ResetVisited(); mEntryBlock->CheckBlocks(); @@ -45327,6 +45526,7 @@ void NativeCodeProcedure::Optimize(void) if (mEntryBlock->PeepHoleOptimizer(this, step)) changed = true; + #endif if (step == 2) { @@ -45614,7 +45814,7 @@ void NativeCodeProcedure::Optimize(void) for (int i = 0; i < 256; i++) if (xregs[i] > xregs[j]) j = i; - if (xregs[j] > 0) + if (xregs[j] > 2) { ResetVisited(); mEntryBlock->GlobalRegisterXMap(j); @@ -45632,7 +45832,7 @@ void NativeCodeProcedure::Optimize(void) for (int i = 0; i < 256; i++) if (yregs[i] > yregs[j]) j = i; - if (yregs[j] > 0) + if (yregs[j] > 2) { ResetVisited(); mEntryBlock->GlobalRegisterYMap(j); @@ -46077,6 +46277,17 @@ void NativeCodeProcedure::Optimize(void) ResetVisited(); mEntryBlock->RemoveUnusedResultInstructions(); +#if 1 + ResetVisited(); + data.Reset(); + mEntryBlock->BuildEntryDataSet(data); + + CheckCase = true; + ResetVisited(); + if (mEntryBlock->ApplyEntryDataSet()) + changed = true; +#endif + #if 1 ResetVisited(); mEntryBlock->BlockSizeReduction(this, -1, -1);