From a81f810a6385eee61db3a8274134365aab22ece8 Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Fri, 10 Jun 2022 15:56:08 +0200 Subject: [PATCH] Size optimizations --- include/stdlib.c | 204 +++++++++++++++++++++- include/stdlib.h | 16 ++ oscar64/Array.h | 10 ++ oscar64/InterCode.cpp | 202 ++++++++++++++++++---- oscar64/InterCode.h | 1 + oscar64/NativeCodeGenerator.cpp | 296 +++++++++++++++++++++++++++++++- oscar64/NativeCodeGenerator.h | 8 +- 7 files changed, 699 insertions(+), 38 deletions(-) diff --git a/include/stdlib.c b/include/stdlib.c index d1d14a1..cfcbda6 100644 --- a/include/stdlib.c +++ b/include/stdlib.c @@ -224,6 +224,11 @@ int atoi(const char * s) const float tpow10[7] = {1.0, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0}; float atof(const char * s) +{ + return strtof(s, nullptr); +} + +float strtof(const char *s, const char **endp) { char c; while ((c = *s++) <= ' ') @@ -302,7 +307,204 @@ float atof(const char * s) if (neg) v = -v; - return v; + if (endp) + *endp = (char *)s; + + return v; +} + +int strtoi(const char *s, const char **endp, char base) +{ + char c; + while ((c = *s++) <= ' ') + if (!c) return 0; + + bool neg = false; + if (c == '-') + { + neg = true; + c = *s++; + } + else if (c == '+') + c = *s++; + + if (c == '0') + { + c = *s++; + if (c == 'x' || c == 'X') + { + base = 16; + c = *s++; + } + else if (base == 0) + base = 8; + } else if (base == 0) + base = 10; + + int v = 0; + for(;;) + { + if (c >= '0' && c <= '9') + v = v * base + (c - '0'); + else if (c >= 'a' && c <= 'f') + v = v * base + (c - 'a' + 10); + else if (c >= 'A' && c <= 'F') + v = v * base + (c - 'A' + 10); + else + break; + + c = *s++; + } + + if (neg) + v = -v; + + if (endp) + *endp = (char *)s; + + return v; +} + +unsigned strtou(const char *s, const char **endp, char base) +{ + char c; + while ((c = *s++) <= ' ') + if (!c) return 0; + + if (c == '0') + { + c = *s++; + if (c == 'x' || c == 'X') + { + base = 16; + c = *s++; + } + else if (base == 0) + base = 8; + } else if (base == 0) + base = 10; + + unsigned v = 0; + for(;;) + { + if (c >= '0' && c <= '9') + v = v * base + (c - '0'); + else if (c >= 'a' && c <= 'f') + v = v * base + (c - 'a' + 10); + else if (c >= 'A' && c <= 'F') + v = v * base + (c - 'A' + 10); + else + break; + + c = *s++; + } + + if (endp) + *endp = (char *)s; + + return v; +} + +long strtol(const char *s, const char **endp, char base) +{ + char c; + while ((c = *s++) <= ' ') + if (!c) return 0; + + bool neg = false; + if (c == '-') + { + neg = true; + c = *s++; + } + else if (c == '+') + c = *s++; + + if (c == '0') + { + c = *s++; + if (c == 'x' || c == 'X') + { + base = 16; + c = *s++; + } + else if (base == 0) + base = 8; + } else if (base == 0) + base = 10; + + long v = 0; + for(;;) + { + if (c >= '0' && c <= '9') + v = v * base + (c - '0'); + else if (c >= 'a' && c <= 'f') + v = v * base + (c - 'a' + 10); + else if (c >= 'A' && c <= 'F') + v = v * base + (c - 'A' + 10); + else + break; + + c = *s++; + } + + if (neg) + v = -v; + + if (endp) + *endp = (char *)s; + + return v; +} + +unsigned long strtoul(const char *s, const char **endp, char base) +{ + char c; + while ((c = *s++) <= ' ') + if (!c) return 0; + + if (c == '0') + { + c = *s++; + if (c == 'x' || c == 'X') + { + base = 16; + c = *s++; + } + else if (base == 0) + base = 8; + } else if (base == 0) + base = 10; + + unsigned long v = 0; + for(;;) + { + if (c >= '0' && c <= '9') + v = v * base + (c - '0'); + else if (c >= 'a' && c <= 'f') + v = v * base + (c - 'a' + 10); + else if (c >= 'A' && c <= 'F') + v = v * base + (c - 'A' + 10); + else + break; + + c = *s++; + } + + if (endp) + *endp = (char *)s; + + return v; +} + +int abs(int n) +{ + return n < 0 ? - n : n; +} + +long labs(long n) +{ + return n < 0 ? - n : n; } void exit(int status) diff --git a/include/stdlib.h b/include/stdlib.h index 51f1a49..1761f40 100644 --- a/include/stdlib.h +++ b/include/stdlib.h @@ -16,8 +16,24 @@ void ultoa(unsigned long n, char * s, unsigned radix); int atoi(const char * s); +long atol(const char * s); + float atof(const char * s); +float strtof(const char *s, const char **endp); + +int strtoi(const char *s, const char **endp, char base); + +unsigned strtou(const char *s, const char **endp, char base); + +long strtol(const char *s, const char **endp, char base); + +unsigned long strtoul(const char *s, const char **endp, char base); + +int abs(int n); + +long labs(long n); + void exit(int status); diff --git a/oscar64/Array.h b/oscar64/Array.h index 89f3528..23dc440 100644 --- a/oscar64/Array.h +++ b/oscar64/Array.h @@ -263,6 +263,16 @@ public: Grow(at, false); } + void Remove(int at, int n) + { + while (at + n < size) + { + array[at] = array[at + n]; + at++; + } + Grow(at, false); + } + T Top(void) const { return array[size - 1]; diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index 4177b9e..4171b0d 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -14,6 +14,11 @@ int InterTypeSize[] = { 2 }; +static bool IsCommutative(InterOperator op) +{ + return op == IA_ADD || op == IA_MUL || op == IA_AND || op == IA_OR || op == IA_XOR; +} + static bool IsIntegerType(InterType type) { return type >= IT_INT8 && type <= IT_INT32; @@ -510,10 +515,19 @@ static bool SameInstruction(const InterInstruction* ins1, const InterInstruction { if (ins1->mCode == ins2->mCode && ins1->mNumOperands == ins2->mNumOperands && ins1->mOperator == ins2->mOperator) { - for (int i = 0; i < ins1->mNumOperands; i++) - if (!ins1->mSrc[i].IsEqual(ins2->mSrc[i])) - return false; - return true; + if (ins1->mCode == IC_BINARY_OPERATOR && IsCommutative(ins1->mOperator)) + { + return + ins1->mSrc[0].IsEqual(ins2->mSrc[0]) && ins1->mSrc[1].IsEqual(ins2->mSrc[1]) || + ins1->mSrc[0].IsEqual(ins2->mSrc[1]) && ins1->mSrc[1].IsEqual(ins2->mSrc[0]); + } + else + { + for (int i = 0; i < ins1->mNumOperands; i++) + if (!ins1->mSrc[i].IsEqual(ins2->mSrc[i])) + return false; + return true; + } } return false; @@ -1246,6 +1260,9 @@ void TempForwardingTable::Build(int from, int to) bool InterInstruction::ReferencesTemp(int temp) const { + if (temp < 0) + return false; + if (temp == mDst.mTemp) return true; for (int i = 0; i < mNumOperands; i++) @@ -5611,6 +5628,21 @@ void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(void) ins->mSrc[0].mRange.LimitMax(vr.mMaxValue - ins->mSrc[1].mIntConst); mReverseValueRange[ins->mSrc[0].mTemp].Limit(ins->mSrc[0].mRange); } + else if (ins->mSrc[0].mTemp >= 0 && ins->mSrc[1].mTemp >= 0) + { + if (vr.mMinState == IntegerValueRange::S_BOUND && ins->mSrc[0].mRange.mMaxState == IntegerValueRange::S_BOUND) + ins->mSrc[1].mRange.LimitMin(vr.mMinValue - ins->mSrc[0].mRange.mMaxValue); + if (vr.mMaxState == IntegerValueRange::S_BOUND && ins->mSrc[0].mRange.mMinState == IntegerValueRange::S_BOUND) + ins->mSrc[1].mRange.LimitMax(vr.mMaxValue - ins->mSrc[0].mRange.mMinValue); + + if (vr.mMinState == IntegerValueRange::S_BOUND && ins->mSrc[1].mRange.mMaxState == IntegerValueRange::S_BOUND) + ins->mSrc[0].mRange.LimitMin(vr.mMinValue - ins->mSrc[1].mRange.mMaxValue); + if (vr.mMaxState == IntegerValueRange::S_BOUND && ins->mSrc[1].mRange.mMinState == IntegerValueRange::S_BOUND) + ins->mSrc[0].mRange.LimitMax(vr.mMaxValue - ins->mSrc[1].mRange.mMinValue); + + mReverseValueRange[ins->mSrc[0].mTemp].Limit(ins->mSrc[0].mRange); + mReverseValueRange[ins->mSrc[1].mTemp].Limit(ins->mSrc[1].mRange); + } break; case IA_MUL: if (ins->mSrc[0].mTemp < 0 && ins->mSrc[1].mTemp >= 0 && ins->mSrc[0].mIntConst > 0) @@ -7817,13 +7849,13 @@ void InterCodeBasicBlock::MarkRelevantStatics(void) } } -bool InterCodeBasicBlock::CanMoveInstructionBehindBlock(int ii) const +bool InterCodeBasicBlock::CanMoveInstructionDown(int si, int ti) const { - InterInstruction* ins = mInstructions[ii]; + InterInstruction* ins = mInstructions[si]; if (ins->mCode == IC_LOAD) { - for (int i = ii + 1; i < mInstructions.Size(); i++) + for (int i = si + 1; i < ti; i++) if (!CanBypassLoad(ins, mInstructions[i])) return false; } @@ -7836,7 +7868,7 @@ bool InterCodeBasicBlock::CanMoveInstructionBehindBlock(int ii) const return false; else { - for (int i = ii + 1; i < mInstructions.Size(); i++) + for (int i = si + 1; i < ti; i++) if (!CanBypass(ins, mInstructions[i])) return false; } @@ -7844,6 +7876,12 @@ bool InterCodeBasicBlock::CanMoveInstructionBehindBlock(int ii) const return true; } +bool InterCodeBasicBlock::CanMoveInstructionBehindBlock(int ii) const +{ + return CanMoveInstructionDown(ii, mInstructions.Size()); +} + + bool InterCodeBasicBlock::CanMoveInstructionBeforeBlock(int ii) const { @@ -9676,10 +9714,6 @@ void InterCodeBasicBlock::SingleBlockLoopOptimisation(const NumberSet& aliasedPa } } -static bool IsCommutative(InterOperator op) -{ - return op == IA_ADD || op == IA_MUL || op == IA_AND || op == IA_OR || op == IA_XOR; -} void InterCodeBasicBlock::CompactInstructions(void) { if (!mVisited) @@ -10271,7 +10305,28 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati mInstructions[i + 1]->mSrc[0] = mInstructions[i + 0]->mDst; changed = true; } +#if 1 + else if ( + mInstructions[i + 0]->mCode == IC_LEA && mInstructions[i + 0]->mSrc[1].mMemory == IM_GLOBAL && + mInstructions[i + 1]->mCode == IC_LEA && mInstructions[i + 1]->mSrc[1].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[1].mFinal && + mInstructions[i + 1]->mSrc[0].mRange.mMaxState == IntegerValueRange::S_BOUND && !mInstructions[i + 1]->mSrc[0].IsUByte() && + mInstructions[i + 0]->mSrc[0].mRange.mMaxState == IntegerValueRange::S_BOUND && !mInstructions[i + 0]->mSrc[0].IsUByte()) + { + mInstructions[i + 1]->mSrc[1] = mInstructions[i + 0]->mSrc[1]; + mInstructions[i + 0]->mCode = IC_BINARY_OPERATOR; + mInstructions[i + 0]->mOperator = IA_ADD; + mInstructions[i + 0]->mSrc[1] = mInstructions[i + 1]->mSrc[0]; + mInstructions[i + 0]->mDst.mType = IT_INT16; + mInstructions[i + 0]->mDst.mRange.mMaxState = IntegerValueRange::S_BOUND; + mInstructions[i + 0]->mDst.mRange.mMaxValue = mInstructions[i + 0]->mSrc[1].mRange.mMaxValue + mInstructions[i + 0]->mSrc[0].mRange.mMaxValue; + mInstructions[i + 0]->mDst.mRange.mMinState = IntegerValueRange::S_BOUND; + mInstructions[i + 0]->mDst.mRange.mMinValue = 0; + + mInstructions[i + 1]->mSrc[0] = mInstructions[i + 0]->mDst; + changed = true; + } +#endif #if 1 // Postincrement artifact if (mInstructions[i + 0]->mCode == IC_LOAD_TEMPORARY && mInstructions[i + 1]->mCode == IC_BINARY_OPERATOR && @@ -10302,6 +10357,25 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati } while (changed); + // build trains + + for(int i = mInstructions.Size() - 1; i > 0; i--) + { + InterInstruction* tins = mInstructions[i]; + + j = i - 1; + while (j >= 0 && !tins->ReferencesTemp(mInstructions[j]->mDst.mTemp)) + j--; + if (j >= 0 && j < i - 1) + { + if (CanMoveInstructionDown(j, i)) + { + mInstructions.Insert(i, mInstructions[j]); + mInstructions.Remove(j); + } + } + } + // sort stores up do @@ -11414,6 +11488,21 @@ void InterCodeProcedure::Close(void) #endif +#if 1 + do { + GrowingInstructionPtrArray gipa(nullptr); + ResetVisited(); + changed = mEntryBlock->LoadStoreForwarding(gipa); + + RemoveUnusedStoreInstructions(paramMemory); + + TempForwarding(); + RemoveUnusedInstructions(); + + DisassembleDebug("Load/Store forwarding2"); + } while (changed); +#endif + #if 1 BuildLoopPrefix(); DisassembleDebug("added dominators"); @@ -11638,6 +11727,39 @@ bool InterCodeBasicBlock::SameExitCode(const InterCodeBasicBlock* block) const return false; } +bool PartitionSameExitCode(GrowingArray & eblocks, GrowingArray & mblocks) +{ + int i = 0; + + mblocks.SetSize(0, true); + + while (i + 1 < eblocks.Size()) + { + int j = i + 1; + while (j < eblocks.Size()) + { + if (eblocks[i]->SameExitCode(eblocks[j])) + { + mblocks.Push(eblocks[j]); + eblocks.Remove(j); + } + else + j++; + } + + if (mblocks.Size()) + { + mblocks.Push(eblocks[i]); + eblocks.Remove(i); + return true; + } + + i++; + } + + return false; +} + void InterCodeProcedure::MergeBasicBlocks(void) { ResetVisited(); @@ -11646,6 +11768,8 @@ void InterCodeProcedure::MergeBasicBlocks(void) ResetVisited(); mEntryBlock->SplitBranches(this); + DisassembleDebug("PostSplit"); + bool changed; do { @@ -11735,28 +11859,48 @@ void InterCodeProcedure::MergeBasicBlocks(void) if (eblocks.Size() == block->mNumEntries) { - bool ok; - do { - ok = false; + GrowingArray mblocks(nullptr); - if (eblocks[0]->mInstructions.Size() > 1) + while (PartitionSameExitCode(eblocks, mblocks)) + { + InterCodeBasicBlock* nblock; + + if (eblocks.Size() || mblocks.IndexOf(block) != -1) { - InterInstruction* ins = eblocks[0]->mInstructions[eblocks[0]->mInstructions.Size() - 2]; +// break; - int j = 1; - while (j < eblocks.Size() && eblocks[0]->SameExitCode(eblocks[j])) - j++; - if (j == eblocks.Size()) - { - block->mInstructions.Insert(0, ins); - for (int j = 0; j < eblocks.Size(); j++) - eblocks[j]->mInstructions.Remove(eblocks[j]->mInstructions.Size() - 2); - ok = true; - changed = true; - } + nblock = new InterCodeBasicBlock(); + this->Append(nblock); + + for (int i = 0; i < mblocks.Size(); i++) + mblocks[i]->mTrueJump = nblock; + block->mNumEntries -= mblocks.Size(); + + InterInstruction* jins = new InterInstruction(); + jins->mCode = IC_JUMP; + nblock->mInstructions.Push(jins); + nblock->Close(block, nullptr); + + nblock->mNumEntries = mblocks.Size(); + block->mNumEntries++; + + eblocks.Push(nblock); } + else + nblock = block; - } while (ok); + InterInstruction* ins = mblocks[0]->mInstructions[mblocks[0]->mInstructions.Size() - 2]; + + nblock->mInstructions.Insert(0, ins); + for (int j = 0; j < mblocks.Size(); j++) + { + assert(mblocks[j]->mInstructions[mblocks[j]->mInstructions.Size() - 1]->mCode == IC_JUMP); + assert(mblocks[j]->mInstructions[mblocks[j]->mInstructions.Size() - 2]->IsEqual(ins)); + + mblocks[j]->mInstructions.Remove(mblocks[j]->mInstructions.Size() - 2); + } + changed = true; + } } } } diff --git a/oscar64/InterCode.h b/oscar64/InterCode.h index 0da8869..30b9213 100644 --- a/oscar64/InterCode.h +++ b/oscar64/InterCode.h @@ -460,6 +460,7 @@ public: bool PushSinglePathResultInstructions(void); bool CanMoveInstructionBeforeBlock(int ii) const; bool CanMoveInstructionBehindBlock(int ii) const; + bool CanMoveInstructionDown(int si, int ti) const; bool MergeCommonPathInstructions(void); void PeepholeOptimization(const GrowingVariableArray& staticVars); diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index ce06efa..c3d6fc6 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -13075,6 +13075,69 @@ bool NativeCodeBasicBlock::PropagateSinglePath(void) return changed; } +bool NativeCodeBasicBlock::Is16BitAddSubImmediate(int at, int& sreg, int& dreg, int& offset) const +{ + if (mIns[at + 0].mType == ASMIT_CLC && + mIns[at + 1].mType == ASMIT_LDA && mIns[at + 1].mMode == ASMIM_ZERO_PAGE && + mIns[at + 2].mType == ASMIT_ADC && mIns[at + 2].mMode == ASMIM_IMMEDIATE && + mIns[at + 3].mType == ASMIT_STA && mIns[at + 3].mMode == ASMIM_ZERO_PAGE && + mIns[at + 4].mType == ASMIT_LDA && mIns[at + 4].mMode == ASMIM_ZERO_PAGE && mIns[at + 4].mAddress == mIns[at + 1].mAddress + 1 && + mIns[at + 5].mType == ASMIT_ADC && mIns[at + 5].mMode == ASMIM_IMMEDIATE && + mIns[at + 6].mType == ASMIT_STA && mIns[at + 6].mMode == ASMIM_ZERO_PAGE && mIns[at + 6].mAddress == mIns[at + 3].mAddress + 1 && + !(mIns[at + 6].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_C | LIVE_CPU_REG_Z))) + { + sreg = mIns[at + 1].mAddress; + dreg = mIns[at + 3].mAddress; + offset = mIns[at + 2].mAddress + 256 * mIns[at + 5].mAddress; + return true; + } + + return false; +} + +bool NativeCodeBasicBlock::CanForward16BitAddSubImmediate(int sreg, int dreg, int offset, int& index) const +{ + int i = mIns.Size() - 1; + while (i >= 6) + { + int asreg, adreg, aoffset; + if (Is16BitAddSubImmediate(i - 6, asreg, adreg, aoffset) && asreg == sreg && adreg == dreg && aoffset == offset) + { + index = i - 6; + return true; + } + + if (mIns[i].ReferencesZeroPage(dreg) || mIns[i].ReferencesZeroPage(dreg + 1) || mIns[i].ChangesZeroPage(sreg) || mIns[i].ChangesZeroPage(sreg + 1)) + return false; + + i--; + } + + return false; +} + +bool NativeCodeBasicBlock::CanForwardZPMove(int saddr, int daddr, int & index) const +{ + int i = mIns.Size() - 1; + while (i > 0) + { + if (mIns[i].ChangesZeroPage(saddr)) + return false; + if (mIns[i].ChangesZeroPage(daddr)) + { + index = i - 1; + return + (mIns[i].mType == ASMIT_STA && mIns[i - 1].mType == ASMIT_LDA && mIns[i - 1].mMode == ASMIM_ZERO_PAGE && mIns[i - 1].mAddress == saddr && !(mIns[i].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z))) || + (mIns[i].mType == ASMIT_STX && mIns[i - 1].mType == ASMIT_LDX && mIns[i - 1].mMode == ASMIM_ZERO_PAGE && mIns[i - 1].mAddress == saddr && !(mIns[i].mLive & (LIVE_CPU_REG_X | LIVE_CPU_REG_Z))); + } + if (mIns[i].ReferencesZeroPage(daddr)) + return false; + i--; + } + + return false; +} + bool NativeCodeBasicBlock::JoinTailCodeSequences(NativeCodeProcedure* proc, bool loops) { @@ -13096,6 +13159,37 @@ bool NativeCodeBasicBlock::JoinTailCodeSequences(NativeCodeProcedure* proc, bool { NativeCodeBasicBlock* eb = mEntryBlocks[0]; + if (mEntryRequiredRegs.Size() && !mEntryRequiredRegs[CPU_REG_Z] && (!mEntryRequiredRegs[CPU_REG_A] || !mEntryRequiredRegs[CPU_REG_X]) && !mEntryRequiredRegs[CPU_REG_C]) + { + for (int i = eb->mIns.Size() - 7; i >= 0; i--) + { + int sreg, dreg, offset; + if (eb->Is16BitAddSubImmediate(i, sreg, dreg, offset)) + { + int j = 0; + while (j < mEntryBlocks.Size() && mEntryBlocks[j]->CanForward16BitAddSubImmediate(sreg, dreg, offset, mEntryBlocks[j]->mTemp)) + j++; + + if (j == mEntryBlocks.Size()) + { + for (int k = 0; k < 7; k++) + mIns.Insert(k, eb->mIns[i + k]); + + for (int j = 0; j < mEntryBlocks.Size(); j++) + mEntryBlocks[j]->mIns.Remove(mEntryBlocks[j]->mTemp, 7); + + if (mEntryRequiredRegs[CPU_REG_A]) + { + mIns.Insert(0, NativeCodeInstruction(ASMIT_TAX)); + mIns.Insert(8, NativeCodeInstruction(ASMIT_TXA)); + } + + changed = true; + } + } + } + } + while (eb->mIns.Size() > 0) { NativeCodeInstruction& ins(eb->mIns[eb->mIns.Size() - 1]); @@ -13139,6 +13233,37 @@ bool NativeCodeBasicBlock::JoinTailCodeSequences(NativeCodeProcedure* proc, bool break; } } + + if (!changed && !mEntryRequiredRegs[CPU_REG_Z] && (!mEntryRequiredRegs[CPU_REG_A] || !mEntryRequiredRegs[CPU_REG_X])) + { + for (int i = eb->mIns.Size() - 1; i > 0; i--) + { + if (eb->mIns[i - 1].mType == ASMIT_LDA && eb->mIns[i - 1].mMode == ASMIM_ZERO_PAGE && eb->mIns[i - 0].mType == ASMIT_STA && eb->mIns[i - 0].mMode == ASMIM_ZERO_PAGE || + eb->mIns[i - 1].mType == ASMIT_LDX && eb->mIns[i - 1].mMode == ASMIM_ZERO_PAGE && eb->mIns[i - 0].mType == ASMIT_STX && eb->mIns[i - 0].mMode == ASMIM_ZERO_PAGE) + { + int saddr = eb->mIns[i - 1].mAddress, daddr = eb->mIns[i - 0].mAddress; + int j = 0; + while (j < mEntryBlocks.Size() && mEntryBlocks[j]->CanForwardZPMove(saddr, daddr, mEntryBlocks[j]->mTemp)) + j++; + + if (j == mEntryBlocks.Size()) + { + if (!mEntryRequiredRegs[CPU_REG_A]) + { + mIns.Insert(0, NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, saddr)); + mIns.Insert(1, NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, daddr)); + changed = true; + } + else if (!mEntryRequiredRegs[CPU_REG_X]) + { + mIns.Insert(0, NativeCodeInstruction(ASMIT_LDX, ASMIM_ZERO_PAGE, saddr)); + mIns.Insert(1, NativeCodeInstruction(ASMIT_STX, ASMIM_ZERO_PAGE, daddr)); + changed = true; + } + } + } + } + } } if (mEntryBlocks.Size() > 2) @@ -13150,6 +13275,7 @@ bool NativeCodeBasicBlock::JoinTailCodeSequences(NativeCodeProcedure* proc, bool changed = true; } } + } #endif if (mEntryBlocks.Size() == 1) @@ -15216,6 +15342,47 @@ bool NativeCodeBasicBlock::MoveLoadIndirectTempStoreUp(int at) return false; } + if (mIns[j].mType == ASMIT_STX && mIns[j].mMode == ASMIM_ZERO_PAGE && (mIns[j].mAddress == mIns[at + 1].mAddress || mIns[j].mAddress == mIns[at + 1].mAddress + 1)) + { + if (mIns[j - 3].mType == ASMIT_LDX && mIns[j - 3].mMode == ASMIM_ZERO_PAGE && + mIns[j - 2].mType == ASMIT_STX && mIns[j - 2].mMode == ASMIM_ZERO_PAGE && + mIns[j - 1].mType == ASMIT_LDX && mIns[j - 1].mMode == ASMIM_ZERO_PAGE && + mIns[j - 0].mType == ASMIT_STX && mIns[j - 0].mMode == ASMIM_ZERO_PAGE) + { + if (mIns[j - 2].mAddress == mIns[at + 1].mAddress && + mIns[j - 0].mAddress == mIns[at + 1].mAddress + 1 && + mIns[j - 1].mAddress == mIns[j - 3].mAddress + 1) + { + int addr = mIns[j - 3].mAddress; + + while (mIns[j].mLive & LIVE_CPU_REG_A) + { + j++; + if (mIns[j].ReferencesZeroPage(addr) || mIns[j].ReferencesZeroPage(addr + 1)) + return false; + } + + mIns[at + 1].mLive |= mIns[j].mLive; + mIns[at + 2].mLive |= mIns[j].mLive; + mIns[at + 3].mLive |= mIns[j].mLive; + + mIns[at + 1].mAddress = addr; + mIns[at + 1].mLive |= LIVE_MEM; + + mIns.Insert(j + 1, mIns[at + 2]); + mIns.Insert(j + 1, mIns[at + 2]); + mIns.Insert(j + 1, mIns[at + 2]); + + mIns.Remove(at + 3); + mIns.Remove(at + 3); + mIns.Remove(at + 3); + + return true; + } + } + + return false; + } if (mIns[j].ReferencesYReg()) return false; @@ -15232,6 +15399,107 @@ bool NativeCodeBasicBlock::MoveLoadIndirectTempStoreUp(int at) return false; } +bool NativeCodeBasicBlock::MoveLoadIndirectBypassYUp(int at) +{ + // ldy #imm + // lda (t0), y + + // move up, and keep A in Y for the intervall + + int j = at - 1; + while (j >= 3) + { + if (mIns[j].mType == ASMIT_STA && mIns[j].mMode == ASMIM_ZERO_PAGE && (mIns[j].mAddress == mIns[at + 1].mAddress || mIns[j].mAddress == mIns[at + 1].mAddress + 1)) + { + if (mIns[j - 3].mType == ASMIT_LDA && mIns[j - 3].mMode == ASMIM_ZERO_PAGE && + mIns[j - 2].mType == ASMIT_STA && mIns[j - 2].mMode == ASMIM_ZERO_PAGE && + mIns[j - 1].mType == ASMIT_LDA && mIns[j - 1].mMode == ASMIM_ZERO_PAGE && + mIns[j - 0].mType == ASMIT_STA && mIns[j - 0].mMode == ASMIM_ZERO_PAGE) + { + if (mIns[j - 2].mAddress == mIns[at + 1].mAddress && + mIns[j - 0].mAddress == mIns[at + 1].mAddress + 1 && + mIns[j - 1].mAddress == mIns[j - 3].mAddress + 1) + { + mIns[at + 0].mLive |= mIns[j].mLive; + mIns[at + 1].mLive |= mIns[j].mLive; + + mIns[at + 1].mAddress = mIns[j - 3].mAddress; + mIns[at + 1].mLive |= LIVE_MEM; + + mIns.Insert(j + 1, mIns[at + 0]); + mIns.Remove(at + 1); + mIns.Insert(j + 2, mIns[at + 1]); + mIns.Remove(at + 2); + + mIns.Insert(at + 2, NativeCodeInstruction(ASMIT_TYA)); + mIns.Insert(j + 3, NativeCodeInstruction(ASMIT_TAY)); + + for (int k = j + 3; k < at + 3; k++) + mIns[k].mLive |= LIVE_CPU_REG_Y; + + return true; + } + } + + return false; + } + if (mIns[j].mType == ASMIT_STX && mIns[j].mMode == ASMIM_ZERO_PAGE && (mIns[j].mAddress == mIns[at + 1].mAddress || mIns[j].mAddress == mIns[at + 1].mAddress + 1)) + { + if (mIns[j - 3].mType == ASMIT_LDX && mIns[j - 3].mMode == ASMIM_ZERO_PAGE && + mIns[j - 2].mType == ASMIT_STX && mIns[j - 2].mMode == ASMIM_ZERO_PAGE && + mIns[j - 1].mType == ASMIT_LDX && mIns[j - 1].mMode == ASMIM_ZERO_PAGE && + mIns[j - 0].mType == ASMIT_STX && mIns[j - 0].mMode == ASMIM_ZERO_PAGE) + { + if (mIns[j - 2].mAddress == mIns[at + 1].mAddress && + mIns[j - 0].mAddress == mIns[at + 1].mAddress + 1 && + mIns[j - 1].mAddress == mIns[j - 3].mAddress + 1) + { + int addr = mIns[j - 3].mAddress; + + while (mIns[j].mLive & LIVE_CPU_REG_A) + { + j++; + if (mIns[j].ReferencesZeroPage(addr) || mIns[j].ReferencesZeroPage(addr + 1)) + return false; + } + + mIns[at + 0].mLive |= mIns[j].mLive; + mIns[at + 1].mLive |= mIns[j].mLive; + + mIns[at + 1].mAddress = addr; + mIns[at + 1].mLive |= LIVE_MEM; + + mIns.Insert(j + 1, mIns[at + 0]); + mIns.Remove(at + 1); + mIns.Insert(j + 2, mIns[at + 1]); + mIns.Remove(at + 2); + + mIns.Insert(at + 2, NativeCodeInstruction(ASMIT_TYA)); + mIns.Insert(j + 3, NativeCodeInstruction(ASMIT_TAY)); + + for (int k = j + 3; k < at + 3; k++) + mIns[k].mLive |= LIVE_CPU_REG_Y; + + return true; + } + } + + return false; + } + + if (mIns[j].ReferencesYReg()) + return false; + if (mIns[j].ChangesZeroPage(mIns[at + 1].mAddress)) + return false; + if (mIns[j].ChangesZeroPage(mIns[at + 1].mAddress + 1)) + return false; + + j--; + } + + return false; +} + bool NativeCodeBasicBlock::MoveIndirectLoadStoreUp(int at) { int j = at - 1; @@ -19309,11 +19577,13 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass #if 1 // move load (),y store zp down to potential user - for (int i = 2; i + 1 < mIns.Size(); i++) + for (int i = 0; i + 1 < mIns.Size(); i++) { if (mIns[i].mType == ASMIT_LDA && mIns[i].mMode == ASMIM_INDIRECT_Y && mIns[i + 1].mType == ASMIT_STA && mIns[i + 1].mMode == ASMIM_ZERO_PAGE && !(mIns[i + 1].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z))) { - if (MoveIndirectLoadStoreDown(i)) + if (MoveLoadIndirectTempStoreUp(i)) + changed = true; + else if (MoveIndirectLoadStoreDown(i)) changed = true; } } @@ -19324,13 +19594,12 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass #if 1 // move load (),y store zp up to potential user - for (int i = 4; i + 2 < mIns.Size(); i++) + for (int i = 4; i + 1 < mIns.Size(); i++) { if (mIns[i + 0].mType == ASMIT_LDY && mIns[i + 0].mMode == ASMIM_IMMEDIATE && - mIns[i + 1].mType == ASMIT_LDA && mIns[i + 1].mMode == ASMIM_INDIRECT_Y && !(mIns[i + 1].mLive & LIVE_MEM) && - mIns[i + 2].mType == ASMIT_STA && mIns[i + 2].mMode == ASMIM_ZERO_PAGE && !(mIns[i + 2].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z | LIVE_CPU_REG_Y))) + mIns[i + 1].mType == ASMIT_LDA && mIns[i + 1].mMode == ASMIM_INDIRECT_Y && !(mIns[i + 1].mLive & (LIVE_MEM | LIVE_CPU_REG_Y))) { - if (MoveLoadIndirectTempStoreUp(i)) + if (MoveLoadIndirectBypassYUp(i)) changed = true; } } @@ -21594,6 +21863,21 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[i + 1].mType = ASMIT_STA; mIns[i + 1].mLive |= mIns[i + 2].mLive; progress = true; } + else if ( + mIns[i + 0].mType == ASMIT_LDX && + mIns[i + 1].mType == ASMIT_STX && !(mIns[i + 1].mLive & LIVE_CPU_REG_X) && + mIns[i + 2].mType == ASMIT_STA && !(mIns[i + 2].mLive & LIVE_CPU_REG_A) && + !mIns[i + 0].MayBeChangedOnAddress(mIns[i + 2]) && + !mIns[i + 1].MayBeChangedOnAddress(mIns[i + 2])) + { + NativeCodeInstruction ins = mIns[i + 2]; + mIns[i + 2] = mIns[i + 1]; + mIns[i + 1] = mIns[i + 0]; + mIns[i + 0] = ins; + mIns[i + 1].mType = ASMIT_LDA; mIns[i + 1].mLive |= LIVE_CPU_REG_A | mIns[i + 0].mLive; + mIns[i + 2].mType = ASMIT_STA; mIns[i + 2].mLive |= mIns[i + 0].mLive; + progress = true; + } else if ( mIns[i + 0].mType == ASMIT_LDX && mIns[i + 1].mType == ASMIT_STX && diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index d61d482..cb9954f 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -148,7 +148,7 @@ public: GrowingArray mEntryBlocks; - int mOffset, mSize, mPlace, mNumEntries, mNumEntered, mFrameOffset; + int mOffset, mSize, mPlace, mNumEntries, mNumEntered, mFrameOffset, mTemp; bool mPlaced, mCopied, mKnownShortBranch, mBypassed, mAssembled, mNoFrame, mVisited, mLoopHead, mVisiting, mLocked, mPatched, mPatchFail; NativeCodeBasicBlock * mDominator, * mSameBlock; @@ -258,7 +258,8 @@ public: bool MoveAbsoluteLoadStoreUp(int at); bool MoveLoadStoreOutOfXYRangeUp(int at); bool MoveLoadIndirectTempStoreUp(int at); - + bool MoveLoadIndirectBypassYUp(int at); + bool MoveLoadAddImmStoreAbsXUp(int at); bool MoveStaTaxLdaStaDown(int at); @@ -344,6 +345,9 @@ public: bool AlternateXYUsage(void); bool OptimizeXYPairUsage(void); bool ForwardAbsoluteLoadStores(void); + bool CanForwardZPMove(int saddr, int daddr, int & index) const; + bool Is16BitAddSubImmediate(int at, int& sreg, int &dreg, int& offset) const; + bool CanForward16BitAddSubImmediate(int sreg, int dreg, int offset, int & index) const; bool CheckPatchFail(const NativeCodeBasicBlock* block, int reg);