From ba661759fbad5d02bc0b62cda6ee29b1ebddcf60 Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Thu, 30 Jun 2022 20:28:36 +0200 Subject: [PATCH] Fix reordering of function calls --- oscar64/InterCode.cpp | 117 ++++++++++++++++++++++---------- oscar64/NativeCodeGenerator.cpp | 75 +++++++++++++++++++- oscar64/NativeCodeGenerator.h | 1 + oscar64/oscar64.vcxproj | 1 + samples/scrolling/bigfont.c | 38 +++++------ 5 files changed, 175 insertions(+), 57 deletions(-) diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index 9d40e47..670eb6a 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -869,6 +869,22 @@ void ValueSet::InsertValue(InterInstruction * ins) mInstructions[mNum++] = ins; } +static bool HasSideEffect(InterCode code) +{ + return code == IC_CALL || code == IC_CALL_NATIVE || code == IC_ASSEMBLER; +} + +static bool IsMoveable(InterCode code) +{ + if (HasSideEffect(code) || code == IC_COPY || code == IC_STRCPY || code == IC_STORE || code == IC_BRANCH || code == IC_POP_FRAME || code == IC_PUSH_FRAME) + return false; + if (code == IC_RETURN || code == IC_RETURN_STRUCT || code == IC_RETURN_VALUE) + return false; + + return true; +} + + static bool CanBypassLoad(const InterInstruction* lins, const InterInstruction* bins) { // Check ambiguity @@ -922,6 +938,9 @@ static bool CanBypassLoad(const InterInstruction* lins, const InterInstruction* static bool CanBypass(const InterInstruction* lins, const InterInstruction* bins) { + if (HasSideEffect(lins->mCode) && HasSideEffect(bins->mCode)) + return false; + if (lins->mDst.mTemp >= 0) { if (lins->mDst.mTemp == bins->mDst.mTemp) @@ -948,6 +967,9 @@ static bool CanBypass(const InterInstruction* lins, const InterInstruction* bins static bool CanBypassUp(const InterInstruction* lins, const InterInstruction* bins) { + if (HasSideEffect(lins->mCode) && HasSideEffect(bins->mCode)) + return false; + if (lins->mDst.mTemp >= 0) { if (lins->mDst.mTemp == bins->mDst.mTemp) @@ -2757,22 +2779,6 @@ void InterInstruction::PerformTempForwarding(TempForwardingTable& forwardingTabl } } -bool HasSideEffect(InterCode code) -{ - return code == IC_CALL || code == IC_CALL_NATIVE || code == IC_ASSEMBLER; -} - -bool IsMoveable(InterCode code) -{ - if (HasSideEffect(code) || code == IC_COPY || code == IC_STRCPY || code == IC_STORE || code == IC_BRANCH || code == IC_POP_FRAME || code == IC_PUSH_FRAME) - return false; - if (code == IC_RETURN || code == IC_RETURN_STRUCT || code == IC_RETURN_VALUE) - return false; - - return true; -} - - bool InterInstruction::RemoveUnusedResultInstructions(InterInstruction* pre, NumberSet& requiredTemps) { bool changed = false; @@ -6920,27 +6926,29 @@ bool InterCodeBasicBlock::MergeIndexedLoadStore(const GrowingInstructionPtrArra if (lins->mSrc[0].mTemp >= 0) { - InterInstruction* bins = lins; - - for (int j = 0; j < ltvalue.Size(); j++) + if (lins->mSrc[1].mMemory != IM_ABSOLUTE || (lins->mSrc[0].mRange.mMaxState == IntegerValueRange::S_BOUND && lins->mSrc[0].mRange.mMaxValue >= 256)) { - InterInstruction* cins = ltvalue[j]; - if (cins && - cins->mSrc[0].mTemp == bins->mSrc[0].mTemp && - cins->mSrc[1].mTemp < 0 && bins->mSrc[1].mTemp < 0 && - cins->mSrc[1].mMemory == bins->mSrc[1].mMemory && - cins->mSrc[1].mVarIndex == bins->mSrc[1].mVarIndex && - cins->mSrc[1].mIntConst < bins->mSrc[1].mIntConst && - cins->mSrc[1].mMemory != IM_ABSOLUTE) + InterInstruction* bins = lins; + for (int j = 0; j < ltvalue.Size(); j++) + { + InterInstruction* cins = ltvalue[j]; + if (cins && + cins->mSrc[0].mTemp == bins->mSrc[0].mTemp && + cins->mSrc[1].mTemp < 0 && bins->mSrc[1].mTemp < 0 && + cins->mSrc[1].mMemory == bins->mSrc[1].mMemory && + cins->mSrc[1].mVarIndex == bins->mSrc[1].mVarIndex && + cins->mSrc[1].mIntConst < bins->mSrc[1].mIntConst) + { + bins = cins; + } + } - bins = cins; - } - - if (bins != lins && ins->mSrc[pi].mIntConst + lins->mSrc[1].mIntConst - bins->mSrc[1].mIntConst < 252) - { - ins->mSrc[pi].mTemp = bins->mDst.mTemp; - ins->mSrc[pi].mIntConst += lins->mSrc[1].mIntConst - bins->mSrc[1].mIntConst; - changed = true; + if (bins != lins && ins->mSrc[pi].mIntConst + lins->mSrc[1].mIntConst - bins->mSrc[1].mIntConst < 252) + { + ins->mSrc[pi].mTemp = bins->mDst.mTemp; + ins->mSrc[pi].mIntConst += lins->mSrc[1].mIntConst - bins->mSrc[1].mIntConst; + changed = true; + } } } } @@ -10568,6 +10576,18 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati mInstructions[i + 0]->mSrc[0].mIntConst = ~((1LL << shift) - 1); changed = true; } + else if ( + mInstructions[i + 0]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 0]->mOperator == IA_SHL && mInstructions[i + 0]->mSrc[0].mTemp < 0 && + mInstructions[i + 1]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 1]->mOperator == IA_MUL && mInstructions[i + 1]->mSrc[0].mTemp < 0 && + mInstructions[i + 1]->mSrc[1].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[1].mFinal && + (mInstructions[i + 1]->mSrc[0].mIntConst << mInstructions[i + 0]->mSrc[0].mIntConst) < 65536) + { + mInstructions[i + 1]->mSrc[0].mIntConst <<= mInstructions[i + 0]->mSrc[0].mIntConst;; + mInstructions[i + 1]->mSrc[1] = mInstructions[i + 0]->mSrc[1]; + mInstructions[i + 0]->mCode = IC_NONE; + mInstructions[i + 0]->mNumOperands = 0; + changed = true; + } #if 1 else if ( mInstructions[i + 0]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 0]->mOperator == IA_OR && mInstructions[i + 0]->mSrc[0].mTemp < 0 && @@ -10808,6 +10828,15 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati mInstructions[i + 1]->mSrc[0] = mInstructions[i + 0]->mDst; changed = true; } + else if ( + mInstructions[i + 0]->mCode == IC_LEA && mInstructions[i + 0]->mSrc[1].mTemp < 0 && + mInstructions[i + 1]->mCode == IC_STORE && mInstructions[i + 1]->mSrc[1].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[1].mFinal && + mInstructions[i + 1]->mSrc[1].mIntConst != 0) + { + mInstructions[i + 0]->mSrc[1].mIntConst += mInstructions[i + 1]->mSrc[1].mIntConst; + mInstructions[i + 1]->mSrc[1].mIntConst = 0; + changed = true; + } #if 1 else if ( mInstructions[i + 0]->mCode == IC_LEA && mInstructions[i + 0]->mSrc[1].mMemory == IM_GLOBAL && @@ -10830,6 +10859,22 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati changed = true; } #endif + else if ( + mInstructions[i + 0]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 0]->mOperator == IA_ADD && + mInstructions[i + 0]->mSrc[0].mTemp < 0 && + mInstructions[i + 1]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 1]->mOperator == IA_SHL && + mInstructions[i + 1]->mSrc[0].mTemp < 0 && + mInstructions[i + 1]->mSrc[1].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[1].mFinal && + mInstructions[i + 2]->mCode == IC_LEA && + mInstructions[i + 2]->mSrc[0].mTemp == mInstructions[i + 1]->mDst.mTemp && mInstructions[i + 2]->mSrc[0].mFinal && + mInstructions[i + 2]->mSrc[1].mTemp < 0) + { + mInstructions[i + 1]->mSrc[1] = mInstructions[i + 0]->mSrc[1]; + mInstructions[i + 2]->mSrc[1].mIntConst += mInstructions[i + 0]->mSrc[0].mIntConst << mInstructions[i + 1]->mSrc[0].mIntConst; + + mInstructions[i + 0]->mCode = IC_NONE; mInstructions[i + 0]->mNumOperands = 0; + changed = true; + } #if 1 // Postincrement artifact @@ -12047,6 +12092,8 @@ void InterCodeProcedure::Close(void) EliminateAliasValues(); + MergeIndexedLoadStore(); + #if 1 ResetVisited(); mEntryBlock->RestartLocalIntegerRangeSets(mLocalVars); diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index 6dff860..091604d 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -2726,7 +2726,7 @@ bool NativeCodeInstruction::ValueForwarding(NativeRegisterDataSet& data, AsmInsT #endif #if 1 - if (mMode == ASMIM_ABSOLUTE_X && data.mRegs[CPU_REG_X].SameData(data.mRegs[CPU_REG_Y]) && HasAsmInstructionMode(mType, ASMIM_ABSOLUTE_Y)) + if (mMode == ASMIM_ABSOLUTE_X && data.mRegs[CPU_REG_X].SameData(data.mRegs[CPU_REG_Y]) && HasAsmInstructionMode(mType, ASMIM_ABSOLUTE_Y) && !(mFlags & NICT_INDEXFLIPPED)) { mMode = ASMIM_ABSOLUTE_Y; changed = true; @@ -20740,6 +20740,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass { assert(HasAsmInstructionMode(mIns[j].mType, ASMIM_ABSOLUTE_X)); mIns[j].mMode = ASMIM_ABSOLUTE_X; + mIns[j].mFlags |= NICT_INDEXFLIPPED; n = j; changed = true; } @@ -20762,6 +20763,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass { assert(HasAsmInstructionMode(mIns[j].mType, ASMIM_ABSOLUTE_Y)); mIns[j].mMode = ASMIM_ABSOLUTE_Y; + mIns[j].mFlags |= NICT_INDEXFLIPPED; n = j; changed = true; } @@ -21376,7 +21378,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; progress = true; } - else if (mIns[i].mType == ASMIT_SEC && mIns[i + 1].mType == ASMIT_ADC && mIns[i + 1].mMode == ASMIM_IMMEDIATE && !(mIns[i + 1].mLive & LIVE_CPU_REG_C)) + else if (mIns[i].mType == ASMIT_SEC && mIns[i + 1].mType == ASMIT_ADC && mIns[i + 1].mMode == ASMIM_IMMEDIATE && !(mIns[i + 1].mLive & LIVE_CPU_REG_C)) { mIns[i + 0].mType = ASMIT_CLC; mIns[i + 1].mAddress++; @@ -21401,7 +21403,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass progress = true; } else if (mIns[i + 0].mType == ASMIT_LDA && mIns[i + 0].mMode == ASMIM_IMMEDIATE && mIns[i + 0].mAddress == 0 && - mIns[i + 1].mType == ASMIT_CMP && !(mIns[i + 1].mLive & (LIVE_CPU_REG_C | LIVE_CPU_REG_A))) + mIns[i + 1].mType == ASMIT_CMP && !(mIns[i + 1].mLive & (LIVE_CPU_REG_C | LIVE_CPU_REG_A))) { mIns[i + 0].mType = ASMIT_NOP; mIns[i + 0].mMode = ASMIM_IMPLIED; mIns[i + 1].mType = ASMIT_LDA; @@ -21491,6 +21493,36 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[i + 1].mType = ASMIT_STA; progress = true; } +#if 1 + else if (mIns[i + 0].mType == ASMIT_TXA && mIns[i + 1].mType == ASMIT_STX) + { + NativeCodeInstruction ins(mIns[i + 0]); + mIns[i + 0] = mIns[i + 1]; mIns[i + 0].mLive |= LIVE_CPU_REG_X; + mIns[i + 1] = ins; + progress = true; + } + else if (mIns[i + 0].mType == ASMIT_TYA && mIns[i + 1].mType == ASMIT_STY) + { + NativeCodeInstruction ins(mIns[i + 0]); + mIns[i + 0] = mIns[i + 1]; mIns[i + 0].mLive |= LIVE_CPU_REG_Y; + mIns[i + 1] = ins; + progress = true; + } + else if (mIns[i + 0].mType == ASMIT_TAX && mIns[i + 1].mType == ASMIT_STA && !mIns[i + 1].RequiresXReg()) + { + NativeCodeInstruction ins(mIns[i + 0]); + mIns[i + 0] = mIns[i + 1]; mIns[i + 0].mLive |= LIVE_CPU_REG_A; + mIns[i + 1] = ins; + progress = true; + } + else if (mIns[i + 0].mType == ASMIT_TAY && mIns[i + 1].mType == ASMIT_STA && !mIns[i + 1].RequiresYReg()) + { + NativeCodeInstruction ins(mIns[i + 0]); + mIns[i + 0] = mIns[i + 1]; mIns[i + 0].mLive |= LIVE_CPU_REG_A; + mIns[i + 1] = ins; + progress = true; + } +#endif else if ( mIns[i + 0].mType == ASMIT_ROL && mIns[i + 0].mMode == ASMIM_IMPLIED && mIns[i + 1].mType == ASMIT_LSR && mIns[i + 1].mMode == ASMIM_IMPLIED && !(mIns[i + 1].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z))) @@ -23871,6 +23903,43 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass progress = true; } } +#if 1 + else if ( + mIns[i + 0].mType == ASMIT_LDA && + mIns[i + 1].mType == ASMIT_STA && mIns[i + 1].mMode == ASMIM_ZERO_PAGE && + mIns[i + 2].mType == ASMIT_LDA && + mIns[i + 3].mType == ASMIT_STA && mIns[i + 3].mMode == ASMIM_ZERO_PAGE && mIns[i + 3].mAddress != mIns[i + 1].mAddress && + mIns[i + 4].mType == ASMIT_LDA && mIns[i + 4].mMode == ASMIM_ZERO_PAGE && mIns[i + 4].mAddress == mIns[i + 1].mAddress && + + !mIns[i + 0].ReferencesZeroPage(mIns[i + 3].mAddress) && + !mIns[i + 2].ReferencesZeroPage(mIns[i + 1].mAddress) && + !(mIns[i + 0].mFlags & NCIF_VOLATILE) && !(mIns[i + 2].mFlags & NCIF_VOLATILE)) + { + NativeCodeInstruction ins(mIns[i + 0]); + mIns[i + 0] = mIns[i + 2]; + mIns[i + 2] = ins; + mIns[i + 1].mAddress = mIns[i + 3].mAddress; + mIns[i + 3].mAddress = mIns[i + 4].mAddress; + + if (mIns[i + 2].RequiresYReg()) + { + mIns[i + 0].mLive |= LIVE_CPU_REG_Y; + mIns[i + 1].mLive |= LIVE_CPU_REG_Y; + } + if (mIns[i + 2].RequiresXReg()) + { + mIns[i + 0].mLive |= LIVE_CPU_REG_X; + mIns[i + 1].mLive |= LIVE_CPU_REG_X; + } + + mIns[i + 0].mLive |= mIns[i + 2].mLive; + mIns[i + 2].mLive |= mIns[i + 4].mLive; + mIns[i + 3].mLive |= mIns[i + 4].mLive; + + mIns[i + 4].mType = ASMIT_NOP; mIns[i + 4].mMode = ASMIM_IMPLIED; + progress = true; + } +#endif } #endif CheckLive(); diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index ce433c2..d55c170 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -64,6 +64,7 @@ static const uint32 NCIF_VOLATILE = 0x00000010; static const uint32 NCIF_LONG = 0x00000020; static const uint32 NCIF_FEXEC = 0x00000040; static const uint32 NCIF_JSRFLAGS = 0x00000080; +static const uint32 NICT_INDEXFLIPPED = 0x00000100; static const uint32 NCIF_USE_CPU_REG_A = 0x00001000; static const uint32 NCIF_USE_CPU_REG_X = 0x00002000; diff --git a/oscar64/oscar64.vcxproj b/oscar64/oscar64.vcxproj index 023f311..ee4f784 100644 --- a/oscar64/oscar64.vcxproj +++ b/oscar64/oscar64.vcxproj @@ -93,6 +93,7 @@ Console true kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;version.lib;%(AdditionalDependencies) + 16000000 diff --git a/samples/scrolling/bigfont.c b/samples/scrolling/bigfont.c index db5e0d6..c196a84 100644 --- a/samples/scrolling/bigfont.c +++ b/samples/scrolling/bigfont.c @@ -14,12 +14,12 @@ void copyFont(void) mmap_set(MMAP_ROM); } -// Screen and color space -#define screen ((byte *)0x0400) -#define color ((byte *)0xd800) +// Single row of screen has 40 characters +typedef char ScreenRow[40]; -// Macro for easy access to screen space -#define sline(x, y) (screen + 40 * (y) + (x)) +// Screen and color space +ScreenRow * const screen = (ScreenRow *)0x0400; +ScreenRow * const color = (ScreenRow *)0xd800; // Start row for text #define srow 5 @@ -30,12 +30,12 @@ void scrollLeft(void) // Loop horizontaly for(char x=0; x<39; x++) { - // Unroll vetical loop 16 times -#assign y 0 -#repeat - sline(0, srow + y)[x] = sline(1, srow + y)[x]; -#assign y y + 1 -#until y == 16 + // Unroll vertical loop 16 times + #pragma unroll(full) + for(char y=0; y<16; y++) + { + screen[srow + y][x] = screen[srow + y][x + 1]; + } } } @@ -46,13 +46,13 @@ void expand(char c, byte f) byte * fp = font + 8 * c; // Unroll eight times for each byte in glyph data -#assign y 0 -#repeat - sline(39, srow + 2 * y + 0)[0] = - sline(39, srow + 2 * y + 1)[0] = (fp[y] & f) ? 160 : 32; -#assign y y + 1 -#until y == 8 - +// #pragma unroll(full) + for(char y=0; y<8; y++) + { + char t = (fp[y] & f) ? 160 : 32; + screen[srow + 2 * y + 0][39] = t; + screen[srow + 2 * y + 1][39] = t; + } } const char * text = @@ -77,7 +77,7 @@ int main(void) // Color bars for(int i=0; i<16; i++) - memset(color + 40 * (srow + i), i + 1, 40); + memset(color[srow + i], i + 1, 40); vic.color_back = VCOL_BLACK; vic.color_border = VCOL_BLACK;