From 893b6f22942ba448ccc850e2c40081e75e0e19b3 Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Sun, 19 Dec 2021 22:18:05 +0100 Subject: [PATCH] Optimize long divide for short divisor --- autotest/divmodtest.c | 11 ++++++ include/crt.c | 36 +++++++++++++++++++- oscar64/NativeCodeGenerator.cpp | 59 ++++++++++++++++++++++++++++----- oscar64/Parser.cpp | 4 +-- 4 files changed, 98 insertions(+), 12 deletions(-) diff --git a/autotest/divmodtest.c b/autotest/divmodtest.c index bc4ef16..12019d1 100644 --- a/autotest/divmodtest.c +++ b/autotest/divmodtest.c @@ -23,6 +23,17 @@ int main(void) assert(r >= 0 && r < j); } } + + for(unsigned i=0; i<64000; i+=121) + { + for(unsigned j=1; j= 0 && r < j); + } + } return 0; } diff --git a/include/crt.c b/include/crt.c index a85464c..90e92c2 100644 --- a/include/crt.c +++ b/include/crt.c @@ -379,12 +379,46 @@ W1: dey __asm divmod32 { sty tmpy + ldy #32 + lda #0 sta tmp + 4 sta tmp + 5 sta tmp + 6 sta tmp + 7 - ldy #32 + + lda tmp + 2 + ora tmp + 3 + bne W32 + +// divide 32 by 16 bit + + clc +LS1: rol accu + rol accu + 1 + rol accu + 2 + rol accu + 3 + rol tmp + 4 + rol tmp + 5 + sec + lda tmp + 4 + sbc tmp + tax + lda tmp + 5 + sbc tmp + 1 + bcc WS1 + stx tmp + 4 + sta tmp + 5 +WS1: dey + bne LS1 + rol accu + rol accu + 1 + rol accu + 2 + rol accu + 3 + ldy tmpy + rts + +W32: clc L1: rol accu rol accu + 1 diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index 6a0e41e..096f798 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -9286,26 +9286,57 @@ bool NativeCodeBasicBlock::ValueForwarding(const NativeRegisterDataSet& data, bo void NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc, NativeCodeBasicBlock* lblock) { - if (lblock->mIns[0].mType == ASMIT_LDA && lblock->mIns[0].mMode == ASMIM_IMMEDIATE) + int ai = 0; + while (ai < lblock->mIns.Size() && !lblock->mIns[ai].ChangesAccu()) + ai++; + + if (lblock->mIns[ai].mType == ASMIT_LDA && lblock->mIns[ai].mMode == ASMIM_IMMEDIATE) { - int i = 1; + int i = ai + 1; while (i < lblock->mIns.Size() && !lblock->mIns[i].ChangesAccu()) i++; if (i == lblock->mIns.Size()) { - mIns.Push(lblock->mIns[0]); - lblock->mIns.Remove(0); + mIns.Push(lblock->mIns[ai]); + lblock->mIns.Remove(ai); } } - else if (lblock->mIns[0].mType == ASMIT_LDA && lblock->mIns[0].mMode == ASMIM_ZERO_PAGE) + else if (lblock->mIns[ai].mType == ASMIT_LDA && lblock->mIns[ai].mMode == ASMIM_ZERO_PAGE) { - int i = 1; + int i = ai + 1; while (i < lblock->mIns.Size() && !lblock->mIns[i].ChangesAccu() && !lblock->mIns[i].ChangesZeroPage(lblock->mIns[0].mAddress)) i++; if (i == lblock->mIns.Size()) { - mIns.Push(lblock->mIns[0]); - lblock->mIns.Remove(0); + mIns.Push(lblock->mIns[ai]); + lblock->mIns.Remove(ai); + } + } + + ai = 0; + while (ai < lblock->mIns.Size() && !lblock->mIns[ai].ChangesYReg()) + ai++; + + if (lblock->mIns[ai].mType == ASMIT_LDY && lblock->mIns[ai].mMode == ASMIM_IMMEDIATE) + { + int i = ai + 1; + while (i < lblock->mIns.Size() && !lblock->mIns[i].ChangesYReg()) + i++; + if (i == lblock->mIns.Size()) + { + mIns.Push(lblock->mIns[ai]); + lblock->mIns.Remove(ai); + } + } + else if (lblock->mIns[ai].mType == ASMIT_LDY && lblock->mIns[ai].mMode == ASMIM_ZERO_PAGE) + { + int i = ai + 1; + while (i < lblock->mIns.Size() && !lblock->mIns[i].ChangesYReg() && !lblock->mIns[i].ChangesZeroPage(lblock->mIns[0].mAddress)) + i++; + if (i == lblock->mIns.Size()) + { + mIns.Push(lblock->mIns[ai]); + lblock->mIns.Remove(ai); } } } @@ -10426,6 +10457,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(int pass) NativeCodeInstruction pins = mIns[i]; mIns[i] = mIns[i + 1]; mIns[i + 1] = pins; + changed = true; } } } @@ -10444,7 +10476,10 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(int pass) while (j < mIns.Size() && !mIns[j].ChangesXReg() && !mIns[j].ChangesYReg()) { if (mIns[j].mMode == ASMIM_ABSOLUTE_Y) + { mIns[j].mMode = ASMIM_ABSOLUTE_X; + changed = true; + } j++; } } @@ -10454,7 +10489,10 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(int pass) while (j < mIns.Size() && !mIns[j].ChangesXReg() && !mIns[j].ChangesYReg()) { if (mIns[j].mMode == ASMIM_ABSOLUTE_X) + { mIns[j].mMode = ASMIM_ABSOLUTE_Y; + changed = true; + } j++; } } @@ -10501,7 +10539,8 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(int pass) mIns.Reserve(mIns.Size() * 2 + 32); - changed = RemoveNops(); + if (RemoveNops()) + changed = true; // Replace (a & 0x80) != 0 with bpl/bmi int sz = mIns.Size(); @@ -10833,6 +10872,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(int pass) mIns[i + 0].mLive |= LIVE_CPU_REG_A; progress = true; } +#if 1 else if ( mIns[i + 0].mType == ASMIT_TXA && mIns[i + 1].mType == ASMIT_STA && (mIns[i + 1].mMode == ASMIM_ZERO_PAGE || mIns[i + 1].mMode == ASMIM_ABSOLUTE)) @@ -10840,6 +10880,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(int pass) mIns[i + 1].mType = ASMIT_STX; progress = true; } +#endif else if ( mIns[i + 0].mType == ASMIT_TYA && mIns[i + 1].mType == ASMIT_STA && (mIns[i + 1].mMode == ASMIM_ZERO_PAGE || mIns[i + 1].mMode == ASMIM_ABSOLUTE)) diff --git a/oscar64/Parser.cpp b/oscar64/Parser.cpp index 5fe2f35..c0708d9 100644 --- a/oscar64/Parser.cpp +++ b/oscar64/Parser.cpp @@ -563,7 +563,7 @@ Declaration * Parser::CopyConstantInitializer(int offset, Declaration* dtype, Ex dec->mOffset = offset; } } - else if (dtype->mType == DT_TYPE_POINTER && dec->mType == DT_VARIABLE && dec->mBase->mType == DT_TYPE_ARRAY && (dec->mFlags & DTF_STATIC)) + else if (dtype->mType == DT_TYPE_POINTER && dec->mType == DT_VARIABLE && dec->mBase->mType == DT_TYPE_ARRAY && (dec->mFlags & DTF_GLOBAL)) { if (dtype->CanAssign(exp->mDecType)) { @@ -3126,7 +3126,7 @@ void Parser::ParsePragma(void) if (mScanner->mToken == TK_IDENT) { Declaration* dec = mGlobals->Lookup(mScanner->mTokenIdent); - if (dec && dec->mType == DT_VARIABLE && (dec->mFlags & DTF_STATIC)) + if (dec && dec->mType == DT_VARIABLE && (dec->mFlags & DTF_GLOBAL)) { mScanner->NextToken(); ConsumeToken(TK_COMMA);