From 272b7b08df3311e66d9e8704fdaf58d56d1ad585 Mon Sep 17 00:00:00 2001
From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com>
Date: Sun, 2 Jan 2022 12:44:37 +0100
Subject: [PATCH] Optimize more short multiplies in native code

InterCode:
- UpdateLocalIntegerRangeSets derives value ranges for IA_SHR and IA_SAR
  when the shift count is a constant.

NativeCodeGenerator:
- ShortMultiply: multiply by 3 / 5 with a single byte ASL/ADC sequence
  when the operand is an unsigned byte with maximum <= 85 / <= 51, so the
  product still fits into eight bits.
- BinaryOperator: when shifting left, skip the ROL of the high byte as
  long as the doubled maximum of an unsigned byte operand stays below
  0x100.
- BlockSizeReduction: drop the first CLC / SEC of a block when every
  predecessor reaches it over a BCC / BCS edge that already fixes the
  carry.  A block without predecessors keeps its CLC / SEC.
- PeepHoleOptimizer: CLC, LDA zp, ADC #1|#2, STA zp, TAY becomes
  LDY zp, INY (, INY), STY zp when A and C are dead afterwards.

---
 oscar64/InterCode.cpp           |  75 ++++++++++++++++
 oscar64/NativeCodeGenerator.cpp | 148 +++++++++++++++++++++++++-------
 2 files changed, 193 insertions(+), 30 deletions(-)

diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp
index 1453c99..6995222 100644
--- a/oscar64/InterCode.cpp
+++ b/oscar64/InterCode.cpp
@@ -3658,6 +3658,81 @@ void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(void)
 				else
 					vr.mMaxState = vr.mMinState = IntegerValueRange::S_UNBOUND;
 				break;
+			case IA_SHR:
+				if (ins->mSrc[0].mTemp < 0)
+				{
+					vr = mLocalValueRange[ins->mSrc[1].mTemp];
+
+					if (ins->mSrc[0].mIntConst > 0)
+					{
+						if (vr.mMinState == IntegerValueRange::S_BOUND && vr.mMinValue >= 0)	// known non negative: shift both bounds
+						{
+							switch (ins->mSrc[1].mType)
+							{
+							case IT_INT16:
+								vr.mMaxValue = (unsigned short)(vr.mMaxValue) >> ins->mSrc[0].mIntConst;
+								vr.mMinValue = (unsigned short)(vr.mMinValue) >> ins->mSrc[0].mIntConst;
+								break;
+							case IT_INT8:
+								vr.mMaxValue = (unsigned char)(vr.mMaxValue) >> ins->mSrc[0].mIntConst;
+								vr.mMinValue = (unsigned char)(vr.mMinValue) >> ins->mSrc[0].mIntConst;
+								break;
+							case IT_INT32:
+								vr.mMaxValue = (unsigned)(vr.mMaxValue) >> ins->mSrc[0].mIntConst;
+								vr.mMinValue = (unsigned)(vr.mMinValue) >> ins->mSrc[0].mIntConst;
+								break;
+							}
+						}
+						else	// sign unknown: result lies in 0 .. (all ones of the type) >> count
+						{
+							switch (ins->mSrc[1].mType)
+							{
+							case IT_INT16:
+								vr.mMaxValue = 65535 >> ins->mSrc[0].mIntConst;
+								vr.mMinValue = 0;
+								break;
+							case IT_INT8:
+								vr.mMaxValue = 255 >> ins->mSrc[0].mIntConst;
+								vr.mMinValue = 0;
+								break;
+							case IT_INT32:
+								vr.mMaxValue = 0xffffffffULL >> ins->mSrc[0].mIntConst;
+								vr.mMinValue = 0;
+								break;
+							}
+						}
+					}
+				}
+				else
+					vr.mMaxState = vr.mMinState = IntegerValueRange::S_UNBOUND;
+				break;
+			case IA_SAR:
+				if (ins->mSrc[0].mTemp < 0)
+				{
+					vr = mLocalValueRange[ins->mSrc[1].mTemp];
+
+					if (ins->mSrc[0].mIntConst > 0)
+					{
+						switch (ins->mSrc[1].mType)	// signed casts keep the sign while shifting the bounds
+						{
+						case IT_INT16:
+							vr.mMaxValue = (short)(vr.mMaxValue) >> ins->mSrc[0].mIntConst;
+							vr.mMinValue = (short)(vr.mMinValue) >> ins->mSrc[0].mIntConst;
+							break;
+						case IT_INT8:
+							vr.mMaxValue = (char)(vr.mMaxValue) >> ins->mSrc[0].mIntConst;
+							vr.mMinValue = (char)(vr.mMinValue) >> ins->mSrc[0].mIntConst;
+							break;
+						case IT_INT32:
+							vr.mMaxValue = (int)(vr.mMaxValue) >> ins->mSrc[0].mIntConst;
+							vr.mMinValue = (int)(vr.mMinValue) >> ins->mSrc[0].mIntConst;
+							break;
+						}
+					}
+				}
+				else
+					vr.mMaxState = vr.mMinState = IntegerValueRange::S_UNBOUND;
+				break;
 			case IA_AND:
 				if (ins->mSrc[0].mTemp < 0)
 				{
diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp
index 8437507..ae178b8 100644
--- a/oscar64/NativeCodeGenerator.cpp
+++ b/oscar64/NativeCodeGenerator.cpp
@@ -4760,37 +4760,58 @@ int NativeCodeBasicBlock::ShortMultiply(InterCodeProcedure* proc, NativeCodeProc
 		ShiftRegisterLeft(proc, BC_REG_ACCU, lshift);
 		return BC_REG_ACCU;
 	case 3:
-		mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0));
-		mIns.Push(NativeCodeInstruction(ASMIT_ASL, ASMIM_IMPLIED));
-		mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 4));
-		mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1));
-		mIns.Push(NativeCodeInstruction(ASMIT_ROL, ASMIM_IMPLIED));
-		mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 5));
-		mIns.Push(NativeCodeInstruction(ASMIT_CLC, ASMIM_IMPLIED));
-		mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_WORK + 4));
-		mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0));
-		mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0));
-		mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_WORK + 5));
-		mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1));
-		mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1));
+		if (ins->mSrc[index].IsUByte() && ins->mSrc[index].mRange.mMaxValue <= 85)	// 85 * 3 = 255: low byte only, ASL leaves carry clear so no CLC
+		{
+			mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0));
+			mIns.Push(NativeCodeInstruction(ASMIT_ASL, ASMIM_IMPLIED));
+			mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0));
+			mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0));
+		}
+		else
+		{
+			mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0));
+			mIns.Push(NativeCodeInstruction(ASMIT_ASL, ASMIM_IMPLIED));
+			mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 4));
+			mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1));
+			mIns.Push(NativeCodeInstruction(ASMIT_ROL, ASMIM_IMPLIED));
+			mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 5));
+			mIns.Push(NativeCodeInstruction(ASMIT_CLC, ASMIM_IMPLIED));
+			mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_WORK + 4));
+			mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0));
+			mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0));
+			mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_WORK + 5));
+			mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1));
+			mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1));
+		}
 		ShiftRegisterLeft(proc, BC_REG_ACCU, lshift);
 		return BC_REG_ACCU;
 	case 5:
-		mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0));
-		mIns.Push(NativeCodeInstruction(ASMIT_ASL, ASMIM_IMPLIED));
-		mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 4));
-		mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1));
-		mIns.Push(NativeCodeInstruction(ASMIT_ROL, ASMIM_IMPLIED));
-		mIns.Push(NativeCodeInstruction(ASMIT_ASL, ASMIM_ZERO_PAGE, BC_REG_WORK + 4));
-		mIns.Push(NativeCodeInstruction(ASMIT_ROL, ASMIM_IMPLIED));
-		mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 5));
-		mIns.Push(NativeCodeInstruction(ASMIT_CLC, ASMIM_IMPLIED));
-		mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_WORK + 4));
-		mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0));
-		mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0));
-		mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_WORK + 5));
-		mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1));
-		mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1));
+		if (ins->mSrc[index].IsUByte() && ins->mSrc[index].mRange.mMaxValue <= 51)	// 51 * 5 = 255: low byte only, both ASL leave carry clear
+		{
+			mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0));
+			mIns.Push(NativeCodeInstruction(ASMIT_ASL, ASMIM_IMPLIED));
+			mIns.Push(NativeCodeInstruction(ASMIT_ASL, ASMIM_IMPLIED));
+			mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0));
+			mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0));
+		}
+		else
+		{
+			mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0));
+			mIns.Push(NativeCodeInstruction(ASMIT_ASL, ASMIM_IMPLIED));
+			mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 4));
+			mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1));
+			mIns.Push(NativeCodeInstruction(ASMIT_ROL, ASMIM_IMPLIED));
+			mIns.Push(NativeCodeInstruction(ASMIT_ASL, ASMIM_ZERO_PAGE, BC_REG_WORK + 4));
+			mIns.Push(NativeCodeInstruction(ASMIT_ROL, ASMIM_IMPLIED));
+			mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 5));
+			mIns.Push(NativeCodeInstruction(ASMIT_CLC, ASMIM_IMPLIED));
+			mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_WORK + 4));
+			mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0));
+			mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 0));
+			mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_WORK + 5));
+			mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1));
+			mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 1));
+		}
 		ShiftRegisterLeft(proc, BC_REG_ACCU, lshift);
 		return BC_REG_ACCU;
 	case 7:
@@ -6309,12 +6330,22 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p
 							mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, treg + 1));
 							mIns.Push(NativeCodeInstruction(ASMIT_ASL, ASMIM_ZERO_PAGE, treg));
 						}
-						mIns.Push(NativeCodeInstruction(ASMIT_ROL, ASMIM_IMPLIED));
+
+						int	check = 0xffff;	// largest value the low byte source can hold before shifting
+						if (ins->mSrc[1].IsUByte())
+							check = ins->mSrc[1].mRange.mMaxValue;
+
+						check <<= 1;
+						if (check >= 0x100)	// only rotate into the high byte once a bit can reach it
+							mIns.Push(NativeCodeInstruction(ASMIT_ROL, ASMIM_IMPLIED));
 						for (int i = 1; i < shift; i++)
 						{
 							mIns.Push(NativeCodeInstruction(ASMIT_ASL, ASMIM_ZERO_PAGE, treg));
-							mIns.Push(NativeCodeInstruction(ASMIT_ROL, ASMIM_IMPLIED));
+							check <<= 1;
+							if (check >= 0x100)
+								mIns.Push(NativeCodeInstruction(ASMIT_ROL, ASMIM_IMPLIED));
 						}
+
 						mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg + 1));
 					}
 				}
@@ -10877,6 +10908,7 @@ bool NativeCodeBasicBlock::OptimizeInnerLoops(NativeCodeProcedure* proc)
 	return changed;
 }
 
+
 // Size reduction violating various assumptions such as no branches in basic blocks
 // must be last step before actual assembly
 
@@ -11153,6 +11185,43 @@ void NativeCodeBasicBlock::BlockSizeReduction(void)
 			yimm = ximm = false;
 		}
 #endif
+
+		bool	carrySet = mEntryBlocks.Size() > 0, carryClear = carrySet;	// no predecessor: carry state is unknown
+
+		for (int i = 0; i < mEntryBlocks.Size(); i++)
+		{
+			if (mEntryBlocks[i]->mBranch == ASMIT_BCC)
+			{
+				if (mEntryBlocks[i]->mTrueJump == this)
+					carrySet = false;
+				else
+					carryClear = false;
+			}
+			else if (mEntryBlocks[i]->mBranch == ASMIT_BCS)
+			{
+				if (mEntryBlocks[i]->mTrueJump == this)
+					carryClear = false;
+				else
+					carrySet = false;
+			}
+			else
+				carryClear = carrySet = false;
+		}
+
+		if (carryClear || carrySet)
+		{
+			int i = 0;
+			while (i < mIns.Size() && !mIns[i].ChangesCarry())	// first instruction that touches the carry
+				i++;
+			if (i < mIns.Size())
+			{
+				if (mIns[i].mType == ASMIT_CLC && carryClear)
+					mIns.Remove(i);
+				else if (mIns[i].mType == ASMIT_SEC && carrySet)
+					mIns.Remove(i);
+			}
+		}
+
 		if (mTrueJump)
 			mTrueJump->BlockSizeReduction();
 		if (mFalseJump)
@@ -12957,6 +13026,25 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(int pass)
 						mIns[i + 2].mAddress = mIns[i + 0].mAddress;
 						progress = true;
 					}
+					else if (
+						mIns[i + 0].mType == ASMIT_CLC &&
+						mIns[i + 1].mType == ASMIT_LDA && mIns[i + 1].mMode == ASMIM_ZERO_PAGE &&
+						mIns[i + 2].mType == ASMIT_ADC && mIns[i + 2].mMode == ASMIM_IMMEDIATE && (mIns[i + 2].mAddress == 1 || mIns[i + 2].mAddress == 2) &&
+						mIns[i + 3].mType == ASMIT_STA && mIns[i + 3].mMode == ASMIM_ZERO_PAGE && mIns[i + 1].mAddress == mIns[i + 3].mAddress &&
+						mIns[i + 4].mType == ASMIT_TAY && !(mIns[i + 4].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_C)))
+					{
+						mIns[i + 0].mType = ASMIT_NOP;
+						mIns[i + 1].mType = ASMIT_LDY; mIns[i + 1].mLive |= LIVE_CPU_REG_Y;
+						mIns[i + 2].mType = ASMIT_INY; mIns[i + 2].mMode = ASMIM_IMPLIED; mIns[i + 2].mLive |= LIVE_CPU_REG_Y;
+						mIns[i + 3].mType = ASMIT_STY; mIns[i + 3].mLive |= LIVE_CPU_REG_Y;
+						mIns[i + 4].mType = ASMIT_NOP;
+						if (mIns[i + 2].mAddress == 2)	// mAddress still holds the old ADC immediate: +2 needs a second INY
+						{
+							mIns.Insert(i + 3, mIns[i + 2]);
+						}
+
+						progress = true;
+					}
 #if 1
 					else if (
 						mIns[i + 0].mType == ASMIT_LDY && mIns[i + 0].mMode == ASMIM_IMMEDIATE && mIns[i + 0].mAddress == 0 &&