From 4cdc501a34cee24819793ced735d0ce2190e56c6 Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Thu, 21 Apr 2022 08:26:04 +0200 Subject: [PATCH] Bit shift optimizations --- oscar64/Compiler.cpp | 3 +- oscar64/InterCode.cpp | 281 ++++++++++++++++++++++++++++--- oscar64/InterCode.h | 4 + oscar64/NativeCodeGenerator.cpp | 276 ++++++++++++++++++++++++++---- oscar64/NativeCodeGenerator.h | 9 +- oscar64/oscar64.cpp | 2 +- oscar64/oscar64.rc | 8 +- oscar64setup/oscar64setup.vdproj | 6 +- 8 files changed, 513 insertions(+), 76 deletions(-) diff --git a/oscar64/Compiler.cpp b/oscar64/Compiler.cpp index 70f32a3..0677620 100644 --- a/oscar64/Compiler.cpp +++ b/oscar64/Compiler.cpp @@ -403,7 +403,8 @@ bool Compiler::WriteOutputFile(const char* targetPath) int i = strlen(prgPath); while (i > 0 && prgPath[i - 1] != '.') i--; - prgPath[i] = 0; + if (i > 0) + prgPath[i] = 0; strcpy_s(mapPath, prgPath); strcpy_s(asmPath, prgPath); diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index 82ee9ff..5063463 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -2282,6 +2282,13 @@ bool InterOperand::IsUByte(void) const mRange.mMaxState == IntegerValueRange::S_BOUND && mRange.mMaxValue < 256; } +bool InterOperand::IsSByte(void) const +{ + return + mRange.mMinState == IntegerValueRange::S_BOUND && mRange.mMinValue >= -128 && + mRange.mMaxState == IntegerValueRange::S_BOUND && mRange.mMaxValue < 128; +} + bool InterOperand::IsUnsigned(void) const { if (mRange.mMinState == IntegerValueRange::S_BOUND && mRange.mMinValue >= 0 && mRange.mMaxState == IntegerValueRange::S_BOUND) @@ -5205,6 +5212,36 @@ void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(void) vr.mMaxValue <<= ins->mSrc[0].mIntConst; vr.mMinValue <<= ins->mSrc[0].mIntConst; } + else if (ins->mSrc[0].IsUByte() && ins->mSrc[0].mRange.mMaxValue < 16) + { + if (ins->mSrc[1].mTemp < 0) + { + vr.mMinState = IntegerValueRange::S_BOUND; + vr.mMaxState = IntegerValueRange::S_BOUND; + + if (ins->mSrc[1].mIntConst < 0) + { + vr.mMinValue = ins->mSrc[1].mIntConst << ins->mSrc[0].mRange.mMaxValue; + vr.mMaxValue = 0; + } + else + { + vr.mMinValue = 0; + vr.mMaxValue = ins->mSrc[1].mIntConst << ins->mSrc[0].mRange.mMaxValue; + } + } + else + { + vr = mLocalValueRange[ins->mSrc[1].mTemp]; + if (vr.mMaxState == IntegerValueRange::S_WEAK) + vr.mMaxState = IntegerValueRange::S_UNBOUND; + else if (vr.mMinState == IntegerValueRange::S_WEAK) + vr.mMinState = IntegerValueRange::S_UNBOUND; + + vr.mMaxValue <<= ins->mSrc[0].mRange.mMaxValue; + vr.mMinValue <<= ins->mSrc[0].mRange.mMaxValue; + } + } else vr.mMaxState = vr.mMinState = IntegerValueRange::S_UNBOUND; break; @@ -6317,6 +6354,130 @@ bool InterCodeBasicBlock::RemoveUnusedStoreInstructions(const GrowingVariableArr } +bool InterCodeBasicBlock::SimplifyIntegerNumeric(const GrowingInstructionPtrArray& tvalue, int& spareTemps) +{ + bool changed = false; + + if (!mVisited) + { + mVisited = true; + + GrowingInstructionPtrArray ltvalue(tvalue); + + if (mNumEntries > 1) + ltvalue.Clear(); + + for (int i = 0; i < mInstructions.Size(); i++) + { + InterInstruction* ins = mInstructions[i]; + + switch (ins->mCode) + { + case IC_BINARY_OPERATOR: + { + switch (ins->mOperator) + { + case IA_SHL: +#if 1 + if (ins->mSrc[0].mTemp < 0 && ins->mSrc[1].mTemp >= 0 && ltvalue[ins->mSrc[1].mTemp] && ins->mDst.mType == IT_INT16) + { + InterInstruction* pins = ltvalue[ins->mSrc[1].mTemp]; + if (pins->mCode == IC_CONVERSION_OPERATOR && pins->mOperator == IA_EXT8TO16U && pins->mSrc[0].IsUByte() && pins->mSrc[0].mTemp >= 0 && ltvalue[pins->mSrc[0].mTemp]) + { + InterInstruction* ains = ltvalue[pins->mSrc[0].mTemp]; + + if (ains->mCode == IC_BINARY_OPERATOR && ains->mOperator == IA_ADD && ains->mSrc[0].mTemp < 0) + { + if (spareTemps + 2 >= ltvalue.Size()) + return true; + + InterInstruction* nins = new InterInstruction(); + nins->mCode = IC_BINARY_OPERATOR; + nins->mOperator = IA_SHL; + nins->mSrc[0] = ins->mSrc[0]; + nins->mSrc[1] = ains->mSrc[1]; + nins->mDst.mTemp = spareTemps++; + nins->mDst.mType = IT_INT16; + nins->mDst.mRange = ins->mDst.mRange; + mInstructions.Insert(i, nins); + + ins->mOperator = IA_ADD; + ins->mSrc[0] = ains->mSrc[0]; + ins->mSrc[0].mIntConst <<= nins->mSrc[0].mIntConst; + ins->mSrc[1] = nins->mDst; + + changed = true; + break; + } + } + } +#endif +#if 1 + if (ins->mSrc[1].mTemp < 0 && ins->mSrc[0].mTemp >= 0 && ltvalue[ins->mSrc[0].mTemp] && ins->mDst.mType == IT_INT16) + { + InterInstruction* pins = ltvalue[ins->mSrc[0].mTemp]; + if (pins->mCode == IC_CONVERSION_OPERATOR && pins->mOperator == IA_EXT8TO16U && pins->mSrc[0].IsUByte() && pins->mSrc[0].mRange.mMaxValue < 16 && pins->mSrc[0].mTemp >= 0 && ltvalue[pins->mSrc[0].mTemp]) + { + InterInstruction* ains = ltvalue[pins->mSrc[0].mTemp]; + + if (ains->mCode == IC_BINARY_OPERATOR && ains->mOperator == IA_ADD && ains->mSrc[0].mTemp < 0 && ains->mSrc[1].IsUByte()) + { + ins->mSrc[0] = ains->mSrc[1]; + ins->mSrc[1].mIntConst <<= ains->mSrc[0].mIntConst; + + changed = true; + break; + } + } + } +#endif + break; + } + } break; + + case IC_LEA: + if (ins->mSrc[1].mTemp < 0 && ins->mSrc[0].mTemp >= 0 && ltvalue[ins->mSrc[0].mTemp]) + { + InterInstruction* ains = ltvalue[ins->mSrc[0].mTemp]; + + if (ains->mCode == IC_BINARY_OPERATOR && ains->mOperator == IA_ADD && ains->mSrc[0].mTemp < 0) + { + ins->mSrc[0] = ains->mSrc[1]; + ins->mSrc[1].mIntConst += ains->mSrc[0].mIntConst; + changed = true; + } + } + break; + + } + + // Now kill all instructions that referenced the current destination as source, they are + // not valid anymore + + int dtemp = ins->mDst.mTemp; + + if (dtemp >= 0) + { + for (int i = 0; i < ltvalue.Size(); i++) + { + if (ltvalue[i] && ltvalue[i]->ReferencesTemp(dtemp)) + ltvalue[i] = nullptr; + } + + ltvalue[dtemp] = ins; + } + } + + if (mTrueJump && mTrueJump->SimplifyIntegerNumeric(ltvalue, spareTemps)) + changed = true; + + if (mFalseJump && mFalseJump->SimplifyIntegerNumeric(ltvalue, spareTemps)) + changed = true; + } + + return changed; +} + void InterCodeBasicBlock::PerformValueForwarding(const GrowingInstructionPtrArray& tvalue, const ValueSet& values, FastNumberSet& tvalid, const NumberSet& aliasedLocals, const NumberSet& aliasedParams, int& spareTemps, const GrowingVariableArray& staticVars) { int i; @@ -7164,6 +7325,35 @@ void InterCodeBasicBlock::MarkRelevantStatics(void) } } +bool InterCodeBasicBlock::CanMoveInstructionBeforeBlock(int ii) const +{ + InterInstruction* ins = mInstructions[ii]; + + if (ins->mCode == IC_LOAD) + { + for (int i = 0; i < ii; i++) + if (!CanBypassLoadUp(ins, mInstructions[i])) + return false; + } + else if (ins->mCode == IC_STORE) + { + for (int i = 0; i < ii; i++) + if (!CanBypassStore(ins, mInstructions[i])) + return false; + } + else if (ins->mCode == IC_CALL || ins->mCode == IC_CALL_NATIVE || ins->mCode == IC_COPY || ins->mCode == IC_PUSH_FRAME || ins->mCode == IC_POP_FRAME || + ins->mCode == IC_RETURN || ins->mCode == IC_RETURN_STRUCT || ins->mCode == IC_RETURN_VALUE) + return false; + else + { + for (int i = 0; i < ii; i++) + if (!CanBypassUp(ins, mInstructions[i])) + return false; + } + + return true; +} + bool InterCodeBasicBlock::MergeCommonPathInstructions(void) { bool changed = false; @@ -7174,40 +7364,53 @@ bool InterCodeBasicBlock::MergeCommonPathInstructions(void) if (mTrueJump && mFalseJump && mTrueJump->mNumEntries == 1 && mFalseJump->mNumEntries == 1 && mTrueJump->mInstructions.Size() && mFalseJump->mInstructions.Size()) { - InterInstruction* tins = mTrueJump->mInstructions[0]; - InterInstruction* fins = mFalseJump->mInstructions[0]; - - if (tins->IsEqualSource(fins) && - tins->mCode != IC_BRANCH && - tins->mCode != IC_JUMP && - tins->mCode != IC_RELATIONAL_OPERATOR) + int ti = 0; + while (ti < mTrueJump->mInstructions.Size() && !changed) { - if ((tins->mDst.mTemp == -1 || !mFalseJump->mEntryRequiredTemps[tins->mDst.mTemp]) && - (fins->mDst.mTemp == -1 || !mTrueJump->mEntryRequiredTemps[fins->mDst.mTemp])) + InterInstruction* tins = mTrueJump->mInstructions[ti]; + if (tins->mCode != IC_BRANCH && tins->mCode != IC_JUMP && tins->mCode != IC_RELATIONAL_OPERATOR) { - int tindex = mInstructions.Size() - 1; - if (mInstructions.Size() >= 2 && mInstructions[tindex - 1]->mDst.mTemp == mInstructions[tindex]->mSrc->mTemp) - tindex--; + int fi = 0; + while (fi < mFalseJump->mInstructions.Size() && !tins->IsEqualSource(mFalseJump->mInstructions[fi])) + fi++; - mInstructions.Insert(tindex, tins); - tindex++; - if (tins->mDst.mTemp != -1) + if (fi < mFalseJump->mInstructions.Size()) { - if (fins->mDst.mTemp != tins->mDst.mTemp) + InterInstruction* fins = mFalseJump->mInstructions[fi]; + + if ((tins->mDst.mTemp == -1 || !mFalseJump->mEntryRequiredTemps[tins->mDst.mTemp]) && + (fins->mDst.mTemp == -1 || !mTrueJump->mEntryRequiredTemps[fins->mDst.mTemp])) { - InterInstruction* nins = new InterInstruction(); - nins->mCode = IC_LOAD_TEMPORARY; - nins->mDst.mTemp = fins->mDst.mTemp; - nins->mDst.mType = fins->mDst.mType; - nins->mSrc[0].mTemp = tins->mDst.mTemp; - nins->mSrc[0].mType = tins->mDst.mType; - mInstructions.Insert(tindex, nins); + if (mTrueJump->CanMoveInstructionBeforeBlock(ti) && mFalseJump->CanMoveInstructionBeforeBlock(fi)) + { + int tindex = mInstructions.Size() - 1; + if (mInstructions.Size() >= 2 && mInstructions[tindex - 1]->mDst.mTemp == mInstructions[tindex]->mSrc[0].mTemp && CanBypassUp(tins, mInstructions[tindex - 1])) + tindex--; + + mInstructions.Insert(tindex, tins); + tindex++; + if (tins->mDst.mTemp != -1) + { + if (fins->mDst.mTemp != tins->mDst.mTemp) + { + InterInstruction* nins = new InterInstruction(); + nins->mCode = IC_LOAD_TEMPORARY; + nins->mDst.mTemp = fins->mDst.mTemp; + nins->mDst.mType = fins->mDst.mType; + nins->mSrc[0].mTemp = tins->mDst.mTemp; + nins->mSrc[0].mType = tins->mDst.mType; + mInstructions.Insert(tindex, nins); + } + } + mTrueJump->mInstructions.Remove(ti); + mFalseJump->mInstructions.Remove(fi); + changed = true; + } } } - mTrueJump->mInstructions.Remove(0); - mFalseJump->mInstructions.Remove(0); - changed = true; } + + ti++; } } @@ -10289,6 +10492,32 @@ void InterCodeProcedure::Close(void) DisassembleDebug("Simplified range limited relational ops"); #endif +#if 1 + GrowingInstructionPtrArray silvalues(nullptr); + int silvused; + + do + { + activeSet.Clear(); + + ResetVisited(); + mEntryBlock->CollectActiveTemporaries(activeSet); + + silvused = activeSet.Num(); + silvalues.SetSize(silvused + 16, true); + + mTemporaries.SetSize(activeSet.Num(), true); + + ResetVisited(); + mEntryBlock->ShrinkActiveTemporaries(activeSet, mTemporaries); + + ResetVisited(); + } while (mEntryBlock->SimplifyIntegerNumeric(silvalues, silvused)); + + DisassembleDebug("SimplifyIntegerNumeric"); + +#endif + #if 1 if (mModule->mCompilerOptions & COPT_OPTIMIZE_AUTO_UNROLL) { diff --git a/oscar64/InterCode.h b/oscar64/InterCode.h index 942c9f3..10d3c98 100644 --- a/oscar64/InterCode.h +++ b/oscar64/InterCode.h @@ -257,6 +257,7 @@ public: bool IsEqual(const InterOperand & op) const; bool IsUByte(void) const; + bool IsSByte(void) const; bool IsUnsigned(void) const; void Disassemble(FILE* file); @@ -417,6 +418,8 @@ public: void PerformMachineSpecificValueUsageCheck(const GrowingInstructionPtrArray& tvalue, FastNumberSet& tvalid, const GrowingVariableArray& staticVars); bool EliminateDeadBranches(void); + bool SimplifyIntegerNumeric(const GrowingInstructionPtrArray& tvalue, int& spareTemps); + bool CalculateSingleAssignmentTemps(FastNumberSet& tassigned, GrowingInstructionPtrArray& tvalue, NumberSet& modifiedParams, InterMemory paramMemory); bool SingleAssignmentTempForwarding(const GrowingInstructionPtrArray& tunified, const GrowingInstructionPtrArray& tvalues); @@ -446,6 +449,7 @@ public: bool IsTempModifiedOnPath(int temp, int at) const; bool PushSinglePathResultInstructions(void); + bool CanMoveInstructionBeforeBlock(int ii) const; bool MergeCommonPathInstructions(void); void PeepholeOptimization(void); diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index 936966e..deaa8f7 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -3081,7 +3081,7 @@ void NativeCodeInstruction::Assemble(NativeCodeBasicBlock* block) } break; case ASMIM_IMMEDIATE: - block->PutByte(uint16(mAddress)); + block->PutByte(uint8(mAddress)); break; case ASMIM_IMMEDIATE_ADDRESS: if (mLinkerObject) @@ -3106,7 +3106,7 @@ void NativeCodeInstruction::Assemble(NativeCodeBasicBlock* block) } else { - block->PutByte(uint16(mAddress)); + block->PutByte(uint8(mAddress)); } break; case ASMIM_ABSOLUTE: @@ -5667,7 +5667,7 @@ int NativeCodeBasicBlock::ShortMultiply(InterCodeProcedure* proc, NativeCodeProc mIns.Push(NativeCodeInstruction(ASMIT_LDX, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[index].mTemp])); } - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ABSOLUTE_X, 0, nproc->mGenerator->AllocateShortMulTable(mul, ins->mSrc[index].mRange.mMaxValue + 1, false))); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ABSOLUTE_X, 0, nproc->mGenerator->AllocateShortMulTable(IA_MUL, mul, ins->mSrc[index].mRange.mMaxValue + 1, false))); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, dreg)); if (ins->mDst.IsUByte()) { @@ -5676,7 +5676,7 @@ int NativeCodeBasicBlock::ShortMultiply(InterCodeProcedure* proc, NativeCodeProc } else { - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ABSOLUTE_X, 0, nproc->mGenerator->AllocateShortMulTable(mul, ins->mSrc[index].mRange.mMaxValue + 1, true))); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ABSOLUTE_X, 0, nproc->mGenerator->AllocateShortMulTable(IA_MUL, mul, ins->mSrc[index].mRange.mMaxValue + 1, true))); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, dreg + 1)); } @@ -7406,6 +7406,30 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ABSOLUTE_X, frt.mOffset + l, frt.mLinkerObject)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg + 1)); } + else if (ins->mSrc[1].mTemp < 0) + { + int size = ins->mSrc[0].mRange.mMaxValue + 1; + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[0].mTemp])); + if (!ins->mSrc[0].IsUByte() || ins->mSrc[0].mRange.mMaxValue > 15) + { + mIns.Push(NativeCodeInstruction(ASMIT_AND, ASMIM_IMMEDIATE, 0x0f)); + size = 16; + } + mIns.Push(NativeCodeInstruction(ASMIT_TAX, ASMIM_IMPLIED)); + + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ABSOLUTE_X, 0, nproc->mGenerator->AllocateShortMulTable(IA_SHL, ins->mSrc[1].mIntConst, size, false))); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg)); + if (ins->mDst.IsUByte()) + { + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, 0)); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg + 1)); + } + else + { + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ABSOLUTE_X, 0, nproc->mGenerator->AllocateShortMulTable(IA_SHL, ins->mSrc[1].mIntConst, size, true))); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg + 1)); + } + } else { NativeCodeBasicBlock* lblock = nproc->AllocateBlock(); @@ -7624,6 +7648,30 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ABSOLUTE_X, frt.mOffset + 47 - l, frt.mLinkerObject)); mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg + 1)); } + else if (ins->mSrc[1].mTemp < 0) + { + int size = ins->mSrc[0].mRange.mMaxValue + 1; + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[0].mTemp])); + if (!ins->mSrc[0].IsUByte() || ins->mSrc[0].mRange.mMaxValue > 15) + { + mIns.Push(NativeCodeInstruction(ASMIT_AND, ASMIM_IMMEDIATE, 0x0f)); + size = 16; + } + mIns.Push(NativeCodeInstruction(ASMIT_TAX, ASMIM_IMPLIED)); + + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ABSOLUTE_X, 0, nproc->mGenerator->AllocateShortMulTable(IA_SHR, ins->mSrc[1].mIntConst, size, false))); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg)); + if (ins->mDst.IsUByte()) + { + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, 0)); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg + 1)); + } + else + { + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ABSOLUTE_X, 0, nproc->mGenerator->AllocateShortMulTable(IA_SHR, ins->mSrc[1].mIntConst, size, true))); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg + 1)); + } + } else { NativeCodeBasicBlock* lblock = nproc->AllocateBlock(); @@ -7723,6 +7771,21 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg + 1)); } } + else if (ins->mSrc[1].IsSByte() && shift != 0 && shift < 5) + { + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[1].mTemp])); + + for (int i = 0; i < shift; i++) + mIns.Push(NativeCodeInstruction(ASMIT_LSR, ASMIM_IMPLIED)); + + mIns.Push(NativeCodeInstruction(ASMIT_EOR, ASMIM_IMMEDIATE, 0x80 >> shift)); + mIns.Push(NativeCodeInstruction(ASMIT_SEC, ASMIM_IMPLIED)); + mIns.Push(NativeCodeInstruction(ASMIT_SBC, ASMIM_IMMEDIATE, 0x80 >> shift)); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg + 0)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, 0)); + mIns.Push(NativeCodeInstruction(ASMIT_SBC, ASMIM_IMMEDIATE, 0)); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg + 1)); + } else { if (shift == 0) @@ -7735,6 +7798,16 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg + 1)); } } + else if (shift == 1) + { + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[1].mTemp] + 1)); + mIns.Push(NativeCodeInstruction(ASMIT_CMP, ASMIM_IMMEDIATE, 0x80)); + mIns.Push(NativeCodeInstruction(ASMIT_ROR, ASMIM_IMPLIED)); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg + 1)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[1].mTemp])); + mIns.Push(NativeCodeInstruction(ASMIT_ROR, ASMIM_IMPLIED)); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg)); + } else if (shift == 7) { mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[1].mTemp])); @@ -7851,6 +7924,30 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg + 1)); } } + else if (ins->mSrc[1].mTemp < 0) + { + int size = ins->mSrc[0].mRange.mMaxValue + 1; + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[0].mTemp])); + if (!ins->mSrc[0].IsUByte() || ins->mSrc[0].mRange.mMaxValue > 15) + { + mIns.Push(NativeCodeInstruction(ASMIT_AND, ASMIM_IMMEDIATE, 0x0f)); + size = 16; + } + mIns.Push(NativeCodeInstruction(ASMIT_TAX, ASMIM_IMPLIED)); + + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ABSOLUTE_X, 0, nproc->mGenerator->AllocateShortMulTable(IA_SAR, ins->mSrc[1].mIntConst, size, false))); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg)); + if (ins->mDst.IsUByte()) + { + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, 0)); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg + 1)); + } + else + { + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ABSOLUTE_X, 0, nproc->mGenerator->AllocateShortMulTable(IA_SAR, ins->mSrc[1].mIntConst, size, true))); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg + 1)); + } + } else { NativeCodeBasicBlock* lblock = nproc->AllocateBlock(); @@ -11668,6 +11765,11 @@ bool NativeCodeBasicBlock::PatchSingleUseGlobalLoad(const NativeCodeBasicBlock* { NativeCodeInstruction& ins(mIns[at]); + if (ains.mMode == ASMIM_ABSOLUTE_X) + ins.mLive |= LIVE_CPU_REG_X; + if (ains.mMode == ASMIM_ABSOLUTE_Y) + ins.mLive |= LIVE_CPU_REG_Y; + if (ins.mMode == ASMIM_ZERO_PAGE && ins.mAddress == reg) { if (ins.UsesAddress()) @@ -12453,6 +12555,32 @@ bool NativeCodeBasicBlock::JoinTAXARange(int from, int to) return true; } + + if (!(mIns[from].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_C | LIVE_CPU_REG_Z))) + { + start = from; + while (start > 0 && mIns[start - 1].mType != ASMIT_LDA && (mIns[start - 1].mMode == ASMIM_IMMEDIATE || mIns[start - 1].mMode == ASMIM_IMPLIED)) + start--; + if (start > 0 && mIns[start - 1].mType == ASMIT_LDA && mIns[start - 1].mMode == ASMIM_ZERO_PAGE) + { + for (int i = from + 1; i < to; i++) + { + if (mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == mIns[start].mAddress && mIns[i].ChangesAddress()) + return false; + } + + start--; + mIns.Remove(to); + for (int i = start; i < from; i++) + { + mIns.Insert(to, mIns[start]); + mIns.Remove(start); + } + mIns.Remove(start); + + return true; + } + } } if (to + 1 < mIns.Size() && mIns[to + 1].mType == ASMIT_STA && !(mIns[to + 1].mLive & LIVE_CPU_REG_A)) @@ -13500,6 +13628,7 @@ bool NativeCodeBasicBlock::MoveLoadImmStoreAbsoluteUp(int at) return false; } + bool NativeCodeBasicBlock::MoveLoadStoreUp(int at) { int j = at; @@ -13520,14 +13649,10 @@ bool NativeCodeBasicBlock::MoveLoadStoreUp(int at) if (j > 0 && j < at) { - for (int i = at; i > j; i--) - { - mIns[i] = mIns[i - 1]; - } + mIns.Insert(j, mIns[at + 1]); mIns[j - 1].mLive |= LIVE_CPU_REG_A; - mIns[j] = mIns[at + 1]; mIns[j].mLive |= LIVE_CPU_REG_A; - mIns[at + 1].mType = ASMIT_NOP; mIns[at + 1].mMode = ASMIM_IMPLIED; + mIns[at + 2].mType = ASMIT_NOP; mIns[at + 2].mMode = ASMIM_IMPLIED; return true; } @@ -14711,21 +14836,35 @@ bool NativeCodeBasicBlock::SimpleLoopReversal(NativeCodeProcedure* proc) lb->mIns[lbs - 2].mType = ASMIT_DEX; lb->mIns[lbs - 1].mType = ASMIT_NOP; lb->mIns[lbs - 1].mMode = ASMIM_IMPLIED; lb->mBranch = ASMIT_BNE; + changed = true; } } else if (lb->mIns[lbs - 1].mMode == ASMIM_IMMEDIATE) { int a = lb->mIns[lbs - 1].mAddress - mIns[li].mAddress; - int i = 0; - while (i + 2 < lbs && !lb->mIns[i].RequiresXReg()) - i++; - if (i + 2 == lbs) + if (lbs == 3 && lb->mIns[0].mType == ASMIT_STA && lb->mIns[0].mMode == ASMIM_ABSOLUTE_X && lb->mIns[0].mLinkerObject && a < 128) { - mIns[li].mAddress = a; - lb->mIns[lbs - 2].mType = ASMIT_DEX; - lb->mIns[lbs - 1].mType = ASMIT_NOP; lb->mIns[lbs - 1].mMode = ASMIM_IMPLIED; - lb->mBranch = ASMIT_BNE; + lb->mIns[0].mAddress += mIns[li].mAddress; + mIns[li].mAddress = a - 1; + lb->mIns[1].mType = ASMIT_DEX; + lb->mIns[2].mType = ASMIT_NOP; lb->mIns[lbs - 1].mMode = ASMIM_IMPLIED; + lb->mBranch = ASMIT_BPL; + changed = true; + } + else + { + int i = 0; + while (i + 2 < lbs && !lb->mIns[i].RequiresXReg()) + i++; + if (i + 2 == lbs) + { + mIns[li].mAddress = a; + lb->mIns[lbs - 2].mType = ASMIT_DEX; + lb->mIns[lbs - 1].mType = ASMIT_NOP; lb->mIns[lbs - 1].mMode = ASMIM_IMPLIED; + lb->mBranch = ASMIT_BNE; + changed = true; + } } } } @@ -14733,27 +14872,54 @@ bool NativeCodeBasicBlock::SimpleLoopReversal(NativeCodeProcedure* proc) } else if (lb->mIns[lbs - 1].mType == ASMIT_CPY) { - NativeCodeBasicBlock* lb = mTrueJump; - int lbs = lb->mIns.Size(); + int li = mIns.Size() - 1; + while (li >= 0 && !mIns[li].ReferencesYReg()) + li--; - if (lb->mIns[lbs - 1].mType == ASMIT_CPY) + if (li >= 0 && lb->mIns[lbs - 2].mType == ASMIT_INY && mIns[li].mType == ASMIT_LDY && mIns[li].mMode == ASMIM_IMMEDIATE && mIns[li].mAddress == 0) { - if (lb->mIns[lbs - 2].mType == ASMIT_INY && mIns.Last().mType == ASMIT_LDY && mIns.Last().mMode == ASMIM_IMMEDIATE && mIns.Last().mAddress == 0) + if (lb->mIns[lbs - 1].mMode == ASMIM_ZERO_PAGE) { - if (lb->mIns[lbs - 1].mMode == ASMIM_ZERO_PAGE) - { - int a = lb->mIns[lbs - 1].mAddress; + int a = lb->mIns[lbs - 1].mAddress; + int i = 0; + while (i + 2 < lbs && !(lb->mIns[i].RequiresYReg() || lb->mIns[i].ChangesZeroPage(a))) + i++; + if (i + 2 == lbs) + { + mIns[li].mMode = ASMIM_ZERO_PAGE; + mIns[li].mAddress = a; + lb->mIns[lbs - 2].mType = ASMIT_DEY; + lb->mIns[lbs - 1].mType = ASMIT_NOP; lb->mIns[lbs - 1].mMode = ASMIM_IMPLIED; + lb->mBranch = ASMIT_BNE; + changed = true; + } + } + else if (lb->mIns[lbs - 1].mMode == ASMIM_IMMEDIATE) + { + int a = lb->mIns[lbs - 1].mAddress - mIns[li].mAddress; + + if (lbs == 3 && lb->mIns[0].mType == ASMIT_STA && lb->mIns[0].mMode == ASMIM_ABSOLUTE_Y && lb->mIns[0].mLinkerObject && a < 128) + { + lb->mIns[0].mAddress += mIns[li].mAddress; + mIns[li].mAddress = a - 1; + lb->mIns[1].mType = ASMIT_DEY; + lb->mIns[2].mType = ASMIT_NOP; lb->mIns[lbs - 1].mMode = ASMIM_IMPLIED; + lb->mBranch = ASMIT_BPL; + changed = true; + } + else + { int i = 0; - while (i + 2 < lbs && !(lb->mIns[i].RequiresYReg() || lb->mIns[i].ChangesZeroPage(a))) + while (i + 2 < lbs && !lb->mIns[i].RequiresYReg()) i++; if (i + 2 == lbs) { - mIns[mIns.Size() - 1].mMode = ASMIM_ZERO_PAGE; - mIns[mIns.Size() - 1].mAddress = a; + mIns[li].mAddress = a; lb->mIns[lbs - 2].mType = ASMIT_DEY; lb->mIns[lbs - 1].mType = ASMIT_NOP; lb->mIns[lbs - 1].mMode = ASMIM_IMPLIED; lb->mBranch = ASMIT_BNE; + changed = true; } } } @@ -16601,7 +16767,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass for (int i = 2; i + 1 < mIns.Size(); i++) { - if (mIns[i].mType == ASMIT_LDA && mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i + 1].mType == ASMIT_STA && mIns[i + 1].mMode == ASMIM_ZERO_PAGE && (mIns[i + 1].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z)) == 0) + if (mIns[i].mType == ASMIT_LDA && mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i + 1].mType == ASMIT_STA && mIns[i + 1].mMode == ASMIM_ZERO_PAGE) { if (MoveLoadStoreUp(i)) changed = true; @@ -16687,8 +16853,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass for (int i = 2; i + 1 < mIns.Size(); i++) { if (mIns[i].mType == ASMIT_LDA && mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i + 1].mType == ASMIT_STA && - (mIns[i + 1].mMode == ASMIM_ABSOLUTE || mIns[i + 1].mMode == ASMIM_ABSOLUTE_X || mIns[i + 1].mMode == ASMIM_ABSOLUTE_Y) && - !(mIns[i + 1].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z))) + (mIns[i + 1].mMode == ASMIM_ABSOLUTE || mIns[i + 1].mMode == ASMIM_ABSOLUTE_X || mIns[i + 1].mMode == ASMIM_ABSOLUTE_Y)) { if (MoveAbsoluteLoadStoreUp(i)) changed = true; @@ -19262,6 +19427,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[i + 3].mMode = ASMIM_IMPLIED; progress = true; } + else if ( mIns[i + 0].mType == ASMIT_TXA && mIns[i + 1].mType == ASMIT_CLC && @@ -21689,17 +21855,34 @@ void NativeCodeGenerator::CompleteRuntime(void) for (int j = 0; j < m.mSize; j++) { - m.mLinkerLSB->mData[j] = (uint8)(m.mFactor * j); - m.mLinkerMSB->mData[j] = (uint8)(m.mFactor * j >> 8); + int val = m.mFactor; + switch (m.mOperator) + { + case IA_MUL: + val *= j; + break; + case IA_SHL: + val <<= j; + break; + case IA_SHR: + val = (val & 0xffff) >> j; + break; + case IA_SAR: + val = (int)(short)val >> j; + break; + } + + m.mLinkerLSB->mData[j] = (uint8)(val); + m.mLinkerMSB->mData[j] = (uint8)(val >> 8); } } } -LinkerObject* NativeCodeGenerator::AllocateShortMulTable(int factor, int size, bool msb) +LinkerObject* NativeCodeGenerator::AllocateShortMulTable(InterOperator op, int factor, int size, bool msb) { int i = 0; - while (i < mMulTables.Size() && mMulTables[i].mFactor != factor) + while (i < mMulTables.Size() && (mMulTables[i].mFactor != factor || mMulTables[i].mOperator != op)) i++; if (i == mMulTables.Size()) @@ -21708,13 +21891,32 @@ LinkerObject* NativeCodeGenerator::AllocateShortMulTable(int factor, int size, b MulTable mt; char name[20]; - sprintf_s(name, "__multab%dL", factor); + const char* base = ""; + + switch (op) + { + case IA_MUL: + base = "mul"; + break; + case IA_SHL: + base = "shl"; + break; + case IA_SHR: + base = "shr"; + break; + case IA_SAR: + base = "sar"; + break; + } + + sprintf_s(name, "__%stab%dL", base, factor); mt.mLinkerLSB = mLinker->AddObject(loc, Ident::Unique(name), mRuntimeSection, LOT_DATA); - sprintf_s(name, "__multab%dH", factor); + sprintf_s(name, "__%stab%dH", base, factor); mt.mLinkerMSB = mLinker->AddObject(loc, Ident::Unique(name), mRuntimeSection, LOT_DATA); mt.mFactor = factor; mt.mSize = size; + mt.mOperator = op; mMulTables.Push(mt); diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index 058bc19..c0ce896 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -390,11 +390,12 @@ public: struct MulTable { - LinkerObject* mLinkerLSB, * mLinkerMSB; - int mFactor, mSize; + LinkerObject * mLinkerLSB, * mLinkerMSB; + int mFactor, mSize; + InterOperator mOperator; }; - LinkerObject* AllocateShortMulTable(int factor, int size, bool msb); + LinkerObject* AllocateShortMulTable(InterOperator op, int factor, int size, bool msb); Runtime& ResolveRuntime(const Ident* ident); @@ -404,4 +405,4 @@ public: GrowingArray mRuntime; GrowingArray mMulTables; -}; \ No newline at end of file +}; diff --git a/oscar64/oscar64.cpp b/oscar64/oscar64.cpp index b5b5964..dae0e70 100644 --- a/oscar64/oscar64.cpp +++ b/oscar64/oscar64.cpp @@ -73,7 +73,7 @@ int main2(int argc, const char** argv) #else strcpy(strProductName, "oscar64"); - strcpy(strProductVersion, "1.5.118"); + strcpy(strProductVersion, "1.5.119"); #ifdef __APPLE__ uint32_t length = sizeof(basePath); diff --git a/oscar64/oscar64.rc b/oscar64/oscar64.rc index c662b3b..eeb5da3 100644 --- a/oscar64/oscar64.rc +++ b/oscar64/oscar64.rc @@ -25,8 +25,8 @@ LANGUAGE LANG_ENGLISH, SUBLANG_NEUTRAL // VS_VERSION_INFO VERSIONINFO - FILEVERSION 1,5,118,0 - PRODUCTVERSION 1,5,118,0 + FILEVERSION 1,5,119,0 + PRODUCTVERSION 1,5,119,0 FILEFLAGSMASK 0x3fL #ifdef _DEBUG FILEFLAGS 0x1L @@ -43,12 +43,12 @@ BEGIN BEGIN VALUE "CompanyName", "oscar64" VALUE "FileDescription", "oscar64 compiler" - VALUE "FileVersion", "1.5.118.0" + VALUE "FileVersion", "1.5.119.0" VALUE "InternalName", "oscar64.exe" VALUE "LegalCopyright", "Copyright (C) 2021" VALUE "OriginalFilename", "oscar64.exe" VALUE "ProductName", "oscar64" - VALUE "ProductVersion", "1.5.118.0" + VALUE "ProductVersion", "1.5.119.0" END END BLOCK "VarFileInfo" diff --git a/oscar64setup/oscar64setup.vdproj b/oscar64setup/oscar64setup.vdproj index a9add1d..51b21ac 100644 --- a/oscar64setup/oscar64setup.vdproj +++ b/oscar64setup/oscar64setup.vdproj @@ -4023,15 +4023,15 @@ { "Name" = "8:Microsoft Visual Studio" "ProductName" = "8:oscar64" - "ProductCode" = "8:{608057E0-DAA0-4E03-846A-C1CC3DB0DF23}" - "PackageCode" = "8:{65A09B39-BEDF-4797-AFF2-480B80A618D0}" + "ProductCode" = "8:{B1BF49F7-D667-4323-99B3-BBC698F30A19}" + "PackageCode" = "8:{4B34FDC5-E807-4B25-8603-B7969A6CF518}" "UpgradeCode" = "8:{9AB61EFF-ACAC-4079-9950-8D96615CD4EF}" "AspNetVersion" = "8:2.0.50727.0" "RestartWWWService" = "11:FALSE" "RemovePreviousVersions" = "11:TRUE" "DetectNewerInstalledVersion" = "11:TRUE" "InstallAllUsers" = "11:FALSE" - "ProductVersion" = "8:1.5.118" + "ProductVersion" = "8:1.5.119" "Manufacturer" = "8:oscar64" "ARPHELPTELEPHONE" = "8:" "ARPHELPLINK" = "8:"