diff --git a/README.md b/README.md index 307bff9..78b127e 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ After extensive optimizations it turns out, that the interpreted code is not sig ## Limits and Errors -There are still several open areas, but most targets have been reached. The current Dhrystone performance is 81 iterations per second with byte code (11108) and 345 iterations with native code (10965 Bytes). This clearly shows that Dhrystone is not a valid benchmark for optimizing compilers, because it puts the 6502 on par with a 4MHz 8088 or 68k, which it clearly is not. +There are still several open areas, but most targets have been reached. The current Dhrystone performance is 82 iterations per second with byte code (11831) and 365 iterations with native code (11372 Bytes). This clearly shows that Dhrystone is not a valid benchmark for optimizing compilers, because it puts the 6502 on par with a 4MHz 8088 or 68k, which it clearly is not. ### Language diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index d34cdcd..9afc125 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -946,6 +946,8 @@ static bool CanBypassLoadUp(const InterInstruction* lins, const InterInstruction { if (lins->mVolatile) return false; + else if (bins->mSrc[1].mMemory == IM_FRAME || bins->mSrc[1].mMemory == IM_FFRAME) + return true; else if (lins->mSrc[0].mTemp >= 0 || bins->mSrc[1].mTemp >= 0) return false; else if (lins->mSrc[0].mMemory != bins->mSrc[1].mMemory) @@ -2645,6 +2647,20 @@ bool InterInstruction::PropagateConstTemps(const GrowingInstructionPtrArray& cte } } break; + case IC_CONVERSION_OPERATOR: + case IC_UNARY_OPERATOR: + { + if (mSrc[0].mTemp >= 0 && ctemps[mSrc[0].mTemp]) + { + InterInstruction* ains = ctemps[mSrc[0].mTemp]; + mSrc[0] = ains->mConst; + mSrc[0].mType = ains->mDst.mType; + this->ConstantFolding(); + return true; + } + + } break; + case IC_LEA: if (mSrc[0].mTemp >= 0 && ctemps[mSrc[0].mTemp]) { @@ -4929,6 +4945,8 @@ bool InterCodeBasicBlock::BuildGlobalIntegerRangeSets(bool initial) mLocalValueRange.Clear(); + assert(mLocalValueRange.Size() == mExitRequiredTemps.Size()); + for (int j = 0; j < mEntryBlocks.Size(); j++) { InterCodeBasicBlock* from = mEntryBlocks[j]; @@ -4940,6 +4958,7 @@ bool InterCodeBasicBlock::BuildGlobalIntegerRangeSets(bool initial) for (int i = 0; i < mLocalValueRange.Size(); i++) mLocalValueRange[i].Merge(range[i], mLoopHead, initial); } + assert(mLocalValueRange.Size() == mExitRequiredTemps.Size()); } for (int i = 0; i < mLocalValueRange.Size(); i++) @@ -5012,6 +5031,8 @@ void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(void) int sz = mInstructions.Size(); + assert(mLocalValueRange.Size() == mExitRequiredTemps.Size()); + for (int i = 0; i < sz; i++) { InterInstruction* ins(mInstructions[i]); @@ -5347,12 +5368,12 @@ void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(void) switch (ins->mSrc[1].mType) { case IT_INT16: - vr.mMaxValue = (unsigned short)(vr.mMaxValue) >> ins->mSrc[0].mIntConst; - vr.mMinValue = (unsigned short)(vr.mMinValue) >> ins->mSrc[0].mIntConst; + vr.mMaxValue = (unsigned short)(int64min(65535, vr.mMaxValue)) >> ins->mSrc[0].mIntConst; + vr.mMinValue = (unsigned short)(int64max(0, vr.mMinValue)) >> ins->mSrc[0].mIntConst; break; case IT_INT8: - vr.mMaxValue = (unsigned char)(vr.mMaxValue) >> ins->mSrc[0].mIntConst; - vr.mMinValue = (unsigned char)(vr.mMinValue) >> ins->mSrc[0].mIntConst; + vr.mMaxValue = (unsigned char)(int64min(255, vr.mMaxValue)) >> ins->mSrc[0].mIntConst; + vr.mMinValue = (unsigned char)(int64max(0, vr.mMinValue)) >> ins->mSrc[0].mIntConst; break; case IT_INT32: vr.mMaxValue = (unsigned)(vr.mMaxValue) >> ins->mSrc[0].mIntConst; @@ -5399,12 +5420,12 @@ void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(void) switch (ins->mSrc[1].mType) { case IT_INT16: - vr.mMaxValue = (short)(vr.mMaxValue) >> ins->mSrc[0].mIntConst; - vr.mMinValue = (short)(vr.mMinValue) >> ins->mSrc[0].mIntConst; + vr.mMaxValue = (short)(int64min( 32767, vr.mMaxValue)) >> ins->mSrc[0].mIntConst; + vr.mMinValue = (short)(int64max(-32768, vr.mMinValue)) >> ins->mSrc[0].mIntConst; break; case IT_INT8: - vr.mMaxValue = (char)(vr.mMaxValue) >> ins->mSrc[0].mIntConst; - vr.mMinValue = (char)(vr.mMinValue) >> ins->mSrc[0].mIntConst; + vr.mMaxValue = (char)(int64min( 127, vr.mMaxValue)) >> ins->mSrc[0].mIntConst; + vr.mMinValue = (char)(int64max(-128, vr.mMinValue)) >> ins->mSrc[0].mIntConst; break; case IT_INT32: vr.mMaxValue = (int)(vr.mMaxValue) >> ins->mSrc[0].mIntConst; @@ -5504,6 +5525,7 @@ void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(void) #endif } + assert(mLocalValueRange.Size() == mExitRequiredTemps.Size()); } #if 1 @@ -5870,6 +5892,120 @@ void InterCodeBasicBlock::BuildLocalIntegerRangeSets(int num) } } +void InterCodeBasicBlock::BuildConstTempSets(void) +{ + int i; + + if (!mVisited) + { + mVisited = true; + + mEntryConstTemp = NumberSet(mEntryRequiredTemps.Size()); + mExitConstTemp = NumberSet(mEntryRequiredTemps.Size()); + + for (i = 0; i < mInstructions.Size(); i++) + { + const InterInstruction* ins = mInstructions[i]; + + if (ins->mDst.mTemp >= 0) + { + if (ins->mCode == IC_CONSTANT) + mExitConstTemp += ins->mDst.mTemp; + else + mExitConstTemp -= ins->mDst.mTemp; + } + } + + if (mTrueJump) mTrueJump->BuildConstTempSets(); + if (mFalseJump) mFalseJump->BuildConstTempSets(); + } +} + +bool InterCodeBasicBlock::PropagateConstOperationsUp(void) +{ +// return false; + + bool changed = false; + + if (!mVisited) + { + mVisited = true; + + if (mTrueJump && mTrueJump->PropagateConstOperationsUp()) + changed = true; + + if (mFalseJump && mFalseJump->PropagateConstOperationsUp()) + changed = true; + + if (mEntryBlocks.Size()) + { + mEntryConstTemp = mEntryBlocks[0]->mExitConstTemp; + + for (int i = 1; i < mEntryBlocks.Size(); i++) + mEntryConstTemp &= mEntryBlocks[i]->mExitConstTemp; + + int i = 0; + while (i + 1 < mInstructions.Size()) + { + const InterInstruction* ins = mInstructions[i]; + + if (!HasSideEffect(ins->mCode) && ins->mCode != IC_CONSTANT && ins->mCode != IC_STORE && ins->mCode != IC_COPY) + { + bool isProvided = false; + if (ins->mDst.mTemp >= 0) + { + for (int j = 0; j < mEntryBlocks.Size(); j++) + if (mEntryBlocks[j]->mExitRequiredTemps[ins->mDst.mTemp]) + isProvided = true; + } + + + bool hasop = false; + int j = 0; + while (j < ins->mNumOperands && (ins->mSrc[j].mTemp < 0 || mEntryConstTemp[ins->mSrc[j].mTemp])) + { + if (ins->mSrc[j].mTemp >= 0) + hasop = true; + j++; + } + + if (j == ins->mNumOperands && hasop && !isProvided && CanMoveInstructionBeforeBlock(i)) + { + for (int j = 0; j < mEntryBlocks.Size(); j++) + { + InterInstruction* nins = ins->Clone(); + InterCodeBasicBlock* eb = mEntryBlocks[j]; + + int di = eb->mInstructions.Size() - 1; + if (eb->mInstructions[di]->mCode == IC_BRANCH && di > 0 && eb->mInstructions[di - 1]->mDst.mTemp == eb->mInstructions[di]->mSrc[0].mTemp && + CanBypassUp(ins, eb->mInstructions[di - 1])) + { + di--; + } + + eb->mInstructions.Insert(di, nins); + } + mInstructions.Remove(i); + changed = true; + } + else + i++; + } + else + i++; + + if (ins->mDst.mTemp >= 0) + mEntryConstTemp -= ins->mDst.mTemp; + } + } + } + + return changed; +} + + + + void InterCodeBasicBlock::BuildLocalTempSets(int num) { int i; @@ -6661,6 +6797,55 @@ bool InterCodeBasicBlock::SimplifyIntegerNumeric(const GrowingInstructionPtrArra } #endif break; +#if 1 + case IA_ADD: + if (ins->mSrc[1].mTemp < 0 && ins->mSrc[0].mTemp >= 0 && ltvalue[ins->mSrc[0].mTemp] && ins->mSrc[0].mFinal) + { + InterInstruction* pins = ltvalue[ins->mSrc[0].mTemp]; + + if (pins->mCode == IC_BINARY_OPERATOR && pins->mOperator == IA_ADD) + { + if (pins->mSrc[0].mTemp < 0) + { + ins->mSrc[0].Forward(pins->mSrc[1]); + pins->mSrc[1].mFinal = false; + ins->mSrc[1].mIntConst += pins->mSrc[0].mIntConst; + changed = true; + } + else if (pins->mSrc[1].mTemp < 0) + { + ins->mSrc[0].Forward(pins->mSrc[0]); + pins->mSrc[0].mFinal = false; + ins->mSrc[1].mIntConst += pins->mSrc[1].mIntConst; + changed = true; + } + } + } + else if (ins->mSrc[0].mTemp < 0 && ins->mSrc[1].mTemp >= 0 && ltvalue[ins->mSrc[1].mTemp] && ins->mSrc[1].mFinal) + { + InterInstruction* pins = ltvalue[ins->mSrc[1].mTemp]; + + if (pins->mCode == IC_BINARY_OPERATOR && pins->mOperator == IA_ADD) + { + if (pins->mSrc[0].mTemp < 0) + { + ins->mSrc[1].Forward(pins->mSrc[1]); + pins->mSrc[1].mFinal = false; + ins->mSrc[0].mIntConst += pins->mSrc[0].mIntConst; + changed = true; + } + else if (pins->mSrc[1].mTemp < 0) + { + ins->mSrc[1].Forward(pins->mSrc[0]); + pins->mSrc[0].mFinal = false; + ins->mSrc[0].mIntConst += pins->mSrc[1].mIntConst; + changed = true; + } + } + } + + break; +#endif } } break; @@ -6675,6 +6860,14 @@ bool InterCodeBasicBlock::SimplifyIntegerNumeric(const GrowingInstructionPtrArra ins->mSrc[1].mIntConst += pins->mSrc[0].mIntConst; changed = true; } +#if 1 + else if (pins->mCode == IC_BINARY_OPERATOR && pins->mOperator == IA_ADD && pins->mSrc[1].mTemp < 0 && pins->mDst.mType == IT_INT16) + { + ins->mSrc[0] = pins->mSrc[0]; + ins->mSrc[1].mIntConst += pins->mSrc[1].mIntConst; + changed = true; + } +#endif #if 1 else if (pins->mCode == IC_CONVERSION_OPERATOR && pins->mOperator == IA_EXT8TO16U && pins->mSrc[0].IsUByte() && pins->mSrc[0].mTemp >= 0 && ltvalue[pins->mSrc[0].mTemp]) { @@ -6689,6 +6882,14 @@ bool InterCodeBasicBlock::SimplifyIntegerNumeric(const GrowingInstructionPtrArra } #endif } + else if (ins->mSrc[1].mTemp >= 0 && ltvalue[ins->mSrc[1].mTemp] && ltvalue[ins->mSrc[1].mTemp]->mCode == IC_CONSTANT) + { + InterInstruction* pins = ltvalue[ins->mSrc[1].mTemp]; + + ins->mSrc[1].ForwardMem(pins->mConst); + ins->mSrc[1].mType = IT_POINTER; + changed = true; + } else if (ins->mSrc[1].mTemp >= 0 && ins->mSrc[0].mTemp < 0 && ltvalue[ins->mSrc[1].mTemp]) { InterInstruction* pins = ltvalue[ins->mSrc[1].mTemp]; @@ -6702,7 +6903,29 @@ bool InterCodeBasicBlock::SimplifyIntegerNumeric(const GrowingInstructionPtrArra } } break; +#if 1 + case IC_CONVERSION_OPERATOR: + if (ins->mOperator == IA_EXT8TO16U) + { + if (ins->mSrc[0].mTemp >= 0 && ltvalue[ins->mSrc[0].mTemp] && ltvalue[ins->mSrc[0].mTemp]->mDst.mType == IT_INT16 && ins->mSrc[0].IsUByte()) + { + ins->mCode = IC_LOAD_TEMPORARY; + ins->mSrc[0].mType = IT_INT16; + changed = true; + } + } + else if (ins->mOperator == IA_EXT8TO16S) + { + if (ins->mSrc[0].mTemp >= 0 && ltvalue[ins->mSrc[0].mTemp] && ltvalue[ins->mSrc[0].mTemp]->mDst.mType == IT_INT16 && ins->mSrc[0].IsSByte()) + { + ins->mCode = IC_LOAD_TEMPORARY; + ins->mSrc[0].mType = IT_INT16; + changed = true; + } + } + break; +#endif } // Now kill all instructions that referenced the current destination as source, they are @@ -6718,7 +6941,8 @@ bool InterCodeBasicBlock::SimplifyIntegerNumeric(const GrowingInstructionPtrArra ltvalue[i] = nullptr; } - ltvalue[dtemp] = ins; + if (!ins->UsesTemp(dtemp)) + ltvalue[dtemp] = ins; } } @@ -9347,7 +9571,7 @@ void InterCodeBasicBlock::CompactInstructions(void) } } -void InterCodeBasicBlock::PeepholeOptimization(void) +void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& staticVars) { int i; @@ -9653,6 +9877,12 @@ void InterCodeBasicBlock::PeepholeOptimization(void) mInstructions[i]->mCode = IC_NONE; changed = true; } + if (mInstructions[i]->mCode == IC_LOAD && mInstructions[i]->mSrc[0].mMemory == IM_GLOBAL && (mInstructions[i]->mSrc->mLinkerObject->mFlags & LOBJF_CONST)) + { + LoadConstantFold(mInstructions[i], nullptr, staticVars); + changed = true; + } + if (i + 2 < mInstructions.Size()) { if (mInstructions[i + 0]->mCode == IC_LOAD && @@ -9967,8 +10197,8 @@ void InterCodeBasicBlock::PeepholeOptimization(void) } while (changed); - if (mTrueJump) mTrueJump->PeepholeOptimization(); - if (mFalseJump) mFalseJump->PeepholeOptimization(); + if (mTrueJump) mTrueJump->PeepholeOptimization(staticVars); + if (mFalseJump) mFalseJump->PeepholeOptimization(staticVars); } } @@ -10108,6 +10338,39 @@ void InterCodeBasicBlock::CollectActiveTemporaries(FastNumberSet& set) } } +void InterCodeBasicBlock::RemapActiveTemporaries(const FastNumberSet& set) +{ + if (!mVisited) + { + mVisited = true; + + GrowingIntegerValueRangeArray entryValueRange(mEntryValueRange); + GrowingIntegerValueRangeArray trueValueRange(mTrueValueRange); + GrowingIntegerValueRangeArray falseValueRange(mFalseValueRange); + GrowingIntegerValueRangeArray localValueRange(mLocalValueRange); + GrowingIntegerValueRangeArray reverseValueRange(mReverseValueRange); + + mEntryValueRange.SetSize(set.Num(), true); + mTrueValueRange.SetSize(set.Num(), true); + mFalseValueRange.SetSize(set.Num(), true); + mLocalValueRange.SetSize(set.Num(), true); + mReverseValueRange.SetSize(set.Num(), true); + + for (int i = 0; i < set.Num(); i++) + { + int j = set.Element(i); + mEntryValueRange[i] = entryValueRange[j]; + mTrueValueRange[i] = trueValueRange[j]; + mFalseValueRange[i] = falseValueRange[j]; + mLocalValueRange[i] = localValueRange[j]; + mReverseValueRange[i] = reverseValueRange[j]; + } + + if (mTrueJump) mTrueJump->RemapActiveTemporaries(set); + if (mFalseJump) mFalseJump->RemapActiveTemporaries(set); + } +} + void InterCodeBasicBlock::ShrinkActiveTemporaries(FastNumberSet& set, GrowingTypeArray& temporaries) { int i; @@ -10711,7 +10974,7 @@ void InterCodeProcedure::Close(void) BuildDataFlowSets(); ResetVisited(); - mEntryBlock->PeepholeOptimization(); + mEntryBlock->PeepholeOptimization(mModule->mGlobalVars); DisassembleDebug("Peephole optimized"); @@ -10743,7 +11006,7 @@ void InterCodeProcedure::Close(void) SingleAssignmentForwarding(); ResetVisited(); - mEntryBlock->PeepholeOptimization(); + mEntryBlock->PeepholeOptimization(mModule->mGlobalVars); TempForwarding(); RemoveUnusedInstructions(); @@ -10897,12 +11160,21 @@ void InterCodeProcedure::Close(void) BuildTraces(false); DisassembleDebug("Rebuilt traces"); + ResetEntryBlocks(); + ResetVisited(); + mEntryBlock->CollectEntryBlocks(nullptr); + #if 1 GrowingInstructionPtrArray silvalues(nullptr); int silvused; do { + BuildDataFlowSets(); + + TempForwarding(); + RemoveUnusedInstructions(); + activeSet.Clear(); ResetVisited(); @@ -10916,9 +11188,14 @@ void InterCodeProcedure::Close(void) ResetVisited(); mEntryBlock->ShrinkActiveTemporaries(activeSet, mTemporaries); + ResetVisited(); + mEntryBlock->RemapActiveTemporaries(activeSet); + ResetVisited(); } while (mEntryBlock->SimplifyIntegerNumeric(silvalues, silvused)); + assert(silvused == mTemporaries.Size()); + DisassembleDebug("SimplifyIntegerNumeric"); #endif @@ -10927,6 +11204,8 @@ void InterCodeProcedure::Close(void) do { BuildDataFlowSets(); + assert(mTemporaries.Size() == mEntryBlock->mLocalValueRange.Size()); + eivalues.SetSize(mTemporaries.Size(), true); ResetVisited(); @@ -10934,6 +11213,25 @@ void InterCodeProcedure::Close(void) DisassembleDebug("EliminateAliasValues"); +#if 1 + ResetVisited(); + mEntryBlock->RestartLocalIntegerRangeSets(); + + do { + DisassembleDebug("tr"); + + ResetVisited(); + } while (mEntryBlock->BuildGlobalIntegerRangeSets(true)); + + do { + DisassembleDebug("tr"); + + ResetVisited(); + } while (mEntryBlock->BuildGlobalIntegerRangeSets(false)); + + DisassembleDebug("Estimated value range 2"); +#endif + #if 1 if (mModule->mCompilerOptions & COPT_OPTIMIZE_AUTO_UNROLL) { @@ -10970,7 +11268,7 @@ void InterCodeProcedure::Close(void) #if 1 ResetVisited(); - mEntryBlock->PeepholeOptimization(); + mEntryBlock->PeepholeOptimization(mModule->mGlobalVars); TempForwarding(); RemoveUnusedInstructions(); @@ -10998,7 +11296,7 @@ void InterCodeProcedure::Close(void) #if 1 ResetVisited(); - mEntryBlock->PeepholeOptimization(); + mEntryBlock->PeepholeOptimization(mModule->mGlobalVars); TempForwarding(); RemoveUnusedInstructions(); @@ -11024,6 +11322,36 @@ void InterCodeProcedure::Close(void) DisassembleDebug("Rebuilt traces"); #endif +#if 1 + ResetEntryBlocks(); + ResetVisited(); + mEntryBlock->CollectEntryBlocks(nullptr); + + do { + changed = false; + + ResetVisited(); + mEntryBlock->BuildConstTempSets(); + + ResetVisited(); + if (mEntryBlock->PropagateConstOperationsUp()) + { + BuildDataFlowSets(); + + GlobalConstantPropagation(); + + TempForwarding(); + + RemoveUnusedInstructions(); + + changed = true; + + DisassembleDebug("prop const op up"); + } + } while (changed); +#endif + + #if 1 ResetVisited(); if (!mInterruptCalled && mEntryBlock->CheckStaticStack()) @@ -11056,7 +11384,7 @@ void InterCodeProcedure::Close(void) } while (GlobalConstantPropagation()); ResetVisited(); - mEntryBlock->PeepholeOptimization(); + mEntryBlock->PeepholeOptimization(mModule->mGlobalVars); TempForwarding(); RemoveUnusedInstructions(); diff --git a/oscar64/InterCode.h b/oscar64/InterCode.h index cfd9c70..5583fef 100644 --- a/oscar64/InterCode.h +++ b/oscar64/InterCode.h @@ -338,6 +338,7 @@ public: NumberSet mLocalRequiredTemps, mLocalProvidedTemps; NumberSet mEntryRequiredTemps, mEntryProvidedTemps; NumberSet mExitRequiredTemps, mExitProvidedTemps; + NumberSet mEntryConstTemp, mExitConstTemp; NumberSet mLocalRequiredVars, mLocalProvidedVars; NumberSet mEntryRequiredVars, mEntryProvidedVars; @@ -389,6 +390,8 @@ public: bool BuildGlobalRequiredTempSet(NumberSet& fromRequiredTemps); bool RemoveUnusedResultInstructions(void); void BuildCallerSaveTempSet(NumberSet& callerSaveTemps); + void BuildConstTempSets(void); + bool PropagateConstOperationsUp(void); void BuildLocalVariableSets(const GrowingVariableArray& localVars, const GrowingVariableArray& params, InterMemory paramMemory); void BuildGlobalProvidedVariableSet(const GrowingVariableArray& localVars, NumberSet fromProvidedVars, const GrowingVariableArray& params, NumberSet fromProvidedParams, InterMemory paramMemory); @@ -435,6 +438,7 @@ public: void CollectActiveTemporaries(FastNumberSet& set); void ShrinkActiveTemporaries(FastNumberSet& set, GrowingTypeArray& temporaries); + void RemapActiveTemporaries(const FastNumberSet& set); void Disassemble(FILE* file, bool dumpSets); @@ -456,7 +460,7 @@ public: bool CanMoveInstructionBeforeBlock(int ii) const; bool MergeCommonPathInstructions(void); - void PeepholeOptimization(void); + void PeepholeOptimization(const GrowingVariableArray& staticVars); void SingleBlockLoopOptimisation(const NumberSet& aliasedParams); void SingleBlockLoopUnrolling(void); bool CollectLoopBody(InterCodeBasicBlock* head, GrowingArray & body); diff --git a/oscar64/MachineTypes.h b/oscar64/MachineTypes.h index 58d9f40..891f66f 100644 --- a/oscar64/MachineTypes.h +++ b/oscar64/MachineTypes.h @@ -98,4 +98,9 @@ extern uint8 BC_REG_TMP_SAVED; inline int64 int64max(int64 a, int64 b) { return a > b ? a : b; +} + +inline int64 int64min(int64 a, int64 b) +{ + return a < b ? a : b; } \ No newline at end of file diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index b377670..ad04070 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -8886,7 +8886,7 @@ void NativeCodeBasicBlock::RelationalOperator(InterCodeProcedure* proc, const In } NativeCodeGenerator::Runtime& frt(nproc->mGenerator->ResolveRuntime(Ident::Unique("fcmp"))); - mIns.Push(NativeCodeInstruction(ASMIT_JSR, ASMIM_ABSOLUTE, frt.mOffset, frt.mLinkerObject, NCIF_RUNTIME | NCIF_LOWER | NCIF_UPPER)); + mIns.Push(NativeCodeInstruction(ASMIT_JSR, ASMIM_ABSOLUTE, frt.mOffset, frt.mLinkerObject, NCIF_RUNTIME | NCIF_LOWER | NCIF_UPPER | NCIF_JSRFLAGS)); switch (op) { @@ -11583,6 +11583,7 @@ bool NativeCodeBasicBlock::ExpandADCToBranch(NativeCodeProcedure* proc) for (int j = i + 6; j < mIns.Size(); j++) rblock->mIns.Push(mIns[j]); mIns.SetSize(i + 1); + mIns[i + 0].mLive |= LIVE_CPU_REG_Z; mTrueJump = neblock; mFalseJump = eblock; @@ -11620,6 +11621,7 @@ bool NativeCodeBasicBlock::ExpandADCToBranch(NativeCodeProcedure* proc) for (int j = i + 5; j < mIns.Size(); j++) rblock->mIns.Push(mIns[j]); mIns.SetSize(i + 1); + mIns[i + 0].mLive |= LIVE_CPU_REG_Z; mTrueJump = neblock; mFalseJump = eblock; @@ -21898,6 +21900,34 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[i + 3].mType = ASMIT_STY; progress = true; } +#if 1 + else if ( + mIns[i + 0].mType == ASMIT_TYA && + mIns[i + 1].mType == ASMIT_CLC && + mIns[i + 2].mType == ASMIT_ADC && mIns[i + 2].mMode == ASMIM_IMMEDIATE && mIns[i + 2].mAddress == 0xff && + mIns[i + 3].mType == ASMIT_STA && (mIns[i + 3].mMode == ASMIM_ZERO_PAGE || mIns[i + 3].mMode == ASMIM_ABSOLUTE) && + !(mIns[i + 3].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Y | LIVE_CPU_REG_C))) + { + mIns[i + 0].mType = ASMIT_NOP; mIns[i + 0].mMode = ASMIM_IMPLIED; + mIns[i + 1].mType = ASMIT_DEY; mIns[i + 1].mMode = ASMIM_IMPLIED; + mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED; + mIns[i + 3].mType = ASMIT_STY; + progress = true; + } + else if ( + mIns[i + 0].mType == ASMIT_TXA && + mIns[i + 1].mType == ASMIT_CLC && + mIns[i + 2].mType == ASMIT_ADC && mIns[i + 2].mMode == ASMIM_IMMEDIATE && mIns[i + 2].mAddress == 0xff && + mIns[i + 3].mType == ASMIT_STA && (mIns[i + 3].mMode == ASMIM_ZERO_PAGE || mIns[i + 3].mMode == ASMIM_ABSOLUTE) && + !(mIns[i + 3].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_X | LIVE_CPU_REG_C))) + { + mIns[i + 0].mType = ASMIT_NOP; mIns[i + 0].mMode = ASMIM_IMPLIED; + mIns[i + 1].mType = ASMIT_DEX; mIns[i + 1].mMode = ASMIM_IMPLIED; + mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED; + mIns[i + 3].mType = ASMIT_STX; + progress = true; + } +#endif #if 1 else if ( mIns[i + 0].mType == ASMIT_TAX && @@ -22924,7 +22954,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass tblock->mEntryBlocks.Push(this); mIns[sz - 2].mType = ASMIT_NOP; mIns[sz - 2].mMode = ASMIM_IMPLIED; - mIns[sz - 1].mType = ASMIT_ORA; mIns[sz - 1].mAddress = 0; + mIns[sz - 1].mType = ASMIT_ORA; mIns[sz - 1].mAddress = 0; mIns[sz - 1].mLive |= LIVE_CPU_REG_Z; } } @@ -22955,7 +22985,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[sz - 4].mType = ASMIT_NOP; mIns[sz - 4].mMode = ASMIM_IMPLIED; mIns[sz - 3].mType = ASMIT_NOP; mIns[sz - 3].mMode = ASMIM_IMPLIED; mIns[sz - 2].mType = ASMIT_NOP; mIns[sz - 2].mMode = ASMIM_IMPLIED; - mIns[sz - 1].mType = ASMIT_ORA; mIns[sz - 1].mMode = ASMIM_IMMEDIATE; mIns[sz - 1].mAddress = 0; + mIns[sz - 1].mType = ASMIT_ORA; mIns[sz - 1].mMode = ASMIM_IMMEDIATE; mIns[sz - 1].mAddress = 0; mIns[sz - 1].mLive |= LIVE_CPU_REG_Z; } } @@ -22981,8 +23011,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[sz - 1].mType == ASMIT_AND && mIns[sz - 1].mMode == ASMIM_IMMEDIATE && mIns[sz - 1].mAddress == 0x80 && !(mIns[sz - 1].mLive & LIVE_CPU_REG_A) && (mBranch == ASMIT_BEQ || mBranch == ASMIT_BNE)) { - mIns[sz - 1].mType = ASMIT_ORA; - mIns[sz - 1].mAddress = 0x00; + mIns[sz - 1].mType = ASMIT_ORA; mIns[sz - 1].mAddress = 0x00; mIns[sz - 1].mLive |= LIVE_CPU_REG_Z; if (mBranch == ASMIT_BEQ) mBranch = ASMIT_BPL; @@ -23091,6 +23120,20 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass } +void NativeCodeBasicBlock::CheckBlocks(void) +{ +#if _DEBUG + if (!mVisited) + { + mVisited = true; + CheckLive(); + + if (mTrueJump) mTrueJump->CheckBlocks(); + if (mFalseJump) mFalseJump->CheckBlocks(); + } +#endif +} + void NativeCodeBasicBlock::CheckLive(void) { #if _DEBUG @@ -23109,7 +23152,9 @@ void NativeCodeBasicBlock::CheckLive(void) if (mIns[j].mType == ASMIT_JSR) { - assert(!(live & (LIVE_CPU_REG_X | LIVE_CPU_REG_Y | LIVE_CPU_REG_C | LIVE_CPU_REG_Z))); + assert(!(live & (LIVE_CPU_REG_X | LIVE_CPU_REG_Y))); + if (!(mIns[j].mFlags & NCIF_JSRFLAGS)) + assert(!(live & (LIVE_CPU_REG_C | LIVE_CPU_REG_Z))); } if (mIns[j].ChangesXReg()) live &= ~LIVE_CPU_REG_X; @@ -24071,6 +24116,11 @@ void NativeCodeProcedure::Optimize(void) } while (changed); #endif +#if _DEBUG + ResetVisited(); + mEntryBlock->CheckBlocks(); +#endif + #if 1 ResetVisited(); if (mEntryBlock->PeepHoleOptimizer(this, step)) @@ -24237,6 +24287,12 @@ void NativeCodeProcedure::Optimize(void) } } #endif + +#if _DEBUG + ResetVisited(); + mEntryBlock->CheckBlocks(); +#endif + #if 1 ResetVisited(); NativeRegisterDataSet data; @@ -24255,6 +24311,11 @@ void NativeCodeProcedure::Optimize(void) } #endif +#if _DEBUG + ResetVisited(); + mEntryBlock->CheckBlocks(); +#endif + #if 1 ResetVisited(); if (mEntryBlock->ForwardAccuAddSub()) @@ -24273,6 +24334,11 @@ void NativeCodeProcedure::Optimize(void) changed = true; #endif +#if _DEBUG + ResetVisited(); + mEntryBlock->CheckBlocks(); +#endif + #if 1 if (step >= 6) { diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index 5fb682a..7c0c997 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -54,6 +54,7 @@ static const uint32 NCIF_YZERO = 0x00000008; static const uint32 NCIF_VOLATILE = 0x00000010; static const uint32 NCIF_LONG = 0x00000020; static const uint32 NCIF_FEXEC = 0x00000040; +static const uint32 NCIF_JSRFLAGS = 0x00000080; static const uint32 NCIF_USE_CPU_REG_A = 0x00001000; static const uint32 NCIF_USE_CPU_REG_X = 0x00002000; @@ -352,6 +353,7 @@ public: bool IsDominatedBy(const NativeCodeBasicBlock* block) const; void CheckLive(void); + void CheckBlocks(void); }; class NativeCodeProcedure diff --git a/oscar64/NumberSet.cpp b/oscar64/NumberSet.cpp index 6119c5f..aeacfb2 100644 --- a/oscar64/NumberSet.cpp +++ b/oscar64/NumberSet.cpp @@ -247,7 +247,7 @@ void FastNumberSet::Clear(void) num = 0; } -int FastNumberSet::Index(int elem) +int FastNumberSet::Index(int elem) const { uint32 dw = buffer[size + elem]; diff --git a/oscar64/NumberSet.h b/oscar64/NumberSet.h index 7cf0640..e060b34 100644 --- a/oscar64/NumberSet.h +++ b/oscar64/NumberSet.h @@ -77,21 +77,21 @@ public: FastNumberSet& operator+=(int elem); FastNumberSet& operator-=(int elem); - bool operator[](int elem); + bool operator[](int elem) const; FastNumberSet& operator=(const FastNumberSet& set); - bool Empty(void) { return !num; } + bool Empty(void) const { return !num; } void Clear(void); - int Num(void) { return num; } - int Element(int i); + int Num(void) const { return num; } + int Element(int i) const; - int Size(void) { return size; } - int Index(int elem); + int Size(void) const { return size; } + int Index(int elem) const; }; -inline bool FastNumberSet::operator[](int elem) +inline bool FastNumberSet::operator[](int elem) const { uint32 dw = buffer[size + elem]; @@ -130,7 +130,7 @@ inline FastNumberSet& FastNumberSet::operator-=(int elem) return *this; } -inline int FastNumberSet::Element(int i) +inline int FastNumberSet::Element(int i) const { if (i < num) return buffer[i]; diff --git a/oscar64/oscar64.cpp b/oscar64/oscar64.cpp index 007839e..6d1a4e2 100644 --- a/oscar64/oscar64.cpp +++ b/oscar64/oscar64.cpp @@ -74,7 +74,7 @@ int main2(int argc, const char** argv) #else strcpy(strProductName, "oscar64"); - strcpy(strProductVersion, "1.7.131"); + strcpy(strProductVersion, "1.7.132"); #ifdef __APPLE__ uint32_t length = sizeof(basePath); diff --git a/oscar64/oscar64.rc b/oscar64/oscar64.rc index b9c1127..d84b4fe 100644 --- a/oscar64/oscar64.rc +++ b/oscar64/oscar64.rc @@ -25,8 +25,8 @@ LANGUAGE LANG_ENGLISH, SUBLANG_NEUTRAL // VS_VERSION_INFO VERSIONINFO - FILEVERSION 1,7,131,0 - PRODUCTVERSION 1,7,131,0 + FILEVERSION 1,7,132,0 + PRODUCTVERSION 1,7,132,0 FILEFLAGSMASK 0x3fL #ifdef _DEBUG FILEFLAGS 0x1L @@ -43,12 +43,12 @@ BEGIN BEGIN VALUE "CompanyName", "oscar64" VALUE "FileDescription", "oscar64 compiler" - VALUE "FileVersion", "1.7.131.0" + VALUE "FileVersion", "1.7.132.0" VALUE "InternalName", "oscar64.exe" VALUE "LegalCopyright", "Copyright (C) 2021" VALUE "OriginalFilename", "oscar64.exe" VALUE "ProductName", "oscar64" - VALUE "ProductVersion", "1.7.131.0" + VALUE "ProductVersion", "1.7.132.0" END END BLOCK "VarFileInfo" diff --git a/oscar64setup/oscar64setup.vdproj b/oscar64setup/oscar64setup.vdproj index b91f794..0dea71e 100644 --- a/oscar64setup/oscar64setup.vdproj +++ b/oscar64setup/oscar64setup.vdproj @@ -4127,15 +4127,15 @@ { "Name" = "8:Microsoft Visual Studio" "ProductName" = "8:oscar64" - "ProductCode" = "8:{0C6C4C06-0820-4C2F-B1B9-1E525CEC0D53}" - "PackageCode" = "8:{CE1DC98E-EB19-476E-9D59-CA1D561B38E9}" + "ProductCode" = "8:{A0EA5973-AD76-484C-B67E-4289D67E95D6}" + "PackageCode" = "8:{06EA7EB2-B913-4F30-8E87-E092699B4F9A}" "UpgradeCode" = "8:{9AB61EFF-ACAC-4079-9950-8D96615CD4EF}" "AspNetVersion" = "8:2.0.50727.0" "RestartWWWService" = "11:FALSE" "RemovePreviousVersions" = "11:TRUE" "DetectNewerInstalledVersion" = "11:TRUE" "InstallAllUsers" = "11:FALSE" - "ProductVersion" = "8:1.7.131" + "ProductVersion" = "8:1.7.132" "Manufacturer" = "8:oscar64" "ARPHELPTELEPHONE" = "8:" "ARPHELPLINK" = "8:"