From 9156db9c32fde1f93b3d54bc6dc7d5143b67d046 Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Sat, 14 Oct 2023 13:43:11 +0200 Subject: [PATCH] Restart integer range estimation from full state --- oscar64/InterCode.cpp | 351 +++++++++++++++++++++++++++++--- oscar64/InterCode.h | 24 ++- oscar64/NativeCodeGenerator.cpp | 351 +++++++++++++++++++++++++++++++- oscar64/NativeCodeGenerator.h | 3 + oscar64/NumberSet.cpp | 28 +++ oscar64/NumberSet.h | 4 + 6 files changed, 728 insertions(+), 33 deletions(-) diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index ee838e9..1b1b5f0 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -45,6 +45,14 @@ void IntegerValueRange::Reset(void) mMaxExpanded = 0; } +void IntegerValueRange::Restart(void) +{ + if (mMinState == IntegerValueRange::S_UNBOUND) + mMinState = IntegerValueRange::S_UNKNOWN; + if (mMaxState == IntegerValueRange::S_UNBOUND) + mMaxState = IntegerValueRange::S_UNKNOWN; +} + bool IntegerValueRange::Same(const IntegerValueRange& range) const { @@ -115,6 +123,26 @@ bool IntegerValueRange::IsConstant(void) const return mMinState == S_BOUND && mMaxState == S_BOUND && mMinValue == mMaxValue; } +void IntegerValueRange::MergeUnknown(const IntegerValueRange& range) +{ + if (mMinState != S_BOUND) + { + mMinState = range.mMinState; + mMinValue = range.mMinValue; + } + else if (range.mMinState == S_BOUND && mMinValue < range.mMinValue) + mMinValue = range.mMinValue; + + if (mMaxState != S_BOUND) + { + mMaxState = range.mMaxState; + mMaxValue = range.mMaxValue; + } + else if (range.mMaxState == S_BOUND && mMaxValue > range.mMaxValue) + mMaxValue = range.mMaxValue; +} + + void IntegerValueRange::Limit(const IntegerValueRange& range) { if (range.mMinState == S_BOUND) @@ -160,6 +188,30 @@ void IntegerValueRange::Expand(const IntegerValueRange& range) } } +void IntegerValueRange::Union(const IntegerValueRange& range) +{ + if (range.mMinState == S_UNBOUND || mMinState == S_UNBOUND) + mMinState = S_UNBOUND; + else if (range.mMinState == S_UNKNOWN || mMinState == S_UNKNOWN) + mMinState = S_UNKNOWN; + else + { + mMinValue = int64min(mMinValue, range.mMinValue); + if (range.mMinState == S_WEAK) + mMinState = S_WEAK; + } + if (range.mMaxState == S_UNBOUND || mMaxState == S_UNBOUND) + mMaxState = S_UNBOUND; + else if (range.mMaxState == S_UNKNOWN || mMaxState == S_UNKNOWN) + mMaxState = S_UNKNOWN; + else + { + mMaxValue = int64max(mMaxValue, range.mMaxValue); + if (range.mMaxState == S_WEAK) + mMaxState = S_WEAK; + } +} + bool IntegerValueRange::Merge(const IntegerValueRange& range, bool head, bool initial) { bool changed = false; @@ -3231,6 +3283,44 @@ void InterInstruction::FilterStaticVarsUsage(const GrowingVariableArray& staticV } } +void InterInstruction::FilterStaticVarsByteUsage(const GrowingVariableArray& staticVars, NumberSet& requiredVars, NumberSet& providedVars) +{ + if (mCode == IC_LOAD) + { + if (mSrc[0].mMemory == IM_INDIRECT) + { + if (!mSrc[0].mRestricted) + { + for (int i = 0; i < staticVars.Size(); i++) + { + if (staticVars[i]->mAliased && !providedVars[i]) + requiredVars.AddRange(staticVars[i]->mByteIndex, staticVars[i]->mSize); + } + } + } + else if (mSrc[0].mMemory == IM_GLOBAL) + { + if (mSrc[0].mVarIndex >= 0 && !providedVars.RangeFilled(staticVars[mSrc[0].mVarIndex]->mByteIndex + int(mSrc[0].mIntConst), InterTypeSize[mDst.mType])) + requiredVars.AddRange(staticVars[mSrc[0].mVarIndex]->mByteIndex + int(mSrc[0].mIntConst), InterTypeSize[mDst.mType]); + } + } + else if (mCode == IC_STORE) + { + if (mSrc[1].mMemory == IM_INDIRECT) + { + } + else if (mSrc[1].mMemory == IM_GLOBAL) + { + if (mSrc[1].mVarIndex >= 0) + providedVars.AddRange(staticVars[mSrc[1].mVarIndex]->mByteIndex + int(mSrc[1].mIntConst), InterTypeSize[mSrc[0].mType]); + } + } + else if (mCode == IC_COPY || mCode == IC_CALL || mCode == IC_CALL_NATIVE || mCode == IC_RETURN || mCode == IC_RETURN_STRUCT || mCode == IC_RETURN_VALUE || mCode == IC_STRCPY || mCode == IC_DISPATCH) + { + requiredVars.OrNot(providedVars); + } +} + void InterInstruction::FilterVarsUsage(const GrowingVariableArray& localVars, NumberSet& requiredVars, NumberSet& providedVars, const GrowingVariableArray& params, NumberSet& requiredParams, NumberSet& providedParams, InterMemory paramMemory) { if (mCode == IC_LOAD) @@ -3878,6 +3968,57 @@ bool InterInstruction::RemoveUnusedStaticStoreInstructions(InterCodeBasicBlock* return changed; } +bool InterInstruction::RemoveUnusedStaticStoreByteInstructions(InterCodeBasicBlock* block, const GrowingVariableArray& staticVars, NumberSet& requiredVars) +{ + bool changed = false; + + if (mCode == IC_LOAD) + { + if (mSrc[0].mMemory == IM_INDIRECT) + { + if (!mSrc[0].mRestricted) + { + for (int i = 0; i < staticVars.Size(); i++) + { + if (staticVars[i]->mAliased) + requiredVars.AddRange(staticVars[i]->mByteIndex, staticVars[i]->mSize); + } + } + } + else if (mSrc[0].mMemory == IM_GLOBAL) + { + if (mSrc[0].mVarIndex >= 0) + requiredVars.AddRange(staticVars[mSrc[0].mVarIndex]->mByteIndex + int(mSrc[0].mIntConst), InterTypeSize[mDst.mType]); + } + } + else if (mCode == IC_STORE) + { + if (mSrc[1].mMemory == IM_GLOBAL && mSrc[1].mVarIndex >= 0) + { + if (!requiredVars.RangeClear(staticVars[mSrc[1].mVarIndex]->mByteIndex + int(mSrc[1].mIntConst), InterTypeSize[mSrc[0].mType])) + { + requiredVars.SubRange(staticVars[mSrc[1].mVarIndex]->mByteIndex + int(mSrc[1].mIntConst), InterTypeSize[mSrc[0].mType]); + } + else if (!mVolatile) + { + mSrc[0].mTemp = -1; + mCode = IC_NONE; + changed = true; + } + } + } + else if (mCode == IC_COPY || mCode == IC_STRCPY) + { + requiredVars.Fill(); + } + else if (mCode == IC_CALL || mCode == IC_CALL_NATIVE || mCode == IC_RETURN || mCode == IC_RETURN_STRUCT || mCode == IC_RETURN_VALUE || mCode == IC_DISPATCH) + { + requiredVars.Fill(); + } + + return changed; +} + int InterInstruction::NumUsedTemps(void) const { int n = 0; @@ -4112,6 +4253,13 @@ void InterInstruction::CollectSimpleLocals(FastNumberSet& complexLocals, FastNum } } +void InterInstruction::UnionRanges(InterInstruction* ins) +{ + mDst.mRange.Union(ins->mDst.mRange); + for(int i=0; imSrc[i].mRange); +} + void InterInstruction::SimpleLocalToTemp(int vindex, int temp) { switch (mCode) @@ -4644,7 +4792,7 @@ InterCodeBasicBlock::InterCodeBasicBlock(InterCodeProcedure * proc) mInstructions(nullptr), mEntryRenameTable(-1), mExitRenameTable(-1), mMergeTValues(nullptr), mMergeAValues(nullptr), mTrueJump(nullptr), mFalseJump(nullptr), mLoopPrefix(nullptr), mDominator(nullptr), mEntryValueRange(IntegerValueRange()), mTrueValueRange(IntegerValueRange()), mFalseValueRange(IntegerValueRange()), mLocalValueRange(IntegerValueRange()), mEntryParamValueRange(IntegerValueRange()), mTrueParamValueRange(IntegerValueRange()), mFalseParamValueRange(IntegerValueRange()), mLocalParamValueRange(IntegerValueRange()), - mReverseValueRange(IntegerValueRange()), mEntryBlocks(nullptr), mLoadStoreInstructions(nullptr), mLoopPathBlocks(nullptr), mMemoryValueSize(0), mEntryMemoryValueSize(0) + mReverseValueRange(IntegerValueRange()), mLoadStoreInstructions(nullptr), mMemoryValueSize(0), mEntryMemoryValueSize(0) { mVisited = false; mInPath = false; @@ -4816,6 +4964,8 @@ void InterCodeBasicBlock::GenerateTraces(bool expand, bool compact) if (mInPath) mLoopHead = true; + assert(mIndex != 0 || !mLoopHead); + if (!mVisited) { mVisited = true; @@ -6559,6 +6709,29 @@ static int64 BuildLowerBitsMask(int64 v) return v; } +void InterCodeBasicBlock::UnionIntegerRanges(const InterCodeBasicBlock* block) +{ + if (mEntryValueRange.Size() > 0) + { + if (block->mEntryValueRange.Size()) + { + assert(mEntryValueRange.Size() == block->mEntryValueRange.Size()); + + for (int i = 0; i < mEntryValueRange.Size(); i++) + mEntryValueRange[i].Union(block->mEntryValueRange[i]); + } + else + mEntryValueRange.SetSize(0); + } + + for (int i = 0; i < mInstructions.Size(); i++) + { + assert(mInstructions[i]->IsEqual(block->mInstructions[i])); + mInstructions[i]->UnionRanges(block->mInstructions[i]); + } + +} + void InterCodeBasicBlock::MarkIntegerRangeBoundUp(int temp, int64 value, GrowingIntegerValueRangeArray& range) { range[temp].SetLimit(value, value); @@ -6675,7 +6848,7 @@ void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(const GrowingVariableArray { if (ins->mSrc[i].mTemp >= 0) { - ins->mSrc[i].mRange = mLocalValueRange[ins->mSrc[i].mTemp]; + ins->mSrc[i].mRange.MergeUnknown(mLocalValueRange[ins->mSrc[i].mTemp]); #if 1 if (ins->mCode != IC_ASSEMBLER&& ins->mSrc[i].mRange.mMinState == IntegerValueRange::S_BOUND && ins->mSrc[i].mRange.mMaxState == IntegerValueRange::S_BOUND && ins->mSrc[i].mRange.mMinValue == ins->mSrc[i].mRange.mMaxValue) { @@ -7324,7 +7497,7 @@ void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(const GrowingVariableArray vr.LimitMaxBound(65535); } #endif - ins->mDst.mRange = vr; + ins->mDst.mRange.MergeUnknown(vr); #if 1 if (vr.mMaxState == IntegerValueRange::S_BOUND && vr.mMinState == IntegerValueRange::S_BOUND && vr.mMaxValue == vr.mMinValue) { @@ -7850,21 +8023,16 @@ void InterCodeBasicBlock::RestartLocalIntegerRangeSets(int num, const GrowingVar mLocalParamValueRange.SetSize(paramVars.Size(), false); for (int i = 0; i < mEntryValueRange.Size(); i++) - { - IntegerValueRange& vr(mEntryValueRange[i]); - if (vr.mMinState == IntegerValueRange::S_UNBOUND) - vr.mMinState = IntegerValueRange::S_UNKNOWN; - if (vr.mMaxState == IntegerValueRange::S_UNBOUND) - vr.mMaxState = IntegerValueRange::S_UNKNOWN; - } + mEntryValueRange[i].Restart(); for (int i = 0; i < mEntryParamValueRange.Size(); i++) + mEntryParamValueRange[i].Restart(); + + for (int i = 0; i < mInstructions.Size(); i++) { - IntegerValueRange& vr(mEntryParamValueRange[i]); - if (vr.mMinState == IntegerValueRange::S_UNBOUND) - vr.mMinState = IntegerValueRange::S_UNKNOWN; - if (vr.mMaxState == IntegerValueRange::S_UNBOUND) - vr.mMaxState = IntegerValueRange::S_UNKNOWN; + mInstructions[i]->mDst.mRange.Restart(); + for (int j = 0; j < mInstructions[i]->mNumOperands; j++) + mInstructions[i]->mSrc[j].mRange.Restart(); } UpdateLocalIntegerRangeSets(localVars, paramVars); @@ -8335,7 +8503,7 @@ void InterCodeBasicBlock::PerformTempForwarding(const TempForwardingTable& forwa } else if (mLoopPrefix && checkloops) { - GrowingArray body(nullptr); + ExpandingArray body; body.Push(this); bool innerLoop = true; @@ -8619,6 +8787,67 @@ bool InterCodeBasicBlock::RemoveUnusedStaticStoreInstructions(const GrowingVaria return changed; } + +void InterCodeBasicBlock::BuildStaticVariableByteSet(const GrowingVariableArray& staticVars, int bsize) +{ + if (!mVisited) + { + mVisited = true; + + mLocalRequiredStatics = NumberSet(bsize); + mLocalProvidedStatics = NumberSet(bsize); + + mEntryRequiredStatics = NumberSet(bsize); + mEntryProvidedStatics = NumberSet(bsize); + mExitRequiredStatics = NumberSet(bsize); + mExitProvidedStatics = NumberSet(bsize); + + for (int i = 0; i < mInstructions.Size(); i++) + mInstructions[i]->FilterStaticVarsByteUsage(staticVars, mLocalRequiredStatics, mLocalProvidedStatics); + + mEntryRequiredStatics = mLocalRequiredStatics; + mExitProvidedStatics = mLocalProvidedStatics; + + if (mTrueJump) mTrueJump->BuildStaticVariableByteSet(staticVars, bsize); + if (mFalseJump) mFalseJump->BuildStaticVariableByteSet(staticVars, bsize); + } +} + +bool InterCodeBasicBlock::RemoveUnusedStaticStoreByteInstructions(const GrowingVariableArray& staticVars, int bsize) +{ + bool changed = false; + + if (!mVisited) + { + mVisited = true; + + NumberSet requiredVars(mExitRequiredStatics); + + int i; + + for (i = mInstructions.Size() - 1; i >= 0; i--) + { + if (mInstructions[i]->RemoveUnusedStaticStoreByteInstructions(this, staticVars, requiredVars)) + changed = true; + } + + if (mTrueJump) + { + if (mTrueJump->RemoveUnusedStaticStoreByteInstructions(staticVars, bsize)) + changed = true; + } + if (mFalseJump) + { + if (mFalseJump->RemoveUnusedStaticStoreByteInstructions(staticVars, bsize)) + changed = true; + } + } + + return changed; +} + + + void InterCodeBasicBlock::BuildLocalVariableSets(const GrowingVariableArray& localVars, const GrowingVariableArray& params, InterMemory paramMemory) { int i; @@ -11145,7 +11374,7 @@ bool InterCodeBasicBlock::ForwardLoopMovedTemp(void) else if (mTrueJump->mFalseJump == mTrueJump) eblock = mTrueJump->mTrueJump; - if (eblock) + if (eblock && eblock->mNumEntries == 1) { int i = mInstructions.Size() - 1; while (i >= 0) @@ -12277,7 +12506,7 @@ InterCodeBasicBlock* InterCodeBasicBlock::BuildLoopPrefix(void) return mLoopPrefix ? mLoopPrefix : this; } -bool InterCodeBasicBlock::CollectLoopBody(InterCodeBasicBlock* head, GrowingArray & body) +bool InterCodeBasicBlock::CollectLoopBody(InterCodeBasicBlock* head, ExpandingArray & body) { if (mLoopHead) return this == head; @@ -12293,7 +12522,7 @@ bool InterCodeBasicBlock::CollectLoopBody(InterCodeBasicBlock* head, GrowingArra return true; } -bool InterCodeBasicBlock::CollectLoopBodyRecursive(InterCodeBasicBlock* head, GrowingArray& body) +bool InterCodeBasicBlock::CollectLoopBodyRecursive(InterCodeBasicBlock* head, ExpandingArray& body) { if (this == head) return true; @@ -12309,7 +12538,7 @@ bool InterCodeBasicBlock::CollectLoopBodyRecursive(InterCodeBasicBlock* head, Gr return true; } -void InterCodeBasicBlock::CollectLoopPath(const GrowingArray& body, GrowingArray& path) +void InterCodeBasicBlock::CollectLoopPath(const ExpandingArray& body, ExpandingArray& path) { if (body.IndexOf(this) >= 0) { @@ -12320,7 +12549,7 @@ void InterCodeBasicBlock::CollectLoopPath(const GrowingArrayCollectLoopPath(body, mLoopPathBlocks); if (mFalseJump) { - GrowingArray fpath(nullptr); + ExpandingArray fpath; if (!mFalseJump->mLoopHead) mFalseJump->CollectLoopPath(body, fpath); @@ -12526,8 +12755,12 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar mins->mConst.mType = ai->mDst.mType; mins->mConst.mIntConst = num; mins->mDst = ai->mDst; + mins->mDst.mRange.SetLimit(num, num); mLoopPrefix->mInstructions.Insert(mLoopPrefix->mInstructions.Size() - 1, mins); + if (mEntryValueRange.Size()) + mEntryValueRange[ai->mSrc[1].mTemp].SetLimit(1, num); + ai->mSrc[1].mRange.SetLimit(1, num); ai->mDst.mRange.SetLimit(0, num - 1); ci->mSrc[1].mRange.SetLimit(0, num - 1); @@ -12731,7 +12964,7 @@ void InterCodeBasicBlock::InnerLoopOptimization(const NumberSet& aliasedParams) if (mLoopHead) { - GrowingArray body(nullptr), path(nullptr); + ExpandingArray body, path; body.Push(this); bool innerLoop = true; @@ -13541,7 +13774,7 @@ void InterCodeBasicBlock::PushMoveOutOfLoop(void) bool InterCodeBasicBlock::CheckSingleBlockLimitedLoop(InterCodeBasicBlock*& pblock, int64& nloop) { - if (mLoopHead && mNumEntries == 2 && mFalseJump && (mTrueJump == this || mFalseJump == this) && mInstructions.Size() > 3) + if (mLoopHead && mEntryBlocks.Size() == 2 && mFalseJump && (mTrueJump == this || mFalseJump == this) && mInstructions.Size() > 3) { int nins = mInstructions.Size(); @@ -14857,7 +15090,10 @@ void InterCodeBasicBlock::CheckBlocks(void) for (int i = 0; i < mInstructions.Size(); i++) assert(mInstructions[i] != nullptr); - + + assert(!mTrueJump || mTrueJump->mIndex > 0); + assert(!mFalseJump || mFalseJump->mIndex > 0); + if (mTrueJump) mTrueJump->CheckBlocks(); if (mFalseJump) mFalseJump->CheckBlocks(); } @@ -17163,6 +17399,43 @@ void InterCodeProcedure::RemoveUnusedLocalStoreInstructions(void) } } +void InterCodeProcedure::RemoveUnusedPartialStoreInstructions(void) +{ + if (mCompilerOptions & COPT_OPTIMIZE_BASIC) + { + if (mModule->mGlobalVars.Size()) + { + int byteIndex = 0; + for (int i = 0; i < mModule->mGlobalVars.Size(); i++) + { + if (mModule->mGlobalVars[i]) + { + mModule->mGlobalVars[i]->mByteIndex = byteIndex; + byteIndex += mModule->mGlobalVars[i]->mSize; + } + } + + do { + ResetVisited(); + mEntryBlock->BuildStaticVariableByteSet(mModule->mGlobalVars, byteIndex); + + ResetVisited(); + mEntryBlock->BuildGlobalProvidedStaticVariableSet(mModule->mGlobalVars, NumberSet(byteIndex)); + + NumberSet totalRequired2(byteIndex); + + do { + ResetVisited(); + } while (mEntryBlock->BuildGlobalRequiredStaticVariableSet(mModule->mGlobalVars, totalRequired2)); + + ResetVisited(); + } while (mEntryBlock->RemoveUnusedStaticStoreByteInstructions(mModule->mGlobalVars, byteIndex)); + + DisassembleDebug("removed unused static byte stores"); + } + } +} + void InterCodeProcedure::RemoveUnusedStoreInstructions(InterMemory paramMemory) { if (mCompilerOptions & COPT_OPTIMIZE_BASIC) @@ -17652,7 +17925,7 @@ void InterCodeProcedure::Close(void) { GrowingTypeArray tstack(IT_NONE); - CheckFunc = !strcmp(mIdent->mString, "atoi"); + CheckFunc = !strcmp(mIdent->mString, "ftoa"); CheckCase = false; mEntryBlock = mBlocks[0]; @@ -18136,6 +18409,8 @@ void InterCodeProcedure::Close(void) Disassemble("gcp-"); #endif + CheckCase = true; + #if 1 RebuildIntegerRangeSet(); #endif @@ -18349,17 +18624,27 @@ void InterCodeProcedure::Close(void) } #endif + DisassembleDebug("PreLoopTemp"); BuildDataFlowSets(); ResetVisited(); mEntryBlock->ForwardLoopMovedTemp(); + DisassembleDebug("PostLoopTemp"); + CheckFinal(); + DisassembleDebug("PreConstP"); #if 1 do { + DisassembleDebug("InConstP"); + CheckFinal(); TempForwarding(); + CheckFinal(); } while (GlobalConstantPropagation()); + CheckFinal(); BuildTraces(false); DisassembleDebug("Rebuilt traces"); + CheckFinal(); + PeepholeOptimization(); TempForwarding(); @@ -18404,8 +18689,12 @@ void InterCodeProcedure::Close(void) ReduceTemporaries(); + CheckBlocks(); + MergeBasicBlocks(); + CheckBlocks(); + DisassembleDebug("TempForward Rev 1"); BuildDataFlowSets(); @@ -18414,6 +18703,8 @@ void InterCodeProcedure::Close(void) RemoveUnusedInstructions(); + CheckBlocks(); + BuildDataFlowSets(); DisassembleDebug("TempForward Rev 2"); @@ -18422,6 +18713,8 @@ void InterCodeProcedure::Close(void) DisassembleDebug("TempForward Rev 3"); + CheckBlocks(); + BuildLoopPrefix(); BuildDataFlowSets(); @@ -18467,6 +18760,8 @@ void InterCodeProcedure::Close(void) } #endif + RemoveUnusedPartialStoreInstructions(); + MapVariables(); DisassembleDebug("mapped variabled"); @@ -18824,9 +19119,11 @@ void InterCodeProcedure::MergeBasicBlocks(void) if (block->mNumEntries) { int j = 0; - while (j < i && !(mBlocks[j]->mNumEntries && mBlocks[j]->IsEqual(block))) + while (j < i && !(mBlocks[j]->mNumEntries && mBlocks[j]->IsEqual(block) && mBlocks[j]->mIndex != 0)) j++; blockMap[i] = mBlocks[j]; + if (i != j) + mBlocks[j]->UnionIntegerRanges(block); } } @@ -18934,6 +19231,8 @@ void InterCodeProcedure::MergeBasicBlocks(void) { assert(mblocks[j]->mInstructions[mblocks[j]->mInstructions.Size() - 1]->mCode == IC_JUMP); assert(mblocks[j]->mInstructions[mblocks[j]->mInstructions.Size() - 2]->IsEqual(ins)); + + ins->UnionRanges(mblocks[j]->mInstructions[mblocks[j]->mInstructions.Size() - 2]); mblocks[j]->mInstructions.Remove(mblocks[j]->mInstructions.Size() - 2); } diff --git a/oscar64/InterCode.h b/oscar64/InterCode.h index a38b802..3169011 100644 --- a/oscar64/InterCode.h +++ b/oscar64/InterCode.h @@ -148,6 +148,7 @@ public: ~IntegerValueRange(void); void Reset(void); + void Restart(void); int64 mMinValue, mMaxValue; int mMinExpanded, mMaxExpanded; @@ -163,8 +164,10 @@ public: bool Same(const IntegerValueRange& range) const; bool Merge(const IntegerValueRange& range, bool head, bool initial); void Expand(const IntegerValueRange& range); + void Union(const IntegerValueRange& range); void Limit(const IntegerValueRange& range); + void MergeUnknown(const IntegerValueRange& range); void SetLimit(int64 minValue, int64 maxValue); bool IsConstant(void) const; @@ -245,7 +248,7 @@ class InterVariable { public: bool mUsed, mAliased, mTemp; - int mIndex, mSize, mOffset, mTempIndex; + int mIndex, mSize, mOffset, mTempIndex, mByteIndex; int mNumReferences; const Ident * mIdent; LinkerObject * mLinkerObject; @@ -318,10 +321,12 @@ public: void FilterTempUsage(NumberSet& requiredTemps, NumberSet& providedTemps); void FilterVarsUsage(const GrowingVariableArray& localVars, NumberSet& requiredVars, NumberSet& providedVars, const GrowingVariableArray& params, NumberSet& requiredParams, NumberSet& providedParams, InterMemory paramMemory); void FilterStaticVarsUsage(const GrowingVariableArray& staticVars, NumberSet& requiredVars, NumberSet& providedVars); - + void FilterStaticVarsByteUsage(const GrowingVariableArray& staticVars, NumberSet& requiredVars, NumberSet& providedVars); + bool RemoveUnusedResultInstructions(InterInstruction* pre, NumberSet& requiredTemps); bool RemoveUnusedStoreInstructions(const GrowingVariableArray& localVars, NumberSet& requiredVars, const GrowingVariableArray& params, NumberSet& requiredParams, InterMemory paramMemory); bool RemoveUnusedStaticStoreInstructions(InterCodeBasicBlock * block, const GrowingVariableArray& staticVars, NumberSet& requiredVars, GrowingInstructionPtrArray& storeIns); + bool RemoveUnusedStaticStoreByteInstructions(InterCodeBasicBlock* block, const GrowingVariableArray& staticVars, NumberSet& requiredVars); void PerformValueForwarding(GrowingInstructionPtrArray& tvalue, FastNumberSet& tvalid); void BuildCallerSaveTempSet(NumberSet& requiredTemps, NumberSet& callerSaveTemps); @@ -343,6 +348,8 @@ public: bool ConstantFolding(void); bool ConstantFoldingRelationRange(void); + void UnionRanges(InterInstruction* ins); + void Disassemble(FILE* file, InterCodeProcedure * proc); }; @@ -381,7 +388,7 @@ public: GrowingArray mMemoryValueSize, mEntryMemoryValueSize; - GrowingArray mEntryBlocks, mLoopPathBlocks; + ExpandingArray mEntryBlocks, mLoopPathBlocks; GrowingInstructionPtrArray mMergeTValues, mMergeAValues; ValueSet mMergeValues; @@ -435,6 +442,9 @@ public: bool BuildGlobalRequiredStaticVariableSet(const GrowingVariableArray& staticVars, NumberSet& fromRequiredVars); bool RemoveUnusedStaticStoreInstructions(const GrowingVariableArray& staticVars); + void BuildStaticVariableByteSet(const GrowingVariableArray& staticVars, int bsize); + bool RemoveUnusedStaticStoreByteInstructions(const GrowingVariableArray& staticVars, int bsize); + bool CheckSingleBlockLimitedLoop(InterCodeBasicBlock*& pblock, int64 & nloop); void RestartLocalIntegerRangeSets(int num, const GrowingVariableArray& localVars, const GrowingVariableArray& paramVars); @@ -443,6 +453,7 @@ public: bool BuildGlobalIntegerRangeSets(bool initial, const GrowingVariableArray& localVars, const GrowingVariableArray& paramVars); void SimplifyIntegerRangeRelops(void); void MarkIntegerRangeBoundUp(int temp, int64 value, GrowingIntegerValueRangeArray& range); + void UnionIntegerRanges(const InterCodeBasicBlock* block); bool CombineIndirectAddressing(void); @@ -551,9 +562,9 @@ public: void SingleBlockLoopUnrolling(void); bool SingleBlockLoopPointerSplit(int& spareTemps); bool SingleBlockLoopPointerToByte(int& spareTemps); - bool CollectLoopBody(InterCodeBasicBlock* head, GrowingArray & body); - bool CollectLoopBodyRecursive(InterCodeBasicBlock* head, GrowingArray& body); - void CollectLoopPath(const GrowingArray& body, GrowingArray& path); + bool CollectLoopBody(InterCodeBasicBlock* head, ExpandingArray & body); + bool CollectLoopBodyRecursive(InterCodeBasicBlock* head, ExpandingArray& body); + void CollectLoopPath(const ExpandingArray& body, ExpandingArray& path); void InnerLoopOptimization(const NumberSet& aliasedParams); void PushMoveOutOfLoop(void); @@ -670,6 +681,7 @@ protected: void BuildLoopPrefix(void); void SingleAssignmentForwarding(void); void RemoveUnusedStoreInstructions(InterMemory paramMemory); + void RemoveUnusedPartialStoreInstructions(void); void RemoveUnusedLocalStoreInstructions(void); void MergeCommonPathInstructions(void); void PushSinglePathResultInstructions(void); diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index 8797913..bfdd59e 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -17948,6 +17948,183 @@ bool NativeCodeBasicBlock::RemoveDoubleZPStore(void) return changed; } +struct ValueNumbers +{ + int zvalues[256], avalue, xvalue, yvalue; + int ivalue = 0; + + void Reset(void) + { + for (int i = 0; i < 256; i++) + zvalues[i] = ivalue++; + avalue = ivalue++; + xvalue = ivalue++; + yvalue = ivalue++; + } + +}; + +bool NativeCodeBasicBlock::LocalZeroPageValueNumbering(void) +{ + bool changed = false; + + if (!mVisited) + { + mVisited = true; + + ValueNumbers vn; + vn.Reset(); + + for (int i = 0; i < mIns.Size(); i++) + { + NativeCodeInstruction& ins = mIns[i]; + + if (ins.mType == ASMIT_LDA && ins.mMode == ASMIM_ZERO_PAGE) + { + if (vn.avalue == vn.zvalues[ins.mAddress]) + { + if (ins.mLive & LIVE_CPU_REG_Z) + { + ins.mType = ASMIT_ORA; + ins.mMode = ASMIM_IMMEDIATE; + ins.mAddress = 0; + changed = true; + } + else + { + ins.mType = ASMIT_NOP; + ins.mMode = ASMIM_IMPLIED; + changed = true; + } + } + else + vn.avalue = vn.zvalues[ins.mAddress]; + } + else if (ins.mType == ASMIT_LDX && ins.mMode == ASMIM_ZERO_PAGE) + { + if (vn.xvalue == vn.zvalues[ins.mAddress] && !(ins.mLive & LIVE_CPU_REG_Z)) + { + ins.mType = ASMIT_NOP; + ins.mMode = ASMIM_IMPLIED; + changed = true; + } + else + vn.xvalue = vn.zvalues[ins.mAddress]; + } + else if (ins.mType == ASMIT_LDY && ins.mMode == ASMIM_ZERO_PAGE) + { + if (vn.yvalue == vn.zvalues[ins.mAddress] && !(ins.mLive & LIVE_CPU_REG_Z)) + { + ins.mType = ASMIT_NOP; + ins.mMode = ASMIM_IMPLIED; + changed = true; + } + else + vn.yvalue = vn.zvalues[ins.mAddress]; + } + else if (ins.mType == ASMIT_STA && ins.mMode == ASMIM_ZERO_PAGE) + { + if (vn.avalue == vn.zvalues[ins.mAddress]) + { + ins.mType = ASMIT_NOP; + ins.mMode = ASMIM_IMPLIED; + changed = true; + } + else + vn.zvalues[ins.mAddress] = vn.avalue; + } + else if (ins.mType == ASMIT_STX && ins.mMode == ASMIM_ZERO_PAGE) + { + if (vn.xvalue == vn.zvalues[ins.mAddress]) + { + ins.mType = ASMIT_NOP; + ins.mMode = ASMIM_IMPLIED; + changed = true; + } + else + vn.zvalues[ins.mAddress] = vn.xvalue; + } + else if (ins.mType == ASMIT_STY && ins.mMode == ASMIM_ZERO_PAGE) + { + if (vn.yvalue == vn.zvalues[ins.mAddress]) + { + ins.mType = ASMIT_NOP; + ins.mMode = ASMIM_IMPLIED; + changed = true; + } + else + vn.zvalues[ins.mAddress] = vn.yvalue; + } + else if (ins.mType == ASMIT_TAX) + { + if (vn.avalue == vn.xvalue && !(ins.mLive & LIVE_CPU_REG_Z)) + { + ins.mType = ASMIT_NOP; + ins.mMode = ASMIM_IMPLIED; + changed = true; + } + else + vn.xvalue = vn.avalue; + } + else if (ins.mType == ASMIT_TAY) + { + if (vn.avalue == vn.yvalue && !(ins.mLive & LIVE_CPU_REG_Z)) + { + ins.mType = ASMIT_NOP; + ins.mMode = ASMIM_IMPLIED; + changed = true; + } + else + vn.yvalue = vn.avalue; + } + else if (ins.mType == ASMIT_TXA) + { + if (vn.xvalue == vn.avalue && !(ins.mLive & LIVE_CPU_REG_Z)) + { + ins.mType = ASMIT_NOP; + ins.mMode = ASMIM_IMPLIED; + changed = true; + } + else + vn.avalue = vn.xvalue; + } + else if (ins.mType == ASMIT_TYA) + { + if (vn.yvalue == vn.avalue && !(ins.mLive & LIVE_CPU_REG_Z)) + { + ins.mType = ASMIT_NOP; + ins.mMode = ASMIM_IMPLIED; + changed = true; + } + else + vn.avalue = vn.yvalue; + } + else if (ins.mType == ASMIT_JSR) + vn.Reset(); + else + { + if (ins.ChangesAccu()) + vn.avalue = vn.ivalue++; + if (ins.ChangesXReg()) + vn.xvalue = vn.ivalue++; + if (ins.ChangesYReg()) + vn.yvalue = vn.ivalue++; + if (ins.mMode == ASMIM_ZERO_PAGE && ins.ChangesAddress()) + vn.zvalues[ins.mAddress] = vn.ivalue++; + } + } + + + if (mTrueJump && mTrueJump->LocalZeroPageValueNumbering()) + changed = true; + + if (mFalseJump && mFalseJump->LocalZeroPageValueNumbering()) + changed = true; + } + + return changed; +} + bool NativeCodeBasicBlock::LocalRegisterXYMap(void) { bool changed = false; @@ -27176,6 +27353,50 @@ bool NativeCodeBasicBlock::FoldShiftORAIntoLoadImmUp(int at) return false; } +// TAX +// CLC +// ADC #1, 2, 3 +// STA +// +// convert to INX/DEX/STX when X not needed anymore + +bool NativeCodeBasicBlock::MoveTAXADCSTADown(int at) +{ + int n = mIns[at + 2].mAddress; + int addr = mIns[at + 3].mAddress; + + int si = at + 4; + while (si < mIns.Size()) + { + if (!(mIns[si].mLive & LIVE_CPU_REG_X)) + { + if (mIns[si].mLive & LIVE_CPU_REG_Z) + return false; + + mIns[si].mLive |= LIVE_CPU_REG_X; + si++; + + for (int i = 0; i < n; i++) + { + mIns.Insert(si, NativeCodeInstruction(mIns[at].mIns, ASMIT_INX)); + si++; + } + mIns.Insert(si, NativeCodeInstruction(mIns[at].mIns, ASMIT_STX, ASMIM_ZERO_PAGE, addr)); + return true; + } + + if (mIns[si].ChangesXReg()) + return false; + if (mIns[si].ReferencesZeroPage(addr)) + return false; + + si++; + } + + return false; +} + + // CLC // LDA zp0 // ADC #1, 2, 3 @@ -31487,7 +31708,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoop(NativeCodeProcedure * proc, bool f { // check for usage of Y register - bool yother = false, yindex = false; + bool yother = false, yindex = false, xother = false, xindex = false; int zreg = mIns[sz - 1].mAddress; int yinc = 0, xinc = 0; @@ -31495,6 +31716,8 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoop(NativeCodeProcedure * proc, bool f if (mIns[sz - 1].mLive & LIVE_CPU_REG_Y) yother = true; + if (mIns[sz - 1].mLive & LIVE_CPU_REG_X) + xother = true; for (int i = 0; i < sz - 1; i++) { @@ -31518,6 +31741,27 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoop(NativeCodeProcedure * proc, bool f yother = true; else if (mIns[i].mType != ASMIT_LDA && mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == zreg) yother = true; + + if (mIns[i].mType == ASMIT_TAX) + xother = true; + else if (mIns[i].mType == ASMIT_INX) + xinc++; + else if (mIns[i].mType == ASMIT_DEX) + xinc--; + else if (mIns[i].mType == ASMIT_LDX) + { + if (mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == zreg && xinc >= -1 && xinc <= 1) + { + xinc = 0; + xindex = true; + } + else + xother = true; + } + else if (!xindex && (mIns[i].mType == ASMIT_STX || mIns[i].mType == ASMIT_TXA || mIns[i].mMode == ASMIM_ABSOLUTE_X)) + xother = true; + else if (mIns[i].mType != ASMIT_LDA && mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == zreg) + xother = true; } if (!yother) @@ -31601,6 +31845,89 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoop(NativeCodeProcedure * proc, bool f changed = true; + assert(mIns.Size() == 0 || mIns[0].mType != ASMIT_INV); + } + else if (!xother) + { + int linc = xinc + 1; + + NativeCodeBasicBlock* lblock = proc->AllocateBlock(); + NativeCodeBasicBlock* eblock = proc->AllocateBlock(); + + yinc = 0; + for (int i = 0; i + 1 < sz; i++) + { + mIns[i].mLive |= LIVE_CPU_REG_X; + + if (mIns[i].mType == ASMIT_LDA && mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == zreg) + lblock->mIns.Push(NativeCodeInstruction(mIns[i].mIns, ASMIT_TXA, ASMIM_IMPLIED)); + else if (mIns[i].mType == ASMIT_LDX) + { + if (yinc > 0) + lblock->mIns.Push(NativeCodeInstruction(mIns[i].mIns, ASMIT_DEX)); + else if (yinc < 0) + lblock->mIns.Push(NativeCodeInstruction(mIns[i].mIns, ASMIT_INX)); + yinc = 0; + } + else + { + lblock->mIns.Push(mIns[i]); + if (mIns[i].mType == ASMIT_INX) + yinc++; + else if (mIns[i].mType == ASMIT_DEX) + yinc--; + } + } + + if (linc == 0) + { + lblock->mIns.Push(NativeCodeInstruction(iins, ASMIT_CPX, ASMIM_IMMEDIATE, 0)); + } + else + { + while (linc < 0) + { + lblock->mIns.Push(NativeCodeInstruction(iins, ASMIT_INX, ASMIM_IMPLIED)); + linc++; + } + while (linc > 0) + { + lblock->mIns.Push(NativeCodeInstruction(iins, ASMIT_DEX, ASMIM_IMPLIED)); + linc--; + } + } + + lblock->mBranch = mBranch; + lblock->mTrueJump = lblock; + lblock->mFalseJump = eblock; + + eblock->mIns.Push(NativeCodeInstruction(iins, ASMIT_STX, ASMIM_ZERO_PAGE, zreg)); + eblock->mBranch = ASMIT_JMP; + eblock->mTrueJump = mFalseJump; + eblock->mFalseJump = nullptr; + + lblock->mEntryRequiredRegs = mEntryRequiredRegs; + lblock->mExitRequiredRegs = mExitRequiredRegs; + eblock->mEntryRequiredRegs = mExitRequiredRegs; + eblock->mExitRequiredRegs = mExitRequiredRegs; + mExitRequiredRegs = mEntryRequiredRegs; + mExitRequiredRegs += CPU_REG_X; + lblock->mEntryRequiredRegs += CPU_REG_X; + lblock->mExitRequiredRegs += CPU_REG_X; + eblock->mEntryRequiredRegs += CPU_REG_X; + + mIns.SetSize(0); + mIns.Push(NativeCodeInstruction(iins, ASMIT_LDX, ASMIM_ZERO_PAGE, zreg)); + mBranch = ASMIT_JMP; + mTrueJump = lblock; + mFalseJump = nullptr; + + lblock->OptimizeSimpleLoopInvariant(proc, this, eblock, full); + + lblock->CheckLive(); + + changed = true; + assert(mIns.Size() == 0 || mIns[0].mType != ASMIT_INV); } } @@ -34543,6 +34870,22 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass #endif +#if 1 + for (int i = 0; i + 3 < mIns.Size(); i++) + { + if (mIns[i + 0].mType == ASMIT_TAX && mIns[i + 1].mType == ASMIT_CLC && + mIns[i + 2].mType == ASMIT_ADC && mIns[i + 2].mMode == ASMIM_IMMEDIATE && mIns[i + 2].mAddress <= 2 && + mIns[i + 3].mType == ASMIT_STA && mIns[i + 3].mMode == ASMIM_ZERO_PAGE && !(mIns[i + 3].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_C | LIVE_CPU_REG_Z))) + + { + if (MoveTAXADCSTADown(i)) + changed = true; + } + } + + CheckLive(); +#endif + #if 1 for (int i = 0; i < mIns.Size(); i++) { @@ -42138,6 +42481,12 @@ void NativeCodeProcedure::Optimize(void) } #endif + if (step > 1) + { + ResetVisited(); + mEntryBlock->LocalZeroPageValueNumbering(); + } + int t = 0; #if 1 do diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index 6ec4307..79d26c4 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -446,6 +446,8 @@ public: bool FoldShiftORAIntoLoadImmUp(int at); bool MoveSimpleADCToINCDECDown(int at); + bool MoveTAXADCSTADown(int at); + bool MoveZeroPageCrossBlockUp(int at, const NativeCodeInstruction & lins, const NativeCodeInstruction & sins); bool ShortcutCrossBlockMoves(NativeCodeProcedure* proc); @@ -570,6 +572,7 @@ public: bool LocalRegisterXYMap(void); bool ReduceLocalYPressure(void); bool ReduceLocalXPressure(void); + bool LocalZeroPageValueNumbering(void); bool CombineZPPair(int at, int r0, int r1, bool use0, bool use1, bool & swap); bool RemoveDoubleZPStore(void); diff --git a/oscar64/NumberSet.cpp b/oscar64/NumberSet.cpp index 9f269b0..74b2dd8 100644 --- a/oscar64/NumberSet.cpp +++ b/oscar64/NumberSet.cpp @@ -73,6 +73,34 @@ void NumberSet::Reset(int size, bool set) } } +void NumberSet::AddRange(int elem, int num) +{ + for (int i = 0; i < num; i++) + *this += elem + i; +} + +void NumberSet::SubRange(int elem, int num) +{ + for (int i = 0; i < num; i++) + *this -= elem + i; +} + +bool NumberSet::RangeClear(int elem, int num) const +{ + for (int i = 0; i < num; i++) + if ((*this)[elem + i]) + return false; + return true; +} + +bool NumberSet::RangeFilled(int elem, int num) const +{ + for (int i = 0; i < num; i++) + if (!(*this)[elem + i]) + return false; + return true; +} + void NumberSet::Fill(void) { int i; diff --git a/oscar64/NumberSet.h b/oscar64/NumberSet.h index 624e398..45cea76 100644 --- a/oscar64/NumberSet.h +++ b/oscar64/NumberSet.h @@ -34,6 +34,10 @@ public: void Clear(void); void Fill(void); + void AddRange(int elem, int num); + void SubRange(int elem, int num); + bool RangeClear(int elem, int num) const; + bool RangeFilled(int elem, int num) const; int Size(void) { return size; } };