diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index 1741ddb..9ddb412 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -99,6 +99,61 @@ void IntegerValueRange::Restart(void) } +bool IntegerValueRange::Weaker(const IntegerValueRange& range) const +{ + bool minWeak = false, maxWeak = false; + + if (range.mMinState == S_UNKNOWN) + minWeak = false; + else if (mMinState == S_UNKNOWN) + minWeak = true; + else if (mMinState == S_BOUND) + { + if (range.mMinState >= S_WEAK && mMinValue < range.mMinValue) + minWeak = true; + } + else if (mMinState == S_WEAK) + { + if (range.mMinState == S_BOUND) + minWeak = true; + if (range.mMinState == S_WEAK && mMinValue != range.mMinValue) + minWeak = true; + } + else if (mMinState == S_UNBOUND) + { + if (mMinExpanded >= 32 && range.mMinState == S_WEAK) + ; + else if (range.mMinState != S_UNBOUND) + minWeak = true; + } + + if (range.mMaxState == S_UNKNOWN) + maxWeak = false; + else if (mMaxState == S_UNKNOWN) + maxWeak = true; + else if (mMaxState == S_BOUND) + { + if (range.mMaxState >= S_WEAK && mMaxValue > range.mMaxValue) + maxWeak = true; + } + else if (mMaxState == S_WEAK) + { + if (range.mMaxState == S_BOUND) + maxWeak = true; + if (range.mMaxState == S_WEAK && mMaxValue != range.mMaxValue) + maxWeak = true; + } + else if (mMaxState == S_UNBOUND) + { + if (mMaxExpanded >= 32 && range.mMaxState == S_WEAK) + ; + else if (range.mMaxState != S_UNBOUND) + maxWeak = true; + } + + return minWeak || maxWeak; +} + bool IntegerValueRange::Same(const IntegerValueRange& range) const { if (mMinState == range.mMinState && mMaxState == range.mMaxState) @@ -266,6 +321,91 @@ void IntegerValueRange::SetBounds(State minState, int64 minValue, State maxState void IntegerValueRange::Expand(const IntegerValueRange& range) { + if (mMinState == S_BOUND) + { + if (range.mMinState == S_BOUND) + { + if (range.mMinValue > mMinValue) + mMinValue = range.mMinValue; + } + else if (range.mMinState == S_WEAK) + { + if (range.mMinValue > mMinValue) + { + mMinValue = range.mMinValue; + mMinState = S_WEAK; + } + } + } + else if (mMinState == S_WEAK) + { + if (range.mMinState == S_BOUND) + { + mMinState = range.mMinState; + mMinValue = range.mMinValue; + } + else if (range.mMinState == S_WEAK) + { + if (range.mMinValue != mMinValue) + { + mMinExpanded++; + mMinValue = range.mMinValue; + if (mMinExpanded >= 32) + mMinState = S_UNBOUND; + } + } + else if (range.mMinState == S_UNBOUND) + mMinState = S_UNBOUND; + } + else if (mMinState == S_UNKNOWN || range.mMinState != S_UNKNOWN) + { + mMinState = range.mMinState; + mMinValue = range.mMinValue; + } + + if (mMaxState == S_BOUND) + { + if (range.mMaxState == S_BOUND) + { + if (range.mMaxValue < mMaxValue) + mMaxValue = range.mMaxValue; + } + else if (range.mMaxState == S_WEAK) + { + if (range.mMaxValue < mMaxValue) + { + mMaxValue = range.mMaxValue; + mMaxState = S_WEAK; + } + } + } + else if (mMaxState == S_WEAK) + { + if (range.mMaxState == S_BOUND) + { + mMaxValue = range.mMaxValue; + mMaxState = S_BOUND; + } + else if (range.mMaxState == S_WEAK) + { + if (range.mMaxValue != mMaxValue) + { + mMaxExpanded++; + mMaxValue = range.mMaxValue; + if (mMaxExpanded >= 32) + mMaxState = S_UNBOUND; + } + } + else if (range.mMaxState == S_UNBOUND) + mMaxState = S_UNBOUND; + } + else if (mMaxState == S_UNKNOWN || range.mMaxState != S_UNKNOWN) + { + mMaxState = range.mMaxState; + mMaxValue = range.mMaxValue; + } + +#if 0 if (range.mMinState == S_BOUND && mMinState == S_BOUND && range.mMinValue < mMinValue) { mMinValue = range.mMinValue; @@ -292,6 +432,7 @@ void IntegerValueRange::Expand(const IntegerValueRange& range) mMaxState = range.mMaxState; mMaxValue = range.mMaxValue; } +#endif } void IntegerValueRange::Union(const IntegerValueRange& range) @@ -6103,13 +6244,20 @@ bool InterCodeBasicBlock::IsDominator(InterCodeBasicBlock* block) void InterCodeBasicBlock::CollectEntries(void) { + if (mInPath) + { + mLoopDebug = true; + mLoopHead = true; + } mNumEntries++; if (!mVisited) { mVisited = true; + mInPath = true; if (mTrueJump) mTrueJump->CollectEntries(); if (mFalseJump) mFalseJump->CollectEntries(); + mInPath = false; } } @@ -6133,6 +6281,61 @@ static bool IsInfiniteLoop(InterCodeBasicBlock* head, InterCodeBasicBlock* block return false; } +bool InterCodeBasicBlock::StripLoopHead(void) +{ + bool changed = false; + + if (!mVisited) + { + if (mLoopHead && mFalseJump && mTrueJump != this && mFalseJump != this && mLoopPrefix && mInstructions.Size() < 10) + { +// printf("StripA %s %d\n", mProc->mIdent->mString, mIndex); + + ExpandingArray lblocks; + if (CollectSingleEntryGenericLoop(lblocks)) + { +// printf("StripB %s %d\n", mProc->mIdent->mString, mIndex); + + mLoopPrefix->mInstructions.SetSize(0); + for (int i = 0; i < mInstructions.Size(); i++) + mLoopPrefix->mInstructions.Push(mInstructions[i]->Clone()); + + mLoopPrefix->mFalseJump = mFalseJump; + mLoopPrefix->mTrueJump = mTrueJump; + + mEntryBlocks.RemoveAll(mLoopPrefix); + mNumEntries--; + mLoopHead = false; + + mTrueJump->mEntryBlocks.Push(mLoopPrefix); + mTrueJump->mNumEntries++; + mFalseJump->mEntryBlocks.Push(mLoopPrefix); + mFalseJump->mNumEntries++; + + if (!lblocks.Contains(mTrueJump)) + { + mFalseJump->mLoopHead = true; + } + else if (!lblocks.Contains(mFalseJump)) + { + mTrueJump->mLoopHead = true; + } + + changed = true; + } + } + + mVisited = true; + + if (mTrueJump && mTrueJump->StripLoopHead()) + changed = true; + if (mFalseJump && mFalseJump->StripLoopHead()) + changed = true; + } + + return changed; +} + void InterCodeBasicBlock::GenerateTraces(int expand, bool compact) { if (mInPath) @@ -6237,6 +6440,9 @@ void InterCodeBasicBlock::GenerateTraces(int expand, bool compact) } else if (mTrueJump && !mFalseJump && ((mTrueJump->mInstructions.Size() < expand && mTrueJump->mInstructions.Size() > 1 && !mLoopHead) || mTrueJump->mNumEntries == 1) && !mTrueJump->mLoopHead && !IsInfiniteLoop(mTrueJump, mTrueJump)) { +// if (mLoopDebug) +// printf("StripC %s %d %d\n", mProc->mIdent->mString, mTrueJump->mIndex, mTrueJump->mInstructions.Size()); + mTrueJump->mNumEntries--; int n = mTrueJump->mNumEntries; @@ -8189,11 +8395,11 @@ bool InterCodeBasicBlock::BuildGlobalIntegerRangeSets(bool initial, const Growin assert(mLocalParamValueRange.Size() == paramVars.Size()); for (int i = 0; i < mProc->mLocalValueRange.Size(); i++) - if (!mProc->mLocalValueRange[i].Same(mEntryValueRange[i])) + if (mEntryValueRange[i].Weaker(mProc->mLocalValueRange[i])) changed = true; for (int i = 0; i < mLocalParamValueRange.Size(); i++) - if (!mLocalParamValueRange[i].Same(mEntryParamValueRange[i])) + if (mEntryParamValueRange[i].Weaker(mLocalParamValueRange[i])) changed = true; if (mVisited && mNumEntered >= 2 * mEntryBlocks.Size()) @@ -9551,6 +9757,15 @@ void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(const GrowingVariableArray mTrueValueRange[s].mMaxState = IntegerValueRange::S_BOUND; mTrueValueRange[s].mMaxValue = 1; + mFalseValueRange[s].mMinState = IntegerValueRange::S_BOUND; + mFalseValueRange[s].mMinValue = 0; + mFalseValueRange[s].mMaxState = IntegerValueRange::S_BOUND; + mFalseValueRange[s].mMaxValue = 0; + } + else if (mInstructions[sz - 1]->mCode == IC_BRANCH && mInstructions[sz - 1]->mSrc[0].mTemp >= 0 && IsIntegerType(mInstructions[sz - 1]->mSrc[0].mType)) + { + int s = mInstructions[sz - 1]->mSrc[0].mTemp; + mFalseValueRange[s].mMinState = IntegerValueRange::S_BOUND; mFalseValueRange[s].mMinValue = 0; mFalseValueRange[s].mMaxState = IntegerValueRange::S_BOUND; @@ -15520,8 +15735,76 @@ bool InterCodeBasicBlock::InvalidatedBy(const InterInstruction* ins, const Inter return CollidingMem(by, ins); } +void InterCodeBasicBlock::CollectReachable(ExpandingArray& lblock) +{ + if (!mVisited && !mPatched) + { + lblock.Push(this); + mPatched = true; -bool InterCodeBasicBlock::CollectSingleHeadLoopBody(InterCodeBasicBlock* head, InterCodeBasicBlock* tail, GrowingArray& body) + if (mTrueJump) mTrueJump->CollectReachable(lblock); + if (mFalseJump) mFalseJump->CollectReachable(lblock); + } +} + +bool InterCodeBasicBlock::CollectGenericLoop(ExpandingArray& lblocks) +{ + ExpandingArray rblocks; + + mProc->ResetPatched(); + CollectReachable(rblocks); + + mProc->ResetPatched(); + + bool changed; + do + { + changed = false; + + for (int i = 0; i < rblocks.Size(); i++) + { + InterCodeBasicBlock* block(rblocks[i]); + + if (!block->mPatched && + (block->mTrueJump && (block->mTrueJump->mPatched || block->mTrueJump == this) || + block->mFalseJump && (block->mFalseJump->mPatched || block->mFalseJump == this))) + { + lblocks.Push(block); + block->mPatched = true; + changed = true; + } + } + + } while (changed); + + return lblocks.Size() > 0; +} + +bool InterCodeBasicBlock::CollectSingleEntryGenericLoop(ExpandingArray& lblocks) +{ + if (CollectGenericLoop(lblocks)) + { + for (int i = 0; i < lblocks.Size(); i++) + { + InterCodeBasicBlock* block = lblocks[i]; + + if (block != this) + { + for (int j = 0; j < block->mEntryBlocks.Size(); j++) + if (!lblocks.Contains(block->mEntryBlocks[j])) + return false; + } + } + + return true; + } + else + return false; +} + + + +bool InterCodeBasicBlock::CollectSingleHeadLoopBody(InterCodeBasicBlock* head, InterCodeBasicBlock* tail, ExpandingArray& body) { int i = 0; body.Push(head); @@ -16063,7 +16346,7 @@ bool InterCodeBasicBlock::MergeLoopTails(void) return modified; } -bool IsSingleLoopAssign(int at, InterCodeBasicBlock* block, const GrowingArray& body) +bool IsSingleLoopAssign(int at, InterCodeBasicBlock* block, const ExpandingArray& body) { InterInstruction* ai = block->mInstructions[at]; if (ai->mDst.mTemp < 0) @@ -16078,7 +16361,7 @@ bool IsSingleLoopAssign(int at, InterCodeBasicBlock* block, const GrowingArray& body) +bool IsLoopInvariantTemp(int tmp, const ExpandingArray& body) { if (tmp < 0) return true; @@ -16118,7 +16401,7 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar if (post && post->mNumEntries == 1) { - GrowingArray body(nullptr); + ExpandingArray body; if (tail->CollectSingleHeadLoopBody(this, tail, body)) { @@ -17927,6 +18210,44 @@ void InterCodeBasicBlock::PropagateMemoryAliasingInfo(const GrowingInstructionPt } } } +#if 1 + else if (loops) + { + ExpandingArray lblocks; + + if (CollectSingleEntryGenericLoop(lblocks)) + { + for (int i = 0; i < ltvalue.Size(); i++) + { + if (ltvalue[i]) + { + bool fail = false; + + for (int k = 0; k < lblocks.Size() && !fail; k++) + { + InterCodeBasicBlock* b = lblocks[k]; + for (int j = 0; j < b->mInstructions.Size() && !fail; j++) + { + InterInstruction* ins = b->mInstructions[j]; + if (ins->mDst.mTemp == i) + { + if (ins->mCode == IC_LEA && ins->mSrc[1].mTemp == i) + ; + else + fail = true; + } + } + } + + if (fail) + ltvalue[i] = nullptr; + } + } + } + else + ltvalue.Clear(); + } +#else else if (loops && mNumEntries == 2) { InterCodeBasicBlock* tail, * post; @@ -17943,7 +18264,7 @@ void InterCodeBasicBlock::PropagateMemoryAliasingInfo(const GrowingInstructionPt if (post && post->mNumEntries == 1) { - GrowingArray body(nullptr); + ExpandingArray body; if (tail->CollectSingleHeadLoopBody(this, tail, body)) { @@ -17978,6 +18299,7 @@ void InterCodeBasicBlock::PropagateMemoryAliasingInfo(const GrowingInstructionPt } } } +#endif else ltvalue.Clear(); } @@ -18387,7 +18709,7 @@ bool InterCodeBasicBlock::MoveConditionOutOfLoop(void) if (post && post->mNumEntries == 1) { - GrowingArray lbody(nullptr); + ExpandingArray lbody; if (tail->CollectSingleHeadLoopBody(this, tail, lbody)) { @@ -21255,6 +21577,7 @@ bool InterCodeBasicBlock::PeepholeReplaceOptimization(const GrowingVariableArray { mInstructions[i + 1]->mSrc[1] = mInstructions[i + 0]->mSrc[1]; mInstructions[i + 2]->mSrc[1].mIntConst += mInstructions[i + 0]->mSrc[0].mIntConst << mInstructions[i + 1]->mSrc[0].mIntConst; + mInstructions[i + 2]->mSrc[0].mRange.AddConstValue(IT_INT16, - (mInstructions[i + 0]->mSrc[0].mIntConst << mInstructions[i + 1]->mSrc[0].mIntConst)); mInstructions[i + 0]->mCode = IC_NONE; mInstructions[i + 0]->mNumOperands = 0; changed = true; @@ -23081,6 +23404,12 @@ void InterCodeProcedure::ResetEntryBlocks(void) mBlocks[i]->mEntryBlocks.SetSize(0); } +void InterCodeProcedure::ResetPatched(void) +{ + for (int i = 0; i < mBlocks.Size(); i++) + mBlocks[i]->mPatched = false; +} + void InterCodeProcedure::ResetVisited(void) { int i; @@ -23216,6 +23545,7 @@ void InterCodeProcedure::BuildTraces(int expand, bool dominators, bool compact) { mBlocks[i]->mNumEntries = 0; mBlocks[i]->mLoopHead = false; + mBlocks[i]->mLoopDebug = false; mBlocks[i]->mTraceIndex = -1; } mEntryBlock->CollectEntries(); @@ -24238,7 +24568,7 @@ void InterCodeProcedure::Close(void) { GrowingTypeArray tstack(IT_NONE); - CheckFunc = !strcmp(mIdent->mString, "trench_init"); + CheckFunc = !strcmp(mIdent->mString, "main"); CheckCase = false; mEntryBlock = mBlocks[0]; @@ -24246,12 +24576,28 @@ void InterCodeProcedure::Close(void) DisassembleDebug("start"); BuildTraces(10); - DisassembleDebug("traces"); EarlyBranchElimination(); + BuildTraces(0); + + do { + BuildLoopPrefix(); + ResetVisited(); + } while (mEntryBlock->StripLoopHead()); + DisassembleDebug("Loop Head"); + BuildTraces(0); + DisassembleDebug("branch elimination"); +#if 0 + do { + BuildLoopPrefix(); + ResetVisited(); + } while (mEntryBlock->StripLoopHead()); + DisassembleDebug("Loop Head"); + BuildTraces(0); +#endif ResetVisited(); mLeafProcedure = mEntryBlock->IsLeafProcedure(); diff --git a/oscar64/InterCode.h b/oscar64/InterCode.h index 2ae4c76..bfbabf5 100644 --- a/oscar64/InterCode.h +++ b/oscar64/InterCode.h @@ -169,6 +169,7 @@ public: } mMinState, mMaxState; bool Same(const IntegerValueRange& range) const; + bool Weaker(const IntegerValueRange& range) const; bool Merge(const IntegerValueRange& range, bool head, bool initial); void Expand(const IntegerValueRange& range); void Union(const IntegerValueRange& range); @@ -380,7 +381,7 @@ public: InterCodeBasicBlock * mTrueJump, * mFalseJump, * mLoopPrefix, * mDominator; GrowingInstructionArray mInstructions; - bool mVisited, mInPath, mLoopHead, mChecked, mConditionBlockTrue, mUnreachable, mLoopPath, mValueRangeValid; + bool mVisited, mInPath, mLoopHead, mChecked, mConditionBlockTrue, mUnreachable, mLoopPath, mValueRangeValid, mPatched, mLoopDebug; mutable int mMark; NumberSet mLocalUsedTemps, mLocalModifiedTemps; @@ -431,6 +432,7 @@ public: void CollectEntryBlocks(InterCodeBasicBlock* from); void GenerateTraces(int expand, bool compact); void BuildDominatorTree(InterCodeBasicBlock * from); + bool StripLoopHead(void); bool MergeSameConditionTraces(void); @@ -648,7 +650,11 @@ public: bool PullStoreUpToConstAddress(void); - bool CollectSingleHeadLoopBody(InterCodeBasicBlock* head, InterCodeBasicBlock* tail, GrowingArray& body); + bool CollectSingleHeadLoopBody(InterCodeBasicBlock* head, InterCodeBasicBlock* tail, ExpandingArray& body); + + bool CollectGenericLoop(ExpandingArray& lblocks); + bool CollectSingleEntryGenericLoop(ExpandingArray& lblocks); + void CollectReachable(ExpandingArray& lblock); bool SingleTailLoopOptimization(const NumberSet& aliasedParams, const GrowingVariableArray& staticVars); bool MergeLoopTails(void); @@ -707,6 +713,7 @@ protected: GrowingIntegerValueRangeArray mLocalValueRange, mReverseValueRange; void ResetVisited(void); + void ResetPatched(void); void ResetEntryBlocks(void); public: InterCodeBasicBlock * mEntryBlock; diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index 2661d2e..e40280b 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -26516,7 +26516,7 @@ bool NativeCodeBasicBlock::JoinTailCodeSequences(NativeCodeProcedure* proc, bool mTrueJump->mIns.Remove(0); mTrueJump->mEntryRequiredRegs += CPU_REG_A; - if (mTrueJump->mIns.Size() > 0 && (mTrueJump->mIns[0].mLive & LIVE_CPU_REG_Z)) + if (zlive) mTrueJump->mEntryRequiredRegs += CPU_REG_Z; changed = true; @@ -37084,13 +37084,10 @@ bool NativeCodeBasicBlock::CheckLoopIndexXRegisters(NativeCodeBasicBlock* head, { for (int i = mIns.Size() - 1; i >= 0; i--) { - if (mIns[i].ChangesXReg()) - { - if ((mIns[i].mType == ASMIT_LDX || mIns[i].mType == ASMIT_STX) && mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == xreg) - return true; - else if (mIns[i].ChangesXReg() || mIns[i].ChangesZeroPage(xreg)) - return false; - } + if ((mIns[i].mType == ASMIT_LDX || mIns[i].mType == ASMIT_STX) && mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == xreg) + return true; + else if (mIns[i].ChangesXReg() || mIns[i].ChangesZeroPage(xreg)) + return false; } if (this == head) @@ -37112,13 +37109,10 @@ bool NativeCodeBasicBlock::CheckLoopIndexYRegisters(NativeCodeBasicBlock* head, { for (int i = mIns.Size() - 1; i >= 0; i--) { - if (mIns[i].ChangesYReg()) - { - if ((mIns[i].mType == ASMIT_LDY || mIns[i].mType == ASMIT_STY) && mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == yreg) - return true; - else if (mIns[i].ChangesYReg() || mIns[i].ChangesZeroPage(yreg)) - return false; - } + if ((mIns[i].mType == ASMIT_LDY || mIns[i].mType == ASMIT_STY) && mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == yreg) + return true; + else if (mIns[i].ChangesYReg() || mIns[i].ChangesZeroPage(yreg)) + return false; } if (this == head) @@ -38922,8 +38916,17 @@ bool NativeCodeBasicBlock::OptimizeLoopRegisterWrapAround(void) mIns[j].mLive |= LIVE_CPU_REG_X; for (int j = 0; j < i; j++) hblock->mIns[j].mLive |= LIVE_CPU_REG_X; - ins.mType = ASMIT_NOP; - ins.mMode = ASMIM_IMPLIED; + if (ins.mLive & LIVE_CPU_REG_Z) + { + ins.mType = ASMIT_CPX; + ins.mMode = ASMIM_IMMEDIATE; + ins.mAddress = 0; + } + else + { + ins.mType = ASMIT_NOP; + ins.mMode = ASMIM_IMPLIED; + } mExitRequiredRegs += CPU_REG_X; pblock->mExitRequiredRegs += CPU_REG_X; @@ -43110,6 +43113,44 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BuildSingleExit(NativeCodeProcedure* return block; } +int NativeCodeBasicBlock::CorrectXOffset(const InterInstruction * ins, int xoffset, int at) +{ + while (xoffset > 0) + { + mIns.Insert(at, NativeCodeInstruction(ins, ASMIT_DEX)); + at++; + xoffset--; + } + + while (xoffset < 0) + { + mIns.Insert(at, NativeCodeInstruction(ins, ASMIT_INX)); + at++; + xoffset++; + } + + return at; +} + +int NativeCodeBasicBlock::CorrectYOffset(const InterInstruction * ins, int yoffset, int at) +{ + while (yoffset > 0) + { + mIns.Insert(at, NativeCodeInstruction(ins, ASMIT_DEY)); + at++; + yoffset--; + } + + while (yoffset < 0) + { + mIns.Insert(at, NativeCodeInstruction(ins, ASMIT_INY)); + at++; + yoffset++; + } + + return at; +} + bool NativeCodeBasicBlock::OptimizeGenericLoop(void) { @@ -43642,47 +43683,39 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(void) if (yoffset && yreg >= 0 && !(ins.mLive & LIVE_CPU_REG_Y)) { - while (yoffset > 0) + if (j + 1 == block->mIns.Size() && (ins.mLive & LIVE_CPU_REG_Z) && ins.mType == ASMIT_CMP) { - j++; - block->mIns.Insert(j, NativeCodeInstruction(ins.mIns, ASMIT_DEY)); - yoffset--; + printf("oopsie Y\n"); } - - while (yoffset < 0) + else { - j++; - block->mIns.Insert(j, NativeCodeInstruction(ins.mIns, ASMIT_INY)); - yoffset++; - } + j = block->CorrectYOffset(ins.mIns, yoffset, j + 1) - 1; + yoffset = 0; - if (j + 1 == block->mIns.Size() && (ins.mLive & LIVE_CPU_REG_Z) && ins.ChangesAccuAndFlag()) - { - ins.mLive |= LIVE_CPU_REG_A; - block->mIns.Push(NativeCodeInstruction(ins.mIns, ASMIT_ORA, ASMIM_IMMEDIATE, 0)); + if (j + 1 == block->mIns.Size() && (ins.mLive & LIVE_CPU_REG_Z) && ins.ChangesAccuAndFlag()) + { + ins.mLive |= LIVE_CPU_REG_A; + block->mIns.Push(NativeCodeInstruction(ins.mIns, ASMIT_ORA, ASMIM_IMMEDIATE, 0)); + } } } if (xoffset && xreg >= 0 && !(ins.mLive & LIVE_CPU_REG_X)) { - while (xoffset > 0) + if (j + 1 == block->mIns.Size() && (ins.mLive & LIVE_CPU_REG_Z) && ins.mType == ASMIT_CMP) { - j++; - block->mIns.Insert(j, NativeCodeInstruction(ins.mIns, ASMIT_DEX)); - xoffset--; + printf("oopsie X\n"); } - - while (xoffset < 0) + else { - j++; - block->mIns.Insert(j, NativeCodeInstruction(ins.mIns, ASMIT_INX)); - xoffset++; - } + j = block->CorrectXOffset(ins.mIns, xoffset, j + 1) - 1; + xoffset = 0; - if (j + 1 == block->mIns.Size() && (ins.mLive & LIVE_CPU_REG_Z) && ins.ChangesAccuAndFlag()) - { - ins.mLive |= LIVE_CPU_REG_A; - block->mIns.Push(NativeCodeInstruction(ins.mIns, ASMIT_ORA, ASMIM_IMMEDIATE, 0)); + if (j + 1 == block->mIns.Size() && (ins.mLive & LIVE_CPU_REG_Z) && ins.ChangesAccuAndFlag()) + { + ins.mLive |= LIVE_CPU_REG_A; + block->mIns.Push(NativeCodeInstruction(ins.mIns, ASMIT_ORA, ASMIM_IMMEDIATE, 0)); + } } } @@ -43695,50 +43728,76 @@ bool NativeCodeBasicBlock::OptimizeGenericLoop(void) } - if (block->mTrueJump && !lblocks.Contains(block->mTrueJump)) + if (block->mTrueJump) { - block->mTrueJump = block->BuildSingleEntry(mProc, block->mTrueJump); - if (areg >= 0 && block->mTrueJump->mEntryRequiredRegs[areg]) + if (!lblocks.Contains(block->mTrueJump)) { - if (areg < 256) - block->mTrueJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STA, ASMIM_ZERO_PAGE, areg)); - block->mExitRequiredRegs += CPU_REG_A; - block->mTrueJump->mEntryRequiredRegs += CPU_REG_A; + block->mTrueJump = block->BuildSingleEntry(mProc, block->mTrueJump); + if (areg >= 0 && block->mTrueJump->mEntryRequiredRegs[areg]) + { + if (areg < 256) + block->mTrueJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STA, ASMIM_ZERO_PAGE, areg)); + block->mExitRequiredRegs += CPU_REG_A; + block->mTrueJump->mEntryRequiredRegs += CPU_REG_A; + } + if (yreg >= 0 && block->mTrueJump->mEntryRequiredRegs[yreg]) + { + int j = block->mTrueJump->CorrectYOffset(block->mBranchIns, yoffset, 0); + block->mTrueJump->mIns.Insert(j, NativeCodeInstruction(block->mBranchIns, ASMIT_STY, ASMIM_ZERO_PAGE, yreg)); + + block->mExitRequiredRegs += CPU_REG_Y; + block->mTrueJump->mEntryRequiredRegs += CPU_REG_Y; + } + if (xreg >= 0 && block->mTrueJump->mEntryRequiredRegs[xreg]) + { + int j = block->mTrueJump->CorrectXOffset(block->mBranchIns, xoffset, 0); + + block->mTrueJump->mIns.Insert(j, NativeCodeInstruction(block->mBranchIns, ASMIT_STX, ASMIM_ZERO_PAGE, xreg)); + block->mExitRequiredRegs += CPU_REG_X; + block->mTrueJump->mEntryRequiredRegs += CPU_REG_X; + } } - if (yreg >= 0 && block->mTrueJump->mEntryRequiredRegs[yreg]) + else { - block->mTrueJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STY, ASMIM_ZERO_PAGE, yreg)); - block->mExitRequiredRegs += CPU_REG_Y; - block->mTrueJump->mEntryRequiredRegs += CPU_REG_Y; - } - if (xreg >= 0 && block->mTrueJump->mEntryRequiredRegs[xreg]) - { - block->mTrueJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STX, ASMIM_ZERO_PAGE, xreg)); - block->mExitRequiredRegs += CPU_REG_X; - block->mTrueJump->mEntryRequiredRegs += CPU_REG_X; + if (yreg >= 0 && yoffset != 0 && block->mFalseJump->mEntryRequiredRegs[yreg]) + block->mTrueJump->CorrectYOffset(block->mBranchIns, yoffset, 0); + if (xreg >= 0 && xoffset != 0 && block->mFalseJump->mEntryRequiredRegs[xreg]) + block->mTrueJump->CorrectXOffset(block->mBranchIns, xoffset, 0); } } - if (block->mFalseJump && !lblocks.Contains(block->mFalseJump)) + if (block->mFalseJump) { - block->mFalseJump = block->BuildSingleEntry(mProc, block->mFalseJump); - if (areg >= 0 && block->mFalseJump->mEntryRequiredRegs[areg]) + if (!lblocks.Contains(block->mFalseJump)) { - if (areg < 256) - block->mFalseJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STA, ASMIM_ZERO_PAGE, areg)); - block->mExitRequiredRegs += CPU_REG_A; - block->mFalseJump->mEntryRequiredRegs += CPU_REG_A; + block->mFalseJump = block->BuildSingleEntry(mProc, block->mFalseJump); + if (areg >= 0 && block->mFalseJump->mEntryRequiredRegs[areg]) + { + if (areg < 256) + block->mFalseJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STA, ASMIM_ZERO_PAGE, areg)); + block->mExitRequiredRegs += CPU_REG_A; + block->mFalseJump->mEntryRequiredRegs += CPU_REG_A; + } + if (yreg >= 0 && block->mFalseJump->mEntryRequiredRegs[yreg]) + { + int j = block->mTrueJump->CorrectYOffset(block->mBranchIns, yoffset, 0); + block->mFalseJump->mIns.Insert(j, NativeCodeInstruction(block->mBranchIns, ASMIT_STY, ASMIM_ZERO_PAGE, yreg)); + block->mExitRequiredRegs += CPU_REG_Y; + block->mFalseJump->mEntryRequiredRegs += CPU_REG_Y; + } + if (xreg >= 0 && block->mFalseJump->mEntryRequiredRegs[xreg]) + { + int j = block->mTrueJump->CorrectXOffset(block->mBranchIns, xoffset, 0); + block->mFalseJump->mIns.Insert(j, NativeCodeInstruction(block->mBranchIns, ASMIT_STX, ASMIM_ZERO_PAGE, xreg)); + block->mExitRequiredRegs += CPU_REG_X; + block->mFalseJump->mEntryRequiredRegs += CPU_REG_X; + } } - if (yreg >= 0 && block->mFalseJump->mEntryRequiredRegs[yreg]) + else { - block->mFalseJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STY, ASMIM_ZERO_PAGE, yreg)); - block->mExitRequiredRegs += CPU_REG_Y; - block->mFalseJump->mEntryRequiredRegs += CPU_REG_Y; - } - if (xreg >= 0 && block->mFalseJump->mEntryRequiredRegs[xreg]) - { - block->mFalseJump->mIns.Insert(0, NativeCodeInstruction(block->mBranchIns, ASMIT_STX, ASMIM_ZERO_PAGE, xreg)); - block->mExitRequiredRegs += CPU_REG_X; - block->mFalseJump->mEntryRequiredRegs += CPU_REG_X; + if (yreg >= 0 && yoffset != 0 && block->mFalseJump->mEntryRequiredRegs[yreg]) + block->mFalseJump->CorrectYOffset(block->mBranchIns, yoffset, 0); + if (xreg >= 0 && xoffset != 0 && block->mFalseJump->mEntryRequiredRegs[xreg]) + block->mFalseJump->CorrectXOffset(block->mBranchIns, xoffset, 0); } } @@ -44275,7 +44334,8 @@ bool NativeCodeBasicBlock::BlockSizeCopyReduction(NativeCodeProcedure* proc, int #if 1 if (si + 5 < mIns.Size() && mIns[si + 0].mType == ASMIT_LDY && mIns[si + 0].mMode == ASMIM_IMMEDIATE && - mIns[si + 1].mType == ASMIT_STA && mIns[si + 1].mMode == ASMIM_INDIRECT_Y) + mIns[si + 1].mType == ASMIT_STA && mIns[si + 1].mMode == ASMIM_INDIRECT_Y && + !(mIns[si + 1].mLive & LIVE_CPU_REG_C)) { int i = 1; while (si + 2 * i + 1 < mIns.Size() && @@ -48956,6 +49016,28 @@ bool NativeCodeBasicBlock::PeepHoleOptimizerIterate3(int i, int pass) return true; } + if ( + mIns[i + 0].mType == ASMIT_LDA && HasAsmInstructionMode(ASMIT_LDX, mIns[i + 0].mMode) && + mIns[i + 1].mType == ASMIT_TAX && + mIns[i + 2].mType == ASMIT_CMP && HasAsmInstructionMode(ASMIT_CPX, mIns[i + 2].mMode) && !(mIns[i + 2].mLive & LIVE_CPU_REG_A)) + { + mIns[i + 0].mType = ASMIT_LDX; mIns[i + 0].mLive |= LIVE_CPU_REG_X; + mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; + mIns[i + 2].mType = ASMIT_CPX; + return true; + } + + if ( + mIns[i + 0].mType == ASMIT_LDA && HasAsmInstructionMode(ASMIT_LDY, mIns[i + 0].mMode) && + mIns[i + 1].mType == ASMIT_TAY && + mIns[i + 2].mType == ASMIT_CMP && HasAsmInstructionMode(ASMIT_CPY, mIns[i + 2].mMode) && !(mIns[i + 2].mLive & LIVE_CPU_REG_A)) + { + mIns[i + 0].mType = ASMIT_LDY; mIns[i + 0].mLive |= LIVE_CPU_REG_Y; + mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; + mIns[i + 2].mType = ASMIT_CPY; + return true; + } + if ( mIns[i + 0].mType == ASMIT_LDA && !mIns[i + 0].RequiresXReg() && mIns[i + 1].mType == ASMIT_LDX && @@ -55848,7 +55930,7 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc) mInterProc->mLinkerObject->mNativeProc = this; - CheckFunc = !strcmp(mIdent->mString, "sidfx_loop"); + CheckFunc = !strcmp(mIdent->mString, "main"); int nblocks = proc->mBlocks.Size(); tblocks = new NativeCodeBasicBlock * [nblocks]; @@ -57750,14 +57832,14 @@ void NativeCodeProcedure::Optimize(void) } #if _DEBUG - ResetVisited(); - mEntryBlock->CheckAsmCode(); + ResetVisited(); + mEntryBlock->CheckAsmCode(); #endif #if DISASSEMBLE_OPT - char fname[100]; - sprintf_s(fname, "Optimize %d, %d", step, cnt); - DisassembleDebug(fname); + char fname[100]; + sprintf_s(fname, "Optimize %d, %d", step, cnt); + DisassembleDebug(fname); #endif #if 1 @@ -57915,8 +57997,8 @@ void NativeCodeProcedure::Optimize(void) if (mEntryBlock->ApplyEntryDataSet()) changed = true; #endif - #if 1 + ResetVisited(); mEntryBlock->BlockSizeReduction(this, -1, -1, -1); diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index 6169fa6..9223b79 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -373,6 +373,8 @@ public: bool OptimizeInnerLoops(NativeCodeProcedure* proc); NativeCodeBasicBlock* CollectInnerLoop(NativeCodeBasicBlock* head, ExpandingArray& lblocks); + int CorrectXOffset(const InterInstruction * ins, int yoffset, int at); + int CorrectYOffset(const InterInstruction * ins, int yoffset, int at); bool OptimizeGenericLoop(void); bool CollectGenericLoop(ExpandingArray& lblocks); bool CollectSingleEntryGenericLoop(ExpandingArray& lblocks); diff --git a/oscar64/Parser.cpp b/oscar64/Parser.cpp index cab25cf..de051a7 100644 --- a/oscar64/Parser.cpp +++ b/oscar64/Parser.cpp @@ -515,6 +515,12 @@ Declaration* Parser::ParseStructDeclaration(uint64 flags, DecType dt, Declaratio } } + if (mdec->mBits == 24 && mdec->mOffset > 0 && mdec->mShift == 0) + { + mdec->mOffset--; + mdec->mShift = 8; + } + if (mdec->mShift == 0 && mdec->mBits == 8 * mdec->mSize) mdec->mBits = 0; }