diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index 130f5f9..6d3dcd0 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -6083,7 +6083,7 @@ static bool IsInfiniteLoop(InterCodeBasicBlock* head, InterCodeBasicBlock* block return false; } -void InterCodeBasicBlock::GenerateTraces(bool expand, bool compact) +void InterCodeBasicBlock::GenerateTraces(int expand, bool compact) { if (mInPath) mLoopHead = true; @@ -6185,7 +6185,7 @@ void InterCodeBasicBlock::GenerateTraces(bool expand, bool compact) mInstructions[ns - 1]->mCode = IC_JUMP; mInstructions[ns - 1]->mNumOperands = 0; } - else if (mTrueJump && !mFalseJump && ((expand && mTrueJump->mInstructions.Size() < 10 && mTrueJump->mInstructions.Size() > 1 && !mLoopHead) || mTrueJump->mNumEntries == 1) && !mTrueJump->mLoopHead && !IsInfiniteLoop(mTrueJump, mTrueJump)) + else if (mTrueJump && !mFalseJump && ((mTrueJump->mInstructions.Size() < expand && mTrueJump->mInstructions.Size() > 1 && !mLoopHead) || mTrueJump->mNumEntries == 1) && !mTrueJump->mLoopHead && !IsInfiniteLoop(mTrueJump, mTrueJump)) { mTrueJump->mNumEntries--; int n = mTrueJump->mNumEntries; @@ -14692,7 +14692,8 @@ bool InterCodeBasicBlock::OptimizeIntervalCompare(void) if (mInstructions[sz - 2]->mOperator == IA_CMPGES && mInstructions[sz - 2]->mSrc[0].mTemp == -1) { if (mTrueJump->mInstructions.Size() == 2 && mTrueJump->mInstructions[1]->mCode == IC_BRANCH && mTrueJump->mFalseJump == mFalseJump && - mTrueJump->mInstructions[0]->mCode == IC_RELATIONAL_OPERATOR && mTrueJump->mInstructions[0]->mDst.mTemp == mTrueJump->mInstructions[1]->mSrc[0].mTemp) + mTrueJump->mInstructions[0]->mCode == IC_RELATIONAL_OPERATOR && mTrueJump->mInstructions[0]->mDst.mTemp == mTrueJump->mInstructions[1]->mSrc[0].mTemp && + mTrueJump->mInstructions[1]->mSrc[0].mFinal) { if (mTrueJump->mInstructions[0]->mSrc[0].mTemp == -1 && mTrueJump->mInstructions[0]->mSrc[1].mTemp == mInstructions[sz - 2]->mSrc[1].mTemp) { @@ -14720,7 +14721,8 @@ bool InterCodeBasicBlock::OptimizeIntervalCompare(void) else if (mInstructions[sz - 2]->mOperator == IA_CMPLS && mInstructions[sz - 2]->mSrc[0].mTemp == -1) { if (mTrueJump->mInstructions.Size() == 2 && mTrueJump->mInstructions[1]->mCode == IC_BRANCH && mTrueJump->mFalseJump == mFalseJump && - mTrueJump->mInstructions[0]->mCode == IC_RELATIONAL_OPERATOR && mTrueJump->mInstructions[0]->mDst.mTemp == mTrueJump->mInstructions[1]->mSrc[0].mTemp) + mTrueJump->mInstructions[0]->mCode == IC_RELATIONAL_OPERATOR && mTrueJump->mInstructions[0]->mDst.mTemp == mTrueJump->mInstructions[1]->mSrc[0].mTemp && + mTrueJump->mInstructions[1]->mSrc[0].mFinal) { if (mTrueJump->mInstructions[0]->mSrc[0].mTemp == -1 && mTrueJump->mInstructions[0]->mSrc[1].mTemp == mInstructions[sz - 2]->mSrc[1].mTemp) { @@ -14738,7 +14740,8 @@ bool InterCodeBasicBlock::OptimizeIntervalCompare(void) else if (mInstructions[sz - 2]->mOperator == IA_CMPLES && mInstructions[sz - 2]->mSrc[0].mTemp == -1) { if (mTrueJump->mInstructions.Size() == 2 && mTrueJump->mInstructions[1]->mCode == IC_BRANCH && mTrueJump->mFalseJump == mFalseJump && - mTrueJump->mInstructions[0]->mCode == IC_RELATIONAL_OPERATOR && mTrueJump->mInstructions[0]->mDst.mTemp == mTrueJump->mInstructions[1]->mSrc[0].mTemp) + mTrueJump->mInstructions[0]->mCode == IC_RELATIONAL_OPERATOR && mTrueJump->mInstructions[0]->mDst.mTemp == mTrueJump->mInstructions[1]->mSrc[0].mTemp && + mTrueJump->mInstructions[1]->mSrc[0].mFinal) { if (mTrueJump->mInstructions[0]->mSrc[0].mTemp == -1 && mTrueJump->mInstructions[0]->mSrc[1].mTemp == mInstructions[sz - 2]->mSrc[1].mTemp) { @@ -22427,12 +22430,12 @@ void InterCodeProcedure::EarlyBranchElimination(void) ResetVisited(); while (mEntryBlock->EarlyBranchElimination(ctemps)) { - BuildTraces(true); + BuildTraces(10); TrimBlocks(); } } -void InterCodeProcedure::BuildTraces(bool expand, bool dominators, bool compact) +void InterCodeProcedure::BuildTraces(int expand, bool dominators, bool compact) { // Count number of entries // @@ -22688,7 +22691,7 @@ void InterCodeProcedure::ShortcutDuplicateBranches(void) void InterCodeProcedure::MoveConditionsOutOfLoop(void) { - BuildTraces(false); + BuildTraces(0); BuildLoopPrefix(); ResetEntryBlocks(); ResetVisited(); @@ -22701,13 +22704,13 @@ void InterCodeProcedure::MoveConditionsOutOfLoop(void) { Disassemble("MoveConditionOutOfLoop"); - BuildTraces(false); + BuildTraces(0); BuildDataFlowSets(); TempForwarding(); RemoveUnusedInstructions(); - BuildTraces(false); + BuildTraces(0); BuildLoopPrefix(); ResetEntryBlocks(); ResetVisited(); @@ -22724,7 +22727,7 @@ void InterCodeProcedure::MoveConditionsOutOfLoop(void) void InterCodeProcedure::EliminateDoubleLoopCounter(void) { - BuildTraces(false); + BuildTraces(0); BuildLoopPrefix(); ResetEntryBlocks(); ResetVisited(); @@ -23033,7 +23036,7 @@ void InterCodeProcedure::PromoteSimpleLocalsToTemp(InterMemory paramMemory, int DisassembleDebug("local variables to temps"); - BuildTraces(false); + BuildTraces(0); BuildDataFlowSets(); @@ -23329,7 +23332,7 @@ void InterCodeProcedure::ReduceRecursionTempSpilling(InterMemory paramMemory) void InterCodeProcedure::LoadStoreForwarding(InterMemory paramMemory) { - BuildTraces(false); + BuildTraces(0); DisassembleDebug("Load/Store forwardingY"); @@ -23440,14 +23443,16 @@ void InterCodeProcedure::Close(void) { GrowingTypeArray tstack(IT_NONE); - CheckFunc = !strcmp(mIdent->mString, "main"); + CheckFunc = !strcmp(mIdent->mString, "testint0"); CheckCase = false; mEntryBlock = mBlocks[0]; DisassembleDebug("start"); - BuildTraces(true); + BuildTraces(10); + + DisassembleDebug("traces"); EarlyBranchElimination(); @@ -23510,7 +23515,7 @@ void InterCodeProcedure::Close(void) eliminated = mEntryBlock->EliminateDeadBranches(); if (eliminated) { - BuildTraces(false); + BuildTraces(0); /* ResetVisited(); for (int i = 0; i < mBlocks.Size(); i++) @@ -23749,7 +23754,7 @@ void InterCodeProcedure::Close(void) Disassemble("gcp-"); #endif - BuildTraces(false); + BuildTraces(0); DisassembleDebug("Rebuilt traces"); BuildDataFlowSets(); @@ -23792,7 +23797,7 @@ void InterCodeProcedure::Close(void) DisassembleDebug("Simplified range limited relational ops 1"); #endif - BuildTraces(false); + BuildTraces(0); DisassembleDebug("Rebuilt traces"); #if 1 @@ -23881,7 +23886,7 @@ void InterCodeProcedure::Close(void) mEntryBlock->CollectEntryBlocks(nullptr); #endif - BuildTraces(false); + BuildTraces(0); #if 1 SingleBlockLoopPointerSplit(activeSet); @@ -23910,7 +23915,7 @@ void InterCodeProcedure::Close(void) ResetVisited(); mEntryBlock->CollectEntryBlocks(nullptr); - BuildTraces(false); + BuildTraces(0); #endif SingleTailLoopOptimization(paramMemory); @@ -23925,7 +23930,7 @@ void InterCodeProcedure::Close(void) #endif #if 1 - BuildTraces(false); + BuildTraces(0); PushSinglePathResultInstructions(); @@ -23992,7 +23997,7 @@ void InterCodeProcedure::Close(void) BuildDataFlowSets(); DisassembleDebug("Removed unreachable branches"); - BuildTraces(false); + BuildTraces(0); DisassembleDebug("Rebuilt traces"); BuildDataFlowSets(); @@ -24030,7 +24035,7 @@ void InterCodeProcedure::Close(void) BuildDataFlowSets(); - BuildTraces(false); + BuildTraces(0); DisassembleDebug("Rebuilt traces"); #endif @@ -24057,7 +24062,7 @@ void InterCodeProcedure::Close(void) BuildDataFlowSets(); - BuildTraces(false); + BuildTraces(0); DisassembleDebug("Rebuilt traces"); #endif @@ -24082,9 +24087,9 @@ void InterCodeProcedure::Close(void) ResetVisited(); mEntryBlock->CollectEntryBlocks(nullptr); - BuildTraces(false); + BuildTraces(0); BuildDataFlowSets(); - BuildTraces(false); + BuildTraces(0); #endif PropagateConstOperationsUp(); @@ -24100,7 +24105,7 @@ void InterCodeProcedure::Close(void) ResetVisited(); if (mEntryBlock->MergeLoopTails()) { - BuildTraces(false); + BuildTraces(0); BuildDataFlowSets(); } @@ -24180,7 +24185,7 @@ void InterCodeProcedure::Close(void) BuildDataFlowSets(); - BuildTraces(false); + BuildTraces(0); DisassembleDebug("Rebuilt traces"); #endif } @@ -24212,7 +24217,7 @@ void InterCodeProcedure::Close(void) TempForwarding(); } while (GlobalConstantPropagation()); - BuildTraces(false); + BuildTraces(0); DisassembleDebug("Rebuilt traces"); @@ -24263,7 +24268,7 @@ void InterCodeProcedure::Close(void) #if 1 for (int i = 0; i < 8; i++) { - BuildTraces(false); + BuildTraces(0); LoadStoreForwarding(paramMemory); @@ -24390,7 +24395,7 @@ void InterCodeProcedure::Close(void) // Optimize for size MergeBasicBlocks(activeSet); - BuildTraces(false, false, true); + BuildTraces(0, false, true); DisassembleDebug("Final Merged basic blocks"); WarnInvalidValueRanges(); @@ -24606,7 +24611,7 @@ void InterCodeProcedure::SingleTailLoopOptimization(InterMemory paramMemory) RemoveUnusedStoreInstructions(paramMemory); } - BuildTraces(false); + BuildTraces(0); DisassembleDebug("Rebuilt traces"); } while (changed); @@ -25113,7 +25118,7 @@ void InterCodeProcedure::RecheckLocalAliased(void) void InterCodeProcedure::ConstLoopOptimization(void) { - BuildTraces(false); + BuildTraces(0); BuildLoopPrefix(); ResetEntryBlocks(); ResetVisited(); diff --git a/oscar64/InterCode.h b/oscar64/InterCode.h index 0ec1571..69df2d1 100644 --- a/oscar64/InterCode.h +++ b/oscar64/InterCode.h @@ -429,7 +429,7 @@ public: void CollectEntries(void); void CollectEntryBlocks(InterCodeBasicBlock* from); - void GenerateTraces(bool expand, bool compact); + void GenerateTraces(int expand, bool compact); void BuildDominatorTree(InterCodeBasicBlock * from); bool MergeSameConditionTraces(void); @@ -762,7 +762,7 @@ public: void Disassemble(const char* name, bool dumpSets = false); protected: void BuildLocalAliasTable(void); - void BuildTraces(bool expand, bool dominators = true, bool compact = false); + void BuildTraces(int expand, bool dominators = true, bool compact = false); void TrimBlocks(void); void EarlyBranchElimination(void); void BuildDataFlowSets(void); diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index cd18339..c75cc3f 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -13925,7 +13925,7 @@ void NativeCodeBasicBlock::LoadStoreOpAbsolute2D(InterCodeProcedure* proc, const } -void NativeCodeBasicBlock::LoadEffectiveAddress(InterCodeProcedure* proc, const InterInstruction * ins, const InterInstruction* sins1, const InterInstruction* sins0, bool addrvalid) +void NativeCodeBasicBlock::LoadEffectiveAddress(InterCodeProcedure* proc, const InterInstruction * ins, const InterInstruction* sins1, const InterInstruction* sins0, bool addrvalid, bool addrused) { bool isub = false; int ireg = ins->mSrc[0].mTemp; @@ -14016,7 +14016,12 @@ void NativeCodeBasicBlock::LoadEffectiveAddress(InterCodeProcedure* proc, const mIns.Push(NativeCodeInstruction(ins, ASMIT_LDA, ASMIM_IMMEDIATE, (ins->mSrc[1].mIntConst >> 8) & 0xff)); #if 1 if (ins->mSrc[0].IsUByte()) - mIns.Push(NativeCodeInstruction(ins, iop, ASMIM_IMMEDIATE, 0)); + { + if (iop == ASMIT_ADC && ins->mSrc[0].mRange.mMaxState == IntegerValueRange::S_BOUND && ins->mSrc[0].mRange.mMaxValue + (ins->mSrc[1].mIntConst & 0xff) < 0x100 && addrused) + ; + else + mIns.Push(NativeCodeInstruction(ins, iop, ASMIM_IMMEDIATE, 0)); + } else #endif mIns.Push(NativeCodeInstruction(ins, iop, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ireg] + 1)); @@ -28125,6 +28130,7 @@ bool NativeCodeBasicBlock::BypassRegisterConditionBlock(void) mIns.Remove(i); changed = true; mExitRequiredRegs -= CPU_REG_A; + CheckLive(); } } } @@ -28139,10 +28145,14 @@ bool NativeCodeBasicBlock::BypassRegisterConditionBlock(void) { cblock->mIns.Insert(0, NativeCodeInstruction(mIns[i].mIns, ASMIT_TXA)); mIns.Remove(i); + while (i < mIns.Size()) + mIns[i++].mLive |= LIVE_CPU_REG_X; + changed = true; mExitRequiredRegs -= CPU_REG_A; mExitRequiredRegs += CPU_REG_X; cblock->mEntryRequiredRegs += CPU_REG_X; + CheckLive(); } } else if (i >= 0 && mIns[i].mType == ASMIT_TYA && !(mIns[i].mLive & LIVE_CPU_REG_Z)) @@ -28151,10 +28161,14 @@ bool NativeCodeBasicBlock::BypassRegisterConditionBlock(void) { cblock->mIns.Insert(0, NativeCodeInstruction(mIns[i].mIns, ASMIT_TYA)); mIns.Remove(i); + while (i < mIns.Size()) + mIns[i++].mLive |= LIVE_CPU_REG_Y; + changed = true; mExitRequiredRegs -= CPU_REG_A; mExitRequiredRegs += CPU_REG_Y; cblock->mEntryRequiredRegs += CPU_REG_Y; + CheckLive(); } } } @@ -28175,6 +28189,7 @@ bool NativeCodeBasicBlock::BypassRegisterConditionBlock(void) mIns.Remove(i); changed = true; mExitRequiredRegs -= CPU_REG_Y; + CheckLive(); } } } @@ -28195,6 +28210,7 @@ bool NativeCodeBasicBlock::BypassRegisterConditionBlock(void) mIns.Remove(i); changed = true; mExitRequiredRegs -= CPU_REG_X; + CheckLive(); } } } @@ -28893,6 +28909,8 @@ bool NativeCodeBasicBlock::CheckCrossBlock16BitFlood(const NativeCodeBasicBlock* return false; else if ((ins.mType == ASMIT_JSR || ins.mType == ASMIT_RTS) && (ins.ReferencesZeroPage(dreg) || ins.ReferencesZeroPage(dreg + 1))) return false; + else if (ins.mMode == ASMIM_INDIRECT_Y && ins.mAddress + 1 == dreg) + return false; at++; } @@ -32242,6 +32260,7 @@ bool NativeCodeBasicBlock::MoveLDYUp(int at) NativeCodeInstruction& lins(mIns[at]); int i = at; + bool referenced = false; while (i > 0) { i--; @@ -32262,8 +32281,29 @@ bool NativeCodeBasicBlock::MoveLDYUp(int at) } return true; } + else if (lins.mMode == ASMIM_ZERO_PAGE && ins.mType == ASMIT_STX && ins.SameEffectiveAddress(lins) && !referenced) + { + if (lins.mLive & LIVE_MEM) + return false; + if (ins.mLive & (LIVE_CPU_REG_Z | LIVE_CPU_REG_A)) + return false; + + mIns[i].mType = ASMIT_TXA; mIns[i].mMode = ASMIM_IMPLIED; mIns[i].mLive |= LIVE_CPU_REG_A; + mIns.Insert(i + 1, NativeCodeInstruction(lins.mIns, ASMIT_TAY)); + mIns.Remove(at + 1); + + while (i <= at) + { + mIns[i].mLive |= LIVE_CPU_REG_Y; + i++; + } + return true; + } if (ins.ReferencesYReg() || lins.MayBeChangedOnAddress(ins)) return false; + if (lins.mMode == ASMIM_ZERO_PAGE && ins.ReferencesZeroPage(lins.mAddress)) + referenced = true; + } return false; @@ -34633,6 +34673,45 @@ bool NativeCodeBasicBlock::MoveLoadStoreUp(int at) return false; } + +// [at + 0] = LSR zp1 and not needed +// [at + 1] = ROR +// [at + 2] = STA zp2 and accu not needed + +bool NativeCodeBasicBlock::ReverseShiftByteOrder(int at) +{ + int start = at; + + while (start > 2 && + mIns[start - 2].mType == mIns[at + 0].mType && mIns[start - 2].mMode == ASMIM_ZERO_PAGE && mIns[start - 2].mAddress == mIns[at + 0].mAddress && + mIns[start - 1].mType == mIns[at + 1].mType && mIns[start - 1].mMode == ASMIM_IMPLIED) + start -= 2; + + if (start >= 5 && + mIns[start - 5].mType == ASMIT_LDA && mIns[start - 5].mMode == ASMIM_ZERO_PAGE && !(mIns[start - 5].mLive & LIVE_MEM) && + mIns[start - 4].mType == mIns[at + 0].mType && mIns[start - 4].mMode == ASMIM_IMPLIED && + mIns[start - 3].mType == ASMIT_STA && mIns[start - 3].mMode == ASMIM_ZERO_PAGE && mIns[start - 3].mAddress == mIns[at + 0].mAddress && + mIns[start - 2].mType == ASMIT_LDA && mIns[start - 2].mMode == ASMIM_ZERO_PAGE && + mIns[start - 1].mType == mIns[at + 1].mType && mIns[start - 1].mMode == ASMIM_IMPLIED) + { + int reg = mIns[start - 5].mAddress; + + mIns[start - 5].mType = mIns[at + 0].mType; mIns[start - 5].mLive |= LIVE_MEM | LIVE_CPU_REG_C; + mIns[start - 4].mType = ASMIT_NOP; mIns[start - 4].mMode = ASMIM_IMPLIED; + mIns[start - 3].mType = ASMIT_NOP; mIns[start - 3].mMode = ASMIM_IMPLIED; + + while (start <= at) + { + mIns[start + 0].mAddress = reg; + start += 2; + } + + return true; + } + + return false; +} + // Assume [at ] = SHIFT // Assume [at + 1] = ORA @@ -36021,7 +36100,21 @@ bool NativeCodeBasicBlock::OffsetValueForwarding(const ValueNumberingDataSet& da } else if (ins.mType == ASMIT_LDA && ins.mMode == ASMIM_ZERO_PAGE) { - mNNumDataSet.mRegs[CPU_REG_A] = mNNumDataSet.mRegs[mIns[i + 0].mAddress]; + int reg = ins.mAddress; + + if (carry != -1 && i + 1 < mIns.Size() && mIns[i + 1].mMode == ASMIM_IMMEDIATE && + (mIns[i + 1].mType == ASMIT_ADC || mIns[i + 1].mType == ASMIT_SBC) && !(mIns[i + 1].mLive & LIVE_CPU_REG_C) && + mNNumDataSet.mRegs[reg].SameBase(mNNumDataSet.mRegs[CPU_REG_A])) + { + ins.mType = ASMIT_NOP; ins.mMode = ASMIM_IMPLIED; + if (mIns[i + 1].mType == ASMIT_ADC) + mIns[i + 1].mAddress = (mIns[i + 1].mAddress + (mNNumDataSet.mRegs[reg].mOffset - mNNumDataSet.mRegs[CPU_REG_A].mOffset)) & 0xff; + else + mIns[i + 1].mAddress = (mIns[i + 1].mAddress - (mNNumDataSet.mRegs[reg].mOffset - mNNumDataSet.mRegs[CPU_REG_A].mOffset)) & 0xff; + changed = true; + } + else + mNNumDataSet.mRegs[CPU_REG_A] = mNNumDataSet.mRegs[reg]; } else if (ins.mType == ASMIT_STA && ins.mMode == ASMIM_ZERO_PAGE) { @@ -39271,6 +39364,29 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc } #endif + // Special case of 16 bit shift with unused upper byte result + if (mIns.Size() == 3 && mBranch == ASMIT_BNE && mTrueJump == this && !mFalseJump->mEntryRequiredRegs[CPU_REG_A] && + mIns[0].mType == ASMIT_ASL && mIns[0].mMode == ASMIM_ZERO_PAGE && + mIns[1].mType == ASMIT_ROL && mIns[1].mMode == ASMIM_IMPLIED && !(mIns[1].mLive & LIVE_CPU_REG_C) && + mIns[2].mType == ASMIT_DEX) + { + if (!prevBlock) + return OptimizeSimpleLoopInvariant(proc, full); + + prevBlock->mIns.Push(NativeCodeInstruction(mIns[0].mIns, ASMIT_LDA, mIns[0])); + prevBlock->mExitRequiredRegs += CPU_REG_A; + + exitBlock->mEntryRequiredRegs += CPU_REG_A; + exitBlock->mIns.Insert(0, NativeCodeInstruction(mIns[0].mIns, ASMIT_STA, mIns[0])); + + mIns[0].mMode = ASMIM_IMPLIED; + mIns[1].mType = ASMIT_NOP; + + + changed = true; + } + + if (sz >= 2 && (mIns[0].mType == ASMIT_ASL || mIns[0].mType == ASMIT_LSR) && mIns[0].mMode == ASMIM_ZERO_PAGE && mIns[sz - 1].mType == ASMIT_LDA && mIns[sz - 1].SameEffectiveAddress(mIns[0])) { @@ -39689,6 +39805,33 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc } } + sz = mIns.Size(); + if (sz >= 5 && mIns[0].mType == ASMIT_TXA && mIns[1].mType == ASMIT_TAY && mIns[sz - 2].mType == ASMIT_INX && mIns[sz - 1].mType == ASMIT_CPX && !exitBlock->mEntryRequiredRegs[CPU_REG_X]) + { + int i = 2; + while (i < sz - 2 && !mIns[i].ChangesXReg() && !mIns[i].ChangesYReg() && + (mIns[i].mMode != ASMIM_ABSOLUTE_X || HasAsmInstructionMode(mIns[i].mType, ASMIM_ABSOLUTE_Y))) + i++; + + if (i == sz - 2) + { + if (!prevBlock) + return OptimizeSimpleLoopInvariant(proc, full); + prevBlock->mIns.Push(NativeCodeInstruction(mBranchIns, ASMIT_TXA)); + prevBlock->mIns.Push(NativeCodeInstruction(mBranchIns, ASMIT_TAY)); + mIns[0].mType = ASMIT_NOP; + mIns[1].mType = ASMIT_TYA; + for (int i = 0; i < sz; i++) + { + mIns[i].ReplaceXRegWithYReg(); + mIns[i].mLive |= LIVE_CPU_REG_Y; + } + mEntryRequiredRegs += CPU_REG_Y; + mExitRequiredRegs += CPU_REG_Y; + changed = true; + } + } + return changed; } @@ -40339,32 +40482,56 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoop(NativeCodeProcedure * proc, bool f } else if (xindex && xother) { - for (int i = 0; i < sz-3; i++) + bool xswap = true; + for (int i = 0; i < sz - 3; i++) { if (mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == zreg) ; else if (mIns[i].mType == ASMIT_TAX && !(mIns[i].mLive & LIVE_CPU_REG_Y) && mIns[i + 1].mMode == ASMIM_ABSOLUTE_X && HasAsmInstructionMode(mIns[i + 1].mType, ASMIM_ABSOLUTE_Y) && !(mIns[i + 1].mLive & LIVE_CPU_REG_X)) - { - mIns[i].mType = ASMIT_TAY; mIns[i].mLive |= LIVE_CPU_REG_Y; mIns[i].mLive &= ~LIVE_CPU_REG_X; - mIns[i + 1].mMode = ASMIM_ABSOLUTE_Y; - changed = true; - } + ; else if (mIns[i].mType == ASMIT_LDX && HasAsmInstructionMode(ASMIT_LDY, mIns[i].mMode) && !(mIns[i].mLive & LIVE_CPU_REG_Y)) { int j = i + 1; while (j < mIns.Size() && !mIns[j].ReferencesXReg() && !mIns[j].ReferencesYReg() && !mIns[j].ChangesGlobalMemory()) j++; if (j < mIns.Size() && mIns[j].mMode == ASMIM_ABSOLUTE_X && HasAsmInstructionMode(mIns[j].mType, ASMIM_ABSOLUTE_Y) && !(mIns[j].mLive & LIVE_CPU_REG_X)) + ; + else + xswap = false; + } + else if (mIns[i].ChangesXReg()) + xswap = false; + } + + if (xswap) + { + for (int i = 0; i < sz - 3; i++) + { + if (mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == zreg) + ; + else if (mIns[i].mType == ASMIT_TAX && !(mIns[i].mLive & LIVE_CPU_REG_Y) && mIns[i + 1].mMode == ASMIM_ABSOLUTE_X && HasAsmInstructionMode(mIns[i + 1].mType, ASMIM_ABSOLUTE_Y) && !(mIns[i + 1].mLive & LIVE_CPU_REG_X)) { - mIns[i].mType = ASMIT_LDY; - mIns[j].mMode = ASMIM_ABSOLUTE_Y; - while (j > i) - { - j--; - mIns[j].mLive |= LIVE_CPU_REG_Y; mIns[j].mLive &= ~LIVE_CPU_REG_X; - } + mIns[i].mType = ASMIT_TAY; mIns[i].mLive |= LIVE_CPU_REG_Y; mIns[i].mLive &= ~LIVE_CPU_REG_X; + mIns[i + 1].mMode = ASMIM_ABSOLUTE_Y; changed = true; } + else if (mIns[i].mType == ASMIT_LDX && HasAsmInstructionMode(ASMIT_LDY, mIns[i].mMode) && !(mIns[i].mLive & LIVE_CPU_REG_Y)) + { + int j = i + 1; + while (j < mIns.Size() && !mIns[j].ReferencesXReg() && !mIns[j].ReferencesYReg() && !mIns[j].ChangesGlobalMemory()) + j++; + if (j < mIns.Size() && mIns[j].mMode == ASMIM_ABSOLUTE_X && HasAsmInstructionMode(mIns[j].mType, ASMIM_ABSOLUTE_Y) && !(mIns[j].mLive & LIVE_CPU_REG_X)) + { + mIns[i].mType = ASMIT_LDY; + mIns[j].mMode = ASMIM_ABSOLUTE_Y; + while (j > i) + { + j--; + mIns[j].mLive |= LIVE_CPU_REG_Y; mIns[j].mLive &= ~LIVE_CPU_REG_X; + } + changed = true; + } + } } } } @@ -45157,6 +45324,23 @@ bool NativeCodeBasicBlock::PeepHoleOptimizerShuffle(int pass) #endif +#if 1 + for (int i = 0; i + 2 < mIns.Size(); i++) + { + if (mIns[i + 0].mType == ASMIT_LSR && mIns[i + 0].mMode == ASMIM_ZERO_PAGE && !(mIns[i + 0].mLive & LIVE_MEM) && + mIns[i + 1].mType == ASMIT_ROR && mIns[i + 1].mMode == ASMIM_IMPLIED && + mIns[i + 2].mType == ASMIT_STA && mIns[i + 2].mMode == ASMIM_ZERO_PAGE && !(mIns[i + 2].mLive & LIVE_CPU_REG_A)) + { + if (ReverseShiftByteOrder(i)) + changed = true; + } + } + + CheckLive(); + +#endif + + #if 1 for (int i = 0; i + 1 < mIns.Size(); i++) { @@ -47857,6 +48041,18 @@ bool NativeCodeBasicBlock::PeepHoleOptimizerIterate4(int i, int pass) mIns[i + 2].mType = ASMIT_NOP; return true; } + if ( + mIns[i + 0].mType == ASMIT_CLC && + mIns[i + 1].mType == ASMIT_ADC && mIns[i + 1].mMode == ASMIM_IMMEDIATE && mIns[i + 1].mAddress == 1 && + mIns[i + 2].mType == ASMIT_STA && mIns[i + 2].mMode == ASMIM_ZERO_PAGE && + mIns[i + 3].mType == ASMIT_TAX && !(mIns[i + 3].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_C))) + { + mIns[i + 0].mType = ASMIT_TAX; mIns[i + 0].mLive |= LIVE_CPU_REG_X; + mIns[i + 1].mType = ASMIT_INX; mIns[i + 1].mMode = ASMIM_IMPLIED; mIns[i + 1].mLive |= LIVE_CPU_REG_X; + mIns[i + 2].mType = ASMIT_STX; mIns[i + 2].mLive |= LIVE_CPU_REG_X; + mIns[i + 3].mType = ASMIT_NOP; + return true; + } if ( mIns[i + 0].mType == ASMIT_LDA && mIns[i + 1].mType == ASMIT_STA && mIns[i + 1].mMode == ASMIM_ZERO_PAGE && @@ -49921,6 +50117,25 @@ bool NativeCodeBasicBlock::PeepHoleOptimizerIterate6(int i, int pass) } } + if (pass > 8 && + mIns[i + 0].mType == ASMIT_CLC && + mIns[i + 1].mType == ASMIT_ADC && mIns[i + 1].mMode == ASMIM_IMMEDIATE && mIns[i + 1].mAddress == 1 && + mIns[i + 2].mType == ASMIT_STA && mIns[i + 2].mMode == ASMIM_ZERO_PAGE && + mIns[i + 3].mType == ASMIT_CLC && + mIns[i + 4].mType == ASMIT_ADC && mIns[i + 4].mMode == ASMIM_IMMEDIATE && mIns[i + 4].mAddress == 1 && + mIns[i + 5].mType == ASMIT_STA && mIns[i + 5].mMode == ASMIM_ZERO_PAGE && + !(mIns[i + 5].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_X | LIVE_CPU_REG_C)) + ) + { + mIns[i + 0].mType = ASMIT_TAX; mIns[i + 0].mLive |= LIVE_CPU_REG_X; + mIns[i + 1].mType = ASMIT_INX; mIns[i + 1].mMode = ASMIM_IMPLIED; mIns[i + 1].mLive |= LIVE_CPU_REG_X; + mIns[i + 2].mType = ASMIT_STX; mIns[i + 2].mLive |= LIVE_CPU_REG_X; + mIns[i + 3].mType = ASMIT_INX; mIns[i + 3].mLive |= LIVE_CPU_REG_X; + mIns[i + 4].mType = ASMIT_NOP; mIns[i + 4].mMode = ASMIM_IMPLIED; + mIns[i + 5].mType = ASMIT_STX; + return true; + } + return false; } @@ -53573,7 +53788,7 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc) mInterProc->mLinkerObject->mNativeProc = this; - CheckFunc = !strcmp(mIdent->mString, "main"); + CheckFunc = !strcmp(mIdent->mString, "dynamic"); int nblocks = proc->mBlocks.Size(); tblocks = new NativeCodeBasicBlock * [nblocks]; @@ -54520,8 +54735,6 @@ void NativeCodeProcedure::Optimize(void) { changed = true; - CheckBlocks(); - BuildDataFlowSets(); ResetVisited(); mEntryBlock->RemoveUnusedResultInstructions(); @@ -54537,7 +54750,6 @@ void NativeCodeProcedure::Optimize(void) if (mEntryBlock->AbsoluteValueForwarding(pairs)) { changed = true; - CheckBlocks(); BuildDataFlowSets(); ResetVisited(); @@ -56066,7 +56278,15 @@ void NativeCodeProcedure::CompileInterBlock(InterCodeProcedure* iproc, InterCode else #endif { - block->LoadEffectiveAddress(iproc, ins, nullptr, nullptr, avalid); + bool addrused = false; + if (i + 1 < iblock->mInstructions.Size()) + { + if (iblock->mInstructions[i + 1]->mCode == IC_TYPECAST && iblock->mInstructions[i + 1]->mSrc[0].mTemp == ins->mDst.mTemp && iblock->mInstructions[i + 1]->mSrc[0].mFinal) + addrused = true; + else if (iblock->mInstructions[i + 1]->mCode == IC_STORE && iblock->mInstructions[i + 1]->mSrc[0].mTemp == ins->mDst.mTemp && iblock->mInstructions[i + 1]->mSrc[0].mFinal) + addrused = true; + } + block->LoadEffectiveAddress(iproc, ins, nullptr, nullptr, avalid, addrused); } } break; diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index 2445ace..214fe86 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -396,7 +396,7 @@ public: NativeCodeBasicBlock* BinaryOperator(InterCodeProcedure* proc, NativeCodeProcedure* nproc, const InterInstruction * ins, const InterInstruction* sins1, const InterInstruction* sins0); void UnaryOperator(InterCodeProcedure* proc, NativeCodeProcedure* nproc, const InterInstruction * ins); void RelationalOperator(InterCodeProcedure* proc, const InterInstruction * ins, NativeCodeProcedure * nproc, NativeCodeBasicBlock* trueJump, NativeCodeBasicBlock * falseJump); - void LoadEffectiveAddress(InterCodeProcedure* proc, const InterInstruction * ins, const InterInstruction* sins1, const InterInstruction* sins0, bool addrvalid); + void LoadEffectiveAddress(InterCodeProcedure* proc, const InterInstruction * ins, const InterInstruction* sins1, const InterInstruction* sins0, bool addrvalid, bool addrused = false); void LoadStoreOpAbsolute2D(InterCodeProcedure* proc, const InterInstruction* lins1, const InterInstruction* lins2, const InterInstruction* mins); void SignExtendAddImmediate(InterCodeProcedure* proc, const InterInstruction* xins, const InterInstruction* ains); void BinaryDivModPair(InterCodeProcedure* proc, NativeCodeProcedure* nproc, const InterInstruction* ins1, const InterInstruction* ins2); @@ -525,6 +525,7 @@ public: bool MoveTXADCDown(int at); bool MoveTXALogicTAXDown(int at); bool FoldShiftORAIntoLoadImmUp(int at); + bool ReverseShiftByteOrder(int at); bool FindAccuExitValue(int& at); bool MoveLoadXAbsUpCrossBlock(int at); diff --git a/oscar64/Parser.cpp b/oscar64/Parser.cpp index 35eadd5..5ffe530 100644 --- a/oscar64/Parser.cpp +++ b/oscar64/Parser.cpp @@ -13540,7 +13540,7 @@ void Parser::ParsePragma(void) else if (ConsumeTokenIf(TK_INLINE)) mCompilerOptions |= COPT_OPTIMIZE_INLINE; else if (ConsumeIdentIf("autoinline")) - mCompilerOptions |= COPT_OPTIMIZE_AUTO_INLINE | COPT_OPTIMIZE_AUTO_INLINE; + mCompilerOptions |= COPT_OPTIMIZE_INLINE | COPT_OPTIMIZE_AUTO_INLINE; else if (ConsumeIdentIf("maxinline")) mCompilerOptions |= COPT_OPTIMIZE_INLINE | COPT_OPTIMIZE_AUTO_INLINE | COPT_OPTIMIZE_AUTO_INLINE_ALL; else if (ConsumeIdentIf("constparams")) diff --git a/oscar64/oscar64.cpp b/oscar64/oscar64.cpp index e20bdc3..2572998 100644 --- a/oscar64/oscar64.cpp +++ b/oscar64/oscar64.cpp @@ -253,6 +253,8 @@ int main2(int argc, const char** argv) compiler->mCompilerOptions |= COPT_OPTIMIZE_ASSEMBLER; else if (arg[2] == 'i' && !arg[3]) compiler->mCompilerOptions |= COPT_OPTIMIZE_AUTO_INLINE; + else if (arg[2] == 'i' && arg[3] == 'i' && !arg[4]) + compiler->mCompilerOptions |= COPT_OPTIMIZE_AUTO_INLINE | COPT_OPTIMIZE_AUTO_INLINE_ALL; else if (arg[2] == 'z' && !arg[3]) compiler->mCompilerOptions |= COPT_OPTIMIZE_AUTO_ZEROPAGE; else if (arg[2] == 'p' && !arg[3])