From 0742be320413610f0324cd08a9fe88a4f773fe83 Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Sat, 16 Jul 2022 14:14:02 +0200 Subject: [PATCH] Improve dataflow analysis --- include/c64/rasterirq.c | 13 +- include/c64/rasterirq.h | 2 + include/c64/sprites.c | 42 ++-- include/c64/sprites.h | 4 + oscar64/Errors.cpp | 4 +- oscar64/InterCode.cpp | 338 +++++++++++++++++++++++++++---- oscar64/InterCode.h | 10 + oscar64/NativeCodeGenerator.cpp | 322 ++++++++++++++++++++++++++++- oscar64/NativeCodeGenerator.h | 3 + oscar64/oscar64.cpp | 2 +- oscar64/oscar64.rc | 8 +- oscar64setup/oscar64setup.vdproj | 6 +- 12 files changed, 674 insertions(+), 80 deletions(-) diff --git a/include/c64/rasterirq.c b/include/c64/rasterirq.c index 29b47c6..1c92e46 100644 --- a/include/c64/rasterirq.c +++ b/include/c64/rasterirq.c @@ -51,7 +51,7 @@ jx: tay sec - sbc #4 + sbc #3 cmp $d012 bcc l1 @@ -97,7 +97,7 @@ __asm irq1 { lda $d019 bpl ex2 - + ldx nextIRQ l1: lda rasterIRQNext, x @@ -125,7 +125,7 @@ jx: tay sec - sbc #4 + sbc #3 cmp $d012 bcc l1 @@ -156,6 +156,7 @@ e1: ex: asl $d019 + jmp $ea81 ex2: @@ -187,7 +188,11 @@ void rirq_build(RIRQCode * ic, byte size) asm_rl(ic->code + 7, ASM_BCS, -5); asm_ab(ic->code + 9, ASM_STY, 0x0000); - if (size == 1) + if (size == 0) + { + asm_np(ic->code + 0, ASM_RTS); + } + else if (size == 1) { asm_np(ic->code + 12, ASM_RTS); } diff --git a/include/c64/rasterirq.h b/include/c64/rasterirq.h index d401580..ff56309 100644 --- a/include/c64/rasterirq.h +++ b/include/c64/rasterirq.h @@ -3,7 +3,9 @@ #include "types.h" +#ifndef NUM_IRQS #define NUM_IRQS 16 +#endif extern volatile byte rirq_count; diff --git a/include/c64/sprites.c b/include/c64/sprites.c index b88bc9d..e469d15 100644 --- a/include/c64/sprites.c +++ b/include/c64/sprites.c @@ -99,14 +99,12 @@ void spr_color(char sp, char color) } -#define NUM_SPRITES 16 +static char vspriteYLow[VSPRITES_MAX], vspriteXLow[VSPRITES_MAX], vspriteXHigh[VSPRITES_MAX]; +static char vspriteImage[VSPRITES_MAX], vspriteColor[VSPRITES_MAX]; -static char vspriteYLow[NUM_SPRITES], vspriteXLow[NUM_SPRITES], vspriteXHigh[NUM_SPRITES]; -static char vspriteImage[NUM_SPRITES], vspriteColor[NUM_SPRITES]; +static char spriteOrder[VSPRITES_MAX], spriteYPos[VSPRITES_MAX + 1]; -static char spriteOrder[16], spriteYPos[17]; - -static RIRQCode spirq[8], synch; +static RIRQCode spirq[VSPRITES_MAX - 8], synch; void vspr_init(char * screen) @@ -117,21 +115,23 @@ void vspr_init(char * screen) vic.spr_expand_y = 0; vic.spr_enable = 0xff; - for(int i=0; i<8; i++) + for(int i=0; i 10) exit(20); diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index 927ccff..a8571c8 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -414,6 +414,8 @@ static bool CollidingMem(const InterOperand& op1, const InterOperand& op2, const case IM_INDIRECT: if (op1.mTemp == op2.mTemp) return op1.mIntConst < op2.mIntConst + op2.mOperandSize && op2.mIntConst < op1.mIntConst + op1.mOperandSize; + else if (op1.mLinkerObject && op2.mLinkerObject && op1.mLinkerObject != op2.mLinkerObject) + return false; else return true; default: @@ -903,6 +905,10 @@ static bool CanBypassLoad(const InterInstruction* lins, const InterInstruction* { if (lins->mVolatile) return false; + else if (lins->mSrc[0].mMemory == IM_INDIRECT && bins->mSrc[1].mMemory == IM_INDIRECT) + { + return lins->mSrc[0].mLinkerObject && bins->mSrc[1].mLinkerObject && lins->mSrc[0].mLinkerObject != bins->mSrc[1].mLinkerObject; + } else if (lins->mSrc[0].mTemp >= 0 || bins->mSrc[1].mTemp >= 0) return false; else if (lins->mSrc[0].mMemory != bins->mSrc[1].mMemory) @@ -1018,6 +1024,10 @@ static bool CanBypassLoadUp(const InterInstruction* lins, const InterInstruction return false; else if (bins->mSrc[1].mMemory == IM_FRAME || bins->mSrc[1].mMemory == IM_FFRAME) return true; + else if (lins->mSrc[0].mMemory == IM_INDIRECT && bins->mSrc[1].mMemory == IM_INDIRECT) + { + return lins->mSrc[0].mLinkerObject && bins->mSrc[1].mLinkerObject && lins->mSrc[0].mLinkerObject != bins->mSrc[1].mLinkerObject; + } else if (lins->mSrc[0].mTemp >= 0 || bins->mSrc[1].mTemp >= 0) return false; else if (lins->mSrc[0].mMemory != bins->mSrc[1].mMemory) @@ -1065,14 +1075,25 @@ static bool CanBypassStore(const InterInstruction* sins, const InterInstruction* if (bins->mCode == IC_COPY || bins->mCode == IC_STRCPY || bins->mCode == IC_PUSH_FRAME) return false; + // True data dependency + if (bins->mDst.mTemp >= 0) + { + for (int i = 0; i < sins->mNumOperands; i++) + if (bins->mDst.mTemp == sins->mSrc[i].mTemp) + return false; + } + InterMemory sm = IM_NONE, bm = IM_NONE; int bi = -1, si = -1, bt = -1, st = -1, bo = 0, so = 0, bz = 1, sz = 1; + LinkerObject* slo = nullptr, * blo = nullptr; + if (sins->mCode == IC_LOAD) { sm = sins->mSrc[0].mMemory; si = sins->mSrc[0].mVarIndex; st = sins->mSrc[0].mTemp; so = sins->mSrc[0].mIntConst; + slo = sins->mSrc[0].mLinkerObject; sz = InterTypeSize[sins->mDst.mType]; } else if (sins->mCode == IC_LEA || sins->mCode == IC_STORE) @@ -1081,6 +1102,7 @@ static bool CanBypassStore(const InterInstruction* sins, const InterInstruction* si = sins->mSrc[1].mVarIndex; st = sins->mSrc[1].mTemp; so = sins->mSrc[1].mIntConst; + slo = sins->mSrc[1].mLinkerObject; sz = InterTypeSize[sins->mSrc[0].mType]; } @@ -1090,6 +1112,7 @@ static bool CanBypassStore(const InterInstruction* sins, const InterInstruction* bi = bins->mSrc[0].mVarIndex; bt = bins->mSrc[0].mTemp; bo = bins->mSrc[0].mIntConst; + blo = bins->mSrc[0].mLinkerObject; bz = InterTypeSize[bins->mDst.mType]; } else if (bins->mCode == IC_LEA || bins->mCode == IC_STORE) @@ -1098,6 +1121,7 @@ static bool CanBypassStore(const InterInstruction* sins, const InterInstruction* bi = bins->mSrc[1].mVarIndex; bt = bins->mSrc[1].mTemp; bo = bins->mSrc[1].mIntConst; + blo = bins->mSrc[1].mLinkerObject; bz = InterTypeSize[bins->mSrc[0].mType]; } @@ -1123,10 +1147,15 @@ static bool CanBypassStore(const InterInstruction* sins, const InterInstruction* if (bi == si) return false; } - else if (sm == IM_INDIRECT && bm == IM_INDIRECT && st == bt) + else if (sm == IM_INDIRECT && bm == IM_INDIRECT) { - if (so + sz > bo && bo + bz > so) - return false; + if (st == bt) + { + if (so + sz > bo && bo + bz > so) + return false; + } + else + return slo && blo && slo != blo; } else return false; @@ -1139,13 +1168,6 @@ static bool CanBypassStore(const InterInstruction* sins, const InterInstruction* if (bins->mCode == IC_CALL || bins->mCode == IC_CALL_NATIVE || bins->mCode == IC_ASSEMBLER) return false; - // True data dependency - if (bins->mDst.mTemp >= 0) - { - for (int i = 0; i < sins->mNumOperands; i++) - if (bins->mDst.mTemp == sins->mSrc[i].mTemp) - return false; - } return true; } @@ -2799,6 +2821,7 @@ bool InterInstruction::RemoveUnusedResultInstructions(InterInstruction* pre, Num mDst.mTemp = -1; for (int i = 0; i < mNumOperands; i++) mSrc[i].mTemp = -1; + mNumOperands = 0; changed = true; } @@ -2808,6 +2831,7 @@ bool InterInstruction::RemoveUnusedResultInstructions(InterInstruction* pre, Num mDst.mTemp = -1; for (int i = 0; i < mNumOperands; i++) mSrc[i].mTemp = -1; + mNumOperands = 0; changed = true; } @@ -7162,6 +7186,13 @@ bool InterCodeBasicBlock::SimplifyIntegerNumeric(const GrowingInstructionPtrArra } break; case IC_LEA: + if (ins->mSrc[1].mMemory == IM_INDIRECT && ins->mSrc[1].mTemp >= 0 && tvalue[ins->mSrc[1].mTemp]) + { + InterInstruction* pins = tvalue[ins->mSrc[1].mTemp]; + if (pins->mCode == IC_LEA) + ins->mSrc[1].mLinkerObject = pins->mSrc[1].mLinkerObject; + } + if (ins->mSrc[1].mTemp < 0 && ins->mSrc[0].mTemp >= 0 && ltvalue[ins->mSrc[0].mTemp]) { InterInstruction* pins = ltvalue[ins->mSrc[0].mTemp]; @@ -7267,6 +7298,9 @@ bool InterCodeBasicBlock::SimplifyIntegerNumeric(const GrowingInstructionPtrArra { InterInstruction* pins = ltvalue[ins->mSrc[1].mTemp]; + if (ins->mSrc[1].mMemory == IM_INDIRECT && pins->mCode == IC_LEA) + ins->mSrc[1].mLinkerObject = pins->mSrc[1].mLinkerObject; + if (pins->mCode == IC_LEA && pins->mSrc[0].mTemp < 0 && ins->mSrc[1].mIntConst + pins->mSrc[0].mIntConst >= 0) { ins->mSrc[1].Forward(pins->mSrc[1]); @@ -7283,6 +7317,9 @@ bool InterCodeBasicBlock::SimplifyIntegerNumeric(const GrowingInstructionPtrArra { InterInstruction* pins = ltvalue[ins->mSrc[0].mTemp]; + if (ins->mSrc[0].mMemory == IM_INDIRECT && pins->mCode == IC_LEA) + ins->mSrc[0].mLinkerObject = pins->mSrc[1].mLinkerObject; + if (pins->mCode == IC_LEA && pins->mSrc[0].mTemp < 0 && ins->mSrc[0].mIntConst + pins->mSrc[0].mIntConst >= 0) { ins->mSrc[0].Forward(pins->mSrc[1]); @@ -7295,6 +7332,7 @@ bool InterCodeBasicBlock::SimplifyIntegerNumeric(const GrowingInstructionPtrArra #endif } + // Now kill all instructions that referenced the current destination as source, they are // not valid anymore @@ -8284,12 +8322,8 @@ bool InterCodeBasicBlock::CanMoveInstructionBehindBlock(int ii) const return CanMoveInstructionDown(ii, mInstructions.Size()); } - - -bool InterCodeBasicBlock::CanMoveInstructionBeforeBlock(int ii) const +bool InterCodeBasicBlock::CanMoveInstructionBeforeBlock(int ii, const InterInstruction* ins) const { - InterInstruction* ins = mInstructions[ii]; - if (ins->mCode == IC_LOAD) { for (int i = 0; i < ii; i++) @@ -8302,7 +8336,7 @@ bool InterCodeBasicBlock::CanMoveInstructionBeforeBlock(int ii) const if (!CanBypassStore(ins, mInstructions[i])) return false; } - else if (ins->mCode == IC_CALL || ins->mCode == IC_CALL_NATIVE || ins->mCode == IC_COPY || ins->mCode == IC_PUSH_FRAME || ins->mCode == IC_POP_FRAME || + else if (ins->mCode == IC_CALL || ins->mCode == IC_CALL_NATIVE || ins->mCode == IC_COPY || ins->mCode == IC_PUSH_FRAME || ins->mCode == IC_POP_FRAME || ins->mCode == IC_RETURN || ins->mCode == IC_RETURN_STRUCT || ins->mCode == IC_RETURN_VALUE) return false; else @@ -8315,6 +8349,11 @@ bool InterCodeBasicBlock::CanMoveInstructionBeforeBlock(int ii) const return true; } +bool InterCodeBasicBlock::CanMoveInstructionBeforeBlock(int ii) const +{ + return CanMoveInstructionBeforeBlock(ii, mInstructions[ii]); +} + bool InterCodeBasicBlock::MergeCommonPathInstructions(void) { bool changed = false; @@ -8387,6 +8426,110 @@ bool InterCodeBasicBlock::MergeCommonPathInstructions(void) return changed; } +static void CollectDominatorPath(InterCodeBasicBlock* block, InterCodeBasicBlock* dom, GrowingInterCodeBasicBlockPtrArray& blocks) +{ + if (blocks.IndexOf(block) != -1) + return; + if (block != dom) + { + blocks.Push(block); + for (int i = 0; i < block->mEntryBlocks.Size(); i++) + CollectDominatorPath(block->mEntryBlocks[i], dom, blocks); + } +} + +static bool CanMoveInstructionBeforePath(const GrowingInterCodeBasicBlockPtrArray& blocks, const InterInstruction* ins) +{ + for (int i = 0; i < blocks.Size(); i++) + if (!blocks[i]->CanMoveInstructionBeforeBlock(blocks[i]->mInstructions.Size(), ins)) + return false; + return true; +} + +bool InterCodeBasicBlock::MoveTrainCrossBlock(void) +{ + bool changed = false; + + if (!mVisited) + { + mVisited = true; + + if (mDominator) + { + GrowingInterCodeBasicBlockPtrArray path(nullptr); + + CollectDominatorPath(this, mDominator, path); + + if ((!mDominator->mTrueJump || path.IndexOf(mDominator->mTrueJump) != -1) && + (!mDominator->mFalseJump || path.IndexOf(mDominator->mFalseJump) != -1) && + (!mTrueJump || path.IndexOf(mTrueJump) == -1) && + (!mFalseJump || path.IndexOf(mFalseJump) == -1)) + { + int i = 1; + while (i < path.Size() && + (!path[i]->mTrueJump || path.IndexOf(path[i]->mTrueJump) != -1) && + (!path[i]->mFalseJump || path.IndexOf(path[i]->mFalseJump) != -1)) + i++; + if (i == path.Size()) + { + path.Remove(0); + + int i = 0; + while (i < mInstructions.Size()) + { + FastNumberSet nset(mEntryRequiredTemps.Size()); + + InterInstruction* ins(mInstructions[i]); + if (ins->mCode == IC_STORE) + { + for (int k = 0; k < ins->mNumOperands; k++) + { + if (ins->mSrc[k].mTemp >= 0) + nset += ins->mSrc[k].mTemp; + } + + int j = i; + while (j > 0 && mInstructions[j - 1]->mDst.mTemp >= 0 && nset[mInstructions[j - 1]->mDst.mTemp]) + { + j--; + InterInstruction* nins(mInstructions[j]); + + for (int k = 0; k < nins->mNumOperands; k++) + { + if (nins->mSrc[k].mTemp >= 0) + nset += nins->mSrc[k].mTemp; + } + } + + int k = j; + while (k <= i && CanMoveInstructionBeforeBlock(j, mInstructions[k]) && CanMoveInstructionBeforePath(path, mInstructions[k])) + k++; + + if (k > i) + { + for (int k = j; k <= i; k++) + mDominator->mInstructions.Insert(mDominator->mInstructions.Size() - 1, mInstructions[k]); + mInstructions.Remove(j, i - j + 1); + i = j - 1; + changed = true; + } + } + + i++; + } + } + } + } + + if (mTrueJump && mTrueJump->MoveTrainCrossBlock()) + changed = true; + if (mFalseJump && mFalseJump->MoveTrainCrossBlock()) + changed = true; + } + + return changed; +} + bool InterCodeBasicBlock::ForwardDiamondMovedTemp(void) { bool changed = false; @@ -10409,6 +10552,59 @@ static void SwapInstructions(InterInstruction* it, InterInstruction* ib) } } +void InterCodeBasicBlock::CheckFinalLocal(void) +{ +#if _DEBUG + NumberSet required(mExitRequiredTemps); + + for (int i = mInstructions.Size() - 1; i >= 0; i--) + { + const InterInstruction* ins(mInstructions[i]); + if (ins->mDst.mTemp >= 0) + required -= ins->mDst.mTemp; + for (int j = 0; j < ins->mNumOperands; j++) + { + if (ins->mSrc[j].mTemp >= 0 && ins->mSrc[j].mFinal) + assert(!required[ins->mSrc[j].mTemp]); + } + + for (int j = 0; j < ins->mNumOperands; j++) + if (ins->mSrc[j].mTemp >= 0) + required += ins->mSrc[j].mTemp; + } + + NumberSet provided(mEntryProvidedTemps); + + for (int i = 0; i< mInstructions.Size(); i++) + { + const InterInstruction* ins(mInstructions[i]); + for (int j = 0; j < ins->mNumOperands; j++) + { + if (ins->mSrc[j].mTemp >= 0) + assert(provided[ins->mSrc[j].mTemp]); + } + + if (ins->mDst.mTemp >= 0) + provided += ins->mDst.mTemp; + } +#endif +} + +void InterCodeBasicBlock::CheckFinal(void) +{ +#if _DEBUG + if (!mVisited) + { + mVisited = true; + + CheckFinalLocal(); + + if (mTrueJump) mTrueJump->CheckFinal(); + if (mFalseJump) mFalseJump->CheckFinal(); + } +#endif +} + void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& staticVars) { int i; @@ -10417,6 +10613,8 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati { mVisited = true; + CheckFinalLocal(); + // Remove none instructions int j = 0; @@ -10458,8 +10656,15 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati if (k == limit) { - mInstructions.Remove(i); - mInstructions.Insert(limit, ins); + for (int l = i; l < limit; l++) + { + SwapInstructions(ins, mInstructions[l + 1]); + mInstructions[l] = mInstructions[l + 1]; + } + mInstructions[limit] = ins; + +// mInstructions.Remove(i); +// mInstructions.Insert(limit, ins); } } } @@ -10502,6 +10707,8 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati else if (limit >= 0 && mInstructions[limit]->mCode == IC_JUMP) limit --; + CheckFinalLocal(); + int i = limit; #if 1 while (i >= 0) @@ -10560,6 +10767,8 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati i--; } + + CheckFinalLocal(); #endif #if 1 // move indirect load/store pairs up @@ -10578,6 +10787,9 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati CanBypassLoadUp(lins, mInstructions[j - 1]) && CanBypassStore(sins, mInstructions[j - 1])) { + SwapInstructions(mInstructions[j - 1], lins); + SwapInstructions(mInstructions[j - 1], sins); + mInstructions[j + 1] = mInstructions[j - 1]; j--; } @@ -10593,6 +10805,7 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati i++; } + CheckFinalLocal(); #endif #if 1 i = 0; @@ -10641,6 +10854,8 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati i++; } + + CheckFinalLocal(); #endif #if 1 @@ -10679,6 +10894,8 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati } #endif + CheckFinalLocal(); + bool changed = false; do { @@ -10699,6 +10916,7 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati if (mInstructions[i]->mCode == IC_LOAD_TEMPORARY && mInstructions[i]->mDst.mTemp == mInstructions[i]->mSrc->mTemp) { mInstructions[i]->mCode = IC_NONE; + mInstructions[i]->mNumOperands = 0; changed = true; } if (mInstructions[i]->mCode == IC_LOAD && mInstructions[i]->mSrc[0].mMemory == IM_GLOBAL && (mInstructions[i]->mSrc->mLinkerObject->mFlags & LOBJF_CONST)) @@ -10714,6 +10932,7 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati mInstructions[i + 1]->mSrc[0].mTemp == mInstructions[i + 0]->mSrc[0].mTemp && mInstructions[i + 0]->mSrc[0].mIntConst > mInstructions[i + 1]->mSrc[0].mIntConst) { + SwapInstructions(mInstructions[i + 0], mInstructions[i + 1]); InterInstruction* ins(mInstructions[i + 0]); mInstructions[i + 0] = mInstructions[i + 1]; mInstructions[i + 1] = ins; @@ -10731,6 +10950,7 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati int t = mInstructions[i + 0]->mDst.mTemp; mInstructions[i + 0]->mDst.mTemp = mInstructions[i + 1]->mDst.mTemp; mInstructions[i + 1]->mCode = IC_NONE; + mInstructions[i + 1]->mNumOperands = 0; mInstructions[i + 2]->mSrc[0].mTemp = mInstructions[i + 1]->mDst.mTemp; mInstructions[i + 2]->mSrc[0].mFinal = false; if (mInstructions[i + 2]->mSrc[1].mTemp == t) @@ -10751,6 +10971,7 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati mInstructions[i + 0]->mDst.mTemp = mInstructions[i + 1]->mDst.mTemp; mInstructions[i + 1]->mCode = IC_NONE; + mInstructions[i + 1]->mNumOperands = 0; mInstructions[i + 2]->mSrc[1].mTemp = mInstructions[i + 1]->mDst.mTemp; mInstructions[i + 2]->mSrc[1].mFinal = false; changed = true; @@ -10829,6 +11050,7 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati mInstructions[i + 0]->mSrc[0].mIntConst &= mInstructions[i + 1]->mSrc[0].mIntConst; mInstructions[i + 0]->mDst = mInstructions[i + 1]->mDst; mInstructions[i + 1]->mCode = IC_NONE; + mInstructions[i + 1]->mNumOperands = 0; changed = true; } else if ( @@ -10839,6 +11061,7 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati mInstructions[i + 0]->mSrc[0].mIntConst |= mInstructions[i + 1]->mSrc[0].mIntConst; mInstructions[i + 0]->mDst = mInstructions[i + 1]->mDst; mInstructions[i + 1]->mCode = IC_NONE; + mInstructions[i + 1]->mNumOperands = 0; changed = true; } #endif @@ -10916,6 +11139,7 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati mInstructions[i + 0]->mDst.mTemp = mInstructions[i + 1]->mDst.mTemp; mInstructions[i + 1]->mDst.mTemp = mInstructions[i + 1]->mSrc[0].mTemp; mInstructions[i + 1]->mSrc[0].mTemp = mInstructions[i + 0]->mDst.mTemp; + mInstructions[i + 1]->mSrc[0].mFinal = false; mInstructions[i + 0]->mSingleAssignment = mInstructions[i + 1]->mSingleAssignment; changed = true; } @@ -11117,9 +11341,12 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati } } + CheckFinalLocal(); #endif } + + #if 1 if (i + 1 < mInstructions.Size()) { @@ -11160,12 +11387,22 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati { if (CanMoveInstructionDown(j, i)) { - mInstructions.Insert(i, mInstructions[j]); - mInstructions.Remove(j); + InterInstruction* jins = mInstructions[j]; + for (int k = j; k < i - 1; k++) + { + SwapInstructions(jins, mInstructions[k + 1]); + mInstructions[k] = mInstructions[k + 1]; + } + mInstructions[i - 1] = jins; + +// mInstructions.Insert(i, mInstructions[j]); +// mInstructions.Remove(j); } } } + CheckFinalLocal(); + // sort stores up do @@ -11183,6 +11420,7 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati mInstructions[i + 0]->mSrc[1].mVarIndex == mInstructions[i + 1]->mSrc[1].mVarIndex && mInstructions[i + 0]->mSrc[1].mIntConst > mInstructions[i + 1]->mSrc[1].mIntConst)) { + SwapInstructions(mInstructions[i + 0], mInstructions[i + 1]); InterInstruction* ins = mInstructions[i + 1]; mInstructions[i + 1] = mInstructions[i + 0]; mInstructions[i + 0] = ins; @@ -11192,6 +11430,7 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati } while (changed); + CheckFinalLocal(); if (mTrueJump) mTrueJump->PeepholeOptimization(staticVars); if (mFalseJump) mFalseJump->PeepholeOptimization(staticVars); @@ -11502,6 +11741,12 @@ int InterCodeProcedure::AddTemporary(InterType type) return temp; } +void InterCodeProcedure::CheckFinal(void) +{ + ResetVisited(); + mEntryBlock->CheckFinal(); +} + void InterCodeProcedure::DisassembleDebug(const char* name) { Disassemble(name); @@ -11674,6 +11919,17 @@ void InterCodeProcedure::SingleAssignmentForwarding(void) } +void InterCodeProcedure::PeepholeOptimization(void) +{ + BuildDataFlowSets(); + TempForwarding(); + RemoveUnusedInstructions(); + + ResetVisited(); + mEntryBlock->PeepholeOptimization(mModule->mGlobalVars); +} + + void InterCodeProcedure::CheckUsedDefinedTemps(void) { #if _DEBUG @@ -12171,10 +12427,7 @@ void InterCodeProcedure::Close(void) MergeIndexedLoadStore(); - BuildDataFlowSets(); - - ResetVisited(); - mEntryBlock->PeepholeOptimization(mModule->mGlobalVars); + PeepholeOptimization(); DisassembleDebug("Peephole optimized"); @@ -12221,16 +12474,14 @@ void InterCodeProcedure::Close(void) CheckUsedDefinedTemps(); - ResetVisited(); - mEntryBlock->PeepholeOptimization(mModule->mGlobalVars); - - DisassembleDebug("Broken Peephole"); + PeepholeOptimization(); TempForwarding(); RemoveUnusedInstructions(); DisassembleDebug("Peephole optimized"); + bool changed = false; PushSinglePathResultInstructions(); @@ -12283,6 +12534,7 @@ void InterCodeProcedure::Close(void) mEntryBlock->CollectEntryBlocks(nullptr); BuildDataFlowSets(); + #if 1 ResetVisited(); mEntryBlock->BuildLocalIntegerRangeSets(mTemporaries.Size(), mLocalVars); @@ -12341,6 +12593,24 @@ void InterCodeProcedure::Close(void) MergeIndexedLoadStore(); +#if 1 + DisassembleDebug("PreMoveTrainCrossBlockA"); + + PeepholeOptimization(); + +#if 1 + DisassembleDebug("PreMoveTrainCrossBlockB"); + + ResetVisited(); + mEntryBlock->MoveTrainCrossBlock(); +#endif + PeepholeOptimization(); + + DisassembleDebug("MoveTrainCrossBlock"); + +#endif + + #if 1 ResetVisited(); mEntryBlock->RestartLocalIntegerRangeSets(mLocalVars); @@ -12397,8 +12667,7 @@ void InterCodeProcedure::Close(void) #endif #if 1 - ResetVisited(); - mEntryBlock->PeepholeOptimization(mModule->mGlobalVars); + PeepholeOptimization(); TempForwarding(); RemoveUnusedInstructions(); @@ -12427,8 +12696,7 @@ void InterCodeProcedure::Close(void) #endif #if 1 - ResetVisited(); - mEntryBlock->PeepholeOptimization(mModule->mGlobalVars); + PeepholeOptimization(); TempForwarding(); RemoveUnusedInstructions(); @@ -12526,8 +12794,7 @@ void InterCodeProcedure::Close(void) TempForwarding(); } while (GlobalConstantPropagation()); - ResetVisited(); - mEntryBlock->PeepholeOptimization(mModule->mGlobalVars); + PeepholeOptimization(); TempForwarding(); RemoveUnusedInstructions(); @@ -12544,8 +12811,7 @@ void InterCodeProcedure::Close(void) #if 1 for (int i = 0; i < 4; i++) { - ResetVisited(); - mEntryBlock->PeepholeOptimization(mModule->mGlobalVars); + PeepholeOptimization(); DisassembleDebug("Peephole Temp Check"); diff --git a/oscar64/InterCode.h b/oscar64/InterCode.h index 076ed59..3966cca 100644 --- a/oscar64/InterCode.h +++ b/oscar64/InterCode.h @@ -457,6 +457,8 @@ public: bool ForwardDiamondMovedTemp(void); + bool MoveTrainCrossBlock(void); + void MarkRelevantStatics(void); void RemoveNonRelevantStatics(void); @@ -465,10 +467,14 @@ public: bool PushSinglePathResultInstructions(void); bool CanMoveInstructionBeforeBlock(int ii) const; + bool CanMoveInstructionBeforeBlock(int ii, const InterInstruction * ins) const; bool CanMoveInstructionBehindBlock(int ii) const; bool CanMoveInstructionDown(int si, int ti) const; bool MergeCommonPathInstructions(void); + void CheckFinalLocal(void); + void CheckFinal(void); + void PeepholeOptimization(const GrowingVariableArray& staticVars); void SingleBlockLoopOptimisation(const NumberSet& aliasedParams, const GrowingVariableArray& staticVars); void SingleBlockLoopUnrolling(void); @@ -575,6 +581,10 @@ protected: void MergeBasicBlocks(void); void CheckUsedDefinedTemps(void); + void PeepholeOptimization(void); + + void CheckFinal(void); + void DisassembleDebug(const char* name); }; diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index e43baab..bfa534a 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -8815,6 +8815,21 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p return this; } +void NativeCodeBasicBlock::SignExtendAddImmediate(InterCodeProcedure* proc, const InterInstruction* xins, const InterInstruction* ains) +{ + int val = ains->mSrc[0].mTemp == xins->mDst.mTemp ? ains->mSrc[1].mIntConst : ains->mSrc[0].mIntConst; + val -= 128; + + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[xins->mSrc[0].mTemp] + 0)); + mIns.Push(NativeCodeInstruction(ASMIT_EOR, ASMIM_IMMEDIATE, 0x80)); + mIns.Push(NativeCodeInstruction(ASMIT_CLC)); + mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_IMMEDIATE, val & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ains->mDst.mTemp] + 0)); + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, 0)); + mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_IMMEDIATE, (val >> 8) & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ains->mDst.mTemp] + 1)); +} + void NativeCodeBasicBlock::UnaryOperator(InterCodeProcedure* proc, NativeCodeProcedure* nproc, const InterInstruction * ins) { int treg = BC_REG_TMP + proc->mTempOffset[ins->mDst.mTemp]; @@ -11342,6 +11357,141 @@ bool NativeCodeBasicBlock::ForwardZpYIndex(bool full) return changed; } +bool NativeCodeBasicBlock::RegisterValueForwarding(void) +{ + bool changed = false; + + if (!mVisited) + { + mVisited = true; + + FastNumberSet xreg(261), yreg(261), areg(261); + + for (int i = 0; i < mIns.Size(); i++) + { + if (mIns[i].mMode == ASMIM_ZERO_PAGE) + { + if (mIns[i].mType == ASMIT_LDX) + { + if (xreg[mIns[i].mAddress]) + { + mIns[i].mType = ASMIT_NOP; mIns[i].mMode = ASMIM_IMPLIED; + changed = true; + } + else + { + xreg.Clear(); + xreg += mIns[i].mAddress; + } + } + else if (mIns[i].mType == ASMIT_STX) + { + if (xreg[mIns[i].mAddress]) + { + mIns[i].mType = ASMIT_NOP; mIns[i].mMode = ASMIM_IMPLIED; + changed = true; + } + else + { + xreg += mIns[i].mAddress; + yreg -= mIns[i].mAddress; + areg -= mIns[i].mAddress; + } + } + else if (mIns[i].mType == ASMIT_LDY) + { + if (yreg[mIns[i].mAddress]) + { + mIns[i].mType = ASMIT_NOP; mIns[i].mMode = ASMIM_IMPLIED; + changed = true; + } + else + { + yreg.Clear(); + yreg += mIns[i].mAddress; + } + } + else if (mIns[i].mType == ASMIT_STY) + { + if (yreg[mIns[i].mAddress]) + { + mIns[i].mType = ASMIT_NOP; mIns[i].mMode = ASMIM_IMPLIED; + changed = true; + } + else + { + yreg += mIns[i].mAddress; + xreg -= mIns[i].mAddress; + areg -= mIns[i].mAddress; + } + } + else if (mIns[i].mType == ASMIT_LDA) + { + if (areg[mIns[i].mAddress]) + { + mIns[i].mType = ASMIT_NOP; mIns[i].mMode = ASMIM_IMPLIED; + changed = true; + } + else + { + areg.Clear(); + areg += mIns[i].mAddress; + } + } + else if (mIns[i].mType == ASMIT_STA) + { + if (areg[mIns[i].mAddress]) + { + mIns[i].mType = ASMIT_NOP; mIns[i].mMode = ASMIM_IMPLIED; + changed = true; + } + else + { + areg += mIns[i].mAddress; + xreg -= mIns[i].mAddress; + yreg -= mIns[i].mAddress; + } + } + else if (mIns[i].ChangesAddress()) + { + xreg -= mIns[i].mAddress; + yreg -= mIns[i].mAddress; + areg -= mIns[i].mAddress; + } + else if (mIns[i].ChangesAccu()) + { + areg.Clear(); + } + } + else if (mIns[i].mType == ASMIT_JSR) + { + xreg.Clear(); + yreg.Clear(); + areg.Clear(); + } + else if (mIns[i].ChangesXReg()) + { + xreg.Clear(); + } + else if (mIns[i].ChangesYReg()) + { + yreg.Clear(); + } + else if (mIns[i].ChangesAccu()) + { + areg.Clear(); + } + } + + if (mTrueJump && mTrueJump->RegisterValueForwarding()) + changed = true; + if (mFalseJump && mFalseJump->RegisterValueForwarding()) + changed = true; + } + + return changed; +} + bool NativeCodeBasicBlock::ForwardZpXIndex(bool full) { bool changed = false; @@ -13869,6 +14019,55 @@ bool NativeCodeBasicBlock::JoinTailCodeSequences(NativeCodeProcedure* proc, bool } } #endif +#if 1 + if (mTrueJump && mTrueJump->mNumEntries == 1 && mFalseJump && !mTrueJump->mFalseJump && mTrueJump->mTrueJump == mFalseJump) + { + int s = mIns.Size(), ts = mTrueJump->mIns.Size(); + if (s > 1 && ts > 0) + { + if (mIns[s - 2].mType == ASMIT_STA && mIns[s - 2].mMode == ASMIM_ZERO_PAGE && + mTrueJump->mIns[ts - 1].mType == ASMIT_STA && mTrueJump->mIns[ts - 1].mMode == ASMIM_ZERO_PAGE && mTrueJump->mIns[ts - 1].mAddress == mIns[s - 2].mAddress) + { + } + } + } + if (mFalseJump && mFalseJump->mNumEntries == 1 && mTrueJump && !mFalseJump->mFalseJump && mFalseJump->mTrueJump == mTrueJump) + { + int s = mIns.Size(), ts = mFalseJump->mIns.Size(); + if (s > 1 && ts > 0) + { + if (mIns[s - 2].mType == ASMIT_STA && mIns[s - 2].mMode == ASMIM_ZERO_PAGE && + mFalseJump->mIns[ts - 1].mType == ASMIT_STA && mFalseJump->mIns[ts - 1].mMode == ASMIM_ZERO_PAGE && mFalseJump->mIns[ts - 1].mAddress == mIns[s - 2].mAddress && + mFalseJump->mIns[0].mType == ASMIT_LDA && mFalseJump->mIns[0].mMode == ASMIM_ZERO_PAGE && mFalseJump->mIns[0].mAddress == mIns[s - 2].mAddress) + { + if (!(mIns[s - 1].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Y)) && HasAsmInstructionMode(ASMIT_LDY, mIns[s - 1].mMode)) + { + mIns[s - 1].mType = ASMIT_LDY; + mTrueJump->mIns.Insert(0, mIns[s - 2]); + mIns.Remove(s - 2); + mFalseJump->mIns.Remove(s - 1); + mFalseJump->mIns.Remove(0); + mExitRequiredRegs += CPU_REG_A; + mFalseJump->mExitRequiredRegs += CPU_REG_A; + mTrueJump->mEntryProvidedRegs += CPU_REG_A; + changed = true; + } + else if (!(mIns[s - 1].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_X)) && HasAsmInstructionMode(ASMIT_LDX, mIns[s - 1].mMode)) + { + mIns[s - 1].mType = ASMIT_LDX; + mTrueJump->mIns.Insert(0, mIns[s - 2]); + mIns.Remove(s - 2); + mFalseJump->mIns.Remove(s - 1); + mFalseJump->mIns.Remove(0); + mExitRequiredRegs += CPU_REG_A; + mFalseJump->mExitRequiredRegs += CPU_REG_A; + mTrueJump->mEntryProvidedRegs += CPU_REG_A; + changed = true; + } + } + } + } +#endif #if 1 if (mTrueJump && mTrueJump->mNumEntries == 1 && mFalseJump && mFalseJump->mNumEntries == 1) { @@ -18194,6 +18393,61 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc } } + if (si < ei && mIns[si].mType == ASMIT_LDX && mIns[si].mMode == ASMIM_ZERO_PAGE) + { + // Loads X once from zero page and never changes it again + + int i = si; + while (i < ei && !mIns[i + 1].ChangesXReg()) + i++; + + if (i == ei) + { + i = 0; + while (i < si && !mIns[i].ReferencesZeroPage(mIns[si].mAddress)) + i++; + if (i == si) + { + i++; + while (i < mIns.Size() && !mIns[i].ReferencesZeroPage(mIns[si].mAddress)) + i++; + if (i < mIns.Size() && (mIns[i].mType == ASMIT_DEC || mIns[i].mType == ASMIT_INC)) + { + int j = i + 1; + while (j < mIns.Size() && !mIns[j].ReferencesZeroPage(mIns[si].mAddress)) + j++; + if (j == mIns.Size()) + { + // So we have an LDX from ZP, and exactly one INC/DECof this ZP and X never changes in the loop + if (!prevBlock) + return OptimizeSimpleLoopInvariant(proc); + prevBlock->mIns.Push(mIns[si]); + exitBlock->mIns.Insert(0, NativeCodeInstruction(ASMIT_STX, mIns[si])); + mIns[si].mType = ASMIT_NOP; + mIns[si].mMode = ASMIM_IMPLIED; + + AsmInsType t; + if (mIns[i].mType == ASMIT_DEC) + t = ASMIT_DEX; + else + t = ASMIT_INX; + + mIns[i].mType = ASMIT_NOP; + mIns[i].mMode = ASMIM_IMPLIED; + + mIns.Insert(ei + 1, NativeCodeInstruction(t)); + + for (int i = 0; i < mIns.Size(); i++) + mIns[i].mLive |= LIVE_CPU_REG_X; + + CheckLive(); + return true; + } + } + } + } + } + if (si < ei && mIns[ei].mType == ASMIT_STX && mIns[ei].mMode == ASMIM_ZERO_PAGE) { int j = 0; @@ -23357,6 +23611,18 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[i + 2].mType = ASMIT_CMP; progress = true; } + else if ( + mIns[i + 0].mType == ASMIT_LDA && + mIns[i + 1].mType == ASMIT_STA && mIns[i + 1].mType == ASMIM_ZERO_PAGE && + mIns[i + 2].mType == ASMIT_CPY && mIns[i + 2].mType == ASMIM_ZERO_PAGE && mIns[i + 2].mAddress == mIns[i + 1].mAddress && + !(mIns[i + 2].mLive & (LIVE_CPU_REG_A | LIVE_MEM))) + { + mIns[i + 1].mType = ASMIT_CMP; mIns[i + 1].CopyMode(mIns[i + 0]); mIns[i + 1].mLive |= LIVE_CPU_REG_C | LIVE_CPU_REG_Z; + mIns[i + 0].mType = ASMIT_TYA; mIns[i + 0].mMode = ASMIM_IMPLIED; mIns[i + 0].mLive |= LIVE_CPU_REG_A; + + mIns[i + 2].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; + progress = true; + } if ( mIns[i + 0].mType == ASMIT_LDY && mIns[i + 0].mMode == ASMIM_IMMEDIATE && mIns[i + 0].mAddress <= 1 && @@ -23444,7 +23710,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[i + 0].mType = ASMIT_INC; mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED; - mIns[i + 3].mType = ASMIT_LDA; + mIns[i + 3].mType = ASMIT_LDA; mIns[i + 3].mLive |= LIVE_CPU_REG_A | LIVE_CPU_REG_Z; progress = true; } else if (mIns[i + 0].mType == ASMIT_LDA && mIns[i + 3].mType == ASMIT_STA && mIns[i + 0].SameEffectiveAddress(mIns[i + 3]) && @@ -23455,7 +23721,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[i + 0].mType = ASMIT_DEC; mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED; - mIns[i + 3].mType = ASMIT_LDA; + mIns[i + 3].mType = ASMIT_LDA; mIns[i + 3].mLive |= LIVE_CPU_REG_A | LIVE_CPU_REG_Z; progress = true; } else if (mIns[i + 1].mType == ASMIT_LDA && mIns[i + 3].mType == ASMIT_STA && mIns[i + 1].SameEffectiveAddress(mIns[i + 3]) && @@ -23466,7 +23732,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[i + 0].mType = ASMIT_NOP; mIns[i + 0].mMode = ASMIM_IMPLIED; mIns[i + 1].mType = ASMIT_INC; mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED; - mIns[i + 3].mType = ASMIT_LDA; + mIns[i + 3].mType = ASMIT_LDA; mIns[i + 3].mLive |= LIVE_CPU_REG_A | LIVE_CPU_REG_Z; progress = true; } else if (mIns[i + 1].mType == ASMIT_LDA && mIns[i + 3].mType == ASMIT_STA && mIns[i + 1].SameEffectiveAddress(mIns[i + 3]) && @@ -23477,7 +23743,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[i + 0].mType = ASMIT_NOP; mIns[i + 0].mMode = ASMIM_IMPLIED; mIns[i + 1].mType = ASMIT_DEC; mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED; - mIns[i + 3].mType = ASMIT_LDA; + mIns[i + 3].mType = ASMIT_LDA; mIns[i + 3].mLive |= LIVE_CPU_REG_A | LIVE_CPU_REG_Z; progress = true; } @@ -23489,7 +23755,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[i + 0].mType = ASMIT_DEC; mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED; - mIns[i + 3].mType = ASMIT_LDA; + mIns[i + 3].mType = ASMIT_LDA; mIns[i + 3].mLive |= LIVE_CPU_REG_A | LIVE_CPU_REG_Z; progress = true; } else if (mIns[i + 0].mType == ASMIT_LDA && mIns[i + 3].mType == ASMIT_STA && mIns[i + 0].SameEffectiveAddress(mIns[i + 3]) && @@ -23500,7 +23766,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[i + 0].mType = ASMIT_INC; mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED; - mIns[i + 3].mType = ASMIT_LDA; + mIns[i + 3].mType = ASMIT_LDA; mIns[i + 3].mLive |= LIVE_CPU_REG_A | LIVE_CPU_REG_Z; progress = true; } else if (mIns[i + 1].mType == ASMIT_LDA && mIns[i + 3].mType == ASMIT_STA && mIns[i + 1].SameEffectiveAddress(mIns[i + 3]) && @@ -23511,7 +23777,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[i + 0].mType = ASMIT_NOP; mIns[i + 0].mMode = ASMIM_IMPLIED; mIns[i + 1].mType = ASMIT_DEC; mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED; - mIns[i + 3].mType = ASMIT_LDA; + mIns[i + 3].mType = ASMIT_LDA; mIns[i + 3].mLive |= LIVE_CPU_REG_A | LIVE_CPU_REG_Z; progress = true; } else if (mIns[i + 1].mType == ASMIT_LDA && mIns[i + 3].mType == ASMIT_STA && mIns[i + 1].SameEffectiveAddress(mIns[i + 3]) && @@ -23522,7 +23788,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[i + 0].mType = ASMIT_NOP; mIns[i + 0].mMode = ASMIM_IMPLIED; mIns[i + 1].mType = ASMIT_INC; mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED; - mIns[i + 3].mType = ASMIT_LDA; + mIns[i + 3].mType = ASMIT_LDA; mIns[i + 3].mLive |= LIVE_CPU_REG_A | LIVE_CPU_REG_Z; progress = true; } @@ -24145,6 +24411,26 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass progress = true; } #endif +#if 1 + else if ( + mIns[i + 0].mType == ASMIT_LDA && mIns[i + 0].mMode == ASMIM_ZERO_PAGE && + mIns[i + 1].mType == ASMIT_STA && mIns[i + 1].mMode == ASMIM_ZERO_PAGE && + mIns[i + 2].mType == ASMIT_LDA && mIns[i + 2].mMode == ASMIM_ZERO_PAGE && mIns[i + 2].mAddress != mIns[i + 1].mAddress && + mIns[i + 3].mType == ASMIT_LDX && mIns[i + 3].mMode == ASMIM_ZERO_PAGE && mIns[i + 3].mAddress == mIns[i + 0].mAddress) + { + int addr = mIns[i + 0].mAddress; + mIns[i + 0].mAddress = mIns[i + 2].mAddress; + mIns[i + 3].mAddress = mIns[i + 1].mAddress; + mIns[i + 2].mAddress = addr; + + mIns[i + 2].mType = ASMIT_LDX; mIns[i + 2].mLive |= LIVE_CPU_REG_X; + mIns[i + 3].mType = ASMIT_STX; + + mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; + + progress = true; + } +#endif if ( mIns[i + 0].mType == ASMIT_LDY && mIns[i + 0].mMode == ASMIM_IMMEDIATE && mIns[i + 0].mAddress == 0 && mIns[i + 1].mType == ASMIT_LDA && mIns[i + 1].mMode == ASMIM_INDIRECT_Y && @@ -25283,6 +25569,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass if (mBranch == ASMIT_BCC) { mBranch = ASMIT_BPL; + mIns[sz - 2].mLive |= LIVE_CPU_REG_Z; mIns[sz - 1].mType = ASMIT_NOP; mIns[sz - 1].mMode = ASMIM_IMPLIED; CheckLive(); @@ -25290,6 +25577,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass else if (mBranch == ASMIT_BCS) { mBranch = ASMIT_BMI; + mIns[sz - 2].mLive |= LIVE_CPU_REG_Z; mIns[sz - 1].mType = ASMIT_NOP; mIns[sz - 1].mMode = ASMIM_IMPLIED; CheckLive(); @@ -26510,6 +26798,12 @@ void NativeCodeProcedure::Optimize(void) } #endif + if (step >= 3) + { + ResetVisited(); + mEntryBlock->RegisterValueForwarding(); + } + #if 1 if (step == 2) { @@ -27087,7 +27381,17 @@ void NativeCodeProcedure::CompileInterBlock(InterCodeProcedure* iproc, InterCode block->UnaryOperator(iproc, this, ins); break; case IC_CONVERSION_OPERATOR: - block->NumericConversion(iproc, this, ins); + if (i + 1 < iblock->mInstructions.Size() && + ins->mOperator == IA_EXT8TO16S && + iblock->mInstructions[i + 1]->mCode == IC_BINARY_OPERATOR && iblock->mInstructions[i + 1]->mOperator == IA_ADD && + (iblock->mInstructions[i + 1]->mSrc[0].mTemp == ins->mDst.mTemp && iblock->mInstructions[i + 1]->mSrc[0].mFinal && iblock->mInstructions[i + 1]->mSrc[1].mTemp < 0 || + iblock->mInstructions[i + 1]->mSrc[1].mTemp == ins->mDst.mTemp && iblock->mInstructions[i + 1]->mSrc[1].mFinal && iblock->mInstructions[i + 1]->mSrc[0].mTemp < 0)) + { + block->SignExtendAddImmediate(iproc, ins, iblock->mInstructions[i + 1]); + i++; + } + else + block->NumericConversion(iproc, this, ins); break; case IC_LEA: { diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index 002e89b..fd0ee31 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -223,6 +223,7 @@ public: void RelationalOperator(InterCodeProcedure* proc, const InterInstruction * ins, NativeCodeProcedure * nproc, NativeCodeBasicBlock* trueJump, NativeCodeBasicBlock * falseJump); void LoadEffectiveAddress(InterCodeProcedure* proc, const InterInstruction * ins, const InterInstruction* sins1, const InterInstruction* sins0, bool addrvalid); void LoadStoreOpAbsolute2D(InterCodeProcedure* proc, const InterInstruction* lins1, const InterInstruction* lins2, const InterInstruction* mins); + void SignExtendAddImmediate(InterCodeProcedure* proc, const InterInstruction* xins, const InterInstruction* ains); void NumericConversion(InterCodeProcedure* proc, NativeCodeProcedure* nproc, const InterInstruction * ins); NativeCodeBasicBlock * CopyValue(InterCodeProcedure* proc, const InterInstruction * ins, NativeCodeProcedure* nproc); @@ -315,6 +316,8 @@ public: bool ForwardZpYIndex(bool full); bool ForwardZpXIndex(bool full); + bool RegisterValueForwarding(void); + bool FindImmediateStore(int at, int reg, const NativeCodeInstruction*& ains); bool JoinTAXARange(int from, int to); diff --git a/oscar64/oscar64.cpp b/oscar64/oscar64.cpp index 61ef101..d5391cb 100644 --- a/oscar64/oscar64.cpp +++ b/oscar64/oscar64.cpp @@ -74,7 +74,7 @@ int main2(int argc, const char** argv) #else strcpy(strProductName, "oscar64"); - strcpy(strProductVersion, "1.7.145"); + strcpy(strProductVersion, "1.7.146"); #ifdef __APPLE__ uint32_t length = sizeof(basePath); diff --git a/oscar64/oscar64.rc b/oscar64/oscar64.rc index 3f70926..18fc6c3 100644 --- a/oscar64/oscar64.rc +++ b/oscar64/oscar64.rc @@ -25,8 +25,8 @@ LANGUAGE LANG_ENGLISH, SUBLANG_NEUTRAL // VS_VERSION_INFO VERSIONINFO - FILEVERSION 1,7,145,0 - PRODUCTVERSION 1,7,145,0 + FILEVERSION 1,7,146,0 + PRODUCTVERSION 1,7,146,0 FILEFLAGSMASK 0x3fL #ifdef _DEBUG FILEFLAGS 0x1L @@ -43,12 +43,12 @@ BEGIN BEGIN VALUE "CompanyName", "oscar64" VALUE "FileDescription", "oscar64 compiler" - VALUE "FileVersion", "1.7.145.0" + VALUE "FileVersion", "1.7.146.0" VALUE "InternalName", "oscar64.exe" VALUE "LegalCopyright", "Copyright (C) 2021" VALUE "OriginalFilename", "oscar64.exe" VALUE "ProductName", "oscar64" - VALUE "ProductVersion", "1.7.145.0" + VALUE "ProductVersion", "1.7.146.0" END END BLOCK "VarFileInfo" diff --git a/oscar64setup/oscar64setup.vdproj b/oscar64setup/oscar64setup.vdproj index 164736c..d11ec7c 100644 --- a/oscar64setup/oscar64setup.vdproj +++ b/oscar64setup/oscar64setup.vdproj @@ -4153,15 +4153,15 @@ { "Name" = "8:Microsoft Visual Studio" "ProductName" = "8:oscar64" - "ProductCode" = "8:{D89E65E6-A16E-4EDB-BA78-BD82F6FE4C62}" - "PackageCode" = "8:{9D053C95-A6F1-46A0-9AF5-FBA79B4614FC}" + "ProductCode" = "8:{4CED0CCC-42E5-4E78-8322-D85C2511864C}" + "PackageCode" = "8:{51B4E6A6-575E-4C5C-A8C9-18278CCFEB43}" "UpgradeCode" = "8:{9AB61EFF-ACAC-4079-9950-8D96615CD4EF}" "AspNetVersion" = "8:2.0.50727.0" "RestartWWWService" = "11:FALSE" "RemovePreviousVersions" = "11:TRUE" "DetectNewerInstalledVersion" = "11:TRUE" "InstallAllUsers" = "11:FALSE" - "ProductVersion" = "8:1.7.145" + "ProductVersion" = "8:1.7.146" "Manufacturer" = "8:oscar64" "ARPHELPTELEPHONE" = "8:" "ARPHELPLINK" = "8:"