diff --git a/include/gfx/bitmap.c b/include/gfx/bitmap.c index 9125368..131526b 100644 --- a/include/gfx/bitmap.c +++ b/include/gfx/bitmap.c @@ -735,6 +735,9 @@ void bm_line(const Bitmap * bm, const ClipRect * clip, int x0, int y0, int x1, i if (dx > 0) { + __assume(clip->left >= 0); + __assume(clip->right >= 0); + if (x1 < clip->left || x0 >= clip->right) return; @@ -752,6 +755,9 @@ void bm_line(const Bitmap * bm, const ClipRect * clip, int x0, int y0, int x1, i } else { + __assume(clip->left >= 0); + __assume(clip->right >= 0); + if (x0 < clip->left || x1 >= clip->right) return; @@ -770,6 +776,9 @@ void bm_line(const Bitmap * bm, const ClipRect * clip, int x0, int y0, int x1, i if (dy > 0) { + __assume(clip->top >= 0); + __assume(clip->bottom >= 0); + if (y1 < clip->top || y0 >= clip->bottom) return; @@ -787,6 +796,9 @@ void bm_line(const Bitmap * bm, const ClipRect * clip, int x0, int y0, int x1, i } else { + __assume(clip->top >= 0); + __assume(clip->bottom >= 0); + if (y0 < clip->top || y1 >= clip->bottom) return; diff --git a/oscar64/GlobalAnalyzer.cpp b/oscar64/GlobalAnalyzer.cpp index 5be7052..125e699 100644 --- a/oscar64/GlobalAnalyzer.cpp +++ b/oscar64/GlobalAnalyzer.cpp @@ -376,7 +376,13 @@ Declaration * GlobalAnalyzer::Analyze(Expression* exp, Declaration* procDec) case EX_VARIABLE: if ((exp->mDecValue->mFlags & DTF_STATIC) || (exp->mDecValue->mFlags & DTF_GLOBAL)) { - procDec->mFlags &= ~DTF_FUNC_CONSTEXPR; + Declaration* type = exp->mDecValue->mBase; + while (type->mType == DT_TYPE_ARRAY) + type = type->mBase; + + if (!(type->mFlags & DTF_CONST)) + procDec->mFlags &= ~DTF_FUNC_CONSTEXPR; + AnalyzeGlobalVariable(exp->mDecValue); } else diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index 598be1d..1e47b55 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -388,10 +388,10 @@ static bool CollidingMemType(InterType type1, InterType type2) { if (type1 == IT_NONE || type2 == IT_NONE) return true; - else if (type1 == IT_POINTER || 
type1 == IT_FLOAT || type2 == IT_POINTER || type2 == IT_FLOAT) + else// if (type1 == IT_POINTER || type1 == IT_FLOAT || type2 == IT_POINTER || type2 == IT_FLOAT) return type1 == type2; - else - return false; +// else +// return false; } @@ -403,7 +403,7 @@ static bool CollidingMem(const InterOperand& op1, InterType type1, const InterOp { if (op2.mMemory == IM_GLOBAL) return staticVars[op2.mVarIndex]->mAliased; - else if (op2.mMemory == IM_FPARAM) + else if (op2.mMemory == IM_FPARAM || op2.mMemory == IM_FFRAME) return false; else return CollidingMemType(type1, type2); @@ -412,7 +412,7 @@ static bool CollidingMem(const InterOperand& op1, InterType type1, const InterOp { if (op1.mMemory == IM_GLOBAL) return staticVars[op1.mVarIndex]->mAliased; - else if (op1.mMemory == IM_FPARAM) + else if (op1.mMemory == IM_FPARAM || op1.mMemory == IM_FFRAME) return false; else return CollidingMemType(type1, type2); @@ -2648,6 +2648,8 @@ InterInstruction::InterInstruction(const Location& loc, InterCode code) mVolatile = false; mInvariant = false; mSingleAssignment = false; + mNoSideEffects = false; + mConstExpr = false; } static bool TypeInteger(InterType t) @@ -3278,6 +3280,15 @@ bool InterInstruction::RemoveUnusedStaticStoreInstructions(const GrowingVariable return changed; } +int InterInstruction::NumUsedTemps(void) const +{ + int n = 0; + for (int i = 0; i < mNumOperands; i++) + if (mSrc[i].mTemp >= 0) + n++; + return n; +} + bool InterInstruction::UsesTemp(int temp) const { for (int i = 0; i < mNumOperands; i++) @@ -3894,6 +3905,10 @@ void InterInstruction::Disassemble(FILE* file) fprintf(file, "I"); if (mVolatile) fprintf(file, "V"); + if (mNoSideEffects) + fprintf(file, "E"); + if (mConstExpr) + fprintf(file, "C"); if (mSingleAssignment) fprintf(file, "S"); fprintf(file, "}\n"); @@ -11026,7 +11041,7 @@ void InterCodeBasicBlock::SingleBlockLoopOptimisation(const NumberSet& aliasedPa for (int i = 0; i < mInstructions.Size(); i++) { InterInstruction* ins = mInstructions[i]; - if 
(ins->mCode == IC_CALL || ins->mCode == IC_CALL_NATIVE) + if ((ins->mCode == IC_CALL || ins->mCode == IC_CALL_NATIVE) && !ins->mConstExpr) hasCall = true; } @@ -11119,7 +11134,7 @@ void InterCodeBasicBlock::SingleBlockLoopOptimisation(const NumberSet& aliasedPa ins->mInvariant = false; else if (ins->mCode == IC_LOAD) { - if ((ins->mSrc[0].mTemp >= 0 && mLocalModifiedTemps[ins->mSrc[0].mTemp]) || ins->mVolatile) + if ((ins->mSrc[0].mTemp >= 0 && mLocalModifiedTemps[ins->mSrc[0].mTemp]) || ins->mVolatile || hasCall) { ins->mInvariant = false; } @@ -12641,32 +12656,45 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati } while (changed); // build trains - +#if 1 for(int i = mInstructions.Size() - 1; i > 0; i--) { InterInstruction* tins = mInstructions[i]; + int ti = i; + j = i - 1; while (j >= 0 && !tins->ReferencesTemp(mInstructions[j]->mDst.mTemp)) j--; - if (j >= 0 && j < i - 1) + while (j >= 0) { - if (CanMoveInstructionDown(j, i)) + if (j < ti - 1) { - InterInstruction* jins = mInstructions[j]; - for (int k = j; k < i - 1; k++) + if (CanMoveInstructionDown(j, ti)) { - SwapInstructions(jins, mInstructions[k + 1]); - mInstructions[k] = mInstructions[k + 1]; - } - mInstructions[i - 1] = jins; + InterInstruction* jins = mInstructions[j]; + for (int k = j; k < ti - 1; k++) + { + SwapInstructions(jins, mInstructions[k + 1]); + mInstructions[k] = mInstructions[k + 1]; + } + mInstructions[ti - 1] = jins; + if (mInstructions[ti - 1]->NumUsedTemps() <= 1) + ti--; -// mInstructions.Insert(i, mInstructions[j]); -// mInstructions.Remove(j); + // mInstructions.Insert(i, mInstructions[j]); + // mInstructions.Remove(j); + } } + else if (mInstructions[j]->NumUsedTemps() <= 1) + ti--; + + j--; + while (j >= 0 && !tins->ReferencesTemp(mInstructions[j]->mDst.mTemp)) + j--; } } - +#endif CheckFinalLocal(); // sort stores up diff --git a/oscar64/InterCode.h b/oscar64/InterCode.h index 57f159c..429e249 100644 --- a/oscar64/InterCode.h +++ 
b/oscar64/InterCode.h @@ -287,7 +287,7 @@ public: InterOperator mOperator; int mNumOperands; - bool mInUse, mInvariant, mVolatile, mExpensive, mSingleAssignment; + bool mInUse, mInvariant, mVolatile, mExpensive, mSingleAssignment, mNoSideEffects, mConstExpr; InterInstruction(const Location& loc, InterCode code); @@ -298,6 +298,7 @@ public: bool ReferencesTemp(int temp) const; bool UsesTemp(int temp) const; + int NumUsedTemps(void) const; void CollectLocalAddressTemps(GrowingIntArray& localTable, GrowingIntArray& paramTable); void MarkAliasedLocalTemps(const GrowingIntArray& localTable, NumberSet& aliasedLocals, const GrowingIntArray& paramTable, NumberSet& aliasedParams); diff --git a/oscar64/InterCodeGenerator.cpp b/oscar64/InterCodeGenerator.cpp index 49b9a22..d2ca93e 100644 --- a/oscar64/InterCodeGenerator.cpp +++ b/oscar64/InterCodeGenerator.cpp @@ -2413,7 +2413,7 @@ InterCodeGenerator::ExValue InterCodeGenerator::TranslateExpression(Declaration* { if (vr.mType->mType == DT_TYPE_ARRAY || vr.mType->mType == DT_TYPE_FUNCTION) vr = Dereference(proc, texp, block, vr, 1); - else if (pdec && pdec->mBase->mType == DT_TYPE_POINTER && vr.mType->mType == DT_TYPE_INTEGER && texp->mDecValue->mType == DT_CONST_INTEGER && texp->mDecValue->mInteger == 0) + else if (pdec && pdec->mBase->mType == DT_TYPE_POINTER && vr.mType->mType == DT_TYPE_INTEGER && texp->mType == EX_CONSTANT && texp->mDecValue->mType == DT_CONST_INTEGER && texp->mDecValue->mInteger == 0) vr = CoerceType(proc, texp, block, vr, pdec->mBase); else vr = Dereference(proc, texp, block, vr); @@ -2474,6 +2474,10 @@ InterCodeGenerator::ExValue InterCodeGenerator::TranslateExpression(Declaration* cins->mCode = IC_CALL_NATIVE; else cins->mCode = IC_CALL; + + if (exp->mLeft->mType == EX_CONSTANT && exp->mLeft->mDecValue->mFlags & DTF_FUNC_CONSTEXPR) + cins->mConstExpr = true; + cins->mSrc[0].mType = IT_POINTER; cins->mSrc[0].mTemp = vl.mTemp; if (ftype->mBase->mType != DT_TYPE_VOID && ftype->mBase->mType != 
DT_TYPE_STRUCT) diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index f949f4e..6dff7b7 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -2305,6 +2305,65 @@ bool NativeCodeInstruction::BitFieldForwarding(NativeRegisterDataSet& data, AsmI } break; + case ASMIT_LDX: + if (mMode == ASMIM_ZERO_PAGE) + { + data.mRegs[CPU_REG_X].mMask = data.mRegs[mAddress].mMask; + data.mRegs[CPU_REG_X].mValue = data.mRegs[mAddress].mValue; + + if (data.mRegs[CPU_REG_X].mMask == 0xff) + { + mType = ASMIT_LDX; + mMode = ASMIM_IMMEDIATE; + mAddress = data.mRegs[CPU_REG_X].mValue; + changed = true; + } + } + else if (mMode == ASMIM_IMMEDIATE) + { + data.mRegs[CPU_REG_X].mMask = 0xff; + data.mRegs[CPU_REG_X].mValue = mAddress & 0xff; + } + else + data.mRegs[CPU_REG_X].mMask = 0; + break; + case ASMIT_STX: + if (mMode == ASMIM_ZERO_PAGE) + { + data.mRegs[mAddress].mMask = data.mRegs[CPU_REG_X].mMask; + data.mRegs[mAddress].mValue = data.mRegs[CPU_REG_X].mValue; + } + break; + + case ASMIT_LDY: + if (mMode == ASMIM_ZERO_PAGE) + { + data.mRegs[CPU_REG_Y].mMask = data.mRegs[mAddress].mMask; + data.mRegs[CPU_REG_Y].mValue = data.mRegs[mAddress].mValue; + + if (data.mRegs[CPU_REG_Y].mMask == 0xff) + { + mType = ASMIT_LDY; + mMode = ASMIM_IMMEDIATE; + mAddress = data.mRegs[CPU_REG_Y].mValue; + changed = true; + } + } + else if (mMode == ASMIM_IMMEDIATE) + { + data.mRegs[CPU_REG_Y].mMask = 0xff; + data.mRegs[CPU_REG_Y].mValue = mAddress & 0xff; + } + else + data.mRegs[CPU_REG_Y].mMask = 0; + break; + case ASMIT_STY: + if (mMode == ASMIM_ZERO_PAGE) + { + data.mRegs[mAddress].mMask = data.mRegs[CPU_REG_Y].mMask; + data.mRegs[mAddress].mValue = data.mRegs[CPU_REG_Y].mValue; + } + break; case ASMIT_AND: if (mMode == ASMIM_ZERO_PAGE) @@ -2439,8 +2498,6 @@ bool NativeCodeInstruction::BitFieldForwarding(NativeRegisterDataSet& data, AsmI case ASMIT_INC: case ASMIT_DEC: - case ASMIT_STX: - case ASMIT_STY: if (mMode == ASMIM_ZERO_PAGE) 
data.mRegs[mAddress].mMask = 0; break; @@ -3743,8 +3800,6 @@ void NativeCodeInstruction::FilterRegUsage(NumberSet& requiredTemps, NumberSet& requiredTemps += BC_REG_ACCU + i; if (!providedTemps[BC_REG_WORK + i]) requiredTemps += BC_REG_WORK + i; - if (!providedTemps[BC_REG_ADDR + i]) - requiredTemps += BC_REG_ADDR + i; } if (mFlags & NCIF_USE_ZP_32_X) { @@ -8748,8 +8803,13 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg)); if (InterTypeSize[ins->mDst.mType] > 1) { - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (ins->mSrc[1].mIntConst >> 8) & 0xff)); - mIns.Push(NativeCodeInstruction(ASMIT_SBC, ASMIM_ZERO_PAGE, treg + 1)); + if (ins->mDst.IsUByte()) + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, 0)); + else + { + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (ins->mSrc[1].mIntConst >> 8) & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_SBC, ASMIM_ZERO_PAGE, treg + 1)); + } mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg + 1)); } } @@ -8761,8 +8821,13 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg)); if (InterTypeSize[ins->mDst.mType] > 1) { - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (ins->mSrc[1].mIntConst >> 8) & 0xff)); - mIns.Push(NativeCodeInstruction(ASMIT_SBC, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[0].mTemp] + 1)); + if (ins->mDst.IsUByte()) + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, 0)); + else + { + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (ins->mSrc[1].mIntConst >> 8) & 0xff)); + mIns.Push(NativeCodeInstruction(ASMIT_SBC, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[0].mTemp] + 1)); + } mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, treg + 1)); } } @@ -13241,6 +13306,8 @@ bool 
NativeCodeBasicBlock::MoveAccuTrainUp(int at, int end) { CheckLive(); + bool needXY = (mIns[end - 1].mLive & (LIVE_CPU_REG_X | LIVE_CPU_REG_Y)) != 0; + int i = at; while (i > 0) { @@ -13270,7 +13337,7 @@ bool NativeCodeBasicBlock::MoveAccuTrainUp(int at, int end) return true; } - if (mIns[i].mType == ASMIT_JSR) + if (mIns[i].mType == ASMIT_JSR && needXY) return false; for (int j = at; j < end; j++) @@ -13330,7 +13397,13 @@ bool NativeCodeBasicBlock::MoveAccuTrainsUp(void) } else if (mIns[i].mType == ASMIT_JSR) { - wzero.Clear(); + for (int j = 0; j < 4; j++) + { + wzero -= BC_REG_ACCU + j; + wzero -= BC_REG_WORK + j; + } + +// wzero.Clear(); i++; } else if (mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].ChangesAddress()) @@ -16943,6 +17016,9 @@ void NativeCodeBasicBlock::DoCrossBlockAShortcut(int addr) bool NativeCodeBasicBlock::CanCrossBlockAShortcut(int addr) { + if (mExitRequiredRegs[CPU_REG_A]) + return false; + int i = mIns.Size(); while (i > 0) { @@ -16980,6 +17056,9 @@ void NativeCodeBasicBlock::DoCrossBlockXShortcut(int addr) bool NativeCodeBasicBlock::CanCrossBlockXShortcut(int addr) { + if (mExitRequiredRegs[CPU_REG_X]) + return false; + int i = mIns.Size(); while (i > 0) { @@ -17018,6 +17097,9 @@ void NativeCodeBasicBlock::DoCrossBlockYShortcut(int addr) bool NativeCodeBasicBlock::CanCrossBlockYShortcut(int addr) { + if (mExitRequiredRegs[CPU_REG_Y]) + return false; + int i = mIns.Size(); while (i > 0) { @@ -21586,6 +21668,7 @@ bool NativeCodeBasicBlock::MoveCLCLoadAddZPStoreDown(int at) return false; } +//static bool PeepCheck = false; //static bool PeepCheck = false; bool NativeCodeBasicBlock::ReverseBitfieldForwarding(void) @@ -22411,7 +22494,7 @@ bool NativeCodeBasicBlock::ValueForwarding(const NativeRegisterDataSet& data, bo return changed; } -bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc) +bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc, bool full) { NativeCodeBasicBlock* lblock = 
proc->AllocateBlock(); NativeCodeBasicBlock* eblock = proc->AllocateBlock(); @@ -22441,7 +22524,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc mTrueJump = lblock; mFalseJump = nullptr; - return lblock->OptimizeSimpleLoopInvariant(proc, this, eblock); + return lblock->OptimizeSimpleLoopInvariant(proc, this, eblock, full); } bool NativeCodeBasicBlock::RemoveSimpleLoopUnusedIndex(void) @@ -22538,7 +22621,7 @@ bool NativeCodeBasicBlock::RemoveSimpleLoopUnusedIndex(void) return changed; } -bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc, NativeCodeBasicBlock* prevBlock, NativeCodeBasicBlock* exitBlock) +bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc, NativeCodeBasicBlock* prevBlock, NativeCodeBasicBlock* exitBlock, bool full) { bool changed = false; @@ -22552,7 +22635,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc if (sz == 2 && (mBranch == ASMIT_BEQ || mBranch == ASMIT_BNE) && mIns[0].mType == ASMIT_LDA && mIns[1].mType == ASMIT_CMP && !(mIns[1].mFlags & NCIF_VOLATILE) && !(mIns[1].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_C))) { if (!prevBlock) - return OptimizeSimpleLoopInvariant(proc); + return OptimizeSimpleLoopInvariant(proc, full); mIns[1].mType = ASMIT_LDA; mIns[1].mLive |= LIVE_CPU_REG_A; mIns[0].mType = ASMIT_CMP; mIns[0].mLive |= LIVE_CPU_REG_Z; prevBlock->mIns.Push(mIns[1]); @@ -22563,10 +22646,36 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc return true; } +#if 1 + if (full && mEntryRequiredRegs.Size() && !mEntryRequiredRegs[CPU_REG_C] && (mBranch == ASMIT_BCC || mBranch == ASMIT_BCS)) + { + int i = 0; + while (i < mIns.Size() && !mIns[i].ChangesCarry()) + i++; + if (i < mIns.Size() && + mIns[i].mType == ASMIT_CLC && ((mBranch == ASMIT_BCC && mTrueJump == this) || (mBranch == ASMIT_BCS && mFalseJump == this)) || + mIns[i].mType == ASMIT_SEC && ((mBranch == ASMIT_BCS && mTrueJump 
== this) || (mBranch == ASMIT_BCC && mFalseJump == this))) + { + if (!prevBlock) + return OptimizeSimpleLoopInvariant(proc, full); + + prevBlock->mIns.Push(mIns[i]); + prevBlock->mExitRequiredRegs += CPU_REG_C; + for (int j = 0; j < i; j++) + mIns[j].mLive |= LIVE_CPU_REG_C; + mEntryRequiredRegs += CPU_REG_C; + mIns.Remove(i); + + CheckLive(); + + return true; + } + } +#endif if (sz >= 3 && mIns[0].mType == ASMIT_LDA && mIns[sz - 2].mType == ASMIT_LDA && mIns[0].SameEffectiveAddress(mIns[sz - 2]) && mIns[sz - 1].mType == ASMIT_CMP) { if (!prevBlock) - return OptimizeSimpleLoopInvariant(proc); + return OptimizeSimpleLoopInvariant(proc, full); prevBlock->mIns.Push(mIns[0]); mIns.Remove(0); @@ -22580,7 +22689,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc mIns[sz - 1].mType == ASMIT_CMP && HasAsmInstructionMode(ASMIT_CPY, mIns[sz - 1].mMode) && !(mIns[sz - 1].mLive & LIVE_CPU_REG_A)) { if (!prevBlock) - return OptimizeSimpleLoopInvariant(proc); + return OptimizeSimpleLoopInvariant(proc, full); mIns[sz - 2].mType = ASMIT_LDY; mIns[sz - 2].mLive |= LIVE_CPU_REG_Y; mIns[sz - 1].mType = ASMIT_CPY; mIns[sz - 1].mLive |= LIVE_CPU_REG_Y; @@ -22600,7 +22709,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc if (sz >= 2 && mIns[0].mType == ASMIT_LDY && mIns[sz - 1].mType == ASMIT_LDA && mIns[0].SameEffectiveAddress(mIns[sz - 1]) && !(mIns[sz - 1].mLive & LIVE_CPU_REG_A)) { if (!prevBlock) - return OptimizeSimpleLoopInvariant(proc); + return OptimizeSimpleLoopInvariant(proc, full); mIns[sz - 1].mType = ASMIT_LDY; mIns[sz - 1].mLive |= LIVE_CPU_REG_Y; @@ -22620,7 +22729,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc mIns[sz - 1].mType == ASMIT_CMP && HasAsmInstructionMode(ASMIT_CPX, mIns[sz - 1].mMode) && !(mIns[sz - 1].mLive & LIVE_CPU_REG_A)) { if (!prevBlock) - return OptimizeSimpleLoopInvariant(proc); + return OptimizeSimpleLoopInvariant(proc, full); mIns[sz - 
2].mType = ASMIT_LDX; mIns[sz - 2].mLive |= LIVE_CPU_REG_X; mIns[sz - 1].mType = ASMIT_CPX; mIns[sz - 1].mLive |= LIVE_CPU_REG_X; @@ -22658,7 +22767,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc if (i == mIns.Size()) { if (!prevBlock) - return OptimizeSimpleLoopInvariant(proc); + return OptimizeSimpleLoopInvariant(proc, full); i = 0; while (i < si) @@ -22692,7 +22801,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc if (j == mIns.Size()) { if (!prevBlock) - return OptimizeSimpleLoopInvariant(proc); + return OptimizeSimpleLoopInvariant(proc, full); exitBlock->mIns.Insert(0, mIns[ei]); mIns.Remove(ei); @@ -22724,7 +22833,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc if (i == mIns.Size()) { if (!prevBlock) - return OptimizeSimpleLoopInvariant(proc); + return OptimizeSimpleLoopInvariant(proc, full); i = 0; while (i < si) @@ -22777,7 +22886,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc { // So we have an LDX from ZP, and exactly one INC/DECof this ZP and X never changes in the loop if (!prevBlock) - return OptimizeSimpleLoopInvariant(proc); + return OptimizeSimpleLoopInvariant(proc, full); prevBlock->mIns.Push(mIns[si]); exitBlock->mIns.Insert(0, NativeCodeInstruction(ASMIT_STX, mIns[si])); mIns[si].mType = ASMIT_NOP; @@ -22813,7 +22922,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc if (j == mIns.Size()) { if (!prevBlock) - return OptimizeSimpleLoopInvariant(proc); + return OptimizeSimpleLoopInvariant(proc, full); exitBlock->mIns.Insert(0, mIns[ei]); mIns.Remove(ei); @@ -22845,7 +22954,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc if (i == mIns.Size()) { if (!prevBlock) - return OptimizeSimpleLoopInvariant(proc); + return OptimizeSimpleLoopInvariant(proc, full); i = 0; while (i < si) @@ -22890,7 +22999,7 @@ bool 
NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc if (i == mIns.Size()) { if (!prevBlock) - return OptimizeSimpleLoopInvariant(proc); + return OptimizeSimpleLoopInvariant(proc, full); i = 0; while (i < si) @@ -22907,6 +23016,12 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc } mIns[si + 2].CopyMode(mIns[si + 0]); + + if (mIns[si + 2].RequiresYReg()) + mIns[si + 1].mLive |= LIVE_CPU_REG_Y; + if (mIns[si + 2].RequiresXReg()) + mIns[si + 1].mLive |= LIVE_CPU_REG_X; + mIns[si + 0].CopyMode(mIns[ei]); prevBlock->mIns.Push(mIns[si]); mIns.Remove(si); @@ -22927,7 +23042,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc if (j == mIns.Size()) { if (!prevBlock) - return OptimizeSimpleLoopInvariant(proc); + return OptimizeSimpleLoopInvariant(proc, full); exitBlock->mIns.Insert(0, mIns[ei]); mIns.Remove(ei); @@ -22950,7 +23065,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc (mIns[i].mType == ASMIT_TAY && (mIns[i - 1].mType == ASMIT_LDA || mIns[i - 1].mType == ASMIT_STA) && mIns[i - 1].mMode == ASMIM_ZERO_PAGE && mIns[i - 1].mAddress == mIns[0].mAddress)) { if (!prevBlock) - return OptimizeSimpleLoopInvariant(proc); + return OptimizeSimpleLoopInvariant(proc, full); while (i < mIns.Size()) { mIns[i].mLive |= LIVE_CPU_REG_Y; @@ -22977,7 +23092,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc (mIns[i].mType == ASMIT_TAX && (mIns[i - 1].mType == ASMIT_LDA || mIns[i - 1].mType == ASMIT_STA) && mIns[i - 1].mMode == ASMIM_ZERO_PAGE && mIns[i - 1].mAddress == mIns[0].mAddress)) { if (!prevBlock) - return OptimizeSimpleLoopInvariant(proc); + return OptimizeSimpleLoopInvariant(proc, full); while (i < mIns.Size()) { mIns[i].mLive |= LIVE_CPU_REG_X; @@ -23009,7 +23124,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc mIns[i + 1].mType == ASMIT_STA && mIns[i + 1].mMode == 
ASMIM_ZERO_PAGE && mIns[i + 1].mAddress == mIns[0].mAddress && !(mIns[1].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z))) { if (!prevBlock) - return OptimizeSimpleLoopInvariant(proc); + return OptimizeSimpleLoopInvariant(proc, full); prevBlock->mIns.Push(mIns[0]); prevBlock->mIns.Push(mIns[1]); @@ -23043,7 +23158,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc if (i == mIns.Size()) { if (!prevBlock) - return OptimizeSimpleLoopInvariant(proc); + return OptimizeSimpleLoopInvariant(proc, full); prevBlock->mIns.Push(mIns[ai]); mIns.Remove(ai); @@ -23059,7 +23174,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc if (i == mIns.Size()) { if (!prevBlock) - return OptimizeSimpleLoopInvariant(proc); + return OptimizeSimpleLoopInvariant(proc, full); prevBlock->mIns.Push(mIns[ai]); mIns.Remove(ai); @@ -23073,7 +23188,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc if (i >= 0 && mIns[i].mType == ASMIT_STA && mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == mIns[ai].mAddress) { if (!prevBlock) - return OptimizeSimpleLoopInvariant(proc); + return OptimizeSimpleLoopInvariant(proc, full); prevBlock->mIns.Push(mIns[ai]); mIns.Remove(ai); @@ -23097,7 +23212,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc if (i == mIns.Size()) { if (!prevBlock) - return OptimizeSimpleLoopInvariant(proc); + return OptimizeSimpleLoopInvariant(proc, full); for (int i = 0; i < mIns.Size(); i++) mIns[i].mLive |= LIVE_CPU_REG_Y; @@ -23142,7 +23257,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc if (!fail) { if (!prevBlock) - return OptimizeSimpleLoopInvariant(proc); + return OptimizeSimpleLoopInvariant(proc, full); changed = true; @@ -23196,7 +23311,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc if (j == mIns.Size()) { if (!prevBlock) - return OptimizeSimpleLoopInvariant(proc); + 
return OptimizeSimpleLoopInvariant(proc, full); prevBlock->mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, mIns[i + 0].mAddress)); prevBlock->mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, mIns[i + 1].mAddress)); mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; @@ -23220,7 +23335,8 @@ bool NativeCodeBasicBlock::SimpleLoopReversal(NativeCodeProcedure* proc) CheckLive(); - if (mTrueJump && !mFalseJump && mTrueJump->mTrueJump == mTrueJump && mIns.Size() > 0 && mTrueJump->mIns.Size() > 1 && mTrueJump->mBranch == ASMIT_BCC) + if (mTrueJump && !mFalseJump && mTrueJump->mTrueJump == mTrueJump && mIns.Size() > 0 && mTrueJump->mIns.Size() > 1 && + mTrueJump->mBranch == ASMIT_BCC && !mExitRequiredRegs[CPU_REG_C]) { NativeCodeBasicBlock* lb = mTrueJump; int lbs = lb->mIns.Size(); @@ -23466,7 +23582,7 @@ bool NativeCodeBasicBlock::OptimizeXYSimpleLoop(void) } -bool NativeCodeBasicBlock::OptimizeSimpleLoop(NativeCodeProcedure * proc) +bool NativeCodeBasicBlock::OptimizeSimpleLoop(NativeCodeProcedure * proc, bool full) { if (!mVisited) { @@ -23517,7 +23633,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoop(NativeCodeProcedure * proc) if (sz == 2 && mTrueJump == this) { - changed = OptimizeSimpleLoopInvariant(proc, nullptr, nullptr); + changed = OptimizeSimpleLoopInvariant(proc, nullptr, nullptr, full); CheckLive(); } @@ -23656,7 +23772,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoop(NativeCodeProcedure * proc) mTrueJump = lblock; mFalseJump = nullptr; - lblock->OptimizeSimpleLoopInvariant(proc, this, eblock); + lblock->OptimizeSimpleLoopInvariant(proc, this, eblock, full); lblock->CheckLive(); @@ -23729,7 +23845,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoop(NativeCodeProcedure * proc) lblock->CheckLive(); - lblock->OptimizeSimpleLoopInvariant(proc, this, eblock); + lblock->OptimizeSimpleLoopInvariant(proc, this, eblock, full); lblock->CheckLive(); @@ -23835,7 +23951,7 @@ bool 
NativeCodeBasicBlock::OptimizeSimpleLoop(NativeCodeProcedure * proc) mTrueJump = lblock; mFalseJump = nullptr; - lblock->OptimizeSimpleLoopInvariant(proc, this, eblock); + lblock->OptimizeSimpleLoopInvariant(proc, this, eblock, full); lblock->CheckLive(); @@ -23927,7 +24043,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoop(NativeCodeProcedure * proc) mTrueJump = lblock; mFalseJump = nullptr; - lblock->OptimizeSimpleLoopInvariant(proc, this, eblock); + lblock->OptimizeSimpleLoopInvariant(proc, this, eblock, full); lblock->CheckLive(); @@ -23965,7 +24081,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoop(NativeCodeProcedure * proc) mTrueJump = lblock; mFalseJump = nullptr; - lblock->OptimizeSimpleLoopInvariant(proc, this, eblock); + lblock->OptimizeSimpleLoopInvariant(proc, this, eblock, full); lblock->CheckLive(); @@ -23979,7 +24095,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoop(NativeCodeProcedure * proc) #if 1 assert(mBranch != ASMIT_JMP || mFalseJump == nullptr); if (!changed) - changed = OptimizeSimpleLoopInvariant(proc, nullptr, nullptr); + changed = OptimizeSimpleLoopInvariant(proc, nullptr, nullptr, full); assert(mBranch != ASMIT_JMP || mFalseJump == nullptr); #endif } @@ -23988,9 +24104,9 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoop(NativeCodeProcedure * proc) CheckLive(); - if (mTrueJump && mTrueJump->OptimizeSimpleLoop(proc)) + if (mTrueJump && mTrueJump->OptimizeSimpleLoop(proc, full)) changed = true; - if (mFalseJump && mFalseJump->OptimizeSimpleLoop(proc)) + if (mFalseJump && mFalseJump->OptimizeSimpleLoop(proc, full)) changed = true; assert(mIns.Size() == 0 || mIns[0].mType != ASMIT_INV); @@ -26953,7 +27069,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass for (int i = 2; i + 1 < mIns.Size(); i++) { if (mIns[i + 0].mType == ASMIT_LDA && (mIns[i + 0].mMode == ASMIM_IMMEDIATE || mIns[i + 0].mMode == ASMIM_ZERO_PAGE) && - mIns[i + 1].mType == ASMIT_STA && (mIns[i + 1].mMode == ASMIM_ABSOLUTE || mIns[i + 
1].mMode == ASMIM_ZERO_PAGE) && !(mIns[i + 1].mFlags & NCIF_VOLATILE)) + mIns[i + 1].mType == ASMIT_STA && (mIns[i + 1].mMode == ASMIM_ABSOLUTE || mIns[i + 1].mMode == ASMIM_ABSOLUTE_X || mIns[i + 1].mMode == ASMIM_ABSOLUTE_Y || mIns[i + 1].mMode == ASMIM_ZERO_PAGE) && !(mIns[i + 1].mFlags & NCIF_VOLATILE)) { if (MoveLoadImmStoreAbsoluteUp(i + 0)) changed = true; @@ -29895,6 +30011,17 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass progress = true; } + else if ( + mIns[i + 0].mType == ASMIT_LDA && mIns[i + 0].mMode == ASMIM_IMMEDIATE && mIns[i + 0].mAddress == 0 && + mIns[i + 1].mType == ASMIT_SBC && mIns[i + 1].mMode == ASMIM_IMMEDIATE && mIns[i + 1].mAddress == 0 && + mIns[i + 2].mType == ASMIT_CLC && + mIns[i + 3].mType == ASMIT_ADC) + { + mIns[i + 0].mAddress = 0xff; + mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; + mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED; + progress = true; + } #if 1 else if ( mIns[i + 0].mType == ASMIT_LDX && mIns[i + 0].mMode == ASMIM_ABSOLUTE_Y && @@ -30985,30 +31112,54 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass #endif -#if 0 +#if 1 + if (mIns[i + 0].mType == ASMIT_CLC && + mIns[i + 1].mType == ASMIT_LDA && mIns[i + 1].mMode == ASMIM_ZERO_PAGE && + mIns[i + 2].mType == ASMIT_ADC && mIns[i + 2].mMode == ASMIM_ZERO_PAGE && + mIns[i + 3].mType == ASMIT_STA && mIns[i + 3].mMode == ASMIM_ZERO_PAGE && mIns[i + 3].mAddress == mIns[i + 2].mAddress && + mIns[i + 4].mType == ASMIT_LDA && mIns[i + 4].mMode == ASMIM_ZERO_PAGE && + mIns[i + 5].mType == ASMIT_ADC && mIns[i + 5].mMode == ASMIM_ZERO_PAGE && mIns[i + 5].mAddress == mIns[i + 2].mAddress + 1 && + mIns[i + 6].mType == ASMIT_STA && mIns[i + 6].mMode == ASMIM_ZERO_PAGE && mIns[i + 6].mAddress == mIns[i + 5].mAddress && + !(mIns[i + 6].mLive & LIVE_CPU_REG_A)) + { + int yval = RetrieveYValue(i); + proc->ResetPatched(); + if (CheckForwardSumYPointer(this, mIns[i + 3].mAddress, 
mIns[i + 3].mAddress, mIns[i + 1], i + 7, yval)) + { + proc->ResetPatched(); + if (PatchForwardSumYPointer(this, mIns[i + 3].mAddress, mIns[i + 3].mAddress, mIns[i + 1], i + 7, yval)) + { + mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; + mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED; + mIns[i + 3].mType = ASMIT_NOP; mIns[i + 3].mMode = ASMIM_IMPLIED; + + progress = true; + } + } + } +#endif +#if 1 if (mIns[i + 0].mType == ASMIT_CLC && mIns[i + 1].mType == ASMIT_LDA && mIns[i + 1].mMode == ASMIM_ZERO_PAGE && mIns[i + 2].mType == ASMIT_ADC && mIns[i + 2].mMode == ASMIM_ZERO_PAGE && mIns[i + 3].mType == ASMIT_STA && mIns[i + 3].mMode == ASMIM_ZERO_PAGE && mIns[i + 3].mAddress != mIns[i + 1].mAddress && - mIns[i + 4].mType == ASMIT_LDA && mIns[i + 4].mMode == ASMIM_ZERO_PAGE && mIns[i + 4].mAddress == mIns[i + 1].mAddress + 1 && - mIns[i + 5].mType == ASMIT_ADC && mIns[i + 5].mMode == ASMIM_IMMEDIATE && mIns[i + 5].mAddress == 0 && + mIns[i + 4].mType == ASMIT_LDA && mIns[i + 4].mMode == ASMIM_ZERO_PAGE && + mIns[i + 5].mType == ASMIT_ADC && mIns[i + 5].mMode == ASMIM_ZERO_PAGE && mIns[i + 6].mType == ASMIT_STA && mIns[i + 6].mMode == ASMIM_ZERO_PAGE && mIns[i + 6].mAddress == mIns[i + 3].mAddress + 1 && !(mIns[i + 6].mLive & LIVE_CPU_REG_A)) { + int yval = RetrieveYValue(i); proc->ResetPatched(); - if (CheckForwardSumYPointer(this, mIns[i + 3].mAddress, mIns[i + 1].mAddress, mIns[i + 3].mAddress, i + 7, -1)) + if (CheckForwardSumYPointer(this, mIns[i + 3].mAddress, mIns[i + 3].mAddress, mIns[i + 1], i + 7, yval)) { - mIns[i + 0].mType = ASMIT_NOP; mIns[i + 0].mMode = ASMIM_IMPLIED; - mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; - mIns[i + 2].mType = ASMIT_LDA; mIns[i + 2].mLive |= LIVE_CPU_REG_A; - - mIns[i + 4].mType = ASMIT_NOP; mIns[i + 4].mMode = ASMIM_IMPLIED; - mIns[i + 5].mType = ASMIT_NOP; mIns[i + 5].mMode = ASMIM_IMPLIED; - mIns[i + 6].mType = ASMIT_NOP; mIns[i + 6].mMode = ASMIM_IMPLIED; - 
proc->ResetPatched(); - if (PatchForwardSumYPointer(this, mIns[i + 3].mAddress, mIns[i + 1].mAddress, mIns[i + 3].mAddress, i + 7, -1)) + if (PatchForwardSumYPointer(this, mIns[i + 3].mAddress, mIns[i + 3].mAddress, mIns[i + 1], i + 7, yval)) + { + mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; + mIns[i + 2].mType = ASMIT_LDA; + progress = true; + } } } #endif @@ -31331,6 +31482,20 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass } } #endif + else if (sz >= 2 && + mIns[sz - 2].ChangesAccuAndFlag() && + mIns[sz - 1].mType == ASMIT_CMP && mIns[sz - 1].mMode == ASMIM_IMMEDIATE && mIns[sz - 1].mAddress == 1 && + (mBranch == ASMIT_BCC || mBranch == ASMIT_BCS) && !mExitRequiredRegs[CPU_REG_Z] && !mExitRequiredRegs[CPU_REG_C]) + { + if (mBranch == ASMIT_BCC) + mBranch = ASMIT_BEQ; + else + mBranch = ASMIT_BNE; + + mIns[sz - 1].mType = ASMIT_NOP; mIns[sz - 1].mMode = ASMIM_IMPLIED; + mIns[sz - 2].mLive |= LIVE_CPU_REG_Z; + changed = true; + } #if 1 else if (sz >= 2 && mIns[sz - 2].ChangesAccuAndFlag() && @@ -32764,7 +32929,7 @@ void NativeCodeProcedure::RebuildEntry(void) void NativeCodeProcedure::Optimize(void) { - CheckFunc = !strcmp(mInterProc->mIdent->mString, "main"); + CheckFunc = !strcmp(mInterProc->mIdent->mString, "bmu_line"); #if 1 int step = 0; @@ -32890,6 +33055,7 @@ void NativeCodeProcedure::Optimize(void) if (mEntryBlock->PeepHoleOptimizer(this, step)) changed = true; #endif + #if 1 if (step >= 3) { @@ -32939,7 +33105,7 @@ void NativeCodeProcedure::Optimize(void) if (step > 0) { ResetVisited(); - if (mEntryBlock->OptimizeSimpleLoop(this)) + if (mEntryBlock->OptimizeSimpleLoop(this, step > 4)) changed = true; ResetVisited(); @@ -33004,7 +33170,6 @@ void NativeCodeProcedure::Optimize(void) ResetVisited(); mEntryBlock->CheckBlocks(true); #endif - #if 1 if (step == 3) { @@ -33043,7 +33208,7 @@ void NativeCodeProcedure::Optimize(void) if (!changed && mEntryBlock->CrossBlockXYShortcut()) changed = true; #endif - 
+ #if 1 ResetVisited(); if (!changed && mEntryBlock->CrossBlockXYPreservation()) @@ -33053,7 +33218,6 @@ void NativeCodeProcedure::Optimize(void) #endif - if (step > 3 && !changed) { ResetVisited(); diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index ffe2a67..f0d478b 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -79,6 +79,7 @@ static const uint32 NCIF_USE_CPU_REG_Y = 0x00004000; // use a 32bit zero page register indexed by X for JSR static const uint32 NCIF_USE_ZP_32_X = 0x00008000; +static const uint32 NICF_USE_ZP_ADDR = 0x00010000; class NativeCodeInstruction { @@ -217,11 +218,11 @@ public: void BlockSizeReduction(NativeCodeProcedure* proc, int xenter, int yenter); bool BlockSizeCopyReduction(NativeCodeProcedure* proc, int & si, int & di); - bool OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc); - bool OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc, NativeCodeBasicBlock * prevBlock, NativeCodeBasicBlock* exitBlock); + bool OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc, bool full); + bool OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc, NativeCodeBasicBlock * prevBlock, NativeCodeBasicBlock* exitBlock, bool full); bool RemoveSimpleLoopUnusedIndex(void); - bool OptimizeSimpleLoop(NativeCodeProcedure* proc); + bool OptimizeSimpleLoop(NativeCodeProcedure* proc, bool full); bool SimpleLoopReversal(NativeCodeProcedure* proc); bool OptimizeInnerLoop(NativeCodeProcedure* proc, NativeCodeBasicBlock* head, NativeCodeBasicBlock* tail, GrowingArray& blocks); bool OptimizeXYSimpleLoop(void); @@ -484,6 +485,11 @@ public: bool CheckSingleUseGlobalLoad(const NativeCodeBasicBlock* block, int reg, int at, const NativeCodeInstruction& ains, int cycles); bool PatchSingleUseGlobalLoad(const NativeCodeBasicBlock* block, int reg, int at, const NativeCodeInstruction& ains); + // reg : base register pair to replace + // base: new base register + // iins : indexing instruction + // at : start 
position in block + // yval: known y immediate value or -1 if not known bool CheckForwardSumYPointer(const NativeCodeBasicBlock* block, int reg, int base, const NativeCodeInstruction & iins, int at, int yval); bool PatchForwardSumYPointer(const NativeCodeBasicBlock* block, int reg, int base, const NativeCodeInstruction & iins, int at, int yval); diff --git a/samples/memmap/charsetload.d64 b/samples/memmap/charsetload.d64 index 061786c..32a52f3 100644 Binary files a/samples/memmap/charsetload.d64 and b/samples/memmap/charsetload.d64 differ