From 32ea493c172ba784b8640a5656a1403f89d20da8 Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Sun, 2 Apr 2023 19:04:10 +0200 Subject: [PATCH] Prepare per function optimization config using pragmas --- oscar64/Declaration.cpp | 3 +- oscar64/Declaration.h | 2 +- oscar64/Disassembler.cpp | 16 ++ oscar64/Errors.h | 3 + oscar64/GlobalAnalyzer.cpp | 6 +- oscar64/InterCode.cpp | 378 +++++++++++++++++++++++++++++++- oscar64/InterCode.h | 8 + oscar64/InterCodeGenerator.cpp | 34 ++- oscar64/InterCodeGenerator.h | 1 + oscar64/Linker.h | 1 + oscar64/NativeCodeGenerator.cpp | 29 ++- oscar64/Parser.cpp | 82 ++++++- oscar64/Parser.h | 5 +- 13 files changed, 538 insertions(+), 30 deletions(-) diff --git a/oscar64/Declaration.cpp b/oscar64/Declaration.cpp index 0e96563..9f5839c 100644 --- a/oscar64/Declaration.cpp +++ b/oscar64/Declaration.cpp @@ -596,7 +596,8 @@ Expression* Expression::ConstantFold(Errors * errors) Declaration::Declaration(const Location& loc, DecType type) : mLocation(loc), mType(type), mScope(nullptr), mData(nullptr), mIdent(nullptr), mSize(0), mOffset(0), mFlags(0), mComplexity(0), mLocalSize(0), mBase(nullptr), mParams(nullptr), mValue(nullptr), mNext(nullptr), mVarIndex(-1), mLinkerObject(nullptr), mCallers(nullptr), mCalled(nullptr), mAlignment(1), - mInteger(0), mNumber(0), mMinValue(-0x80000000LL), mMaxValue(0x7fffffffLL), mFastCallBase(0), mFastCallSize(0), mStride(0), mStripe(1) + mInteger(0), mNumber(0), mMinValue(-0x80000000LL), mMaxValue(0x7fffffffLL), mFastCallBase(0), mFastCallSize(0), mStride(0), mStripe(1), + mCompilerOptions(0) {} Declaration::~Declaration(void) diff --git a/oscar64/Declaration.h b/oscar64/Declaration.h index 2a55b8a..877120b 100644 --- a/oscar64/Declaration.h +++ b/oscar64/Declaration.h @@ -189,7 +189,7 @@ public: int mOffset, mSize, mVarIndex, mNumVars, mComplexity, mLocalSize, mAlignment, mFastCallBase, mFastCallSize, mStride, mStripe; int64 mInteger, mMinValue, mMaxValue; double mNumber; - uint64 mFlags; + uint64 mFlags, mCompilerOptions; const Ident * mIdent; LinkerSection * mSection; const uint8 * mData; diff --git a/oscar64/Disassembler.cpp b/oscar64/Disassembler.cpp index f738354..59f4bd5 100644 --- a/oscar64/Disassembler.cpp +++ b/oscar64/Disassembler.cpp @@ -769,6 +769,22 @@ const char* NativeCodeDisassembler::AddrName(int addr, char* buffer, InterCodePr if (linker) { LinkerObject* obj; + + if (proc && proc->mLinkerObject && addr < 256) + { + obj = proc->mLinkerObject; + + int i = 0; + while (i < obj->mZeroPageRanges.Size() && !(addr >= obj->mZeroPageRanges[i].mOffset && addr < obj->mZeroPageRanges[i].mOffset + obj->mZeroPageRanges[i].mSize)) + i++; + + if (i < obj->mZeroPageRanges.Size()) + { + sprintf_s(buffer, 160, "; (%s + %d)", obj->mZeroPageRanges[i].mIdent->mString, addr - obj->mZeroPageRanges[i].mOffset); + return buffer; + + } + } if (proc && proc->mLinkerObject && addr >= proc->mLinkerObject->mAddress && addr < proc->mLinkerObject->mAddress + proc->mLinkerObject->mSize) obj = proc->mLinkerObject; diff --git a/oscar64/Errors.h b/oscar64/Errors.h index dea1f14..6989c88 100644 --- a/oscar64/Errors.h +++ b/oscar64/Errors.h @@ -68,6 +68,9 @@ enum ErrorID ERRR_USE_OF_UNINITIALIZED_VARIABLE, ERRR_STRIPE_REQUIRES_FIXED_SIZE_ARRAY, + ERRR_STACK_OVERFLOW, + ERRR_INVALID_NUMBER, + EERR_INVALID_PREPROCESSOR, }; diff --git a/oscar64/GlobalAnalyzer.cpp b/oscar64/GlobalAnalyzer.cpp index 2c09994..ae07bf2 100644 --- a/oscar64/GlobalAnalyzer.cpp +++ b/oscar64/GlobalAnalyzer.cpp @@ -82,11 +82,11 @@ void GlobalAnalyzer::AutoInline(void) int cost = (f->mComplexity - 20 * nparams); bool doinline = false; - if ((mCompilerOptions & COPT_OPTIMIZE_INLINE) && (f->mFlags & DTF_REQUEST_INLINE)) + if ((f->mCompilerOptions & COPT_OPTIMIZE_INLINE) && (f->mFlags & DTF_REQUEST_INLINE)) doinline = true; - if ((mCompilerOptions & COPT_OPTIMIZE_AUTO_INLINE) && (cost * (f->mCallers.Size() - 1) <= 0)) + if ((f->mCompilerOptions & COPT_OPTIMIZE_AUTO_INLINE) && (cost * (f->mCallers.Size() - 1) <= 0)) doinline = true; - if ((mCompilerOptions & COPT_OPTIMIZE_AUTO_INLINE_ALL) && (cost * (f->mCallers.Size() - 1) <= 10000)) + if ((f->mCompilerOptions & COPT_OPTIMIZE_AUTO_INLINE_ALL) && (cost * (f->mCallers.Size() - 1) <= 10000)) doinline = true; if (doinline) diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index addc7ea..7429337 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -5851,6 +5851,38 @@ void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(const GrowingVariableArray assert(mLocalValueRange.Size() == mExitRequiredTemps.Size()); + InterCodeBasicBlock * pblock; + int nloop; + + bool singleLoop = CheckSingleBlockLimitedLoop(pblock, nloop); + + FastNumberSet dependTemps(mExitRequiredTemps.Size()); + + if (singleLoop) + { + FastNumberSet changedTemps(mExitRequiredTemps.Size()); + + for (int i = 0; i < sz; i++) + { + InterInstruction* ins(mInstructions[i]); + + if (ins->mCode == IC_BINARY_OPERATOR && ins->mOperator == IA_ADD && ins->mDst.mTemp == ins->mSrc[1].mTemp && ins->mSrc[0].mTemp < 0 && ins->mSrc[0].mIntConst > 0) + { + if (dependTemps[ins->mDst.mTemp]) + changedTemps += ins->mDst.mTemp; + else if (pblock->mTrueValueRange[ins->mDst.mTemp].IsConstant()) + dependTemps += ins->mDst.mTemp; + else + changedTemps += ins->mDst.mTemp; + } + else if (ins->mDst.mTemp >= 0) + { + changedTemps += ins->mDst.mTemp; + dependTemps -= ins->mDst.mTemp; + } + } + } + for (int i = 0; i < sz; i++) { InterInstruction* ins(mInstructions[i]); @@ -6033,13 +6065,21 @@ void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(const GrowingVariableArray case IA_ADD: if (ins->mSrc[0].mTemp < 0) { - vr = mLocalValueRange[ins->mSrc[1].mTemp]; - if (ins->mSrc[0].mIntConst > 0 && vr.mMaxState == IntegerValueRange::S_WEAK) - vr.mMaxState = IntegerValueRange::S_UNBOUND; - else if (ins->mSrc[0].mIntConst < 0 && vr.mMinState == IntegerValueRange::S_WEAK) - vr.mMinState = IntegerValueRange::S_UNBOUND; - vr.mMaxValue += ins->mSrc[0].mIntConst; - vr.mMinValue += ins->mSrc[0].mIntConst; + if (ins->mSrc[1].mTemp == ins->mDst.mTemp && dependTemps[ins->mDst.mTemp] && i + 3 != sz) + { + int start = pblock->mTrueValueRange[ins->mDst.mTemp].mMinValue; + vr.SetLimit(start + ins->mSrc[0].mIntConst, start + nloop * ins->mSrc[0].mIntConst); + } + else + { + vr = mLocalValueRange[ins->mSrc[1].mTemp]; + if (ins->mSrc[0].mIntConst > 0 && vr.mMaxState == IntegerValueRange::S_WEAK) + vr.mMaxState = IntegerValueRange::S_UNBOUND; + else if (ins->mSrc[0].mIntConst < 0 && vr.mMinState == IntegerValueRange::S_WEAK) + vr.mMinState = IntegerValueRange::S_UNBOUND; + vr.mMaxValue += ins->mSrc[0].mIntConst; + vr.mMinValue += ins->mSrc[0].mIntConst; + } } else if (ins->mSrc[1].mTemp < 0) { @@ -6604,11 +6644,13 @@ void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(const GrowingVariableArray { if (ins->mSrc[0].mTemp < 0) { - mMemoryValueSize[ins->mSrc[1].mTemp] = mMemoryValueSize[ins->mDst.mTemp] - ins->mSrc[0].mIntConst; + if (ins->mSrc[0].mIntConst >= 0) + mMemoryValueSize[ins->mSrc[1].mTemp] = mMemoryValueSize[ins->mDst.mTemp] - ins->mSrc[0].mIntConst; } else if (ins->mSrc[0].mRange.mMinState == IntegerValueRange::S_BOUND) { - mMemoryValueSize[ins->mSrc[1].mTemp] = mMemoryValueSize[ins->mDst.mTemp] - ins->mSrc[0].mRange.mMinValue; + if (ins->mSrc[0].mRange.mMinValue >= 0) + mMemoryValueSize[ins->mSrc[1].mTemp] = mMemoryValueSize[ins->mDst.mTemp] - ins->mSrc[0].mRange.mMinValue; } } break; @@ -9486,6 +9528,16 @@ bool InterCodeBasicBlock::IsTempReferencedInRange(int from, int to, int temp) return false; } +InterInstruction* InterCodeBasicBlock::FindTempOrigin(int temp) const +{ + for (int i = mInstructions.Size() - 1; i >= 0; i--) + { + if (mInstructions[i]->mDst.mTemp == temp) + return mInstructions[i]; + } + return nullptr; +} + bool InterCodeBasicBlock::CanMoveInstructionDown(int si, int ti) const { InterInstruction* ins = mInstructions[si]; @@ -11607,6 +11659,236 @@ void InterCodeBasicBlock::PushMoveOutOfLoop(void) } } +bool InterCodeBasicBlock::CheckSingleBlockLimitedLoop(InterCodeBasicBlock*& pblock, int& nloop) +{ + if (mLoopHead && mNumEntries == 2 && mFalseJump && (mTrueJump == this || mFalseJump == this) && mInstructions.Size() > 3) + { + int nins = mInstructions.Size(); + + pblock = mEntryBlocks[0]; + if (pblock == this) + pblock = mEntryBlocks[1]; + + if (mInstructions[nins - 1]->mCode == IC_BRANCH && + mInstructions[nins - 2]->mCode == IC_RELATIONAL_OPERATOR && + mInstructions[nins - 3]->mCode == IC_BINARY_OPERATOR && mInstructions[nins - 3]->mOperator == IA_ADD) + { + InterInstruction* ains = mInstructions[nins - 3]; + InterInstruction* cins = mInstructions[nins - 2]; + InterInstruction* bins = mInstructions[nins - 1]; + + if (bins->mSrc[0].mTemp == cins->mDst.mTemp && + cins->mSrc[1].mTemp == ains->mDst.mTemp && + cins->mSrc[0].mTemp < 0 && + ains->mSrc[1].mTemp == ains->mDst.mTemp && + ains->mSrc[0].mTemp < 0 && + (cins->mOperator == IA_CMPLU || cins->mOperator == IA_CMPLEU) && + cins->mSrc[0].mIntConst < 255 && + ains->mSrc[0].mIntConst > 0) + { + int pi = pblock->mInstructions.Size() - 1; + while (pi >= 0 && pblock->mInstructions[pi]->mDst.mTemp != ains->mDst.mTemp) + pi--; + + int i = 0; + while (i < nins - 3 && mInstructions[i]->mDst.mTemp != ains->mDst.mTemp) + i++; + if (i == nins - 3) + { + nloop = cins->mSrc[0].mIntConst; + if (cins->mOperator == IA_CMPLEU) + nloop++; + nloop = (nloop + ains->mSrc[0].mIntConst - 1) / ains->mSrc[0].mIntConst; + + return true; + } + } + } + } + + return false; +} + + + +bool InterCodeBasicBlock::SingleBlockLoopPointerToByte(int& spareTemps) +{ + bool changed = false; + + if (!mVisited) + { + mVisited = true; + + if (mLoopHead && mNumEntries == 2 && mFalseJump && (mTrueJump == this || mFalseJump == this) && mInstructions.Size() > 3) + { + int nins = mInstructions.Size(); + + InterCodeBasicBlock* pblock = mEntryBlocks[0], * eblock = mFalseJump; + if (pblock == this) + pblock = mEntryBlocks[1]; + if (eblock == this) + eblock = mTrueJump; + + if (mInstructions[nins - 1]->mCode == IC_BRANCH && + mInstructions[nins - 2]->mCode == IC_RELATIONAL_OPERATOR && + mInstructions[nins - 3]->mCode == IC_BINARY_OPERATOR && mInstructions[nins - 3]->mOperator == IA_ADD) + { + InterInstruction* ains = mInstructions[nins - 3]; + InterInstruction* cins = mInstructions[nins - 2]; + InterInstruction* bins = mInstructions[nins - 1]; + + if (bins->mSrc[0].mTemp == cins->mDst.mTemp && + cins->mSrc[1].mTemp == ains->mDst.mTemp && + cins->mSrc[0].mTemp < 0 && + ains->mSrc[1].mTemp == ains->mDst.mTemp && + ains->mSrc[0].mTemp < 0 && + (cins->mOperator == IA_CMPLU || cins->mOperator == IA_CMPLEU) && + cins->mSrc[0].mIntConst < 255 && + ains->mSrc[0].mIntConst > 0) + { + GrowingArray tvalues(nullptr); + tvalues.SetSize(mEntryRequiredTemps.Size() + 16); + + GrowingArray mtemps(-1); + + int pi = pblock->mInstructions.Size() - 1; + while (pi >= 0 && pblock->mInstructions[pi]->mDst.mTemp != ains->mDst.mTemp) + pi--; + + int i = 0; + while (i < nins - 3 && mInstructions[i]->mDst.mTemp != ains->mDst.mTemp) + i++; + if (i == nins - 3) + { + int nloop = cins->mSrc[0].mIntConst; + if (cins->mOperator == IA_CMPLEU) + nloop++; + nloop /= ains->mSrc[0].mIntConst; + + for (int i = 0; i < mInstructions.Size() - 3; i++) + { + InterInstruction* lins = mInstructions[i]; + if (lins->mCode == IC_LEA && lins->mDst.mTemp == lins->mSrc[1].mTemp && lins->mSrc[0].mTemp < 0 && lins->mSrc[0].mIntConst > 0 && lins->mSrc[0].mIntConst * nloop < 256 && + !IsTempReferencedInRange(i + 1, mInstructions.Size(), lins->mDst.mTemp) && !IsTempModifiedInRange(0, i, lins->mDst.mTemp) && + !eblock->mEntryRequiredTemps[lins->mDst.mTemp]) + { + bool isglobal = false; + InterInstruction* slins = pblock->FindTempOrigin(lins->mSrc[1].mTemp); + if (slins) + { + if (slins->mCode == IC_CONSTANT) + { + if (slins->mConst.mMemory == IM_ABSOLUTE || slins->mConst.mMemory == IM_GLOBAL) + isglobal = true; + } + else if (slins->mCode == IC_LEA) + { + if (slins->mSrc[0].mTemp < 0) + { + if (slins->mSrc[1].mMemory == IM_ABSOLUTE || slins->mSrc[1].mMemory == IM_GLOBAL) + isglobal = true; + } + } + } + + bool failed = false; + for (int j = 0; j < i; j++) + { + InterInstruction* jins = mInstructions[j]; + + if (jins->ReferencesTemp(lins->mDst.mTemp)) + { + if (jins->mCode == IC_LOAD) + { + if (!isglobal && (jins->mSrc[0].mIntConst < 0 || jins->mSrc[0].mIntConst > 2)) + failed = true; + } + else if (jins->mCode == IC_STORE) + { + if (!isglobal && (jins->mSrc[1].mIntConst < 0 || jins->mSrc[1].mIntConst > 2)) + failed = true; + } + else + failed = true; + } + } + + if (!failed) + { + if (spareTemps + 2 >= mEntryRequiredTemps.Size() + 16) + return true; + + int inc = lins->mSrc[0].mIntConst; + + int ireg = mtemps[inc]; + + if (ireg < 0) + { + ireg = spareTemps++; + + InterInstruction* cins = new InterInstruction(lins->mLocation, IC_CONSTANT); + cins->mDst.mTemp = ireg; + cins->mDst.mType = IT_INT16; + cins->mConst.mType = IT_INT16; + cins->mConst.mIntConst = 0; + mtemps[inc] = cins->mDst.mTemp; + + pblock->mInstructions.Insert(pblock->mInstructions.Size() - 1, cins); + + InterInstruction* iins = new InterInstruction(lins->mLocation, IC_BINARY_OPERATOR); + iins->mNumOperands = 2; + iins->mOperator = IA_ADD; + iins->mDst = cins->mDst; + iins->mSrc[1] = cins->mDst; + iins->mSrc[0].mTemp = -1; + iins->mSrc[0].mType = IT_INT16; + iins->mSrc[0].mIntConst = lins->mSrc[0].mIntConst; + iins->mSrc[1].mRange.SetLimit(0, inc * (nloop - 1)); + iins->mDst.mRange.SetLimit(lins->mSrc[0].mIntConst, inc * nloop); + mInstructions.Insert(i, iins); + } + + InterInstruction* nins = new InterInstruction(lins->mLocation, IC_LEA); + nins->mNumOperands = 2; + nins->mDst.mTemp = spareTemps++; + nins->mDst.mType = IT_POINTER; + nins->mSrc[1] = lins->mSrc[1]; + nins->mSrc[1].mFinal = false; + nins->mSrc[0].mType = IT_INT16; + nins->mSrc[0].mTemp = ireg; + nins->mSrc[0].mRange.SetLimit(0, inc * (nloop - 1)); + + for (int j = 0; j < i; j++) + { + InterInstruction* jins = mInstructions[j]; + for (int k = 0; k < jins->mNumOperands; k++) + if (jins->mSrc[k].mTemp == lins->mDst.mTemp) + jins->mSrc[k].mTemp = nins->mDst.mTemp; + } + + mInstructions.Insert(0, nins); + + lins->mCode = IC_NONE; + lins->mNumOperands = 0; + lins->mDst.mTemp = -1; + + changed = true; + } + } + } + } + } + } + } + + if (mTrueJump && mTrueJump->SingleBlockLoopPointerToByte(spareTemps)) + changed = true; + if (mFalseJump && mFalseJump->SingleBlockLoopPointerToByte(spareTemps)) + changed = true; + } + + return changed; +} bool InterCodeBasicBlock::SingleBlockLoopPointerSplit(int& spareTemps) { @@ -14757,6 +15039,53 @@ void InterCodeProcedure::MergeIndexedLoadStore(void) DisassembleDebug("SimplifyPointerOffsets"); } +void InterCodeProcedure::SingleBlockLoopPointerToByte(FastNumberSet& activeSet) +{ + int silvused = mTemporaries.Size(); + + do + { + mTemporaries.SetSize(silvused, true); + + DisassembleDebug("SingleBlockLoopPointerToByteA"); + + BuildDataFlowSets(); + + DisassembleDebug("SingleBlockLoopPointerToByteB"); + + TempForwarding(); + RemoveUnusedInstructions(); + + DisassembleDebug("SingleBlockLoopPointerToByteC"); + + activeSet.Clear(); + + ResetVisited(); + mEntryBlock->CollectActiveTemporaries(activeSet); + + silvused = activeSet.Num(); + if (silvused != mTemporaries.Size()) + { + mTemporaries.SetSize(activeSet.Num(), true); + + ResetVisited(); + mEntryBlock->ShrinkActiveTemporaries(activeSet, mTemporaries); + + ResetVisited(); + mEntryBlock->RemapActiveTemporaries(activeSet); + } + + ResetVisited(); + } while (mEntryBlock->SingleBlockLoopPointerToByte(silvused)); + + assert(silvused == mTemporaries.Size()); + + DisassembleDebug("SingleBlockLoopPointerToByte"); + + +} + + void InterCodeProcedure::SingleBlockLoopPointerSplit(FastNumberSet& activeSet) { int silvused = mTemporaries.Size(); @@ -15082,7 +15411,13 @@ void InterCodeProcedure::Close(void) InterMemory paramMemory = mFastCallProcedure ? IM_FPARAM : IM_PARAM; - PromoteSimpleLocalsToTemp(paramMemory, nlocals, nparams); + if (mCompilerOptions & COPT_OPTIMIZE_BASIC) + PromoteSimpleLocalsToTemp(paramMemory, nlocals, nparams); + else + { + ResetVisited(); + mEntryBlock->CollectVariables(mModule->mGlobalVars, mLocalVars, mParamVars, paramMemory); + } BuildDataFlowSets(); @@ -15265,6 +15600,8 @@ void InterCodeProcedure::Close(void) SingleBlockLoopPointerSplit(activeSet); MergeIndexedLoadStore(); + + SingleBlockLoopPointerToByte(activeSet); #if 1 DisassembleDebug("PreMoveTrainCrossBlockA"); @@ -15384,7 +15721,7 @@ void InterCodeProcedure::Close(void) #endif #if 1 - if (mModule->mCompilerOptions & COPT_OPTIMIZE_AUTO_UNROLL) + if (mCompilerOptions & COPT_OPTIMIZE_AUTO_UNROLL) { ResetVisited(); mEntryBlock->SingleBlockLoopUnrolling(); @@ -15715,6 +16052,25 @@ void InterCodeProcedure::Close(void) if (mSaveTempsLinkerObject && mTempSize > BC_REG_TMP_SAVED - BC_REG_TMP) mSaveTempsLinkerObject->AddSpace(mTempSize - (BC_REG_TMP_SAVED - BC_REG_TMP)); + + for (int i = 0; i < mParamVars.Size(); i++) + { + InterVariable* v(mParamVars[i]); + if (v && v->mIdent) + { + if (v->mLinkerObject) + { + } + else + { + LinkerObjectRange range; + range.mIdent = v->mIdent; + range.mOffset = i + BC_REG_FPARAMS; + range.mSize = v->mSize; + mLinkerObject->mZeroPageRanges.Push(range); + } + } + } } void InterCodeProcedure::AddCalledFunction(InterCodeProcedure* proc) diff --git a/oscar64/InterCode.h b/oscar64/InterCode.h index 27996c9..daceda2 100644 --- a/oscar64/InterCode.h +++ b/oscar64/InterCode.h @@ -425,6 +425,8 @@ public: bool BuildGlobalRequiredStaticVariableSet(const GrowingVariableArray& staticVars, NumberSet& fromRequiredVars); bool RemoveUnusedStaticStoreInstructions(const GrowingVariableArray& staticVars); + bool CheckSingleBlockLimitedLoop(InterCodeBasicBlock*& pblock, int& nloop); + void RestartLocalIntegerRangeSets(int num, const GrowingVariableArray& localVars, const GrowingVariableArray& paramVars); void BuildLocalIntegerRangeSets(int num, const GrowingVariableArray& localVars, const GrowingVariableArray& paramVars); void UpdateLocalIntegerRangeSets(const GrowingVariableArray& localVars, const GrowingVariableArray& paramVars); @@ -498,6 +500,8 @@ public: bool IsTempUsedInRange(int from, int to, int temp); bool IsTempReferencedInRange(int from, int to, int temp); + InterInstruction* FindTempOrigin(int temp) const; + void CheckFinalLocal(void); void CheckFinal(void); void CheckBlocks(void); @@ -508,6 +512,7 @@ public: void SingleBlockLoopOptimisation(const NumberSet& aliasedParams, const GrowingVariableArray& staticVars); void SingleBlockLoopUnrolling(void); bool SingleBlockLoopPointerSplit(int& spareTemps); + bool SingleBlockLoopPointerToByte(int& spareTemps); bool CollectLoopBody(InterCodeBasicBlock* head, GrowingArray & body); void CollectLoopPath(const GrowingArray& body, GrowingArray& path); void InnerLoopOptimization(const NumberSet& aliasedParams); @@ -577,6 +582,8 @@ public: LinkerObject * mLinkerObject, * mSaveTempsLinkerObject; Declaration * mDeclaration; + uint64 mCompilerOptions; + InterCodeProcedure(InterCodeModule * module, const Location & location, const Ident * ident, LinkerObject* linkerObject); ~InterCodeProcedure(void); @@ -615,6 +622,7 @@ protected: void PromoteSimpleLocalsToTemp(InterMemory paramMemory, int nlocals, int nparams); void SimplifyIntegerNumeric(FastNumberSet& activeSet); void SingleBlockLoopPointerSplit(FastNumberSet& activeSet); + void SingleBlockLoopPointerToByte(FastNumberSet& activeSet); void MergeIndexedLoadStore(void); void EliminateAliasValues(); void LoadStoreForwarding(InterMemory paramMemory); diff --git a/oscar64/InterCodeGenerator.cpp b/oscar64/InterCodeGenerator.cpp index 4da56b7..fd8a002 100644 --- a/oscar64/InterCodeGenerator.cpp +++ b/oscar64/InterCodeGenerator.cpp @@ -230,6 +230,15 @@ static inline InterType InterTypeOfArithmetic(InterType t1, InterType t2) return IT_INT16; } +void InterCodeGenerator::InitParameter(InterCodeProcedure* proc, Declaration* dec, int index) +{ + if (!proc->mParamVars[index]) + { + proc->mParamVars[index] = new InterVariable(); + proc->mParamVars[index]->mIdent = dec->mIdent; + } +} + void InterCodeGenerator::InitLocalVariable(InterCodeProcedure* proc, Declaration* dec, int index) { if (!proc->mLocalVars[index]) @@ -1204,14 +1213,20 @@ InterCodeGenerator::ExValue InterCodeGenerator::TranslateExpression(Declaration* { ins->mConst.mMemory = IM_LOCAL; ins->mConst.mVarIndex = inlineMapper->mParams[dec->mVarIndex]; + InitLocalVariable(proc, dec, ins->mConst.mVarIndex); } else if (procType->mFlags & DTF_FASTCALL) { ins->mConst.mMemory = IM_FPARAM; ins->mConst.mVarIndex += procType->mFastCallBase; + InitParameter(proc, dec, ins->mConst.mVarIndex); } else + { ins->mConst.mMemory = IM_PARAM; + InitParameter(proc, dec, ins->mConst.mVarIndex); + } + if (dec->mBase->mType == DT_TYPE_ARRAY) { ref = 2; @@ -2275,7 +2290,10 @@ InterCodeGenerator::ExValue InterCodeGenerator::TranslateExpression(Declaration* } } - bool canInline = exp->mLeft->mType == EX_CONSTANT && exp->mLeft->mDecValue->mType == DT_CONST_FUNCTION && !(inlineMapper && inlineMapper->mDepth > 10); + bool canInline = exp->mLeft->mType == EX_CONSTANT && + exp->mLeft->mDecValue->mType == DT_CONST_FUNCTION && + (mCompilerOptions & COPT_OPTIMIZE_INLINE) && + !(inlineMapper && inlineMapper->mDepth > 10); bool doInline = false, inlineConstexpr = false; if (canInline) @@ -2309,8 +2327,11 @@ InterCodeGenerator::ExValue InterCodeGenerator::TranslateExpression(Declaration* doInline = true; else if (exp->mLeft->mDecValue->mFlags & DTF_INLINE) { - if (proc->mNativeProcedure || !(exp->mLeft->mDecValue->mFlags & DTF_NATIVE)) - doInline = true; + if ((exp->mLeft->mDecValue->mFlags & DTF_REQUEST_INLINE) || (mCompilerOptions & COPT_OPTIMIZE_AUTO_INLINE)) + { + if (proc->mNativeProcedure || !(exp->mLeft->mDecValue->mFlags & DTF_NATIVE)) + doInline = true; + } } } @@ -3665,6 +3686,11 @@ InterCodeProcedure* InterCodeGenerator::TranslateProcedure(InterCodeModule * mod { InterCodeProcedure* proc = new InterCodeProcedure(mod, dec->mLocation, dec->mIdent, mLinker->AddObject(dec->mLocation, dec->mIdent, dec->mSection, LOT_BYTE_CODE)); + uint64 outerCompilerOptions = mCompilerOptions; + mCompilerOptions = dec->mCompilerOptions; + + proc->mCompilerOptions = mCompilerOptions; + dec->mVarIndex = proc->mID; dec->mLinkerObject = proc->mLinkerObject; proc->mNumLocals = dec->mNumVars; @@ -3733,5 +3759,7 @@ InterCodeProcedure* InterCodeGenerator::TranslateProcedure(InterCodeModule * mod proc->Close(); } + mCompilerOptions = outerCompilerOptions; + return proc; } diff --git a/oscar64/InterCodeGenerator.h b/oscar64/InterCodeGenerator.h index 614d9d5..7637326 100644 --- a/oscar64/InterCodeGenerator.h +++ b/oscar64/InterCodeGenerator.h @@ -27,6 +27,7 @@ public: void TranslateAssembler(InterCodeModule* mod, Expression * exp, GrowingArray * refvars); void InitGlobalVariable(InterCodeModule* mod, Declaration* dec); void InitLocalVariable(InterCodeProcedure* proc, Declaration* dec, int index); + void InitParameter(InterCodeProcedure* proc, Declaration* dec, int index); protected: Errors* mErrors; diff --git a/oscar64/Linker.h b/oscar64/Linker.h index eb65e22..925b4e1 100644 --- a/oscar64/Linker.h +++ b/oscar64/Linker.h @@ -187,6 +187,7 @@ public: ExpandingArray mRanges; ExpandingArray mCodeLocations; + ExpandingArray mZeroPageRanges; LinkerObject(void); ~LinkerObject(void); diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index 3b8ace3..ef09d4f 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -6857,9 +6857,9 @@ NativeCodeBasicBlock * NativeCodeBasicBlock::CopyValue(InterCodeProcedure* proc, if (sstride > 1 || dstride > 1) msize = 32; - else if (nproc->mGenerator->mCompilerOptions & COPT_OPTIMIZE_AUTO_UNROLL) + else if (nproc->mInterProc->mCompilerOptions & COPT_OPTIMIZE_AUTO_UNROLL) msize = 8; - else if (nproc->mGenerator->mCompilerOptions & COPT_OPTIMIZE_CODE_SIZE) + else if (nproc->mInterProc->mCompilerOptions & COPT_OPTIMIZE_CODE_SIZE) msize = 2; #if 1 if (ins->mSrc[0].mTemp < 0 && ins->mSrc[1].mTemp < 0) @@ -8771,8 +8771,8 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p int lcost = 8 + 2 * (nbytes - 1); int ucost = shift * (1 + 2 * nbytes); - if ((nproc->mGenerator->mCompilerOptions & COPT_OPTIMIZE_CODE_SIZE) && lcost < ucost || - !(nproc->mGenerator->mCompilerOptions & COPT_OPTIMIZE_AUTO_UNROLL) && 2 * lcost < ucost) + if ((nproc->mInterProc->mCompilerOptions & COPT_OPTIMIZE_CODE_SIZE) && lcost < ucost || + !(nproc->mInterProc->mCompilerOptions & COPT_OPTIMIZE_AUTO_UNROLL) && 2 * lcost < ucost) { mIns.Push(NativeCodeInstruction(ins, ASMIT_LDX, ASMIM_IMMEDIATE, shift)); this->Close(ins, lblock, nullptr, ASMIT_JMP); @@ -11768,7 +11768,7 @@ void NativeCodeBasicBlock::CallAssembler(InterCodeProcedure* proc, NativeCodePro assert(ins->mSrc[0].mLinkerObject); - if (ins->mCode == IC_ASSEMBLER && (proc->mModule->mCompilerOptions & COPT_OPTIMIZE_ASSEMBLER)) + if (ins->mCode == IC_ASSEMBLER && (proc->mCompilerOptions & COPT_OPTIMIZE_ASSEMBLER)) { ExpandingArray tains; @@ -12372,20 +12372,24 @@ bool NativeCodeBasicBlock::MergeBasicBlocks(void) changed = true; } - while (mTrueJump && mTrueJump->mIns.Size() == 0 && !mTrueJump->mFalseJump && !mTrueJump->mLocked && mTrueJump != this && mTrueJump->mTrueJump != mTrueJump) + int steps = 100; + while (mTrueJump && mTrueJump->mIns.Size() == 0 && !mTrueJump->mFalseJump && !mTrueJump->mLocked && mTrueJump != this && mTrueJump->mTrueJump != mTrueJump && steps > 0) { mTrueJump->mNumEntries--; mTrueJump = mTrueJump->mTrueJump; mTrueJump->mNumEntries++; changed = true; + steps--; } - while (mFalseJump && mFalseJump->mTrueJump && mFalseJump->mIns.Size() == 0 && !mFalseJump->mFalseJump && !mFalseJump->mLocked && mFalseJump != this && mFalseJump->mTrueJump != mFalseJump) + steps = 100; + while (mFalseJump && mFalseJump->mTrueJump && mFalseJump->mIns.Size() == 0 && !mFalseJump->mFalseJump && !mFalseJump->mLocked && mFalseJump != this && mFalseJump->mTrueJump != mFalseJump && steps > 0) { mFalseJump->mNumEntries--; mFalseJump = mFalseJump->mTrueJump; mFalseJump->mNumEntries++; changed = true; + steps--; } if (mTrueJump && mTrueJump == mFalseJump) @@ -25777,6 +25781,8 @@ bool NativeCodeBasicBlock::MoveLoadShiftStoreUp(int at) return false; } + mIns[j].mLive |= LIVE_CPU_REG_A; + mIns[at + 1].mLive |= mIns[j].mLive; mIns[at + 2].mLive |= mIns[j].mLive; @@ -26437,6 +26443,9 @@ bool NativeCodeBasicBlock::ValueForwarding(NativeCodeProcedure* proc, const Nati if (ins.mMode == ASMIM_ZERO_PAGE && ins.ChangesAddress()) mNDataSet.ResetZeroPage(ins.mAddress); + else if (ins.mMode == ASMIM_ABSOLUTE && ins.ChangesAddress()) + mNDataSet.ResetAbsolute(ins.mLinkerObject, ins.mAddress); + if (ins.mType == ASMIT_JSR) { mNDataSet.ResetWorkRegs(); @@ -30165,7 +30174,7 @@ static bool CheckBlockCopySequence(const ExpandingArray& bool NativeCodeBasicBlock::BlockSizeCopyReduction(NativeCodeProcedure* proc, int& si, int& di) { - if ((proc->mGenerator->mCompilerOptions & COPT_OPTIMIZE_CODE_SIZE)) + if ((proc->mInterProc->mCompilerOptions & COPT_OPTIMIZE_CODE_SIZE)) { if (si + 1 < mIns.Size() && mIns[si + 0].mType == ASMIT_LDA && (mIns[si + 0].mMode == ASMIM_ZERO_PAGE || mIns[si + 0].mMode == ASMIM_ABSOLUTE) && @@ -38656,7 +38665,7 @@ void NativeCodeProcedure::RebuildEntry(void) void NativeCodeProcedure::Optimize(void) { - CheckFunc = !strcmp(mInterProc->mIdent->mString, "test"); + CheckFunc = !strcmp(mInterProc->mIdent->mString, "main"); #if 1 int step = 0; @@ -39968,6 +39977,8 @@ void NativeCodeGenerator::CompleteRuntime(void) LinkerObject* NativeCodeGenerator::AllocateShortMulTable(InterOperator op, int factor, int size, bool msb) { + assert(size > 0); + int i = 0; while (i < mMulTables.Size() && (mMulTables[i].mFactor != factor || mMulTables[i].mOperator != op)) i++; diff --git a/oscar64/Parser.cpp b/oscar64/Parser.cpp index 9352835..12f2470 100644 --- a/oscar64/Parser.cpp +++ b/oscar64/Parser.cpp @@ -16,6 +16,7 @@ Parser::Parser(Errors* errors, Scanner* scanner, CompilationUnits* compilationUn mUnrollLoop = 0; mUnrollLoopPage = false; mInlineCall = false; + mCompilerOptionSP = 0; for (int i = 0; i < 256; i++) mCharMap[i] = i; @@ -1284,6 +1285,9 @@ Declaration* Parser::ParseDeclaration(bool variable, bool expression) if (ndec->mFlags & DTF_DEFINED) mErrors->Error(ndec->mLocation, EERR_DUPLICATE_DEFINITION, "Duplicate function definition"); + ndec->mCompilerOptions = mCompilerOptions; + ndec->mBase->mCompilerOptions = mCompilerOptions; + ndec->mVarIndex = -1; ndec->mValue = ParseFunction(ndec->mBase); ndec->mFlags |= DTF_DEFINED; @@ -3234,9 +3238,20 @@ bool Parser::ConsumeTokenIf(Token token) } else return false; - } +bool Parser::ConsumeIdentIf(const char* ident) +{ + if (mScanner->mToken == TK_IDENT && !strcmp(ident, mScanner->mTokenIdent->mString)) + { + mScanner->NextToken(); + return true; + } + else + return false; +} + + void Parser::ParsePragma(void) { if (mScanner->mToken == TK_IDENT) @@ -3947,6 +3962,71 @@ void Parser::ParsePragma(void) ConsumeToken(TK_CLOSE_PARENTHESIS); mInlineCall = true; } + else if (ConsumeIdentIf("optimize")) + { + mScanner->NextToken(); + ConsumeToken(TK_OPEN_PARENTHESIS); + if (!ConsumeTokenIf(TK_CLOSE_PARENTHESIS)) + { + do { + if (ConsumeIdentIf("push")) + { + if (mCompilerOptionSP < 32) + mCompilerOptionStack[mCompilerOptionSP++] = mCompilerOptions; + else + mErrors->Error(mScanner->mLocation, ERRR_STACK_OVERFLOW, "Stack overflow"); + } + else if (ConsumeIdentIf("pop")) + { + if (mCompilerOptionSP > 0) + mCompilerOptions = mCompilerOptionStack[--mCompilerOptionSP] = mCompilerOptions; + else + mErrors->Error(mScanner->mLocation, ERRR_STACK_OVERFLOW, "Stack underflow"); + } + else if (mScanner->mToken == TK_INTEGER) + { + mCompilerOptions &= ~(COPT_OPTIMIZE_ALL); + switch (mScanner->mTokenInteger) + { + case 0: + break; + case 1: + mCompilerOptions |= COPT_OPTIMIZE_DEFAULT; + break; + case 2: + mCompilerOptions |= COPT_OPTIMIZE_SPEED; + break; + case 3: + mCompilerOptions |= COPT_OPTIMIZE_ALL; + break; + default: + mErrors->Error(mScanner->mLocation, ERRR_INVALID_NUMBER, "Invalid number"); + } + } + else if (ConsumeIdentIf("asm")) + mCompilerOptions |= COPT_OPTIMIZE_ASSEMBLER; + else if (ConsumeIdentIf("noasm")) + mCompilerOptions &= ~COPT_OPTIMIZE_ASSEMBLER; + else if (ConsumeIdentIf("size")) + mCompilerOptions |= COPT_OPTIMIZE_SIZE; + else if (ConsumeIdentIf("speed")) + mCompilerOptions &= ~COPT_OPTIMIZE_SIZE; + else if (ConsumeIdentIf("noinline")) + mCompilerOptions &= ~(COPT_OPTIMIZE_INLINE | COPT_OPTIMIZE_AUTO_INLINE | COPT_OPTIMIZE_AUTO_INLINE_ALL); + else if (ConsumeIdentIf("inline")) + mCompilerOptions |= COPT_OPTIMIZE_AUTO_INLINE; + else if (ConsumeIdentIf("autoinline")) + mCompilerOptions |= COPT_OPTIMIZE_AUTO_INLINE | COPT_OPTIMIZE_AUTO_INLINE; + else if (ConsumeIdentIf("maxinline")) + mCompilerOptions |= COPT_OPTIMIZE_INLINE | COPT_OPTIMIZE_AUTO_INLINE | COPT_OPTIMIZE_AUTO_INLINE_ALL; + else + mErrors->Error(mScanner->mLocation, EERR_INVALID_IDENTIFIER, "Invalid option"); + + } while (ConsumeTokenIf(TK_COMMA)); + + ConsumeToken(TK_CLOSE_PARENTHESIS); + } + } else { mScanner->NextToken(); diff --git a/oscar64/Parser.h b/oscar64/Parser.h index 8449e75..b228a64 100644 --- a/oscar64/Parser.h +++ b/oscar64/Parser.h @@ -16,12 +16,15 @@ public: LinkerSection * mCodeSection, * mDataSection, * mBSSection; - uint64 mCompilerOptions; + uint64 mCompilerOptions; + uint64 mCompilerOptionStack[32]; + int mCompilerOptionSP; void Parse(void); protected: bool ConsumeToken(Token token); bool ConsumeTokenIf(Token token); + bool ConsumeIdentIf(const char* ident); char mCharMap[256]; int mUnrollLoop;