diff --git a/include/c64/memmap.c b/include/c64/memmap.c
index c2c061f..fb101fc 100644
--- a/include/c64/memmap.c
+++ b/include/c64/memmap.c
@@ -1,11 +1,8 @@
 #include "memmap.h"
 
-volatile char PLAShadow;
-
 __asm DoneTrampoline
 {
-	lda PLAShadow
-	sta $01
+	stx $01
 	pla
 	tax
 	pla
@@ -17,8 +14,6 @@ __asm IRQTrampoline
 	pha
 	txa
 	pha
-	lda #$36
-	sta $01
 	lda #>DoneTrampoline
 	pha
 	lda #<DoneTrampoline
@@ -27,6 +22,9 @@ __asm IRQTrampoline
 	tsx
 	lda $0105, x
 	pha
+	ldx $01
+	lda #$36
+	sta $01
 	jmp ($fffe)
 }
 
@@ -35,8 +33,6 @@ __asm NMITrampoline
 	pha
 	txa
 	pha
-	lda #$36
-	sta $01
 	lda #>DoneTrampoline
 	pha
 	lda #<DoneTrampoline
@@ -45,6 +41,9 @@ __asm NMITrampoline
 	tsx
 	lda $0105, x
 	pha
+	ldx $01
+	lda #$36
+	sta $01
 	jmp ($fffa)
 }
 
@@ -56,8 +55,9 @@ void mmap_trampoline(void)
 
 #pragma native(mmap_trampoline)
 
-void mmap_set(char pla)
+char mmap_set(char pla)
 {
-	PLAShadow = pla;
+	char ppla = *((char *)0x01);
 	*((volatile char *)0x01) = pla;
+	return ppla;
 }
diff --git a/include/c64/memmap.h b/include/c64/memmap.h
index aebb768..947ffec 100644
--- a/include/c64/memmap.h
+++ b/include/c64/memmap.h
@@ -18,9 +18,9 @@
 void mmap_trampoline(void);
 
 // Set the memory map in a way that is compatible with the IRQ
-// trampoline
+// trampoline, returns the previous state
 
-inline void mmap_set(char pla);
+inline char mmap_set(char pla);
 
 #pragma compile("memmap.c")
 
diff --git a/include/crt.c b/include/crt.c
index 2f30649..6f85487 100644
--- a/include/crt.c
+++ b/include/crt.c
@@ -160,7 +160,7 @@
 wx1:
 	jmp lx2
 
 w0:
-	lda #$3f
+	lda #$2f
 	sta $00
 	lda #$36
 	sta $01
diff --git a/oscar64/Declaration.cpp b/oscar64/Declaration.cpp
index f271383..5617724 100644
--- a/oscar64/Declaration.cpp
+++ b/oscar64/Declaration.cpp
@@ -1285,7 +1285,7 @@ bool Declaration::IsSame(const Declaration* dec) const
 			return mIdent == dec->mIdent;
 	else if (mType == DT_TYPE_POINTER || mType == DT_TYPE_ARRAY)
 	{
-		if (mBase->mType == DT_TYPE_STRUCT && dec->mBase->mType == DT_TYPE_STRUCT)
+		if (mBase->mType == DT_TYPE_STRUCT && dec->mBase->mType == DT_TYPE_STRUCT && mBase->mStripe == dec->mBase->mStripe)
 		{
 			if (mBase->mQualIdent == dec->mBase->mQualIdent &&
 				(mBase->mFlags & (DTF_CONST | DTF_VOLATILE)) == (dec->mBase->mFlags & (DTF_CONST | DTF_VOLATILE)))
@@ -1373,7 +1373,7 @@ bool Declaration::CanAssign(const Declaration* fromType) const
 		{
 			if (mBase->mType == DT_TYPE_VOID || fromType->mBase->mType == DT_TYPE_VOID)
 				return (mBase->mFlags & DTF_CONST) || !(fromType->mBase->mFlags & DTF_CONST);
-			else if (mBase->IsSubType(fromType->mBase))
+			else if (mBase->mStripe == fromType->mBase->mStripe && mBase->IsSubType(fromType->mBase))
 				return true;
 		}
 		else if (mBase->mType == DT_TYPE_FUNCTION && fromType->mType == DT_TYPE_FUNCTION)
diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp
index d2a169b..1a1b6cd 100644
--- a/oscar64/InterCode.cpp
+++ b/oscar64/InterCode.cpp
@@ -530,6 +530,18 @@ bool InterCodeBasicBlock::DestroyingMem(const InterInstruction* lins, const Inte
 	return false;
 }
 
+bool InterCodeBasicBlock::DestroyingMem(InterCodeBasicBlock* block, InterInstruction* lins, int from, int to) const
+{
+	for (int i = from; i < to; i++)
+	{
+		InterInstruction* ins = block->mInstructions[i];
+		if (DestroyingMem(lins, ins))
+			return true;
+	}
+
+	return false;
+}
+
 static bool SameMem(const InterOperand& op1, const InterOperand& op2)
 {
 	if (op1.mMemory != op2.mMemory || op1.mType != op2.mType || op1.mIntConst != op2.mIntConst)
@@ -8036,7 +8048,7 @@ void InterCodeBasicBlock::PerformTempForwarding(const TempForwardingTable& forwa
 		{
 			if (mEntryBlocks[i] != mLoopPrefix)
 			{
-				if (!mEntryBlocks[i]->CollectLoopBody(this, body))
+				if (!mEntryBlocks[i]->CollectLoopBodyRecursive(this, body))
 					innerLoop = false;
 			}
 		}
@@ -11578,6 +11590,10 @@ void InterCodeBasicBlock::BuildLoopSuffix(void)
 		if (mFalseJump->mNumEntries > 1)
 		{
 			InterCodeBasicBlock* suffix = new InterCodeBasicBlock(mProc);
+			suffix->mEntryRequiredTemps = mFalseJump->mEntryRequiredTemps;
+			suffix->mExitRequiredTemps = mFalseJump->mEntryRequiredTemps;
+			suffix->mLocalModifiedTemps.Reset(mExitRequiredTemps.Size());
+
 			InterInstruction* jins = new InterInstruction(mInstructions[0]->mLocation, IC_JUMP);
 			suffix->Append(jins);
 			suffix->Close(mFalseJump, nullptr);
@@ -11590,6 +11606,10 @@ void InterCodeBasicBlock::BuildLoopSuffix(void)
 		if (mTrueJump->mNumEntries > 1)
 		{
 			InterCodeBasicBlock* suffix = new InterCodeBasicBlock(mProc);
+			suffix->mEntryRequiredTemps = mTrueJump->mEntryRequiredTemps;
+			suffix->mExitRequiredTemps = mTrueJump->mEntryRequiredTemps;
+			suffix->mLocalModifiedTemps.Reset(mExitRequiredTemps.Size());
+
 			InterInstruction* jins = new InterInstruction(mInstructions[0]->mLocation, IC_JUMP);
 			suffix->Append(jins);
 			suffix->Close(mTrueJump, nullptr);
@@ -11620,6 +11640,10 @@ InterCodeBasicBlock* InterCodeBasicBlock::BuildLoopPrefix(void)
 	if (mLoopHead)
 	{
 		mLoopPrefix = new InterCodeBasicBlock(mProc);
+		mLoopPrefix->mEntryRequiredTemps = mEntryRequiredTemps;
+		mLoopPrefix->mExitRequiredTemps = mEntryRequiredTemps;
+		mLoopPrefix->mLocalModifiedTemps.Reset(mEntryRequiredTemps.Size());
+
 		InterInstruction* jins = new InterInstruction(mInstructions[0]->mLocation, IC_JUMP);
 		mLoopPrefix->Append(jins);
 		mLoopPrefix->Close(this, nullptr);
@@ -11645,6 +11669,22 @@ bool InterCodeBasicBlock::CollectLoopBody(InterCodeBasicBlock* head, GrowingArra
 	return true;
 }
 
+bool InterCodeBasicBlock::CollectLoopBodyRecursive(InterCodeBasicBlock* head, GrowingArray<InterCodeBasicBlock*>& body)
+{
+	if (this == head)
+		return true;
+
+	if (body.IndexOf(this) != -1)
+		return true;
+	body.Push(this);
+
+	for (int i = 0; i < mEntryBlocks.Size(); i++)
+		if (!mEntryBlocks[i]->CollectLoopBodyRecursive(head, body))
+			return false;
+
+	return true;
+}
+
 void InterCodeBasicBlock::CollectLoopPath(const GrowingArray<InterCodeBasicBlock*>& body, GrowingArray<InterCodeBasicBlock*>& path)
 {
 	if (body.IndexOf(this) >= 0)
@@ -11872,6 +11912,17 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar
 			}
 		}
 #endif
+		bool hasStore = false;
+		for (int j = 0; j < body.Size(); j++)
+		{
+			int sz = body[j]->mInstructions.Size();
+			for (int i = 0; i < sz; i++)
+			{
+				InterInstruction* ins = body[j]->mInstructions[i];
+				if (IsObservable(ins->mCode))
+					hasStore = true;
+			}
+		}
 
 		int i = 0;
 		while (i < mInstructions.Size())
@@ -11880,7 +11931,8 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar
 			if (lins->mCode == IC_BINARY_OPERATOR || lins->mCode == IC_CONSTANT ||
 				lins->mCode == IC_UNARY_OPERATOR || lins->mCode == IC_CONVERSION_OPERATOR ||
 				lins->mCode == IC_SELECT ||
-				lins->mCode == IC_RELATIONAL_OPERATOR)
+				lins->mCode == IC_LEA ||
+				lins->mCode == IC_RELATIONAL_OPERATOR || (lins->mCode == IC_LOAD && !hasStore && !lins->mVolatile))
 			{
 #if 1
 				if (CanMoveInstructionBeforeBlock(i) && !IsInsModifiedInRange(i + 1, mInstructions.Size(), lins) && !tail->IsInsModified(lins) && !lins->UsesTemp(lins->mDst.mTemp))
@@ -11901,7 +11953,7 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar
 				}
 #endif
 			}
-			else if (lins->mCode == IC_LOAD && lins->mSrc[0].mTemp < 0 && !tail->mExitRequiredTemps[lins->mDst.mTemp])
+			else if (lins->mCode == IC_LOAD && !lins->mVolatile && lins->mSrc[0].mTemp < 0 && !tail->mExitRequiredTemps[lins->mDst.mTemp])
 			{
 				if (CanMoveInstructionBeforeBlock(i))
 				{
@@ -11963,8 +12015,36 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar
 						}
 					}
 				}
+				else
+				{
+					int k = 0;
+					while (k < body.Size() && !DestroyingMem(body[k], lins, 0, body[k]->mInstructions.Size()))
+						k++;
+					if (k == body.Size())
+					{
+#if 1
+						if (!IsInsModifiedInRange(i + 1, mInstructions.Size(), lins) && !tail->IsInsModified(lins))
+						{
+							int j = 1;
+							while (j < body.Size() && !body[j]->IsInsModified(lins))
+								j++;
+							if (j == body.Size())
+							{
+								mLoopPrefix->mInstructions.Insert(mLoopPrefix->mInstructions.Size() - 1, lins);
+								mLoopPrefix->mExitRequiredTemps += lins->mDst.mTemp;
+								mEntryRequiredTemps += lins->mDst.mTemp;
+								mInstructions.Remove(i);
+								i--;
+
+								modified = true;
+							}
+						}
+#endif
+					}
+				}
 			}
 		}
+
 		i++;
 	}
 }
diff --git a/oscar64/InterCode.h b/oscar64/InterCode.h
index b39c3f5..e15a0f9 100644
--- a/oscar64/InterCode.h
+++ b/oscar64/InterCode.h
@@ -502,6 +502,7 @@ public:
 	bool IsTempReferencedOnPath(int temp, int at) const;
 
 	bool DestroyingMem(const InterInstruction* lins, const InterInstruction* sins) const;
+	bool DestroyingMem(InterCodeBasicBlock* block, InterInstruction* lins, int from, int to) const;
 	bool CollidingMem(const InterInstruction* ins1, const InterInstruction* ins2) const;
 	bool CollidingMem(const InterOperand& op, InterType type, const InterInstruction* ins) const;
 	bool CollidingMem(const InterOperand& op1, InterType type1, const InterOperand& op2, InterType type2) const;
@@ -542,6 +543,7 @@ public:
 	bool SingleBlockLoopPointerSplit(int& spareTemps);
 	bool SingleBlockLoopPointerToByte(int& spareTemps);
 	bool CollectLoopBody(InterCodeBasicBlock* head, GrowingArray<InterCodeBasicBlock*> & body);
+	bool CollectLoopBodyRecursive(InterCodeBasicBlock* head, GrowingArray<InterCodeBasicBlock*>& body);
 	void CollectLoopPath(const GrowingArray<InterCodeBasicBlock*>& body, GrowingArray<InterCodeBasicBlock*>& path);
 	void InnerLoopOptimization(const NumberSet& aliasedParams);
 	void PushMoveOutOfLoop(void);
diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp
index 3d38038..ffb3809 100644
--- a/oscar64/NativeCodeGenerator.cpp
+++ b/oscar64/NativeCodeGenerator.cpp
@@ -2403,6 +2403,16 @@ void NativeCodeInstruction::Simulate(NativeRegisterDataSet& data)
 			}
 		}
 		break;
+
+	case ASMIT_CLC:
+		data.mRegs[CPU_REG_C].mValue = 0;
+		data.mRegs[CPU_REG_C].mMode = NRDM_IMMEDIATE;
+		break;
+
+	case ASMIT_SEC:
+		data.mRegs[CPU_REG_C].mValue = 1;
+		data.mRegs[CPU_REG_C].mMode = NRDM_IMMEDIATE;
+		break;
 	}
 }
 
@@ -12978,10 +12988,26 @@ void NativeCodeBasicBlock::BuildEntryDataSet(const NativeRegisterDataSet& set)
 		for (int i = 0; i < mIns.Size(); i++)
 			mIns[i].Simulate(mNDataSet);
 
+		mFDataSet = mNDataSet;
+		if (mBranch == ASMIT_BCC)
+		{
+			mNDataSet.mRegs[CPU_REG_C].mMode = NRDM_IMMEDIATE;
+			mNDataSet.mRegs[CPU_REG_C].mValue = 0;
+			mFDataSet.mRegs[CPU_REG_C].mMode = NRDM_IMMEDIATE;
+			mFDataSet.mRegs[CPU_REG_C].mValue = 1;
+		}
+		else if (mBranch == ASMIT_BCS)
+		{
+			mNDataSet.mRegs[CPU_REG_C].mMode = NRDM_IMMEDIATE;
+			mNDataSet.mRegs[CPU_REG_C].mValue = 1;
+			mFDataSet.mRegs[CPU_REG_C].mMode = NRDM_IMMEDIATE;
+			mFDataSet.mRegs[CPU_REG_C].mValue = 0;
+		}
+
 		if (mTrueJump)
 			mTrueJump->BuildEntryDataSet(mNDataSet);
 		if (mFalseJump)
-			mFalseJump->BuildEntryDataSet(mNDataSet);
+			mFalseJump->BuildEntryDataSet(mFDataSet);
 	}
 }
 
@@ -15889,6 +15915,10 @@ bool NativeCodeBasicBlock::ExpandADCToBranch(NativeCodeProcedure* proc)
 	{
 		mVisited = true;
 
+		int carry = -1;
+		if (mEntryRegisterDataSet.mRegs[CPU_REG_C].mMode == NRDM_IMMEDIATE)
+			carry = mEntryRegisterDataSet.mRegs[CPU_REG_C].mValue;
+
 		for (int i = 0; i < mIns.Size(); i++)
 		{
 			if (i + 2 < mIns.Size() &&
@@ -16229,6 +16259,45 @@ bool NativeCodeBasicBlock::ExpandADCToBranch(NativeCodeProcedure* proc)
 				break;
 			}
 
+			if (carry == 0 &&
+				mIns[i + 0].mType == ASMIT_LDA &&
+				mIns[i + 1].mType == ASMIT_ADC && mIns[i + 1].mMode == ASMIM_IMMEDIATE && mIns[i + 1].mAddress == 1 &&
+				mIns[i + 2].mType == ASMIT_STA && mIns[i + 0].SameEffectiveAddress(mIns[i + 2]) &&
+				mIns[i + 3].mType == ASMIT_LDA &&
+				mIns[i + 4].mType == ASMIT_ADC && mIns[i + 4].mMode == ASMIM_IMMEDIATE && mIns[i + 4].mAddress == 0 &&
+				mIns[i + 5].mType == ASMIT_STA && mIns[i + 3].SameEffectiveAddress(mIns[i + 5]) &&
+				HasAsmInstructionMode(ASMIT_INC, mIns[i + 2].mMode) &&
+				HasAsmInstructionMode(ASMIT_INC, mIns[i + 5].mMode) &&
+				!(mIns[i + 5].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_C | LIVE_CPU_REG_Z)))
+			{
+				changed = true;
+
+				NativeCodeBasicBlock* iblock = proc->AllocateBlock();
+				NativeCodeBasicBlock* fblock = proc->AllocateBlock();
+
+				fblock->mTrueJump = mTrueJump;
+				fblock->mFalseJump = mFalseJump;
+				fblock->mBranch = mBranch;
+
+				mIns[i + 0].mType = ASMIT_NOP; mIns[i + 0].mMode = ASMIM_IMPLIED;
+				mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED;
+				mIns[i + 2].mType = ASMIT_INC; mIns[i + 2].mLive |= LIVE_CPU_REG_Z;
+
+				for (int j = i + 6; j < mIns.Size(); j++)
+					fblock->mIns.Push(mIns[j]);
+				iblock->mIns.Push(mIns[i + 5]);
+				mIns.SetSize(i + 3);
+				iblock->mIns[0].mType = ASMIT_INC;
+				iblock->mTrueJump = fblock;
+				iblock->mBranch = ASMIT_JMP;
+
+				mTrueJump = fblock;
+				mFalseJump = iblock;
+				mBranch = ASMIT_BNE;
+				break;
+			}
+
 			if (mIns[i + 0].mType == ASMIT_CLC &&
 				mIns[i + 1].mType == ASMIT_LDA &&
 				mIns[i + 2].mType == ASMIT_ADC && mIns[i + 2].mMode == ASMIM_IMMEDIATE && mIns[i + 2].mAddress == 0xff &&
@@ -17027,6 +17096,13 @@ bool NativeCodeBasicBlock::ExpandADCToBranch(NativeCodeProcedure* proc)
 				}
 			}
+
+			if (mIns[i].mType == ASMIT_CLC)
+				carry = 0;
+			else if (mIns[i].mType == ASMIT_SEC)
+				carry = 0;
+			else if (mIns[i].ChangesCarry())
+				carry = -1;
 		}
 
 #if 1
@@ -18369,6 +18445,19 @@ bool NativeCodeBasicBlock::PropagateSinglePath(void)
 					changed = true;
 				}
 			}
+			if (mTrueJump->mNumEntries == 1 && mTrueJump->mIns.Size() > 0 && mTrueJump->mIns[0].mType == ASMIT_TAX && !mFalseJump->mEntryRequiredRegs[CPU_REG_A] && !mFalseJump->mEntryRequiredRegs[CPU_REG_X] && !(mTrueJump->mIns[0].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z)))
+			{
+				int sz = mIns.Size();
+				if (sz >= 2 && mIns[sz - 2].mType == ASMIT_LDA && mIns[sz - 1].mType == ASMIT_CMP && HasAsmInstructionMode(ASMIT_LDX, mIns[sz - 2].mMode) && HasAsmInstructionMode(ASMIT_CPX, mIns[sz - 1].mMode))
+				{
+					mIns[sz - 2].mType = ASMIT_LDX; mIns[sz - 2].mLive |= LIVE_CPU_REG_X;
+					mIns[sz - 1].mType = ASMIT_CPX; mIns[sz - 1].mLive |= LIVE_CPU_REG_X;
+					mExitRequiredRegs += CPU_REG_X;
+					mTrueJump->mEntryRequiredRegs += CPU_REG_X;
+					mTrueJump->mIns[0].mType = ASMIT_NOP;
+					changed = true;
+				}
+			}
 		}
 #endif
 
@@ -40404,7 +40493,7 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc)
 {
 	mInterProc = proc;
 
-	CheckFunc = !strcmp(mInterProc->mIdent->mString, "gauge_show");
+	CheckFunc = !strcmp(mInterProc->mIdent->mString, "test");
 
 	int nblocks = proc->mBlocks.Size();
 	tblocks = new NativeCodeBasicBlock * [nblocks];
@@ -41612,6 +41701,10 @@ void NativeCodeProcedure::Optimize(void)
 #if 1
 		if (step >= 7)
 		{
+			ResetVisited();
+			NativeRegisterDataSet data;
+			mEntryBlock->BuildEntryDataSet(data);
+
 			ResetVisited();
 			if (mEntryBlock->ExpandADCToBranch(this))
 				changed = true;
diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h
index 6a37d13..ff5d34f 100644
--- a/oscar64/NativeCodeGenerator.h
+++ b/oscar64/NativeCodeGenerator.h
@@ -609,6 +609,12 @@ public:
 	bool CheckPatchFailLoop(const NativeCodeBasicBlock* block, const NativeCodeBasicBlock* head, int reg, bool changed);
 
+	// reg : base register pair to replace
+	// index: index register
+	// at : start position in block
+	// yval: known y immediate value of -1 if not known
+	// lobj: linker object addressed
+	// address: offset into linker object
 	bool CheckGlobalAddressSumYPointer(const NativeCodeBasicBlock * block, int reg, int index, int at, int yval);
 	bool PatchGlobalAddressSumYPointer(const NativeCodeBasicBlock* block, int reg, int index, int at, int yval, LinkerObject * lobj, int address, uint32 flags = NCIF_LOWER | NCIF_UPPER);
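Note on the include/c64/memmap changes above: mmap_set() now returns the PLA value that was active before the call (the IRQ/NMI trampolines carry the old $01 value in X instead of the removed PLAShadow variable), so a caller can save and restore the memory map around banked accesses. A minimal usage sketch under that assumption — the raw $01 value 0x30 (all RAM) and the program structure are illustrative, not taken from the patch:

	#include <c64/memmap.h>

	int main(void)
	{
		mmap_trampoline();             // route IRQ/NMI through the trampolines

		char old = mmap_set(0x30);     // map RAM everywhere, remember previous state
		// ... access memory under BASIC/KERNAL/IO here ...
		mmap_set(old);                 // restore the previous memory map

		return 0;
	}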