diff --git a/include/crt.c b/include/crt.c index 0c206d7..f35c7dd 100644 --- a/include/crt.c +++ b/include/crt.c @@ -2858,6 +2858,11 @@ W1: __asm faddsub { +fsub: + lda tmp + 3 + eor #$80 + sta tmp + 3 +fadd: lda #$ff cmp tmp + 4 beq INF @@ -3008,7 +3013,7 @@ fas_zero: __asm inp_binop_add_f32 { jsr freg.split_exp - jsr faddsub + jsr faddsub.fadd jmp startup.exec } @@ -3017,10 +3022,7 @@ __asm inp_binop_add_f32 __asm inp_binop_sub_f32 { jsr freg.split_exp - lda tmp + 3 - eor #$80 - sta tmp + 3 - jsr faddsub + jsr faddsub.fsub jmp startup.exec } @@ -3983,11 +3985,38 @@ fru3: jmp freg.merge_aexp } +__asm store32 +{ + lda accu + 0 + sta $00, x + lda accu + 1 + sta $01, x + lda accu + 2 + sta $02, x + lda accu + 3 + sta $03, x + rts +} + +__asm load32 +{ + lda $00, x + sta accu + 0 + lda $01, x + sta accu + 1 + lda $02, x + sta accu + 2 + lda $03, x + sta accu + 3 + rts +} + #pragma runtime(fsplita, freg.split_aexp) #pragma runtime(fsplitt, freg.split_texp) #pragma runtime(fsplitx, freg.split_xexp) #pragma runtime(fmergea, freg.merge_aexp) -#pragma runtime(faddsub, faddsub) +#pragma runtime(fadd, faddsub.fadd) +#pragma runtime(fsub, faddsub.fsub) #pragma runtime(fmul, fmul) #pragma runtime(fdiv, fdiv) #pragma runtime(fcmp, fcmp) @@ -4001,6 +4030,8 @@ fru3: #pragma runtime(ffromlu, uint32_to_float) #pragma runtime(ftoli, f32_to_i32) #pragma runtime(ftolu, f32_to_u32) +#pragma runtime(store32, store32) +#pragma runtime(load32, load32) __asm inp_op_floor_f32 { diff --git a/oscar64/Compiler.cpp b/oscar64/Compiler.cpp index 66675d4..0aa69a8 100644 --- a/oscar64/Compiler.cpp +++ b/oscar64/Compiler.cpp @@ -943,7 +943,8 @@ bool Compiler::GenerateCode(void) RegisterRuntime(loc, Ident::Unique("fsplitt")); RegisterRuntime(loc, Ident::Unique("fsplitx")); RegisterRuntime(loc, Ident::Unique("fsplita")); - RegisterRuntime(loc, Ident::Unique("faddsub")); + RegisterRuntime(loc, Ident::Unique("fadd")); + RegisterRuntime(loc, Ident::Unique("fsub")); RegisterRuntime(loc, Ident::Unique("fmul")); RegisterRuntime(loc, Ident::Unique("fdiv")); RegisterRuntime(loc, Ident::Unique("mul16")); @@ -971,6 +972,9 @@ bool Compiler::GenerateCode(void) RegisterRuntime(loc, Ident::Unique("divu32")); RegisterRuntime(loc, Ident::Unique("modu32")); + RegisterRuntime(loc, Ident::Unique("store32")); + RegisterRuntime(loc, Ident::Unique("load32")); + RegisterRuntime(loc, Ident::Unique("malloc")); RegisterRuntime(loc, Ident::Unique("free")); RegisterRuntime(loc, Ident::Unique("breakpoint")); diff --git a/oscar64/CompilerTypes.h b/oscar64/CompilerTypes.h index 255c7f9..5bc2fb4 100644 --- a/oscar64/CompilerTypes.h +++ b/oscar64/CompilerTypes.h @@ -47,7 +47,7 @@ static const uint64 COPT_DEFAULT = COPT_OPTIMIZE_BASIC | COPT_OPTIMIZE_INLINE | static const uint64 COPT_OPTIMIZE_DEFAULT = COPT_OPTIMIZE_BASIC | COPT_OPTIMIZE_INLINE | COPT_OPTIMIZE_CONST_EXPRESSIONS; -static const uint64 COPT_OPTIMIZE_SIZE = COPT_OPTIMIZE_BASIC | COPT_OPTIMIZE_INLINE | COPT_OPTIMIZE_CONST_EXPRESSIONS | COPT_OPTIMIZE_CODE_SIZE | COPT_OPTIMIZE_CONST_PARAMS | COPT_OPTIMIZE_MERGE_CALLS | COPT_OPTIMIZE_GLOBAL; +static const uint64 COPT_OPTIMIZE_SIZE = COPT_OPTIMIZE_BASIC | COPT_OPTIMIZE_INLINE | COPT_OPTIMIZE_AUTO_INLINE | COPT_OPTIMIZE_CONST_EXPRESSIONS | COPT_OPTIMIZE_CODE_SIZE | COPT_OPTIMIZE_CONST_PARAMS | COPT_OPTIMIZE_MERGE_CALLS | COPT_OPTIMIZE_GLOBAL; static const uint64 COPT_OPTIMIZE_SPEED = COPT_OPTIMIZE_BASIC | COPT_OPTIMIZE_INLINE | COPT_OPTIMIZE_AUTO_INLINE | COPT_OPTIMIZE_AUTO_UNROLL | COPT_OPTIMIZE_CONST_EXPRESSIONS | COPT_OPTIMIZE_ASSEMBLER | COPT_OPTIMIZE_CONST_PARAMS | COPT_OPTIMIZE_MERGE_CALLS | COPT_OPTIMIZE_GLOBAL; diff --git a/oscar64/Errors.h b/oscar64/Errors.h index 6a87e5d..1ea3284 100644 --- a/oscar64/Errors.h +++ b/oscar64/Errors.h @@ -42,6 +42,7 @@ enum ErrorID EWARN_UNDEFINED_POINTER_ARITHMETIC, EWARN_INVALID_VALUE_RANGE, EWARN_DEFAULT_COPY_DEPRECATED, + EWARN_INSUFFICIENT_MEMORY, EERR_GENERIC = 3000, EERR_FILE_NOT_FOUND, diff --git a/oscar64/GlobalAnalyzer.cpp b/oscar64/GlobalAnalyzer.cpp index 017b333..59663d4 100644 --- a/oscar64/GlobalAnalyzer.cpp +++ b/oscar64/GlobalAnalyzer.cpp @@ -147,6 +147,25 @@ void GlobalAnalyzer::TopoSort(Declaration* procDec) } } +int GlobalAnalyzer::CallerInvokes(Declaration* called) +{ + int n = 0; + for (int i = 0; i < called->mCallers.Size(); i++) + { + Declaration* f = called->mCallers[i]; + n += CallerInvokes(f, called); + } + return n; +} + +int GlobalAnalyzer::CallerInvokes(Declaration* caller, Declaration* called) +{ + int n = 1; + if (caller->mType == DT_CONST_FUNCTION && (caller->mFlags & (DTF_INLINE | DTF_REQUEST_INLINE)) && !(caller->mFlags & DTF_PREVENT_INLINE) && !(caller->mFlags & DTF_FUNC_RECURSIVE) && !(caller->mFlags & DTF_FUNC_VARIABLE) && !(caller->mFlags & DTF_EXPORT)) + n = CallerInvokes(caller); + return n > 1 ? n : 1; +} + void GlobalAnalyzer::AutoInline(void) { for (int i = 0; i < mFunctions.Size(); i++) @@ -181,33 +200,39 @@ void GlobalAnalyzer::AutoInline(void) dec = dec->mNext; } + int invokes = CallerInvokes(f); int cost = (f->mComplexity - 20 * nparams - 10); -// printf("CHECK INLINING %s (%d) %d * (%d - 1)\n", f->mIdent->mString, f->mComplexity, cost, f->mCallers.Size()); +// printf("CHECK INLINING %s (%d) %d * (%d - 1)\n", f->mIdent->mString, f->mComplexity, cost, invokes); bool doinline = false; if ((f->mCompilerOptions & COPT_OPTIMIZE_INLINE) && (f->mFlags & DTF_REQUEST_INLINE)) doinline = true; if (f->mLocalSize < 100) { - if ((f->mCompilerOptions & COPT_OPTIMIZE_AUTO_INLINE) && ((cost - 20) * (f->mCallers.Size() - 1) <= 20)) + if ((f->mCompilerOptions & COPT_OPTIMIZE_AUTO_INLINE) && ((cost - 20) * (invokes - 1) <= 20)) { - if (f->mCallers.Size() == 1 && f->mComplexity > 100) + if (f->mCompilerOptions & COPT_OPTIMIZE_CODE_SIZE) + { + if (invokes == 1 && f->mSection == f->mCallers[0]->mSection || cost < 0) + doinline = true; + } + else if (invokes == 1 && f->mComplexity > 100) { // printf("CHECK INLINING2 %s <- %s %d\n", f->mIdent->mString, f->mCallers[0]->mIdent->mString, f->mCallers[0]->mCalled.Size()); if (cost < 0 || f->mCallers[0]->mComplexity + cost < 1000 || f->mCallers[0]->mCalled.Size() == 1) doinline = true; } - else + else doinline = true; } - if ((f->mCompilerOptions & COPT_OPTIMIZE_AUTO_INLINE_ALL) && (cost * (f->mCallers.Size() - 1) <= 10000)) + if ((f->mCompilerOptions & COPT_OPTIMIZE_AUTO_INLINE_ALL) && (cost * (invokes - 1) <= 10000)) doinline = true; } if (doinline) { -// printf("INLINING %s %d * (%d - 1)\n", f->mIdent->mString, cost, f->mCallers.Size()); +// printf("INLINING %s %d * (%d - 1)\n", f->mIdent->mString, cost, invokes); f->mFlags |= DTF_INLINE; for (int j = 0; j < f->mCallers.Size(); j++) diff --git a/oscar64/GlobalAnalyzer.h b/oscar64/GlobalAnalyzer.h index 03050a1..3f58960 100644 --- a/oscar64/GlobalAnalyzer.h +++ b/oscar64/GlobalAnalyzer.h @@ -32,6 +32,8 @@ protected: GrowingArray mGlobalVariables; void AnalyzeInit(Declaration* mdec); + int CallerInvokes(Declaration* called); + int CallerInvokes(Declaration* caller, Declaration* called); Declaration* Analyze(Expression* exp, Declaration* procDec, bool lhs, bool aliasing); diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index 978cf3f..455acf3 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -23479,7 +23479,7 @@ void InterCodeProcedure::Close(void) DisassembleDebug("mapped variabled"); - ReduceTemporaries(); + ReduceTemporaries(true); DisassembleDebug("Reduced Temporaries"); @@ -24224,12 +24224,20 @@ void InterCodeProcedure::HoistCommonConditionalPath(void) } -void InterCodeProcedure::ReduceTemporaries(void) +void InterCodeProcedure::ReduceTemporaries(bool final) { NumberSet* collisionSet; int i, j, numRenamedTemps; int numTemps = mTemporaries.Size(); + NumberSet callerSaved(numTemps); + + if (final) + { + ResetVisited(); + mEntryBlock->BuildCallerSaveTempSet(callerSaved); + } + ResetVisited(); mEntryBlock->BuildLocalTempSets(numTemps); @@ -24261,24 +24269,73 @@ void InterCodeProcedure::ReduceTemporaries(void) NumberSet usedTemps(numTemps); - for (i = 0; i < numTemps; i++) + if (final) { - usedTemps.Clear(); - - for (j = 0; j < numTemps; j++) + for (int sz = 4; sz > 0; sz >>= 1) { - if (mRenameTable[j] >= 0 && (collisionSet[i][j] || InterTypeSize[mTemporaries[j]] != InterTypeSize[mTemporaries[i]])) + for (i = 0; i < numTemps; i++) { - usedTemps += mRenameTable[j]; + if (InterTypeSize[mTemporaries[i]] == sz && !callerSaved[i]) + { + usedTemps.Clear(); + + for (j = 0; j < numTemps; j++) + { + if (mRenameTable[j] >= 0 && collisionSet[i][j]) + usedTemps += mRenameTable[j]; + } + + j = 0; + while (usedTemps[j]) + j++; + + mRenameTable[i] = j; + if (j >= numRenamedTemps) numRenamedTemps = j + 1; + } + } + for (i = 0; i < numTemps; i++) + { + if (InterTypeSize[mTemporaries[i]] == sz && callerSaved[i]) + { + usedTemps.Clear(); + + for (j = 0; j < numTemps; j++) + { + if (mRenameTable[j] >= 0 && collisionSet[i][j]) + usedTemps += mRenameTable[j]; + } + + j = 0; + while (usedTemps[j]) + j++; + + mRenameTable[i] = j; + if (j >= numRenamedTemps) numRenamedTemps = j + 1; + } } } + } + else + { + for (i = 0; i < numTemps; i++) + { + usedTemps.Clear(); - j = 0; - while (usedTemps[j]) - j++; + for (j = 0; j < numTemps; j++) + { + if (mRenameTable[j] >= 0 && (collisionSet[i][j] || InterTypeSize[mTemporaries[j]] != InterTypeSize[mTemporaries[i]])) + { + usedTemps += mRenameTable[j]; + } + } - mRenameTable[i] = j; - if (j >= numRenamedTemps) numRenamedTemps = j + 1; + j = 0; + while (usedTemps[j]) + j++; + + mRenameTable[i] = j; + if (j >= numRenamedTemps) numRenamedTemps = j + 1; + } } mTemporaries.SetSize(numRenamedTemps, true); diff --git a/oscar64/InterCode.h b/oscar64/InterCode.h index 664fd4f..455e0c0 100644 --- a/oscar64/InterCode.h +++ b/oscar64/InterCode.h @@ -739,7 +739,7 @@ public: bool ModifiesGlobal(int varindex); void MapVariables(void); - void ReduceTemporaries(void); + void ReduceTemporaries(bool final = false); void Disassemble(FILE* file); void Disassemble(const char* name, bool dumpSets = false); protected: diff --git a/oscar64/Linker.cpp b/oscar64/Linker.cpp index 1b466b3..6207f1a 100644 --- a/oscar64/Linker.cpp +++ b/oscar64/Linker.cpp @@ -772,6 +772,8 @@ void LinkerRegion::PlaceStackSection(LinkerSection* stackSection, LinkerSection* void Linker::CopyObjects(bool inlays) { + bool errors = false; + for (int i = 0; i < mObjects.Size(); i++) { LinkerObject* obj = mObjects[i]; @@ -809,7 +811,9 @@ void Linker::CopyObjects(bool inlays) { if (!obj->mRegion) { - mErrors->Error(obj->mLocation, ERRR_INSUFFICIENT_MEMORY, "Could not place object", obj->mIdent); + mErrors->Error(obj->mLocation, errors ? EWARN_INSUFFICIENT_MEMORY : ERRR_INSUFFICIENT_MEMORY, "Could not place object", obj->mIdent); + if (mCompilerOptions & COPT_ERROR_FILES) + errors = true; int avail = 0; for (int i = 0; i < mRegions.Size(); i++) @@ -1645,7 +1649,7 @@ bool Linker::WriteCrtFile(const char* filename, uint16 id) bool Linker::WriteMapFile(const char* filename) { - bool banked = mCartridgeBankUsed[0]; + bool banked = mCartridgeBankUsed[0] || mCartridgeBankUsed[1]; FILE* file; fopen_s(&file, filename, "wb"); diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index d463532..270c881 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -5186,7 +5186,7 @@ static AsmInsType TransposeBranchCondition(AsmInsType code) } -int NativeCodeBasicBlock::PutJump(NativeCodeProcedure* proc, NativeCodeBasicBlock* target, int offset) +int NativeCodeBasicBlock::PutJump(NativeCodeProcedure* proc, NativeCodeBasicBlock* target, int offset, AsmInsType code) { if (target->mIns.Size() == 1 && target->mIns[0].mType == ASMIT_RTS) { @@ -5216,10 +5216,16 @@ int NativeCodeBasicBlock::PutJump(NativeCodeProcedure* proc, NativeCodeBasicBloc PutWord(0); return 3; } -#if JUMP_TO_BRANCH else if (offset >= -126 && offset <= 129) { - if (mNDataSet.mRegs[CPU_REG_C].mMode == NRDM_IMMEDIATE) + if (code != ASMIT_INV) + { + PutOpcode(AsmInsOpcodes[InvertBranchCondition(code)][ASMIM_RELATIVE]); + PutByte(offset - 2); + return 2; + } +#if JUMP_TO_BRANCH + else if (mNDataSet.mRegs[CPU_REG_C].mMode == NRDM_IMMEDIATE) { if (mNDataSet.mRegs[CPU_REG_C].mValue) PutOpcode(AsmInsOpcodes[ASMIT_BCS][ASMIM_RELATIVE]); @@ -5239,8 +5245,8 @@ int NativeCodeBasicBlock::PutJump(NativeCodeProcedure* proc, NativeCodeBasicBloc PutByte(offset - 2); return 2; } - } #endif + } PutByte(0x4c); LinkerReference rl; @@ -5267,7 +5273,7 @@ int NativeCodeBasicBlock::BranchByteSize(NativeCodeBasicBlock* target, int from, } } -int NativeCodeBasicBlock::JumpByteSize(NativeCodeBasicBlock* target, int offset) +int NativeCodeBasicBlock::JumpByteSize(NativeCodeBasicBlock* target, int offset, bool second) { if (target->mIns.Size() == 1 && target->mIns[0].mType == ASMIT_RTS) { @@ -5280,17 +5286,19 @@ int NativeCodeBasicBlock::JumpByteSize(NativeCodeBasicBlock* target, int offset) { return 3; } -#if JUMP_TO_BRANCH else if (offset >= -126 && offset <= 129) { - if (mNDataSet.mRegs[CPU_REG_C].mMode == NRDM_IMMEDIATE) + if (second) + return 2; +#if JUMP_TO_BRANCH + else if (mNDataSet.mRegs[CPU_REG_C].mMode == NRDM_IMMEDIATE) return 2; else if (mNDataSet.mRegs[CPU_REG_Z].mMode == NRDM_IMMEDIATE) return 2; +#endif else return 3; } -#endif else return 3; } @@ -7252,9 +7260,10 @@ void NativeCodeBasicBlock::LoadValueToReg(InterCodeProcedure* proc, const InterI { if (ins->mSrc[0].mMemory == IM_INDIRECT) { - int areg = BC_REG_TMP + proc->mTempOffset[ins->mSrc[0].mTemp]; - int index = int(ins->mSrc[0].mIntConst); - int stride = ins->mSrc[0].mStride; + int areg = BC_REG_TMP + proc->mTempOffset[ins->mSrc[0].mTemp]; + int index = int(ins->mSrc[0].mIntConst); + int stride = ins->mSrc[0].mStride; + bool accu = reg == areg; if (stride * 4 <= 256) { @@ -7264,7 +7273,10 @@ void NativeCodeBasicBlock::LoadValueToReg(InterCodeProcedure* proc, const InterI { mIns.Push(NativeCodeInstruction(ins, ASMIT_LDY, ASMIM_IMMEDIATE, index + i * stride)); mIns.Push(NativeCodeInstruction(ins, ASMIT_LDA, ASMIM_INDIRECT_Y, areg)); - mIns.Push(NativeCodeInstruction(ins, ASMIT_STA, ASMIM_ZERO_PAGE, reg + i)); + if (accu) + mIns.Push(NativeCodeInstruction(ins, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + i)); + else + mIns.Push(NativeCodeInstruction(ins, ASMIT_STA, ASMIM_ZERO_PAGE, reg + i)); } } else @@ -7275,11 +7287,23 @@ void NativeCodeBasicBlock::LoadValueToReg(InterCodeProcedure* proc, const InterI mIns.Push(NativeCodeInstruction(ins, ASMIT_LDY, ASMIM_IMMEDIATE, index)); mIns.Push(NativeCodeInstruction(ins, ASMIT_LDA, ASMIM_INDIRECT_Y, areg)); - mIns.Push(NativeCodeInstruction(ins, ASMIT_STA, ASMIM_ZERO_PAGE, reg + i)); + if (accu) + mIns.Push(NativeCodeInstruction(ins, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + i)); + else + mIns.Push(NativeCodeInstruction(ins, ASMIT_STA, ASMIM_ZERO_PAGE, reg + i)); index += stride; } } + + if (accu) + { + for (int i = 0; i < 4; i++) + { + mIns.Push(NativeCodeInstruction(ins, ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_ACCU + i)); + mIns.Push(NativeCodeInstruction(ins, ASMIT_STA, ASMIM_ZERO_PAGE, reg + i)); + } + } } } } @@ -9677,28 +9701,23 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p { case IA_ADD: { - NativeCodeGenerator::Runtime& art(nproc->mGenerator->ResolveRuntime(Ident::Unique("faddsub"))); + NativeCodeGenerator::Runtime& art(nproc->mGenerator->ResolveRuntime(Ident::Unique("fadd"))); mIns.Push(NativeCodeInstruction(ins, ASMIT_JSR, ASMIM_ABSOLUTE, art.mOffset, art.mLinkerObject, NCIF_RUNTIME | NCIF_LOWER | NCIF_UPPER)); } break; case IA_SUB: { - if (!changedSign) + bool add = false; + if (changedSign) + add = true; + else if (flipop) { - if (flipop) - { - mIns.Push(NativeCodeInstruction(ins, ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 3)); - mIns.Push(NativeCodeInstruction(ins, ASMIT_EOR, ASMIM_IMMEDIATE, 0x80)); - mIns.Push(NativeCodeInstruction(ins, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 3)); - } - else - { - mIns.Push(NativeCodeInstruction(ins, ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_WORK + 3)); - mIns.Push(NativeCodeInstruction(ins, ASMIT_EOR, ASMIM_IMMEDIATE, 0x80)); - mIns.Push(NativeCodeInstruction(ins, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_WORK + 3)); - } + mIns.Push(NativeCodeInstruction(ins, ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 3)); + mIns.Push(NativeCodeInstruction(ins, ASMIT_EOR, ASMIM_IMMEDIATE, 0x80)); + mIns.Push(NativeCodeInstruction(ins, ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_ACCU + 3)); + add = true; } - NativeCodeGenerator::Runtime& art(nproc->mGenerator->ResolveRuntime(Ident::Unique("faddsub"))); + NativeCodeGenerator::Runtime& art(nproc->mGenerator->ResolveRuntime(Ident::Unique(add ? "fadd" : "fsub"))); mIns.Push(NativeCodeInstruction(ins, ASMIT_JSR, ASMIM_ABSOLUTE, art.mOffset, art.mLinkerObject, NCIF_RUNTIME | NCIF_LOWER | NCIF_UPPER)); } break; case IA_MUL: @@ -9733,8 +9752,16 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BinaryOperator(InterCodeProcedure* p case IA_AND: case IA_XOR: { - if (sins1) LoadValueToReg(proc, sins1, BC_REG_TMP + proc->mTempOffset[ins->mSrc[1].mTemp], nullptr, nullptr); - if (sins0) LoadValueToReg(proc, sins0, BC_REG_TMP + proc->mTempOffset[ins->mSrc[0].mTemp], nullptr, nullptr); + if (sins1 && sins0 && sins1->mDst.mTemp == sins0->mSrc[0].mTemp) + { + LoadValueToReg(proc, sins0, BC_REG_TMP + proc->mTempOffset[ins->mSrc[0].mTemp], nullptr, nullptr); + LoadValueToReg(proc, sins1, BC_REG_TMP + proc->mTempOffset[ins->mSrc[1].mTemp], nullptr, nullptr); + } + else + { + if (sins1) LoadValueToReg(proc, sins1, BC_REG_TMP + proc->mTempOffset[ins->mSrc[1].mTemp], nullptr, nullptr); + if (sins0) LoadValueToReg(proc, sins0, BC_REG_TMP + proc->mTempOffset[ins->mSrc[0].mTemp], nullptr, nullptr); + } AsmInsType atype; switch (ins->mOperator) @@ -16735,10 +16762,18 @@ bool NativeCodeBasicBlock::GlobalLoadStoreForwarding(bool zpage, const NativeCod } else if (ins.mType == ASMIT_TAY) { + if (mALSIns.mMode == ASMIM_ABSOLUTE_Y) + mALSIns.mType = ASMIT_INV; + if (mXLSIns.mMode == ASMIM_ABSOLUTE_Y) + mXLSIns.mType = ASMIT_INV; mYLSIns = mALSIns; } else if (ins.mType == ASMIT_TAX) { + if (mALSIns.mMode == ASMIM_ABSOLUTE_X) + mALSIns.mType = ASMIT_INV; + if (mYLSIns.mMode == ASMIM_ABSOLUTE_X) + mYLSIns.mType = ASMIT_INV; mXLSIns = mALSIns; } else if (ins.mType == ASMIT_TYA) @@ -29008,6 +29043,60 @@ bool NativeCodeBasicBlock::IsDominatedBy(const NativeCodeBasicBlock* block) cons #endif } +bool NativeCodeBasicBlock::JoinSameBranch(NativeCodeBasicBlock* block) +{ + bool changed = false; + + if (!mPatched) + { + mPatched = true; + + if (this != block) + { + if (mBranch == block->mBranch && mTrueJump == block->mTrueJump && mFalseJump == block->mFalseJump) + { + mTrueJump->mNumEntries--; + mFalseJump->mNumEntries--; + block->mNumEntries++; + mBranch = ASMIT_JMP; + mTrueJump = block; + mFalseJump = nullptr; + changed = true; + } + } + + if (mTrueJump && mTrueJump->JoinSameBranch(block)) + changed = true; + if (mFalseJump && mFalseJump->JoinSameBranch(block)) + changed = true; + } + + return changed; +} + +bool NativeCodeBasicBlock::MergeSameBranch(void) +{ + bool changed = false; + if (!mVisited) + { + mVisited = true; + + if (mIns.Size() == 0 && mFalseJump) + { + mProc->ResetPatched(); + if (mProc->mEntryBlock->JoinSameBranch(this)) + changed = true; + } + + if (mTrueJump && mTrueJump->MergeSameBranch()) + changed = true; + if (mFalseJump && mFalseJump->MergeSameBranch()) + changed = true; + } + + return changed; +} + bool NativeCodeBasicBlock::CheckPatchFailLoop(const NativeCodeBasicBlock* block, const NativeCodeBasicBlock* head, int reg, bool changed) { if (!mPatchLoop || (changed && !mPatchLoopChanged)) @@ -40989,6 +41078,23 @@ bool NativeCodeBasicBlock::BlockSizeCopyReduction(NativeCodeProcedure* proc, int while (CheckBlockCopySequence(mIns, si + 2 * k * i, i)) k++; + if (k == 1 && i == 4 && mIns[si + 0].mMode == ASMIM_ZERO_PAGE && mIns[si + 1].mMode == ASMIM_ZERO_PAGE) + { + if (mIns[si + 0].mAddress == BC_REG_ACCU) + { + NativeCodeGenerator::Runtime& frt(mProc->mGenerator->ResolveRuntime(Ident::Unique("store32"))); + mIns[di++] = NativeCodeInstruction(mIns[si + 0].mIns, ASMIT_JSR, ASMIM_ABSOLUTE, frt.mOffset, frt.mLinkerObject, NCIF_RUNTIME | NCIF_LOWER | NCIF_UPPER | NCIF_USE_ZP_32_X, mIns[si + 1].mAddress); + si += 8; + return true; + } + else if (mIns[si + 1].mAddress == BC_REG_ACCU) + { + NativeCodeGenerator::Runtime& frt(mProc->mGenerator->ResolveRuntime(Ident::Unique("load32"))); + mIns[di++] = NativeCodeInstruction(mIns[si + 0].mIns, ASMIT_JSR, ASMIM_ABSOLUTE, frt.mOffset, frt.mLinkerObject, NCIF_RUNTIME | NCIF_LOWER | NCIF_UPPER | NCIF_USE_ZP_32_X, mIns[si + 0].mAddress); + si += 8; + return true; + } + } int sz = 3 + 4 * k; for (int j = 0; j < k; j++) @@ -50631,6 +50737,22 @@ void NativeCodeBasicBlock::BuildPlacement(ExpandingArray& mFalseJump->BuildPlacement(placement); mTrueJump->BuildPlacement(placement); } + else if (!mTrueJump->mFalseJump && mTrueJump->mTrueJump && mTrueJump->mTrueJump->mFalseJump && + mFalseJump->mFalseJump && mFalseJump->mTrueJump == mTrueJump->mTrueJump && + mTrueJump->mCode.Size() < 20 && mTrueJump->mTrueJump->mCode.Size() < 20 && + mFalseJump->mFalseJump->LeadsInto(mTrueJump->mTrueJump->mTrueJump, 0) < 10 && + mFalseJump->mFalseJump->LeadsInto(mTrueJump->mTrueJump->mFalseJump, 0) < 10) + { + mTrueJump->mPlaced = true; + mTrueJump->mPlace = placement.Size(); + placement.Push(mTrueJump); + + mTrueJump->mTrueJump->mPlaced = true; + mTrueJump->mTrueJump->mPlace = placement.Size(); + placement.Push(mTrueJump->mTrueJump); + + mFalseJump->BuildPlacement(placement); + } else if (!mTrueJump->mFalseJump && mTrueJump->mTrueJump && mTrueJump->mCode.Size() < 100 && mFalseJump->LeadsInto(mTrueJump->mTrueJump, 0) < 100) { mTrueJump->mPlaced = true; @@ -50759,19 +50881,19 @@ bool NativeCodeBasicBlock::CalculateOffset(int& total) mFalseJump->mIns.Size() == 1 && mFalseJump->mIns[0].mType == ASMIT_RTS) { total += BranchByteSize(mTrueJump, total, mTrueJump->mOffset); - total += JumpByteSize(mFalseJump, mFalseJump->mOffset - total); + total += JumpByteSize(mFalseJump, mFalseJump->mOffset - total, true); } else if ( mFalseJump->mPlace > mTrueJump->mPlace && mFalseJump->mPlace < mPlace || mFalseJump->mPlace < mTrueJump->mPlace && mFalseJump->mPlace > mPlace) { total += BranchByteSize(mFalseJump, total, mFalseJump->mOffset); - total += JumpByteSize(mTrueJump, mTrueJump->mOffset - total); + total += JumpByteSize(mTrueJump, mTrueJump->mOffset - total, true); } else { total += BranchByteSize(mTrueJump, total, mTrueJump->mOffset); - total += JumpByteSize(mFalseJump, mFalseJump->mOffset - total); + total += JumpByteSize(mFalseJump, mFalseJump->mOffset - total, true); } } else if (mTrueJump) @@ -50786,7 +50908,7 @@ bool NativeCodeBasicBlock::CalculateOffset(int& total) mTrueJump = nullptr; } else - total += JumpByteSize(mTrueJump, mTrueJump->mOffset - total); + total += JumpByteSize(mTrueJump, mTrueJump->mOffset - total, false); } } @@ -50897,19 +51019,19 @@ void NativeCodeBasicBlock::CopyCode(NativeCodeProcedure * proc, uint8* target) mFalseJump->mIns.Size() == 1 && mFalseJump->mIns[0].mType == ASMIT_RTS) { end += PutBranch(proc, mTrueJump, mBranch, mTrueJump->mOffset - end); - end += PutJump(proc, mFalseJump, mFalseJump->mOffset - end); + end += PutJump(proc, mFalseJump, mFalseJump->mOffset - end, mBranch); } else if ( mFalseJump->mPlace > mTrueJump->mPlace && mFalseJump->mPlace < mPlace || mFalseJump->mPlace < mTrueJump->mPlace && mFalseJump->mPlace > mPlace) { end += PutBranch(proc, mFalseJump, InvertBranchCondition(mBranch), mFalseJump->mOffset - end); - end += PutJump(proc, mTrueJump, mTrueJump->mOffset - end); + end += PutJump(proc, mTrueJump, mTrueJump->mOffset - end, InvertBranchCondition(mBranch)); } else { end += PutBranch(proc, mTrueJump, mBranch, mTrueJump->mOffset - end); - end += PutJump(proc, mFalseJump, mFalseJump->mOffset - end); + end += PutJump(proc, mFalseJump, mFalseJump->mOffset - end, mBranch); } if (mBranchIns) @@ -51201,7 +51323,7 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc) mInterProc = proc; mInterProc->mLinkerObject->mNativeProc = this; - CheckFunc = !strcmp(mInterProc->mIdent->mString, "_showFrame1"); + CheckFunc = !strcmp(mInterProc->mIdent->mString, "printf"); int nblocks = proc->mBlocks.Size(); tblocks = new NativeCodeBasicBlock * [nblocks]; @@ -52921,6 +53043,8 @@ void NativeCodeProcedure::Optimize(void) } } #endif + + #if 1 if (step == 14 && cnt == 0) { @@ -53015,6 +53139,7 @@ void NativeCodeProcedure::Optimize(void) } while (changed); + #if 1 ResetVisited(); mEntryBlock->CombineAlternateLoads(); @@ -53121,6 +53246,9 @@ void NativeCodeProcedure::Optimize(void) ResetVisited(); mEntryBlock->RemoveJumpToBranch(); + ResetVisited(); + mEntryBlock->MergeSameBranch(); + #if 1 ResetVisited(); data.Reset(); diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index bd2b2fc..6ab7bf7 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -265,8 +265,8 @@ public: NativeCodeInstruction DecodeNative(const InterInstruction* ins, LinkerObject * lobj, int& offset) const; int PutBranch(NativeCodeProcedure* proc, NativeCodeBasicBlock* target, AsmInsType code, int offset); - int PutJump(NativeCodeProcedure* proc, NativeCodeBasicBlock* target, int offset); - int JumpByteSize(NativeCodeBasicBlock * target, int offset); + int PutJump(NativeCodeProcedure* proc, NativeCodeBasicBlock* target, int offset, AsmInsType code = ASMIT_INV); + int JumpByteSize(NativeCodeBasicBlock * target, int offset, bool second); int BranchByteSize(NativeCodeBasicBlock* target, int from, int to); NativeCodeBasicBlock* SplitAt(int at); @@ -746,6 +746,9 @@ public: bool CheckPatchFailLoop(const NativeCodeBasicBlock* block, const NativeCodeBasicBlock* head, int reg, bool changed); + bool JoinSameBranch(NativeCodeBasicBlock* block); + bool MergeSameBranch(void); + // reg : base register pair to replace // index: index register // at : start position in block diff --git a/oscar64/oscar64.cpp b/oscar64/oscar64.cpp index 939ff1e..84d37c3 100644 --- a/oscar64/oscar64.cpp +++ b/oscar64/oscar64.cpp @@ -225,19 +225,19 @@ int main2(int argc, const char** argv) strcpy_s(cid, arg + 5); compiler->mCartridgeID = atoi(cid); } - else if (arg[1] == 'n') + else if (arg[1] == 'n' && arg[2] == 0) { compiler->mCompilerOptions |= COPT_NATIVE; } - else if (arg[1] == 'b' && arg[2] == 'c') + else if (arg[1] == 'b' && arg[2] == 'c' && arg[3] == 0) { compiler->mCompilerOptions &= ~COPT_NATIVE; } - else if (arg[1] == 'p' && arg[2] == 's' && arg[3] == 'c' && arg[4] == 'i') + else if (arg[1] == 'p' && arg[2] == 's' && arg[3] == 'c' && arg[4] == 'i' && arg[5] == 0) { compiler->mCompilerOptions |= COPT_PETSCII; } - else if (arg[1] == 'O') + else if (arg[1] == 'O' && arg[2] != 0 && arg[3] == 0) { if (arg[2] == '0') compiler->mCompilerOptions &= ~(COPT_OPTIMIZE_ALL); @@ -261,6 +261,8 @@ int main2(int argc, const char** argv) compiler->mCompilerOptions |= COPT_OPTIMIZE_GLOBAL; else if (arg[2] == 'm') compiler->mCompilerOptions |= COPT_OPTIMIZE_MERGE_CALLS; + else + compiler->mErrors->Error(loc, EERR_COMMAND_LINE, "Invalid command line argument", arg); } else if (arg[1] == 'e') { @@ -291,7 +293,7 @@ int main2(int argc, const char** argv) else compiler->AddDefine(Ident::Unique(def), ""); } - else if (arg[1] == 'g') + else if (arg[1] == 'g' && !arg[2]) { compiler->mCompilerOptions |= COPT_DEBUGINFO; } @@ -302,17 +304,19 @@ int main2(int argc, const char** argv) compiler->mCompilerOptions |= COPT_VERBOSE2; else if (arg[2] == '3') compiler->mCompilerOptions |= COPT_VERBOSE2 | COPT_VERBOSE3; + else if (!arg[2]) + compiler->mErrors->Error(loc, EERR_COMMAND_LINE, "Invalid command line argument", arg); } - else if (arg[1] == 'x' && arg[2] == 'z') + else if (arg[1] == 'x' && arg[2] == 'z' && !arg[3]) { compiler->mCompilerOptions |= COPT_EXTENDED_ZERO_PAGE; } - else if (arg[1] == 'p' && arg[2] == 'p') + else if (arg[1] == 'p' && arg[2] == 'p' && !arg[3]) { compiler->mCompilerOptions |= COPT_CPLUSPLUS; compiler->AddDefine(Ident::Unique("__cplusplus"), "1"); } - else if (arg[1] == 'r' && arg[2] == 'm' && arg[3] == 'p') + else if (arg[1] == 'r' && arg[2] == 'm' && arg[3] == 'p' && !arg[4]) { compiler->mCompilerOptions |= COPT_ERROR_FILES; } @@ -563,6 +567,8 @@ int main2(int argc, const char** argv) if (diskPath[0]) d64 = new DiskImage(diskPath); + compiler->RemoveErrorFile(targetPath); + compiler->WriteOutputFile(targetPath, d64); if (d64)