From aab71cbab3a1179b704edbd54b32f516fdb8ed31 Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Tue, 20 Feb 2024 14:16:32 +0100 Subject: [PATCH] Add more std c include files --- autotest/autotest.bat | 3 + autotest/divmod32test.c | 36 +++++ include/crt.c | 58 +++++++- include/inttypes.c | 26 ++++ include/inttypes.h | 98 +++++++++++++ include/iso646.h | 16 +++ oscar64/NativeCodeGenerator.cpp | 243 +++++++++++++++++++++++++++++++- oscar64/NativeCodeGenerator.h | 4 + 8 files changed, 477 insertions(+), 7 deletions(-) create mode 100644 autotest/divmod32test.c create mode 100644 include/inttypes.c create mode 100644 include/inttypes.h create mode 100644 include/iso646.h diff --git a/autotest/autotest.bat b/autotest/autotest.bat index 3de8c63..15ce3ca 100644 --- a/autotest/autotest.bat +++ b/autotest/autotest.bat @@ -171,6 +171,9 @@ rem @echo off @call :test divmodtest.c @if %errorlevel% neq 0 goto :error +@call :test divmod32test.c +@if %errorlevel% neq 0 goto :error + @call :test enumswitch.c @if %errorlevel% neq 0 goto :error diff --git a/autotest/divmod32test.c b/autotest/divmod32test.c new file mode 100644 index 0000000..e073330 --- /dev/null +++ b/autotest/divmod32test.c @@ -0,0 +1,36 @@ +#include <assert.h> + +void check(unsigned long l, unsigned long r) +{ + unsigned long d = l / r, m = l % r; + + assert(d * r + m == l); + assert(m < r); +} + +int main(void) +{ + for(char i=0; i<28; i++) + { + for(char j=0; j<28; j++) + { + check(0xb3ul << i, 0x2bul << j); + check(0xb3ul << i, 0x01ul << j); + check(0x01ul << i, 0xc2ul << j); + check(0xb31ful << i, 0x2bul << j); + check(0xb354ul << i, 0x01ul << j); + check(0xb3ul << i, 0x2b1cul << j); + check(0xb3ul << i, 0x013ful << j); + check(0xb31ful << i, 0x2b23ul << j); + check(0xb354ul << i, 0x0145ul << j); + check(0xb31f24ul << i, 0x2bul << j); + check(0xb35421ul << i, 0x01ul << j); + check(0xb31f24ul << i, 0x2b23ul << j); + check(0xb35421ul << i, 0x0145ul << j); + 
check(0xb31f24ul << i, 0x2b2356ul << j); + check(0xb35421ul << i, 0x0145a7ul << j); + } + } + + return 0; +} diff --git a/include/crt.c b/include/crt.c index ad70de8..48a951d 100644 --- a/include/crt.c +++ b/include/crt.c @@ -562,6 +562,49 @@ __asm divmod32 // divide 32 by 16 bit + lda tmp + 1 + bne W16 + +// a is zero + clc +LB1: rol accu + rol accu + 1 + rol accu + 2 + rol accu + 3 + rol + bcc LB1a + + sbc tmp + sec + bcs LB1b +LB1a: + cmp tmp + bcc LB1b + sbc tmp +LB1b: +WB1: dey + bne LB1 + sta tmp + 4 + rol accu + rol accu + 1 + rol accu + 2 + rol accu + 3 + ldy tmpy + rts + +W16: +// 0x0000bb?? number in range 256..65535 + lda accu + 3 + bne LS0 + ldx accu + 2 + stx accu + 3 + ldx accu + 1 + stx accu + 2 + ldx accu + 0 + stx accu + 1 + sta accu + 0 + ldy #24 +LS0: clc LS1: rol accu rol accu + 1 @@ -599,11 +642,20 @@ WS1: dey rts W32: +// upper 16 bit are not zero, so ignore first 16 div steps + + ldy #16 + lda accu + 3 + sta tmp + 5 + lda accu + 2 + sta tmp + 4 + lda #0 + sta accu + 2 + sta accu + 3 + clc L1: rol accu rol accu + 1 - rol accu + 2 - rol accu + 3 rol tmp + 4 rol tmp + 5 rol tmp + 6 @@ -634,8 +686,6 @@ W1: dey bne L1 rol accu rol accu + 1 - rol accu + 2 - rol accu + 3 ldy tmpy rts } diff --git a/include/inttypes.c b/include/inttypes.c new file mode 100644 index 0000000..0306527 --- /dev/null +++ b/include/inttypes.c @@ -0,0 +1,26 @@ +#include "inttypes.h" +#include "stdlib.h" + + +intmax_t imaxabs(intmax_t n) +{ + return n < 0 ? 
-n : n; +} + +imaxdiv_t imaxdiv(intmax_t l, intmax_t r) +{ + imaxdiv_t t; + t.quot = l / r; + t.rem = l % r; + return t; +} + +inline intmax_t strtoimax(const char * s, char ** endp, int base) +{ + return strtol(s, endp, base); +} + +inline uintmax_t strtoumax(const char * s, char ** endp, int base) +{ + return strtoul(s, endp, base); +} diff --git a/include/inttypes.h b/include/inttypes.h new file mode 100644 index 0000000..7d60a49 --- /dev/null +++ b/include/inttypes.h @@ -0,0 +1,98 @@ +#ifndef INTTYPES_H +#define INTTYPES_H + +#include <stdint.h> + +#define PRId8 "d" +#define PRId16 "d" +#define PRId32 "ld" + +#define PRIdLEAST8 "d" +#define PRIdLEAST16 "d" +#define PRIdLEAST32 "ld" + +#define PRIdFAST8 "d" +#define PRIdFAST16 "d" +#define PRIdFAST32 "ld" + +#define PRIdMAX "ld" +#define PRIdPTR "d" + + +#define PRIo8 "o" +#define PRIo16 "o" +#define PRIo32 "lo" + +#define PRIoLEAST8 "o" +#define PRIoLEAST16 "o" +#define PRIoLEAST32 "lo" + +#define PRIoFAST8 "o" +#define PRIoFAST16 "o" +#define PRIoFAST32 "lo" + +#define PRIoMAX "lo" +#define PRIoPTR "o" + +#define PRIu8 "u" +#define PRIu16 "u" +#define PRIu32 "lu" + +#define PRIuLEAST8 "u" +#define PRIuLEAST16 "u" +#define PRIuLEAST32 "lu" + +#define PRIuFAST8 "u" +#define PRIuFAST16 "u" +#define PRIuFAST32 "lu" + +#define PRIuMAX "lu" +#define PRIuPTR "u" + + +#define PRIx8 "x" +#define PRIx16 "x" +#define PRIx32 "lx" + +#define PRIxLEAST8 "x" +#define PRIxLEAST16 "x" +#define PRIxLEAST32 "lx" + +#define PRIxFAST8 "x" +#define PRIxFAST16 "x" +#define PRIxFAST32 "lx" + +#define PRIxMAX "lx" +#define PRIxPTR "x" + + +#define PRIX8 "X" +#define PRIX16 "X" +#define PRIX32 "lX" + +#define PRIXLEAST8 "X" +#define PRIXLEAST16 "X" +#define PRIXLEAST32 "lX" + +#define PRIXFAST8 "X" +#define PRIXFAST16 "X" +#define PRIXFAST32 "lX" + +#define PRIXMAX "lX" +#define PRIXPTR "X" + + +typedef struct { + intmax_t quot; + intmax_t rem; +} imaxdiv_t; + +intmax_t imaxabs(intmax_t n); +imaxdiv_t imaxdiv(intmax_t l, intmax_t r); +intmax_t 
strtoimax(const char * s, char ** endp, int base); +uintmax_t strtoumax(const char * s, char ** endp, int base); + + +#pragma compile("inttypes.c") + +#endif diff --git a/include/iso646.h b/include/iso646.h new file mode 100644 index 0000000..f2ab35e --- /dev/null +++ b/include/iso646.h @@ -0,0 +1,16 @@ +#ifndef ISO646_H +#define ISO646_H + +#define and && +#define and_eq &= +#define bitand & +#define bitor | +#define compl ~ +#define not ! +#define not_eq != +#define or || +#define or_eq |= +#define xor ^ +#define xor_eq ^= + +#endif diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index ca39939..8f603b8 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -27068,6 +27068,25 @@ bool NativeCodeBasicBlock::JoinTAXARange(int from, int to) } } + if (from > 0 && mIns[from - 1].mType == ASMIT_LDA && mIns[from - 1].mMode == ASMIM_ABSOLUTE_X) + { + for (int i = from + 1; i < to; i++) + { + if (mIns[from - 1].MayBeChangedOnAddress(mIns[i])) + return false; + } + + int live = mIns[to].mLive; + mIns.Remove(to); + mIns.Insert(to, mIns[from - 1]); + mIns[to].mLive |= live; + mIns.Remove(from - 1); + + CheckLive(); + + return true; + } + if (to + 2 < mIns.Size() && mIns[to + 2].mType == ASMIT_STA && HasAsmInstructionMode(ASMIT_STX, mIns[to + 2].mMode) && !(mIns[to + 2].mLive & LIVE_CPU_REG_X) && !ReferencesXReg(from + 1, to)) { if (mIns[to + 1].mType == ASMIT_ORA && mIns[to + 1].mMode == ASMIM_IMMEDIATE) @@ -27978,6 +27997,39 @@ bool NativeCodeBasicBlock::MoveIndirectLoadStoreDown(int at) return false; } +bool NativeCodeBasicBlock::MoveLDXBeforeZ(int at) +{ + int i = at; + while (i > 0) + { + i--; + NativeCodeInstruction& ins(mIns[i]); + if (ins.ReferencesXReg()) + return false; + + if (ins.ChangesZFlag()) + { + if (ins.ChangesAccuAndFlag()) + { + mIns[at].mLive |= mIns[i].mLive; + mIns.Insert(i, mIns[at]); + mIns.Remove(at + 1); + mIns.Remove(at + 1); + for (int i = 0; i < at; i++) + mIns[i].mLive |= LIVE_CPU_REG_X 
| LIVE_CPU_REG_Z; + return true; + } + else + return false; + } + + if (ins.ChangesXReg() || ins.ChangesAccu()) + return false; + } + + return false; +} + bool NativeCodeBasicBlock::MoveLDXUp(int at) { NativeCodeInstruction& lins(mIns[at]); @@ -30499,7 +30551,7 @@ bool NativeCodeBasicBlock::MoveLoadAddImmStoreAbsXUp(int at) bool NativeCodeBasicBlock::MoveLoadAddImmStoreUp(int at) { int j = at - 1; - while (j > 0) + while (j >= 0) { if (mIns[j].mType == ASMIT_STA && mIns[j].mMode == ASMIM_ZERO_PAGE && mIns[j].mAddress == mIns[at + 1].mAddress) { @@ -30538,6 +30590,64 @@ bool NativeCodeBasicBlock::MoveLoadAddImmStoreUp(int at) return false; } +bool NativeCodeBasicBlock::MoveLoadEorImmStoreUp(int at) +{ + int j = at - 1; + while (j >= 0) + { + if (mIns[j].mType == ASMIT_STA && mIns[j].mMode == ASMIM_ZERO_PAGE && mIns[j].mAddress == mIns[at + 0].mAddress) + { + if (mIns[j].mLive & LIVE_CPU_REG_A) + return false; + + mIns[j].mLive |= LIVE_CPU_REG_A; + + mIns[at + 1].mLive |= mIns[j].mLive; + mIns[at + 2].mLive |= mIns[j].mLive; + + mIns.Insert(j + 1, mIns[at + 2]); // STA + mIns.Insert(j + 1, mIns[at + 2]); // EOR + + mIns[at + 2].mType = ASMIT_NOP; mIns[at + 2].mMode = ASMIM_IMPLIED; + mIns[at + 3].mType = ASMIT_NOP; mIns[at + 3].mMode = ASMIM_IMPLIED; + mIns[at + 4].mType = ASMIT_NOP; mIns[at + 4].mMode = ASMIM_IMPLIED; + return true; + } + else if (mIns[j].IsShift() && mIns[j].mMode == ASMIM_ZERO_PAGE && mIns[j].mAddress == mIns[at + 0].mAddress && mIns[at + 2].mMode == ASMIM_ZERO_PAGE && mIns[j].mAddress == mIns[at + 2].mAddress) + { + if (mIns[j].mLive & LIVE_CPU_REG_A) + return false; + + mIns[j].mLive |= LIVE_CPU_REG_A; + + mIns[at + 0].mLive |= LIVE_CPU_REG_C; + mIns[at + 1].mLive |= mIns[j].mLive; + mIns[at + 2].mLive |= mIns[j].mLive; + + mIns[j].mMode = ASMIM_IMPLIED; + + mIns.Insert(j, mIns[at + 0]); // LDA + + mIns.Insert(j + 2, mIns[at + 3]); // STA + mIns.Insert(j + 2, mIns[at + 3]); // EOR + + mIns[at + 3].mType = ASMIT_NOP; mIns[at + 3].mMode = 
ASMIM_IMPLIED; + mIns[at + 4].mType = ASMIT_NOP; mIns[at + 4].mMode = ASMIM_IMPLIED; + mIns[at + 5].mType = ASMIT_NOP; mIns[at + 5].mMode = ASMIM_IMPLIED; + return true; + } + + if (mIns[j].ChangesZeroPage(mIns[at + 0].mAddress)) + return false; + if (mIns[j].UsesMemoryOf(mIns[at + 2])) + return false; + + j--; + } + + return false; +} + bool NativeCodeBasicBlock::MoveCLCLoadAddZPStoreUp(int at) { int j = at - 1; @@ -32306,7 +32416,7 @@ bool NativeCodeBasicBlock::OptimizeSingleEntryLoop(NativeCodeProcedure* proc) pblock = mEntryBlocks[0]; } - if (!pblock->mFalseJump && eblock->IsDominatedBy(this)) + if (!pblock->mFalseJump && eblock != this && eblock->IsDominatedBy(this)) { ExpandingArray<NativeCodeBasicBlock*> lblocks; @@ -32315,6 +32425,7 @@ bool NativeCodeBasicBlock::OptimizeSingleEntryLoop(NativeCodeProcedure* proc) lblocks.Push(eblock); eblock->mPatched = true; + pblock->mPatched = true; int i = 0; while (i < lblocks.Size()) @@ -32398,6 +32509,11 @@ bool NativeCodeBasicBlock::OptimizeSingleEntryLoop(NativeCodeProcedure* proc) aimm = false; } } + + if (!changed) + { + changed = OptimizeSingleEntryLoopInvariant(proc, pblock, eblock, lblocks); + } } } @@ -35168,6 +35284,70 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoop(NativeCodeProcedure * proc, bool f return false; } +bool NativeCodeBasicBlock::OptimizeSingleEntryLoopInvariant(NativeCodeProcedure* proc, NativeCodeBasicBlock* prev, NativeCodeBasicBlock* tail, ExpandingArray<NativeCodeBasicBlock*>& lblocks) +{ + bool changed = false; + + for (int bi = 0; bi < lblocks.Size(); bi++) + { + NativeCodeBasicBlock* block = lblocks[bi]; + for (int i = 0; i + 1 < block->mIns.Size(); i++) + { + if (block->mIns[i].mType == ASMIT_LDA && (block->mIns[i].mMode == ASMIM_IMMEDIATE || block->mIns[i].mMode == ASMIM_IMMEDIATE_ADDRESS) && + block->mIns[i + 1].mType == ASMIT_STA && block->mIns[i + 1].mMode == ASMIM_ZERO_PAGE) + { + int addr = block->mIns[i + 1].mAddress; + if (!mEntryRequiredRegs[addr] && !tail->mEntryRequiredRegs[addr] && 
!ReferencesZeroPage(addr) && + 
!block->ReferencesZeroPage(addr, 0, i) && !block->ChangesZeroPage(addr, i + 2) && !prev->mExitRequiredRegs[CPU_REG_Z]) + { + int j = 0; + while (j < lblocks.Size() && (j == bi || !lblocks[j]->ReferencesZeroPage(addr))) + j++; + if (j == lblocks.Size()) + { + bool ok = true; + if (!prev->mExitRequiredRegs[CPU_REG_A]) + { + prev->mIns.Push(block->mIns[i]); + prev->mIns.Push(block->mIns[i + 1]); + } + else if (!prev->mExitRequiredRegs[CPU_REG_X]) + { + prev->mIns.Push(NativeCodeInstruction(block->mIns[i].mIns, ASMIT_LDX, block->mIns[i])); + prev->mIns.Push(NativeCodeInstruction(block->mIns[i + 1].mIns, ASMIT_STX, block->mIns[i + 1])); + } + else if (!prev->mExitRequiredRegs[CPU_REG_Y]) + { + prev->mIns.Push(NativeCodeInstruction(block->mIns[i].mIns, ASMIT_LDY, block->mIns[i])); + prev->mIns.Push(NativeCodeInstruction(block->mIns[i + 1].mIns, ASMIT_STY, block->mIns[i + 1])); + } + else + ok = false; + + if (ok) + { + block->mIns.Remove(i + 1); + changed = true; + + prev->mExitRequiredRegs += addr; + mEntryRequiredRegs += addr; + mExitRequiredRegs += addr; + + for (int i = 0; i < lblocks.Size(); i++) + { + lblocks[i]->mEntryRequiredRegs += addr; + lblocks[i]->mExitRequiredRegs += addr; + } + } + } + } + } + } + } + + return changed; +} + bool NativeCodeBasicBlock::OptimizeInnerLoop(NativeCodeProcedure* proc, NativeCodeBasicBlock* head, NativeCodeBasicBlock* tail, ExpandingArray<NativeCodeBasicBlock*>& lblocks) { bool simple = true; @@ -37918,6 +38098,22 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass #endif +#if 1 + // hoist LDX #imm above the preceding instruction that sets A and Z, so the following ORA #0 can be dropped + + for (int i = 1; i + 1 < mIns.Size(); i++) + { + if (mIns[i].mType == ASMIT_LDX && mIns[i].mMode == ASMIM_IMMEDIATE && + mIns[i + 1].mType == ASMIT_ORA && mIns[i + 1].mMode == ASMIM_IMMEDIATE && mIns[i + 1].mAddress == 0) + { + if (MoveLDXBeforeZ(i)) + changed = true; + } + } + CheckLive(); + +#endif + #if 1 // Combine ADC with immediate @@ -38106,6 +38302,26 @@ bool 
NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass #endif +#if 1 + // move load - ora/and/eor # - store up to initial store + // + + for (int i = 2; i + 2 < mIns.Size(); i++) + { + if ( + mIns[i + 0].mType == ASMIT_LDA && mIns[i + 0].mMode == ASMIM_ZERO_PAGE && + (mIns[i + 1].mType == ASMIT_ORA || mIns[i + 1].mType == ASMIT_AND || mIns[i + 1].mType == ASMIT_EOR) && mIns[i + 1].mMode == ASMIM_IMMEDIATE && + mIns[i + 2].mType == ASMIT_STA && (mIns[i + 2].mMode == ASMIM_ZERO_PAGE || mIns[i + 2].mMode == ASMIM_ABSOLUTE) && + (mIns[i + 2].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z)) == 0) + { + if (MoveLoadEorImmStoreUp(i)) + changed = true; + } + } + CheckLive(); + +#endif + #if 1 // move load - add # - store with absolute,x up as far possible @@ -44748,6 +44964,27 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass #endif #endif +#if 1 + if (sz >= 4 && (mBranch == ASMIT_BMI || mBranch == ASMIT_BPL) && !mExitRequiredRegs[CPU_REG_Z]) + { + if (mIns[sz - 4].mType == ASMIT_LDA && + (mIns[sz - 3].mType == ASMIT_ASL || mIns[sz - 3].mType == ASMIT_ROL) && mIns[sz - 3].mMode == ASMIM_IMPLIED && + mIns[sz - 2].mType == ASMIT_STA && !mIns[sz - 2].SameEffectiveAddress(mIns[sz - 4]) && + mIns[sz - 1].mType == ASMIT_LDA && mIns[sz - 1].SameEffectiveAddress(mIns[sz - 4])) + { + if (mBranch == ASMIT_BMI) + mBranch = ASMIT_BCS; + else + mBranch = ASMIT_BCC; + mIns[sz - 3].mLive |= LIVE_CPU_REG_C; + mIns[sz - 2].mLive |= LIVE_CPU_REG_C; + mIns[sz - 1].mLive |= LIVE_CPU_REG_C; + changed = true; + } + } + +#endif + #if 1 if (sz > 0 && mFalseJump && mIns[sz - 1].mType == ASMIT_CMP && mIns[sz - 1].mMode == ASMIM_IMMEDIATE) { @@ -45711,7 +45948,7 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc) { mInterProc = proc; - CheckFunc = !strcmp(mInterProc->mIdent->mString, "vdcwin_get_rect"); + CheckFunc = !strcmp(mInterProc->mIdent->mString, "sieve"); int nblocks = proc->mBlocks.Size(); tblocks = new NativeCodeBasicBlock * 
[nblocks]; diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index d9b34f5..bf60f2b 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -298,6 +298,8 @@ public: bool OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc, NativeCodeBasicBlock * prevBlock, NativeCodeBasicBlock* exitBlock, bool full); bool RemoveSimpleLoopUnusedIndex(void); bool OptimizeLoopCarryOver(void); + + bool OptimizeSingleEntryLoopInvariant(NativeCodeProcedure* proc, NativeCodeBasicBlock* prev, NativeCodeBasicBlock* tail, ExpandingArray<NativeCodeBasicBlock*>& blocks); bool OptimizeSingleEntryLoop(NativeCodeProcedure* proc); bool OptimizeSimpleLoop(NativeCodeProcedure* proc, bool full); @@ -413,6 +415,7 @@ public: bool MoveLDXUp(int at); bool MoveLDYUp(int at); + bool MoveLDXBeforeZ(int at); bool MoveIndirectLoadStoreUp(int at); bool MoveAbsoluteLoadStoreUp(int at); @@ -424,6 +427,7 @@ public: bool MoveStaTaxLdaStaDown(int at); bool MoveLoadAddImmStoreUp(int at); + bool MoveLoadEorImmStoreUp(int at); bool MoveCLCLoadAddZPStoreUp(int at); bool MoveLoadAddZPStoreUp(int at); bool MoveLoadShiftRotateUp(int at);