From 6189e0cc4905e1ecf6d4cc21e5c441336d1926e1 Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Sat, 4 Mar 2023 14:07:34 +0100 Subject: [PATCH] Prepare NES machine type --- include/crt.c | 6 + include/nes/nes.c | 2 + include/nes/nes.h | 76 +++ include/nes/neslib.c | 648 +++++++++++++++++++++++ include/nes/neslib.h | 295 +++++++++++ oscar64/CompilationUnits.h | 2 +- oscar64/Compiler.cpp | 42 +- oscar64/CompilerTypes.h | 4 +- oscar64/Linker.cpp | 19 + oscar64/Linker.h | 1 + oscar64/NativeCodeGenerator.cpp | 903 +++++++++++++++++++++++++++++--- oscar64/NativeCodeGenerator.h | 9 + oscar64/oscar64.cpp | 13 +- 13 files changed, 1952 insertions(+), 68 deletions(-) create mode 100644 include/nes/nes.c create mode 100644 include/nes/nes.h create mode 100644 include/nes/neslib.c create mode 100644 include/nes/neslib.h diff --git a/include/crt.c b/include/crt.c index bfa6bc8..80014e1 100644 --- a/include/crt.c +++ b/include/crt.c @@ -131,6 +131,9 @@ w0: #elif defined(OSCAR_TARGET_BIN) +#elif defined(OSCAR_TARGET_NES) + sei + cld #else byt 0x0b byt 0x08 @@ -153,6 +156,7 @@ w0: stx spentry #endif + // Clear BSS Segment lda # + +#define PPU_CTRL_NT_2000 0b00000000 +#define PPU_CTRL_NT_2400 0b00000001 +#define PPU_CTRL_NT_2800 0b00000010 +#define PPU_CTRL_NT_2C00 0b00000011 +#define PPU_CTRL_INC_1 0b00000000 +#define PPU_CTRL_INC_32 0b00000100 +#define PPU_CTRL_SPR_0000 0b00000000 +#define PPU_CTRL_SPR_1000 0b00001000 +#define PPU_CTRL_BG_0000 0b00000000 +#define PPU_CTRL_BG_1000 0b00010000 +#define PPU_CTRL_SPR_8X8 0b00000000 +#define PPU_CTRL_SPR_8X16 0b00100000 +#define PPU_CTRL_NMI 0b10000000 + +#define PPU_MASK_GREYSCALE 0b00000001 +#define PPU_MASK_BG8 0b00000010 +#define PPU_MASK_SPR8 0b00000100 +#define PPU_MASK_BG_ON 0b00001000 +#define PPU_MASK_SPR_ON 0b00010000 +#define PPU_MASK_EM_RED 0b00100000 +#define PPU_MASK_EM_GREEN 0b01000000 +#define PPU_MASK_EM_BLUE 0b10000000 + +struct PPU +{ + volatile byte control; + 
volatile byte mask; + volatile byte status; + volatile byte oamaddr; + volatile byte oamdata; + volatile byte scroll; + volatile byte addr; + volatile byte data; +}; + +#define ppu (*((struct PPU *)0x2000)) + +struct NESIO +{ + volatile byte sq1_volume; + volatile byte sq1_sweep; + volatile word sq1_freq; + + volatile byte sq2_volume; + volatile byte sq2_sweep; + volatile word sq2_freq; + + volatile byte tri_volume; + volatile byte tri_pad; + volatile word tri_freq; + + volatile byte noise_volume; + volatile byte noise_pad; + volatile word noise_freq; + + volatile byte dmc_freq; + volatile byte dmc_raw; + volatile byte dmc_start; + volatile byte dmc_length; + + volatile byte oamdma; + volatile byte channels; + volatile byte input[2]; +}; + +#define nesio (*((struct NESIO *)0x4000)) + +#pragma compile("nes.c") + +#endif + diff --git a/include/nes/neslib.c b/include/nes/neslib.c new file mode 100644 index 0000000..3809ee9 --- /dev/null +++ b/include/nes/neslib.c @@ -0,0 +1,648 @@ +#include "neslib.h" +// NES hardware-dependent functions by Shiru (shiru@mail.ru) +// with improvements by VEG +// Feel free to do anything you want with this code, consider it Public Domain + +const char palBrightTable[] = { + 0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f, + 0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f, + 0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f, + 0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f,0x0f, + 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0f,0x0f,0x0f, + 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x00,0x00,0x00, + 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x10,0x10,0x10, + 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x20,0x20,0x20, + 0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30, + 
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30, + 0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30, + 0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30 +}; + +char OAM_BUF[256]; +char PAL_BUF[32]; + +#pragma align(OAM_BUF, 256) + +char NTSC_MODE; +volatile char FRAME_CNT1; +volatile char FRAME_CNT2; +char VRAM_UPDATE; +char * NAME_UPD_ADR; +char NAME_UPD_ENABLE; +char PAL_UPDATE; +const char * PAL_BG_PTR; +const char * PAL_SPR_PTR; +char SCROLL_X; +char SCROLL_Y; +char SCROLL_X1; +char SCROLL_Y1; +char PAD_STATE[2]; +char PAD_STATEP[2]; +char PAD_STATET[2]; +char PPU_CTRL_VAR; +char PPU_CTRL_VAR1; +char PPU_MASK_VAR; +char RAND_SEED[2]; + +int main(void) +{ + ppu.mask = 0; + nesio.dmc_freq = 0; + ppu.control = 0; + + char c = ppu.status; + do {} while (!(ppu.status & 0x80)); + do {} while (!(ppu.status & 0x80)); + + nesio.input[1] = 0x40; + + ppu.addr = 0x3f; + ppu.addr = 0x00; + for(char i=0; i<32; i++) + ppu.data = 0x0f; + + ppu.addr = 0x20; + ppu.addr = 0x00; + + for(unsigned i=0; i<0x1000; i++) + ppu.data = 0x00; + + char i = 0; + do { + ((char *)0x200)[i] = 0; + ((char *)0x300)[i] = 0; + ((char *)0x400)[i] = 0; + ((char *)0x500)[i] = 0; + ((char *)0x600)[i] = 0; + ((char *)0x700)[i] = 0; + i++; + } while (i); + + pal_bright(4); + pal_clear(); + oam_clear(); + + PPU_CTRL_VAR = 0x80; + PPU_MASK_VAR = 0x06; + + ppu.control = 0x80; + + RAND_SEED[0] = 0xfd; + RAND_SEED[1] = 0xfd; + + ppu.scroll = 0x00; + ppu.scroll = 0x00; + ppu.oamaddr = 0x00; + + nes_game(); + + return 0; +} + +__hwinterrupt void nmi(void) +{ + if (PPU_MASK_VAR & 0x18) + { + nesio.oamdma = (unsigned)(&OAM_BUF[0]) >> 8; + + if (PAL_UPDATE) + { + PAL_UPDATE = 0; + ppu.addr = 0x3f; + ppu.addr = 0x00; + + char c = PAL_BG_PTR[PAL_BUF[0]]; + + ppu.data = c; + ppu.data = PAL_BG_PTR[PAL_BUF[1]]; + ppu.data = PAL_BG_PTR[PAL_BUF[2]]; + ppu.data = PAL_BG_PTR[PAL_BUF[3]]; + + #pragma unroll(full) + for(char j=0; 
j<3; j++) + { + ppu.data = c; + ppu.data = PAL_BG_PTR[PAL_BUF[5 + 4 * j + 0]]; + ppu.data = PAL_BG_PTR[PAL_BUF[5 + 4 * j + 1]]; + ppu.data = PAL_BG_PTR[PAL_BUF[5 + 4 * j + 2]]; + } + + #pragma unroll(full) + for(char j=0; j<4; j++) + { + ppu.data = c; + ppu.data = PAL_SPR_PTR[PAL_BUF[17 + 4 * j + 0]]; + ppu.data = PAL_SPR_PTR[PAL_BUF[17 + 4 * j + 1]]; + ppu.data = PAL_SPR_PTR[PAL_BUF[17 + 4 * j + 2]]; + } + } + + if (VRAM_UPDATE) + { + VRAM_UPDATE = 0; + + if (NAME_UPD_ENABLE) + flush_vram_update(NAME_UPD_ADR); + } + + ppu.addr = 0x00; + ppu.addr = 0x00; + + ppu.scroll = SCROLL_X; + ppu.scroll = SCROLL_Y; + + ppu.control = PPU_CTRL_VAR; + } + + ppu.mask = PPU_MASK_VAR; + + FRAME_CNT1++; + FRAME_CNT2++; + if (FRAME_CNT2 == 6) + FRAME_CNT2 = 0; + + // jsr FamiToneUpdate +} + +void pal_all(const char *data) +{ + for(char i=0; i<32; i++) + PAL_BUF[i] = data[i]; + PAL_UPDATE++; +} + +void pal_bg(const char *data) +{ + for(char i=0; i<16; i++) + PAL_BUF[i] = data[i]; + PAL_UPDATE++; +} + +void pal_spr(const char *data) +{ + for(char i=0; i<16; i++) + PAL_BUF[i + 16] = data[i]; + PAL_UPDATE++; +} + +void pal_col(unsigned char index,unsigned char color) +{ + PAL_BUF[index & 0x1f] = color; + PAL_UPDATE++; +} + +void pal_clear(void) +{ + for(char i=0; i<32; i++) + PAL_BUF[i] = 0x0f; + PAL_UPDATE++; +} + +void pal_spr_bright(unsigned char bright) +{ + PAL_SPR_PTR = palBrightTable + 16 * bright; + PAL_UPDATE++; +} + +void pal_bg_bright(unsigned char bright) +{ + PAL_BG_PTR = palBrightTable + 16 * bright; + PAL_UPDATE++; +} + + + +void pal_bright(unsigned char bright) +{ + pal_spr_bright(bright); + pal_bg_bright(bright); +} + +void ppu_off(void) +{ + PPU_MASK_VAR &= 0b11100111; + ppu_wait_nmi(); +} + +void ppu_on_all(void) +{ + PPU_MASK_VAR|= 0b00011000; + ppu_wait_nmi(); +} + +void ppu_on_bg(void) +{ + PPU_MASK_VAR |= 0b00001000; + ppu_wait_nmi(); +} + +void ppu_on_spr(void) +{ + PPU_MASK_VAR |= 0b00010000; + ppu_wait_nmi(); +} + +void ppu_mask(unsigned char mask) +{ + 
PPU_MASK_VAR = mask; +} + +unsigned char ppu_system(void) +{ + return NTSC_MODE; +} + +unsigned char get_ppu_ctrl_var(void) +{ + return PPU_CTRL_VAR; +} + +void set_ppu_ctrl_var(unsigned char var) +{ + PPU_CTRL_VAR = var; +} + +void oam_clear(void) +{ + char i = 0; + do { + OAM_BUF[i] = 240; /* 240 is the Y value oam_hide_rest() uses to hide a sprite */ + i += 4; + } while (i); +} + +void oam_size(unsigned char size) +{ + if (size & 1) + PPU_CTRL_VAR |= 0x20; + else + PPU_CTRL_VAR &= ~0x20; +} + +unsigned char oam_spr(unsigned char x,unsigned char y,unsigned char chrnum,unsigned char attr,unsigned char sprid) +{ + OAM_BUF[sprid + 2] = attr; + OAM_BUF[sprid + 1] = chrnum; + OAM_BUF[sprid + 0] = y; + OAM_BUF[sprid + 3] = x; + return sprid + 4; /* offset of the next sprite slot, as documented in neslib.h */ +} + +unsigned char oam_meta_spr(unsigned char x,unsigned char y,unsigned char sprid,const unsigned char *data) +{ + char i = 0; + while (data[i] != 0x80) /* only x == 128 ends the list, so x offsets above 128 stay usable */ + { + OAM_BUF[sprid + 3] = x + data[i + 0]; + OAM_BUF[sprid + 0] = y + data[i + 1]; + OAM_BUF[sprid + 1] = data[i + 2]; + OAM_BUF[sprid + 2] = data[i + 3]; + + sprid += 4; + i += 4; + } + return sprid; +} + + +void oam_hide_rest(unsigned char sprid) +{ + do { + OAM_BUF[sprid] = 240; + sprid += 4; + } while (sprid); +} + +void ppu_wait_frame(void) +{ + VRAM_UPDATE = 1; + char c = FRAME_CNT1; + while (c == FRAME_CNT1) ; + if (NTSC_MODE) + { + while (FRAME_CNT2 == 5) ; + } +} + +void ppu_wait_nmi(void) +{ + VRAM_UPDATE = 1; + char c = FRAME_CNT1; + while (c == FRAME_CNT1) ; +} + +void vram_unrle(const unsigned char *data) +{ + char tag = *data++; + char b; + + for(;;) + { + char c = *data++; + if (c != tag) + { + ppu.data = c; + b = c; + } + else + { + c = *data++; + if (!c) + return; + while (c) + { + ppu.data = b; + c--; + } + } + } +} + +void scroll(unsigned int x,unsigned int y) +{ + char b = (PPU_CTRL_VAR & 0xfc) | ((x >> 8) & 1); + + if (y >= 240) + { + y -= 240; + b |= 2; + } + + SCROLL_Y = y; + SCROLL_X = x; + + PPU_CTRL_VAR = b; +} + +void split(unsigned int x,unsigned int y) +{ + char b = (PPU_CTRL_VAR & 0xfc) | ((x >> 8) & 1); + + 
SCROLL_X1 = x; + PPU_CTRL_VAR1 = b; + + while (ppu.status & 0x40) ; + while (!(ppu.status & 0x40)) ; + + ppu.scroll = SCROLL_X1; + ppu.scroll = 0; + ppu.control = PPU_CTRL_VAR1; +} + +void bank_spr(unsigned char n) +{ + if (n & 1) + PPU_CTRL_VAR |= 0x08; + else + PPU_CTRL_VAR &= ~0x08; +} + +void bank_bg(unsigned char n) +{ + if (n & 1) + PPU_CTRL_VAR |= 0x10; + else + PPU_CTRL_VAR &= ~0x10; +} + +void vram_read(unsigned char *dst,unsigned int size) +{ + for(unsigned i=size; i!=0; i--) + *dst++ = ppu.data; +} + +void vram_write(const unsigned char *src,unsigned int size) +{ + for(unsigned i=size; i!=0; i--) + ppu.data = *src++; +} + +void music_play(unsigned char song) +{ + //_music_play=FamiToneMusicPlay +} + +void music_stop(void) +{ + //_music_stop=FamiToneMusicStop + +} + +void music_pause(unsigned char pause) +{ + //_music_pause=FamiToneMusicPause +} + +void sfx_play(unsigned char sound,unsigned char channel) +{ +#if 0 +_sfx_play: + +.if(FT_SFX_ENABLE) + + and #$03 + tax + lda @sfxPriority,x + tax + jsr popa + jmp FamiToneSfxPlay + +@sfxPriority: + + .byte FT_SFX_CH0,FT_SFX_CH1,FT_SFX_CH2,FT_SFX_CH3 + +.else + rts +.endif +#endif +} + +void sample_play(unsigned char sample) +{ +#if 0 +.if(FT_DPCM_ENABLE) +_sample_play=FamiToneSamplePlay +.else +_sample_play: + rts +.endif +#endif +} + +unsigned char pad_poll(unsigned char pad) +{ + char buf[3]; + + for(char j=0; j<3; j++) + { + nesio.input[0] = 1; + nesio.input[0] = 0; + + char c = 0; + for(char i=0; i<8; i++) + { + c = (c | (nesio.input[pad] << 8)) >> 1; + } + buf[j] = c; + } + + char b = buf[0]; + if (b != buf[1] && b != buf[2]) + b = buf[1]; + + PAD_STATE[pad] = b; + PAD_STATET[pad] = (b ^ PAD_STATEP[pad]) & PAD_STATE[pad]; + PAD_STATEP[pad] = b; + + return b; +} + +unsigned char pad_trigger(unsigned char pad) +{ + pad_poll(pad); + return PAD_STATET[pad]; +} + +unsigned char pad_state(unsigned char pad) +{ + return PAD_STATE[pad]; +} + +unsigned char rand1(void) +{ + if (RAND_SEED[0] & 0x80) + { + 
RAND_SEED[0] <<= 1; + RAND_SEED[0] ^= 0xcf; + } + else + RAND_SEED[0] <<= 1; + return RAND_SEED[0]; +} + + +unsigned char rand2(void) +{ + if (RAND_SEED[1] & 0x80) + { + RAND_SEED[1] <<= 1; + RAND_SEED[1] ^= 0xd7; + } + else RAND_SEED[1] <<= 1; + return RAND_SEED[1]; /* rand8() and rand16() consume this value */ +} + +unsigned char rand8(void) +{ + return rand1() + rand2(); +} + +unsigned int rand16(void) +{ + return (rand1() << 8) | rand2(); +} + +void set_rand(unsigned seed) +{ + RAND_SEED[0] = seed & 0xff; + RAND_SEED[1] = seed >> 8; +} + +void set_vram_update(unsigned char *buf) +{ + NAME_UPD_ADR = buf; + NAME_UPD_ENABLE = buf != nullptr; +} + +void flush_vram_update(unsigned char *buf) +{ + char i = 0; + for(;;) + { + char c = buf[i++]; + if (c < 0x40) + { + ppu.addr = c; + ppu.addr = buf[i++]; + ppu.data = buf[i++]; + } + else + { + if (c < 0x80) /* NT_UPD_HORZ (0x40): clear bit 0x04, the +1 setting per vram_inc() */ + ppu.control = PPU_CTRL_VAR & ~0x04; + else if (c != 0xff) /* NT_UPD_VERT (0x80): set bit 0x04, the +32 setting per vram_inc() */ + ppu.control = PPU_CTRL_VAR | 0x04; + else /* NT_UPD_EOF (0xff): end of the update list */ + return; + + ppu.addr = c & 0x3f; + ppu.addr = buf[i++]; + c = buf[i++]; + do { + ppu.data = buf[i++]; + c--; + } while (c); + ppu.control = PPU_CTRL_VAR; + } + } +} + +void vram_adr(unsigned int addr) +{ + ppu.addr = addr >> 8; + ppu.addr = addr & 0xff; +} + +void vram_put(unsigned char n) +{ + ppu.data = n; +} + +void vram_fill(unsigned char n,unsigned int size) +{ + for(unsigned i=size; i!=0; i--) + ppu.data = n; +} + +void vram_inc(unsigned char n) +{ + if (n) + PPU_CTRL_VAR |= 0x04; + else + PPU_CTRL_VAR &= ~0x04; + ppu.control = PPU_CTRL_VAR; +} + +void memfill(void *dst,unsigned char value,unsigned int size) +{ + for(unsigned i=size; i!=0; i--) + *dst++ = value; +} + +unsigned char nesclock(void) +{ + return FRAME_CNT1; +} + +void delay(unsigned char frames) +{ + while (frames) + { + ppu_wait_nmi(); + frames--; + } +} + +#pragma data(boot) + +__export struct Boot +{ + void * nmi, * reset, * irq; +} boot = { + nmi, + (void *)0x8000, + nullptr +}; + +#pragma data(data) + diff --git a/include/nes/neslib.h b/include/nes/neslib.h new file mode 100644 index 0000000..a08dcaa --- 
/dev/null +++ b/include/nes/neslib.h @@ -0,0 +1,295 @@ +#ifndef NES_NESLIB_H +#define NES_NESLIB_H + +#include "nes.h" + + +/* + (C) 2015 Alex Semenov (Shiru) + (C) 2016 Lauri Kasanen + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. 
+*/ + +// NES hardware-dependent functions by Shiru (shiru@mail.ru) +// Feel free to do anything you want with this code, consider it Public Domain + +// Versions history: +// 280215 - fixed palette glitch caused with the active DMC DMA glitch +// 030914 - minor fixes in the vram update system +// 310814 - added vram_flush_update +// 120414 - removed adr argument from vram_write and vram_read, +// unrle_vram renamed to vram_unrle, with adr argument removed +// 060414 - many fixes and improvements, including sequental VRAM updates +// previous versions were created since mid-2011, there were many updates + + +void nes_game(void); + + +// set bg and spr palettes, data is 32 bytes array +void pal_all(const char *data); + +// set bg palette only, data is 16 bytes array +void pal_bg(const char *data); + +// set spr palette only, data is 16 bytes array +void pal_spr(const char *data); + +// set a palette entry, index is 0..31 +void pal_col(unsigned char index, unsigned char color); + +// reset palette to $0f +void pal_clear(void); + +// set virtual bright both for sprites and background, 0 is black, 4 is normal, 8 is white +void pal_bright(unsigned char bright); + +// set virtual bright for sprites only +void pal_spr_bright(unsigned char bright); + +// set virtual bright for sprites background only +void pal_bg_bright(unsigned char bright); + + + +// wait actual TV frame, 50hz for PAL, 60hz for NTSC +void ppu_wait_nmi(void); + +// wait virtual frame, it is always 50hz, frame-to-frame in PAL, frameskip in NTSC +void ppu_wait_frame(void); + +// turn off rendering, nmi still enabled when rendering is disabled +void ppu_off(void); + +// turn on bg, spr +void ppu_on_all(void); + +// turn on bg only +void ppu_on_bg(void); + +// turn on spr only +void ppu_on_spr(void); + +// set PPU_MASK directly +void ppu_mask(unsigned char mask); + +// get current video system, 0 for PAL, not 0 for NTSC +unsigned char ppu_system(void); + +// Return an 8-bit counter incremented at each vblank 
+unsigned char nesclock(void); + +// get/set the internal ppu ctrl cache var for manual writing +unsigned char get_ppu_ctrl_var(void); +void set_ppu_ctrl_var(unsigned char var); + + +// clear OAM buffer, all the sprites are hidden +void oam_clear(void); + +// set sprite display mode, 0 for 8x8 sprites, 1 for 8x16 sprites +void oam_size(unsigned char size); + +// set sprite in OAM buffer, chrnum is tile, attr is attribute, sprid is offset in OAM in bytes +// returns sprid+4, which is offset for a next sprite +unsigned char oam_spr(unsigned char x, unsigned char y, + unsigned char chrnum, unsigned char attr, + unsigned char sprid); + +// set metasprite in OAM buffer +// meta sprite is a const unsigned char array, it contains four bytes per sprite +// in order x offset, y offset, tile, attribute +// x=128 is end of a meta sprite +// returns sprid+4, which is offset for a next sprite +unsigned char oam_meta_spr(unsigned char x, unsigned char y, + unsigned char sprid, const unsigned char *data); + +// hide all remaining sprites from given offset +void oam_hide_rest(unsigned char sprid); + + + +// play a music in FamiTone format +void music_play(unsigned char song); + +// stop music +void music_stop(void); + +// pause and unpause music +void music_pause(unsigned char pause); + +// play FamiTone sound effect on channel 0..3 +void sfx_play(unsigned char sound, unsigned char channel); + +// play a DPCM sample, 1..63 +void sample_play(unsigned char sample); + + + +// poll controller and return flags like PAD_LEFT etc, input is pad number (0 or 1) +unsigned char pad_poll(unsigned char pad); + +// poll controller in trigger mode, a flag is set only on button down, not hold +// if you need to poll the pad in both normal and trigger mode, poll it in +// trigger mode first, then use pad_state +unsigned char pad_trigger(unsigned char pad); + +// get previous pad state without polling ports +unsigned char pad_state(unsigned char pad); + + +// set scroll, including the top 
bits +// it is always applied at beginning of a TV frame, not at the function call +void scroll(unsigned int x, unsigned int y); + +// set scroll after screen split invoked by the sprite 0 hit +// warning: all CPU time between the function call and the actual split point will be wasted! +// warning: the program loop has to fit into the frame time, ppu_wait_frame should not be used +// otherwise empty frames without split will be inserted, resulting in jumpy screen +// warning: only X scroll could be changed in this version +void split(unsigned int x, unsigned int y); + + +// select current chr bank for sprites, 0..1 +void bank_spr(unsigned char n); + +// select current chr bank for background, 0..1 +void bank_bg(unsigned char n); + + + +// get random number 0..255 or 0..65535 +unsigned char rand8(void); +unsigned int rand16(void); + +// set random seed +void set_rand(unsigned int seed); + + + +// when display is enabled, vram access could only be done with this vram update system +// the function sets a pointer to the update buffer that contains data and addresses +// in a special format. It allows to write non-sequental bytes, as well as horizontal or +// vertical nametable sequences. 
+// buffer pointer could be changed during rendering, but it only takes effect on a new frame +// number of transferred bytes is limited by vblank time +// to disable updates, call this function with NULL pointer + +// the update data format: +// MSB, LSB, byte for a non-sequential write +// MSB|NT_UPD_HORZ, LSB, LEN, [bytes] for a horizontal sequence +// MSB|NT_UPD_VERT, LSB, LEN, [bytes] for a vertical sequence +// NT_UPD_EOF to mark end of the buffer + +// length of this data should be under 256 bytes + +void set_vram_update(unsigned char *buf); + +// all following vram functions only work when display is disabled + +// do a series of VRAM writes, the same format as for set_vram_update, but writes done right away +void flush_vram_update(unsigned char *buf); + +// set vram pointer to write operations if you need to write some data to vram +void vram_adr(unsigned int adr); + +// put a byte at current vram address, works only when rendering is turned off +void vram_put(unsigned char n); + +// fill a block with a byte at current vram address, works only when rendering is turned off +void vram_fill(unsigned char n, unsigned int len); + +// set vram autoincrement, 0 for +1 and not 0 for +32 +void vram_inc(unsigned char n); + +// read a block from current address of vram, works only when rendering is turned off +void vram_read(unsigned char *dst, unsigned int size); + +// write a block to current address of vram, works only when rendering is turned off +void vram_write(const unsigned char *src, unsigned int size); + + +// unpack RLE data to current address of vram, mostly used for nametables +void vram_unrle(const unsigned char *data); + +// unpack LZ4 data to this address +void vram_unlz4(const unsigned char *in, unsigned char *out, + const unsigned uncompressed_size); +/* + Rough speeds for a full 1024 nametable: + - rle takes 0.5 frames + - uncompressed takes 1.3 frames + - lz4 takes 2.8 frames +*/ + + +// like memset, but does not return anything +void memfill(void 
*dst, unsigned char value, unsigned int len); + +// delay for N frames +void delay(unsigned char frames); + +// display.sinc functions +void oam_clear_fast(void); +void oam_meta_spr_pal(unsigned char x,unsigned char y,unsigned char pal,const unsigned char *metasprite); +void oam_meta_spr_clip(signed int x,unsigned char y,const unsigned char *metasprite); + + + +#define PAD_A 0x01 +#define PAD_B 0x02 +#define PAD_SELECT 0x04 +#define PAD_START 0x08 +#define PAD_UP 0x10 +#define PAD_DOWN 0x20 +#define PAD_LEFT 0x40 +#define PAD_RIGHT 0x80 + +#define OAM_FLIP_V 0x80 +#define OAM_FLIP_H 0x40 +#define OAM_BEHIND 0x20 + +#define MAX(x1,x2) ((x1)<(x2)?(x2):(x1)) +#define MIN(x1,x2) ((x1)<(x2)?(x1):(x2)) + +#define MASK_SPR 0x10 +#define MASK_BG 0x08 +#define MASK_EDGE_SPR 0x04 +#define MASK_EDGE_BG 0x02 + +#define NAMETABLE_A 0x2000 +#define NAMETABLE_B 0x2400 +#define NAMETABLE_C 0x2800 +#define NAMETABLE_D 0x2c00 + +#define NT_UPD_HORZ 0x40 +#define NT_UPD_VERT 0x80 +#define NT_UPD_EOF 0xff + +// macro to calculate nametable address from X,Y in compile time + +#define NTADR_A(x,y) (NAMETABLE_A|(((y)<<5)|(x))) +#define NTADR_B(x,y) (NAMETABLE_B|(((y)<<5)|(x))) +#define NTADR_C(x,y) (NAMETABLE_C|(((y)<<5)|(x))) +#define NTADR_D(x,y) (NAMETABLE_D|(((y)<<5)|(x))) + +// macro to get MSB and LSB + +#define MSB(x) (((x)>>8)) +#define LSB(x) (((x)&0xff)) + +#pragma compile("neslib.c") + +#endif diff --git a/oscar64/CompilationUnits.h b/oscar64/CompilationUnits.h index 2fb55f2..16c334c 100644 --- a/oscar64/CompilationUnits.h +++ b/oscar64/CompilationUnits.h @@ -28,7 +28,7 @@ public: DeclarationScope* mRuntimeScope; - LinkerSection* mSectionCode, * mSectionData, * mSectionBSS, * mSectionHeap, * mSectionStack, * mSectionZeroPage, * mSectionLowCode; + LinkerSection* mSectionCode, * mSectionData, * mSectionBSS, * mSectionHeap, * mSectionStack, * mSectionZeroPage, * mSectionLowCode, * mSectionBoot; Linker* mLinker; bool AddUnit(Location & location, const char* name, const char * 
from); diff --git a/oscar64/Compiler.cpp b/oscar64/Compiler.cpp index f8655e5..02cd6e9 100644 --- a/oscar64/Compiler.cpp +++ b/oscar64/Compiler.cpp @@ -23,6 +23,7 @@ Compiler::Compiler(void) mCompilationUnits->mSectionStack = mLinker->AddSection(Ident::Unique("stack"), LST_STACK); mCompilationUnits->mSectionZeroPage = mLinker->AddSection(Ident::Unique("zeropage"), LST_ZEROPAGE); mCompilationUnits->mSectionLowCode = nullptr; + mCompilationUnits->mSectionBoot = nullptr; mCompilationUnits->mSectionStack->mSize = 4096; mCompilationUnits->mSectionHeap->mSize = 1024; @@ -82,6 +83,11 @@ bool Compiler::ParseSource(void) case TMACH_C128: mCompilationUnits->mSectionLowCode = mLinker->AddSection(Ident::Unique("lowcode"), LST_DATA); break; + case TMACH_NES: + mCompilationUnits->mSectionStack->mSize = 256; + mCompilationUnits->mSectionHeap->mSize = 256; + mCompilationUnits->mSectionBoot = mLinker->AddSection(Ident::Unique("boot"), LST_DATA); + break; } mPreprocessor->mCompilerOptions = mCompilerOptions; @@ -194,6 +200,8 @@ bool Compiler::GenerateCode(void) const Ident* identStartup = Ident::Unique("startup"); const Ident* identBytecode = Ident::Unique("bytecode"); const Ident* identMain = Ident::Unique("main"); + const Ident* identRom = Ident::Unique("rom"); + const Ident* identBoot = Ident::Unique("boot"); const Ident* identCode = Ident::Unique("code"); const Ident* identZeroPage = Ident::Unique("zeropage"); const Ident* identLowcode = Ident::Unique("lowcode"); @@ -205,7 +213,7 @@ bool Compiler::GenerateCode(void) } LinkerRegion* regionStartup = mLinker->FindRegion(identStartup); - LinkerRegion* regionLowcode = nullptr; + LinkerRegion* regionLowcode = nullptr, * regionBoot = nullptr; if (!regionStartup) { @@ -260,6 +268,8 @@ bool Compiler::GenerateCode(void) break; } } + else if (mTargetMachine == TMACH_NES) + regionStartup = mLinker->AddRegion(identStartup, 0x8000, 0x8080); else regionStartup = mLinker->AddRegion(identStartup, 0x0800, 0x0900); } @@ -296,6 +306,7 @@ bool 
Compiler::GenerateCode(void) } LinkerRegion* regionMain = mLinker->FindRegion(identMain); + LinkerRegion* regionRom = mLinker->FindRegion(identRom); LinkerSection * sectionStartup = mLinker->AddSection(identStartup, LST_DATA); LinkerSection* sectionBytecode = nullptr; @@ -315,7 +326,14 @@ bool Compiler::GenerateCode(void) { if (!regionMain) { - if (!(mCompilerOptions & COPT_TARGET_PRG)) + if (mTargetMachine == TMACH_NES) + { + regionBoot = mLinker->AddRegion(identBoot, 0xfffa, 0x10000); + regionBoot->mSections.Push(mCompilationUnits->mSectionBoot); + regionRom = mLinker->AddRegion(identRom, 0x8080, 0xfffa); + regionMain = mLinker->AddRegion(identMain, 0x0200, 0x0800); + } + else if (!(mCompilerOptions & COPT_TARGET_PRG)) regionMain = mLinker->AddRegion(identMain, 0x0900, 0x4700); else if (regionBytecode) { @@ -379,8 +397,17 @@ bool Compiler::GenerateCode(void) } } - regionMain->mSections.Push(mCompilationUnits->mSectionCode); - regionMain->mSections.Push(mCompilationUnits->mSectionData); + if (regionRom) + { + regionRom->mSections.Push(mCompilationUnits->mSectionCode); + regionRom->mSections.Push(mCompilationUnits->mSectionData); + } + else + { + regionMain->mSections.Push(mCompilationUnits->mSectionCode); + regionMain->mSections.Push(mCompilationUnits->mSectionData); + } + regionMain->mSections.Push(mCompilationUnits->mSectionBSS); regionMain->mSections.Push(mCompilationUnits->mSectionHeap); regionMain->mSections.Push(mCompilationUnits->mSectionStack); @@ -701,6 +728,13 @@ bool Compiler::WriteOutputFile(const char* targetPath, DiskImage * d64) printf("Writing <%s>\n", prgPath); mLinker->WriteBinFile(prgPath); } + else if (mCompilerOptions & COPT_TARGET_NES) + { + strcat_s(prgPath, "nes"); + if (mCompilerOptions & COPT_VERBOSE) + printf("Writing <%s>\n", prgPath); + mLinker->WriteNesFile(prgPath); + } if (d64) diff --git a/oscar64/CompilerTypes.h b/oscar64/CompilerTypes.h index ef14a1e..1d262cf 100644 --- a/oscar64/CompilerTypes.h +++ b/oscar64/CompilerTypes.h @@ 
-21,6 +21,7 @@ static const uint64 COPT_TARGET_CRT512 = 0x400000000ULL; static const uint64 COPT_TARGET_COPY = 0x800000000ULL; static const uint64 COPT_TARGET_BIN = 0x1000000000ULL; static const uint64 COPT_TARGET_LZO = 0x2000000000ULL; +static const uint64 COPT_TARGET_NES = 0x4000000000ULL; static const uint64 COPT_VERBOSE = 0x10000000000ULL; static const uint64 COPT_VERBOSE2 = 0x20000000000ULL; @@ -46,7 +47,8 @@ enum TargetMachine TMACH_VIC20_16K, TMACH_VIC20_24K, TMACH_C128, - TMACH_C128B + TMACH_C128B, + TMACH_NES }; diff --git a/oscar64/Linker.cpp b/oscar64/Linker.cpp index 3f1942d..b841cb0 100644 --- a/oscar64/Linker.cpp +++ b/oscar64/Linker.cpp @@ -611,6 +611,25 @@ bool Linker::WriteBinFile(const char* filename) return false; } +bool Linker::WriteNesFile(const char* filename) +{ + FILE* file; + fopen_s(&file, filename, "wb"); + if (file) + { + char header[16] = { 0x4e, 0x45, 0x53, 0x1a, 0x02, 0x01, 0x01, 0x00, 0x02, 0x00, 0x00 }; + + fwrite(header, 1, 16, file); + int done = fwrite(mMemory + 0x8000, 1, 0x8000, file); + done += fwrite(mCartridge[0], 1, 0x2000, file); + + fclose(file); + return done == 0x8000 + 0x2000; + } + else + return false; +} + bool Linker::WritePrgFile(DiskImage* image, const char* filename) { if (image->OpenFile(filename)) diff --git a/oscar64/Linker.h b/oscar64/Linker.h index d3a5f52..f5d85be 100644 --- a/oscar64/Linker.h +++ b/oscar64/Linker.h @@ -228,6 +228,7 @@ public: bool WriteLblFile(const char* filename); bool WriteCrtFile(const char* filename); bool WriteBinFile(const char* filename); + bool WriteNesFile(const char* filename); uint64 mCompilerOptions; diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index 37e9a5f..a1e5e3f 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -3116,6 +3116,12 @@ bool NativeCodeInstruction::ValueForwarding(NativeRegisterDataSet& data, AsmInsT } changed = true; } + else if (mMode == ASMIM_IMMEDIATE && mType == ASMIT_ORA && mAddress != 
0x00) + { + data.mRegs[CPU_REG_Z].mMode = NRDM_IMMEDIATE; + data.mRegs[CPU_REG_Z].mValue = 1; + data.mRegs[CPU_REG_A].Reset(); + } else if (mMode == ASMIM_IMMEDIATE && ((mAddress == 0 && (mType == ASMIT_ORA || mType == ASMIT_EOR)) || (mAddress == 0xff && mType == ASMIT_AND))) { data.mRegs[CPU_REG_Z].Reset(); @@ -14424,35 +14430,78 @@ bool NativeCodeBasicBlock::Split16BitLoopCount(NativeCodeProcedure* proc) { changed = true; - NativeCodeBasicBlock* hblock = proc->AllocateBlock(); - NativeCodeBasicBlock* dblock = proc->AllocateBlock(); - NativeCodeBasicBlock* zblock = proc->AllocateBlock(); + NativeCodeBasicBlock* pblock = nullptr; + if (mEntryBlocks.Size() == 2) + { + if (mEntryBlocks[0] == this) + pblock = mEntryBlocks[1]; + else if (mEntryBlocks[1] == this) + pblock = mEntryBlocks[0]; + } - zblock->mTrueJump = mTrueJump; - zblock->mFalseJump = mFalseJump; - zblock->mBranch = ASMIT_BNE; - - dblock->mTrueJump = mTrueJump; - dblock->mFalseJump = zblock; - dblock->mBranch = ASMIT_BNE; - - hblock->mTrueJump = dblock; - hblock->mBranch = ASMIT_JMP; - mTrueJump = dblock; - mFalseJump = hblock; + bool preinc = false; + if (pblock && !pblock->mFalseJump && mTrueJump == this && mIns[sz - 7].mMode == ASMIM_ZERO_PAGE && !mFalseJump->mEntryRequiredRegs[mIns[sz - 7].mAddress] && !mEntryRequiredRegs[CPU_REG_A]) + { + int i = 0; + while (i < sz - 8 && !mIns[i].ReferencesZeroPage(mIns[sz - 7].mAddress)) + i++; + if (i == sz - 8) + preinc = true; + } NativeCodeInstruction ilow(mIns[sz - 7]); NativeCodeInstruction ihigh(mIns[sz - 4]); mIns.SetSize(sz - 8); - mIns.Push(NativeCodeInstruction(ASMIT_LDA, ilow.mMode, ilow.mAddress, ilow.mLinkerObject)); + if (preinc) + { + NativeCodeBasicBlock* dblock = proc->AllocateBlock(); + NativeCodeBasicBlock* zblock = proc->AllocateBlock(); - hblock->mIns.Push(NativeCodeInstruction(ASMIT_DEC, ihigh.mMode, ihigh.mAddress, ihigh.mLinkerObject)); + zblock->mTrueJump = this; - dblock->mIns.Push(NativeCodeInstruction(ASMIT_DEC, ilow.mMode, 
ilow.mAddress, ilow.mLinkerObject)); + dblock->mTrueJump = this; + dblock->mFalseJump = mFalseJump; + dblock->mBranch = ASMIT_BNE; - zblock->mIns.Push(NativeCodeInstruction(ASMIT_LDA, ihigh.mMode, ihigh.mAddress, ihigh.mLinkerObject)); + mFalseJump = dblock; + + pblock->mIns.Push(NativeCodeInstruction(ASMIT_LDA, ilow)); + pblock->mBranch = ASMIT_BEQ; + pblock->mFalseJump = zblock; + zblock->mIns.Push(NativeCodeInstruction(ASMIT_INC, ihigh)); + + mIns.Push(NativeCodeInstruction(ASMIT_DEC, ilow)); + dblock->mIns.Push(NativeCodeInstruction(ASMIT_DEC, ihigh)); + } + else + { + NativeCodeBasicBlock* hblock = proc->AllocateBlock(); + NativeCodeBasicBlock* dblock = proc->AllocateBlock(); + NativeCodeBasicBlock* zblock = proc->AllocateBlock(); + + zblock->mTrueJump = mTrueJump; + zblock->mFalseJump = mFalseJump; + zblock->mBranch = ASMIT_BNE; + + dblock->mTrueJump = mTrueJump; + dblock->mFalseJump = zblock; + dblock->mBranch = ASMIT_BNE; + + hblock->mTrueJump = dblock; + hblock->mBranch = ASMIT_JMP; + mTrueJump = dblock; + mFalseJump = hblock; + + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ilow)); + + hblock->mIns.Push(NativeCodeInstruction(ASMIT_DEC, ihigh)); + + dblock->mIns.Push(NativeCodeInstruction(ASMIT_DEC, ilow)); + + zblock->mIns.Push(NativeCodeInstruction(ASMIT_LDA, ihigh)); + } } } #endif @@ -14533,6 +14582,59 @@ bool NativeCodeBasicBlock::ExpandADCToBranch(NativeCodeProcedure* proc) for (int i = 0; i < mIns.Size(); i++) { + if (i + 2 < mIns.Size() && + mIns[i + 0].mType == ASMIT_TXA && + mIns[i + 1].mType == ASMIT_ADC && mIns[i + 1].mMode == ASMIM_IMMEDIATE && mIns[i + 1].mAddress == 0 && + mIns[i + 2].mType == ASMIT_TAX && !(mIns[i + 2].mLive & LIVE_CPU_REG_C)) + { + changed = true; + + NativeCodeBasicBlock* eblock = proc->AllocateBlock(); + NativeCodeBasicBlock* rblock = proc->AllocateBlock(); + + rblock->mTrueJump = mTrueJump; + rblock->mFalseJump = mFalseJump; + rblock->mBranch = mBranch; + + rblock->mIns.Push(NativeCodeInstruction(ASMIT_TXA)); + 
eblock->mIns.Push(NativeCodeInstruction(ASMIT_INX)); + for (int j = i + 3; j < mIns.Size(); j++) + rblock->mIns.Push(mIns[j]); + mIns.SetSize(i); + + mTrueJump = rblock; + mFalseJump = eblock; + mBranch = ASMIT_BCC; + + eblock->Close(rblock, nullptr, ASMIT_JMP); + break; + } +#if 1 + if (i + 4 == mIns.Size() && + mIns[i + 0].mType == ASMIT_CLC && + mIns[i + 1].mType == ASMIT_LDA && + mIns[i + 2].mType == ASMIT_ADC && mIns[i + 2].mMode == ASMIM_IMMEDIATE && mIns[i + 2].mAddress == 1 && + mIns[i + 3].mType == ASMIT_STA && mIns[i + 1].SameEffectiveAddress(mIns[i + 3]) && + !(mIns[i + 3].mLive & LIVE_CPU_REG_A) && + mExitRequiredRegs.Size() && !mExitRequiredRegs[CPU_REG_C] && + (mBranch == ASMIT_BCC || mBranch == ASMIT_BCS)) + { + mIns[i + 3].mType = ASMIT_INC; + mIns[i + 3].mLive |= LIVE_CPU_REG_Z; + mIns[i + 0].mType = ASMIT_NOP; mIns[i + 0].mMode = ASMIM_IMPLIED; + mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; + mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED; + + if (mBranch == ASMIT_BCC) + mBranch = ASMIT_BNE; + else + mBranch = ASMIT_BEQ; + + changed = true; + break; + + } +#endif #if 1 if (i + 6 < mIns.Size()) { @@ -17512,6 +17614,48 @@ bool NativeCodeBasicBlock::JoinTailCodeSequences(NativeCodeProcedure* proc, bool } } } + + if (mTrueJump && mFalseJump) + { + if (mTrueJump->mIns.Size() > 0 && mFalseJump->mIns.Size() > 0 && !mExitRequiredRegs[CPU_REG_Z] && (mBranch == ASMIT_BCC || mBranch == ASMIT_BCS) && + mTrueJump->mNumEntries == 1 && mFalseJump->mNumEntries == 1) + { + if (!mTrueJump->mIns[0].ChangesCarry() && mTrueJump->mIns[0].IsSame(mFalseJump->mIns[0])) + { + int live = mTrueJump->mIns[0].mLive; + mIns.Push(mTrueJump->mIns[0]); + mTrueJump->mIns.Remove(0); + mFalseJump->mIns.Remove(0); + if (live & LIVE_CPU_REG_A) + { + mExitRequiredRegs += CPU_REG_A; + mTrueJump->mEntryRequiredRegs += CPU_REG_A; + mFalseJump->mEntryRequiredRegs += CPU_REG_A; + } + if (live & LIVE_CPU_REG_X) + { + mExitRequiredRegs += CPU_REG_X; + 
mTrueJump->mEntryRequiredRegs += CPU_REG_X; + mFalseJump->mEntryRequiredRegs += CPU_REG_X; + } + if (live & LIVE_CPU_REG_Y) + { + mExitRequiredRegs += CPU_REG_Y; + mTrueJump->mEntryRequiredRegs += CPU_REG_Y; + mFalseJump->mEntryRequiredRegs += CPU_REG_Y; + } + if (live & LIVE_CPU_REG_Z) + { + mExitRequiredRegs += CPU_REG_Z; + mTrueJump->mEntryRequiredRegs += CPU_REG_Z; + mFalseJump->mEntryRequiredRegs += CPU_REG_Z; + } + + changed = true; + } + } + } + #if 1 if (loops && mIns.Size() >= 1 && mEntryBlocks.Size() == 2) { @@ -17836,6 +17980,36 @@ bool NativeCodeBasicBlock::JoinTailCodeSequences(NativeCodeProcedure* proc, bool } } #endif + if (mIns.Size() >= 1 && mBranch == ASMIT_BNE && (mIns.Last().mType == ASMIT_INC || mIns.Last().mType == ASMIT_DEC) && mIns.Last().mMode == ASMIM_ZERO_PAGE) + { + NativeCodeBasicBlock* b = mTrueJump; + + if (b->mIns.Size() == 2 && !b->mExitRequiredRegs[CPU_REG_A]) + { + if ((b->mIns[0].mType == ASMIT_LDA || b->mIns[0].mType == ASMIT_TXA || b->mIns[0].mType == ASMIT_TYA) && + b->mIns[1].mType == ASMIT_ORA && b->mIns[1].SameEffectiveAddress(mIns.Last())) + { + if (b->mBranch == ASMIT_BEQ) + { + b->mNumEntries--; + b->mEntryBlocks.RemoveAll(this); + mTrueJump = b->mFalseJump; + mTrueJump->mNumEntries++; + mTrueJump->mEntryBlocks.Push(this); + changed = true; + } + else if (b->mBranch == ASMIT_BNE) + { + b->mNumEntries--; + b->mEntryBlocks.RemoveAll(this); + mTrueJump = b->mTrueJump; + mTrueJump->mNumEntries++; + mTrueJump->mEntryBlocks.Push(this); + changed = true; + } + } + } + } #if 1 if (mIns.Size() >= 2 && mFalseJump) { @@ -18414,6 +18588,44 @@ bool NativeCodeBasicBlock::JoinTailCodeSequences(NativeCodeProcedure* proc, bool } #endif + if (mFalseJump && !mFalseJump->mFalseJump && mTrueJump == mFalseJump->mTrueJump && mTrueJump->mNumEntries == 2 && mTrueJump->mIns.Size() > 0 && mIns.Size() > 0) + { + int sz = mIns.Size(); + + if (mTrueJump->mIns[0].mType == ASMIT_TAX && !mFalseJump->ReferencesXReg() && !mFalseJump->ReferencesAccu() && 
!(mTrueJump->mIns[0].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z))) + { + if (mIns[sz - 1].mType == ASMIT_LDA && HasAsmInstructionMode(ASMIT_LDX, mIns[sz - 1].mMode)) + { + mIns[sz - 1].mType = ASMIT_LDX; + mIns[sz - 1].mLive |= LIVE_CPU_REG_X; + mTrueJump->mIns[0].mType = ASMIT_NOP; + mExitRequiredRegs += CPU_REG_X; + mTrueJump->mEntryRequiredRegs += CPU_REG_X; + mFalseJump->mEntryRequiredRegs += CPU_REG_X; + mFalseJump->mExitRequiredRegs += CPU_REG_X; + for (int i = 0; i < mFalseJump->mIns.Size(); i++) + mFalseJump->mIns[i].mLive |= LIVE_CPU_REG_X; + changed = true; + } + } + else if (mTrueJump->mIns[0].mType == ASMIT_TAY && !mFalseJump->ReferencesYReg() && !mFalseJump->ReferencesAccu() && !(mTrueJump->mIns[0].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z))) + { + if (mIns[sz - 1].mType == ASMIT_LDA && HasAsmInstructionMode(ASMIT_LDY, mIns[sz - 1].mMode)) + { + mIns[sz - 1].mType = ASMIT_LDY; + mIns[sz - 1].mLive |= LIVE_CPU_REG_Y; + mTrueJump->mIns[0].mType = ASMIT_NOP; + mExitRequiredRegs += CPU_REG_Y; + mTrueJump->mEntryRequiredRegs += CPU_REG_Y; + mFalseJump->mEntryRequiredRegs += CPU_REG_Y; + mFalseJump->mExitRequiredRegs += CPU_REG_Y; + for (int i = 0; i < mFalseJump->mIns.Size(); i++) + mFalseJump->mIns[i].mLive |= LIVE_CPU_REG_Y; + changed = true; + } + } + } + CheckLive(); if (mTrueJump && mTrueJump->JoinTailCodeSequences(proc, loops)) @@ -18542,6 +18754,109 @@ bool NativeCodeBasicBlock::CanCrossBlockYShortcut(int addr) return false; } +bool NativeCodeBasicBlock::CrossBlockYAliasProgpagation(const int* yalias, int yoffset) +{ + bool changed = false; + + if (!mVisited) + { + if (mLoopHead || !yalias) + { + for (int i = 0; i < 256; i++) + mYAlias[i] = -1; + } + else + { + if (mNumEntered == 0) + { + mYOffset = yoffset; + for (int i = 0; i < 256; i++) + mYAlias[i] = yalias[i]; + } + else if (mYOffset == yoffset) + { + for (int i = 0; i < 256; i++) + if (mYAlias[i] != yalias[i]) + mYAlias[i] = -1; + } + else + { + for (int i = 0; i < 256; i++) + mYAlias[i] = -1; + 
yoffset = -1; + } + + mNumEntered++; + if (mNumEntered < mNumEntries) + return false; + } + + mVisited = true; + + for (int i = 0; i < mIns.Size(); i++) + { + NativeCodeInstruction& ins(mIns[i]); + if (ins.mType == ASMIT_INY) + yoffset = (yoffset + 1) & 0xff; + else if (ins.mType == ASMIT_DEY) + yoffset = (yoffset - 1) & 0xff; + else if (ins.mType == ASMIT_STY && mIns[i].mMode == ASMIM_ZERO_PAGE) + mYAlias[ins.mAddress] = yoffset; + else if (ins.mMode == ASMIM_ZERO_PAGE && mIns[i].ChangesAddress()) + mYAlias[ins.mAddress] = -1; + else if (ins.mType == ASMIT_LDY && ins.mMode == ASMIM_ZERO_PAGE) + { + if (mYAlias[ins.mAddress] != -1) + { + int diff = (mYAlias[ins.mAddress] - yoffset) & 0xff; + if (diff == 0x01) + { + ins.mType = ASMIT_INY; + ins.mMode = ASMIM_IMPLIED; + changed = true; + } + else if (diff == 0xff) + { + ins.mType = ASMIT_DEY; + ins.mMode = ASMIM_IMPLIED; + changed = true; + } + else if (diff == 0x00 && !(ins.mLive & LIVE_CPU_REG_Z)) + { + ins.mType = ASMIT_NOP; + ins.mMode = ASMIM_IMPLIED; + changed = true; + } + + yoffset = mYAlias[ins.mAddress]; + } + else + { + for (int i = 0; i < 256; i++) + mYAlias[i] = -1; + + yoffset = 0; + mYAlias[ins.mAddress] = yoffset; + } + } + else if (ins.ChangesYReg()) + { + for (int i = 0; i < 256; i++) + mYAlias[i] = -1; + yoffset = 0; + } + } + + if (mTrueJump && mTrueJump->CrossBlockYAliasProgpagation(mYAlias, yoffset)) + changed = true; + if (mFalseJump && mFalseJump->CrossBlockYAliasProgpagation(mYAlias, yoffset)) + changed = true; + } + + return changed; +} + + bool NativeCodeBasicBlock::CrossBlockXYShortcut(void) { bool changed = false; @@ -19345,6 +19660,242 @@ bool NativeCodeBasicBlock::PatchForwardSumYPointer(const NativeCodeBasicBlock* b } +bool NativeCodeBasicBlock::CrossBlock16BitFlood(NativeCodeProcedure* proc) +{ + bool changed = false; + + if (!mVisited) + { + mVisited = true; + + for (int i = 0; i + 3 < mIns.Size(); i++) + { + if (mIns[i + 0].mType == ASMIT_LDA && mIns[i + 0].mMode == ASMIM_ZERO_PAGE 
&& !(mIns[i + 0].mLive & LIVE_MEM) && + mIns[i + 1].mType == ASMIT_STA && mIns[i + 1].mMode == ASMIM_ZERO_PAGE && + mIns[i + 2].mType == ASMIT_LDA && mIns[i + 2].mMode == ASMIM_ZERO_PAGE && !(mIns[i + 2].mLive & LIVE_MEM) && mIns[i + 2].mAddress == mIns[i + 0].mAddress + 1 && + mIns[i + 3].mType == ASMIT_STA && mIns[i + 3].mMode == ASMIM_ZERO_PAGE && mIns[i + 3].mAddress == mIns[i + 1].mAddress + 1) + { + proc->ResetPatched(); + if (CheckCrossBlock16BitFlood(this, mIns[i + 0].mAddress, mIns[i + 1].mAddress, i + 4, false)) + { + mIns[i + 0].mType = ASMIT_NOP; mIns[i + 0].mMode = ASMIM_IMPLIED; + mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; + mIns[i + 3].mType = ASMIT_NOP; mIns[i + 3].mMode = ASMIM_IMPLIED; + mIns[i + 2].mLive |= LIVE_MEM; + + proc->ResetPatched(); + if (PatchCrossBlock16BitFlood(this, mIns[i + 0].mAddress, mIns[i + 1].mAddress, i + 4)) + { + changed = true; + } + } + } + } + + if (mTrueJump && mTrueJump->CrossBlock16BitFlood(proc)) + changed = true; + if (mFalseJump && mFalseJump->CrossBlock16BitFlood(proc)) + changed = true; + } + + return changed; +} + +bool NativeCodeBasicBlock::CheckCrossBlock16BitFlood(const NativeCodeBasicBlock* block, int sreg, int dreg, int at, bool rvalid) +{ + if (at == 0 && this == block) + return false; + + if (!mPatched) + { + mPatched = true; + + if (at == 0) + { + if (!mEntryRequiredRegs[dreg] && !mEntryRequiredRegs[dreg + 1]) + return true; + + if (mEntryRequiredRegs[sreg] || mEntryRequiredRegs[sreg + 1]) + return false; + + if (mNumEntries > 1) + { + for (int i = 0; i < mEntryBlocks.Size(); i++) + if (!mEntryBlocks[i]->CheckCrossBlock16BitFloodExit(block, sreg, dreg, rvalid)) + return false; + } + } + + while (at < mIns.Size()) + { + NativeCodeInstruction& ins(mIns[at]); + + if (ins.ReferencesZeroPage(sreg) || ins.ReferencesZeroPage(sreg + 1)) + return false; + else if ((ins.mType == ASMIT_JSR || ins.mType == ASMIT_RTS) && (ins.ReferencesZeroPage(dreg) || ins.ReferencesZeroPage(dreg + 1))) + 
return false; + + at++; + } + + mPatchExit = true; + + if (mTrueJump && !mTrueJump->CheckCrossBlock16BitFlood(block, sreg, dreg, 0, rvalid)) + return false; + if (mFalseJump && !mFalseJump->CheckCrossBlock16BitFlood(block, sreg, dreg, 0, rvalid)) + return false; + + } + + return true; +} + +bool NativeCodeBasicBlock::CheckCrossBlock16BitFloodExit(const NativeCodeBasicBlock* block, int sreg, int dreg, bool rvalid) +{ + if (!mPatchExit) + { + mPatchExit = true; + + if (mTrueJump && !mTrueJump->CheckCrossBlock16BitFlood(block, sreg, dreg, 0, rvalid)) + return false; + if (mFalseJump && !mFalseJump->CheckCrossBlock16BitFlood(block, sreg, dreg, 0, rvalid)) + return false; + + int at = mIns.Size() - 1; + while (at >= 0) + { + NativeCodeInstruction& ins(mIns[at]); + + if (ins.ReferencesZeroPage(sreg) || ins.ReferencesZeroPage(sreg + 1)) + return false; + else if ((ins.mType == ASMIT_JSR || ins.mType == ASMIT_RTS) && (ins.ReferencesZeroPage(dreg) || ins.ReferencesZeroPage(dreg + 1))) + return false; + + at--; + } + + mPatched = true; + + if (mEntryBlocks.Size() == 0) + return false; + + for (int i = 0; i < mEntryBlocks.Size(); i++) + if (!mEntryBlocks[i]->CheckCrossBlock16BitFloodExit(block, sreg, dreg, rvalid)) + return false; + } + + return true; +} + +bool NativeCodeBasicBlock::PatchCrossBlock16BitFlood(const NativeCodeBasicBlock* block, int sreg, int dreg, int at) +{ + bool changed = false; + + if (!mPatched) + { + mPatched = true; + + if (at == 0) + { + if (!mEntryRequiredRegs[dreg] && !mEntryRequiredRegs[dreg + 1]) + return false; + + for (int i = 0; i < mEntryBlocks.Size(); i++) + if (mEntryBlocks[i]->PatchCrossBlock16BitFloodExit(block, sreg, dreg)) + changed = true; + + mEntryRequiredRegs += sreg; + mEntryRequiredRegs += sreg + 1; + } + + while (at < mIns.Size()) + { + NativeCodeInstruction& ins(mIns[at]); + + if (ins.mMode == ASMIM_ZERO_PAGE || ins.mMode == ASMIM_INDIRECT_Y) + { + if (ins.mAddress == dreg) + { + ins.mAddress = sreg; + changed = true; + } + else 
if (ins.mAddress == dreg + 1) + { + ins.mAddress = sreg + 1; + changed = true; + } + } + + at++; + } + + mPatchExit = true; + + mExitRequiredRegs |= sreg; + mExitRequiredRegs |= sreg + 1; + + if (mTrueJump && mTrueJump->PatchCrossBlock16BitFlood(block, sreg, dreg, 0)) + changed = true; + if (mFalseJump && mFalseJump->PatchCrossBlock16BitFlood(block, sreg, dreg, 0)) + changed = true; + } + + return changed; + +} + +bool NativeCodeBasicBlock::PatchCrossBlock16BitFloodExit(const NativeCodeBasicBlock* block, int sreg, int dreg) +{ + bool changed = false; + + if (!mPatchExit) + { + mPatchExit = true; + + mExitRequiredRegs += sreg; + mExitRequiredRegs += sreg + 1; + + if (mTrueJump && mTrueJump->PatchCrossBlock16BitFlood(block, sreg, dreg, 0)) + changed = true; + if (mFalseJump && mFalseJump->PatchCrossBlock16BitFlood(block, sreg, dreg, 0)) + changed = true; + + int at = mIns.Size() - 1; + while (at >= 0) + { + NativeCodeInstruction& ins(mIns[at]); + + if (ins.mAddress == dreg) + { + ins.mAddress = sreg; + changed = true; + } + else if (ins.mAddress == dreg + 1) + { + ins.mAddress = sreg + 1; + changed = true; + } + + at--; + } + + if (mEntryRequiredRegs[dreg] || mEntryRequiredRegs[dreg + 1]) + { + mPatched = true; + mEntryRequiredRegs += sreg; + mEntryRequiredRegs += sreg + 1; + + for (int i = 0; i < mEntryBlocks.Size(); i++) + if (mEntryBlocks[i]->PatchCrossBlock16BitFloodExit(block, sreg, dreg)) + changed = true; + } + + return changed; + } + + return false; +} + bool NativeCodeBasicBlock::CrossBlockXYFlood(NativeCodeProcedure* proc) { bool changed = false; @@ -20242,7 +20793,9 @@ bool NativeCodeBasicBlock::CheckGlobalAddressSumYPointer(const NativeCodeBasicBl } } else + { mPatchStart = true; + } while (at < mIns.Size()) { @@ -24211,12 +24764,64 @@ bool NativeCodeBasicBlock::BitFieldForwarding(const NativeRegisterDataSet& data) mIns[i + 3].mAddress = mIns[i + 1].mAddress; changed = true; } -#endif + if (i + 2 < mIns.Size() && + mIns[i + 0].mType == ASMIT_TXA && mIns[i 
+ 1].mType == ASMIT_ORA && mIns[i + 1].mMode == ASMIM_IMMEDIATE && mIns[i + 2].mType == ASMIT_TAX) + { + if (mIns[i + 1].mAddress == 1 && (mNDataSet.mRegs[CPU_REG_X].mMask & 1)) + { + if (mNDataSet.mRegs[CPU_REG_X].mValue & 1) + { + mIns[i + 0].mType = ASMIT_NOP; mIns[i + 0].mMode = ASMIM_IMPLIED; + } + else + { + mIns[i + 0].mType = ASMIT_INX; mIns[i + 0].mLive |= LIVE_CPU_REG_X; + } - if (mIns[i].BitFieldForwarding(mNDataSet, carryop)) - changed = true; - if (carryop != ASMIT_NOP) - mIns.Insert(i + 1, NativeCodeInstruction(carryop)); + mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; + mIns[i + 2].mType = ASMIT_TXA; mIns[i + 2].mMode = ASMIM_IMPLIED; + } + else if (mIns[i + 1].mAddress == 2 && (mNDataSet.mRegs[CPU_REG_X].mMask & 2)) + { + if (mNDataSet.mRegs[CPU_REG_X].mValue & 2) + { + mIns[i + 0].mType = ASMIT_NOP; mIns[i + 0].mMode = ASMIM_IMPLIED; + mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; + } + else + { + mIns[i + 0].mType = ASMIT_INX; mIns[i + 0].mLive |= LIVE_CPU_REG_X; + mIns[i + 1].mType = ASMIT_INX; mIns[i + 1].mMode = ASMIM_IMPLIED; mIns[i + 1].mLive |= LIVE_CPU_REG_X; + } + + mIns[i + 2].mType = ASMIT_TXA; mIns[i + 2].mMode = ASMIM_IMPLIED; + } + } +#endif + bool skip = false; + + if (i + 2 < mIns.Size() && + mIns[i + 0].mType == ASMIT_EOR && mIns[i + 0].mMode == ASMIM_ZERO_PAGE && + mIns[i + 1].mType == ASMIT_AND && mIns[i + 1].mMode == ASMIM_IMMEDIATE && + mIns[i + 2].mType == ASMIT_EOR && mIns[i + 2].SameEffectiveAddress(mIns[i + 0])) + { + if (mNDataSet.mRegs[mIns[i + 0].mAddress].mMask == 0) + { + mNDataSet.mRegs[CPU_REG_A].mMask &= mIns[i + 1].mAddress; + mNDataSet.mRegs[CPU_REG_Z].ResetMask(); + i += 2; + skip = true; + } + } + + + if (!skip) + { + if (mIns[i].BitFieldForwarding(mNDataSet, carryop)) + changed = true; + if (carryop != ASMIT_NOP) + mIns.Insert(i + 1, NativeCodeInstruction(carryop)); + } } if (mFalseJump) { @@ -24727,6 +25332,11 @@ bool 
NativeCodeBasicBlock::ValueForwarding(NativeCodeProcedure* proc, const Nati mFDataSet.mRegs[CPU_REG_X].mMode = NRDM_IMMEDIATE; mFDataSet.mRegs[CPU_REG_X].mValue = 0; } + else if ((lins.mType == ASMIT_INC || lins.mType == ASMIT_DEC) && lins.mMode == ASMIM_ZERO_PAGE) + { + mFDataSet.mRegs[lins.mAddress].mMode = NRDM_IMMEDIATE; + mFDataSet.mRegs[lins.mAddress].mValue = 0; + } } break; case ASMIT_BEQ: @@ -24810,6 +25420,11 @@ bool NativeCodeBasicBlock::ValueForwarding(NativeCodeProcedure* proc, const Nati mNDataSet.mRegs[CPU_REG_X].mMode = NRDM_IMMEDIATE; mNDataSet.mRegs[CPU_REG_X].mValue = 0; } + else if ((lins.mType == ASMIT_INC || lins.mType == ASMIT_DEC) && lins.mMode == ASMIM_ZERO_PAGE) + { + mNDataSet.mRegs[lins.mAddress].mMode = NRDM_IMMEDIATE; + mNDataSet.mRegs[lins.mAddress].mValue = 0; + } } break; case ASMIT_BPL: @@ -25083,6 +25698,63 @@ bool NativeCodeBasicBlock::RemoveSimpleLoopUnusedIndex(void) return changed; } +static bool Is16BitInc(NativeCodeBasicBlock* block, int reg, int at) +{ + if (at + 6 < block->mIns.Size()) + { + if (block->mIns[at + 0].mType == ASMIT_CLC && + block->mIns[at + 1].mType == ASMIT_LDA && block->mIns[at + 1].mMode == ASMIM_ZERO_PAGE && block->mIns[at + 1].mAddress == reg && + block->mIns[at + 2].mType == ASMIT_ADC && block->mIns[at + 2].mMode == ASMIM_IMMEDIATE && block->mIns[at + 2].mAddress == 1 && + block->mIns[at + 3].mType == ASMIT_STA && block->mIns[at + 3].mMode == ASMIM_ZERO_PAGE && block->mIns[at + 3].mAddress == reg && + block->mIns[at + 4].mType == ASMIT_LDA && block->mIns[at + 4].mMode == ASMIM_ZERO_PAGE && block->mIns[at + 4].mAddress == reg + 1 && + block->mIns[at + 5].mType == ASMIT_ADC && block->mIns[at + 5].mMode == ASMIM_IMMEDIATE && block->mIns[at + 5].mAddress == 0 && + block->mIns[at + 6].mType == ASMIT_STA && block->mIns[at + 6].mMode == ASMIM_ZERO_PAGE && block->mIns[at + 6].mAddress == reg + 1 && + !(block->mIns[at + 6].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z | LIVE_CPU_REG_C))) + { + return true; + } + } + 
+ return false; +} + +static bool IsPointerIncAddrs(NativeCodeBasicBlock* block, int reg) +{ + bool changed = false; + bool used = false; + + int i = 0; + while (i < block->mIns.Size()) + { + NativeCodeInstruction ins(block->mIns[i]); + + if (ins.mMode == ASMIM_INDIRECT_Y) + { + if (ins.mAddress == reg) + { + if (changed) + return false; + used = true; + i++; + } + else + return false; + } + else if (Is16BitInc(block, reg, i)) + { + changed = true; + i += 7; + } + else if (ins.ReferencesZeroPage(reg) || ins.ReferencesZeroPage(reg + 1) || ins.ReferencesYReg()) + return false; + else + i++; + } + + return changed; +} + + bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc, NativeCodeBasicBlock* prevBlock, NativeCodeBasicBlock* exitBlock, bool full) { bool changed = false; @@ -25778,6 +26450,41 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc } } } + else + { + int i = 0; + while (i < mIns.Size() && !mIns[i].ReferencesYReg()) + i++; + if (i < mIns.Size() && mIns[i].mMode == ASMIM_INDIRECT_Y && IsPointerIncAddrs(this, mIns[i].mAddress)) + { + NativeCodeBasicBlock* oblock = mTrueJump == this ? 
mFalseJump : mTrueJump; + + int reg = mIns[i].mAddress; + + if (!oblock->mEntryRequiredRegs[CPU_REG_Y] && + !oblock->mEntryRequiredRegs[reg] && + !oblock->mEntryRequiredRegs[reg + 1]) + { + for (int i = 0; i < mIns.Size(); i++) + { + if (mIns[i].mMode == ASMIM_ZERO_PAGE && mIns[i].mAddress == reg) + { + if (mIns[i].mType == ASMIT_LDA) + { + mIns[i].mType = ASMIT_TYA; + mIns[i].mMode = ASMIM_IMPLIED; + } + else if (mIns[i].mType == ASMIT_STA) + { + mIns[i].mType = ASMIT_TAY; + mIns[i].mMode = ASMIM_IMPLIED; + } + } + } + changed = true; + } + } + } if (mEntryRequiredRegs.Size() && !mEntryRequiredRegs[CPU_REG_A]) { @@ -33343,6 +34050,21 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass progress = true; } } + else if ( + pass > 2 && + mIns[i + 0].mType == ASMIT_CLC && + mIns[i + 1].mType == ASMIT_LDA && mIns[i + 1].mMode == ASMIM_ZERO_PAGE && + mIns[i + 2].mType == ASMIT_ADC && mIns[i + 2].mMode == ASMIM_IMMEDIATE && mIns[i + 2].mAddress == 1 && + mIns[i + 3].mType == ASMIT_STA && !mIns[i + 3].MayBeSameAddress(mIns[i + 1]) && !mIns[i + 3].RequiresYReg() && + mIns[i + 4].mType == ASMIT_LDY && mIns[i + 4].SameEffectiveAddress(mIns[i + 1]) && !(mIns[i + 4].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_C))) + { + mIns[i + 0].mType = ASMIT_NOP; + mIns[i + 1].mType = ASMIT_LDY; mIns[i + 1].mLive |= LIVE_CPU_REG_Y; + mIns[i + 2].mType = ASMIT_INY; mIns[i + 2].mMode = ASMIM_IMPLIED; mIns[i + 2].mLive |= LIVE_CPU_REG_Y; + mIns[i + 3].mType = ASMIT_STY; mIns[i + 3].mLive |= LIVE_CPU_REG_Y; + mIns[i + 4].mType = ASMIT_DEY; mIns[i + 4].mMode = ASMIM_IMPLIED; + progress = true; + } #if 1 else if ( mIns[i + 0].mType == ASMIT_LDA && @@ -33764,6 +34486,31 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass } } #endif +#if 1 + if (pass == 0 && + mIns[i + 0].mType == ASMIT_CLC && + mIns[i + 1].mType == ASMIT_ADC && mIns[i + 1].mMode == ASMIM_IMMEDIATE && + mIns[i + 2].mType == ASMIT_STA && mIns[i + 2].mMode == ASMIM_ZERO_PAGE 
&& + mIns[i + 3].mType == ASMIT_LDA && mIns[i + 3].mMode == ASMIM_IMMEDIATE && + mIns[i + 4].mType == ASMIT_ADC && mIns[i + 4].mMode == ASMIM_IMMEDIATE && + mIns[i + 5].mType == ASMIT_STA && mIns[i + 5].mMode == ASMIM_ZERO_PAGE && mIns[i + 5].mAddress == mIns[i + 2].mAddress + 1 && + !(mIns[i + 5].mLive & LIVE_CPU_REG_A)) + { + proc->ResetPatched(); + if (CheckGlobalAddressSumYPointer(this, mIns[i + 2].mAddress, mIns[i + 2].mAddress, i + 6, -1)) + { + mIns[i + 0].mType = ASMIT_NOP; mIns[i + 0].mMode = ASMIM_IMPLIED; + mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; + mIns[i + 3].mType = ASMIT_NOP; mIns[i + 3].mMode = ASMIM_IMPLIED; + mIns[i + 4].mType = ASMIT_NOP; mIns[i + 4].mMode = ASMIM_IMPLIED; + mIns[i + 5].mType = ASMIT_NOP; mIns[i + 5].mMode = ASMIM_IMPLIED; + + proc->ResetPatched(); + if (PatchGlobalAddressSumYPointer(this, mIns[i + 2].mAddress, mIns[i + 2].mAddress, i + 6, -1, nullptr, mIns[i + 1].mAddress + (mIns[i + 3].mAddress + mIns[i + 4].mAddress) * 256)) + progress = true; + } + } +#endif #if 1 else if (pass == 0 && mIns[i + 0].mType == ASMIT_CLC && @@ -34711,6 +35458,19 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass changed = true; } + else if (pass >= 7 && sz >= 1 && + mIns[sz - 1].mType == ASMIT_AND && mIns[sz - 1].mMode == ASMIM_IMMEDIATE && mIns[sz - 1].mAddress == 0x01 && !(mIns[sz - 1].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_C)) && + (mBranch == ASMIT_BEQ || mBranch == ASMIT_BNE) && !mExitRequiredRegs[CPU_REG_Z]) + { + mIns[sz - 1].mType = ASMIT_LSR; mIns[sz - 1].mLive |= LIVE_CPU_REG_C; mIns[sz - 1].mMode = ASMIM_IMPLIED; + + if (mBranch == ASMIT_BEQ) + mBranch = ASMIT_BCC; + else + mBranch = ASMIT_BCS; + + changed = true; + } if (sz == 1 && mBranch == ASMIT_BNE && mTrueJump == this) { @@ -35035,11 +35795,21 @@ void NativeCodeBasicBlock::BuildPlacement(GrowingArray& p mFalseJump->BuildPlacement(placement); else if (!mTrueJump->mFalseJump && !mFalseJump->mFalseJump && mTrueJump->mTrueJump 
== mFalseJump->mTrueJump) { - mFalseJump->mPlaced = true; - mFalseJump->mPlace = placement.Size(); - placement.Push(mFalseJump); + if (mTrueJump->mNDataSet.mRegs[CPU_REG_C].mMode == NRDM_IMMEDIATE || mTrueJump->mNDataSet.mRegs[CPU_REG_Z].mMode == NRDM_IMMEDIATE) + { + mTrueJump->mPlaced = true; + mTrueJump->mPlace = placement.Size(); + placement.Push(mTrueJump); + mFalseJump->BuildPlacement(placement); + } + else + { + mFalseJump->mPlaced = true; + mFalseJump->mPlace = placement.Size(); + placement.Push(mFalseJump); - mTrueJump->BuildPlacement(placement); + mTrueJump->BuildPlacement(placement); + } } else if (mTrueJump->LeadsInto(mFalseJump, 0) < mFalseJump->LeadsInto(mTrueJump, 0)) { @@ -35985,7 +36755,7 @@ void NativeCodeProcedure::RebuildEntry(void) void NativeCodeProcedure::Optimize(void) { - CheckFunc = !strcmp(mInterProc->mIdent->mString, "tile_dig"); + CheckFunc = !strcmp(mInterProc->mIdent->mString, "pad_poll"); #if 1 int step = 0; @@ -36022,6 +36792,12 @@ void NativeCodeProcedure::Optimize(void) mEntryBlock->Split16BitLoopCount(this); } + if (step >= 6) + { + ResetVisited(); + mEntryBlock->CrossBlockYAliasProgpagation(nullptr, -1); + } + RebuildEntry(); @@ -36081,7 +36857,7 @@ void NativeCodeProcedure::Optimize(void) { ResetVisited(); NativeRegisterDataSet data; - if (mEntryBlock->ValueForwarding(this, data, step > 0, step == 7)) + if (mEntryBlock->ValueForwarding(this, data, step > 0, step == 8)) { changed = true; } @@ -36094,7 +36870,7 @@ void NativeCodeProcedure::Optimize(void) if (step > 1) { ResetVisited(); - if (mEntryBlock->GlobalValueForwarding(this, step == 7)) + if (mEntryBlock->GlobalValueForwarding(this, step == 8)) changed = true; } #endif @@ -36129,7 +36905,7 @@ void NativeCodeProcedure::Optimize(void) #endif #if 1 - if (step < 6) + if (step < 7) { ResetVisited(); if (mEntryBlock->OptimizeSelect(this)) @@ -36157,14 +36933,14 @@ void NativeCodeProcedure::Optimize(void) } #endif - if (step == 4) + if (step == 5) { ResetVisited(); if 
(mEntryBlock->RemoveDoubleZPStore()) changed = true; } - if (step == 4) + if (step == 5) { ResetVisited(); if (mEntryBlock->PropagateCommonSubExpression()) @@ -36174,7 +36950,7 @@ void NativeCodeProcedure::Optimize(void) if (step > 0) { ResetVisited(); - if (mEntryBlock->OptimizeSimpleLoop(this, step > 4)) + if (mEntryBlock->OptimizeSimpleLoop(this, step > 5)) changed = true; ResetVisited(); @@ -36211,7 +36987,7 @@ void NativeCodeProcedure::Optimize(void) #if 1 - if (step > 4 && !changed) + if (step > 5 && !changed) { ResetVisited(); if (mEntryBlock->ShortcutCrossBlockMoves(this)) @@ -36228,7 +37004,7 @@ void NativeCodeProcedure::Optimize(void) if (step > 2 && !changed) { ResetVisited(); - if (mEntryBlock->JoinTailCodeSequences(this, step > 3)) + if (mEntryBlock->JoinTailCodeSequences(this, step > 4)) changed = true; ResetVisited(); @@ -36237,13 +37013,12 @@ void NativeCodeProcedure::Optimize(void) } #endif - #if _DEBUG ResetVisited(); mEntryBlock->CheckBlocks(true); #endif #if 1 - if (step == 3) + if (step == 3 || step == 4) { #if 1 ResetVisited(); @@ -36263,17 +37038,18 @@ void NativeCodeProcedure::Optimize(void) #endif #if 1 - ResetVisited(); - if (!changed && mEntryBlock->OptimizeGenericLoop(this)) - changed = true; + if (step == 4) + { + ResetVisited(); + if (!changed && mEntryBlock->OptimizeGenericLoop(this)) + changed = true; + } #endif - #if 1 ResetVisited(); if (!changed && mEntryBlock->ShortcutZeroPageCopyUp(this)) changed = true; #endif - #if 1 ResetVisited(); if (!changed && mEntryBlock->CrossBlockXYShortcut()) @@ -36288,7 +37064,7 @@ void NativeCodeProcedure::Optimize(void) } #endif - if (step > 3 && !changed) + if (step > 4 && !changed) { ResetVisited(); if (mEntryBlock->OptimizeXYSimpleLoop()) @@ -36297,7 +37073,7 @@ void NativeCodeProcedure::Optimize(void) #if 1 - if (step == 4 || step == 5) + if (step == 5 || step == 6) { #if 1 int xregs[256], yregs[256]; @@ -36368,7 +37144,7 @@ void NativeCodeProcedure::Optimize(void) #endif if (!changed) { - if 
(step == 4) + if (step == 5) { ResetVisited(); mEntryBlock->ReduceLocalXPressure(); @@ -36399,9 +37175,9 @@ void NativeCodeProcedure::Optimize(void) #endif #if 1 - if (step >= 4) + if (step >= 5) { - if (step == 8) + if (step == 9) { ResetVisited(); mEntryBlock->ReduceLocalYPressure(); @@ -36417,7 +37193,7 @@ void NativeCodeProcedure::Optimize(void) } #endif #if 1 - if (step == 5) + if (step == 6) { ResetVisited(); if (mEntryBlock->AlternateXYUsage()) @@ -36441,14 +37217,14 @@ void NativeCodeProcedure::Optimize(void) #endif #if 1 ResetVisited(); - if (mEntryBlock->ForwardZpYIndex(step >= 4)) + if (mEntryBlock->ForwardZpYIndex(step >= 5)) changed = true; #endif #if 1 ResetVisited(); - if (mEntryBlock->ForwardZpXIndex(step >= 4)) + if (mEntryBlock->ForwardZpXIndex(step >= 5)) changed = true; #endif @@ -36458,7 +37234,7 @@ void NativeCodeProcedure::Optimize(void) #endif #if 1 - if (step == 5) + if (step == 6) { ResetVisited(); if (mEntryBlock->SimplifyDiamond(this)) @@ -36471,7 +37247,7 @@ void NativeCodeProcedure::Optimize(void) } #endif - if (step >= 5) + if (step >= 6) { ResetVisited(); if (mEntryBlock->BypassRegisterConditionBlock()) @@ -36484,7 +37260,7 @@ void NativeCodeProcedure::Optimize(void) #endif - if (step == 7) + if (step == 8) { ResetVisited(); if (mEntryBlock->SimplifyDiamond(this)) @@ -36497,7 +37273,7 @@ void NativeCodeProcedure::Optimize(void) #endif #if 1 - if (step == 7) + if (step == 8) { ResetVisited(); if (mEntryBlock->CrossBlockXYFlood(this)) @@ -36506,7 +37282,7 @@ void NativeCodeProcedure::Optimize(void) #endif #if 1 - if (step == 7) + if (step == 8) { ResetVisited(); if (mEntryBlock->CrossBlockY2XFlood(this)) @@ -36514,8 +37290,14 @@ void NativeCodeProcedure::Optimize(void) } #endif + if (step == 8) + { + ResetVisited(); + if (mEntryBlock->CrossBlock16BitFlood(this)) + changed = true; + } #if 1 - if (step >= 6) + if (step >= 7) { ResetVisited(); if (mEntryBlock->ExpandADCToBranch(this)) @@ -36534,7 +37316,7 @@ void 
NativeCodeProcedure::Optimize(void) #endif #if 1 - if (step == 9) + if (step == 10) { if (changed) swappedXY = false; @@ -36565,7 +37347,7 @@ void NativeCodeProcedure::Optimize(void) } #if 1 - if (!changed && step < 10) + if (!changed && step < 11) { cnt = 0; step++; @@ -36628,7 +37410,6 @@ void NativeCodeProcedure::Optimize(void) changed = mEntryBlock->PeepHoleOptimizer(this, 20); } - if (!changed) { ResetVisited(); diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index 1ac4316..e4e7598 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -186,6 +186,7 @@ public: NativeCodeBasicBlock* mLoopHeadBlock, * mLoopTailBlock; NativeRegisterDataSet mDataSet, mNDataSet, mFDataSet; + int mYAlias[256], mYOffset; NativeCodeInstruction DecodeNative(LinkerObject * lobj, int& offset) const; @@ -428,6 +429,8 @@ public: bool CrossBlockXYShortcut(void); + bool CrossBlockYAliasProgpagation(const int * yalias, int yoffset); + bool BypassRegisterConditionBlock(void); bool Check16BitSum(int at, NativeRegisterSum16Info& info); @@ -515,6 +518,12 @@ public: bool CheckForwardSumYPointer(const NativeCodeBasicBlock* block, int reg, int base, const NativeCodeInstruction & iins, int at, int yval); bool PatchForwardSumYPointer(const NativeCodeBasicBlock* block, int reg, int base, const NativeCodeInstruction & iins, int at, int yval); + bool CrossBlock16BitFlood(NativeCodeProcedure* proc); + bool CheckCrossBlock16BitFlood(const NativeCodeBasicBlock* block, int sreg, int dreg, int at, bool rvalid); + bool CheckCrossBlock16BitFloodExit(const NativeCodeBasicBlock* block, int sreg, int dreg, bool rvalid); + bool PatchCrossBlock16BitFlood(const NativeCodeBasicBlock* block, int sreg, int dreg, int at); + bool PatchCrossBlock16BitFloodExit(const NativeCodeBasicBlock* block, int sreg, int dreg); + bool CrossBlockXYFlood(NativeCodeProcedure * proc); bool CheckCrossBlockXFlood(const NativeCodeBasicBlock* block, int reg, int at, bool rvalid); diff --git 
a/oscar64/oscar64.cpp b/oscar64/oscar64.cpp index 3775c71..b4ec082 100644 --- a/oscar64/oscar64.cpp +++ b/oscar64/oscar64.cpp @@ -282,11 +282,22 @@ int main2(int argc, const char** argv) compiler->mTargetMachine = TMACH_VIC20_24K; compiler->AddDefine(Ident::Unique("__VIC20__"), "1"); } + else if (!strcmp(targetMachine, "nes")) + { + compiler->mTargetMachine = TMACH_NES; + compiler->mCompilerOptions |= COPT_EXTENDED_ZERO_PAGE; + compiler->AddDefine(Ident::Unique("__NES__"), "1"); + } else compiler->mErrors->Error(loc, EERR_COMMAND_LINE, "Invalid target machine option", targetMachine); - if (!strcmp(targetFormat, "prg")) + if (compiler->mTargetMachine == TMACH_NES) + { + compiler->mCompilerOptions |= COPT_TARGET_NES; + compiler->AddDefine(Ident::Unique("OSCAR_TARGET_NES"), "1"); + } + else if (!strcmp(targetFormat, "prg")) { compiler->mCompilerOptions |= COPT_TARGET_PRG; compiler->AddDefine(Ident::Unique("OSCAR_TARGET_PRG"), "1");