Some bytecode optimizations

This commit is contained in:
drmortalwombat 2021-11-26 22:59:10 +01:00
parent ccd6a50043
commit 0ca6ef8894
6 changed files with 134 additions and 75 deletions

View File

@ -199,14 +199,12 @@ void cwin_write_string(CharWin * win, const char * buffer)
{ {
for(char x=0; x<win->wx; x++) for(char x=0; x<win->wx; x++)
{ {
char c = *buffer; char ch = *buffer;
if (c) if (ch)
{ {
c &= 0xbf; ch = (ch & 0x3f) | ((ch & 0x80) >> 1);
if (c & 0x80)
c ^= 0xc0;
dp[x] = c; dp[x] = ch;
buffer++; buffer++;
} }
else else
@ -254,9 +252,7 @@ void cwin_putat_char(CharWin * win, char x, char y, char ch, char color)
{ {
int offset = mul40[y] + x; int offset = mul40[y] + x;
ch &= 0xbf; ch = (ch & 0x3f) | ((ch & 0x80) >> 1);
if (ch & 0x80)
ch ^= 0xc0;
win->sp[offset] = ch; win->sp[offset] = ch;
win->cp[offset] = color; win->cp[offset] = color;
@ -275,9 +271,7 @@ void cwin_putat_chars(CharWin * win, char x, char y, const char * chars, char nu
{ {
char ch = chars[i]; char ch = chars[i];
ch &= 0xbf; ch = (ch & 0x3f) | ((ch & 0x80) >> 1);
if (ch & 0x80)
ch ^= 0xc0;
sp[i] = ch; sp[i] = ch;
cp[i] = color; cp[i] = color;
@ -296,9 +290,7 @@ char cwin_putat_string(CharWin * win, char x, char y, const char * str, char col
char i = 0; char i = 0;
while (char ch = str[i]) while (char ch = str[i])
{ {
ch &= 0xbf; ch = (ch & 0x3f) | ((ch & 0x80) >> 1);
if (ch & 0x80)
ch ^= 0xc0;
sp[i] = ch; sp[i] = ch;
cp[i] = color; cp[i] = color;

View File

@ -131,6 +131,7 @@ w2:
// All native code // All native code
jsr main jsr main
pexec: pexec:
tyexec:
yexec: yexec:
zexec: zexec:
exec: exec:
@ -146,32 +147,27 @@ exec:
sta ip + 1 sta ip + 1
pexec: pexec:
ldy #$ff ldy #0
beq exec
tyexec:
ldy tmpy
yexec: yexec:
iny iny
exec: exec:
#if 0
tya
clc
adc ip
sta ip
bcc W1
inc ip + 1
W1: ldy #0
#endif
lda (ip), y lda (ip), y
sta execjmp + 1 sta execjmp + 1
iny iny
execjmp: execjmp:
jmp (0x0900) jmp (0x0900)
zexec: zexec:
tya tya
ldy #0
clc clc
adc ip adc ip
sta ip sta ip
bcc pexec bcc exec
inc ip + 1 inc ip + 1
bne pexec bne exec
bcode: bcode:
byt BC_CALL_ABS * 2 byt BC_CALL_ABS * 2
byt <main byt <main
@ -902,8 +898,7 @@ __asm inp_load_abs_8
L0: L0:
lda (addr), y lda (addr), y
sta $00, x sta $00, x
ldy tmpy jmp startup.tyexec
jmp startup.yexec
inp_load_addr_8: inp_load_addr_8:
lda (ip), y lda (ip), y
tax tax
@ -934,8 +929,7 @@ L0:
sta $00, x sta $00, x
lda #0 lda #0
sta $01, x sta $01, x
ldy tmpy jmp startup.tyexec
jmp startup.yexec
inp_load_addr_u8: inp_load_addr_u8:
lda (ip), y lda (ip), y
tax tax
@ -967,8 +961,7 @@ L0:
iny iny
lda (addr), y lda (addr), y
sta $01, x sta $01, x
ldy tmpy jmp startup.tyexec
jmp startup.yexec
inp_load_addr_16: inp_load_addr_16:
lda (ip), y lda (ip), y
@ -983,6 +976,28 @@ inp_load_addr_16:
#pragma bytecode(BC_LOAD_ABS_16, inp_load_abs_16) #pragma bytecode(BC_LOAD_ABS_16, inp_load_abs_16)
#pragma bytecode(BC_LOAD_ADDR_16, inp_load_abs_16.inp_load_addr_16) #pragma bytecode(BC_LOAD_ADDR_16, inp_load_abs_16.inp_load_addr_16)
// Bytecode handler bound to BC_LOAD_ABS_ADDR.
// Reads a two byte absolute address operand from the bytecode stream at (ip),y,
// fetches the 16 bit value stored at that address (low byte first) and leaves
// that value in addr / addr + 1.
__asm inp_load_abs_addr
{
// Operand low byte becomes the low byte of the pointer in addr.
lda (ip), y
sta addr
// Advance to the operand high byte and complete the pointer.
iny
lda (ip), y
sta addr + 1
// Y is needed as an index below, so park the bytecode offset in tmpy.
sty tmpy
ldy #0
// Low byte of the word being loaded; held in X because addr is still
// in use as the pointer for the second read.
lda (addr), y
tax
// High byte of the word being loaded.
iny
lda (addr), y
// Both bytes are read, so addr can now be overwritten with the loaded value.
sta addr + 1
stx addr
// Continue with the next bytecode; the tyexec entry reloads Y from tmpy
// before dispatching.
jmp startup.tyexec
}
#pragma bytecode(BC_LOAD_ABS_ADDR, inp_load_abs_addr)
__asm inp_load_abs_32 __asm inp_load_abs_32
{ {
lda (ip), y lda (ip), y
@ -1008,8 +1023,7 @@ L0:
iny iny
lda (addr), y lda (addr), y
sta $03, x sta $03, x
ldy tmpy jmp startup.tyexec
jmp startup.yexec
inp_load_addr_32: inp_load_addr_32:
lda (ip), y lda (ip), y
@ -1040,8 +1054,7 @@ L0:
lda $00, x lda $00, x
sta (addr), y sta (addr), y
ldy tmpy jmp startup.tyexec
jmp startup.yexec
inp_store_addr_8: inp_store_addr_8:
lda (ip), y lda (ip), y
@ -1074,8 +1087,7 @@ L0:
iny iny
lda $01, x lda $01, x
sta (addr), y sta (addr), y
ldy tmpy jmp startup.tyexec
jmp startup.yexec
inp_store_addr_16: inp_store_addr_16:
lda (ip), y lda (ip), y
@ -1114,8 +1126,7 @@ L0:
iny iny
lda $03, x lda $03, x
sta (addr), y sta (addr), y
ldy tmpy jmp startup.tyexec
jmp startup.yexec
inp_store_addr_32: inp_store_addr_32:
lda (ip), y lda (ip), y
@ -1210,8 +1221,7 @@ __asm inp_load_local_16
iny iny
lda (fp), y lda (fp), y
sta $01, x sta $01, x
ldy tmpy jmp startup.tyexec
jmp startup.yexec
} }
#pragma bytecode(BC_LOAD_LOCAL_16, inp_load_local_16) #pragma bytecode(BC_LOAD_LOCAL_16, inp_load_local_16)
@ -1235,8 +1245,7 @@ __asm inp_load_local_32
iny iny
lda (fp), y lda (fp), y
sta $03, x sta $03, x
ldy tmpy jmp startup.tyexec
jmp startup.yexec
} }
#pragma bytecode(BC_LOAD_LOCAL_32, inp_load_local_32) #pragma bytecode(BC_LOAD_LOCAL_32, inp_load_local_32)
@ -1251,8 +1260,7 @@ __asm inp_load_local_8
tay tay
lda (fp), y lda (fp), y
sta $00, x sta $00, x
ldy tmpy jmp startup.tyexec
jmp startup.yexec
} }
#pragma bytecode(BC_LOAD_LOCAL_8, inp_load_local_8) #pragma bytecode(BC_LOAD_LOCAL_8, inp_load_local_8)
@ -1269,8 +1277,7 @@ __asm inp_load_local_u8
sta $00, x sta $00, x
lda #0 lda #0
sta $01, x sta $01, x
ldy tmpy jmp startup.tyexec
jmp startup.yexec
} }
#pragma bytecode(BC_LOAD_LOCAL_U8, inp_load_local_u8) #pragma bytecode(BC_LOAD_LOCAL_U8, inp_load_local_u8)
@ -1285,8 +1292,7 @@ __asm inp_store_local_8
tay tay
lda $00, x lda $00, x
sta (fp), y sta (fp), y
ldy tmpy jmp startup.tyexec
jmp startup.yexec
} }
#pragma bytecode(BC_STORE_LOCAL_8, inp_store_local_8) #pragma bytecode(BC_STORE_LOCAL_8, inp_store_local_8)
@ -1304,8 +1310,7 @@ __asm inp_store_local_16
iny iny
lda $01, x lda $01, x
sta (fp), y sta (fp), y
ldy tmpy jmp startup.tyexec
jmp startup.yexec
} }
#pragma bytecode(BC_STORE_LOCAL_16, inp_store_local_16) #pragma bytecode(BC_STORE_LOCAL_16, inp_store_local_16)
@ -1329,8 +1334,7 @@ __asm inp_store_local_32
iny iny
lda $03, x lda $03, x
sta (fp), y sta (fp), y
ldy tmpy jmp startup.tyexec
jmp startup.yexec
} }
#pragma bytecode(BC_STORE_LOCAL_32, inp_store_local_32) #pragma bytecode(BC_STORE_LOCAL_32, inp_store_local_32)
@ -1363,8 +1367,7 @@ __asm inp_store_frame_8
tay tay
lda $00, x lda $00, x
sta (sp), y sta (sp), y
ldy tmpy jmp startup.tyexec
jmp startup.yexec
} }
#pragma bytecode(BC_STORE_FRAME_8, inp_store_frame_8) #pragma bytecode(BC_STORE_FRAME_8, inp_store_frame_8)
@ -1382,8 +1385,7 @@ __asm inp_store_frame_16
iny iny
lda $01, x lda $01, x
sta (sp), y sta (sp), y
ldy tmpy jmp startup.tyexec
jmp startup.yexec
} }
#pragma bytecode(BC_STORE_FRAME_16, inp_store_frame_16) #pragma bytecode(BC_STORE_FRAME_16, inp_store_frame_16)
@ -1407,8 +1409,7 @@ __asm inp_store_frame_32
iny iny
lda $03, x lda $03, x
sta (sp), y sta (sp), y
ldy tmpy jmp startup.tyexec
jmp startup.yexec
} }
#pragma bytecode(BC_STORE_FRAME_32, inp_store_frame_32) #pragma bytecode(BC_STORE_FRAME_32, inp_store_frame_32)

View File

@ -26,6 +26,7 @@ enum ByteCode
BC_LOAD_ABS_U8, BC_LOAD_ABS_U8,
BC_LOAD_ABS_16, BC_LOAD_ABS_16,
BC_LOAD_ABS_32, BC_LOAD_ABS_32,
BC_LOAD_ABS_ADDR,
BC_STORE_ABS_8, BC_STORE_ABS_8,
BC_STORE_ABS_16, BC_STORE_ABS_16,

View File

@ -26,6 +26,7 @@ static const char* ByteCodeNames[] = {
"LOAD_ABS_U8", "LOAD_ABS_U8",
"LOAD_ABS_16", "LOAD_ABS_16",
"LOAD_ABS_32", "LOAD_ABS_32",
"LOAD_ABS_ADDR",
"STORE_ABS_8", "STORE_ABS_8",
"STORE_ABS_16", "STORE_ABS_16",
@ -147,9 +148,8 @@ static const char* ByteCodeNames[] = {
"SET_LT", "SET_LT",
"SET_LE", "SET_LE",
"JSR", //113 "JSR", //114
nullptr,
nullptr, nullptr,
nullptr, nullptr,
@ -434,6 +434,9 @@ bool ByteCodeInstruction::CheckAccuSize(uint32 & used)
} }
break; break;
case BC_LOAD_ABS_ADDR:
break;
case BC_LEA_ABS: case BC_LEA_ABS:
case BC_LEA_LOCAL: case BC_LEA_LOCAL:
case BC_LEA_FRAME: case BC_LEA_FRAME:
@ -954,6 +957,22 @@ void ByteCodeInstruction::Assemble(ByteCodeGenerator* generator, ByteCodeBasicBl
block->PutByte(mRegister); block->PutByte(mRegister);
break; break;
case BC_LOAD_ABS_ADDR:
block->PutCode(generator, mCode);
if (mRelocate)
{
LinkerReference rl;
rl.mOffset = block->mCode.Size();
rl.mFlags = LREF_HIGHBYTE | LREF_LOWBYTE;
rl.mRefObject = mLinkerObject;
rl.mRefOffset = mValue;
block->mRelocations.Push(rl);
block->PutWord(0);
}
else
block->PutWord(mValue);
break;
case BC_LEA_ABS: case BC_LEA_ABS:
case BC_LEA_ABS_INDEX: case BC_LEA_ABS_INDEX:
case BC_LEA_ABS_INDEX_U8: case BC_LEA_ABS_INDEX_U8:
@ -4816,19 +4835,19 @@ bool ByteCodeBasicBlock::PeepHoleOptimizer(int phase)
mIns[i + 2].mCode = BC_NOP; mIns[i + 2].mCode = BC_NOP;
progress = true; progress = true;
} }
else if ( else if (
mIns[i + 0].mCode == BC_LOAD_REG_8 && mIns[i + 0].mCode == BC_LOAD_REG_8 &&
mIns[i + 1].mCode == BC_STORE_REG_16 && mIns[i + 1].mCode == BC_STORE_REG_16 &&
mIns[i + 2].mCode == BC_LEA_ABS_INDEX && mIns[i + 2].mRegister == mIns[i + 1].mRegister && !(mIns[i + 2].mLive & LIVE_ACCU) && mIns[i + 2].mRegisterFinal) mIns[i + 2].mCode == BC_LEA_ABS_INDEX && mIns[i + 2].mRegister == mIns[i + 1].mRegister && !(mIns[i + 2].mLive & LIVE_ACCU) && mIns[i + 2].mRegisterFinal)
{ {
mIns[i + 2].mCode = BC_LEA_ABS_INDEX_U8; mIns[i + 2].mCode = BC_LEA_ABS_INDEX_U8;
mIns[i + 2].mRegister = mIns[i + 0].mRegister; mIns[i + 2].mRegister = mIns[i + 0].mRegister;
mIns[i + 2].mRegisterFinal = mIns[i + 0].mRegisterFinal; mIns[i + 2].mRegisterFinal = mIns[i + 0].mRegisterFinal;
mIns[i + 0].mCode = BC_NOP; mIns[i + 0].mCode = BC_NOP;
mIns[i + 1].mCode = BC_NOP; mIns[i + 1].mCode = BC_NOP;
progress = true; progress = true;
} }
else if ( else if (
mIns[i + 0].mCode == BC_STORE_REG_16 && mIns[i + 0].mCode == BC_STORE_REG_16 &&
@ -4872,7 +4891,7 @@ bool ByteCodeBasicBlock::PeepHoleOptimizer(int phase)
progress = true; progress = true;
} }
else if ( else if (
mIns[i + 0].mCode == BC_LOAD_REG_8 && mIns[i + 0].mCode == BC_LOAD_REG_8 &&
mIns[i + 1].mCode == BC_BINOP_SHRI_U16 && mIns[i + 1].mCode == BC_BINOP_SHRI_U16 &&
mIns[i + 2].mCode == BC_LOAD_REG_8 && mIns[i + 2].mRegister == BC_REG_ACCU) mIns[i + 2].mCode == BC_LOAD_REG_8 && mIns[i + 2].mRegister == BC_REG_ACCU)
{ {
@ -4913,9 +4932,19 @@ bool ByteCodeBasicBlock::PeepHoleOptimizer(int phase)
mIns[i + 2].mCode = BC_BINOP_ADDA_16; mIns[i + 2].mCode = BC_BINOP_ADDA_16;
progress = true; progress = true;
} }
else if (
mIns[i + 0].mCode == BC_LOAD_REG_16 &&
mIns[i + 1].mCode == BC_BINOP_SHLI_16 && mIns[i + 1].mValue == 1 &&
mIns[i + 2].mCode == BC_STORE_REG_16 && mIns[i + 0].mRegister == mIns[i + 2].mRegister && !(mIns[i + 2].mLive & LIVE_ACCU))
{
mIns[i + 0].mRegisterFinal = false;
mIns[i + 1].mCode = BC_NOP;
mIns[i + 2].mCode = BC_BINOP_ADDA_16;
progress = true;
}
else if ( else if (
(mIns[i + 0].mCode == BC_LOAD_LOCAL_16 || mIns[i + 0].mCode == BC_LOAD_ABS_16 || mIns[i + 0].mCode == BC_LOAD_ADDR_16 || (mIns[i + 0].mCode == BC_LOAD_LOCAL_16 || mIns[i + 0].mCode == BC_LOAD_ABS_16 || mIns[i + 0].mCode == BC_LOAD_ADDR_16 ||
mIns[i + 0].mCode == BC_LOAD_LOCAL_U8 || mIns[i + 0].mCode == BC_LOAD_ABS_U8 || mIns[i + 0].mCode == BC_LOAD_ADDR_U8) && mIns[i + 0].mCode == BC_LOAD_LOCAL_U8 || mIns[i + 0].mCode == BC_LOAD_ABS_U8 || mIns[i + 0].mCode == BC_LOAD_ADDR_U8) &&
mIns[i + 1].mCode == BC_BINOP_ADDR_16 && mIns[i + 0].mRegister == mIns[i + 1].mRegister && mIns[i + 1].mRegisterFinal && mIns[i + 1].mCode == BC_BINOP_ADDR_16 && mIns[i + 0].mRegister == mIns[i + 1].mRegister && mIns[i + 1].mRegisterFinal &&
mIns[i + 2].mCode == BC_STORE_REG_16 && !(mIns[i + 2].mLive & LIVE_ACCU)) mIns[i + 2].mCode == BC_STORE_REG_16 && !(mIns[i + 2].mLive & LIVE_ACCU))
{ {
@ -4936,6 +4965,18 @@ bool ByteCodeBasicBlock::PeepHoleOptimizer(int phase)
mIns[i + 1].mLive |= LIVE_ACCU; mIns[i + 1].mLive |= LIVE_ACCU;
progress = true; progress = true;
} }
else if (
mIns[i + 0].mCode == BC_LEA_ABS &&
mIns[i + 1].mCode == BC_BINOP_ADDA_16 && mIns[i + 0].mRegister == mIns[i + 1].mRegister &&
mIns[i + 2].mCode == BC_ADDR_REG && mIns[i + 0].mRegister == mIns[i + 2].mRegister && mIns[i + 2].mRegisterFinal)
{
mIns[i + 0].mCode = BC_LEA_ABS_INDEX;
mIns[i + 0].mRegister = BC_REG_ACCU;
mIns[i + 1].mCode = BC_NOP;
mIns[i + 2].mCode = BC_NOP;
progress = true;
}
#if 1 #if 1
else if ( else if (
i + 3 == mIns.Size() && mFalseJump && i + 3 == mIns.Size() && mFalseJump &&
@ -5283,7 +5324,26 @@ bool ByteCodeBasicBlock::PeepHoleOptimizer(int phase)
mIns[i + 0].mCode = BC_NOP; mIns[i + 0].mCode = BC_NOP;
progress = true; progress = true;
} }
else if (
mIns[i + 0].mCode == BC_BINOP_ADDA_16 &&
mIns[i + 1].mCode == BC_ADDR_REG && mIns[i + 0].mRegister == mIns[i + 1].mRegister && mIns[i + 1].mRegisterFinal)
{
mIns[i + 1].mCode = BC_LEA_ACCU_INDEX;
mIns[i + 0].mCode = BC_NOP;
progress = true;
}
#endif #endif
#if 1
else if (
mIns[i + 0].mCode == BC_LOAD_ABS_16 &&
mIns[i + 1].mCode == BC_ADDR_REG && mIns[i + 0].mRegister == mIns[i + 1].mRegister && mIns[i + 1].mRegisterFinal)
{
mIns[i + 0].mCode = BC_LOAD_ABS_ADDR;
mIns[i + 1].mCode = BC_NOP;
progress = true;
}
#endif
#if 1 #if 1
else if ( else if (
i + 2 == mIns.Size() && mFalseJump && i + 2 == mIns.Size() && mFalseJump &&

View File

@ -27,6 +27,7 @@ enum ByteCode
BC_LOAD_ABS_U8, BC_LOAD_ABS_U8,
BC_LOAD_ABS_16, BC_LOAD_ABS_16,
BC_LOAD_ABS_32, BC_LOAD_ABS_32,
BC_LOAD_ABS_ADDR,
BC_STORE_ABS_8, BC_STORE_ABS_8,
BC_STORE_ABS_16, BC_STORE_ABS_16,

View File

@ -155,6 +155,10 @@ void ByteCodeDisassembler::Disassemble(FILE* file, const uint8* memory, int star
fprintf(file, "MOVD\t%s, %s", TempName(memory[start + i + 2], tbuffer, proc), AddrName(uint16(memory[start + i + 0] + 256 * memory[start + i + 1]), abuffer, linker)); fprintf(file, "MOVD\t%s, %s", TempName(memory[start + i + 2], tbuffer, proc), AddrName(uint16(memory[start + i + 0] + 256 * memory[start + i + 1]), abuffer, linker));
i += 3; i += 3;
break; break;
case BC_LOAD_ABS_ADDR:
fprintf(file, "MOV\tADDR, %s", AddrName(uint16(memory[start + i + 0] + 256 * memory[start + i + 1]), abuffer, linker));
i += 2;
break;
case BC_LEA_ABS: case BC_LEA_ABS:
fprintf(file, "LEA\t%s, %s", TempName(memory[start + i + 0], tbuffer, proc), AddrName(uint16(memory[start + i + 1] + 256 * memory[start + i + 2]), abuffer, linker)); fprintf(file, "LEA\t%s, %s", TempName(memory[start + i + 0], tbuffer, proc), AddrName(uint16(memory[start + i + 1] + 256 * memory[start + i + 2]), abuffer, linker));