Optimize byte code interpreter loop

This commit is contained in:
drmortalwombat 2021-11-25 21:42:06 +01:00
parent e9caf064de
commit ccd6a50043
8 changed files with 259 additions and 67 deletions

View File

@ -23,7 +23,7 @@ The goal is to implement the actual C standard and not some subset for performan
## Limits and Errors ## Limits and Errors
There are still several open areas, but most targets have been reached. The current Dhrystone performance is 59 iterations per second with byte code (11434) and 270 iterations with native code (12145 Bytes). There are still several open areas, but most targets have been reached. The current Dhrystone performance is 61 iterations per second with byte code (11434) and 270 iterations with native code (12145 Bytes).
### Language ### Language

View File

@ -108,6 +108,9 @@ if %errorlevel% neq 0 goto :error
call :test randsumtest.c call :test randsumtest.c
if %errorlevel% neq 0 goto :error if %errorlevel% neq 0 goto :error
call :test longcodetest.c
if %errorlevel% neq 0 goto :error
exit /b 0 exit /b 0
:error :error

30
autotest/longcodetest.c Normal file
View File

@ -0,0 +1,30 @@
#include <assert.h>
// Destination arrays written by the generated store sequences in main();
// main() afterwards sums their contents to verify the stores.
char a[200], b[200];
// Guard flag tested before every store into b[]; initialised to true and
// not modified anywhere in this file.
bool ok = true;
// Long straight-line code test.
//
// Fills a[] unconditionally and b[] under the (always true) `ok` guard with
// the value `index & 255`, then sums both arrays and compares them with a
// reference sum computed in a loop.
//
// Returns asum + bsum - 2 * csum, which is 0 when every element reads back
// as the value stored into it.
// NOTE(review): this assumes `char` can hold 0..199 on the target (i.e. is
// unsigned) - confirm.
// NOTE(review): #assign / #repeat / #until are not standard C; presumably a
// compiler-specific preprocessor loop that emits the enclosed statement once
// per value of ni from 0 to 199 - confirm against the compiler documentation.
int main(void)
{
#assign ni 0
#repeat
	a[ni] = ni & 255;
#assign ni ni + 1
#until ni == 200

#assign ni 0
#repeat
	if (ok)
		b[ni] = ni & 255;
#assign ni ni + 1
#until ni == 200

	// All three accumulators must start at zero. csum was previously left
	// uninitialised, so the `+=` below read an indeterminate value and the
	// test result was undefined.
	int asum = 0, bsum = 0, csum = 0;

	for(int i=0; i<200; i++)
	{
		asum += a[i];
		bsum += b[i];
		// Reference value: what each array element is expected to contain.
		csum += i & 255;
	}

	return asum + bsum - 2 * csum;
}

View File

@ -132,6 +132,7 @@ w2:
jsr main jsr main
pexec: pexec:
yexec: yexec:
zexec:
exec: exec:
jmp inp_exit jmp inp_exit
@ -149,21 +150,28 @@ pexec:
yexec: yexec:
iny iny
exec: exec:
lda (ip), y #if 0
sta execjmp + 1
iny
bmi incip
execjmp:
jmp (0x0900)
incip:
tya tya
ldy #0
clc clc
adc ip adc ip
sta ip sta ip
bcc execjmp bcc W1
inc ip + 1 inc ip + 1
bne execjmp W1: ldy #0
#endif
lda (ip), y
sta execjmp + 1
iny
execjmp:
jmp (0x0900)
zexec:
tya
clc
adc ip
sta ip
bcc pexec
inc ip + 1
bne pexec
bcode: bcode:
byt BC_CALL_ABS * 2 byt BC_CALL_ABS * 2
byt <main byt <main
@ -642,7 +650,7 @@ L2: jsr divmod32
__asm inp_nop __asm inp_nop
{ {
jmp startup.exec jmp startup.zexec
} }
#pragma bytecode(BC_NOP, inp_nop) #pragma bytecode(BC_NOP, inp_nop)
@ -2051,13 +2059,13 @@ inp_jumps:
sta ip sta ip
bcc W2 bcc W2
inc ip + 1 inc ip + 1
W2: jmp startup.exec W2: jmp startup.zexec
W1: sec W1: sec
adc ip adc ip
sta ip sta ip
bcs W3 bcs W3
dec ip + 1 dec ip + 1
W3: jmp startup.exec W3: jmp startup.zexec
inp_branchs_eq: inp_branchs_eq:
lda accu lda accu
@ -2163,7 +2171,7 @@ inp_jumpf:
adc ip + 1 adc ip + 1
sta ip + 1 sta ip + 1
stx ip stx ip
jmp startup.exec jmp startup.zexec
inp_branchf_eq: inp_branchf_eq:
lda accu lda accu

View File

@ -1285,10 +1285,9 @@ ByteCodeBasicBlock::ByteCodeBasicBlock(void)
{ {
mTrueJump = mFalseJump = NULL; mTrueJump = mFalseJump = NULL;
mTrueLink = mFalseLink = NULL; mTrueLink = mFalseLink = NULL;
mOffset = 0x7fffffff; mOffset = -1;
mCopied = false; mPlaced = false;
mAssembled = false; mAssembled = false;
mKnownShortBranch = false;
mBypassed = false; mBypassed = false;
mExitLive = 0; mExitLive = 0;
} }
@ -5440,10 +5439,18 @@ void ByteCodeBasicBlock::Assemble(ByteCodeGenerator* generator)
{ {
mAssembled = true; mAssembled = true;
int nins = 0;
for (int i = 0; i < mIns.Size(); i++) for (int i = 0; i < mIns.Size(); i++)
{ {
mIns[i].Assemble(generator, this); mIns[i].Assemble(generator, this);
if (mCode.Size() > nins + 240)
{
PutCode(generator, BC_NOP);
nins = mCode.Size();
} }
}
mLinear = mCode.Size() - nins + 3;
if (this->mTrueJump) if (this->mTrueJump)
this->mTrueJump->Assemble(generator); this->mTrueJump->Assemble(generator);
@ -5513,12 +5520,6 @@ void ByteCodeBasicBlock::CopyCode(ByteCodeGenerator* generator, LinkerObject* li
{ {
int i; int i;
int next, end; int next, end;
int pos, at;
uint8 b;
if (!mCopied)
{
mCopied = true;
for (int i = 0; i < mRelocations.Size(); i++) for (int i = 0; i < mRelocations.Size(); i++)
{ {
@ -5533,50 +5534,150 @@ void ByteCodeBasicBlock::CopyCode(ByteCodeGenerator* generator, LinkerObject* li
if (mFalseJump) if (mFalseJump)
{ {
if (mFalseJump->mOffset <= mOffset) if (mFalseJump->mPlace == mPlace + 1)
{ end += PutBranch(generator, mBranch, mTrueJump->mOffset - end);
if (mTrueJump->mOffset <= mOffset) else if (mTrueJump->mPlace == mPlace + 1)
end += PutBranch(generator, InvertBranchCondition(mBranch), mFalseJump->mOffset - end);
else
{ {
end += PutBranch(generator, mBranch, mTrueJump->mOffset - end); end += PutBranch(generator, mBranch, mTrueJump->mOffset - end);
end += PutBranch(generator, BC_JUMPS, mFalseJump->mOffset - end); end += PutBranch(generator, BC_JUMPS, mFalseJump->mOffset - end);
}
else
{
end += PutBranch(generator, InvertBranchCondition(mBranch), mFalseJump->mOffset - end);
}
}
else
{
end += PutBranch(generator, mBranch, mTrueJump->mOffset - end);
} }
} }
else if (mTrueJump) else if (mTrueJump)
{ {
if (mTrueJump->mOffset != next) if (mTrueJump->mPlace != mPlace + 1)
{ end += PutBranch(generator, mBranch, mTrueJump->mOffset - end);
end += PutBranch(generator, BC_JUMPS, mTrueJump->mOffset - end);
}
} }
assert(end == next); assert(end == next);
for (i = 0; i < mCode.Size(); i++) for (i = 0; i < mCode.Size(); i++)
{
mCode.Lookup(i, target[i + mOffset]); mCode.Lookup(i, target[i + mOffset]);
} }
if (mTrueJump) mTrueJump->CopyCode(generator, linkerObject, target); void ByteCodeBasicBlock::BuildPlacement(GrowingArray<ByteCodeBasicBlock*>& placement)
if (mFalseJump) mFalseJump->CopyCode(generator, linkerObject, target); {
if (!mPlaced)
{
mPlaced = true;
mPlace = placement.Size();
placement.Push(this);
if (mFalseJump)
{
if (mFalseJump->mPlaced)
mTrueJump->BuildPlacement(placement);
else if (mTrueJump->mPlaced)
mFalseJump->BuildPlacement(placement);
else if (!mTrueJump->mFalseJump && !mFalseJump->mFalseJump && mTrueJump->mTrueJump == mFalseJump->mTrueJump)
{
mFalseJump->mPlaced = true;
mFalseJump->mPlace = placement.Size();
placement.Push(mFalseJump);
mTrueJump->BuildPlacement(placement);
}
else if (mTrueJump->mFalseJump == mFalseJump || mTrueJump->mTrueJump == mFalseJump)
{
mTrueJump->BuildPlacement(placement);
mFalseJump->BuildPlacement(placement);
}
else
{
mFalseJump->BuildPlacement(placement);
mTrueJump->BuildPlacement(placement);
}
}
else if (mTrueJump)
{
mTrueJump->BuildPlacement(placement);
}
} }
} }
void ByteCodeBasicBlock::CalculateOffset(int& total) void ByteCodeBasicBlock::InitialOffset(int& total, int& linear)
{ {
int size = mCode.Size();
if (size > 240)
size = 240;
mNeedsNop = linear + size > 240;
if (mNeedsNop)
{
total++;
linear = 0;
}
else
linear += mLinear;
mOffset = total;
total += mCode.Size();
if (mFalseJump)
{
total += 3;
if (mFalseJump->mPlace != mPlace + 1 && mTrueJump->mPlace != mPlace + 1)
{
total += 3;
linear = 0;
}
}
else if (mTrueJump)
{
if (mTrueJump->mPlace != mPlace + 1)
{
total += 3;
linear = 0;
}
}
mSize = total - mOffset;
}
bool ByteCodeBasicBlock::CalculateOffset(int& total)
{
if (mNeedsNop)
total++;
bool changed = total != mOffset;
mOffset = total;
total += mCode.Size();
if (mFalseJump)
{
if (mFalseJump->mPlace == mPlace + 1)
total += BranchByteSize(total, mTrueJump->mOffset);
else if (mTrueJump->mPlace == mPlace + 1)
total += BranchByteSize(total, mFalseJump->mOffset);
else
{
total += BranchByteSize(total, mTrueJump->mOffset);
total += JumpByteSize(total, mFalseJump->mOffset);
}
}
else if (mTrueJump)
{
if (mTrueJump->mPlace != mPlace + 1)
total += BranchByteSize(total, mTrueJump->mOffset);
}
if (mOffset + mSize != total)
changed = true;
mSize = total - mOffset;
return changed;
}
#if 0
int next; int next;
if (mOffset > total) if (mOffset > total)
{ {
mNeedsNop = false;
linear += mSize + 3;
mOffset = total; mOffset = total;
next = total + mCode.Size(); next = total + mCode.Size();
@ -5599,6 +5700,13 @@ void ByteCodeBasicBlock::CalculateOffset(int& total)
// trueJump has not been placed, but falseJump has // trueJump has not been placed, but falseJump has
total = next + BranchByteSize(next, mFalseJump->mOffset); total = next + BranchByteSize(next, mFalseJump->mOffset);
if (linear + mTrueJump->mCode.Size() > 240)
{
mNeedsNop = true;
total++;
}
mSize = total - mOffset; mSize = total - mOffset;
mTrueJump->CalculateOffset(total); mTrueJump->CalculateOffset(total);
} }
@ -5608,6 +5716,13 @@ void ByteCodeBasicBlock::CalculateOffset(int& total)
// falseJump has not been placed, but trueJump has // falseJump has not been placed, but trueJump has
total = next + BranchByteSize(next, mTrueJump->mOffset); total = next + BranchByteSize(next, mTrueJump->mOffset);
if (linear + mFalseJump->mCode.Size() > 240)
{
mNeedsNop = true;
total++;
}
mSize = total - mOffset; mSize = total - mOffset;
mFalseJump->CalculateOffset(total); mFalseJump->CalculateOffset(total);
} }
@ -5618,6 +5733,13 @@ void ByteCodeBasicBlock::CalculateOffset(int& total)
// a short branch // a short branch
total = next + 2; total = next + 2;
if (linear + mFalseJump->mCode.Size() > 240)
{
mNeedsNop = true;
total++;
}
mSize = total - mOffset; mSize = total - mOffset;
mFalseJump->CalculateOffset(total); mFalseJump->CalculateOffset(total);
@ -5634,7 +5756,15 @@ void ByteCodeBasicBlock::CalculateOffset(int& total)
// Small diamond so place true then false directly behind each other // Small diamond so place true then false directly behind each other
// with short branches // with short branches
mSize = mCode.Size() + 2; total = next + 2;
if (linear + mFalseJump->mCode.Size() > 240)
{
mNeedsNop = true;
total++;
}
mSize = total - mOffset;
mFalseJump->mOffset = next + 2; mFalseJump->mOffset = next + 2;
mFalseJump->mSize = mFalseJump->mCode.Size() + 2; mFalseJump->mSize = mFalseJump->mCode.Size() + 2;
@ -5717,6 +5847,8 @@ void ByteCodeBasicBlock::CalculateOffset(int& total)
} }
} }
} }
#endif
ByteCodeProcedure::ByteCodeProcedure(void) ByteCodeProcedure::ByteCodeProcedure(void)
: mBlocks(nullptr) : mBlocks(nullptr)
@ -5802,17 +5934,32 @@ void ByteCodeProcedure::Compile(ByteCodeGenerator* generator, InterCodeProcedure
exitBlock->PutCode(generator, BC_RETURN); exitBlock->PutByte(tempSave); exitBlock->PutWord(proc->mLocalSize + 2 + tempSave); exitBlock->PutCode(generator, BC_RETURN); exitBlock->PutByte(tempSave); exitBlock->PutWord(proc->mLocalSize + 2 + tempSave);
int total;
ByteCodeBasicBlock* lentryBlock = entryBlock->BypassEmptyBlocks(); ByteCodeBasicBlock* lentryBlock = entryBlock->BypassEmptyBlocks();
total = 0; GrowingArray<ByteCodeBasicBlock*> placement(nullptr);
lentryBlock->CalculateOffset(total); int total, linear;
total = 0;
linear = 0;
lentryBlock->BuildPlacement(placement);
for (int i = 0; i < placement.Size(); i++)
placement[i]->InitialOffset(total, linear);
do {
progress = false;
total = 0;
for (int i = 0; i < placement.Size(); i++)
if (placement[i]->CalculateOffset(total))
progress = true;
} while (progress);
uint8* data = proc->mLinkerObject->AddSpace(total); uint8* data = proc->mLinkerObject->AddSpace(total);
lentryBlock->CopyCode(generator, proc->mLinkerObject, data); for (int i = 0; i < placement.Size(); i++)
placement[i]->CopyCode(generator, proc->mLinkerObject, data);
mProgSize = total; mProgSize = total;
} }
@ -5849,6 +5996,7 @@ ByteCodeGenerator::ByteCodeGenerator(Errors* errors, Linker* linker)
mByteCodeUsed[BC_CALL_ABS] = 1; mByteCodeUsed[BC_CALL_ABS] = 1;
mByteCodeUsed[BC_EXIT] = 1; mByteCodeUsed[BC_EXIT] = 1;
mByteCodeUsed[BC_NATIVE] = 1; mByteCodeUsed[BC_NATIVE] = 1;
mByteCodeUsed[BC_NOP] = 1;
assert(sizeof(ByteCodeNames) == 128 * sizeof(char*)); assert(sizeof(ByteCodeNames) == 128 * sizeof(char*));
} }

View File

@ -247,8 +247,8 @@ public:
GrowingArray<LinkerReference> mRelocations; GrowingArray<LinkerReference> mRelocations;
GrowingArray<ByteCodeBasicBlock*> mEntryBlocks; GrowingArray<ByteCodeBasicBlock*> mEntryBlocks;
int mOffset, mSize; int mOffset, mSize, mPlace, mLinear;
bool mPlaced, mCopied, mKnownShortBranch, mBypassed, mAssembled, mVisited; bool mPlaced, mNeedsNop, mBypassed, mAssembled, mVisited;
uint32 mExitLive; uint32 mExitLive;
ByteCodeBasicBlock(void); ByteCodeBasicBlock(void);
@ -266,7 +266,10 @@ public:
int PutBranch(ByteCodeGenerator* generator, ByteCode code, int offset); int PutBranch(ByteCodeGenerator* generator, ByteCode code, int offset);
ByteCodeBasicBlock* BypassEmptyBlocks(void); ByteCodeBasicBlock* BypassEmptyBlocks(void);
void CalculateOffset(int& total); void BuildPlacement(GrowingArray<ByteCodeBasicBlock*> & placement);
void InitialOffset(int& total, int& linear);
bool CalculateOffset(int & total);
void CopyCode(ByteCodeGenerator* generator, LinkerObject * linkerObject, uint8* target); void CopyCode(ByteCodeGenerator* generator, LinkerObject * linkerObject, uint8* target);
void LongConstToAccu(int64 val); void LongConstToAccu(int64 val);

View File

@ -640,7 +640,7 @@ int Emulator::Emulate(int startIP)
break; break;
} }
if ((trace & 1) && ip == 0x0850) if ((trace & 1) && ip == 0x0851)
{ {
int accu = mMemory[BC_REG_ACCU] + 256 * mMemory[BC_REG_ACCU + 1]; int accu = mMemory[BC_REG_ACCU] + 256 * mMemory[BC_REG_ACCU + 1];
int ptr = mMemory[BC_REG_ADDR] + 256 * mMemory[BC_REG_ADDR + 1]; int ptr = mMemory[BC_REG_ADDR] + 256 * mMemory[BC_REG_ADDR + 1];

View File

@ -4408,7 +4408,7 @@ void InterCodeBasicBlock::SplitBranches(InterCodeProcedure* proc)
{ {
mVisited = true; mVisited = true;
if (mTrueJump && mFalseJump && mInstructions.Size() > 2) if (mTrueJump && mFalseJump && (mInstructions.Size() > 2 || mInstructions.Size() == 2 && mInstructions[0]->mCode != IC_RELATIONAL_OPERATOR))
{ {
InterCodeBasicBlock* block = new InterCodeBasicBlock(); InterCodeBasicBlock* block = new InterCodeBasicBlock();
proc->Append(block); proc->Append(block);