Optimize byte code interpreter loop

This commit is contained in:
drmortalwombat 2021-11-25 21:42:06 +01:00
parent e9caf064de
commit ccd6a50043
8 changed files with 259 additions and 67 deletions

View File

@ -23,7 +23,7 @@ The goal is to implement the actual C standard and not some subset for performan
## Limits and Errors
There are still several open areas, but most targets have been reached. The current Dhrystone performance is 59 iterations per second with byte code (11434) and 270 iterations with native code (12145 Bytes).
There are still several open areas, but most targets have been reached. The current Dhrystone performance is 61 iterations per second with byte code (11434 bytes) and 270 iterations per second with native code (12145 bytes).
### Language

View File

@ -108,6 +108,9 @@ if %errorlevel% neq 0 goto :error
call :test randsumtest.c
if %errorlevel% neq 0 goto :error
call :test longcodetest.c
if %errorlevel% neq 0 goto :error
exit /b 0
:error

30
autotest/longcodetest.c Normal file
View File

@ -0,0 +1,30 @@
#include <assert.h>
char a[200], b[200];
bool ok = true;
/*
 * Regression test for long runs of straight-line code.
 *
 * Fills a[] unconditionally and b[] conditionally (guarded by `ok`) using
 * the compiler's #assign / #repeat / #until preprocessor directives, then
 * compares the sums of both arrays against the expected sum.
 *
 * Returns 0 when both arrays hold the expected contents, non-zero otherwise.
 */
int main(void)
{
	/*
	 * NOTE(review): #assign/#repeat/#until are not standard C; presumably
	 * the enclosed statement is expanded once per value of ni (0..199),
	 * giving 200 assignments without a loop -- confirm against the
	 * compiler manual.
	 */
#assign ni 0
#repeat
	a[ni] = ni & 255;
#assign ni ni + 1
#until ni == 200

	/* Same expansion, but every store is guarded by a runtime condition. */
#assign ni 0
#repeat
	if (ok)
		b[ni] = ni & 255;
#assign ni ni + 1
#until ni == 200

	/*
	 * csum must start at zero: it was previously left uninitialized and
	 * then accumulated with +=, which made the result indeterminate.
	 */
	int asum = 0, bsum = 0, csum = 0;

	for (int i = 0; i < 200; i++)
	{
		/* assumes plain char can hold 0..199 on the target -- TODO confirm */
		asum += a[i];
		bsum += b[i];
		csum += i & 255;
	}

	/* Both array sums must equal the reference sum, so this is 0 on success. */
	return asum + bsum - 2 * csum;
}

View File

@ -132,6 +132,7 @@ w2:
jsr main
pexec:
yexec:
zexec:
exec:
jmp inp_exit
@ -149,21 +150,28 @@ pexec:
yexec:
iny
exec:
lda (ip), y
sta execjmp + 1
iny
bmi incip
execjmp:
jmp (0x0900)
incip:
#if 0
tya
ldy #0
clc
adc ip
sta ip
bcc execjmp
bcc W1
inc ip + 1
bne execjmp
W1: ldy #0
#endif
lda (ip), y
sta execjmp + 1
iny
execjmp:
jmp (0x0900)
zexec:
tya
clc
adc ip
sta ip
bcc pexec
inc ip + 1
bne pexec
bcode:
byt BC_CALL_ABS * 2
byt <main
@ -642,7 +650,7 @@ L2: jsr divmod32
__asm inp_nop
{
jmp startup.exec
jmp startup.zexec
}
#pragma bytecode(BC_NOP, inp_nop)
@ -2051,13 +2059,13 @@ inp_jumps:
sta ip
bcc W2
inc ip + 1
W2: jmp startup.exec
W2: jmp startup.zexec
W1: sec
adc ip
sta ip
bcs W3
dec ip + 1
W3: jmp startup.exec
W3: jmp startup.zexec
inp_branchs_eq:
lda accu
@ -2163,7 +2171,7 @@ inp_jumpf:
adc ip + 1
sta ip + 1
stx ip
jmp startup.exec
jmp startup.zexec
inp_branchf_eq:
lda accu

View File

@ -1285,10 +1285,9 @@ ByteCodeBasicBlock::ByteCodeBasicBlock(void)
{
mTrueJump = mFalseJump = NULL;
mTrueLink = mFalseLink = NULL;
mOffset = 0x7fffffff;
mCopied = false;
mOffset = -1;
mPlaced = false;
mAssembled = false;
mKnownShortBranch = false;
mBypassed = false;
mExitLive = 0;
}
@ -5440,10 +5439,18 @@ void ByteCodeBasicBlock::Assemble(ByteCodeGenerator* generator)
{
mAssembled = true;
int nins = 0;
for (int i = 0; i < mIns.Size(); i++)
{
mIns[i].Assemble(generator, this);
if (mCode.Size() > nins + 240)
{
PutCode(generator, BC_NOP);
nins = mCode.Size();
}
}
mLinear = mCode.Size() - nins + 3;
if (this->mTrueJump)
this->mTrueJump->Assemble(generator);
@ -5513,12 +5520,6 @@ void ByteCodeBasicBlock::CopyCode(ByteCodeGenerator* generator, LinkerObject* li
{
int i;
int next, end;
int pos, at;
uint8 b;
if (!mCopied)
{
mCopied = true;
for (int i = 0; i < mRelocations.Size(); i++)
{
@ -5533,50 +5534,150 @@ void ByteCodeBasicBlock::CopyCode(ByteCodeGenerator* generator, LinkerObject* li
if (mFalseJump)
{
if (mFalseJump->mOffset <= mOffset)
{
if (mTrueJump->mOffset <= mOffset)
if (mFalseJump->mPlace == mPlace + 1)
end += PutBranch(generator, mBranch, mTrueJump->mOffset - end);
else if (mTrueJump->mPlace == mPlace + 1)
end += PutBranch(generator, InvertBranchCondition(mBranch), mFalseJump->mOffset - end);
else
{
end += PutBranch(generator, mBranch, mTrueJump->mOffset - end);
end += PutBranch(generator, BC_JUMPS, mFalseJump->mOffset - end);
}
else
{
end += PutBranch(generator, InvertBranchCondition(mBranch), mFalseJump->mOffset - end);
}
}
else
{
end += PutBranch(generator, mBranch, mTrueJump->mOffset - end);
}
}
else if (mTrueJump)
{
if (mTrueJump->mOffset != next)
{
end += PutBranch(generator, BC_JUMPS, mTrueJump->mOffset - end);
}
if (mTrueJump->mPlace != mPlace + 1)
end += PutBranch(generator, mBranch, mTrueJump->mOffset - end);
}
assert(end == next);
for (i = 0; i < mCode.Size(); i++)
{
mCode.Lookup(i, target[i + mOffset]);
}
if (mTrueJump) mTrueJump->CopyCode(generator, linkerObject, target);
if (mFalseJump) mFalseJump->CopyCode(generator, linkerObject, target);
// Appends this block, followed by the blocks reachable through its jump
// links, to `placement`. A block is added at most once; its index in the
// array is recorded in mPlace and mPlaced is set.
//
// For a block with two successors the visiting order is chosen from the
// shape of the local control flow (see the comments on each case below).
void ByteCodeBasicBlock::BuildPlacement(GrowingArray<ByteCodeBasicBlock*>& placement)
{
	// Already part of the layout: nothing to do.
	if (mPlaced)
		return;

	mPlaced = true;
	mPlace = placement.Size();
	placement.Push(this);

	// Zero or one successor: simply continue with it, if there is one.
	if (!mFalseJump)
	{
		if (mTrueJump)
			mTrueJump->BuildPlacement(placement);
		return;
	}

	// Two successors, one of which already has a position: only the other
	// one still needs to be laid out.
	if (mFalseJump->mPlaced)
	{
		mTrueJump->BuildPlacement(placement);
		return;
	}
	if (mTrueJump->mPlaced)
	{
		mFalseJump->BuildPlacement(placement);
		return;
	}

	// Both successors are unconditional and continue at the same block.
	const bool joinsAtSameBlock =
		!mTrueJump->mFalseJump &&
		!mFalseJump->mFalseJump &&
		mTrueJump->mTrueJump == mFalseJump->mTrueJump;

	if (joinsAtSameBlock)
	{
		// Put the false block directly behind this one without descending
		// into it; its only successor is reached through the true block.
		mFalseJump->mPlaced = true;
		mFalseJump->mPlace = placement.Size();
		placement.Push(mFalseJump);
		mTrueJump->BuildPlacement(placement);
		return;
	}

	// The true block leads straight into the false block.
	const bool trueLeadsToFalse =
		mTrueJump->mFalseJump == mFalseJump ||
		mTrueJump->mTrueJump == mFalseJump;

	if (trueLeadsToFalse)
	{
		mTrueJump->BuildPlacement(placement);
		mFalseJump->BuildPlacement(placement);
	}
	else
	{
		mFalseJump->BuildPlacement(placement);
		mTrueJump->BuildPlacement(placement);
	}
}
void ByteCodeBasicBlock::CalculateOffset(int& total)
// Assigns a first offset and size estimate to this block.
//
// total  - running byte offset; on return it has been advanced past this
//          block, including the space reserved for its branches.
// linear - running length of code since the last reset; it is cleared when
//          a NOP is requested in front of this block or when the block does
//          not continue into the block placed directly after it.
//
// Three bytes are reserved for every branch or jump the block may need
// (presumably the size of the long form -- confirm against PutBranch).
void ByteCodeBasicBlock::InitialOffset(int& total, int& linear)
{
	const int codeSize = mCode.Size();
	const int cappedSize = codeSize > 240 ? 240 : codeSize;

	// Request a leading NOP once the accumulated run would exceed 240 bytes.
	mNeedsNop = linear + cappedSize > 240;
	if (mNeedsNop)
	{
		total++;
		linear = 0;
	}
	else
		linear += mLinear;

	mOffset = total;
	total += codeSize;

	if (mFalseJump)
	{
		// A conditional block always needs one branch ...
		const bool successorFollows =
			mFalseJump->mPlace == mPlace + 1 ||
			mTrueJump->mPlace == mPlace + 1;

		total += 3;

		// ... and a second jump when neither successor is placed next.
		if (!successorFollows)
		{
			total += 3;
			linear = 0;
		}
	}
	else if (mTrueJump && mTrueJump->mPlace != mPlace + 1)
	{
		// Unconditional successor that is not placed next: one jump.
		total += 3;
		linear = 0;
	}

	mSize = total - mOffset;
}
bool ByteCodeBasicBlock::CalculateOffset(int& total)
{
if (mNeedsNop)
total++;
bool changed = total != mOffset;
mOffset = total;
total += mCode.Size();
if (mFalseJump)
{
if (mFalseJump->mPlace == mPlace + 1)
total += BranchByteSize(total, mTrueJump->mOffset);
else if (mTrueJump->mPlace == mPlace + 1)
total += BranchByteSize(total, mFalseJump->mOffset);
else
{
total += BranchByteSize(total, mTrueJump->mOffset);
total += JumpByteSize(total, mFalseJump->mOffset);
}
}
else if (mTrueJump)
{
if (mTrueJump->mPlace != mPlace + 1)
total += BranchByteSize(total, mTrueJump->mOffset);
}
if (mOffset + mSize != total)
changed = true;
mSize = total - mOffset;
return changed;
}
#if 0
int next;
if (mOffset > total)
{
mNeedsNop = false;
linear += mSize + 3;
mOffset = total;
next = total + mCode.Size();
@ -5599,6 +5700,13 @@ void ByteCodeBasicBlock::CalculateOffset(int& total)
// trueJump has not been placed, but falseJump has
total = next + BranchByteSize(next, mFalseJump->mOffset);
if (linear + mTrueJump->mCode.Size() > 240)
{
mNeedsNop = true;
total++;
}
mSize = total - mOffset;
mTrueJump->CalculateOffset(total);
}
@ -5608,6 +5716,13 @@ void ByteCodeBasicBlock::CalculateOffset(int& total)
// falseJump has not been placed, but trueJump has
total = next + BranchByteSize(next, mTrueJump->mOffset);
if (linear + mFalseJump->mCode.Size() > 240)
{
mNeedsNop = true;
total++;
}
mSize = total - mOffset;
mFalseJump->CalculateOffset(total);
}
@ -5618,6 +5733,13 @@ void ByteCodeBasicBlock::CalculateOffset(int& total)
// a short branch
total = next + 2;
if (linear + mFalseJump->mCode.Size() > 240)
{
mNeedsNop = true;
total++;
}
mSize = total - mOffset;
mFalseJump->CalculateOffset(total);
@ -5634,7 +5756,15 @@ void ByteCodeBasicBlock::CalculateOffset(int& total)
// Small diamond so place true then false directly behind each other
// with short branches
mSize = mCode.Size() + 2;
total = next + 2;
if (linear + mFalseJump->mCode.Size() > 240)
{
mNeedsNop = true;
total++;
}
mSize = total - mOffset;
mFalseJump->mOffset = next + 2;
mFalseJump->mSize = mFalseJump->mCode.Size() + 2;
@ -5717,6 +5847,8 @@ void ByteCodeBasicBlock::CalculateOffset(int& total)
}
}
}
#endif
ByteCodeProcedure::ByteCodeProcedure(void)
: mBlocks(nullptr)
@ -5802,17 +5934,32 @@ void ByteCodeProcedure::Compile(ByteCodeGenerator* generator, InterCodeProcedure
exitBlock->PutCode(generator, BC_RETURN); exitBlock->PutByte(tempSave); exitBlock->PutWord(proc->mLocalSize + 2 + tempSave);
int total;
ByteCodeBasicBlock* lentryBlock = entryBlock->BypassEmptyBlocks();
total = 0;
GrowingArray<ByteCodeBasicBlock*> placement(nullptr);
lentryBlock->CalculateOffset(total);
int total, linear;
total = 0;
linear = 0;
lentryBlock->BuildPlacement(placement);
for (int i = 0; i < placement.Size(); i++)
placement[i]->InitialOffset(total, linear);
do {
progress = false;
total = 0;
for (int i = 0; i < placement.Size(); i++)
if (placement[i]->CalculateOffset(total))
progress = true;
} while (progress);
uint8* data = proc->mLinkerObject->AddSpace(total);
lentryBlock->CopyCode(generator, proc->mLinkerObject, data);
for (int i = 0; i < placement.Size(); i++)
placement[i]->CopyCode(generator, proc->mLinkerObject, data);
mProgSize = total;
}
@ -5849,6 +5996,7 @@ ByteCodeGenerator::ByteCodeGenerator(Errors* errors, Linker* linker)
mByteCodeUsed[BC_CALL_ABS] = 1;
mByteCodeUsed[BC_EXIT] = 1;
mByteCodeUsed[BC_NATIVE] = 1;
mByteCodeUsed[BC_NOP] = 1;
assert(sizeof(ByteCodeNames) == 128 * sizeof(char*));
}

View File

@ -247,8 +247,8 @@ public:
GrowingArray<LinkerReference> mRelocations;
GrowingArray<ByteCodeBasicBlock*> mEntryBlocks;
int mOffset, mSize;
bool mPlaced, mCopied, mKnownShortBranch, mBypassed, mAssembled, mVisited;
int mOffset, mSize, mPlace, mLinear;
bool mPlaced, mNeedsNop, mBypassed, mAssembled, mVisited;
uint32 mExitLive;
ByteCodeBasicBlock(void);
@ -266,7 +266,10 @@ public:
int PutBranch(ByteCodeGenerator* generator, ByteCode code, int offset);
ByteCodeBasicBlock* BypassEmptyBlocks(void);
void CalculateOffset(int& total);
void BuildPlacement(GrowingArray<ByteCodeBasicBlock*> & placement);
void InitialOffset(int& total, int& linear);
bool CalculateOffset(int & total);
void CopyCode(ByteCodeGenerator* generator, LinkerObject * linkerObject, uint8* target);
void LongConstToAccu(int64 val);

View File

@ -640,7 +640,7 @@ int Emulator::Emulate(int startIP)
break;
}
if ((trace & 1) && ip == 0x0850)
if ((trace & 1) && ip == 0x0851)
{
int accu = mMemory[BC_REG_ACCU] + 256 * mMemory[BC_REG_ACCU + 1];
int ptr = mMemory[BC_REG_ADDR] + 256 * mMemory[BC_REG_ADDR + 1];

View File

@ -4408,7 +4408,7 @@ void InterCodeBasicBlock::SplitBranches(InterCodeProcedure* proc)
{
mVisited = true;
if (mTrueJump && mFalseJump && mInstructions.Size() > 2)
if (mTrueJump && mFalseJump && (mInstructions.Size() > 2 || mInstructions.Size() == 2 && mInstructions[0]->mCode != IC_RELATIONAL_OPERATOR))
{
InterCodeBasicBlock* block = new InterCodeBasicBlock();
proc->Append(block);