From ccd6a5004341c5e95655c16aaa603902f9afb0db Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Thu, 25 Nov 2021 21:42:06 +0100 Subject: [PATCH] Optimize byte code interpreter loop --- README.md | 2 +- autotest/autotest.bat | 3 + autotest/longcodetest.c | 30 +++++ include/crt.c | 36 +++-- oscar64/ByteCodeGenerator.cpp | 242 +++++++++++++++++++++++++++------- oscar64/ByteCodeGenerator.h | 9 +- oscar64/Emulator.cpp | 2 +- oscar64/InterCode.cpp | 2 +- 8 files changed, 259 insertions(+), 67 deletions(-) create mode 100644 autotest/longcodetest.c diff --git a/README.md b/README.md index ca9a0f9..b7ffa55 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ The goal is to implement the actual C standard and not some subset for performan ## Limits and Errors -There are still several open areas, but most targets have been reached. The current Dhrystone performance is 59 iterations per second with byte code (11434) and 270 iterations with native code (12145 Bytes). +There are still several open areas, but most targets have been reached. The current Dhrystone performance is 61 iterations per second with byte code (11434) and 270 iterations with native code (12145 Bytes). ### Language diff --git a/autotest/autotest.bat b/autotest/autotest.bat index b84fbf2..d327391 100644 --- a/autotest/autotest.bat +++ b/autotest/autotest.bat @@ -108,6 +108,9 @@ if %errorlevel% neq 0 goto :error call :test randsumtest.c if %errorlevel% neq 0 goto :error +call :test longcodetest.c +if %errorlevel% neq 0 goto :error + exit /b 0 :error diff --git a/autotest/longcodetest.c b/autotest/longcodetest.c new file mode 100644 index 0000000..cb766e3 --- /dev/null +++ b/autotest/longcodetest.c @@ -0,0 +1,30 @@ +#include + +char a[200], b[200]; +bool ok = true; + +int main(void) +{ +#assign ni 0 +#repeat + a[ni] = ni & 255; +#assign ni ni + 1 +#until ni == 200 + +#assign ni 0 +#repeat + if (ok) + b[ni] = ni & 255; +#assign ni ni + 1 +#until ni == 200 + + int asum = 0, bsum = 0, csum; + for(int i=0; i<200; i++) + { + asum += a[i]; + bsum += b[i]; + csum += i & 255; + } + + return asum + bsum - 2 * csum; +} diff --git a/include/crt.c b/include/crt.c index 3e4a9f2..0a6a73f 100644 --- a/include/crt.c +++ b/include/crt.c @@ -132,6 +132,7 @@ w2: jsr main pexec: yexec: +zexec: exec: jmp inp_exit @@ -149,21 +150,28 @@ pexec: yexec: iny exec: - lda (ip), y - sta execjmp + 1 - iny - bmi incip -execjmp: - jmp (0x0900) -incip: +#if 0 tya - ldy #0 clc adc ip sta ip - bcc execjmp + bcc W1 inc ip + 1 - bne execjmp +W1: ldy #0 +#endif + lda (ip), y + sta execjmp + 1 + iny +execjmp: + jmp (0x0900) +zexec: + tya + clc + adc ip + sta ip + bcc pexec + inc ip + 1 + bne pexec bcode: byt BC_CALL_ABS * 2 byt
nins + 240) + { + PutCode(generator, BC_NOP); + nins = mCode.Size(); + } } + mLinear = mCode.Size() - nins + 3; + if (this->mTrueJump) this->mTrueJump->Assemble(generator); if (this->mFalseJump) @@ -5513,70 +5520,164 @@ void ByteCodeBasicBlock::CopyCode(ByteCodeGenerator* generator, LinkerObject* li { int i; int next, end; - int pos, at; - uint8 b; - if (!mCopied) + for (int i = 0; i < mRelocations.Size(); i++) { - mCopied = true; + LinkerReference rl = mRelocations[i]; + rl.mObject = linkerObject; + rl.mOffset += mOffset; + linkerObject->AddReference(rl); + } - for (int i = 0; i < mRelocations.Size(); i++) + end = mOffset + mCode.Size(); + next = mOffset + mSize; + + if (mFalseJump) + { + if (mFalseJump->mPlace == mPlace + 1) + end += PutBranch(generator, mBranch, mTrueJump->mOffset - end); + else if (mTrueJump->mPlace == mPlace + 1) + end += PutBranch(generator, InvertBranchCondition(mBranch), mFalseJump->mOffset - end); + else { - LinkerReference rl = mRelocations[i]; - rl.mObject = linkerObject; - rl.mOffset += mOffset; - linkerObject->AddReference(rl); + end += PutBranch(generator, mBranch, mTrueJump->mOffset - end); + end += PutBranch(generator, BC_JUMPS, mFalseJump->mOffset - end); } + } + else if (mTrueJump) + { + if (mTrueJump->mPlace != mPlace + 1) + end += PutBranch(generator, mBranch, mTrueJump->mOffset - end); + } - end = mOffset + mCode.Size(); - next = mOffset + mSize; + assert(end == next); + + for (i = 0; i < mCode.Size(); i++) + mCode.Lookup(i, target[i + mOffset]); +} + +void ByteCodeBasicBlock::BuildPlacement(GrowingArray& placement) +{ + if (!mPlaced) + { + mPlaced = true; + mPlace = placement.Size(); + placement.Push(this); if (mFalseJump) { - if (mFalseJump->mOffset <= mOffset) + if (mFalseJump->mPlaced) + mTrueJump->BuildPlacement(placement); + else if (mTrueJump->mPlaced) + mFalseJump->BuildPlacement(placement); + else if (!mTrueJump->mFalseJump && !mFalseJump->mFalseJump && mTrueJump->mTrueJump == mFalseJump->mTrueJump) { - if (mTrueJump->mOffset <= mOffset) - { - end += PutBranch(generator, mBranch, mTrueJump->mOffset - end); - end += PutBranch(generator, BC_JUMPS, mFalseJump->mOffset - end); - - } - else - { - end += PutBranch(generator, InvertBranchCondition(mBranch), mFalseJump->mOffset - end); - } + mFalseJump->mPlaced = true; + mFalseJump->mPlace = placement.Size(); + placement.Push(mFalseJump); + + mTrueJump->BuildPlacement(placement); + } + else if (mTrueJump->mFalseJump == mFalseJump || mTrueJump->mTrueJump == mFalseJump) + { + mTrueJump->BuildPlacement(placement); + mFalseJump->BuildPlacement(placement); } else { - end += PutBranch(generator, mBranch, mTrueJump->mOffset - end); + mFalseJump->BuildPlacement(placement); + mTrueJump->BuildPlacement(placement); } } else if (mTrueJump) { - if (mTrueJump->mOffset != next) - { - end += PutBranch(generator, BC_JUMPS, mTrueJump->mOffset - end); - } + mTrueJump->BuildPlacement(placement); } - - assert(end == next); - - for (i = 0; i < mCode.Size(); i++) - { - mCode.Lookup(i, target[i + mOffset]); - } - - if (mTrueJump) mTrueJump->CopyCode(generator, linkerObject, target); - if (mFalseJump) mFalseJump->CopyCode(generator, linkerObject, target); } } -void ByteCodeBasicBlock::CalculateOffset(int& total) +void ByteCodeBasicBlock::InitialOffset(int& total, int& linear) { + int size = mCode.Size(); + if (size > 240) + size = 240; + + mNeedsNop = linear + size > 240; + if (mNeedsNop) + { + total++; + linear = 0; + } + else + linear += mLinear; + + mOffset = total; + total += mCode.Size(); + if (mFalseJump) + { + total += 3; + if (mFalseJump->mPlace != mPlace + 1 && mTrueJump->mPlace != mPlace + 1) + { + total += 3; + linear = 0; + } + } + else if (mTrueJump) + { + if (mTrueJump->mPlace != mPlace + 1) + { + total += 3; + linear = 0; + } + } + + mSize = total - mOffset; +} + +bool ByteCodeBasicBlock::CalculateOffset(int& total) +{ + if (mNeedsNop) + total++; + + bool changed = total != mOffset; + mOffset = total; + + total += mCode.Size(); + + if (mFalseJump) + { + if (mFalseJump->mPlace == mPlace + 1) + total += BranchByteSize(total, mTrueJump->mOffset); + else if (mTrueJump->mPlace == mPlace + 1) + total += BranchByteSize(total, mFalseJump->mOffset); + else + { + total += BranchByteSize(total, mTrueJump->mOffset); + total += JumpByteSize(total, mFalseJump->mOffset); + } + } + else if (mTrueJump) + { + if (mTrueJump->mPlace != mPlace + 1) + total += BranchByteSize(total, mTrueJump->mOffset); + } + + if (mOffset + mSize != total) + changed = true; + + mSize = total - mOffset; + + return changed; +} + +#if 0 int next; if (mOffset > total) { + mNeedsNop = false; + linear += mSize + 3; + mOffset = total; next = total + mCode.Size(); @@ -5599,6 +5700,13 @@ void ByteCodeBasicBlock::CalculateOffset(int& total) // trueJump has not been placed, but falseJump has total = next + BranchByteSize(next, mFalseJump->mOffset); + + if (linear + mTrueJump->mCode.Size() > 240) + { + mNeedsNop = true; + total++; + } + mSize = total - mOffset; mTrueJump->CalculateOffset(total); } @@ -5608,6 +5716,13 @@ void ByteCodeBasicBlock::CalculateOffset(int& total) // falseJump has not been placed, but trueJump has total = next + BranchByteSize(next, mTrueJump->mOffset); + + if (linear + mFalseJump->mCode.Size() > 240) + { + mNeedsNop = true; + total++; + } + mSize = total - mOffset; mFalseJump->CalculateOffset(total); } @@ -5618,6 +5733,13 @@ void ByteCodeBasicBlock::CalculateOffset(int& total) // a short branch total = next + 2; + + if (linear + mFalseJump->mCode.Size() > 240) + { + mNeedsNop = true; + total++; + } + mSize = total - mOffset; mFalseJump->CalculateOffset(total); @@ -5634,7 +5756,15 @@ void ByteCodeBasicBlock::CalculateOffset(int& total) // Small diamond so place true then false directly behind each other // with short branches - mSize = mCode.Size() + 2; + total = next + 2; + + if (linear + mFalseJump->mCode.Size() > 240) + { + mNeedsNop = true; + total++; + } + + mSize = total - mOffset; mFalseJump->mOffset = next + 2; mFalseJump->mSize = mFalseJump->mCode.Size() + 2; @@ -5717,6 +5847,8 @@ void ByteCodeBasicBlock::CalculateOffset(int& total) } } } +#endif + ByteCodeProcedure::ByteCodeProcedure(void) : mBlocks(nullptr) @@ -5802,17 +5934,32 @@ void ByteCodeProcedure::Compile(ByteCodeGenerator* generator, InterCodeProcedure exitBlock->PutCode(generator, BC_RETURN); exitBlock->PutByte(tempSave); exitBlock->PutWord(proc->mLocalSize + 2 + tempSave); - int total; - ByteCodeBasicBlock* lentryBlock = entryBlock->BypassEmptyBlocks(); + GrowingArray placement(nullptr); + + int total, linear; total = 0; + linear = 0; - lentryBlock->CalculateOffset(total); + lentryBlock->BuildPlacement(placement); - uint8 * data = proc->mLinkerObject->AddSpace(total); + for (int i = 0; i < placement.Size(); i++) + placement[i]->InitialOffset(total, linear); + + do { + progress = false; + total = 0; + for (int i = 0; i < placement.Size(); i++) + if (placement[i]->CalculateOffset(total)) + progress = true; + } while (progress); + + uint8* data = proc->mLinkerObject->AddSpace(total); + + for (int i = 0; i < placement.Size(); i++) + placement[i]->CopyCode(generator, proc->mLinkerObject, data); - lentryBlock->CopyCode(generator, proc->mLinkerObject, data); mProgSize = total; } @@ -5849,6 +5996,7 @@ ByteCodeGenerator::ByteCodeGenerator(Errors* errors, Linker* linker) mByteCodeUsed[BC_CALL_ABS] = 1; mByteCodeUsed[BC_EXIT] = 1; mByteCodeUsed[BC_NATIVE] = 1; + mByteCodeUsed[BC_NOP] = 1; assert(sizeof(ByteCodeNames) == 128 * sizeof(char*)); } diff --git a/oscar64/ByteCodeGenerator.h b/oscar64/ByteCodeGenerator.h index 0c8948d..7e1cbf3 100644 --- a/oscar64/ByteCodeGenerator.h +++ b/oscar64/ByteCodeGenerator.h @@ -247,8 +247,8 @@ public: GrowingArray mRelocations; GrowingArray mEntryBlocks; - int mOffset, mSize; - bool mPlaced, mCopied, mKnownShortBranch, mBypassed, mAssembled, mVisited; + int mOffset, mSize, mPlace, mLinear; + bool mPlaced, mNeedsNop, mBypassed, mAssembled, mVisited; uint32 mExitLive; ByteCodeBasicBlock(void); @@ -266,7 +266,10 @@ public: int PutBranch(ByteCodeGenerator* generator, ByteCode code, int offset); ByteCodeBasicBlock* BypassEmptyBlocks(void); - void CalculateOffset(int& total); + void BuildPlacement(GrowingArray & placement); + void InitialOffset(int& total, int& linear); + bool CalculateOffset(int & total); + void CopyCode(ByteCodeGenerator* generator, LinkerObject * linkerObject, uint8* target); void LongConstToAccu(int64 val); diff --git a/oscar64/Emulator.cpp b/oscar64/Emulator.cpp index c82a85f..2c3827f 100644 --- a/oscar64/Emulator.cpp +++ b/oscar64/Emulator.cpp @@ -640,7 +640,7 @@ int Emulator::Emulate(int startIP) break; } - if ((trace & 1) && ip == 0x0850) + if ((trace & 1) && ip == 0x0851) { int accu = mMemory[BC_REG_ACCU] + 256 * mMemory[BC_REG_ACCU + 1]; int ptr = mMemory[BC_REG_ADDR] + 256 * mMemory[BC_REG_ADDR + 1]; diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index 56b1509..a2b0e86 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -4408,7 +4408,7 @@ void InterCodeBasicBlock::SplitBranches(InterCodeProcedure* proc) { mVisited = true; - if (mTrueJump && mFalseJump && mInstructions.Size() > 2) + if (mTrueJump && mFalseJump && (mInstructions.Size() > 2 || mInstructions.Size() == 2 && mInstructions[0]->mCode != IC_RELATIONAL_OPERATOR)) { InterCodeBasicBlock* block = new InterCodeBasicBlock(); proc->Append(block);