Functional implementation.
All checks were successful
/ Build-Stuff (push) Successful in 6s

This has the same time and memory complexity as the original, and lost,
version. It does use twice the memory needed for the buffer.

TODO: reduce the size of the buffer to half of what it is now.

TODO: add some more tests. Pay attention to odd/even sized strings in
combination. Add some strings that are closer to being the same, with some
weird mix of prefix, postfix and infix strings.

TODO: See if it's possible to make this faster by divide-and-conquer

TODO: See if it's possible to use SIMD instructions to improve
performance.
This commit is contained in:
Frederik Hertzum 2024-02-25 00:56:15 +01:00
commit 3ee51fa485
7 changed files with 158 additions and 0 deletions

View File

@ -0,0 +1,13 @@
# CI workflow: on every push, check out the repository and configure, build
# and test the project with Meson.
name: Build meson
run-name: Build stuff
on: [push]
jobs:
  Build-Stuff:
    # Runner label; the runner image is expected to provide meson and a C++ compiler.
    runs-on: fedora-meson
    steps:
      - name: Check out repository code
        uses: actions/checkout@v3
      - run: meson setup build
      - run: meson compile -C build
      - run: meson test -C build

19
meson.build Normal file
View File

@ -0,0 +1,19 @@
# Top-level build definition for the iosifovitch library.
project(
  'iosifovitch',
  'cpp',
  version: '0.1.0',
  meson_version: '>= 1.3.0',
  default_options: [
    'warning_level=3',
    'werror=true',
    'cpp_std=c++17',
  ],
)

# Source and include-directory lists; src/meson.build fills them in.
iosifovitch_sources = []
iosifovitch_include_dirs = []
subdir('src')

iosifovitch_lib = library(
  'iosifovitch',
  iosifovitch_sources,
  install: true,
)

# Dependency object consumed by the tests (and any other in-tree user).
iosifovitch_dep = declare_dependency(
  include_directories: iosifovitch_include_dirs,
  link_with: iosifovitch_lib,
)

subdir('tests')

41
src/iosifovitch.cpp Normal file
View File

@ -0,0 +1,41 @@
#include "iosifovitch.h"

#include <algorithm>
#include <cstddef>
#include <numeric>
#include <string_view>
#include <utility>
#include <vector>
/* Calculate the Levenshtein distance between `a` and `b`: the minimum number
 * of single-character insertions, deletions and substitutions that turn one
 * string into the other. Characters are compared as individual `char`s.
 *
 * The common prefix and the common suffix of the two strings never contribute
 * to the distance, so both are trimmed before the dynamic-programming pass.
 * The pass keeps only two rows of the cost matrix, each one element longer
 * than the shorter (trimmed) string.
 *
 * Parameters:
 *   a, b - the strings to compare; the order does not affect the result.
 *
 * Returns the edit distance, narrowed to `unsigned int`.
 */
auto levenshtein_distance(std::string_view const& a, std::string_view const& b) -> unsigned int {
    // Work on local views so they can be trimmed; the rows are sized after
    // the shorter string to keep the buffers as small as possible.
    auto shorter = a.size() <= b.size() ? a : b;
    auto longer = a.size() <= b.size() ? b : a;

    // Drop the common prefix.
    std::size_t prefix_length = 0;
    while (prefix_length < shorter.size() && shorter[prefix_length] == longer[prefix_length]) {
        ++prefix_length;
    }
    shorter.remove_prefix(prefix_length);
    longer.remove_prefix(prefix_length);

    // Drop the common suffix. The bound on `shorter.size()` keeps both
    // indices in range because `longer` is at least as long as `shorter`.
    std::size_t suffix_length = 0;
    while (suffix_length < shorter.size()
           && shorter[shorter.size() - 1 - suffix_length] == longer[longer.size() - 1 - suffix_length]) {
        ++suffix_length;
    }
    shorter.remove_suffix(suffix_length);
    longer.remove_suffix(suffix_length);

    // Nothing left of the shorter string: every remaining character of the
    // longer one has to be inserted.
    if (shorter.empty()) {
        return static_cast<unsigned int>(longer.size());
    }

    auto const row_length = shorter.size() + 1;
    // previous_row[j]: cost of turning the first j characters of `shorter`
    // into the prefix of `longer` processed so far. Row 0 is 0, 1, 2, ...
    std::vector<unsigned int> previous_row(row_length);
    std::vector<unsigned int> current_row(row_length, 0u);
    std::iota(previous_row.begin(), previous_row.end(), 0u);

    for (std::size_t i = 0; i < longer.size(); ++i) {
        current_row[0] = static_cast<unsigned int>(i + 1);
        for (std::size_t j = 0; j < shorter.size(); ++j) {
            auto const substitution = previous_row[j] + (shorter[j] == longer[i] ? 0u : 1u);
            auto const insertion_or_deletion = std::min(previous_row[j + 1], current_row[j]) + 1u;
            current_row[j + 1] = std::min(substitution, insertion_or_deletion);
        }
        // The row just computed becomes the "previous" row of the next round.
        std::swap(previous_row, current_row);
    }

    return previous_row.back();
}

15
src/iosifovitch.h Normal file
View File

@ -0,0 +1,15 @@
#pragma once
#include <string>
#include <string_view>
/* Calculate the Levenshtein distance between two strings, i.e. the minimum
 * number of single-character insertions, deletions and substitutions needed
 * to turn one string into the other. Characters are compared as individual
 * `char`s.
 *
 * With n the length of the shorter string and m the length of the longer one,
 * the worst-case running time is O(n*m) and the working memory is O(n). When
 * one string is a prefix of the other the result is found in O(n) time.
 *
 * Parameters:
 *   a, b - the strings to compare; the order does not affect the result.
 *
 * Returns the distance as an unsigned int.
 */
auto levenshtein_distance(
    std::string_view const& a,
    std::string_view const& b
) -> unsigned int;

5
src/meson.build Normal file
View File

@ -0,0 +1,5 @@
# Add this directory's sources and include path to the lists declared by the
# parent meson.build. Both use `+=` so entries added elsewhere are kept.
iosifovitch_sources += files('iosifovitch.cpp')
iosifovitch_include_dirs += include_directories('.')

11
tests/basic-tests.cpp Normal file
View File

@ -0,0 +1,11 @@
#include <iosifovitch.h>
#include <iostream>
/* Test driver: computes the Levenshtein distance between two strings and
 * compares it with an expected value.
 *
 * Usage: <executable> <string-a> <string-b> <expected-distance>
 *
 * The expected and the computed distance are written to stderr, one per line.
 * Returns EXIT_SUCCESS when they are equal, EXIT_FAILURE otherwise or when
 * fewer than three arguments are given.
 */
auto main(int argc, char ** argv) -> int
{
    // argv[1..3] are read below; refuse to run without them instead of
    // dereferencing a null pointer.
    if (argc < 4)
    {
        std::cerr << "usage: " << (argc > 0 ? argv[0] : "basic-tests")
                  << " <string-a> <string-b> <expected-distance>\n";
        return EXIT_FAILURE;
    }

    auto const target_cost = static_cast<unsigned int>(atoi(argv[3]));
    auto const cost = levenshtein_distance(argv[1], argv[2]);
    std::cerr << target_cost << '\n';
    std::cerr << cost << '\n';
    return cost == target_cost ? EXIT_SUCCESS : EXIT_FAILURE;
}

54
tests/meson.build Normal file
View File

@ -0,0 +1,54 @@
# Test driver; invoked as: <exe> <string-a> <string-b> <expected-distance>.
test_exe = executable(
  'iosifovitch-basic-tests',
  files('basic-tests.cpp'),
  dependencies: iosifovitch_dep,
)

# One entry per test: [test name, [string-a, string-b, expected-distance]].
basic_test_cases = [
  ['short equal strings', ['hello', 'hello', '0']],
  ['empty strings', ['', '', '0']],
  ['empty string and non-empty string', ['', 'hello', '5']],
  ['non-empty string and empty string', ['hello', '', '5']],
  ['non-empty string and non-empty string', ['hello', 'm', '5']],
  ['non-empty string and prefix string', ['hello', 'he', '3']],
  ['non-empty string and infix string', ['hello', 'teller', '3']],
  ['non-empty string and postfix string', ['hello', 'millo', '2']],
]

foreach test_case : basic_test_cases
  test(test_case[0], test_exe, args: test_case[1])
endforeach