This has the same time and memory complexity as the original, and lost, version. It does use twice the memory needed for the buffer. TODO: reduce the size of the buffer to half of what it is now. TODO: add some more tests. Pay attention to odd/even sized strings in combination. Add some strings that are closer to being the same, with some weird mix of pre-, post- and infix strings. TODO: See if it's possible to make this faster by divide-and-conquer. TODO: See if it's possible to use SIMD instructions to improve performance.
This commit is contained in:
commit
3ee51fa485
|
@ -0,0 +1,13 @@
|
||||||
|
name: Build meson
|
||||||
|
run-name: Build stuff
|
||||||
|
on: [push]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
Build-Stuff:
|
||||||
|
runs-on: fedora-meson
|
||||||
|
steps:
|
||||||
|
- name: Check out repository code
|
||||||
|
uses: actions/checkout@v3
|
||||||
|
- run: meson setup build
|
||||||
|
- run: meson compile -C build
|
||||||
|
- run: meson test -C build
|
|
@ -0,0 +1,19 @@
|
||||||
|
project(
|
||||||
|
'iosifovitch', ['cpp'],
|
||||||
|
version: '0.1.0',
|
||||||
|
meson_version: '>= 1.3.0',
|
||||||
|
default_options: [ 'warning_level=3', 'werror=true', 'cpp_std=c++17']
|
||||||
|
)
|
||||||
|
|
||||||
|
iosifovitch_sources = []
|
||||||
|
iosifovitch_include_dirs = []
|
||||||
|
|
||||||
|
subdir('src')
|
||||||
|
|
||||||
|
iosifovitch_lib = library('iosifovitch', iosifovitch_sources, install: true)
|
||||||
|
iosifovitch_dep = declare_dependency(
|
||||||
|
link_with : iosifovitch_lib,
|
||||||
|
include_directories: iosifovitch_include_dirs
|
||||||
|
)
|
||||||
|
|
||||||
|
subdir('tests')
|
|
@ -0,0 +1,41 @@
|
||||||
|
#include "iosifovitch.h"

#include <algorithm>
#include <cstddef>
#include <numeric>
#include <utility>
#include <vector>
|
||||||
|
|
||||||
|
/* Calculate the Levenshtein distance between two strings.
 *
 * The distance is the minimum number of single-character insertions,
 * deletions and substitutions needed to turn `a` into `b`. Characters are
 * compared as individual `char` values.
 *
 * Characters shared at the start and at the end of both strings never
 * contribute to the distance, so they are trimmed away before the dynamic
 * programming step runs on whatever remains in the middle.
 *
 * Time is O(n*m) for the trimmed strings; extra memory is a single row of
 * min(n, m) + 1 counters.
 */
auto levenshtein_distance(std::string_view const& a, std::string_view const& b) -> unsigned int {
    // Trim local copies of the views; the caller's views are left untouched.
    // `shorter` determines the row length, which keeps the buffer minimal.
    auto shorter = a;
    auto longer = b;
    if (shorter.size() > longer.size()) std::swap(shorter, longer);

    // Drop the common prefix.
    auto prefix = std::size_t{0};
    while (prefix < shorter.size() && shorter[prefix] == longer[prefix]) ++prefix;
    shorter.remove_prefix(prefix);
    longer.remove_prefix(prefix);

    // Drop the common suffix, comparing backwards from the last character.
    auto suffix = std::size_t{0};
    while (suffix < shorter.size()
           && shorter[shorter.size() - 1 - suffix] == longer[longer.size() - 1 - suffix])
    {
        ++suffix;
    }
    shorter.remove_suffix(suffix);
    longer.remove_suffix(suffix);

    // With nothing left of the shorter string, every remaining character of
    // the longer one is an insertion (this also covers two empty inputs).
    if (shorter.empty()) return static_cast<unsigned int>(longer.size());

    // row[j] holds the distance between the first j characters of `shorter`
    // and the prefix of `longer` processed so far. It starts out as the cost
    // of building each prefix of `shorter` from an empty string.
    std::vector<unsigned int> row(shorter.size() + 1);
    std::iota(row.begin(), row.end(), 0u);

    for (auto i = std::size_t{0}; i < longer.size(); ++i)
    {
        // `diagonal` carries the value of the previous row at column j, which
        // is about to be overwritten; it is what lets one row suffice.
        auto diagonal = row[0];
        row[0] = static_cast<unsigned int>(i + 1);

        for (auto j = std::size_t{0}; j < shorter.size(); ++j)
        {
            auto const above = row[j + 1];
            auto const substitution = diagonal + (shorter[j] == longer[i] ? 0u : 1u);
            auto const insertion_or_deletion = std::min(above, row[j]) + 1u;
            row[j + 1] = std::min(substitution, insertion_or_deletion);
            diagonal = above;
        }
    }

    return row.back();
}
|
|
@ -0,0 +1,15 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <string>
#include <string_view>

/* Calculate the Levenshtein distance between two strings.
 *
 * The result is the minimum number of single-character insertions, deletions
 * and substitutions needed to turn `a` into `b`. The arguments are only
 * viewed, never copied or modified.
 *
 * The running time is O(n*m) in the worst case, where n and m are the lengths
 * of the two strings, and O(min(n, m)) in the best case (for example when the
 * strings are equal).
 */
auto levenshtein_distance(
    std::string_view const& a,
    std::string_view const& b
) -> unsigned int;
|
|
@ -0,0 +1,5 @@
|
||||||
|
iosifovitch_sources = files([
|
||||||
|
'iosifovitch.cpp'
|
||||||
|
])
|
||||||
|
|
||||||
|
iosifovitch_include_dirs += include_directories(['.'])
|
|
@ -0,0 +1,11 @@
|
||||||
|
#include <iosifovitch.h>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
auto main(int, char ** argv) -> int
|
||||||
|
{
|
||||||
|
auto target_cost = (unsigned int)(atoi(argv[3]));
|
||||||
|
auto cost = levenshtein_distance(argv[1], argv[2]);
|
||||||
|
std::cerr << target_cost << '\n';
|
||||||
|
std::cerr << cost << '\n';
|
||||||
|
return cost == target_cost ? EXIT_SUCCESS : EXIT_FAILURE;
|
||||||
|
}
|
|
@ -0,0 +1,54 @@
|
||||||
|
test_exe = executable(
|
||||||
|
'iosifovitch-basic-tests',
|
||||||
|
files(['basic-tests.cpp']),
|
||||||
|
dependencies: [iosifovitch_dep]
|
||||||
|
)
|
||||||
|
|
||||||
|
test(
|
||||||
|
'short equal strings',
|
||||||
|
test_exe,
|
||||||
|
args: ['hello', 'hello', '0']
|
||||||
|
)
|
||||||
|
|
||||||
|
test(
|
||||||
|
'empty strings',
|
||||||
|
test_exe,
|
||||||
|
args: ['', '', '0']
|
||||||
|
)
|
||||||
|
|
||||||
|
test(
|
||||||
|
'empty string and non-empty string',
|
||||||
|
test_exe,
|
||||||
|
args: ['', 'hello', '5']
|
||||||
|
)
|
||||||
|
|
||||||
|
test(
|
||||||
|
'non-empty string and empty string',
|
||||||
|
test_exe,
|
||||||
|
args: ['hello', '', '5']
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
test(
|
||||||
|
'non-empty string and non-empty string',
|
||||||
|
test_exe,
|
||||||
|
args: ['hello', 'm', '5']
|
||||||
|
)
|
||||||
|
|
||||||
|
test(
|
||||||
|
'non-empty string and prefix string',
|
||||||
|
test_exe,
|
||||||
|
args: ['hello', 'he', '3']
|
||||||
|
)
|
||||||
|
|
||||||
|
test(
|
||||||
|
'non-empty string and infix string',
|
||||||
|
test_exe,
|
||||||
|
args: ['hello', 'teller', '3']
|
||||||
|
)
|
||||||
|
|
||||||
|
test(
|
||||||
|
'non-empty string and postfix string',
|
||||||
|
test_exe,
|
||||||
|
args: ['hello', 'millo', '2']
|
||||||
|
)
|
Loading…
Reference in New Issue