This has the same time and memory complexity as the original (and lost) version. It does, however, use twice the memory needed for the buffer. TODO: Reduce the size of the buffer to half of what it is now. TODO: Add some more tests. Pay attention to odd/even-sized strings in combination. Add some strings that are closer to being the same, with some weird mix of prefix, postfix and infix strings. TODO: See if it's possible to make this faster by divide-and-conquer. TODO: See if it's possible to use SIMD instructions to improve performance.
This commit is contained in:
commit
3ee51fa485
|
@ -0,0 +1,13 @@
|
|||
# CI workflow: configure, compile and test the project with Meson on every push.
name: Build meson
run-name: Build stuff
on: [push]

jobs:
  Build-Stuff:
    runs-on: fedora-meson
    steps:
      - name: Check out repository code
        uses: actions/checkout@v3
      # Configure into ./build, then compile and run the test suite from there.
      - run: meson setup build
      - run: meson compile -C build
      - run: meson test -C build
|
|
@ -0,0 +1,19 @@
|
|||
# Top-level build definition for the iosifovitch library.
# Warnings are treated as errors and the sources are compiled as C++17.
project(
  'iosifovitch', ['cpp'],
  version: '0.1.0',
  meson_version: '>= 1.3.0',
  default_options: [ 'warning_level=3', 'werror=true', 'cpp_std=c++17']
)

# Start out empty; the 'src' subdirectory fills these in.
iosifovitch_sources = []
iosifovitch_include_dirs = []

subdir('src')

# Build the library from the collected sources and expose it, together with
# its include directories, as a dependency object for other targets.
iosifovitch_lib = library('iosifovitch', iosifovitch_sources, install: true)
iosifovitch_dep = declare_dependency(
  link_with : iosifovitch_lib,
  include_directories: iosifovitch_include_dirs
)

# Entered last because the tests link against iosifovitch_dep.
subdir('tests')
|
|
@ -0,0 +1,41 @@
|
|||
#include "iosifovitch.h"

#include <algorithm>
#include <cstddef>
#include <numeric>
#include <string_view>
#include <vector>
|
||||
|
||||
/* Calculate the Levenshtein distance between two strings.
 *
 * The distance is the minimum number of single-character insertions,
 * deletions and substitutions needed to turn `a` into `b`. Characters are
 * compared as individual `char` values.
 *
 * Any common prefix and common suffix is stripped first, since matching
 * characters at either end never contribute to the distance. The remainder
 * is solved with the classic dynamic-programming recurrence, keeping only a
 * single row of the table that is sized after the shorter string.
 *
 * Time is O(n*m) in the worst case, where n and m are the lengths of what
 * is left after stripping; memory is O(min(n, m)).
 */
auto levenshtein_distance(std::string_view const& a, std::string_view const& b) -> unsigned int {
    // Work on local copies of the views, with the shorter string first so
    // that the DP row is as small as possible.
    auto shorter = a.size() <= b.size() ? a : b;
    auto longer = a.size() <= b.size() ? b : a;

    // Drop the common prefix.
    auto prefix = std::size_t{0};
    while (prefix < shorter.size() && shorter[prefix] == longer[prefix]) ++prefix;
    shorter.remove_prefix(prefix);
    longer.remove_prefix(prefix);

    // Drop the common suffix. The bound on `shorter` keeps both indices in
    // range because `longer` is at least as long.
    auto suffix = std::size_t{0};
    while (
        suffix < shorter.size()
        && shorter[shorter.size() - 1 - suffix] == longer[longer.size() - 1 - suffix]
    ) ++suffix;
    shorter.remove_suffix(suffix);
    longer.remove_suffix(suffix);

    // Nothing left of the shorter string: every remaining character of the
    // longer one has to be inserted.
    if (shorter.empty()) return static_cast<unsigned int>(longer.size());

    // row[j] holds the distance between the first j characters of `shorter`
    // and the prefix of `longer` processed so far. Initially that prefix is
    // empty, so row[j] == j.
    auto row = std::vector<unsigned int>(shorter.size() + 1);
    std::iota(row.begin(), row.end(), 0u);

    for (auto i = std::size_t{0}; i < longer.size(); ++i)
    {
        // `diagonal` carries the previous row's value at column j, which is
        // about to be overwritten in place.
        auto diagonal = row[0];
        row[0] = static_cast<unsigned int>(i + 1);
        for (auto j = std::size_t{0}; j < shorter.size(); ++j)
        {
            auto const above = row[j + 1];
            row[j + 1] = std::min(
                diagonal + (shorter[j] == longer[i] ? 0u : 1u),
                std::min(above, row[j]) + 1u
            );
            diagonal = above;
        }
    }

    return row.back();
}
|
|
@ -0,0 +1,15 @@
|
|||
#pragma once

#include <string>
#include <string_view>

/* Calculate the Levenshtein distance between two strings.
 *
 * The distance is the minimum number of single-character insertions,
 * deletions and substitutions needed to turn one string into the other.
 *
 * The complexity is O(n*m) in the worst case, where n and m are the lengths
 * of the two strings, and O(n) in the best case (for example when the
 * strings are identical), where n is the length of the shortest string.
 *
 */
auto levenshtein_distance(
    std::string_view const& a,
    std::string_view const& b
) -> unsigned int
;
|
|
@ -0,0 +1,5 @@
|
|||
# Append to the lists set up by the parent build file instead of replacing
# them, so anything already collected there is kept.
iosifovitch_sources += files([
  'iosifovitch.cpp'
])

iosifovitch_include_dirs += include_directories(['.'])
|
|
@ -0,0 +1,11 @@
|
|||
#include <iosifovitch.h>
|
||||
#include <iostream>
|
||||
|
||||
auto main(int, char ** argv) -> int
|
||||
{
|
||||
auto target_cost = (unsigned int)(atoi(argv[3]));
|
||||
auto cost = levenshtein_distance(argv[1], argv[2]);
|
||||
std::cerr << target_cost << '\n';
|
||||
std::cerr << cost << '\n';
|
||||
return cost == target_cost ? EXIT_SUCCESS : EXIT_FAILURE;
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
# Test driver executable, linked against the library under test.
test_exe = executable(
  'iosifovitch-basic-tests',
  files(['basic-tests.cpp']),
  dependencies: [iosifovitch_dep]
)

# Every test passes three arguments to the driver: the two strings to compare
# and the expected distance between them.

test(
  'short equal strings',
  test_exe,
  args: ['hello', 'hello', '0']
)

test(
  'empty strings',
  test_exe,
  args: ['', '', '0']
)

test(
  'empty string and non-empty string',
  test_exe,
  args: ['', 'hello', '5']
)

test(
  'non-empty string and empty string',
  test_exe,
  args: ['hello', '', '5']
)


# No characters in common: one substitution plus four deletions.
test(
  'non-empty string and non-empty string',
  test_exe,
  args: ['hello', 'm', '5']
)

test(
  'non-empty string and prefix string',
  test_exe,
  args: ['hello', 'he', '3']
)

# The strings share the middle part 'ell'.
test(
  'non-empty string and infix string',
  test_exe,
  args: ['hello', 'teller', '3']
)

# The strings share the ending 'llo'.
test(
  'non-empty string and postfix string',
  test_exe,
  args: ['hello', 'millo', '2']
)
|
Loading…
Reference in New Issue