From 3ee51fa485ba00108d748d807e03a1024a484d87 Mon Sep 17 00:00:00 2001 From: Frederik Hertzum Date: Sun, 25 Feb 2024 00:56:15 +0100 Subject: [PATCH] Functional implementation. This has the same time and memory complexity as the original, now lost, version. It does use twice the memory needed for the buffer. TODO: reduce the size of the buffer to half of what it is now. TODO: add some more tests. Pay attention to odd/even sized strings in combination. Add some strings that are closer to being the same with some weird mix of pre, post and infix strings. TODO: See if it's possible to make this faster by divide-and-conquer. TODO: See if it's possible to use SIMD instructions to improve performance. --- .gitea/workflows/workflow.yaml | 13 ++++++++ meson.build | 19 ++++++++++++ src/iosifovitch.cpp | 41 ++++++++++++++++++++++++++ src/iosifovitch.h | 15 ++++++++++ src/meson.build | 5 ++++ tests/basic-tests.cpp | 11 +++++++ tests/meson.build | 54 ++++++++++++++++++++++++++++++++++ 7 files changed, 158 insertions(+) create mode 100644 .gitea/workflows/workflow.yaml create mode 100644 meson.build create mode 100644 src/iosifovitch.cpp create mode 100644 src/iosifovitch.h create mode 100644 src/meson.build create mode 100644 tests/basic-tests.cpp create mode 100644 tests/meson.build diff --git a/.gitea/workflows/workflow.yaml b/.gitea/workflows/workflow.yaml new file mode 100644 index 0000000..6f81591 --- /dev/null +++ b/.gitea/workflows/workflow.yaml @@ -0,0 +1,13 @@ +name: Build meson +run-name: Build stuff +on: [push] + +jobs: + Build-Stuff: + runs-on: fedora-meson + steps: + - name: Check out repository code + uses: actions/checkout@v3 + - run: meson setup build + - run: meson compile -C build + - run: meson test -C build diff --git a/meson.build b/meson.build new file mode 100644 index 0000000..e63f80b --- /dev/null +++ b/meson.build @@ -0,0 +1,19 @@ +project( + 'iosifovitch', ['cpp'], + version: '0.1.0', + meson_version: '>= 1.3.0', + default_options: [ 'warning_level=3', 
#include <algorithm>
#include <cstddef>
#include <iterator>
#include <numeric>
#include <string_view>
#include <utility>
#include <vector>

/* Calculate the levenshtein distance between two strings.
 *
 * The distance is the minimum number of single-character insertions,
 * deletions and substitutions needed to turn `a` into `b`. It is symmetric,
 * so the argument order does not matter.
 *
 * Any prefix and suffix shared by both strings is stripped first, because
 * shared characters at either end never contribute to the distance. If that
 * leaves the shorter string empty (equal strings, or one string being a
 * prefix/suffix of the other) the answer is found in linear time. Otherwise
 * the remaining parts are compared with a two-row dynamic program that takes
 * time proportional to the product of the two remaining lengths and scratch
 * memory proportional to the shorter remaining length.
 *
 * The result is returned as `unsigned int`; lengths that do not fit in that
 * type are truncated.
 */
auto levenshtein_distance(std::string_view const& a, std::string_view const& b) -> unsigned int {
    // Work on local copies of the views so they can be reordered and trimmed
    // without touching the caller's objects. Keeping the shorter string as
    // the row dimension keeps the scratch buffers as small as possible.
    auto shorter = a;
    auto longer = b;
    if (shorter.size() > longer.size()) std::swap(shorter, longer);

    // Strip the common prefix. `longer` is at least as long as `shorter`, so
    // the three-iterator overload of std::mismatch never reads past its end.
    auto const prefix_length = static_cast<std::size_t>(std::distance(
        shorter.begin(),
        std::mismatch(shorter.begin(), shorter.end(), longer.begin()).first
    ));
    shorter.remove_prefix(prefix_length);
    longer.remove_prefix(prefix_length);

    // Strip the common suffix the same way, walking both views backwards.
    // Both views lost the same number of characters above, so `longer` is
    // still at least as long as `shorter`.
    auto const suffix_length = static_cast<std::size_t>(std::distance(
        shorter.rbegin(),
        std::mismatch(shorter.rbegin(), shorter.rend(), longer.rbegin()).first
    ));
    shorter.remove_suffix(suffix_length);
    longer.remove_suffix(suffix_length);

    // With nothing left in the shorter string, every remaining character of
    // the longer one is a pure insertion. This also covers empty inputs.
    if (shorter.empty()) return static_cast<unsigned int>(longer.size());

    // previous[j] holds the distance between the first j characters of
    // `shorter` and the part of `longer` consumed so far; `current` is the
    // row being built for one more character of `longer`.
    std::vector<unsigned int> previous(shorter.size() + 1);
    std::vector<unsigned int> current(shorter.size() + 1);
    std::iota(previous.begin(), previous.end(), 0u);

    auto consumed = 0u;
    for (char const longer_char : longer)
    {
        // Turning an empty prefix of `shorter` into `consumed` characters of
        // `longer` takes exactly that many insertions.
        current[0] = ++consumed;
        for (std::size_t j = 0; j < shorter.size(); ++j)
        {
            auto const substitution = previous[j] + (shorter[j] == longer_char ? 0u : 1u);
            auto const insertion_or_deletion = std::min(previous[j + 1], current[j]) + 1u;
            current[j + 1] = std::min(substitution, insertion_or_deletion);
        }
        std::swap(previous, current);
    }

    // After the final swap the finished row lives in `previous`.
    return previous.back();
}
+ * + * The complexity is guaranteed to be O(n*n) in the worst case and O(n) in the + * best case, where n is the length of the shortest string. + * + */ +auto levenshtein_distance( + std::string_view const& a, + std::string_view const& b +) -> unsigned int +; \ No newline at end of file diff --git a/src/meson.build b/src/meson.build new file mode 100644 index 0000000..038e680 --- /dev/null +++ b/src/meson.build @@ -0,0 +1,5 @@ +iosifovitch_sources = files([ + 'iosifovitch.cpp' +]) + +iosifovitch_include_dirs += include_directories(['.']) \ No newline at end of file diff --git a/tests/basic-tests.cpp b/tests/basic-tests.cpp new file mode 100644 index 0000000..6d305ef --- /dev/null +++ b/tests/basic-tests.cpp @@ -0,0 +1,11 @@ +#include +#include + +auto main(int, char ** argv) -> int +{ + auto target_cost = (unsigned int)(atoi(argv[3])); + auto cost = levenshtein_distance(argv[1], argv[2]); + std::cerr << target_cost << '\n'; + std::cerr << cost << '\n'; + return cost == target_cost ? EXIT_SUCCESS : EXIT_FAILURE; +} \ No newline at end of file diff --git a/tests/meson.build b/tests/meson.build new file mode 100644 index 0000000..490e173 --- /dev/null +++ b/tests/meson.build @@ -0,0 +1,54 @@ +test_exe = executable( + 'iosifovitch-basic-tests', + files(['basic-tests.cpp']), + dependencies: [iosifovitch_dep] +) + +test( + 'short equal strings', + test_exe, + args: ['hello', 'hello', '0'] +) + +test( + 'empty strings', + test_exe, + args: ['', '', '0'] +) + +test( + 'empty string and non-empty string', + test_exe, + args: ['', 'hello', '5'] +) + +test( + 'non-empty string and empty string', + test_exe, + args: ['hello', '', '5'] +) + + +test( + 'non-empty string and non-empty string', + test_exe, + args: ['hello', 'm', '5'] +) + +test( + 'non-empty string and prefix string', + test_exe, + args: ['hello', 'he', '3'] +) + +test( + 'non-empty string and infix string', + test_exe, + args: ['hello', 'teller', '3'] +) + +test( + 'non-empty string and postfix string', + 
test_exe, + args: ['hello', 'millo', '2'] +) \ No newline at end of file