更新libclamav库1.0.0版本
This commit is contained in:
1
clamav/libclamav_rust/.cargo/vendor/strsim/.cargo-checksum.json
vendored
Normal file
1
clamav/libclamav_rust/.cargo/vendor/strsim/.cargo-checksum.json
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"files":{"CHANGELOG.md":"18e89ab18be7d22dd835924a38b1198651e870dfd0a2904a344120d341c4bc0e","Cargo.toml":"dd189fa732c00b7d1b036b43220253ef6ff2a0b1ff5ca7cf13f0669113a2055e","LICENSE":"1738b51502ae831fb59ffbeb22ebdd90bf17e5c72fe57c00b47552415f133fd8","README.md":"cfb01477df60cd0780ba59cbbc388f3f71d83bf51bab77e004fc7b811aff0efd","appveyor.yml":"b41eae9798a9bb250f6046509d9bbd6e63bac9ad2655d342b3d9c8975584f0c0","benches/benches.rs":"e277857c44afdc08b2acf35fc05be6226529c588eb9da397382b0a74c58615ab","dev":"5bd26dc2c86f777627abe96c5992b6c45e6b5dea52f42b7107fa5c106abe2ab4","src/lib.rs":"26a960216567e5dea46033b3383a69e7da498095c092e195e92c05faef52f915","tests/lib.rs":"de2b1181c379a0f55de7b86021a9afb77dbe81053a6acf99623bec3663f9b7c4"},"package":"8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"}
|
||||
123
clamav/libclamav_rust/.cargo/vendor/strsim/CHANGELOG.md
vendored
Normal file
123
clamav/libclamav_rust/.cargo/vendor/strsim/CHANGELOG.md
vendored
Normal file
@@ -0,0 +1,123 @@
|
||||
# Change Log
|
||||
This project attempts to adhere to [Semantic Versioning](http://semver.org).
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
## [0.8.0] - (2018-08-19)
|
||||
### Added
|
||||
- Normalized versions of Levenshtein and Damerau-Levenshtein (thanks [@gentoid](https://github.com/gentoid))
|
||||
|
||||
## [0.7.0] - (2018-01-17)
|
||||
### Changed
|
||||
- Faster Levenshtein implementation (thanks [@wdv4758h](https://github.com/wdv4758h))
|
||||
|
||||
### Removed
|
||||
- Remove the "against_vec" functions. They are one-liners now, so they don't
|
||||
seem to add enough value to justify making the API larger. I didn't find
|
||||
anybody using them when I skimmed through a GitHub search. If you do use them,
|
||||
you can change the calls to something like:
|
||||
```rust
|
||||
let distances = strings.iter().map(|a| jaro(target, a)).collect();
|
||||
```
|
||||
|
||||
## [0.6.0] - (2016-12-26)
|
||||
### Added
|
||||
- Add optimal string alignment distance
|
||||
|
||||
### Fixed
|
||||
- Fix Damerau-Levenshtein implementation (previous implementation was actually
|
||||
optimal string alignment; see this [Damerau-Levenshtein explanation])
|
||||
|
||||
## [0.5.2] - (2016-11-21)
|
||||
### Changed
|
||||
- Remove Cargo generated documentation in favor of a [docs.rs] link
|
||||
|
||||
## [0.5.1] - (2016-08-23)
|
||||
### Added
|
||||
- Add Cargo generated documentation
|
||||
|
||||
### Fixed
|
||||
- Fix panic when Jaro or Jaro-Winkler are given strings both with a length of
|
||||
one
|
||||
|
||||
## [0.5.0] - (2016-08-11)
|
||||
### Changed
|
||||
- Make Hamming faster (thanks @IBUzPE9) when the two strings have the same
|
||||
length but slower when they have different lengths
|
||||
|
||||
## [0.4.1] - (2016-04-18)
|
||||
### Added
|
||||
- Add Vagrant setup for development
|
||||
- Add AppVeyor configuration for Windows CI
|
||||
|
||||
### Fixed
|
||||
- Fix metrics when given strings with multibyte characters (thanks @WanzenBug)
|
||||
|
||||
## [0.4.0] - (2015-06-10)
|
||||
### Added
|
||||
- For each metric, add a function that takes a vector of strings and returns a
|
||||
vector of results (thanks @ovarene)
|
||||
|
||||
## [0.3.0] - (2015-04-30)
|
||||
### Changed
|
||||
- Remove usage of unstable Rust features
|
||||
|
||||
## [0.2.5] - (2015-04-24)
|
||||
### Fixed
|
||||
- Remove unnecessary `Float` import from doc tests
|
||||
|
||||
## [0.2.4] - (2015-04-15)
|
||||
### Fixed
|
||||
- Remove unused `core` feature flag
|
||||
|
||||
## [0.2.3] - (2015-04-01)
|
||||
### Fixed
|
||||
- Remove now unnecessary `Float` import
|
||||
|
||||
## [0.2.2] - (2015-03-29)
|
||||
### Fixed
|
||||
- Remove usage of `char_at` (marked as unstable)
|
||||
|
||||
## [0.2.1] - (2015-02-20)
|
||||
### Fixed
|
||||
- Update bit vector import to match Rust update
|
||||
|
||||
## [0.2.0] - (2015-02-19)
|
||||
### Added
|
||||
- Implement Damerau-Levenshtein
|
||||
- Add tests in docs
|
||||
|
||||
## [0.1.1] - (2015-02-10)
|
||||
### Added
|
||||
- Configure Travis for CI
|
||||
- Add rustdoc comments
|
||||
|
||||
### Fixed
|
||||
- Limit Jaro-Winkler return value to a maximum of 1.0
|
||||
- Fix float comparisons in tests
|
||||
|
||||
## [0.1.0] - (2015-02-09)
|
||||
### Added
|
||||
- Implement Hamming, Jaro, Jaro-Winkler, and Levenshtein
|
||||
|
||||
[Unreleased]: https://github.com/dguo/strsim-rs/compare/0.8.0...HEAD
|
||||
[0.8.0]: https://github.com/dguo/strsim-rs/compare/0.7.0...0.8.0
|
||||
[0.7.0]: https://github.com/dguo/strsim-rs/compare/0.6.0...0.7.0
|
||||
[0.6.0]: https://github.com/dguo/strsim-rs/compare/0.5.2...0.6.0
|
||||
[0.5.2]: https://github.com/dguo/strsim-rs/compare/0.5.1...0.5.2
|
||||
[0.5.1]: https://github.com/dguo/strsim-rs/compare/0.5.0...0.5.1
|
||||
[0.5.0]: https://github.com/dguo/strsim-rs/compare/0.4.1...0.5.0
|
||||
[0.4.1]: https://github.com/dguo/strsim-rs/compare/0.4.0...0.4.1
|
||||
[0.4.0]: https://github.com/dguo/strsim-rs/compare/0.3.0...0.4.0
|
||||
[0.3.0]: https://github.com/dguo/strsim-rs/compare/0.2.5...0.3.0
|
||||
[0.2.5]: https://github.com/dguo/strsim-rs/compare/0.2.4...0.2.5
|
||||
[0.2.4]: https://github.com/dguo/strsim-rs/compare/0.2.3...0.2.4
|
||||
[0.2.3]: https://github.com/dguo/strsim-rs/compare/0.2.2...0.2.3
|
||||
[0.2.2]: https://github.com/dguo/strsim-rs/compare/0.2.1...0.2.2
|
||||
[0.2.1]: https://github.com/dguo/strsim-rs/compare/0.2.0...0.2.1
|
||||
[0.2.0]: https://github.com/dguo/strsim-rs/compare/0.1.1...0.2.0
|
||||
[0.1.1]: https://github.com/dguo/strsim-rs/compare/0.1.0...0.1.1
|
||||
[0.1.0]: https://github.com/dguo/strsim-rs/compare/fabad4...0.1.0
|
||||
[docs.rs]: https://docs.rs/strsim/
|
||||
[Damerau-Levenshtein explanation]:
|
||||
http://scarcitycomputing.blogspot.com/2013/04/damerau-levenshtein-edit-distance.html
|
||||
28
clamav/libclamav_rust/.cargo/vendor/strsim/Cargo.toml
vendored
Normal file
28
clamav/libclamav_rust/.cargo/vendor/strsim/Cargo.toml
vendored
Normal file
@@ -0,0 +1,28 @@
|
||||
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
||||
#
|
||||
# When uploading crates to the registry Cargo will automatically
|
||||
# "normalize" Cargo.toml files for maximal compatibility
|
||||
# with all versions of Cargo and also rewrite `path` dependencies
|
||||
# to registry (e.g. crates.io) dependencies
|
||||
#
|
||||
# If you believe there's an error in this file please file an
|
||||
# issue against the rust-lang/cargo repository. If you're
|
||||
# editing this file be aware that the upstream Cargo.toml
|
||||
# will likely look very different (and much more reasonable)
|
||||
|
||||
[package]
|
||||
name = "strsim"
|
||||
version = "0.8.0"
|
||||
authors = ["Danny Guo <dannyguo91@gmail.com>"]
|
||||
description = "Implementations of string similarity metrics.\nIncludes Hamming, Levenshtein, OSA, Damerau-Levenshtein, Jaro, and Jaro-Winkler.\n"
|
||||
homepage = "https://github.com/dguo/strsim-rs"
|
||||
documentation = "https://docs.rs/strsim/"
|
||||
readme = "README.md"
|
||||
keywords = ["string", "similarity", "Hamming", "Levenshtein", "Jaro"]
|
||||
license = "MIT"
|
||||
repository = "https://github.com/dguo/strsim-rs"
|
||||
[badges.appveyor]
|
||||
repository = "dguo/strsim-rs"
|
||||
|
||||
[badges.travis-ci]
|
||||
repository = "dguo/strsim-rs"
|
||||
23
clamav/libclamav_rust/.cargo/vendor/strsim/LICENSE
vendored
Normal file
23
clamav/libclamav_rust/.cargo/vendor/strsim/LICENSE
vendored
Normal file
@@ -0,0 +1,23 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2015 Danny Guo
|
||||
Copyright (c) 2016 Titus Wormer <tituswormer@gmail.com>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
69
clamav/libclamav_rust/.cargo/vendor/strsim/README.md
vendored
Normal file
69
clamav/libclamav_rust/.cargo/vendor/strsim/README.md
vendored
Normal file
@@ -0,0 +1,69 @@
|
||||
# strsim-rs [](https://crates.io/crates/strsim) [](https://github.com/dguo/strsim-rs/blob/master/LICENSE) [](https://travis-ci.org/dguo/strsim-rs) [](https://ci.appveyor.com/project/dguo/strsim-rs)
|
||||
|
||||
[Rust](https://www.rust-lang.org) implementations of [string similarity metrics]:
|
||||
- [Hamming]
|
||||
- [Levenshtein] - distance & normalized
|
||||
- [Optimal string alignment]
|
||||
- [Damerau-Levenshtein] - distance & normalized
|
||||
- [Jaro and Jaro-Winkler] - this implementation of Jaro-Winkler does not limit the common prefix length
|
||||
|
||||
### Installation
|
||||
```toml
|
||||
# Cargo.toml
|
||||
[dependencies]
|
||||
strsim = "0.8.0"
|
||||
```
|
||||
|
||||
### [Documentation](https://docs.rs/strsim/)
|
||||
You can change the version in the url to see the documentation for an older
|
||||
version in the
|
||||
[changelog](https://github.com/dguo/strsim-rs/blob/master/CHANGELOG.md).
|
||||
|
||||
### Usage
|
||||
```rust
|
||||
extern crate strsim;
|
||||
|
||||
use strsim::{hamming, levenshtein, normalized_levenshtein, osa_distance,
|
||||
damerau_levenshtein, normalized_damerau_levenshtein, jaro,
|
||||
jaro_winkler};
|
||||
|
||||
fn main() {
|
||||
match hamming("hamming", "hammers") {
|
||||
Ok(distance) => assert_eq!(3, distance),
|
||||
Err(why) => panic!("{:?}", why)
|
||||
}
|
||||
|
||||
assert_eq!(3, levenshtein("kitten", "sitting"));
|
||||
|
||||
assert!((normalized_levenshtein("kitten", "sitting") - 0.57142).abs() < 0.00001);
|
||||
|
||||
assert_eq!(3, osa_distance("ac", "cba"));
|
||||
|
||||
assert_eq!(2, damerau_levenshtein("ac", "cba"));
|
||||
|
||||
assert!((normalized_damerau_levenshtein("levenshtein", "löwenbräu") - 0.27272).abs() < 0.00001)
|
||||
|
||||
assert!((0.392 - jaro("Friedrich Nietzsche", "Jean-Paul Sartre")).abs() <
|
||||
0.001);
|
||||
|
||||
assert!((0.911 - jaro_winkler("cheeseburger", "cheese fries")).abs() <
|
||||
0.001);
|
||||
}
|
||||
```
|
||||
|
||||
### Development
|
||||
If you don't want to install Rust itself, you can run `$ ./dev` for a
|
||||
development CLI if you have [Docker] installed.
|
||||
|
||||
Benchmarks require a Nightly toolchain. They are run by `cargo +nightly bench`.
|
||||
|
||||
### License
|
||||
[MIT](https://github.com/dguo/strsim-rs/blob/master/LICENSE)
|
||||
|
||||
[string similarity metrics]:http://en.wikipedia.org/wiki/String_metric
|
||||
[Damerau-Levenshtein]:http://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
|
||||
[Jaro and Jaro-Winkler]:http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance
|
||||
[Levenshtein]:http://en.wikipedia.org/wiki/Levenshtein_distance
|
||||
[Hamming]:http://en.wikipedia.org/wiki/Hamming_distance
|
||||
[Optimal string alignment]:https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance#Optimal_string_alignment_distance
|
||||
[Docker]:https://docs.docker.com/engine/installation/
|
||||
13
clamav/libclamav_rust/.cargo/vendor/strsim/appveyor.yml
vendored
Normal file
13
clamav/libclamav_rust/.cargo/vendor/strsim/appveyor.yml
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
install:
|
||||
- ps: Start-FileDownload 'https://static.rust-lang.org/dist/rust-beta-x86_64-pc-windows-gnu.exe'
|
||||
- rust-beta-x86_64-pc-windows-gnu.exe /VERYSILENT /NORESTART /DIR="C:\Program Files (x86)\Rust"
|
||||
- SET PATH=%PATH%;C:\Program Files (x86)\Rust\bin
|
||||
- rustc -V
|
||||
- cargo -V
|
||||
|
||||
build: false
|
||||
|
||||
test_script:
|
||||
- cargo build
|
||||
- cargo test --verbose
|
||||
|
||||
84
clamav/libclamav_rust/.cargo/vendor/strsim/benches/benches.rs
vendored
Normal file
84
clamav/libclamav_rust/.cargo/vendor/strsim/benches/benches.rs
vendored
Normal file
@@ -0,0 +1,84 @@
|
||||
//! Benchmarks for strsim.
|
||||
|
||||
#![feature(test)]
|
||||
|
||||
extern crate strsim;
|
||||
|
||||
mod benches {
|
||||
use super::*;
|
||||
|
||||
extern crate test;
|
||||
use self::test::Bencher;
|
||||
|
||||
#[bench]
|
||||
fn bench_hamming(bencher: &mut Bencher) {
|
||||
let a = "ACAAGATGCCATTGTCCCCCGGCCTCCTGCTGCTGCTGCTCTCCGGGG";
|
||||
let b = "CCTGGAGGGTGGCCCCACCGGCCGAGACAGCGAGCATATGCAGGAAGC";
|
||||
bencher.iter(|| {
|
||||
strsim::hamming(&a, &b).unwrap();
|
||||
})
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_jaro(bencher: &mut Bencher) {
|
||||
let a = "Philosopher Friedrich Nietzsche";
|
||||
let b = "Philosopher Jean-Paul Sartre";
|
||||
bencher.iter(|| {
|
||||
strsim::jaro(&a, &b);
|
||||
})
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_jaro_winkler(bencher: &mut Bencher) {
|
||||
let a = "Philosopher Friedrich Nietzsche";
|
||||
let b = "Philosopher Jean-Paul Sartre";
|
||||
bencher.iter(|| {
|
||||
strsim::jaro_winkler(&a, &b);
|
||||
})
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_levenshtein(bencher: &mut Bencher) {
|
||||
let a = "Philosopher Friedrich Nietzsche";
|
||||
let b = "Philosopher Jean-Paul Sartre";
|
||||
bencher.iter(|| {
|
||||
strsim::levenshtein(&a, &b);
|
||||
})
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_normalized_levenshtein(bencher: &mut Bencher) {
|
||||
let a = "Philosopher Friedrich Nietzsche";
|
||||
let b = "Philosopher Jean-Paul Sartre";
|
||||
bencher.iter(|| {
|
||||
strsim::normalized_levenshtein(&a, &b);
|
||||
})
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_osa_distance(bencher: &mut Bencher) {
|
||||
let a = "Philosopher Friedrich Nietzsche";
|
||||
let b = "Philosopher Jean-Paul Sartre";
|
||||
bencher.iter(|| {
|
||||
strsim::osa_distance(&a, &b);
|
||||
})
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_damerau_levenshtein(bencher: &mut Bencher) {
|
||||
let a = "Philosopher Friedrich Nietzsche";
|
||||
let b = "Philosopher Jean-Paul Sartre";
|
||||
bencher.iter(|| {
|
||||
strsim::damerau_levenshtein(&a, &b);
|
||||
})
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_normalized_damerau_levenshtein(bencher: &mut Bencher) {
|
||||
let a = "Philosopher Friedrich Nietzsche";
|
||||
let b = "Philosopher Jean-Paul Sartre";
|
||||
bencher.iter(|| {
|
||||
strsim::normalized_damerau_levenshtein(&a, &b);
|
||||
})
|
||||
}
|
||||
}
|
||||
41
clamav/libclamav_rust/.cargo/vendor/strsim/dev
vendored
Normal file
41
clamav/libclamav_rust/.cargo/vendor/strsim/dev
vendored
Normal file
@@ -0,0 +1,41 @@
|
||||
#!/usr/bin/env python3
|
||||
# ./dev --help
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from subprocess import run
|
||||
import sys
|
||||
|
||||
parser = argparse.ArgumentParser(prog='./dev')
|
||||
subparsers = parser.add_subparsers(metavar='<command>', title='commands')
|
||||
command = [
|
||||
'docker', 'run', '-it', '--rm', '-v', os.getcwd() + ':/src:cached',
|
||||
'-w=/src', 'rust:1.21.0'
|
||||
]
|
||||
|
||||
def cargo(args, remaining):
|
||||
sys.exit(run(command + ['cargo'] + remaining or []).returncode)
|
||||
|
||||
parser_cargo = subparsers.add_parser('cargo', help='run a cargo command')
|
||||
parser_cargo.set_defaults(func=cargo)
|
||||
|
||||
def sh(args, remaining):
|
||||
sys.exit(run(command + ['bash']).returncode)
|
||||
|
||||
parser_sh = subparsers.add_parser('sh', help='bring up a shell')
|
||||
parser_sh.set_defaults(func=sh)
|
||||
|
||||
def test(args, remaining):
|
||||
sys.exit(run(command + ['cargo', 'test']).returncode)
|
||||
|
||||
parser_test = subparsers.add_parser('test', help='run tests')
|
||||
parser_test.set_defaults(func=test)
|
||||
|
||||
if len(sys.argv) > 1:
|
||||
args, remaining = parser.parse_known_args()
|
||||
try:
|
||||
args.func(args, remaining)
|
||||
except FileNotFoundError:
|
||||
sys.exit('Please install Docker.')
|
||||
else:
|
||||
parser.print_help()
|
||||
786
clamav/libclamav_rust/.cargo/vendor/strsim/src/lib.rs
vendored
Normal file
786
clamav/libclamav_rust/.cargo/vendor/strsim/src/lib.rs
vendored
Normal file
@@ -0,0 +1,786 @@
|
||||
//! This library implements string similarity metrics.
|
||||
|
||||
use std::char;
|
||||
use std::cmp::{max, min};
|
||||
use std::collections::HashMap;
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum StrSimError {
|
||||
DifferentLengthArgs
|
||||
}
|
||||
|
||||
pub type HammingResult = Result<usize, StrSimError>;
|
||||
|
||||
/// Calculates the number of positions in the two strings where the characters
|
||||
/// differ. Returns an error if the strings have different lengths.
|
||||
///
|
||||
/// ```
|
||||
/// use strsim::hamming;
|
||||
///
|
||||
/// match hamming("hamming", "hammers") {
|
||||
/// Ok(distance) => assert_eq!(3, distance),
|
||||
/// Err(why) => panic!("{:?}", why)
|
||||
/// }
|
||||
/// ```
|
||||
pub fn hamming(a: &str, b: &str) -> HammingResult {
|
||||
let (mut ita, mut itb, mut count) = (a.chars(), b.chars(), 0);
|
||||
loop {
|
||||
match (ita.next(), itb.next()){
|
||||
(Some(x), Some(y)) => if x != y { count += 1 },
|
||||
(None, None) => return Ok(count),
|
||||
_ => return Err(StrSimError::DifferentLengthArgs),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculates the Jaro similarity between two strings. The returned value
|
||||
/// is between 0.0 and 1.0 (higher value means more similar).
|
||||
///
|
||||
/// ```
|
||||
/// use strsim::jaro;
|
||||
///
|
||||
/// assert!((0.392 - jaro("Friedrich Nietzsche", "Jean-Paul Sartre")).abs() <
|
||||
/// 0.001);
|
||||
/// ```
|
||||
pub fn jaro(a: &str, b: &str) -> f64 {
|
||||
if a == b { return 1.0; }
|
||||
|
||||
let a_len = a.chars().count();
|
||||
let b_len = b.chars().count();
|
||||
|
||||
// The check for lengths of one here is to prevent integer overflow when
|
||||
// calculating the search range.
|
||||
if a_len == 0 || b_len == 0 || (a_len == 1 && b_len == 1) {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
let search_range = (max(a_len, b_len) / 2) - 1;
|
||||
|
||||
let mut b_consumed = Vec::with_capacity(b_len);
|
||||
for _ in 0..b_len {
|
||||
b_consumed.push(false);
|
||||
}
|
||||
let mut matches = 0.0;
|
||||
|
||||
let mut transpositions = 0.0;
|
||||
let mut b_match_index = 0;
|
||||
|
||||
for (i, a_char) in a.chars().enumerate() {
|
||||
let min_bound =
|
||||
// prevent integer wrapping
|
||||
if i > search_range {
|
||||
max(0, i - search_range)
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
let max_bound = min(b_len - 1, i + search_range);
|
||||
|
||||
if min_bound > max_bound {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (j, b_char) in b.chars().enumerate() {
|
||||
if min_bound <= j && j <= max_bound && a_char == b_char &&
|
||||
!b_consumed[j] {
|
||||
b_consumed[j] = true;
|
||||
matches += 1.0;
|
||||
|
||||
if j < b_match_index {
|
||||
transpositions += 1.0;
|
||||
}
|
||||
b_match_index = j;
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if matches == 0.0 {
|
||||
0.0
|
||||
} else {
|
||||
(1.0 / 3.0) * ((matches / a_len as f64) +
|
||||
(matches / b_len as f64) +
|
||||
((matches - transpositions) / matches))
|
||||
}
|
||||
}
|
||||
|
||||
/// Like Jaro but gives a boost to strings that have a common prefix.
|
||||
///
|
||||
/// ```
|
||||
/// use strsim::jaro_winkler;
|
||||
///
|
||||
/// assert!((0.911 - jaro_winkler("cheeseburger", "cheese fries")).abs() <
|
||||
/// 0.001);
|
||||
/// ```
|
||||
pub fn jaro_winkler(a: &str, b: &str) -> f64 {
|
||||
let jaro_distance = jaro(a, b);
|
||||
|
||||
// Don't limit the length of the common prefix
|
||||
let prefix_length = a.chars()
|
||||
.zip(b.chars())
|
||||
.take_while(|&(a_char, b_char)| a_char == b_char)
|
||||
.count();
|
||||
|
||||
let jaro_winkler_distance =
|
||||
jaro_distance + (0.1 * prefix_length as f64 * (1.0 - jaro_distance));
|
||||
|
||||
if jaro_winkler_distance <= 1.0 {
|
||||
jaro_winkler_distance
|
||||
} else {
|
||||
1.0
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculates the minimum number of insertions, deletions, and substitutions
|
||||
/// required to change one string into the other.
|
||||
///
|
||||
/// ```
|
||||
/// use strsim::levenshtein;
|
||||
///
|
||||
/// assert_eq!(3, levenshtein("kitten", "sitting"));
|
||||
/// ```
|
||||
pub fn levenshtein(a: &str, b: &str) -> usize {
|
||||
if a == b { return 0; }
|
||||
|
||||
let a_len = a.chars().count();
|
||||
let b_len = b.chars().count();
|
||||
|
||||
if a_len == 0 { return b_len; }
|
||||
if b_len == 0 { return a_len; }
|
||||
|
||||
let mut cache: Vec<usize> = (1..b_len+1).collect();
|
||||
|
||||
let mut result = 0;
|
||||
let mut distance_a;
|
||||
let mut distance_b;
|
||||
|
||||
for (i, a_char) in a.chars().enumerate() {
|
||||
result = i;
|
||||
distance_b = i;
|
||||
|
||||
for (j, b_char) in b.chars().enumerate() {
|
||||
let cost = if a_char == b_char { 0 } else { 1 };
|
||||
distance_a = distance_b + cost;
|
||||
distance_b = cache[j];
|
||||
result = min(result + 1, min(distance_a, distance_b + 1));
|
||||
cache[j] = result;
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Calculates a normalized score of the Levenshtein algorithm between 0.0 and
|
||||
/// 1.0 (inclusive), where 1.0 means the strings are the same.
|
||||
///
|
||||
/// ```
|
||||
/// use strsim::normalized_levenshtein;
|
||||
///
|
||||
/// assert!((normalized_levenshtein("kitten", "sitting") - 0.57142).abs() < 0.00001);
|
||||
/// assert!((normalized_levenshtein("", "") - 1.0).abs() < 0.00001);
|
||||
/// assert!(normalized_levenshtein("", "second").abs() < 0.00001);
|
||||
/// assert!(normalized_levenshtein("first", "").abs() < 0.00001);
|
||||
/// assert!((normalized_levenshtein("string", "string") - 1.0).abs() < 0.00001);
|
||||
/// ```
|
||||
pub fn normalized_levenshtein(a: &str, b: &str) -> f64 {
|
||||
if a.is_empty() && b.is_empty() {
|
||||
return 1.0;
|
||||
}
|
||||
1.0 - (levenshtein(a, b) as f64) / (a.chars().count().max(b.chars().count()) as f64)
|
||||
}
|
||||
|
||||
/// Like Levenshtein but allows for adjacent transpositions. Each substring can
|
||||
/// only be edited once.
|
||||
///
|
||||
/// ```
|
||||
/// use strsim::osa_distance;
|
||||
///
|
||||
/// assert_eq!(3, osa_distance("ab", "bca"));
|
||||
/// ```
|
||||
pub fn osa_distance(a: &str, b: &str) -> usize {
|
||||
let a_len = a.chars().count();
|
||||
let b_len = b.chars().count();
|
||||
if a == b { return 0; }
|
||||
else if a_len == 0 { return b_len; }
|
||||
else if b_len == 0 { return a_len; }
|
||||
|
||||
let mut prev_two_distances: Vec<usize> = Vec::with_capacity(b_len + 1);
|
||||
let mut prev_distances: Vec<usize> = Vec::with_capacity(b_len + 1);
|
||||
let mut curr_distances: Vec<usize> = Vec::with_capacity(b_len + 1);
|
||||
|
||||
let mut prev_a_char = char::MAX;
|
||||
let mut prev_b_char = char::MAX;
|
||||
|
||||
for i in 0..(b_len + 1) {
|
||||
prev_two_distances.push(i);
|
||||
prev_distances.push(i);
|
||||
curr_distances.push(0);
|
||||
}
|
||||
|
||||
for (i, a_char) in a.chars().enumerate() {
|
||||
curr_distances[0] = i + 1;
|
||||
|
||||
for (j, b_char) in b.chars().enumerate() {
|
||||
let cost = if a_char == b_char { 0 } else { 1 };
|
||||
curr_distances[j + 1] = min(curr_distances[j] + 1,
|
||||
min(prev_distances[j + 1] + 1,
|
||||
prev_distances[j] + cost));
|
||||
if i > 0 && j > 0 && a_char != b_char &&
|
||||
a_char == prev_b_char && b_char == prev_a_char {
|
||||
curr_distances[j + 1] = min(curr_distances[j + 1],
|
||||
prev_two_distances[j - 1] + 1);
|
||||
}
|
||||
|
||||
prev_b_char = b_char;
|
||||
}
|
||||
|
||||
prev_two_distances.clone_from(&prev_distances);
|
||||
prev_distances.clone_from(&curr_distances);
|
||||
prev_a_char = a_char;
|
||||
}
|
||||
|
||||
curr_distances[b_len]
|
||||
|
||||
}
|
||||
|
||||
/// Like optimal string alignment, but substrings can be edited an unlimited
|
||||
/// number of times, and the triangle inequality holds.
|
||||
///
|
||||
/// ```
|
||||
/// use strsim::damerau_levenshtein;
|
||||
///
|
||||
/// assert_eq!(2, damerau_levenshtein("ab", "bca"));
|
||||
/// ```
|
||||
pub fn damerau_levenshtein(a: &str, b: &str) -> usize {
|
||||
if a == b { return 0; }
|
||||
|
||||
let a_chars: Vec<char> = a.chars().collect();
|
||||
let b_chars: Vec<char> = b.chars().collect();
|
||||
let a_len = a_chars.len();
|
||||
let b_len = b_chars.len();
|
||||
|
||||
if a_len == 0 { return b_len; }
|
||||
if b_len == 0 { return a_len; }
|
||||
|
||||
let mut distances = vec![vec![0; b_len + 2]; a_len + 2];
|
||||
let max_distance = a_len + b_len;
|
||||
distances[0][0] = max_distance;
|
||||
|
||||
for i in 0..(a_len + 1) {
|
||||
distances[i + 1][0] = max_distance;
|
||||
distances[i + 1][1] = i;
|
||||
}
|
||||
|
||||
for j in 0..(b_len + 1) {
|
||||
distances[0][j + 1] = max_distance;
|
||||
distances[1][j + 1] = j;
|
||||
}
|
||||
|
||||
let mut chars: HashMap<char, usize> = HashMap::new();
|
||||
|
||||
for i in 1..(a_len + 1) {
|
||||
let mut db = 0;
|
||||
|
||||
for j in 1..(b_len + 1) {
|
||||
let k = match chars.get(&b_chars[j - 1]) {
|
||||
Some(value) => value.clone(),
|
||||
None => 0
|
||||
};
|
||||
|
||||
let l = db;
|
||||
|
||||
let mut cost = 1;
|
||||
if a_chars[i - 1] == b_chars[j - 1] {
|
||||
cost = 0;
|
||||
db = j;
|
||||
}
|
||||
|
||||
let substitution_cost = distances[i][j] + cost;
|
||||
let insertion_cost = distances[i][j + 1] + 1;
|
||||
let deletion_cost = distances[i + 1][j] + 1;
|
||||
let transposition_cost = distances[k][l] + (i - k - 1) + 1 +
|
||||
(j - l - 1);
|
||||
|
||||
distances[i + 1][j + 1] = min(substitution_cost,
|
||||
min(insertion_cost,
|
||||
min(deletion_cost,
|
||||
transposition_cost)));
|
||||
}
|
||||
|
||||
chars.insert(a_chars[i - 1], i);
|
||||
}
|
||||
|
||||
distances[a_len + 1][b_len + 1]
|
||||
}
|
||||
|
||||
/// Calculates a normalized score of the Damerau–Levenshtein algorithm between
|
||||
/// 0.0 and 1.0 (inclusive), where 1.0 means the strings are the same.
|
||||
///
|
||||
/// ```
|
||||
/// use strsim::normalized_damerau_levenshtein;
|
||||
///
|
||||
/// assert!((normalized_damerau_levenshtein("levenshtein", "löwenbräu") - 0.27272).abs() < 0.00001);
|
||||
/// assert!((normalized_damerau_levenshtein("", "") - 1.0).abs() < 0.00001);
|
||||
/// assert!(normalized_damerau_levenshtein("", "flower").abs() < 0.00001);
|
||||
/// assert!(normalized_damerau_levenshtein("tree", "").abs() < 0.00001);
|
||||
/// assert!((normalized_damerau_levenshtein("sunglasses", "sunglasses") - 1.0).abs() < 0.00001);
|
||||
/// ```
|
||||
pub fn normalized_damerau_levenshtein(a: &str, b: &str) -> f64 {
|
||||
if a.is_empty() && b.is_empty() {
|
||||
return 1.0;
|
||||
}
|
||||
1.0 - (damerau_levenshtein(a, b) as f64) / (a.chars().count().max(b.chars().count()) as f64)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn hamming_empty() {
|
||||
match hamming("", "") {
|
||||
Ok(distance) => { assert_eq!(0, distance); },
|
||||
Err(why) => { panic!("{:?}", why); }
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hamming_same() {
|
||||
match hamming("hamming", "hamming") {
|
||||
Ok(distance) => { assert_eq!(0, distance); },
|
||||
Err(why) => { panic!("{:?}", why); }
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hamming_diff() {
|
||||
match hamming("hamming", "hammers") {
|
||||
Ok(distance) => { assert_eq!(3, distance); },
|
||||
Err(why) => { panic!("{:?}", why); }
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hamming_diff_multibyte() {
|
||||
match hamming("hamming", "h香mmüng") {
|
||||
Ok(distance) => { assert_eq!(2, distance); },
|
||||
Err(why) => { panic!("{:?}", why); }
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hamming_unequal_length() {
|
||||
match hamming("ham", "hamming") {
|
||||
Ok(_) => { panic!(); },
|
||||
Err(why) => { assert_eq!(why, StrSimError::DifferentLengthArgs); }
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hamming_names() {
|
||||
match hamming("Friedrich Nietzs", "Jean-Paul Sartre") {
|
||||
Ok(distance) => { assert_eq!(14, distance); },
|
||||
Err(why) => { panic!("{:?}", why); }
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_both_empty() {
|
||||
assert_eq!(1.0, jaro("", ""));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_first_empty() {
|
||||
assert_eq!(0.0, jaro("", "jaro"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_second_empty() {
|
||||
assert_eq!(0.0, jaro("distance", ""));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_same() {
|
||||
assert_eq!(1.0, jaro("jaro", "jaro"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_multibyte() {
|
||||
assert!((0.818 - jaro("testabctest", "testöঙ香test")) < 0.001);
|
||||
assert!((0.818 - jaro("testöঙ香test", "testabctest")) < 0.001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_diff_short() {
|
||||
assert!((0.767 - jaro("dixon", "dicksonx")).abs() < 0.001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_diff_one_character() {
|
||||
assert_eq!(0.0, jaro("a", "b"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_diff_one_and_two() {
|
||||
assert!((0.83 - jaro("a", "ab")).abs() < 0.01);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_diff_two_and_one() {
|
||||
assert!((0.83 - jaro("ab", "a")).abs() < 0.01);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_diff_no_transposition() {
|
||||
assert!((0.822 - jaro("dwayne", "duane")).abs() < 0.001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_diff_with_transposition() {
|
||||
assert!((0.944 - jaro("martha", "marhta")).abs() < 0.001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_names() {
|
||||
assert!((0.392 - jaro("Friedrich Nietzsche",
|
||||
"Jean-Paul Sartre")).abs() < 0.001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_winkler_both_empty() {
|
||||
assert_eq!(1.0, jaro_winkler("", ""));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_winkler_first_empty() {
|
||||
assert_eq!(0.0, jaro_winkler("", "jaro-winkler"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_winkler_second_empty() {
|
||||
assert_eq!(0.0, jaro_winkler("distance", ""));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_winkler_same() {
|
||||
assert_eq!(1.0, jaro_winkler("Jaro-Winkler", "Jaro-Winkler"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_winkler_multibyte() {
|
||||
assert!((0.89 - jaro_winkler("testabctest", "testöঙ香test")).abs() <
|
||||
0.001);
|
||||
assert!((0.89 - jaro_winkler("testöঙ香test", "testabctest")).abs() <
|
||||
0.001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_winkler_diff_short() {
|
||||
assert!((0.813 - jaro_winkler("dixon", "dicksonx")).abs() < 0.001);
|
||||
assert!((0.813 - jaro_winkler("dicksonx", "dixon")).abs() < 0.001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_winkler_diff_one_character() {
|
||||
assert_eq!(0.0, jaro_winkler("a", "b"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_winkler_diff_no_transposition() {
|
||||
assert!((0.840 - jaro_winkler("dwayne", "duane")).abs() < 0.001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_winkler_diff_with_transposition() {
|
||||
assert!((0.961 - jaro_winkler("martha", "marhta")).abs() < 0.001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_winkler_names() {
|
||||
assert!((0.562 - jaro_winkler("Friedrich Nietzsche",
|
||||
"Fran-Paul Sartre")).abs() < 0.001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_winkler_long_prefix() {
|
||||
assert!((0.911 - jaro_winkler("cheeseburger", "cheese fries")).abs() <
|
||||
0.001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_winkler_more_names() {
|
||||
assert!((0.868 - jaro_winkler("Thorkel", "Thorgier")).abs() < 0.001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_winkler_length_of_one() {
|
||||
assert!((0.738 - jaro_winkler("Dinsdale", "D")).abs() < 0.001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_winkler_very_long_prefix() {
|
||||
assert!((1.0 - jaro_winkler("thequickbrownfoxjumpedoverx",
|
||||
"thequickbrownfoxjumpedovery")).abs() <
|
||||
0.001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn levenshtein_empty() {
|
||||
assert_eq!(0, levenshtein("", ""));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn levenshtein_same() {
|
||||
assert_eq!(0, levenshtein("levenshtein", "levenshtein"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn levenshtein_diff_short() {
|
||||
assert_eq!(3, levenshtein("kitten", "sitting"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn levenshtein_diff_with_space() {
|
||||
assert_eq!(5, levenshtein("hello, world", "bye, world"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn levenshtein_diff_multibyte() {
|
||||
assert_eq!(3, levenshtein("öঙ香", "abc"));
|
||||
assert_eq!(3, levenshtein("abc", "öঙ香"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn levenshtein_diff_longer() {
|
||||
let a = "The quick brown fox jumped over the angry dog.";
|
||||
let b = "Lorem ipsum dolor sit amet, dicta latine an eam.";
|
||||
assert_eq!(37, levenshtein(a, b));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn levenshtein_first_empty() {
|
||||
assert_eq!(7, levenshtein("", "sitting"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn levenshtein_second_empty() {
|
||||
assert_eq!(6, levenshtein("kitten", ""));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalized_levenshtein_diff_short() {
|
||||
assert!((normalized_levenshtein("kitten", "sitting") - 0.57142).abs() < 0.00001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalized_levenshtein_for_empty_strings() {
|
||||
assert!((normalized_levenshtein("", "") - 1.0).abs() < 0.00001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalized_levenshtein_first_empty() {
|
||||
assert!(normalized_levenshtein("", "second").abs() < 0.00001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalized_levenshtein_second_empty() {
|
||||
assert!(normalized_levenshtein("first", "").abs() < 0.00001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalized_levenshtein_identical_strings() {
|
||||
assert!((normalized_levenshtein("identical", "identical") - 1.0).abs() < 0.00001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn osa_distance_empty() {
|
||||
assert_eq!(0, osa_distance("", ""));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn osa_distance_same() {
|
||||
assert_eq!(0, osa_distance("damerau", "damerau"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn osa_distance_first_empty() {
|
||||
assert_eq!(7, osa_distance("", "damerau"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn osa_distance_second_empty() {
|
||||
assert_eq!(7, osa_distance("damerau", ""));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn osa_distance_diff() {
|
||||
assert_eq!(3, osa_distance("ca", "abc"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn osa_distance_diff_short() {
|
||||
assert_eq!(3, osa_distance("damerau", "aderua"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn osa_distance_diff_reversed() {
|
||||
assert_eq!(3, osa_distance("aderua", "damerau"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn osa_distance_diff_multibyte() {
|
||||
assert_eq!(3, osa_distance("öঙ香", "abc"));
|
||||
assert_eq!(3, osa_distance("abc", "öঙ香"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn osa_distance_diff_unequal_length() {
|
||||
assert_eq!(6, osa_distance("damerau", "aderuaxyz"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn osa_distance_diff_unequal_length_reversed() {
|
||||
assert_eq!(6, osa_distance("aderuaxyz", "damerau"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn osa_distance_diff_comedians() {
|
||||
assert_eq!(5, osa_distance("Stewart", "Colbert"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn osa_distance_many_transpositions() {
|
||||
assert_eq!(4, osa_distance("abcdefghijkl", "bacedfgihjlk"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn osa_distance_diff_longer() {
|
||||
let a = "The quick brown fox jumped over the angry dog.";
|
||||
let b = "Lehem ipsum dolor sit amet, dicta latine an eam.";
|
||||
assert_eq!(36, osa_distance(a, b));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn osa_distance_beginning_transposition() {
|
||||
assert_eq!(1, osa_distance("foobar", "ofobar"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn osa_distance_end_transposition() {
|
||||
assert_eq!(1, osa_distance("specter", "spectre"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn osa_distance_restricted_edit() {
|
||||
assert_eq!(4, osa_distance("a cat", "an abct"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn damerau_levenshtein_empty() {
|
||||
assert_eq!(0, damerau_levenshtein("", ""));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn damerau_levenshtein_same() {
|
||||
assert_eq!(0, damerau_levenshtein("damerau", "damerau"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn damerau_levenshtein_first_empty() {
|
||||
assert_eq!(7, damerau_levenshtein("", "damerau"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn damerau_levenshtein_second_empty() {
|
||||
assert_eq!(7, damerau_levenshtein("damerau", ""));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn damerau_levenshtein_diff() {
|
||||
assert_eq!(2, damerau_levenshtein("ca", "abc"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn damerau_levenshtein_diff_short() {
|
||||
assert_eq!(3, damerau_levenshtein("damerau", "aderua"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn damerau_levenshtein_diff_reversed() {
|
||||
assert_eq!(3, damerau_levenshtein("aderua", "damerau"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn damerau_levenshtein_diff_multibyte() {
|
||||
assert_eq!(3, damerau_levenshtein("öঙ香", "abc"));
|
||||
assert_eq!(3, damerau_levenshtein("abc", "öঙ香"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn damerau_levenshtein_diff_unequal_length() {
|
||||
assert_eq!(6, damerau_levenshtein("damerau", "aderuaxyz"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn damerau_levenshtein_diff_unequal_length_reversed() {
|
||||
assert_eq!(6, damerau_levenshtein("aderuaxyz", "damerau"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn damerau_levenshtein_diff_comedians() {
|
||||
assert_eq!(5, damerau_levenshtein("Stewart", "Colbert"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn damerau_levenshtein_many_transpositions() {
|
||||
assert_eq!(4, damerau_levenshtein("abcdefghijkl", "bacedfgihjlk"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn damerau_levenshtein_diff_longer() {
|
||||
let a = "The quick brown fox jumped over the angry dog.";
|
||||
let b = "Lehem ipsum dolor sit amet, dicta latine an eam.";
|
||||
assert_eq!(36, damerau_levenshtein(a, b));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn damerau_levenshtein_beginning_transposition() {
|
||||
assert_eq!(1, damerau_levenshtein("foobar", "ofobar"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn damerau_levenshtein_end_transposition() {
|
||||
assert_eq!(1, damerau_levenshtein("specter", "spectre"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn damerau_levenshtein_unrestricted_edit() {
|
||||
assert_eq!(3, damerau_levenshtein("a cat", "an abct"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalized_damerau_levenshtein_diff_short() {
|
||||
assert!((normalized_damerau_levenshtein("levenshtein", "löwenbräu") - 0.27272).abs() < 0.00001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalized_damerau_levenshtein_for_empty_strings() {
|
||||
assert!((normalized_damerau_levenshtein("", "") - 1.0).abs() < 0.00001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalized_damerau_levenshtein_first_empty() {
|
||||
assert!(normalized_damerau_levenshtein("", "flower").abs() < 0.00001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalized_damerau_levenshtein_second_empty() {
|
||||
assert!(normalized_damerau_levenshtein("tree", "").abs() < 0.00001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalized_damerau_levenshtein_identical_strings() {
|
||||
assert!((normalized_damerau_levenshtein("sunglasses", "sunglasses") - 1.0).abs() < 0.00001);
|
||||
}
|
||||
}
|
||||
49
clamav/libclamav_rust/.cargo/vendor/strsim/tests/lib.rs
vendored
Normal file
49
clamav/libclamav_rust/.cargo/vendor/strsim/tests/lib.rs
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
extern crate strsim;
|
||||
|
||||
use strsim::{hamming, levenshtein, normalized_levenshtein, osa_distance,damerau_levenshtein,
|
||||
normalized_damerau_levenshtein, jaro, jaro_winkler};
|
||||
|
||||
#[test]
|
||||
fn hamming_works() {
|
||||
match hamming("hamming", "hammers") {
|
||||
Ok(distance) => assert_eq!(3, distance),
|
||||
Err(why) => panic!("{:?}", why)
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn levenshtein_works() {
|
||||
assert_eq!(3, levenshtein("kitten", "sitting"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalized_levenshtein_works() {
|
||||
assert!((normalized_levenshtein("kitten", "sitting") - 0.57142).abs() < 0.00001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn osa_distance_works() {
|
||||
assert_eq!(3, osa_distance("ac", "cba"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn damerau_levenshtein_works() {
|
||||
assert_eq!(2, damerau_levenshtein("ac", "cba"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalized_damerau_levenshtein_works() {
|
||||
assert!((normalized_damerau_levenshtein("levenshtein", "löwenbräu") - 0.27272).abs() < 0.00001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_works() {
|
||||
assert!((0.392 - jaro("Friedrich Nietzsche", "Jean-Paul Sartre")).abs() <
|
||||
0.001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaro_winkler_works() {
|
||||
assert!((0.911 - jaro_winkler("cheeseburger", "cheese fries")).abs() <
|
||||
0.001);
|
||||
}
|
||||
Reference in New Issue
Block a user