Решение на Spell Checker от Николай Захаров

Обратно към всички решения

Към профила на Николай Захаров

Резултати

  • 20 точки от тестове
  • 0 бонус точки
  • 20 точки общо
  • 15 успешни тест(а)
  • 0 неуспешни тест(а)

Код

use std::collections::{HashMap, HashSet};
pub fn clean_line(input: &str) -> String {
input
.chars()
.filter(is_valid_char)
.collect::<String>()
.trim()
.to_string()
}
/// Returns `true` for the characters that are allowed to stay in a line:
/// alphabetic characters, whitespace, the apostrophe and the hyphen.
fn is_valid_char(c: &char) -> bool {
    match *c {
        '\'' | '-' => true,
        other => other.is_alphabetic() || other.is_whitespace(),
    }
}
/// Normalizes a word: surrounding whitespace is trimmed first, then the
/// remainder is converted to lowercase.
fn sanitize_word(word: &str) -> String {
    let trimmed = word.trim();
    trimmed.to_lowercase()
}
#[derive(Debug)]
pub struct WordCounter {
words: HashMap<String, u32>,
}
impl WordCounter {
pub fn new() -> Self {
WordCounter {
words: HashMap::new(),
}
}
pub fn from_str(input: &str) -> Self {
let mut new_word_counter = WordCounter::new();
let clean_lines = input.split('\n').map(clean_line);
for line in clean_lines {
line.split(' ').for_each(|word| new_word_counter.add(&word));
}
new_word_counter
}
pub fn words(&self) -> Vec<&String> {
let mut words_to_sort = self.words.keys().collect::<Vec<&String>>();
words_to_sort.sort();
words_to_sort
}
pub fn add(&mut self, item: &str) {
let sanitized_word = sanitize_word(item);
if sanitized_word == "" {
return;
}
self.words
.entry(sanitized_word)
.and_modify(|count| *count += 1)
.or_insert(1);
}
pub fn get(&self, word: &str) -> u32 {
let sanitized_word = sanitize_word(word);
match self.words.get(&sanitized_word) {
Some(count) => *count,
None => 0,
}
}
pub fn total_count(&self) -> u32 {
self.words.values().sum()
}
}
/// Prints a header line with the total count, followed by one `word: count`
/// line per word, most frequent first.
///
/// Words with equal counts are listed alphabetically so that the output does
/// not depend on the iteration order of the underlying hash map.
impl std::fmt::Display for WordCounter {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        writeln!(f, "WordCounter, total count: {}", self.total_count())?;

        let mut entries: Vec<(&String, &u32)> = self.words.iter().collect();
        entries.sort_by(|(word_a, count_a), (word_b, count_b)| {
            // Descending by count, then ascending by word as a tie-breaker.
            count_b.cmp(count_a).then_with(|| word_a.cmp(word_b))
        });

        for (word, count) in entries {
            writeln!(f, "{}: {}", word, count)?;
        }
        Ok(())
    }
}
/// The 26 lowercase letters of the English alphabet.
pub const ALPHABET_EN: &str = "abcdefghijklmnopqrstuvwxyz";
/// The 30 lowercase letters of the Bulgarian alphabet.
pub const ALPHABET_BG: &str = "абвгдежзийклмнопрстуфхцчшщъьюя";
/// Spelling corrector that suggests the most frequent corpus word within at
/// most two single-character edits of the input.
#[derive(Debug)]
pub struct SpellChecker {
    /// Word frequencies gathered from the corpus.
    word_counter: WordCounter,
    /// Characters that may be inserted or substituted when generating edits.
    alphabet: String,
}

impl SpellChecker {
    /// Builds a checker whose vocabulary is counted from `corpus` and whose
    /// edits use the characters of `alphabet`.
    pub fn new(corpus: &str, alphabet: &str) -> Self {
        SpellChecker {
            word_counter: WordCounter::from_str(corpus),
            alphabet: String::from(alphabet),
        }
    }

    /// Returns the best correction for `word`.
    ///
    /// The word is normalized with `sanitize_word` and the candidate with the
    /// highest corpus count wins. Candidates with equal counts are resolved
    /// in favour of the lexicographically smallest one, so the result does
    /// not depend on hash-set iteration order. If nothing suitable is known,
    /// the normalized word itself is returned.
    pub fn correction(&self, word: &str) -> String {
        let word = sanitize_word(word);
        self.candidates(&word)
            .into_iter()
            // All probabilities share one denominator, so comparing the raw
            // counts gives the same ranking without floating point.
            .max_by(|a, b| {
                self.word_counter
                    .get(a)
                    .cmp(&self.word_counter.get(b))
                    .then_with(|| b.cmp(a))
            })
            // `candidates` never returns an empty list; this is a fallback.
            .unwrap_or(word)
    }

    /// Returns the relative frequency of `word` in the corpus.
    ///
    /// For an empty corpus this is `0 / 0`, i.e. NaN.
    pub fn probability(&self, word: &str) -> f64 {
        self.word_counter.get(word) as f64 / self.word_counter.total_count() as f64
    }

    /// Returns the entries of `words` that occur in the corpus.
    pub fn known<'a>(&self, words: &'a HashSet<String>) -> Vec<&'a String> {
        words
            .iter()
            .filter(|word| self.word_counter.get(word) > 0)
            .collect()
    }

    /// Owned copies of the entries of `words` that occur in the corpus.
    fn known_owned(&self, words: &HashSet<String>) -> Vec<String> {
        self.known(words).into_iter().cloned().collect()
    }

    /// Returns the possible corrections of `word`, taken from the first
    /// non-empty group among: the word itself if known, known words one edit
    /// away, known words two edits away, and finally the word itself.
    ///
    /// The order of the returned candidates is unspecified.
    pub fn candidates(&self, word: &str) -> Vec<String> {
        if self.word_counter.get(word) > 0 {
            return vec![word.to_string()];
        }

        let one_edit_away = self.known_owned(&self.edits1(word));
        if !one_edit_away.is_empty() {
            return one_edit_away;
        }

        // Only computed when needed: the two-edit set is by far the largest.
        let two_edits_away = self.known_owned(&self.edits2(word));
        if !two_edits_away.is_empty() {
            return two_edits_away;
        }

        vec![word.to_string()]
    }

    /// Returns every string obtainable from `word` by one insertion,
    /// deletion, transposition of neighbours, or replacement.
    ///
    /// Inserted and replacing characters come from the checker's alphabet.
    /// The unchanged `word` is always part of the result.
    pub fn edits1(&self, word: &str) -> HashSet<String> {
        let mut edits = HashSet::new();

        // Every position where `word` can be cut in two on a character
        // boundary, including the cut after the last character. Working with
        // byte offsets of boundaries keeps this correct for multi-byte text.
        let boundaries = word
            .char_indices()
            .map(|(index, _)| index)
            .chain(std::iter::once(word.len()));

        for boundary in boundaries {
            let (left, right) = word.split_at(boundary);
            let mut tail = right.chars();
            let first = tail.next();
            let after_first = tail.as_str();

            // Deletion of the first character of `right`. When `right` is
            // empty this yields `word` itself.
            edits.insert(format!("{}{}", left, after_first));

            // Transposition of the first two characters of `right`.
            if let Some(first) = first {
                let mut rest = after_first.chars();
                if let Some(second) = rest.next() {
                    edits.insert(format!("{}{}{}{}", left, second, first, rest.as_str()));
                }
            }

            for letter in self.alphabet.chars() {
                // Insertion of `letter` at the cut.
                edits.insert(format!("{}{}{}", left, letter, right));
                // Replacement of the first character of `right` by `letter`.
                edits.insert(format!("{}{}{}", left, letter, after_first));
            }
        }

        edits
    }

    /// Returns every string obtainable from `word` by applying `edits1` twice.
    pub fn edits2(&self, word: &str) -> HashSet<String> {
        self.edits1(word)
            .iter()
            .flat_map(|once_edited| self.edits1(once_edited))
            .collect()
    }
}

Лог от изпълнението

Compiling solution v0.1.0 (/tmp/d20200114-2173579-1nmtjig/solution)
    Finished test [unoptimized + debuginfo] target(s) in 4.92s
     Running target/debug/deps/solution-a73e64ec87929bd0

running 0 tests

test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out

     Running target/debug/deps/solution_test-38971695424b36d5

running 15 tests
test solution_test::test_best_word_is_returned ... ok
test solution_test::test_clean_line_removes_punctuation ... ok
test solution_test::test_clean_line_trims_the_input ... ok
test solution_test::test_correction ... ok
test solution_test::test_correction_fails_to_produce_new_result ... ok
test solution_test::test_correction_normalizes_case ... ok
test solution_test::test_counting ... ok
test solution_test::test_display ... ok
test solution_test::test_edits1 ... ok
test solution_test::test_edits2 ... ok
test solution_test::test_empty_counter ... ok
test solution_test::test_from_empty_str ... ok
test solution_test::test_from_str ... ok
test solution_test::test_known_words ... ok
test solution_test::test_probability ... ok

test result: ok. 15 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out

   Doc-tests solution

running 0 tests

test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out

История (1 версия и 0 коментара)

Николай качи първо решение на 14.01.2020 14:25 (преди над 5 години)