Решение на Spell Checker от Иво Стефанов

Обратно към всички решения

Към профила на Иво Стефанов

Резултати

  • 7 точки от тестове
  • 0 бонус точки
  • 7 точки общо
  • 5 успешни тест(а)
  • 10 неуспешни тест(а)

Код

use std::collections::HashSet;
use std::collections::HashMap;
use std::char;
pub fn clean_line(input: &str) -> String {
let clean_line = input.to_string();
for c in clean_line.chars() {
if !c.is_alphabetic() && !c.is_whitespace() && c!='\'' && c!='-' {
let _ = clean_line.replace(c, "");
}
}
clean_line
}
pub struct WordCounter {
total: u32,
words: HashMap<String, u32>,
}
impl WordCounter {
pub fn new() -> Self {
WordCounter{
total: 0,
words: HashMap::new(),
}
}
pub fn from_str(input: &str) -> Self {
let mut new_word_counter = WordCounter::new();
for line in input.lines(){
let sentence = clean_line(line);
let words_in_line: Vec<&str> = sentence.split(" ").collect();
for word in words_in_line {
new_word_counter.add(word);
}
}
new_word_counter
}
pub fn words(&self) -> Vec<&String> {
let mut vec: Vec<&String> = self.words.keys().collect();
vec.sort();
vec
}
pub fn add(&mut self, item: &str) {
let mut s = item.to_lowercase();
s.retain(|c| !c.is_whitespace());
self.total += 1;
if self.words.contains_key(item) {
let value = self.words.get_mut(item).unwrap();
*value += 1;
}
else {
self.words.insert(item.to_string(), 1);
}
}
pub fn get(&self, word: &str) -> u32 {
let count = self.words.get(word);
if count == None {
return 0;
}
else {
*count.unwrap()
}
}
pub fn total_count(&self) -> u32 {
self.total
}
}
impl std::fmt::Display for WordCounter {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
let _ = write!(f, "WordCounter, total count: {}\n", self.total);
let mut vec: Vec<(String, u32)> = Vec::new();
for it in self.words.iter() {
vec.push((it.0.clone(), it.1.clone()));
}
vec.sort_by_key(|k| k.1);
for it in vec.iter(){
let _ = write!(f, "{}: {}\n", it.0, it.1);
}
Ok(())
}
}
pub const ALPHABET_EN: &'static str = "abcdefghijklmnopqrstuvwxyz";
pub const ALPHABET_BG: &'static str = "абвгдежзийклмнопрстуфхцчшщъьюя";
pub struct SpellChecker {
corpus: WordCounter,
alphabet: Vec<char>,
}
impl SpellChecker {
pub fn new(corpus: &str, alphabet: &str) -> Self {
SpellChecker{
corpus: WordCounter::from_str(corpus),
alphabet: alphabet.chars().collect(),
}
}
pub fn correction(&self, word: &str) -> String {
let vec = self.candidates(word);
if vec[0] == word{
return word.to_string();
}
let mut max_counter = self.corpus.words.get(&vec[0]);
let mut most_encountered_word = vec[0].clone();
for it in vec.iter() {
let current_counter = self.corpus.words.get(it);
if current_counter > max_counter {
max_counter = current_counter;
most_encountered_word = it.clone();
}
}
most_encountered_word
}
pub fn probability(&self, word: &str) -> f64 {
let probability = self.corpus.words.get(word);
if probability != None {
return *probability.unwrap() as f64 / self.corpus.total as f64;
}
else {
return 0.00;
}
}
pub fn known<'a>(&self, words: &'a HashSet<String>) -> Vec<&'a String> {
let mut vec: Vec<&'a String> = Vec::new();
for it in words.iter() {
if self.corpus.words.contains_key(it) {
vec.push(&it);
}
}
vec
}
pub fn candidates(&self, word: &str) -> Vec<String> {
let mut vec_with_word: Vec<String> = Vec::new();
vec_with_word.push(word.to_string());
if self.corpus.words.contains_key(word) {
return vec_with_word;
}
// To produce the same result everytime we search for candidates for the same word we will sort alphabetically the vector
let edits1 = self.edits1(word);
if !edits1.is_empty() {
let mut vec_edits1: Vec<String> = edits1.iter().cloned().collect();
vec_edits1.sort();
return vec_edits1;
}
let edits2 = self.edits2(word);
if !edits2.is_empty() {
let mut vec_edits2: Vec<String> = edits2.iter().cloned().collect();
vec_edits2.sort();
return vec_edits2;
}
vec_with_word
}
// Helping function for edits1 & edits2
pub fn all_edits(&self, word: &str) -> HashSet<String> {
let mut set: HashSet<_> = HashSet::new();
let edit: Vec<_> = word.chars().collect();
// - Една буква изтрита на коя да е позиция
for (index, _) in edit.iter().enumerate() {
let mut edit_delete = edit.clone();
edit_delete.remove(index);
set.insert(edit_delete.into_iter().collect());
}
//println!("DELETED: {:? \n \n}", set);
// - Две букви разменени (една до друга)
for i in 0..(edit.len()-1) {
let mut edit_swap = edit.clone();
edit_swap.swap(i, i + 1);
set.insert(edit_swap.into_iter().collect());
}
//println!("SWAPPED: {:? \n \n}", set);
// - Една буква от азбуката замества коя да е буква от думата
let string: String = edit.clone().iter().collect();
for index in 0..string.len() {
for it_a in self.alphabet.iter(){
let mut edit_replace = string.clone();
if self.alphabet.iter().clone().collect::<String>() == ALPHABET_BG {
edit_replace.replace_range(index*2..index+2, &it_a.to_string());
}
else {
edit_replace.replace_range(index..index+1, &it_a.to_string());
}
set.insert(edit_replace);
}
}
//println!("REPLACED: {:?} \n \n", set);
// - Една буква от азбуката добавена в думата на която и да е позиция
let mut edit_insert = edit.clone();
for index in 1..(edit_insert.len()+1) {
for it_a in self.alphabet.iter(){
edit_insert.insert(index, *it_a);
set.insert(edit_insert.clone().into_iter().collect());
edit_insert.remove(index);
}
}
//println!("ADDED: {:?} \n \n", set);
set
}
pub fn edits1(&self, word: &str) -> HashSet<String> {
let words_set: HashSet<String> = self.corpus.words.keys().cloned().collect();
let edits1 = self.all_edits(word).intersection(&words_set).cloned().collect();
edits1
}
pub fn edits2(&self, word: &str) -> HashSet<String> {
let one_edit_set = self.all_edits(word);
let mut two_edit_set: HashSet<String> = HashSet::new();
for it in one_edit_set.iter() {
two_edit_set = two_edit_set.union(& self.all_edits(it)).cloned().collect();
}
let words_set: HashSet<String> = self.corpus.words.keys().cloned().collect();
let edits2 = two_edit_set.intersection(&words_set).cloned().collect();
edits2
}
}
/*
// Some tests
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn word_counter_test() {
let wc = WordCounter::from_str("acb cbd bcd abc");
assert_eq!(wc.words().len(), 4);
assert_eq!(wc.words(), vec!["abc", "acb", "bcd", "cbd"]);
}
#[test]
fn spell_checker_test() {
let sc = SpellChecker::new("b bzooo bzoaa bzoaa 999 z bzoaa", ALPHABET_EN);
//tests, but on different corpus
//assert_eq!(sc.probability("bazooka"), 2 as f64 / 6 as f64);
//assert_eq!(sc.candidates("bazoka"), vec!["bazooka"]);
//assert_eq!(sc.edits1("bazoka").iter().cloned().collect::<Vec<String>>(), vec!["bazooka"]);
assert_eq!(sc.candidates("bzo"), vec!["b", "bzoaa", "bzooo", "z"]);
assert_eq!(sc.correction("bzo"), "bzoaa");
}
}
*/

Лог от изпълнението

Compiling solution v0.1.0 (/tmp/d20200114-2173579-uku511/solution)
    Finished test [unoptimized + debuginfo] target(s) in 4.42s
     Running target/debug/deps/solution-a73e64ec87929bd0

running 0 tests

test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out

     Running target/debug/deps/solution_test-38971695424b36d5

running 15 tests
test solution_test::test_best_word_is_returned ... ok
test solution_test::test_clean_line_removes_punctuation ... FAILED
test solution_test::test_clean_line_trims_the_input ... FAILED
test solution_test::test_correction ... FAILED
test solution_test::test_correction_fails_to_produce_new_result ... FAILED
test solution_test::test_correction_normalizes_case ... FAILED
test solution_test::test_counting ... FAILED
test solution_test::test_display ... FAILED
test solution_test::test_edits1 ... FAILED
test solution_test::test_edits2 ... FAILED
test solution_test::test_empty_counter ... ok
test solution_test::test_from_empty_str ... ok
test solution_test::test_from_str ... FAILED
test solution_test::test_known_words ... ok
test solution_test::test_probability ... ok

failures:

---- solution_test::test_clean_line_removes_punctuation stdout ----
thread 'main' panicked at 'assertion failed: `(left == right)`
  left: `"foo, bar, baz"`,
 right: `"foo bar baz"`', tests/solution_test.rs:60:5
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace.

---- solution_test::test_clean_line_trims_the_input stdout ----
thread 'main' panicked at 'assertion failed: `(left == right)`
  left: `" foo  "`,
 right: `"foo"`', tests/solution_test.rs:67:5

---- solution_test::test_correction stdout ----
thread 'main' panicked at 'assertion failed: self.is_char_boundary(n)', src/liballoc/string.rs:1602:30

---- solution_test::test_correction_fails_to_produce_new_result stdout ----
thread 'main' panicked at 'assertion failed: `(left == right)`
  left: `"Либоф"`,
 right: `"либоф"`', tests/solution_test.rs:198:5

---- solution_test::test_correction_normalizes_case stdout ----
thread 'main' panicked at 'assertion failed: self.is_char_boundary(n)', src/liballoc/string.rs:1602:30

---- solution_test::test_counting stdout ----
thread 'main' panicked at 'assertion failed: `(left == right)`
  left: `0`,
 right: `1`', tests/solution_test.rs:35:5

---- solution_test::test_display stdout ----
thread 'main' panicked at 'assertion failed: `(left == right)`
  left: `"WordCounter, total count: 1\nOne: 1\n"`,
 right: `"WordCounter, total count: 1\none: 1\n"`', tests/solution_test.rs:49:5

---- solution_test::test_edits1 stdout ----
thread 'main' panicked at 'assertion failed: self.is_char_boundary(n)', src/liballoc/string.rs:1602:30

---- solution_test::test_edits2 stdout ----
thread 'main' panicked at 'assertion failed: self.is_char_boundary(n)', src/liballoc/string.rs:1602:30

---- solution_test::test_from_str stdout ----
thread 'main' panicked at 'assertion failed: `(left == right)`
  left: `["", "Любов,", "Обелих", "а", "варен", "го,", "картоф.", "любов,", "суров", "той"]`,
 right: `["а", "варен", "го", "картоф", "любов", "обелих", "суров", "той"]`', tests/solution_test.rs:88:5


failures:
    solution_test::test_clean_line_removes_punctuation
    solution_test::test_clean_line_trims_the_input
    solution_test::test_correction
    solution_test::test_correction_fails_to_produce_new_result
    solution_test::test_correction_normalizes_case
    solution_test::test_counting
    solution_test::test_display
    solution_test::test_edits1
    solution_test::test_edits2
    solution_test::test_from_str

test result: FAILED. 5 passed; 10 failed; 0 ignored; 0 measured; 0 filtered out

error: test failed, to rerun pass '--test solution_test'

История (1 версия и 0 коментара)

Иво качи първо решение на 14.01.2020 11:36 (преди над 5 години)