Решение на Spell Checker от Ричи Хаджиев
Резултати
- 16 точки от тестове
- 0 бонус точки
- 16 точки общо
- 12 успешни тест(а)
- 3 неуспешни тест(а)
Код
use std::collections::{HashMap, HashSet};
use std::mem::swap;
use std::borrow::Borrow;
use std::fmt;
pub fn clean_line(input: &str) -> String {
let mut output: String = String::new();
for c in input.chars() {
if c.is_alphabetic() || c.is_whitespace() || c == '\'' || c == '-' {
output.push(c);
}
}
output
}
pub struct WordCounter {
data: HashMap<String, u32>
}
impl WordCounter {
pub fn new() -> Self {
WordCounter{
data: HashMap::new()
}
}
pub fn from_str(input: &str) -> Self {
let mut training_data: HashMap<String, u32> = HashMap::new();
let mut word_counter = WordCounter::new();
for line in input.lines() {
let cleaned_line = clean_line(line);
for word in cleaned_line.split(" ") {
word_counter.add(word);
}
}
word_counter
}
pub fn words(&self) -> Vec<&String> {
let mut words = Vec::new();
for word in self.data.keys() {
words.push(word);
}
words.sort();
words
}
pub fn add(&mut self, item: &str) {
let lower_item = item.to_lowercase();
let word = lower_item.trim();
if self.data.contains_key(word) {
*self.data.get_mut(word).unwrap() += 1;
}
else {
if word != "" {
self.data.insert(String::from(word), 1);
}
}
}
pub fn get(&self, word: &str) -> u32 {
if self.data.contains_key(word) {
*self.data.get(word).unwrap()
}
else{
0
}
}
pub fn total_count(&self) -> u32 {
let mut count = 0;
for value in self.data.values() {
count += *value;
}
count
}
}
impl std::fmt::Display for WordCounter {
fn fmt(&self, f: &mut fmt::Formatter) -> std::fmt::Result {
let tmp = self.total_count().to_string();
f.write_str("WordCounter, total count: ");
f.pad(&tmp);
f.write_str("\n");
let mut max_val:u32 = 0;
let mut max_key:String = "".to_string();
let mut map:HashMap<String, u32> = HashMap::new();
for (key, val) in self.data.iter(){
map.insert(key.to_string(), *val);
}
while !map.is_empty() {
for (key, val) in &map{
if val > &max_val {
max_val = *val;
max_key = key.to_string();
}
}
f.write_str(&max_key);
f.write_str(": ");
f.pad(&max_val.to_string());
f.write_str("\n");
map.remove(&max_key);
max_val = 0;
}
Ok(())
}
}
pub const ALPHABET_EN: &'static str = "abcdefghijklmnopqrstuvwxyz";
pub const ALPHABET_BG: &'static str = "абвгдежзийклмнопрстуфхцчшщъьюя";
pub struct SpellChecker {
corpus: String,
alphabet: String,
word_counter: WordCounter
}
impl SpellChecker {
pub fn new(corpus: &str, alphabet: &str) -> Self {
SpellChecker {
corpus: corpus.to_string(),
alphabet: alphabet.to_string(),
word_counter: WordCounter::from_str(corpus)
}
}
pub fn correction(&self, word: &str) -> String {
let lower_word = word.to_lowercase();
let trimmed_word = lower_word.trim();
if self.word_counter.get(trimmed_word) > 0 {
return word.to_string()
}
let mut max = 0;
let mut best_fit = "".to_string();
for edit1_word in self.edits1(trimmed_word) { //TODO: use known method here
let slice: &str = &edit1_word.to_owned()[..];
if self.word_counter.get(slice) > max {
max = self.word_counter.get(slice);
best_fit = edit1_word;
}
}
if max > 0 {
return best_fit
}
for edit2_word in self.edits2(trimmed_word) {
let slice: &str = &edit2_word.to_owned()[..];
if self.word_counter.get(slice) > max {
max = self.word_counter.get(slice);
best_fit = edit2_word;
}
}
if max > 0 {
return best_fit
}
word.to_string()
}
pub fn probability(&self, word: &str) -> f64 {
(self.word_counter.get(word) as f64)/(self.word_counter.total_count() as f64)
}
pub fn known<'a>(&self, words: &'a HashSet<String>) -> Vec<&'a String> {
let mut vec = Vec::new();
for word in words {
if self.word_counter.get(word) > 0 {
vec.push(word);
}
}
vec
}
pub fn candidates(&self, word: &str) -> Vec<String> {
let word_counter:WordCounter;
word_counter = WordCounter::from_str(&self.corpus);
let mut result: Vec<String> = Vec::new();
let mut result1: Vec<String> = Vec::new();
let mut result2: Vec<String> = Vec::new();
let lower : String = word.to_lowercase();
let mut edited1 = HashSet::new();
edited1 = self.edits1(word);
result1 = self.known(&edited1).iter().map(|x| x.to_string()).collect::<Vec<String>>();
let mut edited2 = HashSet::new();
edited2 = self.edits2(word);
result2 = self.known(&edited2).iter().map(|x| x.to_string()).collect::<Vec<String>>();
if word_counter.data.contains_key(&lower){
result.push(word.to_string());
}
else if !result1.is_empty(){
result = result1;
}
else if !result2.is_empty(){
result = result2;
}
else{
result.push(word.to_string());
}
return result
}
pub fn edits1(&self, word: &str) -> HashSet<String> {
let mut set = HashSet::new();
let mut i = 1; //iterator, if the alphabet is cyrillic we set it to 2
if word != "" && word.get(0..1).is_none(){
i = 2;
}
for c in word.chars() {
let one_less_letter_word = word.replace(c, "");
set.insert(one_less_letter_word);
}
for c in word.chars() {
for l in self.alphabet.chars() {
let one_changed_letter_word = word.replace(c, &l.to_string());
if one_changed_letter_word != word.to_string() {
set.insert(one_changed_letter_word);
}
}
}
let mut count = 0;
while count < word.len() - i {
let mut a = word.get(count..count+i).unwrap();
let mut b = word.get(count+i..count+i+i).unwrap();
let mut swapped_word = word.to_string();
swapped_word.replace_range(count..count+i, b);
swapped_word.replace_range(count+i..count+i+i, a);
set.insert(swapped_word);
count += 1;
if i == 2{
count += 1;
}
}
count = 0;
while count <= word.len() {
let mut alphabet = self.alphabet.chars();
let mut a = alphabet.next();
while a.is_some() {
let mut added_letter_word = String::from(word.to_string());
added_letter_word.insert_str(count, &a.unwrap().to_string());
a = alphabet.next();
set.insert(added_letter_word);
}
count += 1;
if i == 2{
count += 1;
}
}
set
}
pub fn edits2(&self, word: &str) -> HashSet<String> {
let mut set = HashSet::new();
for edit1 in self.edits1(word) {
let s = edit1.to_owned();
let s_slice = &s[..];
for edit2 in self.edits1(s_slice) {
set.insert(edit2);
}
}
set
}
}
Лог от изпълнението
Compiling solution v0.1.0 (/tmp/d20200114-2173579-xz4pup/solution) warning: unused import: `std::mem::swap` --> src/lib.rs:2:5 | 2 | use std::mem::swap; | ^^^^^^^^^^^^^^ | = note: `#[warn(unused_imports)]` on by default warning: unused import: `std::borrow::Borrow` --> src/lib.rs:3:5 | 3 | use std::borrow::Borrow; | ^^^^^^^^^^^^^^^^^^^ warning: unused variable: `training_data` --> src/lib.rs:28:17 | 28 | let mut training_data: HashMap<String, u32> = HashMap::new(); | ^^^^^^^^^^^^^ help: consider prefixing with an underscore: `_training_data` | = note: `#[warn(unused_variables)]` on by default warning: value assigned to `result1` is never read --> src/lib.rs:176:17 | 176 | let mut result1: Vec<String> = Vec::new(); | ^^^^^^^ | = note: `#[warn(unused_assignments)]` on by default = help: maybe it is overwritten before being read? warning: value assigned to `result2` is never read --> src/lib.rs:177:17 | 177 | let mut result2: Vec<String> = Vec::new(); | ^^^^^^^ | = help: maybe it is overwritten before being read? warning: value assigned to `edited1` is never read --> src/lib.rs:179:17 | 179 | let mut edited1 = HashSet::new(); | ^^^^^^^ | = help: maybe it is overwritten before being read? warning: value assigned to `edited2` is never read --> src/lib.rs:182:17 | 182 | let mut edited2 = HashSet::new(); | ^^^^^^^ | = help: maybe it is overwritten before being read? warning: variable does not need to be mutable --> src/lib.rs:28:13 | 28 | let mut training_data: HashMap<String, u32> = HashMap::new(); | ----^^^^^^^^^^^^^ | | | help: remove this `mut` | = note: `#[warn(unused_mut)]` on by default warning: variable does not need to be mutable --> src/lib.rs:220:17 | 220 | let mut a = word.get(count..count+i).unwrap(); | ----^ | | | help: remove this `mut` warning: variable does not need to be mutable --> src/lib.rs:221:17 | 221 | let mut b = word.get(count+i..count+i+i).unwrap(); | ----^ | | | help: remove this `mut` warning: unused `std::result::Result` that must be used --> src/lib.rs:82:9 | 82 | f.write_str("WordCounter, total count: "); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = note: `#[warn(unused_must_use)]` on by default = note: this `Result` may be an `Err` variant, which should be handled warning: unused `std::result::Result` that must be used --> src/lib.rs:83:9 | 83 | f.pad(&tmp); | ^^^^^^^^^^^^ | = note: this `Result` may be an `Err` variant, which should be handled warning: unused `std::result::Result` that must be used --> src/lib.rs:84:9 | 84 | f.write_str("\n"); | ^^^^^^^^^^^^^^^^^^ | = note: this `Result` may be an `Err` variant, which should be handled warning: unused `std::result::Result` that must be used --> src/lib.rs:98:13 | 98 | f.write_str(&max_key); | ^^^^^^^^^^^^^^^^^^^^^^ | = note: this `Result` may be an `Err` variant, which should be handled warning: unused `std::result::Result` that must be used --> src/lib.rs:99:13 | 99 | f.write_str(": "); | ^^^^^^^^^^^^^^^^^^ | = note: this `Result` may be an `Err` variant, which should be handled warning: unused `std::result::Result` that must be used --> src/lib.rs:100:13 | 100 | f.pad(&max_val.to_string()); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = note: this `Result` may be an `Err` variant, which should be handled warning: unused `std::result::Result` that must be used --> src/lib.rs:101:13 | 101 | f.write_str("\n"); | ^^^^^^^^^^^^^^^^^^ | = note: this `Result` may be an `Err` variant, which should be handled warning: unused import: `std::mem::swap` --> src/lib.rs:2:5 | 2 | use std::mem::swap; | ^^^^^^^^^^^^^^ | = note: `#[warn(unused_imports)]` on by default warning: unused import: `std::borrow::Borrow` --> src/lib.rs:3:5 | 3 | use std::borrow::Borrow; | ^^^^^^^^^^^^^^^^^^^ warning: unused variable: `training_data` --> src/lib.rs:28:17 | 28 | let mut training_data: HashMap<String, u32> = HashMap::new(); | ^^^^^^^^^^^^^ help: consider prefixing with an underscore: `_training_data` | = note: `#[warn(unused_variables)]` on by default warning: value assigned to `result1` is never read --> src/lib.rs:176:17 | 176 | let mut result1: Vec<String> = Vec::new(); | ^^^^^^^ | = note: `#[warn(unused_assignments)]` on by default = help: maybe it is overwritten before being read? warning: value assigned to `result2` is never read --> src/lib.rs:177:17 | 177 | let mut result2: Vec<String> = Vec::new(); | ^^^^^^^ | = help: maybe it is overwritten before being read? warning: value assigned to `edited1` is never read --> src/lib.rs:179:17 | 179 | let mut edited1 = HashSet::new(); | ^^^^^^^ | = help: maybe it is overwritten before being read? warning: value assigned to `edited2` is never read --> src/lib.rs:182:17 | 182 | let mut edited2 = HashSet::new(); | ^^^^^^^ | = help: maybe it is overwritten before being read? warning: variable does not need to be mutable --> src/lib.rs:28:13 | 28 | let mut training_data: HashMap<String, u32> = HashMap::new(); | ----^^^^^^^^^^^^^ | | | help: remove this `mut` | = note: `#[warn(unused_mut)]` on by default warning: variable does not need to be mutable --> src/lib.rs:220:17 | 220 | let mut a = word.get(count..count+i).unwrap(); | ----^ | | | help: remove this `mut` warning: variable does not need to be mutable --> src/lib.rs:221:17 | 221 | let mut b = word.get(count+i..count+i+i).unwrap(); | ----^ | | | help: remove this `mut` warning: unused `std::result::Result` that must be used --> src/lib.rs:82:9 | 82 | f.write_str("WordCounter, total count: "); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = note: `#[warn(unused_must_use)]` on by default = note: this `Result` may be an `Err` variant, which should be handled warning: unused `std::result::Result` that must be used --> src/lib.rs:83:9 | 83 | f.pad(&tmp); | ^^^^^^^^^^^^ | = note: this `Result` may be an `Err` variant, which should be handled warning: unused `std::result::Result` that must be used --> src/lib.rs:84:9 | 84 | f.write_str("\n"); | ^^^^^^^^^^^^^^^^^^ | = note: this `Result` may be an `Err` variant, which should be handled warning: unused `std::result::Result` that must be used --> src/lib.rs:98:13 | 98 | f.write_str(&max_key); | ^^^^^^^^^^^^^^^^^^^^^^ | = note: this `Result` may be an `Err` variant, which should be handled warning: unused `std::result::Result` that must be used --> src/lib.rs:99:13 | 99 | f.write_str(": "); | ^^^^^^^^^^^^^^^^^^ | = note: this `Result` may be an `Err` variant, which should be handled warning: unused `std::result::Result` that must be used --> src/lib.rs:100:13 | 100 | f.pad(&max_val.to_string()); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = note: this `Result` may be an `Err` variant, which should be handled warning: unused `std::result::Result` that must be used --> src/lib.rs:101:13 | 101 | f.write_str("\n"); | ^^^^^^^^^^^^^^^^^^ | = note: this `Result` may be an `Err` variant, which should be handled Finished test [unoptimized + debuginfo] target(s) in 4.58s Running target/debug/deps/solution-a73e64ec87929bd0 running 0 tests test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out Running target/debug/deps/solution_test-38971695424b36d5 running 15 tests test solution_test::test_best_word_is_returned ... FAILED test solution_test::test_clean_line_removes_punctuation ... ok test solution_test::test_clean_line_trims_the_input ... FAILED test solution_test::test_correction ... ok test solution_test::test_correction_fails_to_produce_new_result ... FAILED test solution_test::test_correction_normalizes_case ... ok test solution_test::test_counting ... ok test solution_test::test_display ... ok test solution_test::test_edits1 ... ok test solution_test::test_edits2 ... ok test solution_test::test_empty_counter ... ok test solution_test::test_from_empty_str ... ok test solution_test::test_from_str ... ok test solution_test::test_known_words ... ok test solution_test::test_probability ... ok failures: ---- solution_test::test_best_word_is_returned stdout ---- thread 'main' panicked at 'assertion failed: `(left == right)` left: `"boot"`, right: `"boat"`', tests/solution_test.rs:216:5 note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace. ---- solution_test::test_clean_line_trims_the_input stdout ---- thread 'main' panicked at 'assertion failed: `(left == right)` left: `" foo "`, right: `"foo"`', tests/solution_test.rs:67:5 ---- solution_test::test_correction_fails_to_produce_new_result stdout ---- thread 'main' panicked at 'assertion failed: `(left == right)` left: `"Либоф"`, right: `"либоф"`', tests/solution_test.rs:198:5 failures: solution_test::test_best_word_is_returned solution_test::test_clean_line_trims_the_input solution_test::test_correction_fails_to_produce_new_result test result: FAILED. 12 passed; 3 failed; 0 ignored; 0 measured; 0 filtered out error: test failed, to rerun pass '--test solution_test'