Решение на Spell Checker от Ангел Беширов

Резултати

20 точки от тестове
0 бонус точки
20 точки общо

15 успешни тест(а)
0 неуспешни тест(а)

Код

use std::collections::HashSet;

use std::collections::HashMap;

pub const ALPHABET_EN: &'static str = "abcdefghijklmnopqrstuvwxyz";

pub const ALPHABET_BG: &'static str = "абвгдежзийклмнопрстуфхцчшщъьюя";

pub fn clean_line(input: &str) -> String {

     let mut cleaned = String::new();

     for c in input.trim().chars() { 

         if c.is_alphabetic() || c.is_whitespace() || c == '\'' || c == '-' {

            cleaned.push(c);

     cleaned

pub struct WordCounter {

    counter: HashMap<String, u32>,

impl WordCounter {

    pub fn new() -> Self {

        WordCounter {

            counter: HashMap::new(),

    pub fn from_str(input: &str) -> Self {

        let mut word_counter: WordCounter = WordCounter::new();

        for line in input.lines() {

            let line = clean_line(line);

            for word in line.split_whitespace() {

                word_counter.add(word);

        word_counter

    pub fn words(&self) -> Vec<&String> {

        let mut words: Vec<&String> = Vec::new();

        for key in self.counter.keys() {

            words.push(key);

        words.sort();

        words

    pub fn add(&mut self, item: &str) {

        let cleaned: String = item.to_string().trim().to_lowercase();

        if !cleaned.is_empty() {

            let current: u32 = {

                match self.counter.get(&cleaned) {

                    Some(x) => *x,

            self.counter.insert(cleaned, current + 1);

    pub fn get(&self, word: &str) -> u32 {

        match self.counter.get(word) {

             Some(x) => *x,

             None => 0,

    pub fn total_count(&self) -> u32 {

        self.counter.values().sum()

impl std::fmt::Display for WordCounter {

    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {

        writeln!(f, "WordCounter, total count: {}", self.total_count())?;

        let mut sorted: Vec<(&String, &u32)> = self.counter.iter().collect();

        sorted.sort_by(|a, b| b.1.cmp(a.1));

        for entry in sorted {

            writeln!(f, "{}: {}", entry.0, entry.1)?;

        Ok(())

pub struct SpellChecker {

    word_counter: WordCounter,

    alphabet: String,

impl SpellChecker {

    pub fn new(corpus: &str, alphabet: &str) -> Self {

        SpellChecker {

            word_counter: WordCounter::from_str(corpus),

            alphabet: alphabet.to_string(),

    pub fn correction(&self, word: &str) -> String {

        let corrected_word: String = word.to_string().trim().to_lowercase();

        let mut max_probability: f64 = -1.0;

        let mut most_viable_correction: String = corrected_word.clone();

        for candidate in self.candidates(&corrected_word) {

            let probability: f64 = self.probability(&candidate);

            if probability > max_probability {

                max_probability = probability;

                most_viable_correction = candidate;

        most_viable_correction

    pub fn probability(&self, word: &str) -> f64 {

        let total_count: f64 = self.word_counter.total_count() as f64;

        if total_count == 0.0 {

            // if there are no words in the corpus -> 0 probability

            return 0.0;

        self.word_counter.get(word) as f64 / total_count

    pub fn known<'a>(&self, words: &'a HashSet<String>) -> Vec<&'a String> {

        let mut known_words: Vec<&String> = Vec::new();

        for word in words {

            if self.word_counter.get(word) > 0 {

                known_words.push(word);

        known_words

    pub fn candidates(&self, word: &str) -> Vec<String> {

        let mut word_candidates: Vec<String> = Vec::new();

        let mut candidates_check: HashSet<String> = HashSet::new();

        candidates_check.insert(word.to_string());

        for candidate in self.known(&candidates_check) {

            word_candidates.push(candidate.clone());

        if word_candidates.len() > 0 {

            return word_candidates;

        let edits1 = self.edits1(word);

        for candidate in self.known(&edits1) {

            word_candidates.push(candidate.clone());

        if word_candidates.len() > 0 {

            return word_candidates;

        let edits2 = self.edits2(word);

        for candidate in self.known(&edits2) {

            word_candidates.push(candidate.clone());

        if word_candidates.len() == 0 {

            word_candidates.push(word.to_string());

        word_candidates

    pub fn edits1(&self, word: &str) -> HashSet<String> {

        let mut edits: HashSet<String> = HashSet::new();

        edits.extend(self.remove_one_char(word));

        edits.extend(self.swap_consecutive_chars(word));

        edits.extend(self.replace_from_alphabet(word));

        edits.extend(self.insert_from_alphabet(word));

        edits

    pub fn edits2(&self, word: &str) -> HashSet<String> {

        let mut edits: HashSet<String> = HashSet::new();

        let edits1: HashSet<String> = self.edits1(word);

        for edits1_word in edits1 {

            edits.extend(self.edits1(&edits1_word));

        edits

    pub fn remove_one_char(&self, word: &str) -> HashSet<String> {

        let mut edits: HashSet<String> = HashSet::new();

        let len: usize = word.chars().count();

        let word = word.to_string();

        for i in 0..len {

            let mut chars: Vec<char> = word.chars().collect();

            chars.remove(i);

            let edited: String = chars.iter().collect();

            edits.insert(edited);

        edits

    pub fn swap_consecutive_chars(&self, word: &str) -> HashSet<String> {

        let mut edits: HashSet<String> = HashSet::new();

        let len: usize = word.chars().count();

        let word = word.to_string();

        for i in 0..(len - 1) {

            let mut chars: Vec<char> = word.chars().collect();

            chars.swap(i, i + 1);

            let edited: String = chars.iter().collect();

            edits.insert(edited);

        edits

    pub fn replace_from_alphabet(&self, word: &str) -> HashSet<String> {

        let mut edits: HashSet<String> = HashSet::new();

        let len: usize = word.chars().count();

        let word = word.to_string();

        for i in 0..len {

            let mut chars: Vec<char> = word.chars().collect();

            chars.remove(i);

            for c in self.alphabet.chars() { 

                chars.insert(i, c);

                let edited: String = chars.iter().collect();

                edits.insert(edited);

                chars.remove(i);

        edits

    pub fn insert_from_alphabet(&self, word: &str) -> HashSet<String> {

        let mut edits: HashSet<String> = HashSet::new();

        let len: usize = word.chars().count();

        let word = word.to_string();

        for i in 0..(len + 1) {

            let mut chars: Vec<char> = word.chars().collect();

            for c in self.alphabet.chars() { 

                chars.insert(i, c);

                let edited: String = chars.iter().collect();

                edits.insert(edited);

                chars.remove(i);

        edits

fn from_vec_to_hash_set(data: Vec<&str>) -> HashSet<String> {

    let mut set: HashSet<String> = HashSet::new();

    for word in data {

        set.insert(word.to_string());

    set

#[test]

fn test_basic() {

    let line = String::from(" Foo Bar ");

    assert_eq!("Foo Bar", clean_line(&line));

    let mut counter = WordCounter::new();

    assert_eq!(counter.words(), WordCounter::from_str("").words());

    assert_eq!(counter.get("word"), 0);

    assert_eq!(counter.total_count(), 0);

    assert_eq!(format!("{}", counter), "WordCounter, total count: 0\n");

    counter.add("word");

    let spell_checker = SpellChecker::new("foo bar", ALPHABET_EN);

    assert_eq!(spell_checker.correction("foo"), "foo");

    assert!(spell_checker.probability("foo") > 0.0);

    assert!(spell_checker.known(&HashSet::new()).len() == 0);

    assert!(spell_checker.candidates("foo").len() > 0);

    assert!(spell_checker.edits1("foo").len() > 0);

    assert!(spell_checker.edits2("foo").len() > 0);

#[test]

fn test_cleaning_line() {

    let to_be_cleaned = String::from("This sentece %-%% has to be 'cleared'!!!$");

    assert_eq!(clean_line(&to_be_cleaned), "This sentece - has to be 'cleared'");

#[test]

fn test_cleaning_line_trimming() {

    let to_be_cleaned = String::from("     This sh0o()uld be trimmed???     ");

    assert_eq!(clean_line(&to_be_cleaned), "This should be trimmed");

#[test]

fn test_empty_counter() {

    let word_counter: WordCounter = WordCounter::new();

    assert_eq!(word_counter.words(), Vec::<&String>::new());

    assert_eq!(word_counter.total_count(), 0);

    assert_eq!(word_counter.get(&String::from("test")), 0);

#[test]

fn test_creating_counter_unique_words() {

    let text: String = String::from("Lorem Ipsum is simply dummy text of the printing and typesetting industry.\n");

    let word_counter: WordCounter = WordCounter::from_str(&text);

    assert_eq!(word_counter.words(), vec!["and", "dummy", "industry", "ipsum", "is", "lorem", "of", "printing", "simply", "text", "the", "typesetting"]);

    assert_eq!(word_counter.total_count(), 12);

    assert_eq!(word_counter.get(&String::from("ipsum")), 1);

#[test]

fn test_creating_counter_repeated_word_cyrilic() {

    let text: String = String::from("Тест на текст, написан на кирилица а не на текст на латиница :(");

    let word_counter: WordCounter = WordCounter::from_str(&text);

    assert_eq!(word_counter.words(), vec!["а", "кирилица", "латиница", "на", "написан", "не", "текст", "тест"]);

    assert_eq!(word_counter.total_count(), 12);

    assert_eq!(word_counter.get(&String::from("текст")), 2);

#[test]

fn test_add_over_repeating() {

    let text: String = String::from("Тест на текст, написан на кирилица а не на текст на латиница :(");

    let mut word_counter: WordCounter = WordCounter::from_str(&text);

    assert_eq!(word_counter.words(), vec!["а", "кирилица", "латиница", "на", "написан", "не", "текст", "тест"]);

    assert_eq!(word_counter.total_count(), 12);

    assert_eq!(word_counter.get(&String::from("текст")), 2);

    word_counter.add("текст");

    assert_eq!(word_counter.total_count(), 13);

    assert_eq!(word_counter.get(&String::from(&String::from("текст"))), 3);

#[test]

fn test_new_spellchecker() {

    let text: String = String::from("Кратък текст, който ще се ползва за корпус");

    let spell_checker: SpellChecker = SpellChecker::new(&text, &ALPHABET_BG);

    println!("{}", spell_checker.correction(&String::from("краТк")));

    assert_eq!(spell_checker.correction(&String::from("краТк")), String::from("кратък"));  

#[test]

fn test_removing_one_char() {

    let text: String = String::from("Кратък текст, който ще се ползва за корпус");

    let spell_checker: SpellChecker = SpellChecker::new(&text, &ALPHABET_BG);

    let expected: HashSet<String> = from_vec_to_hash_set(vec!["ексст", "тксст", "тесст", "текст", "тексс"]);

    assert_eq!(spell_checker.remove_one_char(&String::from("тексст")), expected);  

#[test]

fn test_swap_consecutive_chars() {

    let text: String = String::from("Кратък текст, който ще се ползва за корпус");

    let spell_checker: SpellChecker = SpellChecker::new(&text, &ALPHABET_BG);

    let expected: HashSet<String> = from_vec_to_hash_set(vec!["етксст", "ткесст", "тескст", "тексст", "текстс"]);

    assert_eq!(spell_checker.swap_consecutive_chars(&String::from("тексст")), expected);  

#[test]

fn test_replace_from_alphabet() {

    let text: String = String::from("Кратък текст, който ще се ползва за корпус");

    let spell_checker: SpellChecker = SpellChecker::new(&text, &ALPHABET_BG);

    let expected: HashSet<String> = from_vec_to_hash_set(vec!["стф", "вмф", "ътф", "втю", "вдф", "вьф", "лтф", "веф", "впф", "вшф", "вта", 

    "итф", "хтф", "вту", "втф", "ктф", "вуф", "втг", "фтф", "ттф", "утф", "внф", "вхф", "втч", "чтф", "втъ", "отф", "вть", "втя", "бтф", "вйф", 

    "вяф", "втд", "ютф", "вчф", "втй", "вфф", "ртф", "вцф", "втх", "вто", "ьтф", "етф", "вюф", "штф", "гтф", "втл", "дтф", "йтф", "ввф", "вщф", 

    "втк", "втн", "влф", "втм", "вте", "втз", "втш", "воф", "втж", "втц", "зтф", "виф", "втщ", "ятф", "втв", "втт", "нтф", "птф", "вкф", "всф", 

    "жтф", "ваф", "втп", "втр", "вжф", "атф", "мтф", "щтф", "взф", "втс", "врф", "вгф", "вти", "вбф", "въф", "втб", "цтф"]);

    let actual = spell_checker.replace_from_alphabet(&String::from("втф"));

    assert_eq!(actual.len(), 3 * 30 - 2); // 3 chars = 3 repetitions only 1 of them is added

    assert_eq!(actual, expected);  

#[test]

fn test_insert_from_alphabet() {

    let text: String = String::from("Кратък текст, който ще се ползва за корпус");

    let spell_checker: SpellChecker = SpellChecker::new(&text, &ALPHABET_BG);

    let expected: HashSet<String> = from_vec_to_hash_set(vec!["втфф", "чвтф", "втуф", "втфю", "втоф", "влтф", "вбтф", "витф", "рвтф", "ювтф", "втфр", "втфх", 

    "вътф", "втшф", "ьвтф", "втчф", "вютф", "втфи", "вутф", "втнф", "втфо", "вктф", "втиф", "втгф", "втцф", "вцтф", "втфщ", "жвтф", "втфм", "вхтф", "ввтф", "вптф", 

    "евтф", "втьф", "вжтф", "втфк", "лвтф", "пвтф", "втфд", "звтф", "втъф", "втфз", "йвтф", "вмтф", "встф", "втзф", "втфц", "ъвтф", "вттф", "вотф", "хвтф", "швтф", 

    "внтф", "ивтф", "втфб", "взтф", "вфтф", "двтф", "втмф", "втсф", "щвтф", "втфн", "втщф", "втфг", "втпф", "вртф", "втеф", "втхф", "овтф", "втйф", "автф", "втрф", 

    "бвтф", "втфв", "вгтф", "вйтф", "втюф", "втбф", "свтф", "втфж", "цвтф", "втфй", "втфш", "вчтф", "нвтф", "втфл", "втвф", "втаф", "мвтф", "вштф", "вткф", "квтф", 

    "вдтф", "втфс", "втфт", "втфч", "втфе", "втфп", "втфа", "втфъ", "ватф", "втфу", "втяф", "вщтф", "втжф", "втлф", "втфь", "гвтф", "фвтф", "явтф", "втдф", "увтф", 

    "твтф", "вьтф", "втфя", "ветф", "вятф"]);

    let actual = spell_checker.insert_from_alphabet(&String::from("втф"));

    assert_eq!(actual.len(), 4 * 30 - 3); // ввтф вттф втфф are repeated

    assert_eq!(actual, expected);  

#[test]

fn test_probability_more_than_once() {

    let text: String = String::from("Кратък текст с повтаряща се дума, който ще се ползва за корпус.\n Корпус123412 КоРпУс");

    let spell_checker: SpellChecker = SpellChecker::new(&text, &ALPHABET_BG);

    assert_eq!(spell_checker.probability(&String::from("корпус")), 3.0 / 14.0);

#[test]

fn test_probability_once() {

    let text: String = String::from("Кратък текст с повтаряща се дума, който ще се ползва за корпус.\n Корпус123412 КоРпУс");

    let spell_checker: SpellChecker = SpellChecker::new(&text, &ALPHABET_BG);

    assert_eq!(spell_checker.probability(&String::from("кратък")), 1.0 / 14.0);

#[test]

fn test_known_words() {

    let text: String = String::from("Кратък текст с повтаряща се дума, който ще се ползва за корпус.\n Корпус123412 КоРпУс");

    let spell_checker: SpellChecker = SpellChecker::new(&text, &ALPHABET_BG);

    let words_to_test = from_vec_to_hash_set(vec!["кратък", "ползва", "баба"]);

    let actual: Vec<&String> = spell_checker.known(&words_to_test);

    assert!(actual.contains(&&String::from("кратък")));

    assert!(actual.contains(&&String::from("ползва")));

#[test]

fn test_unknown_words() {

    let text: String = String::from("Кратък текст с повтаряща се дума, който ще се ползва за корпус.\n Корпус123412 КоРпУс");

    let spell_checker: SpellChecker = SpellChecker::new(&text, &ALPHABET_BG);

    let words_to_test = from_vec_to_hash_set(vec!["адсасдасдсда", "бабабаба", "баба"]);

    assert_eq!(spell_checker.known(&words_to_test), Vec::<&String>::new());

#[test]

fn test_edits1() {

    let text: String = String::from("Кратък текст с повтаряща се дума, който ще се ползва за корпус.\n Корпус123412 КоРпУс");

    let test_alphabet: String = String::from("абц");

    let spell_checker: SpellChecker = SpellChecker::new(&text, &test_alphabet);

    let expected: HashSet<String> = from_vec_to_hash_set(vec!["ума", "дма", "дуа", "дум", "удма", "дмуа", "дуам", "адума", "даума", "дуама", "думаа", 

    "бдума", "дбума", "дубма", "думба", "думаб", "цдума", "дцума", "дуцма", "думца", "думац", "аума", "дама", "дуаа", "дума", "бума", "дбма", "дуба", 

    "думб", "цума", "дцма", "дуца", "думц"]);

    let actual: HashSet<String> = spell_checker.edits1("дума");

    assert_eq!(actual, expected);

#[test]

fn test_edits2() {

    let text: String = String::from("Кратък текст с повтаряща се дума, който ще се ползва за корпус.\n Корпус123412 КоРпУс");

    let test_alphabet: String = String::from("аб");

    let spell_checker: SpellChecker = SpellChecker::new(&text, &test_alphabet);

    let expected: HashSet<String> = from_vec_to_hash_set(vec!["фпб", "аф", "бб", "апаф", "бп", "фб", "пфба", "афа", "бфб", "апфб", "пфаа", "б", "бфп", 

    "апфа", "бпфа", "пфбб", "пф", "бпф", "ап", "фа", "бпб", "пааф", "а", "апф", "бпаф", "пбф", "пфаб", "фбп", "бпфб", "апа", "бпбф", "ф", "пб", "пбаф", 

    "апб", "пфб", "пбфб", "аапф", "афб", "пбб", "аа", "пфа", "пабф", "паф", "паа", "ба", "пбфа", "апбф", "афп", "паб", "пафб", "", "пббф", "бапф", "п", 

    "фпа", "пафа", "ббф", "абпф", "абф", "ааф", "ббпф", "бфа", "пба", "бпа", "бф", "па", "баф", "аб", "фап"]);

    let actual: HashSet<String> = spell_checker.edits2("пф");

    assert_eq!(actual, expected);

#[test]

fn test_candidates_direct() {

    let text: String = String::from("Lorem Ipsum is simply dummy text of the printing and typesetting industry.\n");

    let spell_checker: SpellChecker = SpellChecker::new(&text, &ALPHABET_EN);

    let expected: Vec<String> = vec![String::from("ipsum")];

    let actual: Vec<String> = spell_checker.candidates("ipsum");

    assert_eq!(actual, expected);

#[test]

fn test_candidates_edits1() {

    let text: String = String::from("Lorem Ipsum is simply dummy text of the printing and typesetting industry.\n");

    let spell_checker: SpellChecker = SpellChecker::new(&text, &ALPHABET_EN);

    let expected: Vec<String> = vec![String::from("ipsum")];

    let actual: Vec<String> = spell_checker.candidates("isum");

    assert_eq!(actual, expected);

#[test]

fn test_candidates_edits2() {

    let text: String = String::from("Lorem Ipsum is simply dummy text of the printing and typesetting industry.\n");

    let spell_checker: SpellChecker = SpellChecker::new(&text, &ALPHABET_EN);

    let expected: Vec<String> = vec![String::from("simply")];

    let actual: Vec<String> = spell_checker.candidates("simy");

    assert_eq!(actual, expected);

#[test]

fn test_candidates_unknown() {

    let text: String = String::from("Lorem Ipsum is simply dummy text of the printing and typesetting industry.\n");

    let spell_checker: SpellChecker = SpellChecker::new(&text, &ALPHABET_EN);

    let expected: Vec<String> = vec![String::from("unknown")];

    let actual: Vec<String> = spell_checker.candidates("unknown");

    assert_eq!(actual, expected);

#[test]

fn test_correction_same_probability() {

    let text: String = String::from("Lorem Ipsum is simply dummy text of the printing and typesetting industry.\n");

    let spell_checker: SpellChecker = SpellChecker::new(&text, &ALPHABET_EN);

    let expected: String = String::from("dummy");

    let actual: String = spell_checker.correction("dumy");

    assert_eq!(actual, expected);

#[test]

fn test_correction_greater_probability() {

    let text: String = String::from("Lorem Ipsum is simply simply simpl text of the printing and typesetting industry.\n");

    let spell_checker: SpellChecker = SpellChecker::new(&text, &ALPHABET_EN);

    let expected: String = String::from("simply");

    let actual: String = spell_checker.correction("simplu");

    assert_eq!(actual, expected);

#[test]

fn test_correction_unknown() {

    let text: String = String::from("Lorem Ipsum is simply simply simpl text of the printing and typesetting industry.\n");

    let spell_checker: SpellChecker = SpellChecker::new(&text, &ALPHABET_EN);

    let expected: String = String::from("unknown");

    let actual: String = spell_checker.correction("unknown");

    assert_eq!(actual, expected);

Лог от изпълнението

Compiling solution v0.1.0 (/tmp/d20200114-2173579-af9frh/solution)
warning: function is never used: `from_vec_to_hash_set`
   --> src/lib.rs:276:1
    |
276 | fn from_vec_to_hash_set(data: Vec<&str>) -> HashSet<String> {
    | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    |
    = note: `#[warn(dead_code)]` on by default

    Finished test [unoptimized + debuginfo] target(s) in 6.41s
     Running target/debug/deps/solution-a73e64ec87929bd0

running 25 tests
test test_add_over_repeating ... ok
test test_basic ... ok
test test_candidates_direct ... ok
test test_candidates_edits1 ... ok
test test_candidates_edits2 ... ok
test test_candidates_unknown ... ok
test test_cleaning_line ... ok
test test_cleaning_line_trimming ... ok
test test_correction_greater_probability ... ok
test test_correction_same_probability ... ok
test test_correction_unknown ... ok
test test_creating_counter_repeated_word_cyrilic ... ok
test test_creating_counter_unique_words ... ok
test test_edits1 ... ok
test test_edits2 ... ok
test test_empty_counter ... ok
test test_insert_from_alphabet ... ok
test test_known_words ... ok
test test_new_spellchecker ... ok
test test_probability_more_than_once ... ok
test test_probability_once ... ok
test test_removing_one_char ... ok
test test_replace_from_alphabet ... ok
test test_swap_consecutive_chars ... ok
test test_unknown_words ... ok

test result: ok. 25 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out

     Running target/debug/deps/solution_test-38971695424b36d5

running 15 tests
test solution_test::test_best_word_is_returned ... ok
test solution_test::test_clean_line_removes_punctuation ... ok
test solution_test::test_clean_line_trims_the_input ... ok
test solution_test::test_correction ... ok
test solution_test::test_correction_fails_to_produce_new_result ... ok
test solution_test::test_correction_normalizes_case ... ok
test solution_test::test_counting ... ok
test solution_test::test_display ... ok
test solution_test::test_edits1 ... ok
test solution_test::test_edits2 ... ok
test solution_test::test_empty_counter ... ok
test solution_test::test_from_empty_str ... ok
test solution_test::test_from_str ... ok
test solution_test::test_known_words ... ok
test solution_test::test_probability ... ok

test result: ok. 15 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out

   Doc-tests solution

running 0 tests

test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out

Програмиране с Rust

Курс във Факултета по Математика и Информатика към СУ

Решение на Spell Checker от Ангел Беширов

Резултати

Код

Лог от изпълнението

История (1 версия и 0 коментара)

Ангел качи първо решение на 14.01.2020 00:00 (преди почти 6 години)