Решение на Spell Checker от Антонио Миндов

Резултати

20 точки от тестове
0 бонус точки
20 точки общо

15 успешни тест(а)
0 неуспешни тест(а)

Код

use std::collections::HashMap;

use std::collections::HashSet;

use std::iter::FromIterator;

use std::ops::{RangeFrom, Range};

/// Strips everything from a line of text that cannot be part of a word.
///
/// The input is first trimmed of leading and trailing whitespace. After that only
/// alphabetic characters, whitespace, apostrophes (`'`) and hyphens (`-`) are kept;
/// every other character (digits, punctuation, symbols) is dropped.
///
/// Trimming happens before filtering, so whitespace that becomes leading or trailing
/// only after a neighbouring character is removed stays in the result.
pub fn clean_line(input: &str) -> String {
    input
        .trim()
        .chars()
        .filter(|&c| c.is_alphabetic() || c.is_whitespace() || c == '\'' || c == '-')
        .collect()
}

/// Normalises a single word: surrounding whitespace is trimmed and the rest is lowercased.
fn clean_word(input: &str) -> String {
    input.trim().to_lowercase()
}

/// Substring helpers that count in characters instead of bytes.
///
/// All positions are `char` positions, so a multi-byte UTF-8 sequence is never split.
/// Positions past the end of the text are not an error; they simply yield fewer (or no)
/// characters.
trait Utf8Index {
    /// Returns the characters at positions `r.start..r.end`.
    ///
    /// An inverted range (`r.end < r.start`) yields an empty string.
    fn utf8_index(&self, r: Range<usize>) -> String;

    /// Returns every character from position `r.start` up to the end of the text.
    fn utf8_index_from(&self, r: RangeFrom<usize>) -> String;
}

impl Utf8Index for &str {
    fn utf8_index(&self, r: Range<usize>) -> String {
        // A plain `r.end - r.start` would underflow for an inverted range.
        let wanted = r.end.saturating_sub(r.start);
        self.chars().skip(r.start).take(wanted).collect()
    }

    fn utf8_index_from(&self, r: RangeFrom<usize>) -> String {
        self.chars().skip(r.start).collect()
    }
}

pub struct WordCounter {

    word_frequencies: HashMap<String, u32>

impl WordCounter {

    pub fn new() -> Self {

        WordCounter { word_frequencies: HashMap::new() }

    pub fn from_str(input: &str) -> Self {

        let cleaned: Vec<String> = input.lines().map(clean_line).collect();

        let words: Vec<String> = cleaned.iter()

            .flat_map(|l| l.split_whitespace())

            .map(String::from).collect();

        let mut counter = WordCounter { word_frequencies: HashMap::new() };

        words.iter().for_each(|w|counter.add(w));

        counter

    pub fn words(&self) -> Vec<&String> {

        let mut words: Vec<&String> = self.word_frequencies.keys().collect();

        words.sort();

        words

    pub fn add(&mut self, item: &str) {

        let word = clean_word(item);

        *self.word_frequencies.entry(word).or_insert(0) += 1;

    pub fn get(&self, word: &str) -> u32 {

        *self.word_frequencies.get(word).unwrap_or(&0)

    pub fn total_count(&self) -> u32 {

        self.word_frequencies.values().sum()

impl std::fmt::Display for WordCounter {

    fn fmt(&self, f: &mut core::fmt::Formatter) -> std::fmt::Result {

        writeln!(f, "WordCounter, total count: {}", self.total_count())?;

        let mut entries: Vec<(&String, &u32)> = self.word_frequencies.iter().collect();

        entries.sort_by(|(_, a), (_, b)| b.cmp(a));

        for (word, freq) in entries {

            writeln!(f, "{}: {}", word, freq)?;

        Ok(())

pub struct SpellChecker {

    counter: WordCounter,

    alphabet: String

impl SpellChecker {

    pub fn new(corpus: &str, alphabet: &str) -> Self {

        SpellChecker { counter: WordCounter::from_str(corpus), alphabet: String::from(alphabet) }

    pub fn correction(&self, word: &str) -> String {

        self.candidates(&clean_word(word)).iter()

            .max_by(|a, b| self.probability(a).partial_cmp(&self.probability(b)).unwrap())

            .unwrap().clone()

    pub fn probability(&self, word: &str) -> f64 {

        self.counter.get(word) as f64 / self.counter.total_count() as f64

    pub fn known<'a>(&self, words: &'a HashSet<String>) -> Vec<&'a String> {

        let known = self.counter.words();

        words.iter().filter(|w| known.contains(w)).collect()

    pub fn candidates(&self, word: &str) -> Vec<String> {

        let mut word_set = HashSet::new();

        word_set.insert(String::from(word));

        self.try_words(&word_set)

            .or_else(|| self.try_words(&self.edits1(word)))

            .or_else(|| self.try_words(&self.edits2(word)))

            .unwrap_or(vec![String::from(word)])

    fn try_words(&self, words: &HashSet<String>) -> Option<Vec<String>> {

        let result = self.known(words);

        Some(result.iter().map(|&s| s.clone()).collect())

            .filter(|_| !result.is_empty()) // Little hack so there isn't an if else with None on the else branch

    pub fn edits1(&self, word: &str) -> HashSet<String> { // Who said Rust code needs to be longer than Python

        let splits: Vec<(&str, &str)> = (0..word.len()+1).filter(|&i| word.is_char_boundary(i)).map(|i| (word.split_at(i))).collect();

        let deletes = splits.iter().map(|(l, r)| format!("{}{}",l, r.utf8_index_from(1..)));

        let transposes = splits.iter().map(|(l, r)| format!("{}{}{}{}", l, r.utf8_index(1..2), r.utf8_index(0..1), r.utf8_index_from(2..)));

        let replaces = splits.iter().flat_map(|(l, r)| self.alphabet.chars().map(move |c| format!("{}{}{}", l, c, r.utf8_index_from(1..))));

        let inserts = splits.iter().flat_map(|(l, r)| self.alphabet.chars().map(move |c| format!("{}{}{}", l, c, r)));

        HashSet::from_iter(deletes.chain(transposes).chain(replaces).chain(inserts))

    }

Малко се извинявам за липсата на нови редове тук, просто много ми се искаше функцията да е голяма колкото тази на Python. :/
Антонио Миндовкоментира преди почти 6 години

    pub fn edits2(&self, word: &str) -> HashSet<String> {

        self.edits1(word).iter().flat_map(|ew| self.edits1(ew)).collect()

Лог от изпълнението

Compiling solution v0.1.0 (/tmp/d20200114-2173579-14nkjsg/solution)
    Finished test [unoptimized + debuginfo] target(s) in 4.88s
     Running target/debug/deps/solution-a73e64ec87929bd0

running 0 tests

test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out

     Running target/debug/deps/solution_test-38971695424b36d5

running 15 tests
test solution_test::test_best_word_is_returned ... ok
test solution_test::test_clean_line_removes_punctuation ... ok
test solution_test::test_clean_line_trims_the_input ... ok
test solution_test::test_correction ... ok
test solution_test::test_correction_fails_to_produce_new_result ... ok
test solution_test::test_correction_normalizes_case ... ok
test solution_test::test_counting ... ok
test solution_test::test_display ... ok
test solution_test::test_edits1 ... ok
test solution_test::test_edits2 ... ok
test solution_test::test_empty_counter ... ok
test solution_test::test_from_empty_str ... ok
test solution_test::test_from_str ... ok
test solution_test::test_known_words ... ok
test solution_test::test_probability ... ok

test result: ok. 15 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out

   Doc-tests solution

running 0 tests

test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out

История (3 версии и 1 коментар)

Антонио качи първо решение на 10.01.2020 16:03 (преди около 6 години)

Антонио качи решение на 11.01.2020 17:56 (преди около 6 години)

 use std::collections::HashMap;

 use std::collections::HashSet;

 use std::iter::FromIterator;

+use std::ops::{RangeFrom, Range};

 pub fn clean_line(input: &str) -> String {

-    input.chars().filter(|&c| c.is_alphabetic() || c.is_whitespace() || "'-".contains(c)).collect()

+    input.trim().chars().filter(|&c| c.is_alphabetic() || c.is_whitespace() || "'-".contains(c)).collect()

 fn clean_word(input: &str) -> String {

     input.trim().to_lowercase()

+trait Utf8Index {

+    fn utf8_index(&self, r: Range<usize>) -> String;

+    fn utf8_index_from(&self, r: RangeFrom<usize>) -> String;

+impl Utf8Index for &str {

+    fn utf8_index(&self, r: Range<usize>) -> String {

+        self.chars().skip(r.start).take(r.end - r.start).collect()

+    fn utf8_index_from(&self, r: RangeFrom<usize>) -> String {

+        self.utf8_index(r.start..self.len())

 pub struct WordCounter {

     word_frequencies: HashMap<String, u32>

 impl WordCounter {

     pub fn new() -> Self {

         WordCounter { word_frequencies: HashMap::new() }

     pub fn from_str(input: &str) -> Self {

         let cleaned: Vec<String> = input.lines().map(clean_line).collect();

         let words: Vec<String> = cleaned.iter()

             .flat_map(|l| l.split_whitespace())

             .map(String::from).collect();

         let mut counter = WordCounter { word_frequencies: HashMap::new() };

         words.iter().for_each(|w|counter.add(w));

         counter

     pub fn words(&self) -> Vec<&String> {

         let mut words: Vec<&String> = self.word_frequencies.keys().collect();

         words.sort();

         words

     pub fn add(&mut self, item: &str) {

         let word = clean_word(item);

         *self.word_frequencies.entry(word).or_insert(0) += 1;

     pub fn get(&self, word: &str) -> u32 {

-        let word = clean_word(word);

-        *self.word_frequencies.get(&word).unwrap_or(&0)

+        *self.word_frequencies.get(word).unwrap_or(&0)

     pub fn total_count(&self) -> u32 {

         self.word_frequencies.values().sum()

 impl std::fmt::Display for WordCounter {

     fn fmt(&self, f: &mut core::fmt::Formatter) -> std::fmt::Result {

         writeln!(f, "WordCounter, total count: {}", self.total_count())?;

         let mut entries: Vec<(&String, &u32)> = self.word_frequencies.iter().collect();

         entries.sort_by(|(_, a), (_, b)| b.cmp(a));

         for (word, freq) in entries {

             writeln!(f, "{}: {}", word, freq)?;

         Ok(())

 pub struct SpellChecker {

     counter: WordCounter,

     alphabet: String

 impl SpellChecker {

     pub fn new(corpus: &str, alphabet: &str) -> Self {

         SpellChecker { counter: WordCounter::from_str(corpus), alphabet: String::from(alphabet) }

     pub fn correction(&self, word: &str) -> String {

         self.candidates(&clean_word(word)).iter()

             .max_by(|a, b| self.probability(a).partial_cmp(&self.probability(b)).unwrap())

             .unwrap().clone()

     pub fn probability(&self, word: &str) -> f64 {

         self.counter.get(word) as f64 / self.counter.total_count() as f64

     pub fn known<'a>(&self, words: &'a HashSet<String>) -> Vec<&'a String> {

         let known = self.counter.words();

         words.iter().filter(|w| known.contains(w)).collect()

     pub fn candidates(&self, word: &str) -> Vec<String> {

         let mut word_set = HashSet::new();

         word_set.insert(String::from(word));

         self.try_words(&word_set)

             .or_else(|| self.try_words(&self.edits1(word)))

             .or_else(|| self.try_words(&self.edits2(word)))

             .unwrap_or(vec![String::from(word)])

     fn try_words(&self, words: &HashSet<String>) -> Option<Vec<String>> {

         let result = self.known(words);

         Some(result.iter().map(|&s| s.clone()).collect())

             .filter(|_| !result.is_empty()) // Little hack so there isn't an if else with None on the else branch

     pub fn edits1(&self, word: &str) -> HashSet<String> { // Who said Rust code needs to be longer than Python

-        let splits: Vec<(&str, &str)> = (0..word.len()+1).map(|i| (&word[..i], &word[i..])).collect();

-        let deletes = splits.iter().filter(|(_, r)| !r.is_empty()).map(|(l, r)| format!("{}{}",l, &r[1..]));

-        let transposes = splits.iter().filter(|(_, r)| r.len() > 1).map(|(l, r)| format!("{}{}{}{}", l, &r[1..2], &r[0..1], &r[2..]));

-        let replaces = splits.iter() .filter(|(_, r)| !r.is_empty()).flat_map(|(l, r)| self.alphabet.chars().map(move |c| format!("{}{}{}", l, c, &r[1..])));

+        let splits: Vec<(&str, &str)> = (0..word.len()+1).filter(|&i| word.is_char_boundary(i)).map(|i| (word.split_at(i))).collect();

+        let deletes = splits.iter().filter(|(_, r)| !r.is_empty()).map(|(l, r)| format!("{}{}",l, r.utf8_index_from(1..)));

+        let transposes = splits.iter().filter(|(_, r)| r.len() > 1).map(|(l, r)| format!("{}{}{}{}", l, r.utf8_index(1..2), r.utf8_index(0..1), r.utf8_index_from(2..)));

+        let replaces = splits.iter() .filter(|(_, r)| !r.is_empty()).flat_map(|(l, r)| self.alphabet.chars().map(move |c| format!("{}{}{}", l, c, r.utf8_index_from(1..))));

         let inserts = splits.iter().flat_map(|(l, r)| self.alphabet.chars().map(move |c| format!("{}{}{}", l, c, r)));

         HashSet::from_iter(deletes.chain(transposes).chain(replaces).chain(inserts))

     pub fn edits2(&self, word: &str) -> HashSet<String> {

         self.edits1(word).iter().flat_map(|ew| self.edits1(ew)).collect()

Антонио качи решение на 11.01.2020 18:04 (преди около 6 години)

 use std::collections::HashMap;

 use std::collections::HashSet;

 use std::iter::FromIterator;

 use std::ops::{RangeFrom, Range};

 pub fn clean_line(input: &str) -> String {

     input.trim().chars().filter(|&c| c.is_alphabetic() || c.is_whitespace() || "'-".contains(c)).collect()

 fn clean_word(input: &str) -> String {

     input.trim().to_lowercase()

 trait Utf8Index {

     fn utf8_index(&self, r: Range<usize>) -> String;

     fn utf8_index_from(&self, r: RangeFrom<usize>) -> String;

 impl Utf8Index for &str {

     fn utf8_index(&self, r: Range<usize>) -> String {

         self.chars().skip(r.start).take(r.end - r.start).collect()

     fn utf8_index_from(&self, r: RangeFrom<usize>) -> String {

-        self.utf8_index(r.start..self.len())

+        self.chars().skip(r.start).collect()

 pub struct WordCounter {

     word_frequencies: HashMap<String, u32>

 impl WordCounter {

     pub fn new() -> Self {

         WordCounter { word_frequencies: HashMap::new() }

     pub fn from_str(input: &str) -> Self {

         let cleaned: Vec<String> = input.lines().map(clean_line).collect();

         let words: Vec<String> = cleaned.iter()

             .flat_map(|l| l.split_whitespace())

             .map(String::from).collect();

         let mut counter = WordCounter { word_frequencies: HashMap::new() };

         words.iter().for_each(|w|counter.add(w));

         counter

     pub fn words(&self) -> Vec<&String> {

         let mut words: Vec<&String> = self.word_frequencies.keys().collect();

         words.sort();

         words

     pub fn add(&mut self, item: &str) {

         let word = clean_word(item);

         *self.word_frequencies.entry(word).or_insert(0) += 1;

     pub fn get(&self, word: &str) -> u32 {

         *self.word_frequencies.get(word).unwrap_or(&0)

     pub fn total_count(&self) -> u32 {

         self.word_frequencies.values().sum()

 impl std::fmt::Display for WordCounter {

     fn fmt(&self, f: &mut core::fmt::Formatter) -> std::fmt::Result {

         writeln!(f, "WordCounter, total count: {}", self.total_count())?;

         let mut entries: Vec<(&String, &u32)> = self.word_frequencies.iter().collect();

         entries.sort_by(|(_, a), (_, b)| b.cmp(a));

         for (word, freq) in entries {

             writeln!(f, "{}: {}", word, freq)?;

         Ok(())

 pub struct SpellChecker {

     counter: WordCounter,

     alphabet: String

 impl SpellChecker {

     pub fn new(corpus: &str, alphabet: &str) -> Self {

         SpellChecker { counter: WordCounter::from_str(corpus), alphabet: String::from(alphabet) }

     pub fn correction(&self, word: &str) -> String {

         self.candidates(&clean_word(word)).iter()

             .max_by(|a, b| self.probability(a).partial_cmp(&self.probability(b)).unwrap())

             .unwrap().clone()

     pub fn probability(&self, word: &str) -> f64 {

         self.counter.get(word) as f64 / self.counter.total_count() as f64

     pub fn known<'a>(&self, words: &'a HashSet<String>) -> Vec<&'a String> {

         let known = self.counter.words();

         words.iter().filter(|w| known.contains(w)).collect()

     pub fn candidates(&self, word: &str) -> Vec<String> {

         let mut word_set = HashSet::new();

         word_set.insert(String::from(word));

         self.try_words(&word_set)

             .or_else(|| self.try_words(&self.edits1(word)))

             .or_else(|| self.try_words(&self.edits2(word)))

             .unwrap_or(vec![String::from(word)])

     fn try_words(&self, words: &HashSet<String>) -> Option<Vec<String>> {

         let result = self.known(words);

         Some(result.iter().map(|&s| s.clone()).collect())

             .filter(|_| !result.is_empty()) // Little hack so there isn't an if else with None on the else branch

     pub fn edits1(&self, word: &str) -> HashSet<String> { // Who said Rust code needs to be longer than Python

         let splits: Vec<(&str, &str)> = (0..word.len()+1).filter(|&i| word.is_char_boundary(i)).map(|i| (word.split_at(i))).collect();

-        let deletes = splits.iter().filter(|(_, r)| !r.is_empty()).map(|(l, r)| format!("{}{}",l, r.utf8_index_from(1..)));

-        let transposes = splits.iter().filter(|(_, r)| r.len() > 1).map(|(l, r)| format!("{}{}{}{}", l, r.utf8_index(1..2), r.utf8_index(0..1), r.utf8_index_from(2..)));

-        let replaces = splits.iter() .filter(|(_, r)| !r.is_empty()).flat_map(|(l, r)| self.alphabet.chars().map(move |c| format!("{}{}{}", l, c, r.utf8_index_from(1..))));

+        let deletes = splits.iter().map(|(l, r)| format!("{}{}",l, r.utf8_index_from(1..)));

+        let transposes = splits.iter().map(|(l, r)| format!("{}{}{}{}", l, r.utf8_index(1..2), r.utf8_index(0..1), r.utf8_index_from(2..)));

+        let replaces = splits.iter().flat_map(|(l, r)| self.alphabet.chars().map(move |c| format!("{}{}{}", l, c, r.utf8_index_from(1..))));

         let inserts = splits.iter().flat_map(|(l, r)| self.alphabet.chars().map(move |c| format!("{}{}{}", l, c, r)));

         HashSet::from_iter(deletes.chain(transposes).chain(replaces).chain(inserts))

     }

Малко се извинявам за липсата на нови редове тук, просто много ми се искаше функцията да е голяма колкото тази на Python. :/
Антонио Миндов коментира преди почти 6 години

     pub fn edits2(&self, word: &str) -> HashSet<String> {

         self.edits1(word).iter().flat_map(|ew| self.edits1(ew)).collect()

Програмиране с Rust

Курс във Факултета по Математика и Информатика към СУ

Решение на Spell Checker от Антонио Миндов

Резултати

Код

Лог от изпълнението

История (3 версии и 1 коментар)

Антонио качи първо решение на 10.01.2020 16:03 (преди около 6 години)

Антонио качи решение на 11.01.2020 17:56 (преди около 6 години)

Антонио качи решение на 11.01.2020 18:04 (преди около 6 години)