Решение на Spell Checker от Антон Чернев

Обратно към всички решения

Към профила на Антон Чернев

Резултати

  • 20 точки от тестове
  • 0 бонус точки
  • 20 точки общо
  • 15 успешни тест(а)
  • 0 неуспешни тест(а)

Код

use std::collections::HashMap;
use std::collections::HashSet;
use std::fmt;
use std::cmp::Reverse;
/// Strips a line down to the characters that can be part of a word.
///
/// The input is trimmed first; after that only alphabetic characters,
/// whitespace, apostrophes (`'`) and hyphens (`-`) are kept, in their
/// original order. Everything else (digits, punctuation, ...) is dropped.
pub fn clean_line(input: &str) -> String {
    let mut cleaned = String::new();
    for symbol in input.trim().chars() {
        let is_word_punctuation = symbol == '\'' || symbol == '-';
        if is_word_punctuation || symbol.is_alphabetic() || symbol.is_whitespace() {
            cleaned.push(symbol);
        }
    }
    cleaned
}
/// Produces the canonical form of a word: surrounding whitespace is removed
/// and every character is converted to lowercase.
pub fn normalize(word: &str) -> String {
    let trimmed = word.trim();
    trimmed.to_lowercase()
}
/// Counts how many times each (normalized) word has been seen.
pub struct WordCounter {
    /// Maps a normalized word to its number of occurrences.
    counter: HashMap<String, u32>
}

impl WordCounter {
    /// Creates a counter with no words in it.
    pub fn new() -> Self {
        WordCounter {
            counter: HashMap::new()
        }
    }

    /// Builds a counter from a block of text.
    ///
    /// Every line is passed through `clean_line`, split on whitespace, and
    /// each resulting word is recorded via [`WordCounter::add`] (which
    /// normalizes it).
    pub fn from_str(input: &str) -> Self {
        let mut word_counter = Self::new();
        for line in input.lines() {
            for word in clean_line(line).split_whitespace() {
                word_counter.add(word);
            }
        }
        word_counter
    }

    /// Returns all distinct words in the counter, sorted in ascending order.
    pub fn words(&self) -> Vec<&String> {
        let mut words: Vec<&String> = self.counter.keys().collect();
        words.sort();
        words
    }

    /// Normalizes `item` and increments its count by one.
    pub fn add(&mut self, item: &str) {
        // The entry API does a single hash lookup for both "insert 1" and "bump".
        *self.counter.entry(normalize(item)).or_insert(0) += 1;
    }

    /// Returns how many times `word` was counted, or 0 if it was never seen.
    ///
    /// The lookup is exact: `word` is NOT normalized here.
    pub fn get(&self, word: &str) -> u32 {
        self.counter.get(word).copied().unwrap_or(0)
    }

    /// Returns the sum of the counts of all words.
    pub fn total_count(&self) -> u32 {
        self.counter.values().sum()
    }
}
/// Formats the counter as a header line with the total count, followed by one
/// `word: count` line per distinct word, most frequent first.
impl std::fmt::Display for WordCounter {
    fn fmt(&self, f: &mut fmt::Formatter) -> std::fmt::Result {
        // Write straight into the formatter instead of rebuilding an ever-growing
        // String with `format!` on every iteration.
        writeln!(f, "WordCounter, total count: {}", self.total_count())?;

        let mut entries = self.counter.iter().collect::<Vec<_>>();
        // Highest count first; equal counts are ordered alphabetically so the
        // output does not depend on the hash map's iteration order.
        entries.sort_unstable_by_key(|&(word, &count)| (Reverse(count), word));

        for (word, count) in entries {
            writeln!(f, "{}: {}", word, count)?;
        }
        Ok(())
    }
}
/// The 26 lowercase letters of the English alphabet.
pub const ALPHABET_EN: &str = "abcdefghijklmnopqrstuvwxyz";
/// The 30 lowercase letters of the Bulgarian alphabet.
pub const ALPHABET_BG: &str = "абвгдежзийклмнопрстуфхцчшщъьюя";
/// Suggests corrections for misspelled words based on word frequencies in a
/// corpus and single-character edits drawn from an alphabet.
pub struct SpellChecker {
    /// Word frequencies gathered from the corpus.
    counter: WordCounter,
    /// Characters used when generating replacements and insertions.
    alphabet: Vec<char>
}

impl SpellChecker {
    /// Builds a checker from a text `corpus` and the `alphabet` whose characters
    /// are used to generate candidate edits.
    pub fn new(corpus: &str, alphabet: &str) -> Self {
        Self {
            counter: WordCounter::from_str(corpus),
            alphabet: alphabet.chars().collect(),
        }
    }

    /// Returns the relative frequency of `word` in the corpus
    /// (its count divided by the total count).
    ///
    /// Returns `0.0` when the corpus is empty instead of the NaN that `0 / 0`
    /// would produce.
    pub fn probability(&self, word: &str) -> f64 {
        let total = self.counter.total_count();
        if total == 0 {
            return 0.0;
        }
        f64::from(self.counter.get(word)) / f64::from(total)
    }

    /// Returns references to those `words` that occur in the corpus.
    pub fn known<'a>(&self, words: &'a HashSet<String>) -> Vec<&'a String> {
        words.iter().filter(|word| self.counter.get(word) > 0).collect()
    }

    /// Returns every string reachable from `word` by one edit:
    /// deleting a character, swapping two adjacent characters, replacing a
    /// character with an alphabet letter, or inserting an alphabet letter.
    ///
    /// All positions are counted in characters, not bytes, so multi-byte
    /// alphabets work. Replacing a character with itself is a valid edit, so
    /// `word` appears in the result whenever one of its characters is in the
    /// alphabet.
    pub fn edits1(&self, word: &str) -> HashSet<String> {
        // Decode once; every edit below indexes into this buffer instead of
        // re-walking the UTF-8 string.
        let chars: Vec<char> = word.chars().collect();
        let size = chars.len();
        let mut result = HashSet::new();

        // Deletions: drop the character at position i.
        for i in 0..size {
            let deleted: String = chars[..i].iter().chain(&chars[i + 1..]).collect();
            result.insert(deleted);
        }

        // Transpositions: swap the characters at positions i - 1 and i.
        for i in 1..size {
            let mut swapped = chars.clone();
            swapped.swap(i - 1, i);
            result.insert(swapped.into_iter().collect::<String>());
        }

        for &letter in &self.alphabet {
            // Replacements: overwrite position i with `letter`.
            for i in 0..size {
                let mut replaced = chars.clone();
                replaced[i] = letter;
                result.insert(replaced.into_iter().collect::<String>());
            }

            // Insertions: put `letter` before position i (i == size appends).
            for i in 0..=size {
                let mut inserted = String::with_capacity(word.len() + letter.len_utf8());
                inserted.extend(&chars[..i]);
                inserted.push(letter);
                inserted.extend(&chars[i..]);
                result.insert(inserted);
            }
        }

        result
    }

    /// Returns every string reachable from `word` by applying [`SpellChecker::edits1`] twice.
    pub fn edits2(&self, word: &str) -> HashSet<String> {
        self.edits1(word)
            .iter()
            .flat_map(|edit| self.edits1(edit))
            .collect()
    }

    /// Returns the possible corrections for `word`, trying in order:
    /// the word itself if it is in the corpus, then known words one edit
    /// away, then known words two edits away, and finally the word itself
    /// unchanged. The result is never empty.
    pub fn candidates(&self, word: &str) -> Vec<String> {
        if self.counter.get(word) > 0 {
            return vec![word.to_string()];
        }

        let one_edit = self.edits1(word);
        let known = self.known(&one_edit);
        if !known.is_empty() {
            return known.into_iter().cloned().collect();
        }

        let two_edits = self.edits2(word);
        let known = self.known(&two_edits);
        if !known.is_empty() {
            return known.into_iter().cloned().collect();
        }

        vec![word.to_string()]
    }

    /// Returns the most frequent candidate correction for `word`.
    ///
    /// `word` is normalized first. When several candidates share the highest
    /// count, the alphabetically smallest one is returned so the result does
    /// not depend on hash-set iteration order.
    pub fn correction(&self, word: &str) -> String {
        let word = normalize(word);
        self.candidates(&word)
            .into_iter()
            .min_by(|a, b| {
                // Descending by count, then ascending by word.
                self.counter
                    .get(b)
                    .cmp(&self.counter.get(a))
                    .then_with(|| a.cmp(b))
            })
            // `candidates` always yields at least one entry; fall back to the
            // input rather than panicking if that ever changes.
            .unwrap_or(word)
    }
}

Лог от изпълнението

Compiling solution v0.1.0 (/tmp/d20200114-2173579-utqa4y/solution)
    Finished test [unoptimized + debuginfo] target(s) in 4.75s
     Running target/debug/deps/solution-a73e64ec87929bd0

running 0 tests

test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out

     Running target/debug/deps/solution_test-38971695424b36d5

running 15 tests
test solution_test::test_best_word_is_returned ... ok
test solution_test::test_clean_line_removes_punctuation ... ok
test solution_test::test_clean_line_trims_the_input ... ok
test solution_test::test_correction ... ok
test solution_test::test_correction_fails_to_produce_new_result ... ok
test solution_test::test_correction_normalizes_case ... ok
test solution_test::test_counting ... ok
test solution_test::test_display ... ok
test solution_test::test_edits1 ... ok
test solution_test::test_edits2 ... ok
test solution_test::test_empty_counter ... ok
test solution_test::test_from_empty_str ... ok
test solution_test::test_from_str ... ok
test solution_test::test_known_words ... ok
test solution_test::test_probability ... ok

test result: ok. 15 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out

   Doc-tests solution

running 0 tests

test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out

История (2 версии и 0 коментара)

Антон качи първо решение на 13.01.2020 01:35 (преди над 5 години)

Антон качи решение на 13.01.2020 23:59 (преди над 5 години)

use std::collections::HashMap;
use std::collections::HashSet;
use std::fmt;
use std::cmp::Reverse;
/// Strips a line down to the characters that can be part of a word.
///
/// The input is trimmed first; after that only alphabetic characters,
/// whitespace, apostrophes (`'`) and hyphens (`-`) are kept, in their
/// original order. Everything else (digits, punctuation, ...) is dropped.
pub fn clean_line(input: &str) -> String {
    let mut cleaned = String::new();
    for symbol in input.trim().chars() {
        let is_word_punctuation = symbol == '\'' || symbol == '-';
        if is_word_punctuation || symbol.is_alphabetic() || symbol.is_whitespace() {
            cleaned.push(symbol);
        }
    }
    cleaned
}
/// Produces the canonical form of a word: surrounding whitespace is removed
/// and every character is converted to lowercase.
-fn normalize(word: &str) -> String {
+pub fn normalize(word: &str) -> String {
word.trim().to_lowercase()
}
/// Counts how many times each (normalized) word has been seen.
pub struct WordCounter {
    /// Maps a normalized word to its number of occurrences.
    counter: HashMap<String, u32>
}

impl WordCounter {
    /// Creates a counter with no words in it.
    pub fn new() -> Self {
        WordCounter {
            counter: HashMap::new()
        }
    }

    /// Builds a counter from a block of text.
    ///
    /// Every line is passed through `clean_line`, split on whitespace, and
    /// each resulting word is recorded via [`WordCounter::add`] (which
    /// normalizes it).
    pub fn from_str(input: &str) -> Self {
        let mut word_counter = Self::new();
        for line in input.lines() {
            for word in clean_line(line).split_whitespace() {
                word_counter.add(word);
            }
        }
        word_counter
    }

    /// Returns all distinct words in the counter, sorted in ascending order.
    pub fn words(&self) -> Vec<&String> {
        let mut words: Vec<&String> = self.counter.keys().collect();
        words.sort();
        words
    }

    /// Normalizes `item` and increments its count by one.
    pub fn add(&mut self, item: &str) {
        // The entry API does a single hash lookup for both "insert 1" and "bump".
        *self.counter.entry(normalize(item)).or_insert(0) += 1;
    }

    /// Returns how many times `word` was counted, or 0 if it was never seen.
    ///
    /// The lookup is exact: `word` is NOT normalized here.
    pub fn get(&self, word: &str) -> u32 {
        self.counter.get(word).copied().unwrap_or(0)
    }

    /// Returns the sum of the counts of all words.
    pub fn total_count(&self) -> u32 {
        self.counter.values().sum()
    }
}
/// Formats the counter as a header line with the total count, followed by one
/// `word: count` line per distinct word, most frequent first.
impl std::fmt::Display for WordCounter {
    fn fmt(&self, f: &mut fmt::Formatter) -> std::fmt::Result {
        // Write straight into the formatter instead of rebuilding an ever-growing
        // String with `format!` on every iteration.
        writeln!(f, "WordCounter, total count: {}", self.total_count())?;

        let mut entries = self.counter.iter().collect::<Vec<_>>();
        // Highest count first; equal counts are ordered alphabetically so the
        // output does not depend on the hash map's iteration order.
        entries.sort_unstable_by_key(|&(word, &count)| (Reverse(count), word));

        for (word, count) in entries {
            writeln!(f, "{}: {}", word, count)?;
        }
        Ok(())
    }
}
/// The 26 lowercase letters of the English alphabet.
pub const ALPHABET_EN: &str = "abcdefghijklmnopqrstuvwxyz";
/// The 30 lowercase letters of the Bulgarian alphabet.
pub const ALPHABET_BG: &str = "абвгдежзийклмнопрстуфхцчшщъьюя";
/// Suggests corrections for misspelled words based on word frequencies in a
/// corpus and single-character edits drawn from an alphabet.
pub struct SpellChecker {
    /// Word frequencies gathered from the corpus.
    counter: WordCounter,
    /// Characters used when generating replacements and insertions.
    alphabet: Vec<char>
}

impl SpellChecker {
    /// Builds a checker from a text `corpus` and the `alphabet` whose characters
    /// are used to generate candidate edits.
    pub fn new(corpus: &str, alphabet: &str) -> Self {
        Self {
            counter: WordCounter::from_str(corpus),
            alphabet: alphabet.chars().collect(),
        }
    }

    /// Returns the relative frequency of `word` in the corpus
    /// (its count divided by the total count).
    ///
    /// Returns `0.0` when the corpus is empty instead of the NaN that `0 / 0`
    /// would produce.
    pub fn probability(&self, word: &str) -> f64 {
        let total = self.counter.total_count();
        if total == 0 {
            return 0.0;
        }
        f64::from(self.counter.get(word)) / f64::from(total)
    }

    /// Returns references to those `words` that occur in the corpus.
    pub fn known<'a>(&self, words: &'a HashSet<String>) -> Vec<&'a String> {
        words.iter().filter(|word| self.counter.get(word) > 0).collect()
    }

    /// Returns every string reachable from `word` by one edit:
    /// deleting a character, swapping two adjacent characters, replacing a
    /// character with an alphabet letter, or inserting an alphabet letter.
    ///
    /// All positions are counted in characters, not bytes, so multi-byte
    /// alphabets work. Replacing a character with itself is a valid edit, so
    /// `word` appears in the result whenever one of its characters is in the
    /// alphabet.
    pub fn edits1(&self, word: &str) -> HashSet<String> {
        // Decode once; every edit below indexes into this buffer instead of
        // re-walking the UTF-8 string.
        let chars: Vec<char> = word.chars().collect();
        let size = chars.len();
        let mut result = HashSet::new();

        // Deletions: drop the character at position i.
        for i in 0..size {
            let deleted: String = chars[..i].iter().chain(&chars[i + 1..]).collect();
            result.insert(deleted);
        }

        // Transpositions: swap the characters at positions i - 1 and i.
        for i in 1..size {
            let mut swapped = chars.clone();
            swapped.swap(i - 1, i);
            result.insert(swapped.into_iter().collect::<String>());
        }

        for &letter in &self.alphabet {
            // Replacements: overwrite position i with `letter`.
            for i in 0..size {
                let mut replaced = chars.clone();
                replaced[i] = letter;
                result.insert(replaced.into_iter().collect::<String>());
            }

            // Insertions: put `letter` before position i (i == size appends).
            for i in 0..=size {
                let mut inserted = String::with_capacity(word.len() + letter.len_utf8());
                inserted.extend(&chars[..i]);
                inserted.push(letter);
                inserted.extend(&chars[i..]);
                result.insert(inserted);
            }
        }

        result
    }

    /// Returns every string reachable from `word` by applying [`SpellChecker::edits1`] twice.
    pub fn edits2(&self, word: &str) -> HashSet<String> {
        self.edits1(word)
            .iter()
            .flat_map(|edit| self.edits1(edit))
            .collect()
    }

    /// Returns the possible corrections for `word`, trying in order:
    /// the word itself if it is in the corpus, then known words one edit
    /// away, then known words two edits away, and finally the word itself
    /// unchanged. The result is never empty.
    pub fn candidates(&self, word: &str) -> Vec<String> {
        if self.counter.get(word) > 0 {
            return vec![word.to_string()];
        }

        let one_edit = self.edits1(word);
        let known = self.known(&one_edit);
        if !known.is_empty() {
            return known.into_iter().cloned().collect();
        }

        let two_edits = self.edits2(word);
        let known = self.known(&two_edits);
        if !known.is_empty() {
            return known.into_iter().cloned().collect();
        }

        vec![word.to_string()]
    }

    /// Returns the most frequent candidate correction for `word`.
    ///
    /// `word` is normalized first. When several candidates share the highest
    /// count, the alphabetically smallest one is returned so the result does
    /// not depend on hash-set iteration order.
    pub fn correction(&self, word: &str) -> String {
        let word = normalize(word);
        self.candidates(&word)
            .into_iter()
            .min_by(|a, b| {
                // Descending by count, then ascending by word.
                self.counter
                    .get(b)
                    .cmp(&self.counter.get(a))
                    .then_with(|| a.cmp(b))
            })
            // `candidates` always yields at least one entry; fall back to the
            // input rather than panicking if that ever changes.
            .unwrap_or(word)
    }
}