Решение на Spell Checker от Андрей
Резултати
- 20 точки от тестове
- 0 бонус точки
- 20 точки общо
- 15 успешни тест(а)
- 0 неуспешни тест(а)
Код
use std::collections::{HashMap, HashSet};
use std::fmt;
/// Lowercase letters of the English alphabet, usable as the `alphabet`
/// argument of `SpellChecker::new`.
pub const ALPHABET_EN: &str = "abcdefghijklmnopqrstuvwxyz";
/// Lowercase letters of the Bulgarian alphabet, usable as the `alphabet`
/// argument of `SpellChecker::new`.
pub const ALPHABET_BG: &str = "абвгдежзийклмнопрстуфхцчшщъьюя";
/// Case-insensitive word frequency table.
///
/// Words are stored lowercased. `total_count` is the number of `add` calls,
/// i.e. the sum of all per-word counts.
#[derive(Debug, Default)]
pub struct WordCounter {
    storage: HashMap<String, u32>,
    total_count: u32,
}

impl WordCounter {
    /// Creates an empty counter.
    pub fn new() -> Self {
        Self::default()
    }

    /// Builds a counter from a text.
    ///
    /// Every line is passed through `clean_line`, split on whitespace, and each
    /// resulting token is recorded with `add`.
    pub fn from_str(input: &str) -> Self {
        let mut counter = Self::new();
        for line in input.lines() {
            // `split_whitespace` never yields empty or padded tokens, so no
            // further trimming or emptiness check is needed here.
            for word in clean_line(line).split_whitespace() {
                counter.add(word);
            }
        }
        counter
    }

    /// Returns all distinct words seen so far, sorted ascending.
    pub fn words(&self) -> Vec<&String> {
        let mut keys: Vec<_> = self.storage.keys().collect();
        // Keys are unique, so an unstable sort gives the same result.
        keys.sort_unstable();
        keys
    }

    /// Records one occurrence of `item`, trimmed and lowercased.
    pub fn add(&mut self, item: &str) {
        let word = item.trim().to_lowercase();
        *self.storage.entry(word).or_insert(0) += 1;
        self.total_count += 1;
    }

    /// Returns how many times `item` was recorded, or 0 if it never was.
    ///
    /// The lookup is exact: `item` is not trimmed or lowercased here.
    pub fn get(&self, item: &str) -> u32 {
        self.storage.get(item).copied().unwrap_or(0)
    }

    /// Returns the total number of recorded occurrences across all words.
    pub fn total_count(&self) -> u32 {
        self.total_count
    }
}
/// Human-readable dump of the counter.
///
/// The first line is `WordCounter, total count: N`, followed by one
/// `word: count` line per distinct word, most frequent first. Words with the
/// same count are listed alphabetically so the output is reproducible.
impl fmt::Display for WordCounter {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        writeln!(f, "WordCounter, total count: {}", self.total_count())?;

        let mut entries: Vec<_> = self.storage.iter().collect();
        // Descending by count; ties fall back to the word itself instead of
        // the hash map's arbitrary iteration order.
        entries.sort_unstable_by(|(word_a, count_a), (word_b, count_b)| {
            count_b.cmp(count_a).then_with(|| word_a.cmp(word_b))
        });

        for (word, count) in entries {
            writeln!(f, "{}: {}", word, count)?;
        }
        Ok(())
    }
}
/// Reduces a line to the characters that may appear inside words.
///
/// Leading and trailing whitespace is trimmed first. After that, only
/// alphabetic characters, whitespace, apostrophes and hyphens are kept;
/// everything else (digits, punctuation, symbols) is dropped. Interior
/// whitespace is preserved exactly as it was.
pub fn clean_line(input: &str) -> String {
    input
        .trim()
        .chars()
        .filter(|symbol| {
            symbol.is_alphabetic() || symbol.is_whitespace() || matches!(*symbol, '\'' | '-')
        })
        .collect()
}
/// Frequency-based spelling corrector.
///
/// Suggestions are words from the training corpus that are at most two simple
/// edits (deletion, transposition, replacement, insertion) away from the
/// input.
pub struct SpellChecker {
    /// Word frequencies gathered from the corpus.
    counter: WordCounter,
    /// Letters used when generating replacement and insertion edits.
    alphabet_chars: Vec<char>,
}

impl SpellChecker {
    /// Builds a checker from a training `corpus` and the `alphabet` whose
    /// characters are used for replacement and insertion edits.
    pub fn new(corpus: &str, alphabet: &str) -> Self {
        SpellChecker {
            counter: WordCounter::from_str(corpus),
            alphabet_chars: alphabet.chars().collect(),
        }
    }

    /// Returns the most frequent candidate for `word`.
    ///
    /// When several candidates share the highest frequency, the
    /// alphabetically first one is returned, so repeated calls always agree.
    pub fn correction(&self, word: &str) -> String {
        self.candidates(word)
            .into_iter()
            .max_by(|left, right| {
                // Counts order exactly like probabilities (same divisor) and
                // need no float comparison. The reversed string comparison
                // makes the alphabetically smaller word win a tie.
                let by_count = self.counter.get(left).cmp(&self.counter.get(right));
                by_count.then_with(|| right.cmp(left))
            })
            // `candidates` always yields at least the normalised word itself.
            .unwrap_or_else(|| word.trim().to_lowercase())
    }

    /// Returns the relative frequency of `word` in the corpus.
    ///
    /// The lookup is exact (no trimming or lowercasing). An empty corpus
    /// yields `0.0` rather than the NaN a plain `0 / 0` would produce.
    pub fn probability(&self, word: &str) -> f64 {
        let total = self.counter.total_count();
        if total == 0 {
            return 0.0;
        }
        f64::from(self.counter.get(word)) / f64::from(total)
    }

    /// Returns the subset of `words` that occur in the corpus.
    pub fn known<'a>(&self, words: &'a HashSet<String>) -> Vec<&'a String> {
        words.iter().filter(|word| self.counter.get(word) > 0).collect()
    }

    /// Returns the plausible corrections for `word`, sorted alphabetically.
    ///
    /// `word` is trimmed and lowercased first. The result is the first
    /// non-empty option of: the word itself if known, known words one edit
    /// away, known words two edits away, and finally the word itself.
    pub fn candidates(&self, word: &str) -> Vec<String> {
        let word = word.trim().to_lowercase();
        if self.counter.get(&word) > 0 {
            return vec![word];
        }

        let one_edit_away = self.known_sorted(&self.edits1(&word));
        if !one_edit_away.is_empty() {
            return one_edit_away;
        }

        // Only generated when nothing closer matched; this set is large.
        let two_edits_away = self.known_sorted(&self.edits2(&word));
        if !two_edits_away.is_empty() {
            return two_edits_away;
        }

        vec![word]
    }

    /// Returns every string exactly one edit away from `word`.
    ///
    /// Edits are: deleting one character, swapping two adjacent characters,
    /// replacing one character with an alphabet letter, and inserting an
    /// alphabet letter at any position (including the end). Splitting happens
    /// on character boundaries, so multi-byte text is handled correctly.
    pub fn edits1(&self, word: &str) -> HashSet<String> {
        let mut edits = HashSet::new();

        // Every character boundary, plus the position just past the end so
        // that appending a letter is covered by the same loop.
        let boundaries = word
            .char_indices()
            .map(|(offset, _)| offset)
            .chain(std::iter::once(word.len()));

        for offset in boundaries {
            let (prefix, suffix) = word.split_at(offset);
            let mut rest = suffix.chars();
            let first = rest.next();
            let after_first = rest.as_str();

            if let Some(first) = first {
                // Deletion of the character at the split point.
                edits.insert([prefix, after_first].concat());

                // Transposition of the two characters after the split point.
                let mut tail = after_first.chars();
                if let Some(second) = tail.next() {
                    let mut swapped = String::with_capacity(word.len());
                    swapped.push_str(prefix);
                    swapped.push(second);
                    swapped.push(first);
                    swapped.push_str(tail.as_str());
                    edits.insert(swapped);
                }
            }

            for &letter in &self.alphabet_chars {
                if first.is_some() {
                    // Replacement of the character at the split point.
                    edits.insert(Self::splice(prefix, letter, after_first));
                }
                // Insertion at the split point.
                edits.insert(Self::splice(prefix, letter, suffix));
            }
        }

        edits
    }

    /// Returns every string reachable by applying `edits1` twice to `word`.
    pub fn edits2(&self, word: &str) -> HashSet<String> {
        self.edits1(word)
            .into_iter()
            .flat_map(|edit| self.edits1(&edit))
            .collect()
    }

    /// Owned, alphabetically sorted copy of the known subset of `words`.
    fn known_sorted(&self, words: &HashSet<String>) -> Vec<String> {
        let mut found: Vec<String> = self.known(words).into_iter().cloned().collect();
        found.sort_unstable();
        found
    }

    /// Concatenates `prefix`, `letter` and `suffix` into one exactly-sized string.
    fn splice(prefix: &str, letter: char, suffix: &str) -> String {
        let mut joined = String::with_capacity(prefix.len() + letter.len_utf8() + suffix.len());
        joined.push_str(prefix);
        joined.push(letter);
        joined.push_str(suffix);
        joined
    }
}
Лог от изпълнението
Compiling solution v0.1.0 (/tmp/d20200114-2173579-flxmrs/solution) Finished test [unoptimized + debuginfo] target(s) in 4.26s Running target/debug/deps/solution-a73e64ec87929bd0 running 0 tests test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out Running target/debug/deps/solution_test-38971695424b36d5 running 15 tests test solution_test::test_best_word_is_returned ... ok test solution_test::test_clean_line_removes_punctuation ... ok test solution_test::test_clean_line_trims_the_input ... ok test solution_test::test_correction ... ok test solution_test::test_correction_fails_to_produce_new_result ... ok test solution_test::test_correction_normalizes_case ... ok test solution_test::test_counting ... ok test solution_test::test_display ... ok test solution_test::test_edits1 ... ok test solution_test::test_edits2 ... ok test solution_test::test_empty_counter ... ok test solution_test::test_from_empty_str ... ok test solution_test::test_from_str ... ok test solution_test::test_known_words ... ok test solution_test::test_probability ... ok test result: ok. 15 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out Doc-tests solution running 0 tests test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out
История (2 версии и 0 коментара)
Андрей качи решение на 05.01.2020 17:52 (преди почти 6 години)
use std::collections::{HashMap, HashSet};
use std::fmt;
pub const ALPHABET_EN: &'static str = "abcdefghijklmnopqrstuvwxyz";
pub const ALPHABET_BG: &'static str = "абвгдежзийклмнопрстуфхцчшщъьюя";
pub struct WordCounter {
storage: HashMap<String, u32>,
total_count: u32,
}
impl WordCounter {
pub fn new() -> Self {
WordCounter { storage: HashMap::new(), total_count: 0 }
}
pub fn from_str(input: &str) -> Self {
let mut counter = Self::new();
for line in input.lines() {
let line = clean_line(line);
for word in line.trim().split_whitespace() {
if word.trim().len() > 0 {
counter.add(word);
}
}
}
counter
}
pub fn words(&self) -> Vec<&String> {
let mut keys: Vec<_> = self.storage.keys().collect();
keys.sort();
keys
}
pub fn add(&mut self, item: &str) {
let word = item.trim().to_lowercase();
*self.storage.entry(word).or_insert(0) += 1;
self.total_count += 1;
}
pub fn get(&self, item: &str) -> u32 {
*self.storage.get(item).unwrap_or(&0)
}
pub fn total_count(&self) -> u32 {
self.total_count
}
}
impl fmt::Display for WordCounter {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
writeln!(f, "WordCounter, total count: {}", self.total_count())?;
let mut entries: Vec<_> = self.storage.iter().collect();
entries.sort_by_key(|(_, v)| *v);
for (word, count) in entries.into_iter().rev() {
writeln!(f, "{}: {}", word, count)?;
}
Ok(())
}
}
pub fn clean_line(input: &str) -> String {
input.trim().replace(|c: char| {
!(c.is_alphabetic() || c.is_whitespace() || c == '\'' || c == '-')
}, "")
}
pub struct SpellChecker {
counter: WordCounter,
alphabet_chars: Vec<char>,
}
impl SpellChecker {
pub fn new(corpus: &str, alphabet: &str) -> Self {
let counter = WordCounter::from_str(corpus);
let alphabet_chars: Vec<_> = alphabet.chars().collect();
SpellChecker { counter, alphabet_chars }
}
pub fn correction(&self, word: &str) -> String {
self.candidates(word).into_iter().max_by(|l, r| {
self.probability(l).partial_cmp(&self.probability(r)).unwrap()
}).unwrap()
}
pub fn probability(&self, word: &str) -> f64 {
self.counter.get(&word) as f64 / self.counter.total_count() as f64
}
pub fn known<'a>(&self, words: &'a HashSet<String>) -> Vec<&'a String> {
words.iter().filter(|word| self.counter.get(word) > 0).collect()
}
pub fn candidates(&self, word: &str) -> Vec<String> {
let word = word.trim().to_lowercase();
let mut word_set = HashSet::new();
word_set.insert(word.clone());
let candidates = self.known(&word_set);
if candidates.len() > 0 {
return candidates.into_iter().cloned().collect();
}
let edits = self.edits1(&word);
let candidates = self.known(&edits);
if candidates.len() > 0 {
return candidates.into_iter().cloned().collect();
}
let edits = self.edits2(&word);
let candidates = self.known(&edits);
if candidates.len() > 0 {
return candidates.into_iter().cloned().collect();
}
word_set.into_iter().collect()
}
pub fn edits1(&self, word: &str) -> HashSet<String> {
- let chars: Vec<_> = word.chars().collect();
- let mut edits = HashSet::new();
+ let word_len = word.len();
+ let mut edits = Vec::new();
+ let splits = word.char_indices().map(|(i, _)| word.split_at(i));
- let mut splits = Vec::with_capacity(chars.len());
- for i in 0..chars.len() {
- splits.push(chars.split_at(i));
+ macro_rules! skip_first_char {
+ ($s:expr) => {
+ {
+ let mut chars = $s.chars();
+ chars.next();
+ chars.as_str()
+ }
+ }
}
for (prefix, suffix) in splits {
// deletions
- let mut new_word = String::new();
- new_word.extend(prefix.iter());
- new_word.extend(suffix.iter().skip(1));
- edits.insert(new_word);
+ let mut new_word = String::with_capacity(word_len);
+ new_word.push_str(prefix);
+ new_word.push_str(skip_first_char!(suffix));
+ edits.push(new_word);
+
// transposes
- if suffix.len() > 1 {
- let mut new_word = String::new();
- new_word.extend(prefix.iter());
- new_word.push(suffix[1]);
- new_word.push(suffix[0]);
- new_word.extend(suffix.iter().skip(2));
- edits.insert(new_word);
+ let mut suffix_chars = suffix.chars();
+ if let (Some(first_char), Some(second_char)) = (suffix_chars.next(), suffix_chars.next()) {
+ let mut new_word = String::with_capacity(word_len);
+ new_word.push_str(prefix);
+ new_word.push(second_char);
+ new_word.push(first_char);
+ new_word.push_str(suffix_chars.as_str());
+ edits.push(new_word);
}
- // replaces
for new_char in self.alphabet_chars.iter() {
- let mut new_word = String::new();
- new_word.extend(prefix.iter());
+ // replaces
+ let mut new_word = String::with_capacity(word_len);
+ new_word.push_str(prefix);
new_word.push(*new_char);
- new_word.extend(suffix.iter().skip(1));
- edits.insert(new_word);
- }
+ new_word.push_str(skip_first_char!(suffix));
- // inserts
- for new_char in self.alphabet_chars.iter() {
- let mut new_word = String::new();
- new_word.extend(prefix.iter());
+ edits.push(new_word);
+
+ // inserts
+ let mut new_word = String::with_capacity(word_len + new_char.len_utf8());
+ new_word.push_str(prefix);
new_word.push(*new_char);
- new_word.extend(suffix.iter());
- edits.insert(new_word);
+ new_word.push_str(suffix);
+ edits.push(new_word);
}
}
// inserts at end
for new_char in self.alphabet_chars.iter() {
- let mut new_word = String::new();
- new_word.extend(chars.iter());
+ let mut new_word = String::with_capacity(word_len + 1);
+ new_word.push_str(&word);
new_word.push(*new_char);
- edits.insert(new_word);
+ edits.push(new_word);
}
- edits
+ edits.into_iter().collect()
}
pub fn edits2(&self, word: &str) -> HashSet<String> {
let mut edits = HashSet::new();
for edit in self.edits1(word) {
- edits.extend(self.edits1(&edit).into_iter());
+ edits.extend(self.edits1(&edit));
}
edits
}
-}
+}