Решение на Spell Checker от Стефан Чолаков
Резултати
- 20 точки от тестове
- 0 бонус точки
- 20 точки общо
- 15 успешни тест(а)
- 0 неуспешни тест(а)
Код
use std::{fmt};
use std::collections::{HashSet,HashMap};
/// Reports whether `c` may stay in a cleaned line.
///
/// Allowed characters are alphabetic characters, whitespace, the hyphen
/// (`-`) and the apostrophe (`'`); everything else is rejected.
fn is_allowed(c: &char) -> bool {
    match *c {
        '-' | '\'' => true,
        other => other.is_alphabetic() || other.is_whitespace(),
    }
}
/// Returns `input` with surrounding whitespace trimmed and every character
/// rejected by `is_allowed` removed.
///
/// Trimming happens before filtering, so whitespace that ends up at the edge
/// only because neighbouring punctuation was dropped is kept.
pub fn clean_line(input: &str) -> String {
    let mut cleaned = String::new();
    for ch in input.trim().chars() {
        if is_allowed(&ch) {
            cleaned.push(ch);
        }
    }
    cleaned
}
/// Counts how many times each word has been recorded.
///
/// `Default` yields an empty counter, `Clone` produces an independent copy,
/// and `Debug` is available for diagnostics.
#[derive(Debug, Clone, Default)]
pub struct WordCounter {
    /// Number of recorded occurrences, keyed by word.
    words: HashMap<String, u32>,
}
impl WordCounter {
pub fn new() -> Self {
WordCounter{words:HashMap::new()}
}
pub fn from_str(input: &str) -> Self {
let mut words = HashMap::new();
for line in input.lines() {
let cleaned_line = clean_line(line);
for word in cleaned_line.split_whitespace() {
let to_add = word.trim().to_lowercase();
*words.entry(to_add).or_insert(0) += 1;
}
}
WordCounter{words}
}
pub fn words(&self) -> Vec<&String> {
let mut words =Vec::new();
for key in self.words.keys() {
words.push(key);
}
words.sort();
words
}
pub fn add(&mut self, item: &str) {
*self.words.entry(item.trim().to_lowercase()).or_insert(0) += 1;
}
pub fn get(&self, word: &str) -> u32 {
let result = self.words.get(word);
match result {
None => 0,
Some(v) => *v,
}
}
pub fn total_count(&self) -> u32 {
self.words.values().sum()
}
}
impl std::fmt::Display for WordCounter {
    /// Writes a header line with the total count, followed by one
    /// `word: count` line per word, most frequent first.
    ///
    /// Words with equal counts are listed alphabetically so that the output
    /// does not depend on the hash map's iteration order.
    fn fmt(&self, f: &mut fmt::Formatter) -> std::fmt::Result {
        let mut entries: Vec<_> = self.words.iter().collect();
        // Descending by count, then ascending by word to break ties.
        entries.sort_by(|a, b| b.1.cmp(a.1).then_with(|| a.0.cmp(b.0)));

        writeln!(f, "WordCounter, total count: {}", self.total_count())?;
        for (word, count) in entries {
            writeln!(f, "{}: {}", word, count)?;
        }
        Ok(())
    }
}
/// The 26 lowercase letters of the English alphabet.
pub const ALPHABET_EN: &str = "abcdefghijklmnopqrstuvwxyz";
/// The 30 lowercase letters of the Bulgarian (Cyrillic) alphabet.
pub const ALPHABET_BG: &str = "абвгдежзийклмнопрстуфхцчшщъьюя";
/// Spelling corrector that combines word counts from a corpus with the
/// alphabet used to generate candidate edits.
pub struct SpellChecker {
/// Word counts built from the corpus passed to `SpellChecker::new`.
counter:WordCounter,
/// Letters tried when generating replacement and insertion edits.
alphabet:String,
}
/// Splits `word` at every character boundary, including both ends.
///
/// For a word of `n` characters the result has `n + 1` `(prefix, suffix)`
/// pairs, ordered by growing prefix; concatenating any pair gives `word`
/// back. The empty word yields the single pair `("", "")`.
pub fn split_word(word: &str) -> Vec<(String, String)> {
    word.char_indices()
        .map(|(offset, _)| offset)
        // The position past the last character is a valid split point too.
        .chain(std::iter::once(word.len()))
        .map(|offset| {
            let (head, tail) = word.split_at(offset);
            (head.to_string(), tail.to_string())
        })
        .collect()
}
impl SpellChecker {
    /// Creates a spell checker whose known words are counted from `corpus`
    /// and whose edits draw replacement/insertion letters from `alphabet`.
    pub fn new(corpus: &str, alphabet: &str) -> Self {
        SpellChecker {
            counter: WordCounter::from_str(corpus),
            alphabet: alphabet.to_string(),
        }
    }

    /// Returns the most frequent candidate correction for `word`.
    ///
    /// The input is trimmed and lowercased first. Candidates with the same
    /// frequency are resolved in favour of the alphabetically smallest one,
    /// so the result is deterministic. When no known word is close enough,
    /// the normalised input itself is returned.
    pub fn correction(&self, word: &str) -> String {
        let word = word.trim().to_lowercase();
        self.candidates(&word)
            .into_iter()
            // All candidates share the same denominator, so ranking by raw
            // count is equivalent to ranking by probability and avoids
            // re-summing the whole corpus for every comparison.
            .max_by(|a, b| {
                self.counter
                    .get(a)
                    .cmp(&self.counter.get(b))
                    .then_with(|| b.cmp(a))
            })
            // `candidates` never returns an empty list; fall back to the
            // input rather than panic if that invariant is ever broken.
            .unwrap_or(word)
    }

    /// Returns the relative frequency of `word` in the corpus: its count
    /// divided by the total number of counted words.
    ///
    /// Returns `0.0` for an empty corpus instead of the `NaN` that `0 / 0`
    /// would produce.
    pub fn probability(&self, word: &str) -> f64 {
        let total = self.counter.total_count();
        if total == 0 {
            return 0.0;
        }
        f64::from(self.counter.get(word)) / f64::from(total)
    }

    /// Returns the members of `words` that occur in the corpus at least once.
    ///
    /// The order of the result follows the iteration order of `words`.
    pub fn known<'a>(&self, words: &'a HashSet<String>) -> Vec<&'a String> {
        words
            .iter()
            .filter(|word| self.counter.get(word) > 0)
            .collect()
    }

    /// Returns the possible corrections for `word`, trying in order:
    ///
    /// 1. `word` itself, if it is known;
    /// 2. all known words one edit away;
    /// 3. all known words two edits away;
    /// 4. `word` itself, unchanged, when nothing else matched.
    ///
    /// The first non-empty group is returned. Within groups 2 and 3 the
    /// order of the candidates is unspecified.
    pub fn candidates(&self, word: &str) -> Vec<String> {
        if self.counter.get(word) > 0 {
            return vec![word.to_string()];
        }

        let one_edit = self.edits1(word);
        let known_one_edit = self.known(&one_edit);
        if !known_one_edit.is_empty() {
            return known_one_edit.into_iter().cloned().collect();
        }

        let two_edits = self.edits2(word);
        let known_two_edits = self.known(&two_edits);
        if !known_two_edits.is_empty() {
            return known_two_edits.into_iter().cloned().collect();
        }

        vec![word.to_string()]
    }

    /// Returns every string that is exactly one edit away from `word`.
    ///
    /// An edit is the deletion of one character, the transposition of two
    /// adjacent characters, the replacement of one character by a letter of
    /// the alphabet, or the insertion of a letter of the alphabet at any
    /// position. `word` is used as given (no trimming or lowercasing), and
    /// all operations work on characters rather than bytes.
    pub fn edits1(&self, word: &str) -> HashSet<String> {
        let chars: Vec<char> = word.chars().collect();
        let mut edits = HashSet::new();

        for split_at in 0..=chars.len() {
            let (head, tail) = chars.split_at(split_at);
            let prefix: String = head.iter().collect();

            // Deletion: drop the first character of the tail.
            if let Some((_, rest)) = tail.split_first() {
                let mut edit = prefix.clone();
                edit.extend(rest);
                edits.insert(edit);
            }

            // Transposition: swap the first two characters of the tail.
            if tail.len() >= 2 {
                let mut edit = prefix.clone();
                edit.push(tail[1]);
                edit.push(tail[0]);
                edit.extend(&tail[2..]);
                edits.insert(edit);
            }

            for letter in self.alphabet.chars() {
                // Replacement: substitute the first character of the tail.
                if let Some((_, rest)) = tail.split_first() {
                    let mut edit = prefix.clone();
                    edit.push(letter);
                    edit.extend(rest);
                    edits.insert(edit);
                }

                // Insertion: place the letter between head and tail.
                let mut edit = prefix.clone();
                edit.push(letter);
                edit.extend(tail);
                edits.insert(edit);
            }
        }

        edits
    }

    /// Returns every string that is one edit away from any string produced
    /// by [`SpellChecker::edits1`] for `word`, i.e. up to two edits away.
    pub fn edits2(&self, word: &str) -> HashSet<String> {
        self.edits1(word)
            .iter()
            .flat_map(|edit| self.edits1(edit))
            .collect()
    }
}
Лог от изпълнението
Compiling solution v0.1.0 (/tmp/d20200114-2173579-udwpz9/solution) Finished test [unoptimized + debuginfo] target(s) in 4.69s Running target/debug/deps/solution-a73e64ec87929bd0 running 0 tests test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out Running target/debug/deps/solution_test-38971695424b36d5 running 15 tests test solution_test::test_best_word_is_returned ... ok test solution_test::test_clean_line_removes_punctuation ... ok test solution_test::test_clean_line_trims_the_input ... ok test solution_test::test_correction ... ok test solution_test::test_correction_fails_to_produce_new_result ... ok test solution_test::test_correction_normalizes_case ... ok test solution_test::test_counting ... ok test solution_test::test_display ... ok test solution_test::test_edits1 ... ok test solution_test::test_edits2 ... ok test solution_test::test_empty_counter ... ok test solution_test::test_from_empty_str ... ok test solution_test::test_from_str ... ok test solution_test::test_known_words ... ok test solution_test::test_probability ... ok test result: ok. 15 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out Doc-tests solution running 0 tests test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out
История (2 версии и 0 коментара)
Стефан качи решение на 14.01.2020 00:40 (преди над 5 години)
-use std::{fmt, fs};
+use std::{fmt};
use std::collections::{HashSet,HashMap};
fn is_allowed(c: &char) ->bool{
c.is_alphabetic() || c.is_whitespace() || (*c == '-') || (*c == '\'')
}
pub fn clean_line(input: &str) -> String {
input.trim().chars().filter(|c| is_allowed(c)).collect()
}
pub struct WordCounter{
words: HashMap<String,u32>,
}
impl WordCounter {
pub fn new() -> Self {
WordCounter{words:HashMap::new()}
}
pub fn from_str(input: &str) -> Self {
let mut words = HashMap::new();
for line in input.lines() {
let cleaned_line = clean_line(line);
for word in cleaned_line.split_whitespace() {
let to_add = word.trim().to_lowercase();
*words.entry(to_add).or_insert(0) += 1;
}
}
WordCounter{words}
}
pub fn words(&self) -> Vec<&String> {
let mut words =Vec::new();
for key in self.words.keys() {
words.push(key);
}
words.sort();
words
}
pub fn add(&mut self, item: &str) {
*self.words.entry(item.trim().to_lowercase()).or_insert(0) += 1;
}
pub fn get(&self, word: &str) -> u32 {
let result = self.words.get(word);
match result {
None => 0,
Some(v) => *v,
}
}
pub fn total_count(&self) -> u32 {
self.words.values().sum()
}
}
impl std::fmt::Display for WordCounter {
fn fmt(&self, f: &mut fmt::Formatter) -> std::fmt::Result {
let mut words_as_vec:Vec<_> = self.words.iter().collect();
words_as_vec.sort_by(|a, b| b.1.cmp(a.1));
let result = write!(f, "WordCounter, total count: {}\n", self.total_count());
if result.is_err(){
return result;
}
for (key, value) in words_as_vec {
let result = write!(f, "{}: {}\n", key, value);
if result.is_err() {
return result;
}
}
Ok(())
}
}
+
pub const ALPHABET_EN: &'static str = "abcdefghijklmnopqrstuvwxyz";
pub const ALPHABET_BG: &'static str = "абвгдежзийклмнопрстуфхцчшщъьюя";
pub struct SpellChecker {
counter:WordCounter,
alphabet:String,
}
+pub fn split_word(word:&str)->Vec<(String,String)>{
+ let mut splits = Vec::new();
+ let word_chars:Vec<_>= String::from(word).chars().collect();
+ for i in 0..word_chars.len()+1 {
+ let mut first = String::from("");
+ for j in 0..i {
+ first.push(word_chars[j]);
+ }
+ let mut second = String::from("");
+ for k in i..word_chars.len() {
+ second.push(word_chars[k]);
+ }
+ splits.push((first, second));
+ }
+ splits
+}
+
impl SpellChecker {
pub fn new(corpus: &str, alphabet: &str) -> Self {
let counter = WordCounter::from_str(corpus);
SpellChecker{counter, alphabet:alphabet.to_string()}
}
pub fn correction(&self, word: &str) -> String {
let word = word.trim().to_lowercase();
let mut candidates = self.candidates(word.as_str());
- candidates.sort_by(|a,b|self.probability(a).partial_cmp(&self.probability(b)).unwrap());
- candidates.reverse();
+ candidates.sort_by(|a,b|self.probability(b).partial_cmp(&self.probability(a)).unwrap());
let correction = candidates.remove(0);
correction
}
pub fn probability(&self, word: &str) -> f64 {
self.counter.get(word) as f64 / self.counter.total_count() as f64
}
pub fn known<'a>(&self, words: &'a HashSet<String>) -> Vec<&'a String> {
let mut known_words = words.iter().collect::<Vec<&String>>();
known_words.retain(|&word|self.counter.get(word)>0);
known_words
}
pub fn candidates(&self, word: &str) -> Vec<String> {
let mut candidates = Vec::new();
if self.counter.get(&word) > 0 {
candidates.push(word.to_string());
return candidates;
}
let edits1 = &self.edits1(word);
let edit1_known_candidates = self.known(edits1);
if !edit1_known_candidates.is_empty() {
for candidate in edit1_known_candidates {
candidates.push(candidate.to_string());
}
return candidates;
}
let edits2 = &self.edits2(word);
let edit2_known_candidates = self.known(edits2);
if !edit2_known_candidates.is_empty() {
for candidate in edit2_known_candidates {
candidates.push(candidate.to_string());
}
return candidates;
}
candidates.push(word.to_string());
candidates
}
pub fn edits1(&self, word: &str) -> HashSet<String> {
- let mut splits = Vec::new();
- let word = word.trim().to_lowercase();
- for i in 0..word.len()+1 {
- splits.push((&word[0..i],&word[i..]));
- }
+ let splits = split_word(word);
let mut edits1 = HashSet::new();
+
//deletes
- for i in 0..word.len() {
- let word = [&word[0..i],&word[i+1..]].concat();
+ let chars:Vec<_>= String::from(word).chars().collect();
+ for i in 0..chars.len() {
+ let mut word = String::from("");
+ for j in 0..i {
+ word.push(chars[j]);
+ }
+ for k in i+1..chars.len() {
+ word.push(chars[k]);
+ }
edits1.insert(word);
}
+
//transposes
- for &split in &splits {
- if (split.1).len()>=2 {
- let first = split.0;
- let second = split.1;
- let word = [first,&second[1..2],&second[0..1],&second[2..]].concat();
+ for split in &splits {
+ if (split.1).chars().count() >=2 {
+ let first = &split.0;
+ let second = &split.1;
+ let first_char = second.chars().nth(0).unwrap();
+ let second_char = second.chars().nth(1).unwrap();
+ let mut word = String::from(first);
+ word.push(second_char);
+ word.push(first_char);
+ let second_chars:Vec<_> = second.chars().collect();
+ for i in 2..second_chars.len() {
+ word.push(second_chars[i]);
+ }
edits1.insert(word);
}
}
+
//replaces
- for &split in &splits {
+ for split in &splits {
for c in self.alphabet.chars() {
if !split.1.is_empty() {
- let mut word = String::from(split.0);
+ let first = &split.0;
+ let second = &split.1;
+ let mut word = String::from(first);
word.push(c);
- word.push_str(&split.1[1..]);
+ let second_chars:Vec<_> = second.chars().collect();
+ for i in 1..second_chars.len(){
+ word.push(second_chars[i]);
+ }
edits1.insert(word);
}
}
}
+
//insert
- for &split in &splits {
+ for split in &splits {
for c in self.alphabet.chars() {
- let mut word = String::from(split.0);
+ let first = &split.0;
+ let second = &split.1;
+ let mut word = String::from(first);
word.push(c);
- word.push_str(split.1);
+ word.push_str(second);
edits1.insert(word);
}
}
+
edits1
}
pub fn edits2(&self, word: &str) -> HashSet<String> {
let mut edits2 = HashSet::new();
for e1 in self.edits1(word) {
let e2 = self.edits1(e1.as_str());
for edit in e2 {
edits2.insert(edit);
}
}
edits2
}
}