Решение на Spell Checker от Петър Петров
Резултати
- 5 точки от тестове
- 0 бонус точки
- 5 точки общо
- 4 успешни тест(а)
- 11 неуспешни тест(а)
Код
use std::collections::HashMap;
use std::fmt;
use std::collections::HashSet;
/// Strips every character that cannot be part of a word from a line of text.
///
/// Leading and trailing whitespace is trimmed first. From what remains, only
/// alphabetic characters, whitespace, hyphens (`-`) and apostrophes (`'`) are
/// kept; digits, punctuation and any other symbols are dropped.
///
/// Interior whitespace is preserved exactly as written (runs of spaces are
/// not collapsed), so the result can still be split into words with
/// `split_whitespace`.
pub fn clean_line(input: &str) -> String {
    input
        .trim()
        .chars()
        .filter(|&c| c.is_alphabetic() || c.is_whitespace() || c == '-' || c == '\'')
        .collect()
}
pub struct WordCounter{
words_map:HashMap<String, u32>
}
impl WordCounter {
pub fn new() -> Self {
let mut words:HashMap<String, u32> = HashMap::new();
WordCounter{words_map:words}
}
pub fn from_str(input: &str) -> Self {
let mut lines = input.lines();
let mut words:HashMap<String, u32> = HashMap::new();
for line in lines{
let separateLine=clean_line(line);
let mut iter=separateLine.split_whitespace();
for word in iter{
if words.contains_key(word) {
let result=words.get(word);
let mut m;
match result {
Some(x) => m=x+1,
None => m=1,
}
words.insert(word.to_string(),m);
}
else{
words.insert(word.to_string(),1);
}
}
}
WordCounter{words_map:words}
}
pub fn words(&self) -> Vec<&String> {
let mut v = Vec::new();
for (key, val) in self.words_map.iter() {
v.push(key);
}
v.sort_by(|a, b| a.to_lowercase().cmp(&b.to_lowercase()));
v
}
pub fn add(&mut self, item: & str) {
item.replace(""," ").to_lowercase();
if self.words_map.contains_key(item) {
let result=self.words_map.get(item);
let m;
match result {
Some(x) => m=x+1,
None => m=1,
}
self.words_map.insert(item.to_string(),m);
}
else{
self.words_map.insert(item.to_string(),1);
}
}
pub fn get(&self, word: &str) -> u32 {
let mut count=0;
let vec=self.words();
for word_now in vec{
if word_now == &word.to_string() {
count=count+1;
}
}
count
}
pub fn total_count(&self) -> u32 {
let lenght = self.words().len() as u32;
lenght
}
}
/// Renders the counter as a header line with the total count, followed by
/// one `word: count` line per distinct word.
///
/// Entries are ordered by descending count; words with the same count are
/// ordered alphabetically so that the output does not depend on the hash
/// map's iteration order.
impl fmt::Display for WordCounter {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut entries: Vec<(&String, &u32)> = self.words_map.iter().collect();
        entries.sort_by(|a, b| b.1.cmp(a.1).then_with(|| a.0.cmp(b.0)));

        writeln!(f, "WordCounter, total count: {}", self.total_count())?;
        for (word, count) in entries {
            writeln!(f, "{}: {}", word, count)?;
        }
        Ok(())
    }
}
/// A pair of owned strings.
///
/// NOTE(review): no code visible in this file constructs this struct or reads
/// its fields; presumably a leftover helper -- confirm before removing.
pub struct TwoStrings{
// First string of the pair.
string_one:String,
// Second string of the pair.
string_two:String
}
/// The 26 lowercase letters of the English alphabet.
///
/// Presumably meant to be passed as the `alphabet` argument of
/// `SpellChecker::new` for English corpora.
pub const ALPHABET_EN: &str = "abcdefghijklmnopqrstuvwxyz";

/// The 30 lowercase letters of the Bulgarian alphabet.
///
/// Presumably meant to be passed as the `alphabet` argument of
/// `SpellChecker::new` for Bulgarian corpora.
pub const ALPHABET_BG: &str = "абвгдежзийклмнопрстуфхцчшщъьюя";
pub struct SpellChecker {
corpus:WordCounter,
alphabet:String
}
impl SpellChecker {
pub fn new(corpus: &str, alphabet: &str) -> Self {
let word_counter=WordCounter::from_str(corpus);
SpellChecker{corpus:word_counter,alphabet:alphabet.to_string()}
}
pub fn correction(&self, word: &str) -> String {
let mut vec=self.candidates(word);
if vec.len() == 1 {
return vec[0].to_string()
}
let mut new_word=String::from("");
let mut count:f64=0.0;
for i in vec.iter(){
if self.probability(i) > count {
new_word=i.to_string();
count=self.probability(i);
}
}
new_word
}
pub fn probability(&self, word: &str) -> f64 {
let word_count=self.corpus.get(word) as f64;
let all_words_count=self.corpus.total_count() as f64;
word_count/all_words_count
}
pub fn known<'a>(&self, words: &'a HashSet<String>) -> Vec<&'a String> {
let mut vec=Vec::new();
for i in words.iter(){
if self.corpus.words_map.contains_key(i){
vec.push(i);
}
}
vec
}
pub fn candidates(&self, word: &str) -> Vec<String> {
let mut vec=Vec::new();
let mut vec1=Vec::<String>::new();
if self.corpus.words_map.contains_key(word){
vec.push(word.to_string());
return vec
}
else {
let mut set=self.edits1(word);
let candidates_one=self.known(&set);
if candidates_one.len() > 0 {
for i in candidates_one.iter(){
vec1.push((*i).to_string());
}
return vec1
}
set=self.edits2(word);
let mut candidates_two=self.known(&set);
if candidates_two.len() > 0 {
for i in candidates_two.iter(){
vec1.push((*i).to_string());
}
return vec1
}
vec.push(word.to_string());
vec
}
}
pub fn edits1(&self, word: &str) -> HashSet<String> {
let mut left=Vec::new();
let mut right=Vec::new();
let mut set = HashSet::new();
let arr:Vec<_>=word.chars().collect();
for i in 0..arr.len()+1 {
let mut first = String::from("");
for j in 0..i {
first.push(arr[j]);
}
let mut second = String::from("");
for k in i..arr.len() {
second.push(arr[k]);
}
left.push(first);
right.push(second);
}
let char_arr:Vec<_>= String::from(word).chars().collect();
for i in 0..char_arr.len(){
let mut new_word=String::from("");
for j in 0..i {
new_word.push(char_arr[j]);
}
for k in i+1..char_arr.len() {
new_word.push(char_arr[k]);
}
set.insert(new_word);
}
for i in 0..left.len()-2{
let mut new_word=String::from("");
new_word.push_str(&left[i]);
let first = right[i].chars().nth(0).unwrap();
let second = right[i].chars().nth(1).unwrap();
new_word.push(second);
new_word.push(first);
let chars:Vec<_> = right[i].chars().collect();
for i in 2..chars.len() {
new_word.push(chars[i]);
}
set.insert(new_word);
}
for i in 0..left.len()-1{
for c in self.alphabet.chars(){
let mut word = String::from(&left[i]);
word.push(c);
let chars:Vec<_> =right[i].chars().collect();
for i in 1..chars.len(){
word.push(chars[i]);
}
}
}
for i in 0..left.len(){
for c in self.alphabet.chars(){
let mut new_word=String::from("");
new_word.push_str(&left[i]);
new_word.push(c);
new_word.push_str(&right[i]);
set.insert(new_word);
}
}
set
}
pub fn edits2(&self, word: &str) -> HashSet<String> {
let mut set = HashSet::new();
let mut set_edits1=self.edits1(word);
for e1 in set_edits1 {
let mut set_edits1_e1=self.edits1(&e1);
for e2 in set_edits1_e1 {
set.insert(e2);
}
}
set
}
}
Лог от изпълнението
Compiling solution v0.1.0 (/tmp/d20200114-2173579-1s6ep0p/solution) warning: unused variable: `len` --> src/lib.rs:6:9 | 6 | let len=input.len(); | ^^^ help: consider prefixing with an underscore: `_len` | = note: `#[warn(unused_variables)]` on by default warning: unused variable: `val` --> src/lib.rs:61:18 | 61 | for (key, val) in self.words_map.iter() { | ^^^ help: consider prefixing with an underscore: `_val` warning: variable does not need to be mutable --> src/lib.rs:8:9 | 8 | let mut iter: Vec<&str> =input.split_whitespace().collect(); | ----^^^^ | | | help: remove this `mut` | = note: `#[warn(unused_mut)]` on by default warning: variable does not need to be mutable --> src/lib.rs:31:13 | 31 | let mut words:HashMap<String, u32> = HashMap::new(); | ----^^^^^ | | | help: remove this `mut` warning: variable does not need to be mutable --> src/lib.rs:36:13 | 36 | let mut lines = input.lines(); | ----^^^^^ | | | help: remove this `mut` warning: variable does not need to be mutable --> src/lib.rs:40:17 | 40 | let mut iter=separateLine.split_whitespace(); | ----^^^^ | | | help: remove this `mut` warning: variable does not need to be mutable --> src/lib.rs:44:27 | 44 | let mut m; | ----^ | | | help: remove this `mut` warning: variable does not need to be mutable --> src/lib.rs:147:13 | 147 | let mut vec=self.candidates(word); | ----^^^ | | | help: remove this `mut` warning: variable does not need to be mutable --> src/lib.rs:198:17 | 198 | let mut candidates_two=self.known(&set); | ----^^^^^^^^^^^^^^ | | | help: remove this `mut` warning: variable does not need to be mutable --> src/lib.rs:283:13 | 283 | let mut set_edits1=self.edits1(word); | ----^^^^^^^^^^ | | | help: remove this `mut` warning: variable does not need to be mutable --> src/lib.rs:285:17 | 285 | let mut set_edits1_e1=self.edits1(&e1); | ----^^^^^^^^^^^^^ | | | help: remove this `mut` warning: field is never used: `string_one` --> src/lib.rs:126:5 | 126 | string_one:String, | ^^^^^^^^^^^^^^^^^ | = 
note: `#[warn(dead_code)]` on by default warning: field is never used: `string_two` --> src/lib.rs:127:5 | 127 | string_two:String | ^^^^^^^^^^^^^^^^^ warning: variable `separateLine` should have a snake case name --> src/lib.rs:39:17 | 39 | let separateLine=clean_line(line); | ^^^^^^^^^^^^ help: convert the identifier to snake case: `separate_line` | = note: `#[warn(non_snake_case)]` on by default warning: unused variable: `len` --> src/lib.rs:6:9 | 6 | let len=input.len(); | ^^^ help: consider prefixing with an underscore: `_len` | = note: `#[warn(unused_variables)]` on by default warning: unused variable: `val` --> src/lib.rs:61:18 | 61 | for (key, val) in self.words_map.iter() { | ^^^ help: consider prefixing with an underscore: `_val` warning: variable does not need to be mutable --> src/lib.rs:8:9 | 8 | let mut iter: Vec<&str> =input.split_whitespace().collect(); | ----^^^^ | | | help: remove this `mut` | = note: `#[warn(unused_mut)]` on by default warning: variable does not need to be mutable --> src/lib.rs:31:13 | 31 | let mut words:HashMap<String, u32> = HashMap::new(); | ----^^^^^ | | | help: remove this `mut` warning: variable does not need to be mutable --> src/lib.rs:36:13 | 36 | let mut lines = input.lines(); | ----^^^^^ | | | help: remove this `mut` warning: variable does not need to be mutable --> src/lib.rs:40:17 | 40 | let mut iter=separateLine.split_whitespace(); | ----^^^^ | | | help: remove this `mut` warning: variable does not need to be mutable --> src/lib.rs:44:27 | 44 | let mut m; | ----^ | | | help: remove this `mut` warning: variable does not need to be mutable --> src/lib.rs:147:13 | 147 | let mut vec=self.candidates(word); | ----^^^ | | | help: remove this `mut` warning: variable does not need to be mutable --> src/lib.rs:198:17 | 198 | let mut candidates_two=self.known(&set); | ----^^^^^^^^^^^^^^ | | | help: remove this `mut` warning: variable does not need to be mutable --> src/lib.rs:283:13 | 283 | let mut 
set_edits1=self.edits1(word); | ----^^^^^^^^^^ | | | help: remove this `mut` warning: variable does not need to be mutable --> src/lib.rs:285:17 | 285 | let mut set_edits1_e1=self.edits1(&e1); | ----^^^^^^^^^^^^^ | | | help: remove this `mut` warning: field is never used: `string_one` --> src/lib.rs:126:5 | 126 | string_one:String, | ^^^^^^^^^^^^^^^^^ | = note: `#[warn(dead_code)]` on by default warning: field is never used: `string_two` --> src/lib.rs:127:5 | 127 | string_two:String | ^^^^^^^^^^^^^^^^^ warning: variable `separateLine` should have a snake case name --> src/lib.rs:39:17 | 39 | let separateLine=clean_line(line); | ^^^^^^^^^^^^ help: convert the identifier to snake case: `separate_line` | = note: `#[warn(non_snake_case)]` on by default Finished test [unoptimized + debuginfo] target(s) in 4.34s Running target/debug/deps/solution-a73e64ec87929bd0 running 0 tests test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out Running target/debug/deps/solution_test-38971695424b36d5 running 15 tests test solution_test::test_best_word_is_returned ... FAILED test solution_test::test_clean_line_removes_punctuation ... FAILED test solution_test::test_clean_line_trims_the_input ... ok test solution_test::test_correction ... FAILED test solution_test::test_correction_fails_to_produce_new_result ... FAILED test solution_test::test_correction_normalizes_case ... FAILED test solution_test::test_counting ... FAILED test solution_test::test_display ... FAILED test solution_test::test_edits1 ... FAILED test solution_test::test_edits2 ... FAILED test solution_test::test_empty_counter ... ok test solution_test::test_from_empty_str ... ok test solution_test::test_from_str ... FAILED test solution_test::test_known_words ... ok test solution_test::test_probability ... 
FAILED failures: ---- solution_test::test_best_word_is_returned stdout ---- thread 'main' panicked at 'assertion failed: `(left == right)` left: `"beet"`, right: `"boot"`', tests/solution_test.rs:213:5 note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace. ---- solution_test::test_clean_line_removes_punctuation stdout ---- thread 'main' panicked at 'assertion failed: `(left == right)` left: `"ала бала\'ница"`, right: `"ала бала\'ница"`', tests/solution_test.rs:61:5 ---- solution_test::test_correction stdout ---- thread 'main' panicked at 'assertion failed: `(left == right)` left: `"либоф"`, right: `"любов"`', tests/solution_test.rs:177:5 ---- solution_test::test_correction_fails_to_produce_new_result stdout ---- thread 'main' panicked at 'assertion failed: `(left == right)` left: `"Либоф"`, right: `"либоф"`', tests/solution_test.rs:198:5 ---- solution_test::test_correction_normalizes_case stdout ---- thread 'main' panicked at 'assertion failed: `(left == right)` left: `"Либоф"`, right: `"любов"`', tests/solution_test.rs:188:5 ---- solution_test::test_counting stdout ---- thread 'main' panicked at 'assertion failed: `(left == right)` left: `0`, right: `1`', tests/solution_test.rs:35:5 ---- solution_test::test_display stdout ---- thread 'main' panicked at 'assertion failed: `(left == right)` left: `"WordCounter, total count: 1\nOne: 1\n"`, right: `"WordCounter, total count: 1\none: 1\n"`', tests/solution_test.rs:49:5 ---- solution_test::test_edits1 stdout ---- thread 'main' panicked at 'assertion failed: edits.contains("тли")', tests/solution_test.rs:123:5 ---- solution_test::test_edits2 stdout ---- thread 'main' panicked at 'assertion failed: edits.contains("спи")', tests/solution_test.rs:147:5 ---- solution_test::test_from_str stdout ---- thread 'main' panicked at 'assertion failed: `(left == right)` left: `["а", "варен", "го", "картоф", "Любов", "любов", "Обелих", "суров", "той"]`, right: `["а", "варен", "го", "картоф", "любов", "обелих", 
"суров", "той"]`', tests/solution_test.rs:88:5 ---- solution_test::test_probability stdout ---- thread 'main' panicked at 'assertion failed: `(left == right)` left: `0.5`, right: `0.25`', tests/solution_test.rs:99:5 failures: solution_test::test_best_word_is_returned solution_test::test_clean_line_removes_punctuation solution_test::test_correction solution_test::test_correction_fails_to_produce_new_result solution_test::test_correction_normalizes_case solution_test::test_counting solution_test::test_display solution_test::test_edits1 solution_test::test_edits2 solution_test::test_from_str solution_test::test_probability test result: FAILED. 4 passed; 11 failed; 0 ignored; 0 measured; 0 filtered out error: test failed, to rerun pass '--test solution_test'
История (2 версии и 0 коментара)
Петър качи решение на 14.01.2020 13:15 (преди над 5 години)
use std::collections::HashMap;
use std::fmt;
use std::collections::HashSet;
pub fn clean_line(input: &str) -> String {
let mut res=String::from("");
let len=input.len();
let mut iter: Vec<&str> =input.split_whitespace().collect();
let mut count=1;
for word in &iter{
for x in word.chars(){
if x.is_alphabetic()||x.is_whitespace()||x=='-'||x=='\'' {
res.push(x);
}
}
if iter.len() > count{
res.push(' ');
}
count=count+1;
}
res
}
pub struct WordCounter{
words_map:HashMap<String, u32>
}
impl WordCounter {
pub fn new() -> Self {
let mut words:HashMap<String, u32> = HashMap::new();
WordCounter{words_map:words}
}
pub fn from_str(input: &str) -> Self {
let mut lines = input.lines();
let mut words:HashMap<String, u32> = HashMap::new();
for line in lines{
let separateLine=clean_line(line);
let mut iter=separateLine.split_whitespace();
for word in iter{
if words.contains_key(word) {
let result=words.get(word);
let mut m;
match result {
Some(x) => m=x+1,
None => m=1,
}
words.insert(word.to_string(),m);
}
else{
words.insert(word.to_string(),1);
}
}
}
WordCounter{words_map:words}
}
pub fn words(&self) -> Vec<&String> {
let mut v = Vec::new();
for (key, val) in self.words_map.iter() {
v.push(key);
}
v.sort_by(|a, b| a.to_lowercase().cmp(&b.to_lowercase()));
v
}
pub fn add(&mut self, item: & str) {
item.replace(""," ").to_lowercase();
if self.words_map.contains_key(item) {
let result=self.words_map.get(item);
let m;
match result {
Some(x) => m=x+1,
None => m=1,
}
self.words_map.insert(item.to_string(),m);
}
else{
self.words_map.insert(item.to_string(),1);
}
}
pub fn get(&self, word: &str) -> u32 {
let mut count=0;
let vec=self.words();
- for word in vec{
- if word == &word.to_string() {
+ for word_now in vec{
+ if word_now == &word.to_string() {
count=count+1;
}
}
count
}
pub fn total_count(&self) -> u32 {
let lenght = self.words().len() as u32;
lenght
}
}
impl std::fmt::Display for WordCounter {
fn fmt(&self, f: &mut fmt::Formatter) -> std::fmt::Result {
let mut vec:Vec<_> = self.words_map.iter().collect();
vec.sort_by(|a, b| b.1.cmp(a.1));
let result = write!(f, "WordCounter, total count: {}\n", self.total_count());
if result.is_err(){
return result;
}
for (key, value) in vec {
let result = write!(f, "{}: {}\n", key, value);
if result.is_err() {
return result;
}
}
Ok(())
}
}
pub struct TwoStrings{
string_one:String,
string_two:String
}
pub const ALPHABET_EN: &'static str = "abcdefghijklmnopqrstuvwxyz";
pub const ALPHABET_BG: &'static str = "абвгдежзийклмнопрстуфхцчшщъьюя";
pub struct SpellChecker {
corpus:WordCounter,
alphabet:String
}
impl SpellChecker {
pub fn new(corpus: &str, alphabet: &str) -> Self {
let word_counter=WordCounter::from_str(corpus);
SpellChecker{corpus:word_counter,alphabet:alphabet.to_string()}
}
pub fn correction(&self, word: &str) -> String {
let mut vec=self.candidates(word);
if vec.len() == 1 {
return vec[0].to_string()
}
let mut new_word=String::from("");
- let mut count=0;
+ let mut count:f64=0.0;
for i in vec.iter(){
- if self.corpus.get(i) > count {
+ if self.probability(i) > count {
new_word=i.to_string();
- count=self.corpus.get(i);
+ count=self.probability(i);
}
}
new_word
}
pub fn probability(&self, word: &str) -> f64 {
let word_count=self.corpus.get(word) as f64;
let all_words_count=self.corpus.total_count() as f64;
word_count/all_words_count
}
- /// Кои думи от този Set са познати (присъстват в подадения корпус)?
- ///
+
pub fn known<'a>(&self, words: &'a HashSet<String>) -> Vec<&'a String> {
let mut vec=Vec::new();
for i in words.iter(){
if self.corpus.words_map.contains_key(i){
vec.push(i);
}
}
vec
}
- /// Всички познати кандидати за поправка на тази дума:
- ///
- /// - Ако думата е позната, директно връщаме вектор с нея.
- /// - Намираме познатите edits1 на тази дума -- ако има такива, връщаме ги.
- /// - Намираме познатите edits2 на тази дума -- ако има такива, връщаме ги.
- /// - Иначе, връщаме вектор с думата.
- ///
+
pub fn candidates(&self, word: &str) -> Vec<String> {
let mut vec=Vec::new();
let mut vec1=Vec::<String>::new();
if self.corpus.words_map.contains_key(word){
vec.push(word.to_string());
return vec
}
else {
let mut set=self.edits1(word);
let candidates_one=self.known(&set);
if candidates_one.len() > 0 {
for i in candidates_one.iter(){
vec1.push((*i).to_string());
}
return vec1
}
set=self.edits2(word);
let mut candidates_two=self.known(&set);
if candidates_two.len() > 0 {
for i in candidates_two.iter(){
vec1.push((*i).to_string());
+
}
+ return vec1
}
vec.push(word.to_string());
vec
}
}
pub fn edits1(&self, word: &str) -> HashSet<String> {
let mut left=Vec::new();
let mut right=Vec::new();
let mut set = HashSet::new();
- for i in 0..word.len()+1 {
- left.push(&word[..i]);
- right.push(&word[i..]);
+ let arr:Vec<_>=word.chars().collect();
+ for i in 0..arr.len()+1 {
+ let mut first = String::from("");
+ for j in 0..i {
+ first.push(arr[j]);
+ }
+ let mut second = String::from("");
+ for k in i..arr.len() {
+ second.push(arr[k]);
+ }
+ left.push(first);
+ right.push(second);
}
-
- for i in 0..left.len()-1{
+ let char_arr:Vec<_>= String::from(word).chars().collect();
+ for i in 0..char_arr.len(){
let mut new_word=String::from("");
- new_word.push_str(left[i]);
- new_word.push_str(&right[i][1..]);
+ for j in 0..i {
+ new_word.push(char_arr[j]);
+ }
+ for k in i+1..char_arr.len() {
+ new_word.push(char_arr[k]);
+ }
+
set.insert(new_word);
}
for i in 0..left.len()-2{
let mut new_word=String::from("");
- new_word.push_str(left[i]);
- new_word.push_str(&right[i][0..1]);
- let ch=right[i].chars().nth(0);
- match ch{
- Some(x)=>new_word.push(x),
- None=>new_word.push_str("")
+ new_word.push_str(&left[i]);
+ let first = right[i].chars().nth(0).unwrap();
+ let second = right[i].chars().nth(1).unwrap();
+ new_word.push(second);
+ new_word.push(first);
+ let chars:Vec<_> = right[i].chars().collect();
+ for i in 2..chars.len() {
+ new_word.push(chars[i]);
}
- new_word.push_str(&right[i][0..0]);
- new_word.push_str(&right[i][2..]);
+
set.insert(new_word);
}
-
- for i in 0..left.len()-1{
+ for i in 0..left.len()-1{
+
for c in self.alphabet.chars(){
- let mut new_word=String::from("");
- new_word.push_str(left[i]);
- new_word.push(c);
- new_word.push_str(&right[i][1..]);
- set.insert(new_word);
+ let mut word = String::from(&left[i]);
+ word.push(c);
+ let chars:Vec<_> =right[i].chars().collect();
+ for i in 1..chars.len(){
+ word.push(chars[i]);
+ }
}
}
for i in 0..left.len(){
for c in self.alphabet.chars(){
let mut new_word=String::from("");
- new_word.push_str(left[i]);
+ new_word.push_str(&left[i]);
new_word.push(c);
- new_word.push_str(right[i]);
+ new_word.push_str(&right[i]);
set.insert(new_word);
}
}
+
set
}
+
pub fn edits2(&self, word: &str) -> HashSet<String> {
let mut set = HashSet::new();
let mut set_edits1=self.edits1(word);
for e1 in set_edits1 {
let mut set_edits1_e1=self.edits1(&e1);
for e2 in set_edits1_e1 {
set.insert(e2);
}
}
set
}
-}
+}