Skip to content
This repository has been archived by the owner on Sep 30, 2024. It is now read-only.

Base stemmer function #13

Draft
wants to merge 34 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
daca277
Add the ability for Stemmer to evaluate whether a word is singular (t…
rahmatnazali Jan 31, 2023
b880dce
Add alpha Stemmer.stem()
rahmatnazali Jan 31, 2023
0cae4b2
Merge remote-tracking branch 'origin/main' into ft_stemmer
rahmatnazali Jan 31, 2023
0e26815
Fix type mismatch
rahmatnazali Feb 1, 2023
7ffe4a2
Add alpha parent for Stemmer.stem_singular_word
rahmatnazali Feb 1, 2023
ac8574f
Add function doc of Stemmer.stem_plural_word
rahmatnazali Feb 1, 2023
8067bea
Add alpha Context implementation
rahmatnazali Feb 1, 2023
1f264e0
Update alpha stemmer test
rahmatnazali Feb 1, 2023
35af9b5
Merge remote-tracking branch 'origin/ft_stemmer' into ft_singular_ste…
rahmatnazali Feb 1, 2023
8962afb
Merge remote-tracking branch 'origin/ft_stemmer' into ft_plural_stemmer
rahmatnazali Feb 1, 2023
40ae7c9
Merge remote-tracking branch 'origin/ft_stemmer' into ft_context
rahmatnazali Feb 1, 2023
7e6c9bd
Add Visitor trait
rahmatnazali Feb 1, 2023
7d5a9fc
Visitor List: add DontStemShortWord
rahmatnazali Feb 1, 2023
a2e0ff5
Visitor List: add DontStemShortWord
rahmatnazali Feb 1, 2023
20ea389
Merge pull request #7 from rahmatnazali/ft_visitor_list
rahmatnazali Feb 1, 2023
4bc1ee3
Modify how the Visit can alter Context.
rahmatnazali Feb 1, 2023
59e5fa7
Remove Context.stop_process as its value is directly mutated from within
rahmatnazali Feb 1, 2023
84e18e5
Rename VisitorResult::None to VisitorResult::DoNothing
rahmatnazali Feb 1, 2023
323d3dd
Add break statement inside visitor iteration
rahmatnazali Feb 1, 2023
a7eb49f
Remove unused comment
rahmatnazali Feb 1, 2023
01bd8d6
Add alpha Visitor implementation (#6)
rahmatnazali Feb 1, 2023
c809443
Add Precedence Adjustment
rahmatnazali Feb 1, 2023
50c1005
Merge pull request #8 from rahmatnazali/ft_confix_stripping
rahmatnazali Feb 1, 2023
d80f577
Add todo on Context implementation with confix stripper
rahmatnazali Feb 1, 2023
8eb4c36
Move Visitor List to its own configuration struct
rahmatnazali Feb 2, 2023
54b30c7
Fix typo
rahmatnazali Feb 2, 2023
ac0b33f
Merge pull request #9 from rahmatnazali/ft_context
rahmatnazali Feb 2, 2023
0591f0f
Merge branch 'main' into ft_stemmer
rahmatnazali Feb 10, 2023
676e5b7
Merge remote-tracking branch 'origin/ft_plural_stemmer' into ft_stemmer
rahmatnazali Feb 10, 2023
37e82be
Merge remote-tracking branch 'origin/ft_singular_stemmer' into ft_ste…
rahmatnazali Feb 10, 2023
7dcd790
Enable ignored testcase
rahmatnazali Feb 13, 2023
9479a91
Add VisitorResult to handle the result of .visit() method
rahmatnazali Feb 13, 2023
fb6acc3
Fix DontStemStopWord to comply with the new VisitorResult
rahmatnazali Feb 13, 2023
9c0b229
Update Context.execute() to comply with the new VisitorResult struct
rahmatnazali Feb 13, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/stemmer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,22 @@ impl Stemmer {
stemmed_words.join(" ")
}

/// Reduce a singular word to its common stem form.
///
/// A fresh `Context` is built from `word` and this stemmer's dictionary,
/// executed once, and its resulting word is returned.
///
/// Example: `mengalahkan` to `kalah`
fn stem_singular_word(&self, word: &str) -> String {
    let mut stemming_context = Context::new(word, &self.dictionary, None);
    stemming_context.execute();
    stemming_context.get_resulting_word()
}

/// Stem a plural word to its common stem form.
///
/// Example: `bersama-sama` to `sama`
///
/// # Panics
/// Always panics for now: the body is still a `todo!()` placeholder, so this
/// method must not be called until the plural stemming logic is implemented.
///
/// # Reference
/// - Asian J. (2007) “Effective Techniques for Indonesian Text Retrieval”, pages 76-77.
/// - <http://researchbank.rmit.edu.au/eserv/rmit:6312/Asian.pdf>
fn stem_plural_word(&self, word: &str) -> String {
todo!()
}
Expand Down
21 changes: 15 additions & 6 deletions src/stemmer/context.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
use std::ops::Not;
use crate::dictionary::Dictionary;
use crate::stemmer::confix_stripping::precedence_adjustment::PrecedenceAdjustment;
// use crate::stemmer::context::removal::{Removal};
use crate::stemmer::context::visitor::{Visitor, VisitorConfiguration, VisitorResult};
use crate::stemmer::context::visitor::{Visitor, VisitorConfiguration};
use crate::stemmer::context::visitor::dont_stem_short_word::DontStemShortWord;

// pub mod removal;
Expand Down Expand Up @@ -46,12 +47,20 @@ impl<'a> Context<'a> {

// Iterate each visitor and run its modifier
for visitor in &(self.general_visitors) {
let visitor_result: VisitorResult = visitor.visit(&self);
match visitor_result {
VisitorResult::StopProcess => { self.is_process_stopped = true; }
VisitorResult::DoNothing => {}
let optional_context_result = visitor.visit(&self);
if optional_context_result.is_some() {
let context_result = optional_context_result.unwrap();

if context_result.current_word.eq(&self.current_word).not() {
self.current_word = context_result.current_word.clone()
}

if context_result.should_process_stop == true {
self.is_process_stopped = true;
}
}
if !self.is_process_stopped {

// Leave the visitor loop only once a visitor has requested the process to
// stop. The previous guard was inverted: it broke out whenever the process
// was NOT stopped, so at most the first visitor ever ran unless it asked
// for a stop.
if self.is_process_stopped {
    break;
}
}
Expand Down
27 changes: 20 additions & 7 deletions src/stemmer/context/visitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,9 @@ pub enum VisitorType {
SuffixVisitor,
}

#[derive(PartialEq, Debug)]
pub enum VisitorResult {
StopProcess,
DoNothing,
}

pub trait Visitor {
fn get_visitor_type(&self) -> VisitorType;
fn visit(&self, context: &Context) -> VisitorResult;
fn visit<'a>(&'a self, context: &'a Context) -> Option<VisitorResult>;
}

pub struct VisitorConfiguration {
Expand All @@ -41,4 +35,23 @@ impl VisitorConfiguration {
],
}
}
}

/// Outcome a visitor hands back after inspecting a `Context`.
///
/// `Debug` and `PartialEq` are derived so results can be compared and printed
/// in assertions; `Clone` and `Eq` come for free because every field supports
/// them.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct VisitorResult {
    /// The context's original word, as supplied by the visitor.
    pub original_word: String,
    /// The word the context should carry on with. The caller copies it into
    /// its own `current_word` when the two differ.
    pub current_word: String,
    /// Optional resulting word.
    /// NOTE(review): no visible consumer reads this field yet — confirm intended use.
    pub result_word: Option<String>,
    /// When `true`, the caller marks its process as stopped.
    pub should_process_stop: bool,
}

impl VisitorResult {
    /// Build a `VisitorResult` that only serves as a stop-process flag.
    ///
    /// The three arguments are stored unchanged and `should_process_stop` is
    /// set to `true`.
    pub fn stop_process(
        original_word: String,
        current_word: String,
        result_word: Option<String>,
    ) -> Self {
        // The arguments are already owned, so they are moved into the struct
        // directly; cloning them first would only add needless allocations.
        Self {
            original_word,
            current_word,
            result_word,
            should_process_stop: true,
        }
    }
}
29 changes: 18 additions & 11 deletions src/stemmer/context/visitor/dont_stem_short_word.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,15 @@ impl Visitor for DontStemShortWord {
VisitorType::GeneralVisitor
}

fn visit(&self, context: &Context) -> VisitorResult {
fn visit<'a>(&'a self, context: &'a Context) -> Option<VisitorResult> {
if self.is_short_word(&(context.current_word)) {
return VisitorResult::StopProcess
return Some(VisitorResult::stop_process(
context.original_word.clone().to_string(),
context.current_word.clone(),
None
))
}
VisitorResult::DoNothing
None
}
}

Expand Down Expand Up @@ -51,22 +55,25 @@ mod dont_stem_short_word_test {
}

#[test]
fn short_word_should_return_stop_process() {
fn short_word_should_return_result_with_stop_process() {
let dictionary = Dictionary::new();
let mut context = Context::new("iya", &dictionary, None);

let object = DontStemShortWord;
let result = object.visit(&context);
assert_eq!(result, VisitorResult::StopProcess);
let object = DontStemShortWord {};
let optional_context_result = object.visit(&context);
assert_eq!(optional_context_result.is_some(), true);

let context_result = optional_context_result.unwrap();
assert_eq!(context_result.should_process_stop, true);
}

#[test]
fn long_word_should_return_do_nothing() {
fn long_word_should_not_return_visitor_result() {
let dictionary = Dictionary::new();
let mut context = Context::new("kambing", &dictionary, None);

let object = DontStemShortWord;
let result = object.visit(&context);
assert_eq!(result, VisitorResult::DoNothing);
let object = DontStemShortWord {};
let optional_context_result = object.visit(&context);
assert_eq!(optional_context_result.is_none(), true);
}
}
1 change: 0 additions & 1 deletion tests/stemmer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ mod stemmer_test {
use rustrawi::stemmer::Stemmer;

#[test]
#[ignore]
fn should_initialize_stemmer_with_default_dictionary() {
let stemmer = Stemmer::new();
assert_eq!(stemmer.len(), 29932);
Expand Down