From 7b21d3895d111fed1a184c62e07ddec0178a52af Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Wed, 18 Dec 2024 10:46:43 -0600 Subject: [PATCH 1/5] Add t tags for hashtags --- crates/notedeck_columns/src/post.rs | 67 +++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 4 deletions(-) diff --git a/crates/notedeck_columns/src/post.rs b/crates/notedeck_columns/src/post.rs index a9c519d9..534dae77 100644 --- a/crates/notedeck_columns/src/post.rs +++ b/crates/notedeck_columns/src/post.rs @@ -20,9 +20,18 @@ impl NewPost { } pub fn to_note(&self, seckey: &[u8; 32]) -> Note { - add_client_tag(NoteBuilder::new()) + let mut builder = add_client_tag(NoteBuilder::new()) .kind(1) - .content(&self.content) + .content(&self.content); + + for hashtag in Self::extract_hashtags(&self.content) { + builder = builder + .start_tag() + .tag_str("t") + .tag_str(&hashtag); + } + + builder .sign(seckey) .build() .expect("note should be ok") @@ -106,9 +115,18 @@ impl NewPost { enostr::NoteId::new(*quoting.id()).to_bech().unwrap() ); - NoteBuilder::new() + let mut builder = NoteBuilder::new() .kind(1) - .content(&new_content) + .content(&new_content); + + for hashtag in Self::extract_hashtags(&self.content) { + builder = builder + .start_tag() + .tag_str("t") + .tag_str(&hashtag); + } + + builder .start_tag() .tag_str("q") .tag_str(&hex::encode(quoting.id())) @@ -119,4 +137,45 @@ impl NewPost { .build() .expect("expected build to work") } + + fn extract_hashtags(content: &str) -> Vec<String> { + let mut hashtags = Vec::new(); + for word in content.split_whitespace() { + if word.starts_with('#') && word.len() > 1 { + let tag = word[1..].trim_end_matches(|c: char| !c.is_alphanumeric()) + .to_string(); + if !tag.is_empty() { + hashtags.push(tag); + } + } + } + hashtags + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_extract_hashtags() { + let test_cases = vec![ + ("Hello #world", vec!["world"]), + ("Multiple #tags #in #one post", vec!["tags", "in", "one"]), + ("No 
hashtags here", vec![]), + ("#tag1 with #tag2!", vec!["tag1", "tag2"]), + ("Ignore # empty", vec![]), + ("Keep #alphanumeric123", vec!["alphanumeric123"]), + ]; + + for (input, expected) in test_cases { + let result = NewPost::extract_hashtags(input); + assert_eq!( + result, + expected.into_iter().map(String::from).collect::<Vec<String>>(), + "Failed for input: {}", + input + ); + } + } } From f6e0ec7f798cbb7df8daaf4fe42d15ac6e79507d Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Wed, 18 Dec 2024 15:02:22 -0600 Subject: [PATCH 2/5] Use HashSet, lowercase, and add emoji tests --- crates/notedeck_columns/src/post.rs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/crates/notedeck_columns/src/post.rs b/crates/notedeck_columns/src/post.rs index 534dae77..40d2bc3b 100644 --- a/crates/notedeck_columns/src/post.rs +++ b/crates/notedeck_columns/src/post.rs @@ -138,14 +138,14 @@ impl NewPost { .expect("expected build to work") } - fn extract_hashtags(content: &str) -> Vec<String> { - let mut hashtags = Vec::new(); + fn extract_hashtags(content: &str) -> HashSet<String> { + let mut hashtags = HashSet::new(); for word in content.split_whitespace() { if word.starts_with('#') && word.len() > 1 { let tag = word[1..].trim_end_matches(|c: char| !c.is_alphanumeric()) - .to_string(); + .to_lowercase(); if !tag.is_empty() { - hashtags.push(tag); + hashtags.insert(tag); } } } @@ -166,13 +166,18 @@ mod tests { ("#tag1 with #tag2!", vec!["tag1", "tag2"]), ("Ignore # empty", vec![]), ("Keep #alphanumeric123", vec!["alphanumeric123"]), + ("Testing emoji #🍌sfd", vec!["🍌sfd"]), + ("Testing emoji with space #🍌 sfd", vec!["🍌"]), + ("Duplicate #tag #tag #tag", vec!["tag"]), + ("Mixed case #TaG #tag #TAG", vec!["tag"]), ]; for (input, expected) in test_cases { let result = NewPost::extract_hashtags(input); + let expected: HashSet<String> = expected.into_iter().map(String::from).collect(); assert_eq!( result, - expected.into_iter().map(String::from).collect::<Vec<String>>(), + expected, "Failed for input: 
{}", input ); From 659ce458e070cc1aad27f292ff70751ef1eb5aee Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Wed, 18 Dec 2024 18:53:39 -0600 Subject: [PATCH 3/5] Add test and format --- crates/notedeck_columns/src/post.rs | 33 ++++++++++------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/crates/notedeck_columns/src/post.rs b/crates/notedeck_columns/src/post.rs index 40d2bc3b..e99cc21b 100644 --- a/crates/notedeck_columns/src/post.rs +++ b/crates/notedeck_columns/src/post.rs @@ -25,16 +25,10 @@ impl NewPost { .content(&self.content); for hashtag in Self::extract_hashtags(&self.content) { - builder = builder - .start_tag() - .tag_str("t") - .tag_str(&hashtag); + builder = builder.start_tag().tag_str("t").tag_str(&hashtag); } - builder - .sign(seckey) - .build() - .expect("note should be ok") + builder.sign(seckey).build().expect("note should be ok") } pub fn to_reply(&self, seckey: &[u8; 32], replying_to: &Note) -> Note { @@ -115,15 +109,10 @@ impl NewPost { enostr::NoteId::new(*quoting.id()).to_bech().unwrap() ); - let mut builder = NoteBuilder::new() - .kind(1) - .content(&new_content); + let mut builder = NoteBuilder::new().kind(1).content(&new_content); for hashtag in Self::extract_hashtags(&self.content) { - builder = builder - .start_tag() - .tag_str("t") - .tag_str(&hashtag); + builder = builder.start_tag().tag_str("t").tag_str(&hashtag); } builder @@ -142,7 +131,8 @@ impl NewPost { let mut hashtags = HashSet::new(); for word in content.split_whitespace() { if word.starts_with('#') && word.len() > 1 { - let tag = word[1..].trim_end_matches(|c: char| !c.is_alphanumeric()) + let tag = word[1..] 
+ .trim_end_matches(|c: char| !c.is_alphanumeric()) .to_lowercase(); if !tag.is_empty() { hashtags.insert(tag); @@ -170,17 +160,16 @@ mod tests { ("Testing emoji with space #🍌 sfd", vec!["🍌"]), ("Duplicate #tag #tag #tag", vec!["tag"]), ("Mixed case #TaG #tag #TAG", vec!["tag"]), + ( + "#tag1, #tag2, #tag3 with commas", + vec!["tag1", "tag2", "tag3"], + ), ]; for (input, expected) in test_cases { let result = NewPost::extract_hashtags(input); let expected: HashSet<String> = expected.into_iter().map(String::from).collect(); - assert_eq!( - result, - expected, - "Failed for input: {}", - input - ); + assert_eq!(result, expected, "Failed for input: {}", input); } } } From 7916961bf424c0ee23fdb41342b8abd5f1ccb8ca Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Wed, 18 Dec 2024 19:02:34 -0600 Subject: [PATCH 4/5] Fix emoji hashtags --- crates/notedeck_columns/src/post.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/crates/notedeck_columns/src/post.rs b/crates/notedeck_columns/src/post.rs index e99cc21b..1943278c 100644 --- a/crates/notedeck_columns/src/post.rs +++ b/crates/notedeck_columns/src/post.rs @@ -132,7 +132,7 @@ impl NewPost { for word in content.split_whitespace() { if word.starts_with('#') && word.len() > 1 { let tag = word[1..] 
- .trim_end_matches(|c: char| !c.is_alphanumeric()) + .trim_end_matches(|c: char| c.is_ascii_punctuation()) .to_lowercase(); if !tag.is_empty() { hashtags.insert(tag); @@ -155,9 +155,8 @@ mod tests { ("No hashtags here", vec![]), ("#tag1 with #tag2!", vec!["tag1", "tag2"]), ("Ignore # empty", vec![]), - ("Keep #alphanumeric123", vec!["alphanumeric123"]), - ("Testing emoji #🍌sfd", vec!["🍌sfd"]), - ("Testing emoji with space #🍌 sfd", vec!["🍌"]), + ("Testing emoji #🍌banana", vec!["🍌banana"]), + ("Testing emoji #🍌", vec!["🍌"]), ("Duplicate #tag #tag #tag", vec!["tag"]), ("Mixed case #TaG #tag #TAG", vec!["tag"]), ( From bc7a3c89278fa6261585821c62be0bec01086a04 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 24 Dec 2024 19:14:46 -0600 Subject: [PATCH 5/5] Handle punctuation better --- crates/notedeck_columns/src/post.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/crates/notedeck_columns/src/post.rs b/crates/notedeck_columns/src/post.rs index 1943278c..9f53df06 100644 --- a/crates/notedeck_columns/src/post.rs +++ b/crates/notedeck_columns/src/post.rs @@ -129,11 +129,11 @@ impl NewPost { fn extract_hashtags(content: &str) -> HashSet<String> { let mut hashtags = HashSet::new(); - for word in content.split_whitespace() { + for word in + content.split(|c: char| c.is_whitespace() || (c.is_ascii_punctuation() && c != '#')) + { if word.starts_with('#') && word.len() > 1 { - let tag = word[1..] - .trim_end_matches(|c: char| c.is_ascii_punctuation()) - .to_lowercase(); + let tag = word[1..].to_lowercase(); if !tag.is_empty() { hashtags.insert(tag); } @@ -163,6 +163,9 @@ mod tests { "#tag1, #tag2, #tag3 with commas", vec!["tag1", "tag2", "tag3"], ), + ("Separated by commas #tag1,#tag2", vec!["tag1", "tag2"]), + ("Separated by periods #tag1.#tag2", vec!["tag1", "tag2"]), + ("Separated by semicolons #tag1;#tag2", vec!["tag1", "tag2"]), ]; for (input, expected) in test_cases {