From 7b21d3895d111fed1a184c62e07ddec0178a52af Mon Sep 17 00:00:00 2001
From: Daniel Saxton <dsaxton@pm.me>
Date: Wed, 18 Dec 2024 10:46:43 -0600
Subject: [PATCH 1/5] Add t tags for hashtags

---
 crates/notedeck_columns/src/post.rs | 67 +++++++++++++++++++++++++++--
 1 file changed, 63 insertions(+), 4 deletions(-)
diff --git a/crates/notedeck_columns/src/post.rs b/crates/notedeck_columns/src/post.rs
index a9c519d9..534dae77 100644
--- a/crates/notedeck_columns/src/post.rs
+++ b/crates/notedeck_columns/src/post.rs
@@ -20,9 +20,18 @@ impl NewPost {
     }
 
     pub fn to_note(&self, seckey: &[u8; 32]) -> Note {
-        add_client_tag(NoteBuilder::new())
+        let mut builder = add_client_tag(NoteBuilder::new())
             .kind(1)
-            .content(&self.content)
+            .content(&self.content);
+
+        for hashtag in Self::extract_hashtags(&self.content) {
+            builder = builder
+                .start_tag()
+                .tag_str("t")
+                .tag_str(&hashtag);
+        }
+
+        builder
             .sign(seckey)
             .build()
             .expect("note should be ok")
@@ -106,9 +115,18 @@ impl NewPost {
             enostr::NoteId::new(*quoting.id()).to_bech().unwrap()
         );
 
-        NoteBuilder::new()
+        let mut builder = NoteBuilder::new()
             .kind(1)
-            .content(&new_content)
+            .content(&new_content);
+
+        for hashtag in Self::extract_hashtags(&self.content) {
+            builder = builder
+                .start_tag()
+                .tag_str("t")
+                .tag_str(&hashtag);
+        }
+
+        builder
             .start_tag()
             .tag_str("q")
             .tag_str(&hex::encode(quoting.id()))
@@ -119,4 +137,45 @@ impl NewPost {
             .build()
             .expect("expected build to work")
     }
+
+    fn extract_hashtags(content: &str) -> Vec<String> {
+        let mut hashtags = Vec::new();
+        for word in content.split_whitespace() {
+            if word.starts_with('#') && word.len() > 1 {
+                let tag = word[1..].trim_end_matches(|c: char| !c.is_alphanumeric())
+                    .to_string();
+                if !tag.is_empty() {
+                    hashtags.push(tag);
+                }
+            }
+        }
+        hashtags
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_extract_hashtags() {
+        let test_cases = vec![
+            ("Hello #world", vec!["world"]),
+            ("Multiple #tags #in #one post", vec!["tags", "in", "one"]),
+            ("No hashtags here", vec![]),
+            ("#tag1 with #tag2!", vec!["tag1", "tag2"]),
+            ("Ignore # empty", vec![]),
+            ("Keep #alphanumeric123", vec!["alphanumeric123"]),
+        ];
+
+        for (input, expected) in test_cases {
+            let result = NewPost::extract_hashtags(input);
+            assert_eq!(
+                result,
+                expected.into_iter().map(String::from).collect::<Vec<_>>(),
+                "Failed for input: {}",
+                input
+            );
+        }
+    }
 }

From f6e0ec7f798cbb7df8daaf4fe42d15ac6e79507d Mon Sep 17 00:00:00 2001
From: Daniel Saxton <dsaxton@pm.me>
Date: Wed, 18 Dec 2024 15:02:22 -0600
Subject: [PATCH 2/5] Use HashSet, lowercase, and add emoji tests

---
 crates/notedeck_columns/src/post.rs | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/crates/notedeck_columns/src/post.rs b/crates/notedeck_columns/src/post.rs
index 534dae77..40d2bc3b 100644
--- a/crates/notedeck_columns/src/post.rs
+++ b/crates/notedeck_columns/src/post.rs
@@ -138,14 +138,14 @@ impl NewPost {
             .expect("expected build to work")
     }
 
-    fn extract_hashtags(content: &str) -> Vec<String> {
-        let mut hashtags = Vec::new();
+    fn extract_hashtags(content: &str) -> HashSet<String> {
+        let mut hashtags = HashSet::new();
         for word in content.split_whitespace() {
             if word.starts_with('#') && word.len() > 1 {
                 let tag = word[1..].trim_end_matches(|c: char| !c.is_alphanumeric())
-                    .to_string();
+                    .to_lowercase();
                 if !tag.is_empty() {
-                    hashtags.push(tag);
+                    hashtags.insert(tag);
                 }
             }
         }
@@ -166,13 +166,18 @@ mod tests {
             ("#tag1 with #tag2!", vec!["tag1", "tag2"]),
             ("Ignore # empty", vec![]),
             ("Keep #alphanumeric123", vec!["alphanumeric123"]),
+            ("Testing emoji #🍌sfd", vec!["🍌sfd"]),
+            ("Testing emoji with space #🍌 sfd", vec!["🍌"]),
+            ("Duplicate #tag #tag #tag", vec!["tag"]),
+            ("Mixed case #TaG #tag #TAG", vec!["tag"]),
         ];
 
         for (input, expected) in test_cases {
             let result = NewPost::extract_hashtags(input);
+            let expected: HashSet<String> = expected.into_iter().map(String::from).collect();
             assert_eq!(
                 result,
-                expected.into_iter().map(String::from).collect::<Vec<_>>(),
+                expected,
                 "Failed for input: {}",
                 input
             );

From 659ce458e070cc1aad27f292ff70751ef1eb5aee Mon Sep 17 00:00:00 2001
From: Daniel Saxton <dsaxton@pm.me>
Date: Wed, 18 Dec 2024 18:53:39 -0600
Subject: [PATCH 3/5] Add comma-separated hashtag test and reformat

---
 crates/notedeck_columns/src/post.rs | 33 ++++++++++-------------------
 1 file changed, 11 insertions(+), 22 deletions(-)

diff --git a/crates/notedeck_columns/src/post.rs b/crates/notedeck_columns/src/post.rs
index 40d2bc3b..e99cc21b 100644
--- a/crates/notedeck_columns/src/post.rs
+++ b/crates/notedeck_columns/src/post.rs
@@ -25,16 +25,10 @@ impl NewPost {
             .content(&self.content);
 
         for hashtag in Self::extract_hashtags(&self.content) {
-            builder = builder
-                .start_tag()
-                .tag_str("t")
-                .tag_str(&hashtag);
+            builder = builder.start_tag().tag_str("t").tag_str(&hashtag);
         }
 
-        builder
-            .sign(seckey)
-            .build()
-            .expect("note should be ok")
+        builder.sign(seckey).build().expect("note should be ok")
     }
 
     pub fn to_reply(&self, seckey: &[u8; 32], replying_to: &Note) -> Note {
@@ -115,15 +109,10 @@ impl NewPost {
             enostr::NoteId::new(*quoting.id()).to_bech().unwrap()
         );
 
-        let mut builder = NoteBuilder::new()
-            .kind(1)
-            .content(&new_content);
+        let mut builder = NoteBuilder::new().kind(1).content(&new_content);
 
         for hashtag in Self::extract_hashtags(&self.content) {
-            builder = builder
-                .start_tag()
-                .tag_str("t")
-                .tag_str(&hashtag);
+            builder = builder.start_tag().tag_str("t").tag_str(&hashtag);
         }
 
         builder
@@ -142,7 +131,8 @@ impl NewPost {
         let mut hashtags = HashSet::new();
         for word in content.split_whitespace() {
             if word.starts_with('#') && word.len() > 1 {
-                let tag = word[1..].trim_end_matches(|c: char| !c.is_alphanumeric())
+                let tag = word[1..]
+                    .trim_end_matches(|c: char| !c.is_alphanumeric())
                     .to_lowercase();
                 if !tag.is_empty() {
                     hashtags.insert(tag);
@@ -170,17 +160,16 @@ mod tests {
             ("Testing emoji with space #🍌 sfd", vec!["🍌"]),
             ("Duplicate #tag #tag #tag", vec!["tag"]),
             ("Mixed case #TaG #tag #TAG", vec!["tag"]),
+            (
+                "#tag1, #tag2, #tag3 with commas",
+                vec!["tag1", "tag2", "tag3"],
+            ),
         ];
 
         for (input, expected) in test_cases {
             let result = NewPost::extract_hashtags(input);
             let expected: HashSet<String> = expected.into_iter().map(String::from).collect();
-            assert_eq!(
-                result,
-                expected,
-                "Failed for input: {}",
-                input
-            );
+            assert_eq!(result, expected, "Failed for input: {}", input);
         }
     }
 }

From 7916961bf424c0ee23fdb41342b8abd5f1ccb8ca Mon Sep 17 00:00:00 2001
From: Daniel Saxton <dsaxton@pm.me>
Date: Wed, 18 Dec 2024 19:02:34 -0600
Subject: [PATCH 4/5] Fix emoji hashtags

---
 crates/notedeck_columns/src/post.rs | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/crates/notedeck_columns/src/post.rs b/crates/notedeck_columns/src/post.rs
index e99cc21b..1943278c 100644
--- a/crates/notedeck_columns/src/post.rs
+++ b/crates/notedeck_columns/src/post.rs
@@ -132,7 +132,7 @@ impl NewPost {
         for word in content.split_whitespace() {
             if word.starts_with('#') && word.len() > 1 {
                 let tag = word[1..]
-                    .trim_end_matches(|c: char| !c.is_alphanumeric())
+                    .trim_end_matches(|c: char| c.is_ascii_punctuation())
                     .to_lowercase();
                 if !tag.is_empty() {
                     hashtags.insert(tag);
@@ -155,9 +155,8 @@ mod tests {
             ("No hashtags here", vec![]),
             ("#tag1 with #tag2!", vec!["tag1", "tag2"]),
             ("Ignore # empty", vec![]),
-            ("Keep #alphanumeric123", vec!["alphanumeric123"]),
-            ("Testing emoji #🍌sfd", vec!["🍌sfd"]),
-            ("Testing emoji with space #🍌 sfd", vec!["🍌"]),
+            ("Testing emoji #🍌banana", vec!["🍌banana"]),
+            ("Testing emoji #🍌", vec!["🍌"]),
             ("Duplicate #tag #tag #tag", vec!["tag"]),
             ("Mixed case #TaG #tag #TAG", vec!["tag"]),
             (

From bc7a3c89278fa6261585821c62be0bec01086a04 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <dsaxton@pm.me>
Date: Tue, 24 Dec 2024 19:14:46 -0600
Subject: [PATCH 5/5] Split hashtags on ASCII punctuation as well as whitespace

---
 crates/notedeck_columns/src/post.rs | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/crates/notedeck_columns/src/post.rs b/crates/notedeck_columns/src/post.rs
index 1943278c..9f53df06 100644
--- a/crates/notedeck_columns/src/post.rs
+++ b/crates/notedeck_columns/src/post.rs
@@ -129,11 +129,11 @@ impl NewPost {
 
     fn extract_hashtags(content: &str) -> HashSet<String> {
         let mut hashtags = HashSet::new();
-        for word in content.split_whitespace() {
+        for word in
+            content.split(|c: char| c.is_whitespace() || (c.is_ascii_punctuation() && c != '#'))
+        {
             if word.starts_with('#') && word.len() > 1 {
-                let tag = word[1..]
-                    .trim_end_matches(|c: char| c.is_ascii_punctuation())
-                    .to_lowercase();
+                let tag = word[1..].to_lowercase();
                 if !tag.is_empty() {
                     hashtags.insert(tag);
                 }
@@ -163,6 +163,9 @@ mod tests {
                 "#tag1, #tag2, #tag3 with commas",
                 vec!["tag1", "tag2", "tag3"],
             ),
+            ("Separated by commas #tag1,#tag2", vec!["tag1", "tag2"]),
+            ("Separated by periods #tag1.#tag2", vec!["tag1", "tag2"]),
+            ("Separated by semicolons #tag1;#tag2", vec!["tag1", "tag2"]),
         ];
 
         for (input, expected) in test_cases {