From e8572d7ba0226c15447d07769e84c72db56a7784 Mon Sep 17 00:00:00 2001
From: linuxdaemon
Date: Fri, 3 Nov 2017 17:43:07 -0500
Subject: [PATCH 1/2] Fix decoding page title in link announcer

---
 plugins/link_announcer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/plugins/link_announcer.py b/plugins/link_announcer.py
index 48bfb8294..0d35bcb34 100644
--- a/plugins/link_announcer.py
+++ b/plugins/link_announcer.py
@@ -52,7 +52,7 @@ def print_url_title(message, match, chan):
         if len(content) > 1000000:
             r.close()
             return
-        html = BeautifulSoup(content)
+        html = BeautifulSoup(content, "lxml", from_encoding=r.encoding)
         r.close()
         title = " ".join(html.title.text.strip().splitlines())
         out = "Title: \x02{}\x02".format(title)

From 53a5e94dcb4c4c5352daaf51a8241f66fd907584 Mon Sep 17 00:00:00 2001
From: linuxdaemon
Date: Fri, 3 Nov 2017 18:38:17 -0500
Subject: [PATCH 2/2] Clean up link announcer

---
 plugins/link_announcer.py | 62 ++++++++++++++------------------------
 1 file changed, 22 insertions(+), 40 deletions(-)

diff --git a/plugins/link_announcer.py b/plugins/link_announcer.py
index 0d35bcb34..ffe370920 100644
--- a/plugins/link_announcer.py
+++ b/plugins/link_announcer.py
@@ -1,9 +1,10 @@
-import requests
 import re
-from bs4 import BeautifulSoup
 from contextlib import closing
 
-from cloudbot import hook
+import requests
+from bs4 import BeautifulSoup
+
+from cloudbot import hook
 from cloudbot.hook import Priority, Action
 
 # This will match any URL, blacklist removed and abstracted to a priority/halting system
@@ -11,49 +12,30 @@
 
 opt_out = []
 
-traditional = [
-    (1024 ** 5, 'PB'),
-    (1024 ** 4, 'TB'),
-    (1024 ** 3, 'GB'),
-    (1024 ** 2, 'MB'),
-    (1024 ** 1, 'KB'),
-    (1024 ** 0, 'B'),
-    ]
+HEADERS = {
+    'Accept-Language': 'en-US,en;q=0.5',
+    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36'
+}
 
-
-def bytesto(bytes, system = traditional):
-    """ converts bytes to something """
-    bytes = int(bytes)
-    for factor, suffix in system:
-        if bytes >= factor:
-            break
-    amount = int(bytes/factor)
-    return str(amount) + suffix
+MAX_RECV = 1000000
 
 
 @hook.regex(url_re, priority=Priority.LOW, action=Action.HALTTYPE, only_no_match=True)
 def print_url_title(message, match, chan):
     if chan in opt_out:
         return
-    HEADERS = {
-        'Accept-Language': 'en-US,en;q=0.5',
-        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36'
-    }
-    with closing(requests.get(match.group(), headers = HEADERS, stream = True, timeout=3)) as r:
+
+    with closing(requests.get(match.group(), headers=HEADERS, stream=True, timeout=3)) as r:
         if not r.encoding:
-            # remove the content type and size from output for now
-            r.close()
             return
-        #content = r.headers['content-type']
-        #size = bytesto(r.headers['content-length'])
-        #out = "Content Type: \x02{}\x02 Size: \x02{}\x02".format(content, size)
-        #return out
-        content = r.raw.read(1000000+1, decode_content=True)
-        if len(content) > 1000000:
-            r.close()
-            return
-        html = BeautifulSoup(content, "lxml", from_encoding=r.encoding)
-        r.close()
-        title = " ".join(html.title.text.strip().splitlines())
-        out = "Title: \x02{}\x02".format(title)
-        message(out, chan)
+
+        content = r.raw.read(MAX_RECV + 1, decode_content=True)
+        encoding = r.encoding
+
+        if len(content) > MAX_RECV:
+            return
+
+        html = BeautifulSoup(content, "lxml", from_encoding=encoding)
+        title = " ".join(html.title.text.strip().splitlines())
+        out = "Title: \x02{}\x02".format(title)
+        message(out, chan)
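
Note on the resulting pattern (commentary, not part of either commit): the series converges on a size-capped raw read followed by an encoding-aware parse. Below is a minimal standalone sketch of that pattern, assuming requests, beautifulsoup4, and lxml are installed; fetch_title and its None-on-failure convention are illustrative only, not part of the plugin.

    from contextlib import closing

    import requests
    from bs4 import BeautifulSoup

    MAX_RECV = 1000000  # same 1 MB cap the plugin uses


    def fetch_title(url, timeout=3):
        # Illustrative helper: mirrors print_url_title without the IRC plumbing.
        with closing(requests.get(url, stream=True, timeout=timeout)) as r:
            if not r.encoding:
                # No usable charset on the response; the plugin bails out here too
                return None
            # Read one byte past the cap so an oversized body is detectable
            content = r.raw.read(MAX_RECV + 1, decode_content=True)
            if len(content) > MAX_RECV:
                return None
            # from_encoding hands the server-declared charset to BeautifulSoup
            # instead of letting it guess from the raw bytes
            html = BeautifulSoup(content, "lxml", from_encoding=r.encoding)
            if html.title is None:
                # Extra guard not in the plugin: a page with no <title>
                # would otherwise raise AttributeError
                return None
            return " ".join(html.title.text.strip().splitlines())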
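Two design points worth calling out in the cleaned-up version: reading MAX_RECV + 1 bytes rather than MAX_RECV is what makes the oversize guard meaningful, since a body of exactly MAX_RECV bytes still parses while anything longer trips len(content) > MAX_RECV before BeautifulSoup sees it; and the scattered r.close() calls from the old code are gone because closing() already releases the connection on every exit path, including the early returns.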