fxsjy · alex-wearisma · Jul 25, 2021
diff --git a/jieba/__init__.py b/jieba/__init__.py
@@ -5,6 +5,7 @@
 
 import marshal
 import re
+import io
 import tempfile
 import threading
 import time
@@ -414,6 +415,8 @@ def load_userdict(self, f):
             if tag is not None:
                 tag = tag.strip()
             self.add_word(word, freq, tag)
+        if isinstance(f, io.IOBase):
+            f.close()
 
     def add_word(self, word, freq=None, tag=None):
         """

diff --git a/jieba/analyse/tfidf.py b/jieba/analyse/tfidf.py
@@ -24,7 +24,8 @@ def set_stop_words(self, stop_words_path):
         abs_path = _get_abs_path(stop_words_path)
         if not os.path.isfile(abs_path):
             raise Exception("jieba: file does not exist: " + abs_path)
-        content = open(abs_path, 'rb').read().decode('utf-8')
+        with open(abs_path, 'rb') as f:
+            content = f.read().decode('utf-8')
         for line in content.splitlines():
             self.stop_words.add(line)
 
@@ -44,7 +45,8 @@ def __init__(self, idf_path=None):
     def set_new_path(self, new_idf_path):
         if self.path != new_idf_path:
             self.path = new_idf_path
-            content = open(new_idf_path, 'rb').read().decode('utf-8')
+            with open(new_idf_path, 'rb') as f:
+                content = f.read().decode('utf-8')
             self.idf_freq = {}
             for line in content.splitlines():
                 word, freq = line.strip().split(' ')

diff --git a/test/extract_tags.py b/test/extract_tags.py
@@ -23,7 +23,8 @@
 else:
     topK = int(opt.topK)
 
-content = open(file_name, 'rb').read()
+with open(file_name, 'rb') as f:
+    content = f.read()
 
 tags = jieba.analyse.extract_tags(content, topK=topK)
 

diff --git a/test/extract_tags_idfpath.py b/test/extract_tags_idfpath.py
@@ -23,7 +23,8 @@
 else:
     topK = int(opt.topK)
 
-content = open(file_name, 'rb').read()
+with open(file_name, 'rb') as f:
+    content = f.read()
 
 jieba.analyse.set_idf_path("../extra_dict/idf.txt.big");
 

diff --git a/test/extract_tags_stop_words.py b/test/extract_tags_stop_words.py
@@ -23,7 +23,8 @@
 else:
     topK = int(opt.topK)
 
-content = open(file_name, 'rb').read()
+with open(file_name, 'rb') as f:
+    content = f.read()
 
 jieba.analyse.set_stop_words("../extra_dict/stop_words.txt")
 jieba.analyse.set_idf_path("../extra_dict/idf.txt.big");

diff --git a/test/extract_tags_with_weight.py b/test/extract_tags_with_weight.py
@@ -32,7 +32,8 @@
     else:
         withWeight = False
 
-content = open(file_name, 'rb').read()
+with open(file_name, 'rb') as f:
+    content = f.read()
 
 tags = jieba.analyse.extract_tags(content, topK=topK, withWeight=withWeight)
 

diff --git a/test/parallel/extract_tags.py b/test/parallel/extract_tags.py
@@ -24,8 +24,8 @@
 else:
     topK = int(opt.topK)
 
-
-content = open(file_name,'rb').read()
+with open(file_name, 'rb') as f:
+    content = f.read()
 
 tags = jieba.analyse.extract_tags(content,topK=topK)
 

diff --git a/test/parallel/test_file.py b/test/parallel/test_file.py
@@ -6,7 +6,8 @@
 jieba.enable_parallel()
 
 url = sys.argv[1]
-content = open(url,"rb").read()
+with open(url, 'rb') as f:
+    content = f.read()
 t1 = time.time()
 words = "/ ".join(jieba.cut(content))
 
@@ -15,6 +16,7 @@
 
-log_f = open("1.log","wb")
-log_f.write(words.encode('utf-8'))
+# Context manager closes the log even if encoding or writing raises.
+with open("1.log", "wb") as log_f:
+    log_f.write(words.encode('utf-8'))
 
 print('speed %s bytes/second' % (len(content)/tm_cost))
 
diff --git a/test/parallel/test_pos_file.py b/test/parallel/test_pos_file.py
@@ -8,7 +8,8 @@
 jieba.enable_parallel(4)
 
 url = sys.argv[1]
-content = open(url,"rb").read()
+with open(url, 'rb') as f:
+    content = f.read()
 t1 = time.time()
 words = list(pseg.cut(content))
 
@@ -17,6 +18,7 @@
 
-log_f = open("1.log","w")
-log_f.write(' / '.join(map(str, words)))
+# Context manager closes the log even if joining or writing raises.
+with open("1.log", "w") as log_f:
+    log_f.write(' / '.join(map(str, words)))
 
 print('speed' , len(content)/tm_cost, " bytes/second")
 
diff --git a/test/test_file.py b/test/test_file.py
@@ -5,7 +5,8 @@
 jieba.initialize()
 
 url = sys.argv[1]
-content = open(url,"rb").read()
+with open(url, 'rb') as f:
+    content = f.read()
 t1 = time.time()
 words = "/ ".join(jieba.cut(content))
 

diff --git a/test/test_pos_file.py b/test/test_pos_file.py
@@ -7,7 +7,8 @@
 import jieba.posseg as pseg
 
 url = sys.argv[1]
-content = open(url,"rb").read()
+with open(url, 'rb') as f:
+    content = f.read()
 t1 = time.time()
 words = list(pseg.cut(content))
 
@@ -16,6 +17,7 @@
 
-log_f = open("1.log","w")
-log_f.write(' / '.join(map(str, words)))
+# Context manager closes the log even if joining or writing raises.
+with open("1.log", "w") as log_f:
+    log_f.write(' / '.join(map(str, words)))
 
 print('speed' , len(content)/tm_cost, " bytes/second")