Skip to content

Commit

Permalink
Create a temporary folder and destroy that folder when the program terminates
Browse files Browse the repository at this point in the history
  • Loading branch information
phantichchai committed Aug 30, 2023
1 parent 0606fff commit 9d455a3
Show file tree
Hide file tree
Showing 6 changed files with 111 additions and 59 deletions.
18 changes: 18 additions & 0 deletions build.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
@echo off

rem Build the project into a standalone executable with PyInstaller.
rem Usage: build.bat <project-directory>

rem Batch scripts receive positional arguments as %1..%9; reading a named
rem "path" variable would actually expand the PATH environment variable
rem (which is never empty, so the guard below would never fire, and
rem clobbering PATH breaks tool lookup). Take the directory from the
rem first positional argument instead.
if "%~1"=="" (
    echo Please provide the project directory path as the first argument.
    exit /b 1
)

rem Navigate to the specified directory (quoted to survive spaces)
cd /d "%~1"

rem Run PyInstaller to build the executable
pyinstaller ^
    --noconsole ^
    --add-data "resources/*;resources/" ^
    --icon "resources/icon.ico" ^
    --clean ^
    main.py
20 changes: 17 additions & 3 deletions process/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,41 @@

import argparse
import time
import os
from .video_processor import VideoProcessor
from .text_processor import TextProcessor
from .table_processor import TableProcessor
from .web_scraper import WebScraper

def run(video_path):

# Check if the json_data folder exists, if not, create it
json_data_folder = "json_data"

if not os.path.exists(json_data_folder):
os.makedirs(json_data_folder)

start_time = time.time()

# Initialize the VideoProcessor
video_processor = VideoProcessor(video_path)
video_processor.capture_frames()
video_processor.process_video()
video_duration = time.time() - start_time
print(f"Video processing time: {video_duration:.2f} seconds")

start_time = time.time()
# Initialize the TextProcessor and process text data
text_processor = TextProcessor()
text_processor = TextProcessor(video_processor.temp_folder_path)
text_processor.process_text_data()
text_processing_duration = time.time() - start_time
print(f"Text processing time: {text_processing_duration:.2f} seconds")

# Clean up the temporary frame folder once text processing has finished
start_time = time.time()
video_processor.cleanup()
video_cleanup_duration = time.time() - start_time
print(f"Clean up images processing time: {video_cleanup_duration:.2f} seconds")

start_time = time.time()
# Initialize the WebScraper and scrape table data
table_scraper = WebScraper(url='https://genshin-impact.fandom.com/wiki/Wonders_of_the_World', output_file='table_data.json')
Expand All @@ -42,7 +56,7 @@ def run(video_path):
filtered_data = table_processor.filter_matching_data(text_processor.data)

# Save the filtered data to a JSON file
filtered_output_file = 'filtered_data.json'
filtered_output_file = os.path.join('json_data', 'filtered_data.json')
table_processor.save_data_to_json(filtered_data, filtered_output_file)
filtering_and_saving_duration = time.time() - start_time
print(f"Filtering and saving time: {filtering_and_saving_duration:.2f} seconds")
Expand Down
19 changes: 10 additions & 9 deletions process/table_processor.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,42 @@
import json
import re
import os
from nltk.tokenize import word_tokenize

class TableProcessor:
    """Load the scraped achievement table from JSON, attach a normalized
    form of each achievement name, and filter out the entries that were
    not found in the OCR-extracted text data."""

    def __init__(self, input_file='table_data.json'):
        # Table data lives in the shared json_data output folder.
        self.input_file = os.path.join('json_data', input_file)
        # Filled by process_table_data(); one dict per table row.
        self.data = []

    def process_table_data(self):
        """Read the table JSON and add a 'processed_value' key to each row:
        the Achievement column reduced to letters/whitespace, tokenized,
        and lower-cased, so later matching is punctuation- and
        case-insensitive. Original column values are kept unchanged."""
        with open(self.input_file, 'r', encoding='utf-8') as json_file:
            table_data = json.load(json_file)

        processed_table_data = []
        for row in table_data:
            processed_row = {}
            for key, value in row.items():
                if key == "Achievement":
                    # Strip everything except letters and whitespace before
                    # tokenizing, so punctuation differences don't break matches.
                    processed_value = re.sub(r'[^a-zA-Z\s]', '', value)
                    tokens = word_tokenize(processed_value)
                    processed_row['processed_value'] = ' '.join(tokens).lower()
                processed_row[key] = value
            processed_table_data.append(processed_row)

        self.data = processed_table_data

    def filter_matching_data(self, text_data):
        """Return the table rows whose normalized achievement name does NOT
        occur in any entry of *text_data* (i.e. achievements still missing).

        Each *text_data* entry is expected to carry a 'processed_text_lines'
        list of strings; entries lacking the key are treated as empty
        rather than raising.
        """
        filtered_data = []
        for table_entry in self.data:
            achievement = table_entry.get('processed_value')
            if not achievement:
                # Rows without a processed achievement name can't be matched.
                continue
            needle = achievement.lower()
            matched = any(
                needle in ' '.join(text_entry.get('processed_text_lines') or []).lower()
                for text_entry in text_data
            )
            if not matched:
                filtered_data.append(table_entry)
        return filtered_data

    def save_data_to_json(self, data, output_file):
        """Write *data* to *output_file* as pretty-printed UTF-8 JSON
        (non-ASCII characters are preserved, not escaped)."""
        with open(output_file, 'w', encoding='utf-8') as json_file:
            json.dump(data, json_file, indent=4, ensure_ascii=False)
7 changes: 2 additions & 5 deletions process/text_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
nltk.download('punkt')

class TextProcessor:
def __init__(self, image_folder='achievements/resource'):
def __init__(self, image_folder):
    """Create a text processor reading images from *image_folder*.

    image_folder: path to the folder of captured frame images — per the
        caller in process/processor.py this is now the VideoProcessor's
        temporary folder rather than a fixed resource path.
    """
    self.image_folder = image_folder
    # Filled by process_text_data(); one dict per image with extracted text.
    self.data = []

Expand All @@ -29,12 +29,9 @@ def process_text_data(self):
if non_empty_lines:
extracted_text_data.append({
'filename': filename,
'text_lines': non_empty_lines,
'order': int(filename.split('.')[0])
'text_lines': non_empty_lines
})

extracted_text_data.sort(key=lambda x: x['order'])

processed_data = []
for entry in extracted_text_data:
processed_lines = self.process_text_lines(entry['text_lines'])
Expand Down
95 changes: 56 additions & 39 deletions process/video_processor.py
Original file line number Diff line number Diff line change
@@ -1,47 +1,64 @@
import cv2 as cv
import cv2
import tempfile
import os
import time

class VideoProcessor:
    """Scan a video for achievement pop-up banners and save each detected
    crop as a numbered PNG inside a self-managed temporary folder.

    The folder lives until cleanup() is called (TextProcessor reads the
    images from temp_folder_path in between)."""

    def __init__(self, video_path):
        # Temporary directory that holds the captured frames; destroyed
        # explicitly via cleanup() when processing is finished.
        self.temp_folder = tempfile.TemporaryDirectory()
        self.video_path = video_path
        self.cap = cv2.VideoCapture(video_path)
        self.frame_rate = 60          # maximum save rate (frames/second)
        self.prev = time.time()       # timestamp of the last saved frame
        self.n = 0                    # sequence number for saved images
        self.temp_folder_path = self.temp_folder.name
        self.extension = ".png"

    def process_video(self):
        """Read the video frame by frame, detect the achievement banner via
        thresholding + contour bounding-box size, and write each detected
        grayscale crop as a zero-padded PNG into the temporary folder.

        Shows a preview window; press 'q' to stop early. The capture is
        always released (and windows destroyed) via the finally block.
        """
        try:
            while self.cap.isOpened():
                time_elapsed = time.time() - self.prev
                ret, frame = self.cap.read()

                if not ret:
                    print("Can't receive frame (stream end?). Exiting ...")
                    break

                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                _, threshold = cv2.threshold(gray, 195, 255, cv2.THRESH_BINARY)

                contours, _ = cv2.findContours(
                    threshold, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

                relevant_frame = None
                for contour in contours:
                    x, y, w, h = cv2.boundingRect(contour)
                    # Banner heuristics: ~1000-1066 px wide, 10-200 px tall
                    # at the expected capture resolution.
                    if 1000 < w < 1066 and 10 < h < 200:
                        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255), 1)
                        relevant_frame = gray[y:y+h, x:x+w]

                cv2.imshow('frame', frame)
                # Rate-limit writes so near-identical consecutive frames
                # of the same banner are not all saved.
                if time_elapsed > 1.0 / self.frame_rate and relevant_frame is not None:
                    self.prev = time.time()
                    frame_filename = f"{self.n:04d}{self.extension}"
                    frame_path = os.path.join(self.temp_folder_path, frame_filename)
                    cv2.imwrite(frame_path, relevant_frame)
                    self.n += 1

                if cv2.waitKey(1) == ord('q'):
                    break

        except Exception as e:
            # Best-effort: report the failure, then fall through to cleanup.
            print("An error occurred:", str(e))

        finally:
            self.cap.release()
            cv2.destroyAllWindows()

    def cleanup(self):
        """Delete the temporary frame folder and everything in it."""
        self.temp_folder.cleanup()

# Example usage
if __name__ == "__main__":
    video_processor = VideoProcessor("input_video.mp4")
    video_processor.process_video()
    # Destroy the temporary frame folder once processing is done;
    # otherwise the TemporaryDirectory is only reclaimed by its finalizer.
    video_processor.cleanup()
11 changes: 8 additions & 3 deletions process/web_scraper.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import requests
from bs4 import BeautifulSoup
import json
import os

class WebScraper:
def __init__(self, url, output_file):
self.url = url
self.output_file = output_file
self.output_file = os.path.join('json_data', output_file)

def scrape_table_data(self):
response = requests.get(self.url)
Expand All @@ -29,7 +30,11 @@ def scrape_table_data(self):
for row in table_data:
table_dict_list.append(dict(zip(headers, row)))

with open(self.output_file, 'w') as json_file:
json.dump(table_dict_list, json_file, indent=4)
with open(self.output_file, 'w', encoding='utf-8') as json_file:
json.dump(table_dict_list, json_file, indent=4, ensure_ascii=False)

print(f"Extracted table data saved to {self.output_file}")

if __name__ == "__main__":
    # Standalone run: scrape the wiki achievement table into
    # json_data/output_file.json.
    # NOTE(review): __init__ joins the path under 'json_data', and that
    # folder is only created by process/processor.run — confirm it exists
    # before running this module directly.
    scraper = WebScraper("https://genshin-impact.fandom.com/wiki/Wonders_of_the_World", "output_file.json")
    scraper.scrape_table_data()

0 comments on commit 9d455a3

Please sign in to comment.