Skip to content

Commit

Permalink
Create a temporary folder and destroy that folder when the program terminates
Browse files Browse the repository at this point in the history
  • Loading branch information
phantichchai committed Aug 30, 2023
1 parent 0606fff commit 9d455a3
Show file tree
Hide file tree
Showing 6 changed files with 111 additions and 59 deletions.
18 changes: 18 additions & 0 deletions build.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
@echo off

rem Build the project into a standalone executable with PyInstaller.
rem Usage: build.bat <project-directory>

rem Batch scripts receive positional arguments as %1..%9; reading a named
rem "path" variable would actually expand the PATH environment variable
rem (which is never empty, so the guard below would never fire, and
rem clobbering PATH breaks tool lookup). Take the directory from the
rem first positional argument instead.
if "%~1"=="" (
    echo Please provide the project directory path as the first argument.
    exit /b 1
)

rem Navigate to the specified directory (quoted to survive spaces)
cd /d "%~1"

rem Run PyInstaller to build the executable
pyinstaller ^
    --noconsole ^
    --add-data "resources/*;resources/" ^
    --icon "resources/icon.ico" ^
    --clean ^
    main.py
20 changes: 17 additions & 3 deletions process/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,41 @@

import argparse
import time
import os
from .video_processor import VideoProcessor
from .text_processor import TextProcessor
from .table_processor import TableProcessor
from .web_scraper import WebScraper

def run(video_path):

# Check if the json_data folder exists, if not, create it
json_data_folder = "json_data"

if not os.path.exists(json_data_folder):
os.makedirs(json_data_folder)

start_time = time.time()

# Initialize the VideoProcessor
video_processor = VideoProcessor(video_path)
video_processor.capture_frames()
video_processor.process_video()
video_duration = time.time() - start_time
print(f"Video processing time: {video_duration:.2f} seconds")

start_time = time.time()
# Initialize the TextProcessor and process text data
text_processor = TextProcessor()
text_processor = TextProcessor(video_processor.temp_folder_path)
text_processor.process_text_data()
text_processing_duration = time.time() - start_time
print(f"Text processing time: {text_processing_duration:.2f} seconds")

# Clean up the temporary frame folder once text processing has finished
start_time = time.time()
video_processor.cleanup()
video_cleanup_duration = time.time() - start_time
print(f"Clean up images processing time: {video_cleanup_duration:.2f} seconds")

start_time = time.time()
# Initialize the WebScraper and scrape table data
table_scraper = WebScraper(url='https://genshin-impact.fandom.com/wiki/Wonders_of_the_World', output_file='table_data.json')
Expand All @@ -42,7 +56,7 @@ def run(video_path):
filtered_data = table_processor.filter_matching_data(text_processor.data)

# Save the filtered data to a JSON file
filtered_output_file = 'filtered_data.json'
filtered_output_file = os.path.join('json_data', 'filtered_data.json')
table_processor.save_data_to_json(filtered_data, filtered_output_file)
filtering_and_saving_duration = time.time() - start_time
print(f"Filtering and saving time: {filtering_and_saving_duration:.2f} seconds")
Expand Down
19 changes: 10 additions & 9 deletions process/table_processor.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,42 @@
import json
import re
import os
from nltk.tokenize import word_tokenize

class TableProcessor:
    """Load the scraped achievement table from JSON, attach a normalized
    form of each achievement name, and filter out the entries that were
    not found in the OCR-extracted text data."""

    def __init__(self, input_file='table_data.json'):
        # Table data lives in the shared json_data output folder.
        self.input_file = os.path.join('json_data', input_file)
        # Filled by process_table_data(); one dict per table row.
        self.data = []

    def process_table_data(self):
        """Read the table JSON and add a 'processed_value' key to each row:
        the Achievement column reduced to letters/whitespace, tokenized,
        and lower-cased, so later matching is punctuation- and
        case-insensitive. Original column values are kept unchanged."""
        with open(self.input_file, 'r', encoding='utf-8') as json_file:
            table_data = json.load(json_file)

        processed_table_data = []
        for row in table_data:
            processed_row = {}
            for key, value in row.items():
                if key == "Achievement":
                    # Strip everything except letters and whitespace before
                    # tokenizing, so punctuation differences don't break matches.
                    processed_value = re.sub(r'[^a-zA-Z\s]', '', value)
                    tokens = word_tokenize(processed_value)
                    processed_row['processed_value'] = ' '.join(tokens).lower()
                processed_row[key] = value
            processed_table_data.append(processed_row)

        self.data = processed_table_data

    def filter_matching_data(self, text_data):
        """Return the table rows whose normalized achievement name does NOT
        occur in any entry of *text_data* (i.e. achievements still missing).

        Each *text_data* entry is expected to carry a 'processed_text_lines'
        list of strings; entries lacking the key are treated as empty
        rather than raising.
        """
        filtered_data = []
        for table_entry in self.data:
            achievement = table_entry.get('processed_value')
            if not achievement:
                # Rows without a processed achievement name can't be matched.
                continue
            needle = achievement.lower()
            matched = any(
                needle in ' '.join(text_entry.get('processed_text_lines') or []).lower()
                for text_entry in text_data
            )
            if not matched:
                filtered_data.append(table_entry)
        return filtered_data

    def save_data_to_json(self, data, output_file):
        """Write *data* to *output_file* as pretty-printed UTF-8 JSON
        (non-ASCII characters are preserved, not escaped)."""
        with open(output_file, 'w', encoding='utf-8') as json_file:
            json.dump(data, json_file, indent=4, ensure_ascii=False)
7 changes: 2 additions & 5 deletions process/text_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
nltk.download('punkt')

class TextProcessor:
def __init__(self, image_folder='achievements/resource'):
def __init__(self, image_folder):
    """Create a text processor reading images from *image_folder*.

    image_folder: path to the folder of captured frame images — per the
        caller in process/processor.py this is now the VideoProcessor's
        temporary folder rather than a fixed resource path.
    """
    self.image_folder = image_folder
    # Filled by process_text_data(); one dict per image with extracted text.
    self.data = []

Expand All @@ -29,12 +29,9 @@ def process_text_data(self):
if non_empty_lines:
extracted_text_data.append({
'filename': filename,
'text_lines': non_empty_lines,
'order': int(filename.split('.')[0])
'text_lines': non_empty_lines
})

extracted_text_data.sort(key=lambda x: x['order'])

processed_data = []
for entry in extracted_text_data:
processed_lines = self.process_text_lines(entry['text_lines'])
Expand Down
95 changes: 56 additions & 39 deletions process/video_processor.py
Original file line number Diff line number Diff line change
@@ -1,47 +1,64 @@
import cv2 as cv
import cv2
import tempfile
import os
import time

class VideoProcessor:
    """Scan a video for achievement pop-up banners and save each detected
    crop as a numbered PNG inside a self-managed temporary folder.

    The folder lives until cleanup() is called (TextProcessor reads the
    images from temp_folder_path in between)."""

    def __init__(self, video_path):
        # Temporary directory that holds the captured frames; destroyed
        # explicitly via cleanup() when processing is finished.
        self.temp_folder = tempfile.TemporaryDirectory()
        self.video_path = video_path
        self.cap = cv2.VideoCapture(video_path)
        self.frame_rate = 60          # maximum save rate (frames/second)
        self.prev = time.time()       # timestamp of the last saved frame
        self.n = 0                    # sequence number for saved images
        self.temp_folder_path = self.temp_folder.name
        self.extension = ".png"

    def process_video(self):
        """Read the video frame by frame, detect the achievement banner via
        thresholding + contour bounding-box size, and write each detected
        grayscale crop as a zero-padded PNG into the temporary folder.

        Shows a preview window; press 'q' to stop early. The capture is
        always released (and windows destroyed) via the finally block.
        """
        try:
            while self.cap.isOpened():
                time_elapsed = time.time() - self.prev
                ret, frame = self.cap.read()

                if not ret:
                    print("Can't receive frame (stream end?). Exiting ...")
                    break

                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                _, threshold = cv2.threshold(gray, 195, 255, cv2.THRESH_BINARY)

                contours, _ = cv2.findContours(
                    threshold, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

                relevant_frame = None
                for contour in contours:
                    x, y, w, h = cv2.boundingRect(contour)
                    # Banner heuristics: ~1000-1066 px wide, 10-200 px tall
                    # at the expected capture resolution.
                    if 1000 < w < 1066 and 10 < h < 200:
                        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255), 1)
                        relevant_frame = gray[y:y+h, x:x+w]

                cv2.imshow('frame', frame)
                # Rate-limit writes so near-identical consecutive frames
                # of the same banner are not all saved.
                if time_elapsed > 1.0 / self.frame_rate and relevant_frame is not None:
                    self.prev = time.time()
                    frame_filename = f"{self.n:04d}{self.extension}"
                    frame_path = os.path.join(self.temp_folder_path, frame_filename)
                    cv2.imwrite(frame_path, relevant_frame)
                    self.n += 1

                if cv2.waitKey(1) == ord('q'):
                    break

        except Exception as e:
            # Best-effort: report the failure, then fall through to cleanup.
            print("An error occurred:", str(e))

        finally:
            self.cap.release()
            cv2.destroyAllWindows()

    def cleanup(self):
        """Delete the temporary frame folder and everything in it."""
        self.temp_folder.cleanup()

# Example usage
if __name__ == "__main__":
    video_processor = VideoProcessor("input_video.mp4")
    video_processor.process_video()
    # Destroy the temporary frame folder once processing is done;
    # otherwise the TemporaryDirectory is only reclaimed by its finalizer.
    video_processor.cleanup()
11 changes: 8 additions & 3 deletions process/web_scraper.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import requests
from bs4 import BeautifulSoup
import json
import os

class WebScraper:
def __init__(self, url, output_file):
self.url = url
self.output_file = output_file
self.output_file = os.path.join('json_data', output_file)

def scrape_table_data(self):
response = requests.get(self.url)
Expand All @@ -29,7 +30,11 @@ def scrape_table_data(self):
for row in table_data:
table_dict_list.append(dict(zip(headers, row)))

with open(self.output_file, 'w') as json_file:
json.dump(table_dict_list, json_file, indent=4)
with open(self.output_file, 'w', encoding='utf-8') as json_file:
json.dump(table_dict_list, json_file, indent=4, ensure_ascii=False)

print(f"Extracted table data saved to {self.output_file}")

if __name__ == "__main__":
    # Standalone run: scrape the wiki achievement table into
    # json_data/output_file.json.
    # NOTE(review): __init__ joins the path under 'json_data', and that
    # folder is only created by process/processor.run — confirm it exists
    # before running this module directly.
    scraper = WebScraper("https://genshin-impact.fandom.com/wiki/Wonders_of_the_World", "output_file.json")
    scraper.scrape_table_data()

0 comments on commit 9d455a3

Please sign in to comment.