Skip to content

Commit

Permalink
Attempted to fix file path issue so Docker can find uploaded file
Browse files (browse the repository at this point in the history)
  • Loading branch information
c-bommu committed Dec 5, 2023
1 parent 1679fb7 commit 691b931
Showing 1 changed file with 8 additions and 21 deletions.
29 changes: 8 additions & 21 deletions ACMAS/app/ACMAS_Web/ocr_files/ocr.py
Original file line number | Diff line number | Diff line change
@@ -1,25 +1,11 @@
'''
import os
from pdf2image import convert_from_path
import pytesseract

def ocr_driver(pdf_name):
if ending_type(pdf_name) == 'pdf':
images = 'Hi'
else:
images = pdf_name
ocr_output = images
return pdf_name
'''

import os
from pdf2image import convert_from_path
import pytesseract


#absolute_path = os.path.dirname(__file__)
#relative_path = "../ocr_files/"
#full_path = os.path.join(absolute_path, relative_path)
absolute_path = os.path.dirname(__file__)
relative_path = "../ocr_files"
full_path = os.path.join(absolute_path, relative_path)


# Gets the ending types of files
Expand All @@ -37,16 +23,16 @@ def png_conversion(pdf_name):

print("CONVERTING PNG")

if not os.path.isdir('mediafiles/ocr_images'):
os.system("mkdir mediafiles/ocr_images")
if not os.path.isdir('acmas_media_files/ocr_images'):
os.system("mkdir acmas_media_files/ocr_images")


image_names = []
images_from_path = convert_from_path(pdf_name, 600)
for i in range(len(images_from_path)):
# Save pages as images in the pdf
images_from_path[i].save('mediafiles/ocr_images/' + 'page' + str(i) + '.jpg', 'JPEG')
image_names.append('mediafiles/ocr_images/' + 'page' + str(i) + '.jpg')
images_from_path[i].save('acmas_media_files/ocr_images/' + 'page' + str(i) + '.jpg', 'JPEG')
image_names.append('acmas_media_files/ocr_images/' + 'page' + str(i) + '.jpg')
return image_names


Expand All @@ -60,6 +46,7 @@ def run_ocr(image_name):




def ocr_driver(pdf_name):
if ending_type(pdf_name) == 'pdf':
images = png_conversion(pdf_name)
Expand Down

0 comments on commit 691b931

Please sign in to comment.