From 4cc6b9b1e35eb1e7e21e601d7af4763965e82fcd Mon Sep 17 00:00:00 2001 From: autolordz Date: Sat, 10 Aug 2019 12:56:29 +0800 Subject: [PATCH] new tika flask process --- .gitignore | 2 + README.md | 197 +++++--- .../batch-renamer.py | 37 +- .../extractImage.py | 7 +- batch-renamer-tika.py | 477 ++++++------------ depends.py | 332 ++++++++++++ flask_app/app.py | 88 ++++ flask_app/templates/first_app.html | 14 + flask_app/tika-config.xml | 8 + flask_app/tika_start.sh | 3 + flask_app/tika_stop.sh | 4 + img_process.py | 412 +++++++++++++++ img_tmp/flow.jpg | Bin 0 -> 37580 bytes require.md | 8 + 14 files changed, 1171 insertions(+), 418 deletions(-) rename batch-renamer.py => batch-renamer-old/batch-renamer.py (97%) rename extrectImage.py => batch-renamer-old/extractImage.py (99%) create mode 100644 depends.py create mode 100644 flask_app/app.py create mode 100644 flask_app/templates/first_app.html create mode 100644 flask_app/tika-config.xml create mode 100644 flask_app/tika_start.sh create mode 100644 flask_app/tika_stop.sh create mode 100644 img_process.py create mode 100644 img_tmp/flow.jpg create mode 100644 require.md diff --git a/.gitignore b/.gitignore index 4a37318..e956f91 100644 --- a/.gitignore +++ b/.gitignore @@ -107,5 +107,7 @@ venv.bak/ *.zip *.exe *.txt +*.jar tmp/ exe-win7-tmp/ +README1.md diff --git a/README.md b/README.md index e519892..0aae14e 100644 --- a/README.md +++ b/README.md @@ -1,74 +1,141 @@ -# file-batch-renamer Python 批量重命名文件脚本 +## Python 批量重命名文件 -> a file batch renamer based on python (include Chinese) +* 一个基于Python的终极重命名机 +* a file batch renamer based on python (include Chinese) +* 用于自动对文件夹里大部分类型的文件进行分析,并批量重命名 +* 重命名文件自古就是繁琐事情,谁用谁指导 +* 方便处理IT办公文件和下载文件夹的杂乱文件 +* 简单练手,练手第三方包,编写环节综合到各方面,python初学者必备 +* 基于云端和本地,也可以本地 +* 对小白提供(exe),云端提供临时服务器 -- Updated 2019.1.2: +[![](https://img.shields.io/badge/github-source-orange.svg?style=popout&logo=github)](https://github.com/autolordz/file-batch-renamer) +[![](https://img.shields.io/github/license/autolordz/file-batch-renamer.svg?style=popout&logo=github)](https://github.com/autolordz/file-batch-renamer/blob/master/LICENSE) + +## Tika版架构 + +![](img_tmp/flow.jpg) +(假如条件不允许可以全部本地化) + +## Updated + +- Updated 2019.8.10: + - **Apache Tika** 版改进,基于云端和本地,终极自动重命名机 + +- Updated 2019.1.2: - 新版 **Apache Tika** 解析全文件版本 - 旧版 **Python 3rd party** 解析文件版本 + ---------------- -## Tutorial - -### 1. Tika | Tesseract OCR - -- Files - - batch-renamer-tika.py - -- Requirements - - [zhon](https://pypi.org/project/zhon/) zhon to deal with Chinese - - [tika](https://pypi.org/project/tika/) tika for python - - [Java Jre jre-8u91-windows-x64](https://www.oracle.com/technetwork/java/javase/downloads/java-archive-javase8-2177648.html) Jre8 is at least and fitting package - - [Tesseract v4.0.0.20181030](https://digi.bib.uni-mannheim.de/tesseract/tesseract-ocr-w32-setup-v4.0.0.20181030.exe) Tesseract for Image OCR - -- Supported Platform: - - [x] win7 32bit,win10 64bit,其他没测试过 - -- Supported Files: - - [x] docx,pptx,xlsx - - [x] doc,ppt,xls - - [x] epub,rar,zip,tar,html,pdf - - [x] png,jpg,jpeg,bmp,tif - - [x] others(follows [tika](http://tika.apache.org/1.20/formats.html)) - -- Usage: - - 安装必须 - - installplug.bat - - setenv.bat - - 要重命名的文件放在当前目录 - - 执行batch-renamer-tika.(py|exe) - -#### 2. Python 3rd party | Tesseract OCR - -- Files - - batch-renamer.py - - extrectImage.py (Author: BJ Jang (jangbi882 at gmail.com)) - -- Requirements - - [python-pptx](https://pypi.org/project/python-pptx/) ppt格式 - - [python-docx](https://pypi.org/project/python-docx/) word格式 - - [xlrd](https://pypi.org/project/xlrd/) excel格式 - - [zhon](https://pypi.org/project/zhon/) 提取中文 - - [PyPDF2](https://github.com/mstamy2/PyPDF2) 提取PDF - - [PDFMiner](https://github.com/euske/pdfminer/) 提取PDF - - [pytesseract](https://pypi.org/project/pytesseract/) 识别图像 - -- Supported Platform: - - [x] win7 32bit,win10 64bit,其他没测试过 - -- Supported Files: - - [x] docx,pptx,xlsx - - [x] doc,ppt,xls - - [x] pdf - - [x] png,jpg,jpeg,bmp,tif - -- Usage: - - 安装必须或手动安装包 - - installplug.bat - - setenv.bat - - 要重命名的文件放在当前目录 - - 执行batch-renamer.(py|exe) - -[![ForTheBadge built-with-science](http://ForTheBadge.com/images/badges/built-with-science.svg)](https://github.com/autolordz/docx-content-modify/blob/master/LICENSE) +## 环境 + +* conda : 4.6.14 +* python : 3.7.3 +* Win10 + Spyder3.3.4 (打开脚本自上而下运行,或者自己添加main来py运行) + +* 组件: tika版 + - [zhon](https://pypi.org/project/zhon/) 提供中文字符 + - [opencv](https://pypi.org/project/opencv-python/) 处理图片,阈值滤镜等 + - [PIL](https://pypi.org/project/Pillow/) 处理图片 + - [fitz](https://pypi.org/project/PyMuPDF/) 提取PDF图片 + - [jieba](https://github.com/fxsjy/jieba) 分词词干识别 + - [numpy,requests,string,json,glob,time,os,re,string,subprocess,configparser,BeautifulSoup4] + - [Java jre-8u91-windows-x64](https://www.oracle.com/technetwork/java/javase/downloads/java-archive-javase8-2177648.html) Jre8 is at least and fitting package + - [tika server](https://www.apache.org/dyn/closer.cgi/tika/tika-server-1.22.jar) 工程没附带,一定要下载 + - **Tesseract 云端** 参考云端[Tesseract]安装 + +* 组件: 普通版 + - [Tesseract v4.0](https://digi.bib.uni-mannheim.de/tesseract/tesseract-ocr-w32-setup-v4.0.0.20181030.exe) Tesseract for Image OCR + - [PyPDF2,pdfminer,pytesseract,docx,pptx,xlrd,PIL,extrectImage] + +* 打包程序: pyinstaller + +- **以下重点更新和维护Tika版,普通版代码保留** + +## 内容 + +- [x] 按以下格式重命名 + - [x] ['.txt','.html','.epub','.chm','.wps','.md', + '.doc','.odt','.docx','.xlsx','.csv','.xls','.rtf', + '.rar','.zip','.tar','.tgz','.7z', + '.mp4','.gif','.flv','.mkv','.swf','.psd', + '.mp3','.m4a','.flac', + '.pdf',] + - [x] ['.ppt','.pptx','.pptm'] + - [x] ['.png','.jpg','.jpeg','.bmp','.tif'] + - [x] others (rules follow [tika](http://tika.apache.org/1.20/formats.html)) + +- [x] 过滤下格式非重命名 + - [x] ['.bat','.jar','.exe','.py','.ini'] + +- [x] 支持平台 + - [x] win7 32bit,win10 64bit,其他平台请按错误修改代码 + +## 使用 + +相关文件在flask_app目录 + +- 云端[tika]部署 + +```shell +#Centos启动 tika +nohup java -Djava.awt.headless=true -jar tika-server.jar --host=yourhost --port=3232 >/dev/null & + +#Centos终止 +ps -ef | grep tika-server | grep -v grep | awk '{print $2}' | xargs kill -9 +``` + +- 本地[tika]部署 + +```shell + +#win启动 tika + +start /b java -Djava.awt.headless=true -jar tika-server.jar --config=tika-config.xml --host=127.0.0.1 --port=3232 + +#[tika-config.xml 用于跳过本地Tesseract,加速非图片文件读取速度] + +#Win终止 + +taskkill /F /FI "IMAGENAME eq java.exe" +``` +- 云端[flask]部署 + +```shell +#启动 +nohup python3 /pyweb/app.py >/dev/null & + +#终止 +ps -ef | grep pyweb | grep -v grep | awk '{print $2}' | xargs kill -9 +``` + +- 云端[Tesseract]安装 + + - Centos 6.5 安装 Tesseract 4+ + - 参考 https://www.jianshu.com/p/bf8521703143 差异如下: + - autoconf-2.63-5.1.el6.noarch 不用 2.69 也行,保留 + - 实际安装了 autoconf-archive-2015.02.24-1.sdl6.noarch.rpm + +- 客户端安装 + - installplug.bat -> 安装 java 环境 + - 需要处理文件放在target目录 + - 点击 -> batch-renamer-tika.exe -> 处理target目录 + - cmd -> batch-renamer-tika.py 'yourfile' -> 处理yourfile(文件|目录) + +## 未来 + +- [x] 以文件开始内容命名 +- [x] 识别图像内容命名 +- [ ] 提取文章(jieba)关键词命名 +- [ ] 提取文章摘要(NLP)命名 + +## Licence + +[See Licence](#file-batch-renamer) That's it,enjoy. + + + diff --git a/batch-renamer.py b/batch-renamer-old/batch-renamer.py similarity index 97% rename from batch-renamer.py rename to batch-renamer-old/batch-renamer.py index 09820d6..79b38bc 100644 --- a/batch-renamer.py +++ b/batch-renamer-old/batch-renamer.py @@ -26,21 +26,16 @@ """ #%% - import zhon.hanzi,zhon.cedict import os,re,io,glob,shutil,string,platform import itertools as it - -import extrectImage from pdfminer.high_level import extract_text_to_fp - import pytesseract -from PIL import Image - +from PIL import Image from docx import Document from pptx import Presentation from xlrd import open_workbook -from win32com.client import Dispatch # for office 97-2003 +from win32com.client import Dispatch # for office 97-2003 #%% def parse_subpath(path,file): @@ -83,12 +78,12 @@ def clean_txt_func(x,**kwargs): return xx -#%% rename office,officex - +#%% rename office,officex + def rename_officex(file,**kwargs): '''rename only judgment doc files''' suffix = os.path.splitext(file)[1] - + if suffix == '.docx': try: doc = Document(file) @@ -99,7 +94,7 @@ def rename_officex(file,**kwargs): return x except Exception as e: print('>>> 读取 %s 失败,可能格式不正确 => %s'%(file,e)) - + if suffix == '.pptx': try: prs = Presentation(file) @@ -117,7 +112,7 @@ def rename_officex(file,**kwargs): return x except Exception as e: print('>>> 读取 %s 失败,可能格式不正确 => %s'%(file,e)) - + if suffix in ['.xlsx','.xls']: try: exl = open_workbook(file) @@ -149,12 +144,12 @@ def get_txt_text(file,**kwargs): def rename_office(file,**kwargs): name = os.path.splitext(file)[0] suffix = os.path.splitext(file)[1] - + if suffix == '.txt': x = get_txt_text(file,**kwargs) print('>>> 找到 %s 内容: %s'%(file,x)) os_rename(file,x) - + if suffix == '.doc': file_txt = name + '_doc.txt' word = Dispatch("Word.Application") @@ -165,7 +160,7 @@ def rename_office(file,**kwargs): print('>>> 找到 %s 内容: %s'%(file,x)) os_rename(file,x) os.remove(file_txt) - + if suffix == '.ppt': txt = [] try: @@ -186,7 +181,7 @@ def rename_office(file,**kwargs): x = clean_txt_func(','.join(txt),**kwargs) print('>>> 找到 %s 内容: %s'%(file,x)) os_rename(file,x) - + if suffix == '.xls': try: app = Dispatch("Excel.Application") @@ -206,7 +201,7 @@ def rename_office(file,**kwargs): x = clean_txt_func(','.join(txt),**kwargs) print('>>> 找到 %s 内容: %s'%(file,x)) os_rename(file,x) - + return True #%% rename image @@ -220,10 +215,10 @@ def get_image_txt(file,**kwargs): print('image size :',img.size) img = img.crop((0,0,img.width,img.height/img_h)) print('image size 2:',img.size) - + pytesseract.pytesseract.tesseract_cmd = 'c:\\Program Files (x86)\\Tesseract-OCR\\tesseract.exe' \ - if '64bit' in platform.architecture() else 'c:\\Program Files\\Tesseract-OCR\\tesseract.exe' - + if '64bit' in platform.architecture() else 'c:\\Program Files\\Tesseract-OCR\\tesseract.exe' + x = pytesseract.image_to_string(img,lang='chi_sim') # eng x = re.sub(r'\s+',',',x) print('>>> 解析 %s \n 内容: %s'%(file,x)) @@ -256,7 +251,7 @@ def get_pdf_txt(ifile,**kwargs): if len(txt) < 10: print('====decode images===') extrectImage.main(sourceName=ifile,outputFolder=odir,**kwargs) - subext = [parse_subpath(odir,x) for x in + subext = [parse_subpath(odir,x) for x in ['*.png','*.jpg','*.jpeg','*.bmp','*.tif']] images = list(it.chain(*(glob.iglob(e) for e in subext))) print(images) diff --git a/extrectImage.py b/batch-renamer-old/extractImage.py similarity index 99% rename from extrectImage.py rename to batch-renamer-old/extractImage.py index bf699a8..c2d4832 100644 --- a/extrectImage.py +++ b/batch-renamer-old/extractImage.py @@ -171,7 +171,6 @@ def get_pdfObj_contents(pdfObj,**kwargs): img = Image.open(jpgData) if mode == "CMYK": # case of CMYK invert all channel - # imgData = list(img.tobytes()) # invData = [(255 - val) & 0xff for val in imgData] # data = struct.pack("{}B".format(len(invData)), *invData) @@ -190,7 +189,7 @@ def get_pdfObj_contents(pdfObj,**kwargs): img.write(data) img.close() print('save to:',outFileName + ".jp2") - + # case of JBIG2 elif len(leftFilters) == 1 and leftFilters[0] == '/JBIG2Decode': img = open(outFileName + ".jbig2", "wb") @@ -222,11 +221,11 @@ def main(sourceName,**kwargs): outputFolder = kwargs.get('outputFolder',None) os.makedirs(outputFolder,exist_ok=True) fileBase = os.path.splitext(os.path.basename(sourceName))[0] - + with open(sourceName, "rb") as fp: pdfObj = PyPDF2.PdfFileReader(fp,strict=False) get_pdfObj_contents(pdfObj,fileBase=fileBase,**kwargs) - + print("Completed.") # main(sourceName = 'aa.pdf', outputFolder = ".\\Temp",num_pages = 1,targetPage = None) diff --git a/batch-renamer-tika.py b/batch-renamer-tika.py index 8565194..c03bece 100644 --- a/batch-renamer-tika.py +++ b/batch-renamer-tika.py @@ -19,357 +19,178 @@ # SOFTWARE. # -*- coding: utf-8 -*- -print(""" -Batch Files Rename (Tika|Tesseract engine) -Created on Thu Dec 28 2018 +print(''' -@author: Autoz (autolordz@gmail.com) +Batch Files Renamer (Tika|Tesseract engine) -======================= -""") +终极自动重命名机 -#%% -import zhon.hanzi,zhon.cedict -import os,re,sys,lob,string -from PIL import Image +Created on Thu Dec 28 2018 -import subprocess,platform +Updated on 2019-08-05 -os.environ['TIKA_VERSION'] = '1.20' -os.environ['TIKA_PATH'] = os.getcwd() +@author: Autoz (autolordz@gmail.com) -from tika import parser -from tika import language -from tika import tika +''') #%% -def remove_file(file): - if os.path.exists(file): - print('>>> del',file) - os.remove(file) - -def os_rename(origin,dist): - if dist: - header = os.path.dirname(origin)+'\\' if os.path.dirname(origin) else '' - file_n = header + dist + os.path.splitext(origin)[1] - if not file_n == origin: - try: - os.rename(origin,file_n) - except FileExistsError: - os_rename(origin,dist+'_copy') - print('>>> 重命名: 【%s】=>【%s】'%(origin,file_n)) - -def rename_file(file,txt,sfile='',img_h = 1): - print('>>> 找到内容: 【%s】'%txt) - if sfile or img_h > 1: - os_rename(sfile,txt) - remove_file(file) - else: - os_rename(file,txt) +import os,re,sys,string,json,glob,time +from depends import * +from img_process import * + +print('分析文字地址:',tika_url_txt) +print('分析图片地址:',tika_url_ocr) + +#%% header +time1 = time.time() +#tg = ['nosysargv','1.jpg'] +#tg = ['.','aa'] +tg = sys.argv +if len(tg)<=1: + tg = ['nosysargv','target'] + +docs_major = ['.txt','.html','.epub','.chm','.wps','.md', + '.doc','.odt','.docx','.xlsx','.csv','.xls','.rtf', + '.rar','.zip','.tar','.tgz','.7z', + '.mp4','.gif','.flv','.mkv','.swf','.psd', + '.mp3','.m4a','.flac', + '.pdf', + ] +docs_file = docs_major+docs_ppt +pics_file = ['.png','.jpg','.jpeg','.bmp','.tif'] +filter_suffix = ['.bat','.jar','.exe','.py','.ini'] +filter_file = ['log.txt','conf.txt'] + +print('\n Supported Files:\n Yes: %s %s \n No: %s'%(docs_file,pics_file,filter_suffix+filter_file)) +#remove_file('log.txt') #%% -def parse_subpath(path,file): - '''make subpath''' - if not path: return file - if not os.path.exists(path): - os.mkdir(path) - return os.path.join(path,file) - -def clean_txt_func(x,**kwargs): - x = re.sub(r'\s+',',',x) - s = int(kwargs.get('txt_l',len(''.join(x)))) - punc_all = string.punctuation + zhon.hanzi.punctuation - char_all = string.printable + zhon.cedict.all - if re.search(r'[\u4e00-\u9fff]+',x): - x = re.sub(r'(?<=[%s\w]{2})[%s]'%(zhon.cedict.all,punc_all),'|',x) - x = re.sub(r'[%s]'%punc_all.replace('|',''),'',x) # chinese punctuation - x = re.sub(r'[^%s]'%char_all,'',x) +def process_tika(file,t=0): + suffix = get_file_suffix(file) +# jtxt = get_tika_txt(subprocess_cmd(get_curl_rmeta,file),jtype=2) + jtxt = get_tika_txt(get_tika_rmeta(file),jtype=2) + if not jtxt: return file,t + title = jtxt.get('title','') + t = jtxt.get('X-TIKA:content','') + if not t: + if jtxt.get('Content-Type','') == 'application/pdf': + print('\n 尝试使用 Tika PDF 来读取...') + # t = get_tika_txt(subprocess_cmd(get_curl_pdf,file)) + # t = get_tika_txt(get_tika_pdf(file)) + # n_page = int(jtxt.get('xmpTPg:NPages',0)) + # if n_page > 1: + # file = slice_pdf(file) # 切割 pdf + print('提取pdf图像.') + file = img_extra(file) + t = process_img(file,isfast=0) + else: + if not suffix in docs_ppt: # not ppt use ocr + print('suffix:',suffix) +# t = get_tika_txt(subprocess_cmd(get_curl_ocr,file)) + t = get_tika_txt(get_tika_ocr(file)) + if not t and title: # 没内容就标题 + return title + return t + +def process_img(file,isfast): + t = img_correct(file, + var.img_correct, + var.img_clean, + isfast, + ) + return t + +def rename_img(file): + t = process_img(file,isfast=1) # 读取图片 + return t + +def rename_func(file): + t = process_tika(file) + return t + +def get_files_list(tg): + + if not tg: + tg=['.'] + + def filter_files(files): + files = list(filter(lambda x: get_file_suffix(x) not in filter_suffix,files)) + files = list(filter(lambda x: os.path.basename(x) not in filter_file,files)) + return files + + if '*' in tg[0]: + files = list(map(glob.glob,tg))[0] else: - x = re.sub(r'(?<=\w{2})[%s]'%(string.punctuation),'|',x) - x = re.sub(r'[%s]'%string.punctuation.replace('|',''),'',x) # punctuation - x = re.sub(r'[^%s]'%string.printable,'',x) # printable - x = re.sub(r'PowerPoint|演示文稿|Sheet1','',x) # PowerPoint Excel - xx = re.split(r'\|',x) - xx = '_'.join(xx)[:s] - return xx + files = list(filter(os.path.isfile,tg)) -#%% -from pptx import Presentation - -class PresentationBuilder(): - - def __init__(self,ifile): - self.presentation = Presentation(ifile) - - @property - def xml_slides(self): - return self.presentation.slides._sldIdLst # pylint: disable=protected-access - - def move_slide(self, old_index, new_index): - slides = list(self.xml_slides) - self.xml_slides.remove(slides[old_index]) - self.xml_slides.insert(new_index, slides[old_index]) - - def delete_slide(self, index): - slides = list(self.xml_slides) - self.xml_slides.remove(slides[index]) - - def remain_slide(self, starti, endi): - slides = list(self.xml_slides) - for i,slide in enumerate(slides): - if i not in range(starti,endi): - self.xml_slides.remove(slide) - - def save_ppt(self,ofile): - self.presentation.save(ofile) - -#%% slice pptx - -def slice_pptx(file,starti=0,endi=0): - endi = endi+1 if endi else starti+1 # 1 page or X pages - print('尝试 slice %s: %s - %s'%(file,starti,endi)) - psr = PresentationBuilder(file) - psr.remain_slide(starti,endi) - ofile = os.path.splitext(file)[0]+'_slice'+os.path.splitext(file)[1] - psr.save_ppt(ofile) - return ofile - -#%% slice pdf - -from PyPDF2 import PdfFileWriter, PdfFileReader - -def slice_pdf(file,num_pages=1): - with open(file,'rb') as fp: - pdfObj = PdfFileReader(fp,strict = False) - print('pdf all page:【%s】'%pdfObj.getNumPages()) - num_pages = num_pages if num_pages < pdfObj.numPages else pdfObj.numPages - output = PdfFileWriter() - for i in range(num_pages): - output.addPage(pdfObj.getPage(i)) - sfile = os.path.splitext(file)[0]+'_slice.pdf' - print('尝试 slice 【%s】 pages of pdf'%num_pages) - with open(sfile, 'wb') as fpo: - output.write(fpo) - return sfile - return '' - -#%% rename office - -import chardet - -def get_txt_text(file): - try: - with open(file,'rb') as f:#,encoding='utf-8' - x = f.read() - typet = chardet.detect(x) - x = re.sub(r'\s+',',',x.decode(typet['encoding'])) - print(x[:50]) - return x - except Exception as e: - print('>>> 读取 %s 失败,可能格式不正确 => %s'%(file,e)) - return '' - -def rename_image(file,**kwargs): - - txt_last = kwargs.get('txt_last','') - sfile = kwargs.get('sfile','') - img_h = kwargs.get('img_h',1) - try_rotate = kwargs.get('try_rotate',False) - rotate_f = kwargs.get('rotate_f',0) - - cfile = file if not sfile else sfile - print('cfile 1:',cfile) - - # cut image - with Image.open(cfile) as img: - print('image: %s kwargs: %s image size : %s'%(file,kwargs,img.size)) - if img_h > 1: - img = img.crop((0,0,img.width,img.height/img_h)) - print('image size cuted:',img.size) - sfile = os.path.splitext(file)[0]+'_cut' + os.path.splitext(file)[1] - img.save(sfile) - if rotate_f: - img = img.rotate(rotate_f,expand=1) - print('image size rotated:',img.size) - sfile = os.path.splitext(file)[0]+'_rotated' + os.path.splitext(file)[1] - img.save(sfile) - - cfile = file if not sfile else sfile - print('cfile 2:',cfile) - - # cmd tesseract - print('\n 尝试使用tesseract来读取,可能要等待... \n') - - tmpf = os.path.splitext(cfile)[0]+'_tmp.txt' #chi_sim+ - - catcmd = 'tesseract %s %s -l chi_sim+eng' \ - %(cfile,os.path.splitext(tmpf)[0]) - - subprocess.check_output(catcmd) - - txt = get_txt_text(tmpf) - txt = re.sub(r'\s+',',',txt) - - # detect languages - - # PyICU-2.2-cp36-cp36m-win_amd64.whl pycld2-0.31-cp36-cp36m-win_amd64.whl - # from polyglot.detect import Detector - # from collections import Counter - - # inter_xy =[] - # if len(txt)<50: txt += txt - # try: - # detector = Detector(txt,quiet=True) - # langs = [x.name for x in detector.languages] - # print('可信度,',detector.reliable) - # if '中文' in langs: - # inter_xy = ['中文'] - # elif detector.reliable: - # inter_xy = list((Counter(langs) & Counter(['中文','英语'])).elements()) - # except Exception as e: - # print(e) - # print('inter_xy',inter_xy) - - # not detect - inter_xy = txt - - if inter_xy: # if meaningful content - txt = clean_txt_func(txt,**kwargs) - rename_file(file,txt,sfile,img_h) - elif txt_last == txt: - rename_office(file) # meaningless try other method - elif try_rotate: # try rotate - if rotate_f / 360 != 1: - kwargs.setdefault('rotate_f',0) - kwargs['rotate_f'] += 90 - kwargs.setdefault('sfile',sfile) - kwargs.setdefault('txt_last',txt) - print('\n 尝试 旋转 %s 度 image \n'%kwargs['rotate_f']) - rename_image(file,**kwargs) - elif sfile: - remove_file(sfile) - remove_file(tmpf) - -def parser_tika(file,pimage=False,parserd = '',txt=''): - - if not pimage: - print('\n 尝试使用 Tika 来读取... \n') - parserd = parser.from_file(file) - else: - print('\n 尝试使用 Tika Image 来读取... \n') - headers = {'X-Tika-PDFextractInlineImages':'true', - 'X-Tika-OCRLanguage':'chi_sim+eng'} - parserd = parser.from_file(file, - serverEndpoint='http://localhost:9998/rmeta/text', - headers=headers) - if parserd: - txt = parserd.get('content','') - return txt - -def rename_office(file,txt='',sfile='',**kwargs): - if '.txt' in file: - print('\n 直接读取',file) - txt = get_txt_text(file) - elif '.pptx' in file and not 'slice' in file: - sfile = slice_pptx(file) - txt = parser_tika(sfile) - elif '.pdf' in file and not 'slice' in file: - sfile = slice_pdf(file) - txt = parser_tika(sfile) - # txt = parser_tika(file,pimage=True) - # print(12121212,txt) - else: - txt = parser_tika(file) - - if not txt and sfile: - txt = parser_tika(sfile,pimage=True) - # print(323232323,txt) - - if txt: - # print(re.sub(r'\s+',',',txt[:30])) - txt = clean_txt_func(txt,**kwargs) - rename_file(file,txt) - - if sfile: - remove_file(sfile) + dirs = list(filter(os.path.isdir,tg)) -#%% main + if len(dirs)>0: + for dirx in dirs: + files0 = glob.glob(dirx+'\\*.*') + files += filter_files(files0) -def convert_bytes(num): - """ - this function will convert bytes to MB.... GB... etc - """ - for x in ['bytes', 'KB', 'MB', 'GB', 'TB']: - if num < 1024.0: - return "%3.1f %s" % (num, x) - num /= 1024.0 - -suffix_img = ['.png','.jpg','.jpeg','.bmp','.tif'] -suffix_fast = ['.txt','.doc','.xls','.docx','.xlsx','.html','.epub','.rar','.zip','.tar'] -suffix_slow = ['.ppt','.pptx','.pdf'] - -def get_files_list(dirs=[]): - dirs = list(filter(os.path.exists,dirs)) - files = list(filter(os.path.isfile,dirs)) - dirs = list(filter(os.path.isdir,dirs)) - for dirx in dirs: - files += glob.glob(dirx+'\\*.*') - return sorted(files, key=lambda x:(suffix_img+suffix_slow).index(os.path.splitext(x)[1]) if os.path.splitext(x)[1] in (suffix_img+suffix_slow) else -1 -) + return sorted(files, key = lambda x: \ + docs_file.index(get_file_suffix(x)) if get_file_suffix(x) in docs_file else -1) -print(''' - -支持文件 - -Supported Files: %s \n %s \n %s +def clean_tmp(tg): + files = get_files_list(tg) + for f in files: + if '_mod' in f: + remove_file(f) + +#%% main +mkdir_file('target') -'''%(suffix_img,suffix_fast,suffix_slow)) +print ("参数: %s \n"%tg) +if tg and len(tg)>0: + tg.pop(0) -if sys.argv: - print ("【sys.argv】:",sys.argv) - if len(sys.argv)>1: - files = get_files_list(sys.argv) - else: - files = get_files_list(['.']) - -# set ocr env -setenv = 'setenv.bat' -if os.path.exists(setenv) and 'Tesseract-OCR' not in str(os.environ): - p = subprocess.check_output(setenv) - typet = chardet.detect(p) - print(re.sub(r'\s+',',',p.decode(typet['encoding']))) -else: - os.environ['path'] += ';c:\\Program Files (x86)\\Tesseract-OCR\\' \ - if '64bit' in platform.architecture() else \ - ';c:\\Program Files\\Tesseract-OCR\\' -print('\n Tesseract-OCR in path >> %s \n'%('Tesseract-OCR' in str(os.environ))) +files = get_files_list(tg) +print('>>> files:',files) +print('\n ================ \n') if len(files)>0: - print('>>> 正在重命名 ...') - oversize = 30 try: for i,file in enumerate(files): - if '~$' in file: continue - print('处理文件: %s \n'%file) - if os.path.getsize(file) > oversize * 1024 ** 2: - print(' 跳过文件大于 %s \n'%convert_bytes(os.path.getsize(file))) - elif os.path.splitext(file)[1] in ['.doc','.ppt','.xls','.txt', - '.docx','.pptx','.xlsx','.epub', - '.rar','.zip','.tar','.html','.pdf']: - rename_office(file,txt_l = 30) - elif os.path.splitext(file)[1] in ['.png','.jpg','.jpeg','.bmp','.tif']: - rename_image(file,try_rotate = True,txt_l = 30,img_h = 1) - else: - print(' 跳过文件 \n') - print('\n ================ \n') - print('\n >>> 重命名完毕... \n') + if os.path.exists(file): + if ' ' in file: + file0 = file + file = file.replace(' ','') + os.rename(file0,file) + + suffix = get_file_suffix(file) + if '_mod' in file or \ + '~$' in file or \ + '_vs_' in file:continue # 忽略临时和已Do的文件 + print('>>> 处理文件%s【%s - %s】 \n'%(i,file,get_sizes_human(file))) + t = '' + if check_size(file): + print('>>> 跳过大文件') + elif suffix in docs_file: + print('尝试使用 Tika 来读取... \n') + t = rename_func(file) + elif suffix in pics_file: + print('尝试使用 Tika (图像) 来读取... \n') + t = rename_img(file) + # if os.path.getsize(file) > 1 * 1024 ** 2: + # print('>>> 跳过文件大于 %s \n'%human_size(os.path.getsize(file))) + # else: + # print('>>> 跳过文件 %s \n'%file) + if t: + t = txt_clean_type(t,suffix,is_cn=0)[:var.txt_n] # 按类型和字段来清除 + rename_file(file,t) + else: + print('没有找到内容',file) + print('\n ================ \n') + + print('>>> 重命名完毕 \n>>> 清除 mod 文件') + clean_tmp(tg) + print('>>> 所有操作完成 THE END') except FileNotFoundError as e: print(e) -#%% test for rename file -# import os,glob -# files = glob.glob('*.*') -# for i,file in enumerate(files): -# os.rename(file,'file%s'%i+os.path.splitext(file)[1]) +time2 = time.time() +print('All Running time: %s Seconds'%(time2-time1)) \ No newline at end of file diff --git a/depends.py b/depends.py new file mode 100644 index 0000000..6346a77 --- /dev/null +++ b/depends.py @@ -0,0 +1,332 @@ +# -*- coding: utf-8 -*- +""" +Created on Tue Jun 25 18:00:24 2019 + +@author: autol +""" + +#%% +#import time +import zhon.hanzi,zhon.cedict,requests +import os,re,json,string,subprocess,configparser +from bs4 import BeautifulSoup + +docs_ppt = ['.ppt','.pptx','.pptm'] + +#%% + +class Var(object): + def __init__(self, + target_file=0, + tika_url_local=0, + tika_url_remote=0, + txt_n=0, + img_correct=0, + img_clean=0, + ): + self.target_file = target_file + self.tika_url_local = tika_url_local + self.tika_url_remote = tika_url_remote + self.txt_n = txt_n + self.img_correct = img_correct + self.img_clean = img_clean + +var = Var( + target_file = '.', + tika_url_local = 'http://127.0.0.1:3232/', + tika_url_remote = 'http://45.78.19.198:3232/', + txt_n=60, + img_correct=1, + img_clean=1, + ) + +cfgfile = 'conf.txt' + +punc_all = string.punctuation + zhon.hanzi.punctuation +char_all = string.printable + zhon.cedict.all + +def txt_clean(t,tag=','): + t = list(filter(None,re.split(r'[\n%s]'%punc_all,t))) + t = list(map(lambda x:re.sub(r'\s+','',x),t)) + t = tag.join(t) + return t + +def txt_clean_type(t,suffix,is_cn=0): + if suffix in docs_ppt: + t = re.sub(r'Presentation1|演示文稿|幻灯片','',t) #ppt PowerPoint + if suffix == '.html': + t = re.sub(r'\<.*?\>','',t)#html + if suffix in ['.doc','.docx']: + t = re.sub(r'\[.*?\]','',t)#docx tags + if is_cn: + t = re.sub(r'[a-zA-Z0-9]+','',t)#only chinese + t = txt_clean(t,tag='_') + return t + +def get_tika_put(file,url,headers,t=''): + with open(file,'rb') as f: + r = requests.put(url=url, + data=f, + headers=headers, + ) + r.encoding = r.apparent_encoding # 处理中文乱码 + t = r.text + return t + +# 使用远程 +def get_tika_ocr(file): + url = tika_url_ocr+'tika' + headers = { + 'X-Tika-OCRLanguage':'chi_sim', + 'X-Tika-OCRpageSegMode':'6' + } + return get_tika_put(file,url,headers) + +# 使用本地 +def get_tika_rmeta(file): + url = tika_url_txt+'rmeta/text' + headers = {'Accept': 'application/json',} + return get_tika_put(file,url,headers) + +def get_tika_meta(file): + url = tika_url_txt+'meta' + headers = {'Accept': 'application/json',} + return get_tika_put(file,url,headers) + +def get_tika_pdf(file): + url = tika_url_txt+'tika' + headers = { + 'X-Tika-PDFextractInlineImages':'true', + 'X-Tika-OCRLanguage':'chi_sim', + 'X-Tika-OCRpageSegMode':'6' + } + return get_tika_put(file,url,headers) + +def get_curl_ocr(file): + #-H "X-Tika-PDFOcrStrategy:ocr_only" + #-H "X-Tika-PDFextractInlineImages:true" + url = tika_url_ocr+'tika' + return '''curl -T %s %s -H "X-Tika-OCRLanguage: chi_sim" -H "X-Tika-OCRpageSegMode:6" ''' \ + %(file,url) + +def get_tesseract(file):# os.path.splitext(tmpf)[0] + return 'tesseract %s stdout -l chi_sim+eng'%(file,) +# return 'curl -T %s %s -H "Accept: application/json"'%(file,var.tika_url_local+'meta') # return type:list + +def get_curl_pdf(file): + return '''curl -T %s %s -H "X-Tika-PDFextractInlineImages:true" -H "X-Tika-OCRLanguage: chi_sim" -H "X-Tika-OCRpageSegMode:6" ''' \ + %(file,tika_url_ocr+'tika') + +def get_curl_meta(file): #-H "X-Tika-OCRLanguage: chi_sim+eng" + return 'curl -T %s %s -H "Accept: application/json"'%(file,tika_url_txt+'meta') # return type:list + +def get_curl_rmeta(file): #-H "X-Tika-OCRLanguage: chi_sim+eng" + # return 'curl -T %s %s -H "Accept: application/json"'%(file,var.tika_url_local+'rmeta') # return type:list + return 'curl -T %s %s -H "Accept: application/json"'%(file,tika_url_txt+'rmeta/text') + +def subprocess_cmd(func,file): + try: + print('cmd:',func(file)) + r = subprocess.check_output(func(file),shell=1).decode('utf-8') + except subprocess.CalledProcessError as e: + print(e) + return r + +def subprocess_Popen(cmd): + p = subprocess.Popen(cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT,shell=1) + return iter(p.stdout.readline, b'') + +def get_tika_version(url): + try: + print(requests.get(url=url+'version',timeout=0.5).text) + return url + except Exception as e: + print('Tika Error',e) + return None + +def setup_local_tika(): + print('Now Start Tika Server...') + url,url_r = var.tika_url_local,var.tika_url_remote + if get_tika_version(var.tika_url_local): + if not get_tika_version(var.tika_url_remote): + url_r = None + else: + url = url_r = get_tika_version(var.tika_url_remote) + if os.path.exists('tika-server.jar'): + try: + cmd = 'start /B java -Djava.awt.headless=true -jar tika-server.jar --config=tika-config.xml --host=127.0.0.1 --port=3232' + for output_line in subprocess_Popen(cmd): + print(output_line) + if 'Started' in str(output_line): + break +# if(subprocess.check_call(cmd,shell=1)==0): +# print('Start Ok!') +# time.sleep(1.5) + url = get_tika_version(var.tika_url_local) + except Exception as e: + print(e,'Select Remote Server...') + return url,url_r + +tika_url_txt,tika_url_ocr = setup_local_tika() + +def get_tika_version1(): + try: + print(subprocess.check_output('curl %s '%(var.tika_url_local+'version'),shell=1)) + except subprocess.CalledProcessError as e: + print(e) + return False + return True + +def get_curl_ocr_txt(file): + return get_tika_txt(subprocess_cmd(get_curl_ocr,file)) + +def get_tika_txt(txt,jtype=1): + if is_json(txt): #json type + if jtype == 1: + txt = dict(json.loads(txt)) + else: + txt = dict(json.loads(txt)[0]) + elif 'xmlns' in txt: #html type + soup = BeautifulSoup(txt) + txt = soup.find("div", class_="ocr") + if txt: txt = txt.text + return txt + +def get_sizes_human(file): + """ + this function will convert bytes to MB.... GB... etc + """ + num = os.path.getsize(file) + for x in ['bytes', 'KB', 'MB', 'GB', 'TB']: + if num < 1024.0: + return "%3.1f %s" % (num, x) + num /= 1024.0 + +def print_log(*args, **kwargs): + print(*args, **kwargs) + with open('log.txt', "a",encoding='utf-8') as file: + print(*args, **kwargs, file=file) + +def rename_file(file,t): + if '_mod' in file: + file = file.replace('_mod','') + print_log('>>> 找到内容: 【%s】=>【%s】'%(file,t)) + os_rename(file,t) + +def os_rename(file,t): + if file: + dirname = os.path.dirname(file) + suffix = os.path.splitext(file)[1] + oldname = os.path.splitext(os.path.basename(file))[0] + header = dirname+'\\' if dirname else '' + nfile = header + t + '_vs_' + oldname + suffix + nfile_copy = header + t + '_vs_' + oldname + '_copy' + suffix + cond = t == os.path.splitext(os.path.basename(file))[0].split('_vs_')[0] + if not cond: + print_log('>>> 重命名: 【%s】=>【%s】'%(file,nfile)) + try: + os.rename(file,nfile) + except FileExistsError: + os_rename(file,nfile_copy) + +def mkdir_file(path): + if not os.path.exists(path): + os.mkdir(path) + +def parse_subpath(path,file): + '''make subpath''' + if not path: return file + if not os.path.exists(path): + os.mkdir(path) + return os.path.join(path,file) + +def remove_file(file): + if file and os.path.exists(file): + print('>>> del',file) + try: + os.remove(file) + except Exception as e: + print(e) + +def check_size(file,is_img=0): + if is_img: + return os.path.getsize(file) > 500 * 1024 # 大于 500k + return os.path.getsize(file) > 30 * 1024 ** 2 + +def get_file_name(file): + return os.path.splitext(file)[0] + +def get_file_suffix(file): + return os.path.splitext(file)[1] + +def get_mod_name(file,tag='_mod',suffix=0): + # import tempfile # optional use tempfile + # _, tmp_name = tempfile.mkstemp(prefix='tmp_') + # file = tmp_name + get_file_suffix(file) + tag = '' if tag in file else tag + suffix = suffix if suffix else get_file_suffix(file) + file = get_file_name(file) + tag + suffix + return file + +def is_json(s): + try: + json.loads(s) + except ValueError: + return False + return True + +#%% optional config + +def process_config(): + try: + if not os.path.exists(cfgfile): + '''生成默认配置''' + write_config() + read_config() + except Exception as e: + print('>>> 配置文件出错 %s ,删除...'%e) + if os.path.exists(cfgfile): + os.remove(cfgfile) + try: + write_config() + read_config() + except Exception as e: + '''这里可以添加配置问题预判问题''' + print('>>> 配置文件再次生成失败 %s ...'%e) + return var + +def write_config(): + cfg = configparser.ConfigParser(allow_no_value=1, + inline_comment_prefixes=('#', ';')) + + cfg['config'] = { + + 'target_file': var.target_file+' # 重命名目录,留空就是当前目录', + 'tika_url_local': var.tika_url_local+' # tika本地', + 'tika_url_remote': var.tika_url_remote+' # tika远程', + 'img_cut': str(var.img_cut)+' # 是否裁剪', + 'img_thumbnail':str(var.img_thumbnail)+' # 是否缩小,推荐', + 'img_correct':str(var.img_correct)+' # 是否修正', + 'img_clean':str(var.img_clean)+' # 是否清理,模糊图片用', + 'txt_n':str(var.txt_n)+' # 名字长度', + } + with open(cfgfile, 'w',encoding='utf-8-sig') as configfile: + cfg.write(configfile) + print('>>> 重新生成配置 %s ...'%cfgfile) + +def read_config(): + cfg = configparser.ConfigParser(allow_no_value=1, + inline_comment_prefixes=('#', ';')) + cfg.read(cfgfile,encoding='utf-8-sig') + var.target_file = cfg['config']['target_file'] + var.tika_url_local = cfg['config']['tika_url_local'] + var.tika_url_remote = cfg['config']['tika_url_remote'] + var.img_cut = int(cfg['config']['img_cut']) + var.img_thumbnail = int(cfg['config']['img_thumbnail']) + var.img_correct = int(cfg['config']['img_correct']) + var.img_clean =int(cfg['config']['img_clean']) + var.txt_n = int(cfg['config']['txt_n']) + return dict(cfg.items('config')) + diff --git a/flask_app/app.py b/flask_app/app.py new file mode 100644 index 0000000..4db5915 --- /dev/null +++ b/flask_app/app.py @@ -0,0 +1,88 @@ +from flask import Flask, render_template, request +from nltk.corpus import wordnet as wn +import re,string,json,subprocess,os +import jieba,time +import zhon.hanzi + +punc_all = string.punctuation + zhon.hanzi.punctuation + +app = Flask(__name__) + +@app.route('/',methods=['GET']) +def index(): + t = request.args.get('t', 'hahahah') + return render_template('first_app.html',text=t) + +def subprocess_cmd(func,file): + print('cmd:',func(file)) + return subprocess.check_output(func(file),shell=1).decode("utf-8") + +def get_curl_tesseract(file):# os.path.splitext(tmpf)[0] + return 'tesseract %s stdout -l chi_sim+eng'%(file,) + +def remove_file(file): + if file and os.path.exists(file): + print('>>> del',file) + try: + os.remove(file) + except Exception as e: + print(e) + +@app.route('/ocr',methods=['POST','PUT']) +def post2(): + s1 = time.time() + +# print("---data---\r\n", request.data) + print("---files---\n", request.files) +# print("---stream---\r\n", request.stream.read()) +# print("---form---\r\n", request.form) + + file = request.files['file'] + + file.save(file.filename) + print('files-name:',file.filename) + t = subprocess_cmd(get_curl_tesseract,file.filename) + remove_file(file.filename) + s2 = time.time() + print('2222 Running time: %s Seconds'%(s2-s1)) + return t + +def tika_words_rate2(t,r=0): + if t: + t = re.sub(r'[\n%s|0-9]'%punc_all,'',t) + s1 = time.time() + j = jieba.lcut(t, cut_all=0,HMM=0) + s2 = time.time() + print('Running time: %s Seconds'%(s2-s1)) + + s1 = time.time() + wa = list(filter(lambda x:len(x)>1,j)) + if len(wa)==0: + wa = list(filter(None,j)) + ws = list(filter(None,[w if wn.synsets(w,lang='eng') \ +# or wn.synsets(w,lang='jpn') \ + or wn.synsets(w,lang='cmn') \ + else None for w in wa])) + + s2 = time.time() + print('Running time: %s Seconds'%(s2-s1)) + r = len(ws)/(len(wa)+.1) + return r + +@app.route('/',methods=['POST']) +def post1(): + s1 = time.time() + j = request.get_json() + if j: + t = j.get('t', 'no txt !!') + else: + t = request.get_data(as_text=1) + + print('flask t= \n',t) + r = tika_words_rate2(t) + s2 = time.time() + print('2222 Running time: %s Seconds'%(s2-s1)) + return json.dumps({'rate':r}) + +if __name__ == '__main__': + app.run(debug=1,host='127.0.0.1',port=2121) diff --git a/flask_app/templates/first_app.html b/flask_app/templates/first_app.html new file mode 100644 index 0000000..5c84671 --- /dev/null +++ b/flask_app/templates/first_app.html @@ -0,0 +1,14 @@ + + + + First app + + + +
+ Hi, this is my first Flask web app!
+ Test your text: {{ text }} +
+ + + \ No newline at end of file diff --git a/flask_app/tika-config.xml b/flask_app/tika-config.xml new file mode 100644 index 0000000..92a2808 --- /dev/null +++ b/flask_app/tika-config.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/flask_app/tika_start.sh b/flask_app/tika_start.sh new file mode 100644 index 0000000..50565ab --- /dev/null +++ b/flask_app/tika_start.sh @@ -0,0 +1,3 @@ +#!/bin/bash +nohup java -Djava.awt.headless=true -jar /tmp/tika-server.jar --host=yourhost --port=3232 >/dev/null & +nohup python3 /tmp/flask_app/app.py >/dev/null & diff --git a/flask_app/tika_stop.sh b/flask_app/tika_stop.sh new file mode 100644 index 0000000..d373f95 --- /dev/null +++ b/flask_app/tika_stop.sh @@ -0,0 +1,4 @@ +#!/bin/bash +ps -ef | grep tika-server | grep -v grep | awk '{print $2}' | xargs kill -9 +ps -ef | grep flask_app | grep -v grep | awk '{print $2}' | xargs kill -9 + diff --git a/img_process.py b/img_process.py new file mode 100644 index 0000000..46dbd72 --- /dev/null +++ b/img_process.py @@ -0,0 +1,412 @@ +# -*- coding: utf-8 -*- +""" +Created on Fri Jul 5 10:42:16 2019 + +@author: autol + +This script is to getting txt through processing img +""" + +#%% +import os,re +import fitz +import cv2 +import numpy as np +from PIL import Image +import time +from depends import txt_clean,get_mod_name,get_sizes_human +from depends import get_tika_ocr,get_tika_txt +import requests,json +img_log = 'img_log.csv' + +#%% CV2 Adaptive Thresholding + +def laplacian(img): + ddepth = cv2.CV_16S + kernel_size = 3 + img = cv2.Laplacian(img, ddepth, ksize=kernel_size) + img = cv2.convertScaleAbs(img) # converting back to uint8 + return img + +def remove_noise(img): + # Apply dilation and erosion to remove some noise + kernel = np.ones((1, 1), np.uint8) + img = cv2.dilate(img, kernel, iterations=1) + img = cv2.erode(img, kernel, iterations=1) + return img + +def img_resize_cv2(img): + img = cv2.resize(img, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_AREA) + # img = cv2.resize(img, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC) + # img = cv2.resize(img, None, fx=2.5, fy=2.5, interpolation=cv2.INTER_LINEAR) + return img + +def img_blur(img,bt=0): + # img = cv2.GaussianBlur(img, (3, 3), 0) # 高斯模糊去噪 + img = cv2.GaussianBlur(img, (9, 9),bt) # 高斯模糊去噪 + return img + +def img_BGR2RGB(img): + return cv2.cvtColor(img,cv2.COLOR_BGR2RGB) + +def img_sharpen_cv2(img): + n=6;m=70 + kr = np.ones((n,n),np.float32)/20 + img = cv2.filter2D(img, -1, kr) + img = cv2.addWeighted(img, 4, cv2.blur(img, (m,m)), -4, 128) + # img = cv_imread_cn(file,0) + # kr = np.ones((5,5),np.float32)/25 + # kr = np.array([[-1,-1,-1], [-1,9,-1], [-1,-1,-1]]) + # kr = np.eye(3,dtype = np.uint8) + # img = cv2.filter2D(img, -1, kr) + # img = cv2.addWeighted(img, 4, cv2.blur(img, (60, 60)), -4, 128) + # img = cv2.GaussianBlur(img, (5, 5),0) + # img = cv2.medianBlur(img,5) + # img = cv2.blur(img, (30, 30)) + # img = cv2.bilateralFilter(img,9,75,75) + return img + +def get_cv2(img): + return { + # 'grey': cv2.cvtColor(cv_imread_cn(file), cv2.COLOR_BGR2GRAY), # convert_to_gray + # 'blur' : cv2.medianBlur(img,5), + # 'EHIST' : cv2.equalizeHist(img), + 'BINARY' : cv2.threshold(img,127,255,cv2.THRESH_BINARY)[1], + 'BINARY1' : cv2.threshold(cv2.GaussianBlur(img, (3, 3), 0),127,255,cv2.THRESH_BINARY)[1], + 'BINARY_INV' : cv2.threshold(img,127,255,cv2.THRESH_BINARY_INV)[1], + 'TRUNC': cv2.threshold(img,127,255,cv2.THRESH_TRUNC)[1], + 'TRUNC1': cv2.threshold(img,127,255,cv2.THRESH_TRUNC+cv2.THRESH_BINARY)[1], + 'TRUNC2': cv2.threshold(cv2.GaussianBlur(img, (3, 3), 0),127,255,cv2.THRESH_TRUNC)[1], +# 'TRUNC3': cv2.threshold(img,127,255,cv2.THRESH_TRIANGLE+cv2.THRESH_TRUNC)[1], + # 'TOZERO' : cv2.threshold(img,127,255,cv2.THRESH_TOZERO)[1], + # 'THRESH_TRIANGLE' : cv2.threshold(img,127,255,cv2.THRESH_TRIANGLE)[1], + # 'TOZERO_INV' : cv2.threshold(img,127,255,cv2.THRESH_TOZERO_INV)[1], + # 'Otsu’s Threshold' : cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1], + # 'Otsu1': cv2.threshold(cv2.GaussianBlur(img, (9, 9), 0), 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1], + # 'Otsu2': cv2.threshold(cv2.GaussianBlur(img, (7, 7), 0), 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1], + # 'Otsu3': cv2.threshold(cv2.GaussianBlur(img, (5, 5), 0), 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1], + # 'Otsu4':cv2.threshold(cv2.medianBlur(img, 5), 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1], + # 'Otsu5':cv2.threshold(cv2.medianBlur(img, 3), 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1], + # 'Adaptive Mean Thresholding':cv2.adaptiveThreshold(img,255,cv2.ADAPTIVE_THRESH_MEAN_C,cv2.THRESH_BINARY,11,2), + # 'Adaptive Gaussian Thresholding':cv2.adaptiveThreshold(img,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,2), + } + +#%% + +def img_extra(file): + doc = fitz.open(file) + for i in range(len(doc)): + for img in doc.getPageImageList(i): + xref = img[0] + pix = fitz.Pixmap(doc, xref) + # file = "p%s-%s.png" %(i, xref) + file = get_mod_name(file,suffix='.png') + if pix.n < 5: # this is GRAY or RGB + pix.writePNG(file) + return file + else: # CMYK: convert to RGB first + pix = fitz.Pixmap(fitz.csRGB, pix) + pix.writePNG(file) + return file + pix = None + return file + +def img_save_cv(file,cv_img,tag='_mod'): + nfile = get_mod_name(file,tag=tag) + # img = cv_imread_cn('idcard1.jpg',0) +# cv2.imwrite(nfile,cv_img) + # plt.imsave(nfile,cv_img,cmap='gray') + cv2.imencode(os.path.splitext(nfile)[1], cv_img)[1].tofile(nfile) # 为了保存中文 + return nfile + +def img_save_pli(file,img,tag='_mod'): + nfile = get_mod_name(file,tag=tag) + img.save(nfile) + return nfile + +def img_pli2cv(img): + return np.array(img) + +def img_cv2pli(img): + return Image.fromarray(img) + +def img_clean_cv(img,method='TRUNC1'): + ss = 1.7 + img = cv2.resize(img, None, fx=ss, fy=ss, interpolation=cv2.INTER_AREA) + img = get_cv2(img).get(method,'') + img = cv2.resize(img, None, fx=1/ss, fy=1/ss, interpolation=cv2.INTER_AREA) + return img + +def img_clean_pli(img): + img = img_pli2cv(img) + img = img_clean_cv(img,method='BINARY') + img = img_clean_cv(img,method='TRUNC3') + return img + +def img_rotate_pli(img,angle): + w,h = img.size + if angle: + img = img.rotate(angle,expand=1,fillcolor=255) + # plt.imshow(img,'gray') + return img + +def img_cut(imgs,arate): + def cutt(img): + if not img is None: + h,w=img.shape +# arate = 1/3 if h/w > 1 else 1 + ofs = 60 + img = img[ofs:ofs+int(arate*h),0:w] +# plt.figure() +# plt.imshow(img,'gray') + return img + return [cutt(img) for img in imgs] + +def img_buffer(img,n=8): + h,w = img.shape + h,w = h//n,w//n + roi = img[h:(n-1)*h,w:(n-1)*w] + return roi + +def img_buffer_sq(img,a=.2): + s = min(img.shape) + a1 = int((1-a)/2*s) + a2 = a1+int(a*s) + roi = img[a1:a2,a1:a2] + return roi + +def img_buffer_check_fill(img,file): + arate = .25 + imgb = img_buffer_sq(img,arate) +# plt.figure() +# plt.imshow(imgb,'gray') + t,rate = img_tika_txt(imgb,file,'_buf',clean_nums=1) +# print('img_buffer_check_fill \n',t) + if len(t)<20: + arate = .6 + print('arate',arate) + return arate + +def img_rotate_horizon(img,file,arate): + + src = img_clean_cv(img.copy(),method='BINARY') + src1 = img_clean_cv(img.copy(),method='BINARY1') +# src1 = img_clean_cv(img_clean_cv(img.copy(),method='BINARY'),method='TRUNC') +# src1 = img_clean_cv(img_clean_cv(img.copy(),method='BINARY2'),method='TRUNC1') + angle = 0 + while angle < 360:#360 +# roi = img_buffer_sq(img_rotate_cv(src,angle),n=5) + roi = img_rotate_cv(img_buffer_sq(src.copy(),a=arate),angle) + roi1 = img_rotate_cv(img_buffer_sq(src1.copy(),a=arate),angle) + row_sums = roi.sum(axis=1) + row_sums = (row_sums/max(row_sums)) * 255 +# print('angle:',angle,'\n',row_sums) +# score = sum(row_sums>np.mean(row_sums)) + score = np.count_nonzero(row_sums) + ay = np.array([[angle,score,roi1]]) + print(ay[:,:2]) + if angle == 0: + scores = ay + else: + scores = np.vstack([scores,ay]) + angle += 90#.5 + + ss = scores[scores[:,1].argsort()][:2] + df =[] + for score in ss: + angle = score[0] + roi = score[2] +# plt.figure() +# plt.imshow(roi,'gray') +# plt.title(angle) + t,rate = img_tika_txt(roi,file,str(angle),clean_nums=1) + if angle == 0: + rate += .3 + if angle == 90: + rate += .1 + df.append([angle,t,rate,roi]) + + dfn = img_tika_df_best(np.array(df)) + best_angle = dfn[0] + print('best angle:【%s】'%best_angle) + return best_angle + + +#%% + +#url_flask = 'http://127.0.0.1:2121/' +url_flask = 'http://45.78.19.198:2121/' + +def flask_ocr_get(file): + files = {'file': open(file, 'rb')} + r = requests.post(url=url_flask+'ocr', + files=files) + return r.text + +def tika_words_rate3(t): # 使用远程flask来处理,减少压缩包 + t = json.dumps({'t':t}) + headers = {'Content-Type': 'application/json'} + r = requests.post(url=url_flask, + data=t, + headers=headers) + r = float(dict(json.loads(r.text)).get('rate',0)) + return r + +def tika_words_rate1(t): + en = list(filter(None,re.split(r'[^a-zA-z]',t)));print(en) + zh = list(filter(None,re.split(r'[^\u4e00-\u9fa5]',t)));print(zh) + r = len(en)/len(zh) + r = 0 if .5 .5: + print('不做处理') + return (np.array(img0_t),) + + img_nt = np.array(img_t) + arate = img_buffer_check_fill(255-img_nt.copy(),file) + img2=None + + if iscorrect: + angle = img_rotate_horizon(255-np.array(img_t),file,arate) + img_r = img_nt.copy() + if angle: + img_r = np.array(img_rotate_pli(img_t,angle)) + + img2 = img_clean_cv(img_clean_cv(255-img_r.copy(), + method='BINARY'), + method='TRUNC2') + img1=None + if isclean: + img1 = img_clean_cv(img_clean_cv(img_nt.copy(), + method='BINARY'), + method='TRUNC') + + img_nt = img_clean_cv(img_nt.copy(), + method='TRUNC') + + imgs = (img1,img2,img_nt) + + if arate<.5: + imgs = img_cut(imgs,arate) + + return imgs + +def array1d2d(df): + if len(df)>0 and df.ndim == 1: + return df[np.newaxis,:] + return df + +def img_tika_df_best(df1): + df1 = df1[df1[:,2].astype(float).argsort()[::-1]] + print('======111===\n',df1[:,:3]) +# for d in df1: +# plt.figure() +# plt.imshow(d[3],'gray') +# plt.title(d[0]) + lens = [len(x) for x in df1[:,1]] + if sum(lens)/len(lens) < max(lens)/2: # 选择最长的结果 + df2 = df1[np.argmax(lens)] + else: + df2 = df1[0] # df1[len(df1)//2-1] # 差不多长选择第一个结果 +# print('====222====\n',df2[:3]) + return df2 + +def img_tika_df(file,iscorrect,isclean,isfast): + + print('修正图像:',get_sizes_human(file)) +# plt.imshow(imgs[3],'gray') + df = [] + if os.path.exists(img_log): + df = np.genfromtxt(img_log, delimiter=',',dtype=str,encoding='utf-8') + df = array1d2d(df) + + dfn = [] + if len(df)>0: + checks = [file in x for x in df[:,0]] + if len(df)>0 and any(checks): + dfn = df[checks] + else: + imgs = img_read_pli(file,iscorrect,isclean,isfast) + df1 = [] + for i,img in enumerate(imgs): + if not img is None: + t,rate = img_tika_txt(img,file,'_ix'+str(i)) + if len(t)>10: # 过滤内容不够的 + tk = np.array([[file+str(i),t,rate,img]],dtype=object);tk + df1 = np.vstack([df1,tk]) if len(df1)>0 else tk + if len(df1)>0: + dfn = img_tika_df_best(df1) +# plt.figure() +# plt.imshow(dfn[3],'gray') + dfn = array1d2d(dfn[:3]) + df = np.vstack([df,dfn]) if len(df)>0 else dfn + np.savetxt(img_log, df ,fmt='%s', delimiter=',',encoding='utf-8') + if len(dfn)>0: + return dfn[0,1] + return '' + + +def img_correct(file,iscorrect,isclean,isfast): + start = time.time() + t = img_tika_df(file,iscorrect,isclean,isfast) + print('----final----- \n',t) + end = time.time() + print('Running time: %s Seconds'%(end-start)) + return t + diff --git a/img_tmp/flow.jpg b/img_tmp/flow.jpg new file mode 100644 index 0000000000000000000000000000000000000000..bca708f1db3b511b1d59dd2c090ad7ba2fb85398 GIT binary patch literal 37580 zcmeFZ2UJsCyDl09L_t8BRE^RV5S69~L{vaPj7Sp@qJq){RH~FjL5d(mx`Gnvok$B( zLq|YFx`Z04lt4lWfwZ&y@7~|}zwh5;+;5+8?ilxs^RKXAF*4U&^PS}_&-={9{K=dK z9Wm51&;#w;w-1y8d_YV*NJ7`o`4I?YYz#UL0)Y+!-|>N1fUAAL=N|`i40IK=f8V~} zKmT#Cu(A9;4zRMau(2OtXa9X3Jj8kE;6aXq?Ccy|92}g7frI@JHxJig?%&tHEBSr< z@4JAX!w1<9{x0#~JD80i-h-fN){iXvBtZLl_p$KqW4408KwH@WiGB<9AIHA^EUat? zfF^Np0tG6M0L^D%*$*_9jg=KB9Rz$2V&!GyJ9$p$0Kdt7c8R9~=U>LAAC$aO+9Y_p zk0_=5;92k?j-x`tBBIhVvT~*}3_J#a~Oyq>at3ZSoFfckj1c`#>!Jk63?5_7}N$0lD@AW5CM( zTdsZkeSwpOmzC}0xdVJUChYg0@=Kh5c~Ia=Y3iA>P6j;?^^)2)-b0=M@4Z#qFe6%Ispsk9*@ zLz2wLlBFk8cb1P@CRv_%=YHa&L*y-P?qSxD;Dggc&AGQ=y`+wA@J%fIvjKS)Sz2tcq?4h_%!VB zvbrU&q*V*6PHN|)pCC{u&PY7TK3f#tf48ukW{OkhCA(|H&gQzKep%R+Du$8S~;97S9U&6I^wqHi+xh${a9_t zJvu+MoSx&MAIbzdL@BW>ALng|zcR92L5ZyD(0zVFp>ys+D>dOIwZ74Pr7duRINM~+ zg=*))$?GBU=nxvGYc2&Iq+wHwJ&MU`V2dqU47QTI7YFq!zk!$DNO1^T^O`Gn z(q0@M&W1}nrL4c)PF)YfJzGJQtA~#@r>rFhJuPxl6P@#K$iaMLk*^(fvzMKeoM&s^ zV1wgAzC3!P2OiBDn5e9Bb)|gpOF7dNgooV7tfAM>f=}Zt;DshAn2BqarAV>w|eBz|Xh5Qyy zliZ;z#&CUyXEakwvw0@y1$IO0O5_F;M0q?FhTS@knAcumf}&0SazAz(b`hA-hFHoO z#2k3^3wC!~iN?kR#V!k2CNn`Tk7%k)P8uljj-M0>AKGm|57@vhvLcvU6o~+u3;%d;N=ESCg^e^f_N8r zAf)^$VsjZr;bej&2j$R8OweCS6GyRJh+hs&P*>0h6C|2RxBQOy#fN5aqITKI?n;ax zBPM7cV$MHQ5E!<<7G{X4A?Hnt&FZVDz*xLB08iX(bx+ zRz@B6uZ90QdjB5lzmB!NpZd)4P!e}HLG*a>{Q|wbxC_DedJjG^T!+@ZnkF_LKiIBQ zoI)xz$v1TWc;`!}drCP>=2_SIsQI{Z4!MkSsXtcl@0DEQ*?|xWR3>)GxcORt9+)$; z3Jj0;Zpl>p(aksi(@`_@LenbuMWsJ4BFn|RO-_eT=;#IVf?n>+8tvIROL4B&VW@|yJs6bOu1C=6lMh2}*x9~vaC0mF z7?~e=$v7Bzo8sm=O^3|*Hut{XL%#A_7c`#jT}2=0a4yu%o9M;dNw2Z(RFGjmET+&! zjo5d65@C^7EH~)2_`PaizL*J8K_$ixL3ulk2j*+wRio(6&Olo1Zsjz1p9>Sjhjn*_ z@TrF|K~Baz%{4nwpGY@w+J)2IMwjG`bx$}Qow2=q_x({R3;WOdy*F=i-6kSeZCt5p z@E>5|$}plw8xOCopF0z*6P=_;DSdN`~S93AK!gooiW zjMZEDmdf;{jvpCFp<38@;YgE(a?wH*vqqiM2C^4a_e$N=ac-IM_8kjyZZT)OV}DR1 z<08#nH`~u)aDg$8EhNXF9}5wU21;K6L$G)e@ajJ8|9yU0k1%kag+yp`5*4>_uhi_$ z)K;rE54FOaD>0Zj%a^G2g0GC*m~VV(_r|LiOH0J8nV|Rv$|WY~DVi=JxD_aCsH%Ly z(<|CQ>CQkx2E$m!-|B{0sR^`)*6#+F+k+SnxNW&ro${rz!)zn!y*+qjeNkJ3?>mlS%^# zzQ0eBY^f}+q2*;j?BDy(&U<~T%?4MG$Em8H*?39_hU ztf1?Cwa!P*!7l{*5Yel6;>K*7oyriTw1U%W8k4lLI&jx5&e=jP>}%}!ny<;*FCv|d zokghEu*#8URERazS^Eqnc^b^Q+}B>N8diwLjZU0aKRsy{17u^hO_NIYWr8w#iR(-d`z^rLR&wTE*&EpqOdZ)<4X6}c!~$-$D>w{MVV51` zI1Le}IJQ+;KXz3ew6@*2t6FoextvWF6dU7u70*Lpn`VNLCX|;n$zt6ibf~v;g_c>? z`3|zt&!-Dld<*7E^yL$`WD4JV%p>A_Jo?mKcX%isX=G~UrPJ~ebQHp=mmNNcxH?+aCGp1?3qv%7cwR zhbqOgUbmfc+20e&ir10-tdcD)yxBQwy|d(M$4DBcN1DVZOIS}1uLR&GtNJH;BIUvO zb)*+6GHrifpz)`PpxSFBt~VMSAKoW2$6^JSqmZUiY{t)z z7SF76{k-eE-1C$b_r^%`j#$LGwW$w2?a=$?CI!CcDwv)A;qiA=T!(2 zm|c9CPLd5}ob+>FH~11eqhC|a=zVuyZJ3LzF~)o`XI_Mr_pJ`v0#hBcF?lgPRyxd_ zJ0`l?7J4FBBV^@rb^OmD0h7yrEL32z@<`E3G(l&y!M1(_*F%h}NRGiX8Xt>Sf1GqK z#v2?S?$>T!P>VG99BH`|dUJlR-_nGt&jfYW`E4@VCay9;23aL`b!Sc`s~rys$W?`7 z?ncX#jWNka4U179168JME$11J0qsC(Y5GIQw^O2KPSr^gsdhufNvF=J)N9PFNhk2V zA+>*J0qL|~{(f+8xaS!YBnH@tC_gNtF&Vpea;01;S7avtipx^hsXevm&!pRh0#x+3 zE%0dJ^bocg68@v}spC8)vy~$S8ByXOw|Up@%alYCH2%S*nvC481J^qrJ=)TcL-!XB zCtAm3?I4Dd<3`U!P%j#;;dKSCLP-|Mg|5DP4@h@5w;U?Hf;h zA?40;Lfk?EFpm1qpL2h7RBUT4ZSvSJB9)}a5t}EXU@#53DAgC>(ij(6nC?2PE!S7< zt$2Q8#AIMS!>YKX_>oomm*_C_zQM$Yl5cgIO2ipMQ`}?tk!VdTp2&)^ULKle%JNjJ zrO(SXMA*<^hM$h z!4Uvk+U#VAXOQ=2cq&|Ad|Qqr_Aoq^3jk!C?_@d48&zj=9_Jk-BxHKNag?ZNkcRic z*(qAHU{1e)R)$RfmaRwoNLb6YvM=#`%l!L3$bqK4Y#f?DmirmaGg4)eR)A@$4;3$=`X*>wAGHS6)S zuJBFrM+)LTiL<3aFutwk)JUC8jzO{0aHsk^Rj~_$%EsbR2@i3;JgEu=ZO;TFP|in7 zP}=~eBsG=sxbV{09$~j4W-XG4qCDSO1(b^}3O~i*27mz3)e?v+G86;jRGAXLOS?*R zLxohn#^E}wtB0n8+t^M?c20LOi-d}s7&0_@XMO$5mn-^o)Xa`Y!_O06-{ zvJbaq#A%27!scLp>dSuNCyl&mm$N89Aj`%^ktcavCml$~Pen|M4Jic5JJQpO?n<>i z7%1LEs_EU^U!t>sbQY@&GGCu}TVHa756cFq4i9fO3~w)ulQra9E0B~^>W#*an*)(osk-3j@c%a&yj_~K2t&%whY0&F`P(-3FWt~6vvtPi`jVJvh+9Uhpcz$lV#o4@!NZKdLlZ;3?(BC{+{tGV?51}tN)FsLU5Jdql$dMzX*(5FaCgQ&E(nTq%dys){ zU=f5mbq}d=7=tnPONccH6?)kp1vvSU$M6Pwsw)zvk zPmf96bEv7kSFrKHA(ZXiB8Tf1lyRsQz^iu}=r18oJmEh6xls-znd}+I!JIFcP!SdS z#f;Lo4?rEBLYvx-Cw(776YNoFyuV83ySBEKPD>~?1oCC?u#6B7l=sjTyX`a-d4-A` zu-;u{aPU))(3I$f+QKfPi^#fj^!frnBxC2q)fv&#gZxjjCeWlv_RnRYbNMbDq)Ib4D<7)dQvZS z1HM|~`@r?`-Q2ZZtnar)k4cf)_r!G?zo}9Au=auOV%H@2i2()|MUUDX_jAIj%YMXS zd076@y9JAlC#6-Lug^KeSDG806kxNxnGs27-aKqC&u*WkN{c)-*W^4<0P)x_Q-`&t>*j>eZv=r(zW| z)XI#sdcGoEg&v^}Bnhob;-Bl$WC# zd**~umcgvWVZBa0e3cgQLLrhO)qu^UX%b_Qc+?wQQb=B)5{Ydse_GMdgXd~qRrqR2 zvi%)nfz9)_tm67758e`YlN8>D{Y)my4`CfpM*^=5ZX+Trr;aE!NVuGJ6{=ECF|#n0 z#(gz;PEwbH`h#0y;n{dkfzH-ypn#(B-(|>e$jS$ee)zGj#V5u^ABEBxUl}fWq zfq0`8RVqfQ98ItYL)cXK?cX1C)%oqrDj+^C_+tm5e z2(F%4NF{bPm9#l)`DzO?y3?Y~LQG30LR3VD<5jZWHUbGonl<`}`Fx3a=&Gxjo-mf_JwAz`|$I zk(i3!pYr&}2+=yLB4;K@5M8PFnbAayDK0k}AR*{vO6bd2|fXkz(8SvEg0duk<)Ei^0c>H5~nApLP`!HatU5c%Rr9 z#+D|xy4l#!(VDjC(WM=22}*9J5gvQc%4$+lU;34`SFFwacwTf?oUR2=1$*>&pSSI< zrupk;F{Ffr3W8Cr^wj(B>$(-iWrCtl5|)}h^wN(4jt}%=D)y#1SZmg6t@l0P*sVut zjI^UE(t5?k;1|b5De}_tq$-Y*TKn9@{9<*q?~C6X=|OuKw0NEI1EV80W-Qi~V=tRbt6^SxL1FN;4qALP0O9SxuMASg~lqozav5Xn#ZYB?z>fBKlRE^%wqHEBo9 zzmlG%t6qDpz~tSF`5V_$V}3Nj@2`MALuvf-O;h`^51AkyCno4fo!E5mQOfd4&tN4% z|8~D3dcn1N{443TgPua^oQ!9%S?n-ibZ$=Yy%{lw`}fCLSWwT^LUsL?3S_Srdb>l; zSermC7glRWS28s-8He<#8k7NiSaR+tg@>F(RBwfbY8)@ttidE^`T4Uu=e#$nK)6iy zin%)kI_8SAz)l4|pae~Sv?t%E5N4GG{8BrTnIPUukJtWy-lm=WH0l>N>z=KIGna@~ zXwrTln_>hyt0z<_2l<2|8ugAMTCwHFvq@cCo-I%O91Th8t?~q$mLEFO)nT?5gCpMa z9hFdRY}Dhp4E8dI{1hS!R1vw0f!q~;;5Fj$SnwM~xFjXShWI`G*@@~e!R;GnPV>=y zsqWK<_I>X1Y>x9h-$M01SkIq3VkuT*X-L0qEJYp~({n!6#rJH2s{d=XzzQp~I=13n zl8@r`;{lwx7RUjLEZL1D)PfSG&;uX3l9XEDM@eBJ1uMSiI$x+WA-$kCf@%C+< z+v$TMO)QyothVSc2p7a1CTe@vIuJ5gqWLs$X@|5_zJ$?MCi&Dqi9H>qsNHPzq{i#~ zy`r+aLSJuv{iYMs5Ju0#K0>?(9|s&|#3953+7V($C`}o^e%$%E{19C7O#Pz~ZHcPV zD;Bbq=0Y5^0qyGUA|`z|^}vKtz`^K@??7R4Luk&^h~NfxQc`@L^>VVdqOrXhqo=&S zAwlnC+@o6?$-8%=V-v>>E+`n}d)_>kVo^6o_Lt@GxHjN?QFrXUYJf~us@R+RLdLF6 zZT=0;@)Pd;arMR7xTika_vu*-@RGJV3GnS+c|${|4#5vlqVAMey`U(Y4{Q}{Yw2;} zdnTi1?hxuKYA^EEndY0s2`C3bMK{UyEBZX-}6boGV>J%}znZa2$oQ>e`#IN&p2D&SX9=KJmulWS?WA4a< z@zF&V5%d6pg93vR^}|#MLI&mG(EhDdr`1(Lx^T>-@SubQ^71F1W=vCC`-j~`lxqEB zbdYyd{lk9Ru!(`BXtK9u?gGJm$`K}SN5H@`BIQ5b|0md)UFctth8iRe!Z4ogK#|WZ zgZGT31A*)zcSblPhLfBG_$EIPq!c10?wp^$=J&}hCP?nMotyB5tMA7w`{~G{Jajw) zKL~(ReNFKGDT*}@Ls_4u5eeSApoFtwf;e@6-cLt!t^rZGVw+GQCH#(=YK0$?Nh zY*Zrz-U&kK0TY+y5Wl9e&K@*Hb|y$9GY+*0-?zGifJ}c6Ap_byIQkcWltZF5Sv+PG z4^Yj2&>WOP+lJmmk)t)oQXij<{w#G{_M%uqbf(CXtwk!;)Q^RnMNg&K7WxUb;@s;w zepxUW^HRpikTX__Q{zbms>OH)lkt_?_U zmaGMS`Ict8bk21r6#;J96=}bWo}2FUl>V&Kmtma`Yau3Cb>$cs#7id9*0C03DLy9LkE&QbLhFd66dJ{|se@S!RQB)OjKsHLtxT<19prtypR&g#{Z@Rd9wQ#MX0@uIElv@R~KYdEdRv;v8b zGUp#$dk|?YqciTl7}ctiGX$QG8!)ypzU16z7cZb%I7to*`-%1IiHWnxY@(PP>{tPJ-JHv{lfCs*^v4-JJO{3XpralykXcOYiCR;uDddwSC{qWt&S7pZB_3t zfKP~bOoJ&hzL7kwNMXN`8TEFf-Uz$1B%xOxnj-drX-CWZ?td=-(kXMNc#JMR^x3TZ z%mMv$yBoTvhvd4_2+gk9==BX6ZXTL3^}b>k)6%1t^=PYNhG9ow7;LHz5xPo>7a>Fg zaubg8iTaaiRQaq0Pi{2sC-Mf3kCb$KhLU$qTFk zK5vt{Epb!ai0PE4fkGsqCe#r>NZZ?AfNgu2E*qv=`)f?I22-ba44yih-vg?3$D*3Bs{xT#+DtG{8qS5t8*uii1GH%$BD`JWVn#gY3l0~XKMwpoLZ>X#air3_vd{v2_z zOYm!l;ID>ef_{tC^{=(F2Lc?Ef7q$O)cfMn8igTYO^{-aL$J~(X4R@wTv%#dQ*e0e|S&D{dixIRy_0IR}H1CKJ7&YhaDd=+B(o73{Pl{_C4Osi6ST^ALPA(I#y zlmd^JHC3U#ZxGWc6dPe(Gic^uAS4GulM>Z!v2zh(iQxDhep{sYF1#uHKJeeQ`)D zm(W2Kq3O{xA&Q>V6=NfM)#i13a|Dx#~xX*H08}2(M0#(b4Q@=?ex2dCL`w1ZJh!i#_-eO{eOt zsKS1(_=a5K_W?i;9j}ApMLgn0po4Yx9vQ!CyXwZ@M~ZPfnUIi$`m4{M{uZOj-&7pb1fnQ%X>FBK{y@~fv&cSqT89kG#@eEE?SCWaBp7wquzUfD* zIo0ff5Z1+Uqpq0YF>zvTv}&?;opOrj70q^*2^wW@YXB1=FN*_%W*|p!M%gx_#(7Qm z9*4HxzB1x0HGAJFWYK+2;n2QtxW=I4_C@L11q%#%DzSks+7P8jAN0OImn5@(?St)J z;5qE_$4dV((+$TN^|q;lPIf{RmOQJ#yCYj%rY+j+Ny7_Z!DTy%0o5cS{y5FlVzqXU zE!A^1t)by-NoRG0RYw_}VE#baS$Qb#ff4exs8aMc`Y25v8`!{=B*vwdiR9mxXfM7C#=X70z|-MBinAA;ut1Br-U(LBy!(TI6$PgUTtqw ze-w-L+8Rh&4*-MV9StJHDS|So1rG~;tcqo&q;0I=+v~IM;+)fX++>0zmW@xFU8_mC zwx8^6-@1}!_JKCE2B5%(wwIbAfGh8UeSsxLQXp4af**FQD`JLu zv%f_a^mmuu_*89A?O0D)Q-x4ucnE?-<=K&DW1AK@q?(fIO6+Lyez) z%+mY*_>a8f4}_8*fjE=wOZ7zjKa*VQ93Lgu?@XeVg0vPU$u6Q5yD=`Mak+AU2f0>p zu^ujvG;-75sh_?N@Q(9-0Ex6e7?biZ=sAeF1SV+QLyrj>$jtlj2N?zScRZGcW$A+>^Z!w3>Px0&y`L)k5+{2*hnaXt9^}SE>eK5ec{S#CS zCaz5FeL>>l0l#<(K(&)M0BDzGmj1s61`j4!e$`lP!bZbB&+n3CUj$msD#bK#;l!+o zRZ-p$%t{2iRnZyDo6q2l4v!1dm(I$sO_f6luxT`|qp7%%qHnsHRdN4lX3ltSe4exN zv%{80Q16A0v}hK(UApr!<#z7FZ97DIfWNNh19JC3#@_F!A2& ztk^`}eUBbp;9$RP4ALmm^qr;i;KM^pwtIP9aPZfeKhxu8Hpt##!Y{TTUFiL^9ANn! zdlWmJl~jX@KBLJ=)Nk1gv?I;wKWr)usyP$ADX8S_yL0y7uiUiMmow#cfd9(+YY495 z7uVYkuJC;w-s<*j;?*!*wDJHCms5v3N0IpRRY)C${fm2LS+1L7?t)hbPgiw8?U@N! zH?2!FmrzKOC$|jd^dTH!T*bNJ7J9)eGfEKz+%XN!vxgr~->>muxu9^C#~ic(K<%Tw zj6*|saL93aAWrRWO|C(1oX)ej7`I%U2TyE1N4r`PH2*sTal&%1VLdlghAvRN!K%%UoE zJ0k5?_=8J#;tHUbuj`-i#Z87aq`s0$dX&mA#O}E`<^q$yzM~(uXQcfWm{H5f>_6qf z{w8hd0`|`#>$B}X_y8`kA(484vSw2l;g;!kfGqg@u0(R2(6w7ol5$zzF@t;TGSxVu z-$zK{EyN^h{syTW9gHf%l79g0n1%kK1MXwqj3272*i1BJAKPS;;!_?GVL`eqG^hg^ z`lBQS{iPCO>*|QfT`Yi_?n+E50U@UJIwQh=4akU1IWa+zkQB8Rgs67mruM6hd_Rv? zHTW4KA*?vJha3$t!^EuNY~Q3kzAfkV_$=$g>laS&j+^7?+$K+b0#FVDCb}p7+x_w9 zOrS0OIf`I2Bc>&UZ%B3XR!>$#&uAU*@AFZ9UKT}pB>(m-#P@J?{L78mizkkKCb?(b zUONEr7hnk}Zs6%4!ObbcIqCr`nlUMbc7+m9Y)l-PnTTX8+I`lUTYh1`qyB?FI{O%8 z!1jXWQgA3bU9RBG2iv}Ejku6H=zBN|V{#-3e59Bm68NO=9>tq<>U+0X^xQA_sbj23 zrd9jiO+UZsyZHq6F~5wt;{H=ZL3wgax{#<5sNAJHBP;U~H_;y;Qi(lXCHZ z-@_BrZ=W4vUr-_CwgF~DxwtAa)rwM0Ji}jh%wJ`*P6sbn%b5)(k=!$pXPNBER; z6bsEFiUv1eD%#I0m^eH7E8Yb8JjekuR z1QfC~S?o)gJ94niOE3nf{66R7I}=@jga2$F{`nfna!_PW-lFmHZ^%V@9L(M;iTRAIna4Q z(>l*LDIF5cmaRW>jd<8k**K)`E)9Gme$h@?ekklTVBDZv+HYd#3)l@Z6U2CpXzQiP zi~*^NOokAF38M1>mne#H6=42hCXD=rsm*!%3az)8(FL%%C27C8N9+GmQlF*-RIw)w z+yo(}dN9r)x1nkPYh7s_wg4WzgPlXX1k(B+0R)JS1z1Qhk|*GFW0;_sUqFaRMKD3v zurrGcem8(8(f7wQTd49vG$c`rFLD!0fdLKLGGd%T;MJyRwqPRGg9-YTOar>7*$qpn zfedMmF#@smh(*kh~E8MZ?WDJ$5%3+l>44#3+V7<@jse=TraE4}Wd=avC6{Hf$364N_K$_{s#a z^vR(w{cA7$>+sM`d$5b$)REnAvW3zvc-miDuxSb4MGR%uq?!|9tsdMzN5|^Fjnp8+ zWsi~yHb&2d%ZGOJHOg^3(-$GEdZZ7ordMN9|H%81 zDYqLg`@zvPbn}k>`ya_&J@n2h3co_RprG#oUja)V(k3~M2wiQPu2x+a_Y3a0^7+=} zt;8y0uSQ1)lM*{U@~t2eOdZkOLs_RHAj#7SLu?Sf-|OM%l5m!m$dY^E!(MW^w(`H&XY_ff>%2c+^;>g79%yEA~u{EM}K5c zALaYZ@qGe_4TYvL#W5H6)mX-LP}KsR=(X8GD3Qv1ZBg1K)M8X4{1~wL!?@uM;RI{~ zdDl~PZ|jQu7~~Q7ClW{xtbWY>Iw{saxnfr@GN`GOZ$K`#yzSYo60a^PMIXd9j4sqa zE5zwR*ryn_(OMO!0KMQkvA$soQGQ3!C&HRh`|89zfO^=SG(5t+0^k;t-xXgXeSE0j zJ8I7<1f(qe$|!ZKx9T~iw)UGfr}Izv^uFKMcN#7bKcvYI6f-*V?VNC!)F(<_6IXuH z$i5s!>5-_UYp2(u#R5xW8fPzxnOXb{)HM{ew~nvKSVL`RZbJ1c9uDNo8Qc_Ad9Vzw z@MtaGf62y&;+VBE0(QKrHT-eLysFAs@m@jT*!rK#$-qW2^ryFNH3 zh*a|{51k(_)FW@iWzevWgD3J)V^=yZ6(oH|GK>K?Kz#i#r>!X?5-rBQp55n0CD@8BNKFH48~B`|4Vhh@-pd9 zlIi;aMraq7RspFxjp+RX5cj5Nv4EayF$**V6tXfClxQ6A zc$t|nz8EoHIwR&`3~TUMB4lwKvEjkEzKHlsD}hLc%Y-D)i0Eb5Vz4y#n$vPj&|SbF zT&}=w05vP}5V}yCYmi-afH~%gB@Ti4u)p3gLGxp-0ICm|fxn1hD=2^{0c_DHYuK(O z6O%4fN80~rpAj?6c^-c*p?n*g8<~fXT1=A6{93F5;AfK@k3FG2E?)5SEYG(IK_kDP6L>hUC6pw#eXzuEoZA|BQw z*H3KcJLQU~ti4zWl%G)&p|SlqQ_;g?R7t7|C-@wb=~^*Y^WA&KABN%6bdPKPAaJJZ zYxRvUhi?B@6LPOc)7I`BeOuTpdDG~l`bzhO!fp2vcLJvOL7KPq>Hf(0wRJ>O=HFS- z#fX2J+=l<2WB*nrH241tmC2H7kgPQ&b2cEzv2E&JOs}qYyk4I|$?=(CoEuhkyktov zB{1nw0w>?br}CpEF_H6aBy<1IPqJ?cl%%+8?vzj1nI_gcJ3boz){b$r&U${^WDC1I zH4P|*z^3`o|74!3T_3(PYGs}nc2M#}q_@Dn4OkoaC#-b}r0Et!;wQUd%ZUr6K9lhB z>8SFm7KUVp>;nf*=h_3e3&+j{x7DG@Kq@+T21mO@0ZXUF<3HN8Ktz&AkI^)(;!(^5 zzx#090}7|gW2}N-)1@jw&^<<&<*Yuz20(-cDJ#@lfu1DU=;S-Zb!7G++E8T@jJ#b} z@%eIO-*SD2_o%U$|&fMw=Yo)+)jz59hiR(CYp7AWEj& zNU|SgK9kXpVAH%f8_l-k+#peIllTU+fQeu0n1QVTd5{COuV z@G2fA<_B)G?hW#_W%;;NufZnI0{_L~mv5Xlfcl62IgA@|yUE*Q zmJ)q*q<{%h*3^SWMC+xCm4yGzu*`}H`olq22LGQInf+@7vK)AcB&1K$EA`I7O=$XO zBh^KNChC%W-tQV@W0Sbn;)e6(c8wQ#yC8fgz;G)C8Pvb+LY~S`e9SEox9~w! zZvrq}I$!?<8EAM(kEJQegc9ZT2Hn@C2P-`UuceSPePzsgR9l@I-Nvs(%ruwoK66ec3W?Y(|3(1>h;OeV~6}oVD}wX)&g}Jl0S> z<5*RN=`X1m%dUx;c~mt#O9>}`9%N$-DGgQB1e)luFDY>cT**P!zaR^E}avF+~FsD8LITG3Z?yl9z@uS zbw$E!&w(b+@_yN@Z<*=4U3Q<=Kxne!y9KgYt0g8 zqIq~fB|1DQ@CiA!&x{-X7M)T7+r6~@2DZb-1ocZ=&F1INzbo-57p`0(oD`$?yDogc z8)(`*h`okl+(@1q9S1h1?!MnR`A;F(0Z3D5DG5LUXD%*SIiMOf9u{Y37zsSOkkha! zmEv$g=}Vh{9Jk)SgnMC~JBCE?D2#E_bXJ|-1ldZm-dUzwc+DCniar?|9Y9b7BxQHH zjQ7e3a&3AE64)e2q%lK5uEb{6u^q<4=tXd-u8aB=YMIn;Dx=k55s*IIf_0T&JhegoW;HPIBExf zI1#RSx0-)Mchk3QcXaGW->>R495xQv?N_*$HV*(j1ZCGbulG>_B6Of{qmTi!HP&JsmO)q&h*^hv?TS$svJmv+q?<0?zXM zws&M+=iOC#a6QS4KKDe$QEu4}t{f7Q5^k4Lj{`RL$a+-gZl1aVeFy|_@<2f8UzNY* zEjT|_h9U&vdB)_M{KF$Og=?nBb}0YHffGi?ajoM=A{h>@C)hGCmDa|XhaHd~biFMQ z|MT=QBjc)Jua&4v-G1)HXEL1}gB|$v&!0V+m_P)kc}UZNoQzjE->ce3DCel6f$AfS zOMV^g0GnE^676T-s_uUv!8>3{mDBAEZZx=AaV8=0P0>Rk*{27z8iU&#;?b8Vz&ts* z9tkFEFJt!vyniKs4w#TS>*O zO_UI7wNrse+LsBcLd^w7_L4ka6KF#CD7y8=rQKq$pQ1BnWo0R3e4^g<7-93Q*m^<1 zcUz#5my`|$M?A5mv1S;VyKIeUleg(T0wo)t7il=QlC8)3CaA1WN|}<{>|3LtuJc+r zw1ihZHs-R3$bo9e7(l}o`AybWwAJXf6CH`5K?~!C?L>+51yd{htp(pUccn)jUoU8_ zJu7VbW*To+cu@D|&CXX3=5WZX6LKX=)9i#pcph8;J}ecWtFo?G_%B4|>xnpoHU;>e z;1|drz9@9T;qejIZvb(Faw@lZ>SuUI8b5I=T9vb<(68)Xq2Sw4pn-&LHmYr3+_eq- z_vd2oF_R~?7q^rqN>cqehY>}F zZ>JhPa_Hu)HR^t+xLEaYqUx}aV_gFJUB;&0hHdpN#v8f8!Wd;!J{=6N3t78>Fy<|w!5UP+D0L#n7Gyn)sFapuZiDERDBYyE( z4uH$UX{jAA0Fcdg06@o6H1@QC6&UM4cV?K%wxD+T0F`kVLY^%eS_~UAtcjbxK1^yQ z$X#=M@QLq90VjV}+KqCxNnT$obZns%1x8(JCFQA$L;V%rH>-I9Bl zm=m^CX?&|zwL88($?J4hVs!H?xL$R4kp#SY>LQ6{21W=a8Fi3>gu;GRW1;qBAJZig zrS{5~;WLCJvFrFJpp9XLviKLhU)(MVw*^olJ-rh$D{Fj>t?9?taL$F+^^nU8cAXtP za42kPCBO~)9b1ZRg&m}fk)tay^z=F?DP_m3e1uRE$UjXmT%g*NeXfad8WwABsNCo7 zz<1&DHRmIn+=Q_guq*V>mW}0zCc8lRqy|NL78PUdrOnmfe+<&R$M3S~9M6wZGqd7< zkk}P$(5WSlgV^&>Wb|VYj?gi??wK>i;HC+PP8G3dIEFlVxZkng&R{-8-O|s-zhVCo zn4jd!ifyzP{XNzJPy-?03UoX2wKch(8(SE7b4W4eYnC2lm7q3xk9lusRvKym5$|Ta54D2`ove@vTb<99b>y6REbrd1Fu8C7U9o9~ISL)q1}n#=dt< z19_O$>KVB@Az&AfGq4d*F923XiLxS^VbmV%YU)zK>4&G7W%dgSPtdxzCii=&m_eJ)*upqU-kxD+t&JJHp7U?PMBet z$uWnGrHx+u8O|QN6Ps?9ha`H%A76WZMfe4cWeLWHn2xUr>R2@pYErU`?3`=BFX6nu z_^g(-h5C&vS8fO7R)t2d0 zS;3xOA%W!cD?Zp-za{uu&s4^aX~oBe{S+O*r9V&43-p|Ua{0}SG$W4I8GNFIZA>f> z>#Leiq>0Bg8-Sb@z2B`JU(mbrecl&&-~E#!yn-bbcAh2^RL_uhE$3+$4n!;REFdTx0P>8?hg1{;uOOQNQ&&y? zsrCi{CS(CKUMPIZlb`m{x!xrQ0UF?ZR~O z$vnJpk)4(MphSZ(Jr**Jwd111l5HhtasVX%;LXnzD8c6Pj_3`-a`(5-4tu+i7o`U+ z(`i7z1gpZor-}WpQLner^y>5bId{h!nV>OT)ZxYqd^&EH~9xm z>U+PVWy0Q`(I%YL5#?7sHlhJ}yFz*U*u~)CerWQ@tz7DtSYT_`KrK(5I?j z-I%nf_s=u8zB*uFIL=G<+S=U<^B49X_7?|U+Ka(vEHZ$~@Y^#O`+TWqZbeg$uq*wY z8|C8D=OIgv#3Ku*4y|_qUJL`Cn6(u_YzoCQ<_W-AG)VOL9M5`#Ub#@q%h$87I@-}h zjMOw<|GoSw?h96T;)tq%vU0*>ujx0vgN2c^PC3RgTRx+%8y4w3CF<2AgcdVmdF4}5 zO!dx25ynz48abX1Wt=5IYB5i#tfa6)us5o&?AhpMy5K~eqPs`AA?M2Vmz1xcVvbbp zu$NxvpLzy1PffanfN_N`(-PMkG4xej<7hg2RT_d=5MX05!^a%k$Q`0s=JOZK&A-x8 z*Y2|Y7H8>tan`;C0#ccTTY5uE2Kzb!b{ACzEsx*z9aEC-M0X1m z4qs(_%0Me@EkBzNIFpK1I*>X&Y#?73n;mvAqDEE6Cno+y!B;0oTRSH#-x-m*$}=yO zo)<&8n{;fKGMlk|5s`aS(b>5p_mj8RH7i%oe4TG!{p5W+%&Oy>YG@n?VqJxE9M7t6 zPaAJQNMe9|6^7$gWilnTO|+041@c1W76oAS-EbpmwwpP#K^-;~Ty(#mWad72@!od1 zGVk^-9%_RNX%TAqqwgim;e6=tzZe0pZcZ++4| zccr@h+OrH1J-nWtZjgYW^NnfDEwCh>xIPU85n*eBqz^x6tXHytn4*SNu$32qk$N4+ z4qfu=M{b;1>=vJb7g5#wt4*d*u^q^<*~MLuLV;cO#{U%R|1I14U*P}45EN#zP2&em zL8uy_WhUnMHo~EQ`pItwSg_O6R7K?cdDR4vl?HHF8G8Ce6LS26ApJLl69|xRAdlPm z*uIq95IDSTUQ*mAbisTt6N@#y=XpzgIr~79cU}VvpFR56W*cI zDRX8rOPlM>W<6Q+8rVCk+A$`(D6;LE_4%2(=Qxjr?}qVNge4v?Qgt z=u+px4exC>n<>dfND?OIFF$jyTk4t_u7Z~gA>0c~RPmL#b^ zi;BZy8c~7N%LLeS_hr30<1vV5pL@BycAS}Fd$uzAH3!)55X>+hUNw_ja=7~4sgJta z+Ln?QJ}I_s>^&u`3rXjr@n^L?lYKW`G`;6OE&Ii9IL@HIgZ+_5JoK>1bv2hsC7Bir zFs`br-h$D1Cm~&ez*Z0*eN5+UUGpR10OcWnYx2DAxQbR#UVjMb`2=)Ho~%YOBYh== znh{ZrEJ0L>Z&V1pVz@zpGvAKQZaq`uRntY|H#ZwK5AfZcRmxi4UHp(DWt?L#7aRR? zb6;5n-g0+;$Ga06=KpIM+ z`eITL58X3nKe2<2Ks=;lqZ-=!K^6F^Es?RLdpkEb0MW80pZxwGQJVh%r^2)bJL9dw zJ?NJ;iH`bqL|YHlXeaiBpblZrBb_qu!WgU-h3LI_Y8$LYIAZSi`qo`s-&Z%A*; zxQvl@gW2wh>H`PFR^zz`e7)|c(NBBu2%Tg&A zgRAZvUh176H1lTwWAV$+O{D$1#>0icwsMFFYh={-u-k?a5*ECe59*@i^ou`mBp#4R zh*xMDmn{>DaO@GEBxP!ume_I)u4zIWb`ae-4kfSsjzp~2{nVuY-L(Jgf0>{VWOED( z*ip_mgct-_Hn=*dj+oxOSWqtQTnA`kIo3=uU?{w`fl>A ze>;u|w&mcb2yzpjdVjJw$H5r)shs@y2hp8e+AZ)FN8SdCeARqEXmn$uaf5Nx{+d%o z-Tr*@rGvM<-5jVw-OG|()(Qz{FoE~WX=%3~w+vl^NXbSX7-r#IP?H+B-3RP739|s$ z`&AyYSHd02hPd1>AG&&I0c#oW$foW4B*nqUwZmF+Sc8qhXhNoqPa%4pt>>U+!b6`O z^dG6JbrAyXS>top+y#k6I->FGI`X@@L7}zq_&2Walru2-5gJrZmuA$?BmqSw^+8FQ zQ2av*{0B`E^vDmI)$r?p`J7u3vfH&;&jsi+^I*P02zmunY#tW+1lTh`cmuJ&vOg<` zuXw@Jb7)W`a45)nGaj->Oh*!p2KSjXU=+{*IwP=14oaT91?Z?*xq!d(Q=Qkz4j8+=W>B0_VX~P*Bmutx;Q*l+K?p_u z%E&Ir3*>eau%3-3mTHj2KxwKUh{nG_$Ah-|{kzG3t(DM0CK!dB2K+E&Ez5G|Uh+0S z>euEN?lb&;ZLp;Wpht0IcDD6EP61dF$gmMvV?`12hxVlZsl^owV7auAL9VZ$sRw}B zhn8k-p+;Pxe`t^XpIYqE2dH58K?FJqrO;B3Ae~H>A)jZ80Rfd_*FRZN*5^XypDXBa zUgQ`{yyxZhgemB0tw{9kut-K{9vY65@s;B!z?7!FPG&|Wo{CieTweb9Rhyfi(z~z9 zJ%Uym$}y^6?$R7?2g z-`)Gk*E|dfRTlBc#+fdd>6WC!eXV+hl9aR;EhEpE8XnfX_lkQutgxx^j;&0Fv4O17 zW?nFa_kG_dOO+bVQPVJQ*AXmkxMhTCn$v`qUMNtJMo{*5z>uDSHC(C^<~Xy~dTz@g z#Iv#n<+qx$oaI@ml)n@0ATt#nztrFPm)_%zpNIG|^8c#Gr;vY|Dan7BON;xAKVYW1 zDv%_4%JKj6tho(2PBA0q2Kl8f#t*(wR$~5kliG_P){d=N`?@zCVfu-YF`DoC3s(iU zi_wQGLe4on@s!EBc06?DeQpgo?~6g_6J}FW1S-Df60%l~B0n+e1L347_HRl90a?~! z5+|RNo4Q-!vi`wymUX&AeF3*LcM>2n&e3glu(GspjE$u;t{zz&8)%|a90NPu){JG6 zRt-YN9L3mOQ7|uW5esAUU-&(KcQ@0qf*BPe=Nk^aU0B@bPHvFta`HIZ!PM%LC3KM6 zmldWYaU)Z+wXA9^r+Rhl2hGfrJ)1VIvF+LIA2f+ARc9wUq?@e-iU#B?)tilbN{%#? z4#v1ie6u(;*Sc_1XwLCkjT#ZCgv>TViqJ2etBlAON$GH1`*&Tw<2k1Bll2uDG*hRe zDn`3#I8?v9&kY?9qOhaY1`&<`p*v^ig*)Oe#B{QEGo}fhQ3jDu!Lhbv>(RW@eav`V zM?1v>?zj^rp)x2h){fy=+tc4@WY@p*@00K4n?Pg5@jw@?)x?G#R>b@L0GC-$V#O9%?&VaWiC9L8*!c(SmgDQh1JTQQErT@PclX3%v(x> zdFT7%$bjEBL@M(8tPK=mwt728#at2!YgW}VRbUynx0H9+E$vg5dVRfzYPQ0cALi`> z>CCNU0?v+>3`%b#h z&-v(de#q6FlmszaJrIJ48NORo4Y0aI(RHo!wC~U4GHlJ%JOqnm0Of2sF?RVBS*GlF zg!dDCqL}{YSe>0->OFmeo^8TH1kVs* zFRNq~z#a@N{Mok=9RXCV?SUBc&rSfW_vSCmmlTU>h`yDa!k%8IwdPsmU#fc)w{x*3 ztBC!)`?T(b>f!LEl7W7!1mx+$no_@Bs6%!Mqo4XkU+)i})m+26Ki}o%KmKGy@MCIT zJaYUM6%tjC$%j6lyr{tIQQhD4!OPy|@`aX|HJI+VNWLWNb600iK97puH7jbkz2zaG z+MHmZT=eEdb%M$2!+xqSNL1a4{5kjk55F4=mQB9st-5Xf{`>rHV09j>X`3PK`;qp7 zD{-UB;Xs9a{0)PAgQ;YtRsYTLA+pTmJF0%dbs=mexpveIo*ZbDY~o0k{6whMlK2gN{1~+=FDQ zt()3o+#jlNLcjnptOSVIma!y#YTAq+@GG9%BiHWjp+fhW$iHh)I%h)gm|v@)Vo4al z{`Law?@DPx@}6Eh8F){JLFR9&o($pIr#~M=?Id<^)FXeb;g_D>MaOLgQJGT!`W&>S zD!pSuJp@@-iWd+3!|i_^#!aZ~G7E)I8w4CI|2;;{l~*c@sO6_6->1qpYBuz4Cvq4b zQ8%GBK6#|`>D0JuJxx^Sc@GNfB-j@{hjcDjPnKyJ*uFT(`Zo8G?e*Z3@v^NqgPx^* z4|~S2Uc`jrdxlbhXW3`l@TB-+SEMu*Y?BoWxOq1?79^G9kx>tUsj$*)`FZZD`B@)J zvym@pm$5J(5mJ*dc<&y~eHZLDqhmXw;<(=203kd+gj&M+JsHp8(j*$Eumsxh-mjg2 zu?*LHq`hRlci#EgiVEJ8MVj0Hrbs@spB~wSIx5Jy7i-m}X70Png}5?6wdAWayS#%9 zC%;>kpiwpSrkARJ@ATp2X^bL3zn}yi09UdiL^2SbM3JQDfUoedz>BqCS?s-;vy)U` zX7h(+MM^halG zVV1yjI`ohfO@=SHP0KVN^v}&PRd$4(^%bXeZIl+ ztBY(jyDuvW8mw4?0?67%wZv^m%vy_W4H#@GXs*F zq05S*K{{n+y~!oTy#_`vZdx-vc+)awBGHp1@P>xjMc?9cr)ZPO^VBzYFrbo`GY->Yx*}1rpo2d+v`DpSRWAskShX8Iu88% z5x*l~za-87i;oE(v3|>+;Ie#bJfjLa>B2IKGcTZC`owKFoYww{cPTwJ zL3b|wYvU+{5A__%550SFoyxbMfaa;;x#CJ?8_1Q(nU`@E4i!i5*kxAH-5Wl@FjJkd zWwlpMp(UG`w>f^mb)=Az8l3tN3R%rtDrU7`+71Vl-&HDf)*pG^=*t1eEUS^}NsL<= za1Tj?fyL`Yzo(;VB(&Pwp@mZ2!|cH(tH19Jwg?mql+1=vRmi|FbuZt$P0F%b+_XeO zZ)4BoMX)qu!qNGLXd6=2rxAd+IXQbaPg))x=FLsuG^F)&U}>jOl&YC3IuWw#GCtT0 zJ3{@I#1zyZ$Vfb#Fu^S9vg70^IunekJk2lXL~VQOmuaKZY&qz5`Hjt?j*691YPD`* z$eHZ+YkvWMVPPi$gGRRoc#2Ix2)|~aN}T_4Otmiu^@Cs9NSlp^mxO+?o!QOr0+n~B z)oJ-<=+>=d$UMkefhsP7TkSUU69x0CXuC|J?pPDFOC5 zhA-!OL(-`l=`*EwrXm1LlpGN;X_&#oYRe3Ew59s~RWX(vg)^_q~Bk^}c}8EDoXke1Wb z896O#zicC;Wq24WMR=^s(0tXh(c-~7!#_o#DQ zDphg@2alPYa;kdNOqU(WB^9Xhg^$n6LywICn4><(3M46Yi5xloO_dT)+C)0S zOKJ?|8?+hOJ{lmajvsVXyl*eUpzx`GFn-z&Iv~H8l}a;OSphQnS77Sen+i zVbZvNlHaoy3(Bo;*?AOtvOKjrNw}n6$n;U37-7US-mmrI#fp*O2PpeaFQzb)LW(9R z{+hx{lATyY9V69Gh(5>XvEKQl=amYR&1uYtlz(mX#oz#T0WOwfQWB7h!+qde@(ti1 znk)f=z@dJUBoV1Wq)U<>EY=J12@K8FOEVuHS&;M=%y51?Dag)wUABsIL&Sb{#)}BGT~i;pt4OQ7Yp~TfVz`pM zOv=jBa7G!~`UTl~R>BCLNb$|@yXQhkV)Zz}V~x7e>yuXHA02qPf&sR4Xvj>s!X-GS zZpE{2MoFPz;Qol_vC)oo&bywTEqQR#e%~trb#|fgq%0yEuCXnZ1Xb%-5B5f8n(95w zAAMuO8tu;9@}yHH>1;OPjglu(_mf?wrJG+aUE2)yKa&Zne*(i3+^+XX6Og zZhsY$N!^x}1Ysrx@8{68GHXCQw{Yy$Tu;M+5YEt6k@oVF0&QXL@{1tcBQ^rL`8B^r*K2px{{!(p3%AmXNG`qg>FBx1bF`{feeLbeQ zqcp`yLdKkwR^Jmk>|SbQZ(Eqe5#bfJD$BjnY8`fDFlsGet91<>4qY=t0HE})4v1qk z=1Y=J!@-%TN*H|A=&sMa?&3d2Z*YY!Fv__lz47Z>vs-yZMMNuBGd1$ky#Vd6 zLit8&w8D{anq!-5dH{TPa^d#x7nH6ch%F5}I{FH{3mi5l1^aO>bE#3+uv7OJ{NZ23 zzv17+T9A=1kS73O@@Y_i0m7Et@6Scyv1K!ZOO06{E1J03d~P=^v|Xl*{;LJ#gVC!! zn{i0cE~KvO$Q=^0-p42+^y7TNj`4u!` z+NgB5s>b0Lypc=EgQZWrmG-%B30K@(NYe zBDhf4r5hjzl-2CXE>yM{2*sgzCUw(cEU>E2COb;6&7}eQryD0z`J%?t_BdGrFeIbzNFo-B58AIA zvS0U$6i_#BGL_p66k!F=f+1<$0jFQWD%ZJIl>Y|M2TtwRsP-Jx$l(d0c%n$ode6`s zcT_n+D@^YFplP=I^JAe%_qsqK5S4&JXF;K+^gY~L|H&?p()fFq|EZ2YUH*6K7~Ki_ z=QI8~4FBxMf4-0ZI_3U3Xa2eN{C8c$m0hSu4*(yVl@Rnh^Z}Z~5yDg>0YALn%*kvm z8h1D`_YQ7E%YW2aM4!e}ib+(b-o9={*;)t$1`nX5OI-E-GGr+f(G3J8FJQMGY3!Ay zCU>N@!g(U49)vE`-2Fl0Zc+psm@Am^B*Ejvt|in&wHO{%hdf{gpV-N^?iKAVR^nrP zi5kvnPkDHxlIEr6=M{()zVnIIS@LOQEk+PRh#m4kg<_j2!m*3N(EN>ZLB1nh^pA?3 z2xHXNnk$dar)?r_R=su-H~dqUAjQ?I>qGv6Y4fp50TW6r+z9%D{*B5?zw6rl@{-1D zoeT6V9PJkzOmriKjvXz0byeVM-fwxbd#?}*_r`Hg2Y6MZ?GK~DzSfv5TU*D2%Mn=a zP4Q1R`#g?SKu;)541}cIW-4;fFhrn1l~qS8L?;E3V1jitK$ey zs|&=)IaRrD9>sl#%O3WJL##SSxKqsaU#v5{jd?g4860Mf{zw(`?*hEouM?;vi&nxt z$l?;|g9|Y1EN&zA1>;?n zZR|t6f@asuD3qjyIyw4ETNZ1CjD?~7CCxIxP#2MoO5v79U%;d!?Vd1IgvIU%LETHw&Jqsqk$pA#2)TdkQ5! zC{HM^Kif&DaT+eqHG?~ROV)h{tEMZdSypF&=@{5&iThUNx%bY`1}RSsM7rpSomG-n zaNk{QFLfB3{$!;&DSgmvgdw#c!6;voF5kMn%wCAz)tSylv!z8y=#}G|mpTbTBySzu ztB~;Lp(qgt>%qoQc)}#2tfC=FXs~$RdDHq(1&;#krcoS6+yiXEiouuJ_&vLC%Ce4l z7^A7(uESS*>r`w?V){B0iplQx6El8|FKJwd#Q0}wj7atXYRiuOQjyl6$Av=}6TM!l zN|VOM%o!nkR@X50gz1bZ_b0=3*~gC`46HS}``IvD^LBqoYLeLSZQZZhISH1hD=#j4 zxo0O;a^J@E_n*c15O-7y*9)s-wjG`H;{7=gbfgpjr5#;}M>fLv=61cpsV7MOgBH{g zf(ZJ1XPS#J@qW|vsA1s+rLYU==D#QE}vINDDh(v+ST|x~ocI79N zjOM3BwOvH9kV@Wj_m7a~W zImL?{rjiDoNcpmxoi~D|N6&B^fi$RJ#pgUvpYI8iM2_JK-3;uvd%lI1dCTXfZaA#< zy~Fu9c@@Ux4%!?A4UnNC&M_#S!x0P}%0Z(eLJ_;hrw<*4cb_IE%P;sV>;*ll(jfnaU?XlNB0zF@(}8X3zGZ*XSqu7s>i*!_!^FC*h=jgP%H^I`ocP#d2&>w;wIWV z@aXCYd*RXD=hUOD*$7s!FBHkp41iOLT92}R>KyisAlCrp&^^y0lCx67c?)Hz?P>P6 z&!aD>H|n*YcYuM6DnX6qMGchKID@cFzCDf(SB5&D?3KsSw+`ec7-q&L(|M;w#WF>f z5pQa6Y~(#vIt`J zv<+#=7~7ZbuDgjQmKgVR7x_q@@X1f}kZ)g!*X|h@H8-!U++2ux3B=WbiyK|Nmn;X4 z41!LUEhk#zD}C>!FZ#q3Wi4=3ym`IS9OaVM@cP2k(Ift z0-Ko(0Ud6FhWhMXl-J2iAy)tQO_eAY_K=F{4`&cSN zGElj;sJFf`z1Cm+RR}&hpkdHmu}mk>ctrwwg(?HT-2mQIn{0bk4&sfZqePA+J_-4T z-if|riK16ornJ|9UTGHVE1%~b-Y!dOWF7}tVFGE=geZ-YRuzJGO^9X)W@InQ+CQ;> z(%RiT-IU?NcOjKav9@-S=6G58E&I)SDUGEJ14y-jp^Al?)zk34=gxhuK(g9ES2R%&R9oNi%jDes&4 zt>K$Tl&H;T)tI3o-*Z)8lRy%Su8xSV)njppca9?8KwRn0E@6JSZEUUr(dzwB@^qxV zC;A{f&9&Z>0rxyCz$$tkR=~34=tn*Uh>dko2E>x>h&*NQ2?Ziu*NtIZAmy>9iR9EQmTnMG5Bt$H5G4r3db%9I|R5u=ag7N^P-s9L4S9SOu{e{r^N z2{=cwZXcKVmk$Aht(?dUpdLr)5+0^Z;MnX`xq5ATHqGr;O8l;ga|^#|OUn;a&d#*R zR6Kg@akeinkYxbZ;7tOk(iCt_!UT?aPW4RhU2x6YWe-19@j0P4+G**pgfm_frK>Vg~rcPB=SOfXzo=eDcdIr2;8{HIpuUEjcRtBd-CL|hg~Iz zOC%*AyBwpyS;Go)+!S#q_=M?1_eRSj>%=c#9^Q$*e*Dtc+{hq(5j$ipgR#Cvaq-G7 zh9n4sgOup&&Yy_@fA9OBgkh@ds{WjUwNQkSQ=r#pKkHF%kdOpGEf)X03uwlF`BYDu z|0HE;y|w_W>wj{fZ9%dn)&n;f?cb^@rr^U8T+I0LLrI^q!UW$qkuf?(^JGO_-l=!# z(;Ne?@A;=1My5DmMtS`Oti{8tnd^FnRQpuFD$lEG?8ZX>GFN_{H~;ASpRw zWnF|C`L|*F((db$n1Cvkv(=xqqaTL&4&m*_;ncPKh;KFCEwAB_vyX@XGklmO6~cz; zF!ftbS?EN-2%;494lfvF|Az=L_`55@W&kyz5$P?kt=WZSd#G2ofF`S zFV8~GxiNC!K(&1i8fa`xN-Sy6ZJ*NqFHM6Cb7N*;Bqn4F9`noH z?Wm3W6f`x^wUXM>#M~Jmy~)&71tl0sQ?EcvWsvO{0ymJQsQ>l$-Fu;c+84@2HH(2w zT6O?BI}aL;9M6PNo+2NC2aiLJ%K*XYz+dm4{7a`+#)G7tg{&%g!)pyOJ2ceq;Mp&L z_t~1N!Tj_6HGUaFFF8PEf9MBZ{AU@+wFCbh#%xN?QheeK?;EFdgjq#J5zqSRQo?jV zRd{Wa!0RuywbQy+gu