forked from chaonan99/ppt_presenter
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
118 lines (96 loc) · 4.76 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
from pathlib import Path
import tempfile
import argparse
from subprocess import call
from pdf2image import convert_from_path
from pptx import Presentation
from ttsgen import TTSGen
from engines.xtts2_engine import XTTS2Engine
from engines.gtts_engine import GTTSEngine
__author__ = ['hoangbv15']

# Name of the executable launched by ffmpeg_call() and ffmpeg_concat().
# On some systems the tool is installed as ``avconv`` rather than
# ``ffmpeg``; use the alternative assignment below in that case.
FFMPEG_NAME = 'ffmpeg'
#FFMPEG_NAME = 'avconv'
def ppt_presenter(pptx_path, pdf_path, output_path, temp_dir, engineName, fast, saveclips, pagenos, saveaudio):
    """Render a narrated video from a PPTX deck and its PDF export.

    Each PDF page is paired with the slide at the same index. For every
    selected slide that has a notes slide, the notes text is synthesised to
    ``frame_<n>.wav``; unless ``saveaudio`` is set, the page image is then
    saved and combined with the audio by ``ffmpeg_call``. Afterwards the
    per-slide ``.ts`` segments are handed to ``ffmpeg_concat`` to build
    ``output_path``.

    Args:
        pptx_path: Path of the presentation whose slide notes are narrated.
        pdf_path: Path of the PDF export; must have one page per slide.
        output_path: Path of the final video.
        temp_dir: Parent directory for the temporary working directory, or
            None to use the ``tempfile`` default location.
        engineName: Name of a module-level engine class, used when ``fast``
            is false; a falsy value selects ``XTTS2Engine``.
        fast: When true, use ``GTTSEngine`` regardless of ``engineName``.
        saveclips: Move the per-slide ``.mp4`` clips into the directory named
            by ``output_path`` with ``.mp4`` replaced by ``-clips``.
        pagenos: Zero-based slide indices to process; empty means all slides.
        saveaudio: Only generate the narration audio and move the ``.wav``
            files into the clips directory instead of the ``.mp4`` clips.

    Raises:
        ValueError: If ``engineName`` is not a module-level name, or if the
            PDF page count differs from the slide count.
    """
    # Function-scope import: only the clip-saving step below needs it.
    import shutil

    if fast:
        tts = TTSGen(GTTSEngine())
    elif engineName:
        # `tts` must be bound in this branch too; otherwise every later use
        # of `tts` fails with UnboundLocalError when an engine is named.
        try:
            engine_cls = globals()[engineName]
        except KeyError:
            raise ValueError(
                'unknown text to speech engine: {}'.format(engineName)
            ) from None
        # assumes engine classes take no constructor arguments, like the two
        # engines instantiated in the other branches -- TODO confirm
        tts = TTSGen(engine_cls())
    else:
        tts = TTSGen(XTTS2Engine())

    with tempfile.TemporaryDirectory(dir=temp_dir) as temp_path:
        images_from_path = convert_from_path(pdf_path)
        prs = Presentation(pptx_path)
        # Explicit check rather than `assert`, which is removed under -O and
        # would let zip() silently drop the unmatched pages or slides.
        if len(images_from_path) != len(prs.slides):
            raise ValueError(
                'PDF has {} pages but presentation has {} slides'.format(
                    len(images_from_path), len(prs.slides)))

        tts.enable(True)
        for i, (slide, image) in enumerate(zip(prs.slides, images_from_path)):
            if pagenos and i not in pagenos:
                continue
            if not slide.has_notes_slide:
                continue

            frame_no = i + 1  # file names are 1-based, indices are 0-based
            notes = slide.notes_slide.notes_text_frame.text
            audio_path = os.path.join(temp_path, 'frame_{}.wav'.format(frame_no))
            tts.generate(text=notes,
                         output_file=audio_path)
            if saveaudio:
                # Audio-only mode: skip image export and video encoding.
                continue

            image_path = os.path.join(temp_path, 'frame_{}.jpg'.format(frame_no))
            image.save(image_path)
            ffmpeg_call(image_path, audio_path, temp_path, frame_no)

        # NOTE(review): the list names a segment for every page, including
        # pages skipped above whose .ts file was never written -- confirm
        # that ffmpeg failing in that situation is the intended outcome.
        video_list = [os.path.join(temp_path, 'frame_{}.ts'.format(n + 1))
                      for n in range(len(images_from_path))]
        video_list_str = 'concat:' + '|'.join(video_list)
        ffmpeg_concat(video_list_str, output_path)

        if saveclips or saveaudio:
            clips_dir = output_path.replace('.mp4', '-clips')
            print("saveclips option is set")
            os.makedirs(clips_dir, exist_ok=True)

            pattern = '*.wav' if saveaudio else '*.mp4'
            # Materialise the listing before moving files out of the folder.
            for each_file in sorted(Path(temp_path).glob(pattern)):
                print("Moving % s to % s" % (each_file.name, clips_dir))
                # shutil.move falls back to copy + delete when the temporary
                # directory (e.g. a ramdisk) is on another filesystem, where
                # a plain rename raises OSError.
                shutil.move(str(each_file),
                            os.path.join(clips_dir, each_file.name))
def ffmpeg_call(image_path, audio_path, temp_path, i):
    """Build the video clip for one slide and its MPEG-TS counterpart.

    Two ``FFMPEG_NAME`` subprocesses are run in sequence: the first combines
    the still image with the narration audio into ``frame_<i>.mp4`` inside
    ``temp_path``; the second stream-copies that clip into ``frame_<i>.ts``.
    Exit codes of both processes are ignored.

    Args:
        image_path: Path of the slide image.
        audio_path: Path of the narration audio.
        temp_path: Directory that receives both output files.
        i: Frame number used in the output file names.
    """
    clip_mp4, clip_ts = (
        os.path.join(temp_path, template.format(i))
        for template in ('frame_{}.mp4', 'frame_{}.ts')
    )

    # Step 1: looped still image + audio -> H.264/AAC clip.
    encode_cmd = [FFMPEG_NAME, '-loop', '1', '-y']
    encode_cmd += ['-i', image_path, '-i', audio_path]
    encode_cmd += ['-shortest', '-fflags', '+shortest',
                   '-max_interleave_delta', '200M']
    encode_cmd += ['-c:v', 'libx264', '-tune', 'stillimage']
    encode_cmd += ['-c:a', 'aac', '-b:a', '192k']
    encode_cmd += ['-vf', 'scale=-1:1080', clip_mp4]
    call(encode_cmd)

    # Step 2: repackage the clip as a transport stream without re-encoding.
    remux_cmd = [FFMPEG_NAME, '-y', '-i', clip_mp4]
    remux_cmd += ['-c', 'copy', '-bsf:v', 'h264_mp4toannexb']
    remux_cmd += ['-f', 'mpegts', clip_ts]
    call(remux_cmd)
def ffmpeg_concat(video_list_str, out_path):
    """Join the per-slide transport streams into the final output file.

    Runs a single ``FFMPEG_NAME`` subprocess that reads ``video_list_str``
    as an MPEG-TS input and stream-copies it to ``out_path``. The exit code
    is ignored.

    Args:
        video_list_str: Input specification passed to ``-i``; the caller in
            this file builds it as ``concat:<a>|<b>|...``.
        out_path: Path of the file to write.
    """
    source = '{}'.format(video_list_str)
    command = [FFMPEG_NAME, '-y', '-f', 'mpegts', '-i', source]
    command.extend(['-c', 'copy', '-bsf:a', 'aac_adtstoasc', out_path])
    call(command)
def _parse_page_numbers(spec):
    """Turn a page list such as ``"1,3-5"`` into zero-based slide indices.

    Items are comma separated; an item containing ``-`` is an inclusive
    range of one-based page numbers. Raises ValueError if an item is not
    made of integers.
    """
    indices = []
    for item in spec.split(','):
        bounds = item.split('-')
        if len(bounds) > 1:
            # One-based inclusive "a-b" -> zero-based a-1 .. b-1.
            indices.extend(range(int(bounds[0]) - 1, int(bounds[1])))
        else:
            indices.append(int(item) - 1)
    return indices


def main():
    """Parse the command line and run ``ppt_presenter``.

    ``--pptx``, ``--pdf`` and ``--output`` are mandatory because the
    pipeline cannot run without them; omitting one now produces a usage
    error instead of a failure deep inside the conversion. Giving
    ``--pageno`` also turns on ``--saveclips``.
    """
    parser = argparse.ArgumentParser(description='Local AI PPT Presenter help.')
    parser.add_argument('--pptx', required=True, help='input pptx path')
    parser.add_argument('--pdf', required=True, help='input pdf path')
    parser.add_argument('-o', '--output', required=True, help='output path')
    parser.add_argument('-t', '--tempdir', help='path to store temporary files needed to generate the output. A ramdisk is recommended. Leave none to use python tempfile defaults.')
    parser.add_argument('-e', '--engine', help='the name of the text to speech engine to use')
    parser.add_argument('-f', '--fast', help='use the text to speech engine of the OS for fast execution at the expense of quality', action='store_true')
    parser.add_argument('-sc', '--saveclips', help='save the clips for each page instead of deleting them', action='store_true')
    parser.add_argument('-sa', '--saveaudio', help='only generate voice audios', action='store_true')
    parser.add_argument('-p', '--pageno', help='only regenerate the given page number')
    args = parser.parse_args()

    pagenos = []
    if args.pageno:
        try:
            pagenos = _parse_page_numbers(args.pageno)
        except ValueError:
            # Report malformed input as a usage error, not a traceback.
            parser.error('invalid --pageno value: {!r}'.format(args.pageno))
        # Regenerating selected pages is only useful if their clips are kept.
        args.saveclips = True
        print('Page list: % s' % pagenos)

    ppt_presenter(args.pptx, args.pdf, args.output, args.tempdir,
                  args.engine, args.fast, args.saveclips, pagenos, args.saveaudio)
# Run the command-line interface only when this file is executed as a script.
if __name__ == '__main__':
    main()