-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathJHandleOLE2Containers.py
166 lines (141 loc) · 5.89 KB
/
JHandleOLE2Containers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# -*- coding: utf-8 -*-
import os
import sys
import uniqid
from jarray import zeros
from java.io import FileOutputStream, FileInputStream, ByteArrayOutputStream
from org.apache.poi.poifs.filesystem import POIFSFileSystem, DocumentInputStream, DirectoryNode
from org.apache.poi.hpsf import SummaryInformation, DocumentSummaryInformation, PropertySetFactory, PropertySet, UnexpectedPropertySetTypeException
class ReadWriteOLE2Containers:
    """Inspect, extract and rebuild OLE2 compound files through Apache POI.

    This class is written for Jython: it relies on ``java.io`` streams and on
    the POI ``poifs`` / ``hpsf`` classes imported at the top of the module.
    """

    # Characters replaced by '[1]' and '[5]' in file names written by
    # recurse_dir (the original author notes this mirrors 7-Zip's convention).
    replacechar1 = '\x01'
    replacechar5 = '\x05'

    # Entry names of the two property-set streams handled by this class.
    _SUMMARY_STREAM = u"\u0005" + "SummaryInformation"
    _DOC_SUMMARY_STREAM = u"\u0005" + "DocumentSummaryInformation"

    def __debugfos__(self, fos, bufsize):
        """Debug aid: read up to ``bufsize`` bytes from ``fos`` and print them.

        Despite the parameter name, ``fos`` must be a readable stream because
        the only operation performed on it is ``read``.
        """
        buf = zeros(bufsize, 'b')
        # The original read from an undefined name ``fin``; use the argument.
        fos.read(buf)
        print(buf)

    def _replace_property_stream(self, ole2filename, entry_name, wrap,
                                 new_blank, blank):
        """Rewrite one property-set stream of ``ole2filename`` in place.

        ``entry_name`` is the name of the root entry to replace.
        ``wrap`` turns a generic ``PropertySet`` into the typed object that is
        written back; ``new_blank`` is a zero-argument callable returning a
        fresh, empty property set. When ``blank`` is true, or the entry does
        not exist, the blank property set is written; otherwise the existing
        one is re-read and written back.
        """
        fin = FileInputStream(ole2filename)
        try:
            fs = POIFSFileSystem(fin)
        finally:
            # NOTE(review): the same file is reopened for writing below, so
            # the input handle is released first. POI documents that the
            # InputStream constructor consumes the stream - confirm for the
            # POI version in use.
            fin.close()
        root = fs.getRoot()

        existing = None
        for entry in root:
            if entry.getShortDescription() != entry_name:
                continue
            if not blank:
                dis = DocumentInputStream(root.getEntry(entry_name))
                try:
                    existing = wrap(PropertySet(dis))
                finally:
                    dis.close()

        if existing is not None:
            existing.write(root, entry_name)
        else:
            new_blank().write(root, entry_name)

        out = FileOutputStream(ole2filename)
        try:
            fs.writeFilesystem(out)
        finally:
            out.close()

    def replaceDocumentSummary(self, ole2filename, blank=False):
        """Rewrite the DocumentSummaryInformation stream of ``ole2filename``.

        With ``blank=False`` an existing stream is parsed and written back;
        if it is missing, or ``blank`` is true, a new empty one is written.
        Exits the process with status 1 (after a message on stderr) when the
        existing stream is not a document summary property set.
        """
        def wrap(property_set):
            try:
                return DocumentSummaryInformation(property_set)
            except UnexpectedPropertySetTypeException as e:
                sys.stderr.write("Error writing old DocumentSymmaryInformation:" + str(e).replace('org.apache.poi.hpsf.UnexpectedPropertySetTypeException:',''))
                sys.exit(1)

        self._replace_property_stream(
            ole2filename,
            self._DOC_SUMMARY_STREAM,
            wrap,
            PropertySetFactory.newDocumentSummaryInformation,
            blank,
        )

    #https://poi.apache.org/hpsf/how-to.html#sec3
    def replaceSummaryInfo(self, ole2filename, blank=False):
        """Rewrite the SummaryInformation stream of ``ole2filename``.

        With ``blank=False`` an existing stream is parsed and written back;
        if it is missing, or ``blank`` is true, a new empty one is written.
        Unlike replaceDocumentSummary, an exception raised while wrapping the
        existing property set is not caught here and propagates to the caller.
        """
        #https://poi.apache.org/apidocs/org/apache/poi/hpsf/SummaryInformation.html
        self._replace_property_stream(
            ole2filename,
            self._SUMMARY_STREAM,
            SummaryInformation,
            PropertySetFactory.newSummaryInformation,
            blank,
        )

    def __makeoutputdir__(self, ole2filename):
        """Create and return the extraction directory for ``ole2filename``.

        The directory is the file path with its final extension removed.
        Exits the process if that path already exists.
        """
        # splitext only strips the last extension of the last path component;
        # the previous split('.')[0] cut at the first dot anywhere in the
        # path, e.g. './x.doc' -> '' and 'a.b/c.doc' -> 'a'.
        dirname = os.path.splitext(ole2filename)[0]
        if os.path.exists(dirname):
            sys.exit("Directory to output OLE2 contents to already exists.")
        os.makedirs(dirname)
        return dirname

    def recurse_dir(self, root, outdir):
        """Write every entry below the POI directory ``root`` into ``outdir``.

        Document entries become files (with the characters in
        ``replacechar1`` / ``replacechar5`` replaced by '[1]' / '[5]');
        directory entries become sub-directories that are processed after all
        documents at this level have been written.
        """
        subdirs = []
        for obj in root:
            fname = obj.getShortDescription()
            if isinstance(obj, DirectoryNode):
                subdir = os.path.join(outdir, fname)
                os.makedirs(subdir)
                if subdirs:
                    # Kept from the original: more than one storage at the
                    # same level is still reported, but is now extracted too
                    # instead of being left as an empty directory.
                    sys.stderr.write("Check container in 7-Zip, likely more dirs at a root DirectoryNode than expected.")
                subdirs.append((obj, subdir))
            else:
                #replace strange ole2 characters we can't save in filesystem, todo: check spec
                #this seems to be the convention in 7-Zip, and it seems to work...
                fname = fname.replace(self.replacechar1, '[1]').replace(self.replacechar5, '[5]')
                buf = zeros(obj.getSize(), 'b')
                stream = DocumentInputStream(obj)
                try:
                    stream.read(buf)
                finally:
                    stream.close()
                f = open(os.path.join(outdir, fname), "wb")
                try:
                    f.write(buf.tostring())
                finally:
                    f.close()
        #only recurse once the DocumentNodes at this level have been written
        for node, path in subdirs:
            self.recurse_dir(node, path)

    def extractContainer(self, ole2filename):
        """Extract the contents of ``ole2filename`` into a new directory.

        The directory is chosen by ``__makeoutputdir__``; the process exits
        if it already exists.
        """
        fin = FileInputStream(ole2filename)
        try:
            fs = POIFSFileSystem(fin)
        finally:
            fin.close()
        outdir = self.__makeoutputdir__(ole2filename)
        self.recurse_dir(fs.getRoot(), outdir)

    def writeContainer(self, containerfoldername, ext, outputfilename=False):
        """Build an OLE2 file from the files in ``containerfoldername``.

        ``ext`` is the extension used when ``outputfilename`` is not given;
        the generated name is ``<folder>-<uniqid>.<ext>``. Nothing is written
        when the folder contains sub-directories, contains no files, or
        contains a zero-length file. Exits the process if the folder does not
        exist.

        Returns True when the container was written, False otherwise.
        """
        if not outputfilename:
            # rstrip, not strip: stripping the leading '/' turned an absolute
            # folder path into a relative output path.
            outputfilename = containerfoldername.rstrip('/') + "-" + uniqid.uniqid() + "." + ext.strip('.')

        if not os.path.isdir(containerfoldername):
            sys.exit("Not a valid folder: " + containerfoldername)

        written = False
        fs = POIFSFileSystem()
        try:
            root = fs.getRoot()
            #triplet ([Folder], [sub-dirs], [files])
            for folder, subs, files in os.walk(containerfoldername):
                if subs:
                    # Only flat folders are supported.
                    break
                for name in files:
                    path = os.path.join(folder, name)
                    if os.path.getsize(path) == 0:
                        # An empty file aborts the whole container.
                        written = False
                        break
                    fin = FileInputStream(path)
                    try:
                        # NOTE(review): names are used verbatim, so the
                        # '[1]' / '[5]' substitutions made by recurse_dir are
                        # not reversed here - confirm whether that is intended.
                        root.createDocument(name, fin)
                    finally:
                        fin.close()
                    written = True
            if written:
                fos = FileOutputStream(outputfilename)
                try:
                    fs.writeFilesystem(fos)
                finally:
                    # The original never closed the output stream.
                    fos.close()
        finally:
            fs.close()
        return written