Skip to content

Commit

Permalink
Applied some changes from: https://github.com/HoneyNED
Browse files Browse the repository at this point in the history
- Can now be imported in other projects
- Fixed parsing of filenames without quotes in Content-Disposition
- Added datetime extracted from Date header
  • Loading branch information
dazoot committed Mar 17, 2015
1 parent fea02bb commit 20a0de8
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 24 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ json:
header_key1: value
header_key2: value
subject: "The email subject as utf-8 string"
datetime: "2015-03-17 17:48:06"
encoding: "utf-8"
from:
- { name: "Sender Name", email: "[email protected]" }
Expand Down
69 changes: 45 additions & 24 deletions mailtojson.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
## (c) 2013 Newsman App
## https://github.com/Newsman/MailToJson

import sys, urllib2, email, re, csv, StringIO, base64, json
import sys, urllib2, email, re, csv, StringIO, base64, json, datetime, pprint
from optparse import OptionParser

VERSION = "1.3"

ERROR_NOUSER = 67
ERROR_PERM_DENIED = 77
ERROR_TEMP_FAIL = 75
Expand All @@ -20,7 +22,7 @@
r'|\[(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}\]$', re.IGNORECASE)

email_extract_re = re.compile("<(([.0-9a-z_+-=]+)@(([0-9a-z-]+\.)+[0-9a-z]{2,9}))>", re.M|re.S|re.I)
filename_re = re.compile("filename=\"(.*?)\"", re.I|re.S)
# Extracts the filename parameter from a Content-Disposition header value.
# Group 1 holds a double-quoted filename, group 2 an unquoted one; for any
# match exactly one of the two groups is non-empty.
# The quoted form is bounded by the next double quote (not a greedy ".+"),
# so parameters that follow the filename, e.g. '; size="5"', are not
# swallowed into the captured name.
filename_re = re.compile(r'filename="([^"]+)"|filename=([^;\n\r"\']+)', re.I|re.S)

class MailJson:
def __init__(self, content):
Expand Down Expand Up @@ -126,6 +128,19 @@ def _parse_recipients(self, v):

return ret

def _parse_date(self, v):
if v is None:
return datetime.datetime.now()

tt = email.utils.parsedate_tz(v)

if tt is None:
return datetime.datetime.now()

timestamp = email.utils.mktime_tz(tt)
date = datetime.datetime.fromtimestamp(timestamp)
return date

def _get_content_charset(self, part, failobj = None):
"""Return the charset parameter of the Content-Type header.
Expand Down Expand Up @@ -173,6 +188,7 @@ def parse(self):
headers[k] = v

self.data["headers"] = headers
self.data["datetime"] = self._parse_date(headers.get("date", None)).strftime("%Y-%m-%d %H:%M:%S")
self.data["subject"] = headers.get("subject", None)
self.data["to"] = self._parse_recipients(headers.get("to", None))
self.data["from"] = self._parse_recipients(headers.get("from", None))
Expand All @@ -189,7 +205,7 @@ def parse(self):
# we have attachment
r = filename_re.findall(content_disposition)
if r:
filename = r[0]
filename = sorted(r[0])[1]
else:
filename = "undefined"

Expand All @@ -210,29 +226,34 @@ def parse(self):
def getData(self):
return self.data

usage = "usage: %prog [options]"
parser = OptionParser(usage)
parser.add_option("-u", "--url", dest = "url", action = "store", help = "the url where to post the mail data as json")
if __name__ == "__main__":
usage = "usage: %prog [options]"
parser = OptionParser(usage)
parser.add_option("-u", "--url", dest = "url", action = "store", help = "the url where to post the mail data as json")
parser.add_option("-p", "--print", dest = "do_print", action = "store_true", help = "no json posting, just print the data")

opt, args = parser.parse_args()
opt, args = parser.parse_args()

if not opt.url:
print parser.format_help()
sys.exit(1)
if not opt.url and not opt.do_print:
print parser.format_help()
sys.exit(1)

content = sys.stdin.read()
content = sys.stdin.read()

try:
mj = MailJson(content)
mj.parse()
data = mj.getData()
try:
mj = MailJson(content)
mj.parse()
data = mj.getData()

headers = {"Content-Type": "application/json; charset=%s" % data.get("encoding")}
req = urllib2.Request(opt.url, json.dumps(data, encoding = data.get("encoding")), headers)
resp = urllib2.urlopen(req)
ret = resp.read()

print "Parsed Mail Data sent to: %s\n" % opt.url
except Exception, inst:
print "ERR: %s" % inst
sys.exit(ERROR_TEMP_FAIL)
if opt.do_print:
pprint.pprint(data)
else:
headers = { "Content-Type": "application/json; charset=%s" % data.get("encoding"), "User-Agent": "NewsmanApp/MailToJson %s - https://github.com/Newsman/MailToJson" % VERSION }
req = urllib2.Request(opt.url, json.dumps(data, encoding = data.get("encoding")), headers)
resp = urllib2.urlopen(req)
ret = resp.read()

print "Parsed Mail Data sent to: %s\n" % opt.url
except Exception, inst:
print "ERR: %s" % inst
sys.exit(ERROR_TEMP_FAIL)

0 comments on commit 20a0de8

Please sign in to comment.