#!/usr/bin/env python
#Copyright (c) 2008 Jeff Bryner
#python script to gather ymail artifacts from a pd process memory dump

#example: 
#
#on windows box, use pd from www.trapkit.de ala: 
#pd -p 1234> 1234.dump
#
#where 1234 is a running instance of IE
#
#on linux box do:
#strings -el 1234.dump> memorystrings.txt
#pdymail -f memorystrings.txt
#
#It'll find what it can out of the memory image  

#This program is free software; you can redistribute it and/or modify it under
#the terms of the GNU General Public License as published by the Free Software
#Foundation; either version 2 of the License, or (at your option) any later
#version.

#This program is distributed in the hope that it will be useful, but WITHOUT
#ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
#FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

#You should have received a copy of the GNU General Public License along with
#this program; if not, write to the Free Software Foundation, Inc.,
#59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.


import sys
import os
import types
import struct
from time import ctime
import getopt
import array
import re
import xml.dom.ext
from xml.dom import minidom
import hashlib

# Matches one character in the 0x80-0xFF range; safestring() escapes these.
safestringre=re.compile('[\x80-\xFF]')
# Dotted-quad text that looks like an IPv4 address (octets are not range
# checked).  Raw string so that \d and \. reach the regex engine untouched
# instead of being treated as (invalid) string escapes.
ipre=re.compile(r'(?:\d{1,3}\.){3}\d{1,3}')


# Yahoo! Mail XML fragments.  Each pattern lazily matches from an opening tag
# prefix up to the first occurrence of the corresponding closing tag.
# NOTE(review): none of these patterns sets re.DOTALL, so '.' does not match
# a newline and a fragment that is split across lines will not be found --
# confirm whether that is intended.
ymailuserre=re.compile(r'(?:<userSendPref.*?</userSendPref>)')
ymailmessageInfore=re.compile(r'(?:<messageInfo.*?</messageInfo>)')
ymailfolderre=re.compile(r'(?:<folder.*?</folder>)')
ymailnumberoffoldersre=re.compile(r'(?:<numberOfFolders.*?</numberOfFolders>)')
ymailblockedaddressre=re.compile(r'(?:<blockedAddresses.*?</blockedAddresses>)')
ymailmetadatare=re.compile(r'(?:<GetMetaDataResponse.*?</GetMetaDataResponse>)')
ymailparentalcontrolre=re.compile(r'(?:<parentalControls.*?</parentalControls>)')
ymailexternalaccountsre=re.compile(r'(?:<externalAccounts.*?</externalAccounts>)')
ymailpopprefre=re.compile(r'(?:<userPopPref.*?</userPopPref>)')
ymailaliasre=re.compile(r'(?:<aliases.*?</aliases>)')

def safestring(badstring):
    """Return badstring with every character matched by safestringre escaped.

    safestringre matches single characters in the 0x80-0xFF range; each one
    is replaced by the text 'char#N;' where N is the character's ordinal.
    All other characters are returned unchanged.
    """
    def _escape(match):
        # match.group(0) is the single offending character
        return 'char#%d;' % ord(match.group(0))

    return safestringre.sub(_escape, badstring)


def parseOptions():
    """Parse sys.argv[1:] into the option dictionary used by the script.

    Returns:
        dict with the keys
            'file'    -- path given with -f/--file ('' when not given)
            'verbose' -- True when -v/--verbose was given, else False
            'xml'     -- True (the default; -x/--xml sets it to True again)

    Exits through sys.exit():
        after printing the usage text for -h/--help,
        after printing the version for -V/--version,
        with status 2 after reporting an option getopt does not recognise.
    """
    options = {'file': '',
               'verbose': False,
               'xml': True,
               }
    # NOTE: -x/--xml is accepted below but is not listed in the usage text.
    helpstr = 'Usage: ' + sys.argv[0] + ' [OPTIONS]' + """\n
Options:
   -f, --file       the file to use (stdin if no file given)
   -h, --help	    prints this 
   -v,--verbose	    be verbose (prints filename, other junk to stderr)
   -V,--version     prints just the version info and exits.
   
This expects to be unleashed on the result of running strings -el on a pd dump from windows process memory. Anything other than that, your mileage will certainly vary.\n
\n
"""
    try:
        optlist, _ = getopt.getopt(sys.argv[1:], 'vhf:Vx', ['help','file=','version','verbose','xml'])
    except getopt.GetoptError as err:
        # Unknown option or missing argument: say why, show usage, and stop
        # instead of ending in a traceback.
        sys.stderr.write(str(err) + "\n")
        sys.stderr.write(helpstr)
        sys.exit(2)

    for o, a in optlist:
        if o in ('-h', '--help'):
            print(helpstr)
            sys.exit()
        elif o in ('-v', '--verbose'):
            options['verbose'] = True
        elif o in ('-x', '--xml'):
            options['xml'] = True
        elif o in ('-V', '--version'):
            print("pdymail version 0.1.0 Jeff Bryner")
            sys.exit()
        elif o in ('-f', '--file'):
            # the only option that carries a value
            options['file'] = a

    return options


def _readInput(path):
    """Return the text to scan: the contents of path, or stdin when path is ''.

    Writes 'Cannot open file' to stderr and exits with status 1 when the
    file cannot be opened.
    """
    if path != '':
        try:
            handle = open(path, 'r')
        except IOError:
            sys.stderr.write('Cannot open file\n')
            sys.exit(1)
        # NOTE(review): file input is used exactly as read, whereas stdin
        # input is passed through safestring() -- confirm whether this
        # difference is intended.
        with handle:
            return handle.read()

    # stdin: escape each line and glue the lines back into a single blob so
    # the whole input can be scanned in one go.
    pieces = []
    try:
        for line in iter(sys.stdin.readline, ''):
            pieces.append(safestring(line))
    except IOError as err:
        # best effort: keep whatever was read before the failure
        sys.stderr.write("Error reading stdin: %s\n" % err)
    return ''.join(pieces)


def _emitArtifacts(matches, label, errorlabel, document, container, seen=None):
    """Output every matched XML fragment, either into the DOM or as text.

    matches    -- list of fragment strings found by one of the ymail regexes
    label      -- prefix used for plain-text output ("<label>: <fragment>")
    errorlabel -- name used in the "error parsing ..." message on stderr
    document   -- target minidom document (only used when container is set)
    container  -- element the parsed fragment is appended to, or None to
                  write the fragment to stdout instead
    seen       -- optional set; when given, a fragment already in it is
                  skipped so repeated copies in memory are output only once
    """
    for fragment in matches:
        try:
            if seen is not None:
                if fragment in seen:
                    continue
                seen.add(fragment)
            if container is not None:
                parsed = minidom.parseString(fragment)
                container.appendChild(document.importNode(parsed.childNodes[0], deep=1))
            else:
                sys.stdout.write(label + ": " + fragment)
        except Exception:
            # something amiss with this fragment: report it and carry on
            # with the remaining ones
            sys.stderr.write("error parsing " + errorlabel + ": " + str(fragment) + "\n")


def gatherArtifacts():
    """Scan the input for Yahoo! Mail XML fragments and output what is found.

    Reads the module-level ``options`` dictionary:
        options['file']    -- input path; '' means read stdin
        options['verbose'] -- when true, progress messages go to stderr
        options['xml']     -- when true, fragments are collected into a
                              <ymailartifacts> document that is pretty
                              printed to stdout; otherwise each fragment is
                              written to stdout as "<label>: <fragment>"

    Fragments that fail to parse are reported on stderr and skipped.
    messageInfo fragments are de-duplicated; the other kinds are not.
    """
    verbose = options["verbose"]
    asxml = options["xml"]

    if verbose:
        # stderr, so the message cannot end up inside the XML on stdout
        sys.stderr.write("FileName: %s \n" % options["file"])

    filedata = _readInput(options["file"])

    # Child elements of <ymailartifacts>, in the order they are appended.
    containernames = ("users", "aliases", "folders", "blockedaddresses",
                      "metadataentries", "parentalcontrols",
                      "externalaccounts", "poppreferences", "messages")
    document = None
    containers = {}
    if asxml:
        document = minidom.parseString("<ymailartifacts/>")
        for name in containernames:
            containers[name] = document.createElement(name)

    # (regex, verbose message, text label, error label, container, seen-set)
    # The folder count fragments share the "folders" container with the
    # folder fragments.  Only messages carry a seen-set: one message can be
    # present in memory more than once.
    # todo: grab extraneous html to get even more folder info:
    # class="folderRow" title="Foldername contains 58 messages">
    sections = (
        (ymailuserre, "found preferences for %s users\n",
         "userprefs", "userprefs", "users", None),
        (ymailaliasre, "found %s alias entries\n",
         "alias", "alias", "aliases", None),
        (ymailfolderre, "found info for %s folders\n",
         "folder", "folder", "folders", None),
        # may not match actual folder info recovered from memory, but is the
        # count from yahoo mail's perspective
        (ymailnumberoffoldersre, "found %s entries for folder counts\n",
         "foldercount", "foldercount", "folders", None),
        (ymailblockedaddressre, "found %s blocked addresses\n",
         "blockedaddress", "blockedaddress", "blockedaddresses", None),
        (ymailmetadatare, "found %s metadata entries\n",
         "metadata", "metadata", "metadataentries", None),
        (ymailparentalcontrolre, "found %s parental control entries\n",
         "parentalcontrol", "parentalcontrol", "parentalcontrols", None),
        (ymailexternalaccountsre, "found %s external account entries\n",
         "externalaccount", "externalaccount", "externalaccounts", None),
        (ymailpopprefre, "found %s POP preference entries\n",
         "poppref", "poppref", "poppreferences", None),
        (ymailmessageInfore, "found %s messages\n",
         "message", "messages", "messages", set()),
    )

    for pattern, foundfmt, label, errorlabel, containername, seen in sections:
        matches = pattern.findall(filedata)
        if not matches:
            continue
        if verbose:
            sys.stderr.write(foundfmt % len(matches))
        _emitArtifacts(matches, label, errorlabel, document,
                       containers.get(containername), seen)

    if asxml:
        # add what we got out of memory to the xml document.
        root = document.documentElement
        for name in containernames:
            root.appendChild(containers[name])

        # xml.dom.ext is provided by the PyXML package, not the standard
        # library.
        xml.dom.ext.PrettyPrint(document, sys.stdout)
		
		
def main():
    """Parse the command line, publish the result, and run the extraction."""
    global options
    parsed = parseOptions()
    # gatherArtifacts() takes no arguments; it reads the module-level
    # `options` dictionary, so that has to be set before the call.
    options = parsed
    gatherArtifacts()

# Run the extraction only when this file is executed as a script.
if __name__ == "__main__":
    main()
