"""Read Outlook Express .dbx files.

$Revision: 1 $
Released into the public domain
$Date: 10/28/04 12:04p $
Steve Holden http://www.holdenweb.com/

All multi-byte values are decoded as little-endian with fixed (standard)
sizes, so the module behaves the same on 32-bit, 64-bit and big-endian
hosts.  File objects passed to these functions must be opened in binary
mode; text extracted from the file is returned as byte strings.
"""

import sys
import struct
import string

# Sizes, in bytes, of the on-disk integer types read by the helpers below.
WORDSIZE = 2
DWORDSIZE = 4
INTEGERSIZE = 2
LONGINTSIZE = 4

# Human-readable names for the flag (field) type found in the low byte of
# each message-header flag DWORD; used by MailHeaders() for its dump.
HdrDecode = {
    0x02: "Sent Date",
    0x03: "Filename",
    0x05: "Subject",
    0x04: "Message Position 04",
    0x06: "???? 06",
    0x07: "Message-Id",
    0x08: "Subject",
    0x09: "From",
    0x0a: "References",
    0x0b: "newsgroup",
    0x0d: "From",
    0x0e: "Reply-To",
    0x12: "Received Date",
    0x13: "To",
    0x14: "???? 14",
    0x1A: "Account",
    0x1b: "Account-Id",
    0x80: "Message Number",
    0x81: "Message Status",
    0x84: "Message Position 84",
    0x91: "Message Size"
}


def _read_value(f, fmt, size, pos=None):
    """Read one struct-packed value of `size` bytes from `f`.

    Seeks to `pos` first unless it is None (offset 0 is a valid target).
    Raises struct.error if fewer than `size` bytes are available.
    """
    if pos is not None:
        f.seek(pos)
    return struct.unpack(fmt, f.read(size))[0]


def _stream_name(f):
    """Return f.name for error messages, tolerating unnamed streams."""
    return getattr(f, "name", "<unnamed stream>")


class THeaderData:
    """Fixed 12-byte prefix of a message-info record located at `pos`."""

    def __init__(self, f, pos):
        # The fields are read sequentially; the order of these statements
        # is the on-disk layout.
        self.Position = readLongInt(f, pos)   # compared against pos by callers
        self.DataLength = readLongInt(f)      # flag table + data area, in bytes
        self.HeaderLength = readWord(f)
        self.FlagCount = readWord(f)          # callers use only the low byte


class TOE5_IndexHeader:
    """24-byte header of an index node located at `pos`."""

    def __init__(self, f, pos):
        self.FilePos = readLongInt(f, pos)    # compared against pos by ReadIndex
        self.Unknown1 = readLongInt(f)
        self.PrevIndex = readLongInt(f)
        self.NextIndex = readLongInt(f)
        self.Count = readLongInt(f)           # ReadIndex uses Count >> 8
        self.Unknown2 = readLongInt(f)


class TOE5_MsgItem:
    """One segment of a message body: 16-byte header plus up to 512 bytes."""

    def __init__(self, f, pos):
        self.FilePos = readLongInt(f, pos)
        self.Unknown = readLongInt(f)
        self.ItemSize = readLongInt(f)        # bytes of MsgContent actually used
        self.NextItem = readLongInt(f)        # 0 terminates the chain
        self.MsgContent = f.read(512)


class TOE5_IndexItem:
    """One 12-byte entry of an index node."""

    def __init__(self, f, pos):
        self.HeaderPos = readLongInt(f, pos)  # message-info position, or 0
        self.ChildIndex = readLongInt(f)      # child index node, or 0
        self.Unknown = readLongInt(f)


class TOE5_MessageInfo:
    """Plain attribute bag filled in by ReadMessageInfo().

    Attributes set there: Position (always), and position / From when the
    corresponding flags are present in the record.
    """
    pass


def stringFrom(s, offset):
    """Return the NUL-terminated string that starts at `offset` in `s`.

    If no terminator follows `offset`, the rest of `s` is returned (the
    previous code silently dropped the final character in that case).
    """
    terminator = b"\0" if isinstance(s, bytes) else u"\0"
    fin = s.find(terminator, offset)
    if fin == -1:
        return s[offset:]
    return s[offset:fin]


def readLongInt(f, pos=None):
    """Read a signed 32-bit little-endian integer, seeking to `pos` if given."""
    return _read_value(f, "<l", LONGINTSIZE, pos)


def readInteger(f, pos=None):
    """Read a signed 16-bit little-endian integer, seeking to `pos` if given."""
    return _read_value(f, "<h", INTEGERSIZE, pos)


def readDWord(f, pos=None):
    """Read an unsigned 32-bit little-endian integer, seeking to `pos` if given.

    Unsigned so that callers shifting off the low byte get a non-negative
    24-bit value.
    """
    return _read_value(f, "<L", DWORDSIZE, pos)


def readWord(f, pos=None):
    """Read an unsigned 16-bit little-endian integer, seeking to `pos` if given."""
    return _read_value(f, "<H", WORDSIZE, pos)


def ReadOEFile(f):  # takes file, not name
    """Return the list of TOE5_MessageInfo objects found in open file `f`.

    The position of the first index node is read from offset 0x30 and the
    index is then walked with ReadIndex().  Progress is printed to stdout.
    Raises ValueError if an index or message record fails its position check.
    """
    position = readLongInt(f, 0x30)
    print("Starting at %s" % (position,))
    nodelst = []  # index positions already visited
    mlst = list(ReadIndex(f, position, nodelst, 0))
    print("ReadOEFile returns: %s messages" % (len(mlst),))
    return mlst


def ReadIndex(f, pos, tl, folders):
    """Recursively collect message infos reachable from the index node at `pos`.

    f       -- binary file object
    pos     -- file position of the index node
    tl      -- list of index positions already visited; appended to in place
               so that no node is processed twice
    folders -- must be false; folder files are not supported

    Returns a list of TOE5_MessageInfo objects.  Raises ValueError if the
    node's stored position does not equal `pos`, or if `folders` is true and
    an entry with a header position is found.
    """
    ml = []
    iheader = TOE5_IndexHeader(f, pos)
    print("Input position: %s" % (pos,))
    print("IndexHeader position: %s" % (iheader.FilePos,))
    if pos != iheader.FilePos:
        raise ValueError("Index header has incorrect position")
    tl.append(pos)

    # Every message info is a freshly created object and every node is
    # visited at most once (guarded by tl), so results can never repeat;
    # plain extend/append replaces the former quadratic "not in" scans.
    if iheader.NextIndex and iheader.NextIndex not in tl:
        ml.extend(ReadIndex(f, iheader.NextIndex, tl, folders))
    if iheader.PrevIndex and iheader.PrevIndex not in tl:
        ml.extend(ReadIndex(f, iheader.PrevIndex, tl, folders))

    icount = iheader.Count >> 8
    print("pos: %s icount: %s" % (pos, icount))
    if icount:
        lpos = iheader.FilePos + 24  # entries follow the 24-byte header
        for _ in range(icount):
            indexItem = TOE5_IndexItem(f, lpos)
            if indexItem.HeaderPos:
                if folders:
                    raise ValueError("cannot handle folders yet")
                ml.append(ReadMessageInfo(f, indexItem.HeaderPos, tl))
            if indexItem.ChildIndex and indexItem.ChildIndex not in tl:
                ml.extend(ReadIndex(f, indexItem.ChildIndex, tl, folders))
            lpos += 12  # each index entry is three 4-byte values
    print("ReadIndex( %s ) returns %s messages" % (pos, len(ml)))
    return ml


def ReadMessageInfo(f, pos, tl):
    """Decode the message-info record at `pos` into a TOE5_MessageInfo.

    The record is a THeaderData prefix, a table of flag DWORDs (count taken
    from the low byte of FlagCount) and a data area.  Each flag carries a
    type in its low byte and a 24-bit value in the upper three bytes:

      0x84 -- the value itself is stored as `position`
      0x0d -- the value is an offset into the data area of a NUL-terminated
              string, stored as `From`
      0x04 -- the value is an offset into the data area of a 32-bit integer,
              stored as `position`

    Other flag types are ignored.  `tl` is accepted for interface
    compatibility and is not used.  Raises ValueError if the record's stored
    position does not equal `pos`.
    """
    HeaderData = THeaderData(f, pos)
    if HeaderData.Position != pos:
        raise ValueError(
            "Message at %d in file %s has incorrect Position %d" %
            (pos, _stream_name(f), HeaderData.Position))
    oe5_MessageInfo = TOE5_MessageInfo()
    oe5_MessageInfo.Position = pos
    Flags = HeaderData.FlagCount & 0xff
    FlagSize = Flags * DWORDSIZE
    DataSize = HeaderData.DataLength - FlagSize
    FlagBuf = f.read(FlagSize)  # read first, purely to locate the data
    DataBuf = f.read(DataSize)
    for i in range(Flags):
        # Unsigned, so the 24-bit value stays non-negative when its top
        # bit is set.
        FlagDWord, = struct.unpack_from("<L", FlagBuf, i * DWORDSIZE)
        FlagType = FlagDWord & 0xff
        FlagOffset = FlagDWord >> 8
        if FlagType == 0x84:
            oe5_MessageInfo.position = FlagOffset
        elif FlagType == 0xd:
            oe5_MessageInfo.From = stringFrom(DataBuf, FlagOffset)
        elif FlagType == 0x4:
            oe5_MessageInfo.position, = struct.unpack(
                "<l", DataBuf[FlagOffset:FlagOffset + DWORDSIZE])
    return oe5_MessageInfo


def MailHeaders(f, pos):
    """Return a text dump of the flag table of the message-info record at `pos`.

    One line is produced per flag: index, type, 24-bit value and the name
    from HdrDecode ("::: Unknown :::" when the type is not listed).  Also
    prints the flag count and computed sizes to stdout.  Raises ValueError
    if the record's stored position does not equal `pos`.
    """
    HeaderData = THeaderData(f, pos)
    if pos != HeaderData.Position:
        raise ValueError("Incorrect Header Data Position at %d in %s" % (
            pos, _stream_name(f)))
    Flags = HeaderData.FlagCount & 0xff
    print("%s flags" % (Flags,))
    Size = Flags * DWORDSIZE
    DataSize = HeaderData.DataLength - Size
    print("Size: %s DataSize %s" % (Size, DataSize))
    parts = ["*** Message Header Dump *** %d **\n\n*" % pos]
    for i in range(Flags):
        flag = readDWord(f)  # the flag table directly follows the prefix
        ftype = flag & 0xff
        fstring = HdrDecode.get(ftype, "::: Unknown :::")
        fval = flag >> 8
        parts.append("%2d = %02x: %08x ==> %s\n" % (i, ftype, fval, fstring))
    return "".join(parts)


def ReadHeaderInfo(f, pos, str):
    """Report whether a message-info record starts at `pos`.

    Prints a message and returns `pos` when the record's stored position
    equals `pos`; otherwise returns None.  The third parameter is unused and
    kept only for interface compatibility.

    NOTE(review): the indentation of the original `return pos` was lost; it
    may have been unconditional.  Confirm against callers if any exist.
    """
    HeaderData = THeaderData(f, pos)
    if pos == HeaderData.Position:
        print("Found header at %s" % (pos,))
        return pos
    return None


def Read_OE_Message(f, pos):
    """Return the body of the message whose first segment is at `pos`.

    Follows the NextItem chain of TOE5_MsgItem segments, concatenating the
    first ItemSize bytes of each, until NextItem is 0, a segment's stored
    position does not match, or a segment position repeats (which would
    otherwise loop forever on a corrupt file).  Raises ValueError if the
    16-bit count at offset 0xc4 is not positive.
    """
    IndexItemsCount = readInteger(f, 0xc4)
    if not IndexItemsCount > 0:
        raise ValueError(
            "File %s has invalid IndexItemsCount %d at 0xc4" %
            (_stream_name(f), IndexItemsCount))
    chunks = []
    seen = set()
    while pos not in seen:
        seen.add(pos)
        oe5MsgItem = TOE5_MsgItem(f, pos)
        if oe5MsgItem.FilePos != pos:
            break
        chunks.append(oe5MsgItem.MsgContent[:oe5MsgItem.ItemSize])
        pos = oe5MsgItem.NextItem
        if pos == 0:
            break
    return b"".join(chunks)


if __name__ == "__main__":
    # An optional first argument overrides the historical default file name.
    _path = sys.argv[1] if len(sys.argv) > 1 else "comp.lang.python.dbx"
    f = open(_path, "rb")
    try:
        msgs = ReadOEFile(f)
        print("%s messages" % (len(msgs),))
        # Example of further use while the file is still open:
        #   m = msgs[int(number)]
        #   if hasattr(m, "From"):
        #       print("From: %s" % (m.From,))
        #   body = Read_OE_Message(f, m.position).replace(b'\r', b'')
    finally:
        f.close()