-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbbab2csv
executable file
·165 lines (149 loc) · 6.03 KB
/
bbab2csv
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
#!/usr/bin/python
#
# bbab2csv - BlackBerry Address Book to CSV
# Copyright 2011 Brandon Mintern, [email protected]
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
# Hosted on github
# Help text; the script logs it to STDERR when the TARBALL argument is missing
# or the file cannot be opened.
usage = """
Usage: This program is designed to process the result of a backup produced by
the barrybackup command line program. The result of such a backup is a tarball
at ~/.barry/backup/<PIN>/<PIN>-<YYYYMMDD>-<HHMMSS>.tar.gz. Inside that tarball
is a directory named "Address Book" which contains a binary file for each
contact.
To run it, execute
bbab2csv TARBALL
where TARBALL is the file generated by barrybackup. The resulting CSV will
be printed to STDOUT, and unrecognized data will be printed to STDERR.
"""
# The layout of each BlackBerry contact file seems to be:
# 1. 2-byte little-endian field length l
# 2. field type byte
# 3. l-byte field
# 4. either another record (1-3 repeated) or a single 0x00 byte
#
# The following list of tuples indicates the field types (see #2 above) we
# know about. This could be improved by moving the information to a separate
# file and then reading/parsing it here.
#
# The order in which field types appear is the same order in which the columns
# will be output in the resulting CSV.
#
# After running the program, any unknown fields will be reported to standard
# error. That information can be used to update this program to work better in
# the future.
# Column label of the contact-name field. It is referenced directly by the
# code that sorts the output rows.
name = "Name"

# (field type byte, column label) pairs for every field type we recognise,
# listed in the order the columns are written to the CSV.
fields = [
    (0x20, name),
    (0x56, "Nickname"),
    (0x21, "Organization"),
    (0x07, "Home Phone"),
    (0x08, "Mobile Phone"),
    (0x06, "Work Phone"),
    (0x11, "Home Phone 2"),
    (0x12, "Mobile Phone 2"),
    (0x0a, "BB PIN"),
    (0x3d, "Home Address"),
    (0x3e, "Home Address 2"),
    (0x45, "Home City"),
    (0x46, "Home State"),
    (0x47, "Home ZIP"),
    (0x40, "Home Directions"),
    (0x23, "Work Address"),
    (0x24, "Work Address 2"),
    (0x26, "Work City"),
    (0x27, "Work State"),
]

# Map from field type byte to column label, for looking up known fields.
field_types = dict(fields)

# The column labels on their own, in output order.
field_names = [label for _code, label in fields]
import sys, csv, tarfile
from os import path
from struct import unpack
from collections import defaultdict
def read_bb_field (fh):
    """
    Read one field record from an open BlackBerry Address Book contact file.

    A record is read as a 2-byte little-endian payload length, a 1-byte
    field type and then the payload itself. fh must be a file-like object
    whose read() returns bytes, positioned at the start of a record.

    Returns a tuple of (field_name, field_value). When the type byte is
    listed in field_types, field_name is the matching column label (a
    string) and field_value is the payload with its trailing NUL terminator
    removed. Otherwise field_name is the integer type byte, field_value is
    the raw payload, and the record is reported through log().

    If the record header cannot be read in full (fewer than 2 length bytes,
    or no type byte), it returns (False, <header bytes read so far>) to
    indicate a likely end-of-file condition.
    """
    # Read the first two bytes to determine the length of the payload
    length_bytes = fh.read(2)
    if len(length_bytes) < 2:
        return False, length_bytes
    length = unpack("<H", length_bytes)[0]

    # Read the 1-byte field type. If the file stops right after the length,
    # unpack() would raise struct.error on the empty read, so report that
    # case as end of input as well.
    type_byte = fh.read(1)
    if len(type_byte) < 1:
        return False, length_bytes + type_byte
    type_code = unpack("<B", type_byte)[0]

    # Read the payload itself, assumed to be `length` bytes as indicated above
    field = fh.read(length)

    try:
        field_name = field_types[type_code]
    except KeyError:
        # Log unknown field types. Note that we can improve this a lot by:
        # 1. Adding known irrelevant field types that aren't logged
        # 2. Generating a better string representation of the field than
        #    simply dumping the binary value in there as a string.
        log('Unknown field type: %x, value: "%s"' % (type_code, field))
        return type_code, field

    # All known fields are expected to be NUL-terminated strings. Strip the
    # terminator, but only when it is really there, so that a payload
    # without one does not lose its last data byte.
    if field[-1:] == b"\0":
        field = field[:-1]
    return field_name, field
def iterate_contacts (tarball):
    """
    Take an open tarfile object and yield open contact files from the
    "Address Book" directory, in archive order.

    Only members whose parent path is exactly "Address Book" are considered.
    Members for which extractfile() returns None (it does so for entries
    that are neither regular files nor links, e.g. directories) are skipped.

    Each yielded file object is closed when iteration is resumed, and also
    when the generator is closed early or the consumer raises.
    """
    for member in tarball:
        if path.split(member.name)[0] != "Address Book":
            continue
        contact = tarball.extractfile(member)
        if contact is None:
            # Nothing to read for this member (e.g. a sub-directory)
            continue
        try:
            yield contact
        finally:
            contact.close()
def log (msg):
    """
    Write msg, followed by a newline, to standard error.

    msg is formatted with %s, so non-string values are accepted too.
    """
    # sys.stderr.write() behaves the same on Python 2 and Python 3, whereas
    # the "print >>stream" statement form only works on Python 2.
    sys.stderr.write("%s\n" % (msg,))
# Open the tarball named by the first command-line argument. Every failure
# here ends with the usage text on STDERR and exit status 2.
try:
    tarball = tarfile.open(sys.argv[1])
except IndexError:
    # No TARBALL argument was given
    log(usage)
    sys.exit(2)
except IOError:
    log('\nNo such file: "%s"' % sys.argv[1])
    log(usage)
    sys.exit(2)
except tarfile.TarError:
    # The file exists but tarfile could not read it as an archive
    log('\nNot a readable tar archive: "%s"' % sys.argv[1])
    log(usage)
    sys.exit(2)

contacts = []
try:
    for contactfile in iterate_contacts(tarball):
        # We'll store each field from the contact file in a dict mapping
        # field name to list of field values. It's important to store it as
        # a list because the name field (for example) appears more than once
        contact = defaultdict(list)
        while True:
            field_type, field = read_bb_field(contactfile)
            if field_type is False:
                # End of input. The leftover bytes in `field` are presumably
                # the single NUL closing byte, but that is not enforced.
                break
            contact[field_type].append(field)
        contacts.append(contact)
finally:
    # We're done with the tarball; close it even if parsing failed
    tarball.close()

# Get CSV output object and write header row
csvout = csv.writer(sys.stdout)
csvout.writerow(field_names)

# Iterate over the contacts in order by name. Note that entry[name] will
# never fail because each contact is a defaultdict(list)
for contact in sorted(contacts, key=lambda entry: entry[name]):
    # Write out the contact info for known fields to the CSV, joining the
    # values collected for each field with a space. Fields stored under an
    # integer (unknown) type are not part of field_names and so are omitted.
    csvout.writerow([" ".join(contact[column]) for column in field_names])