Merge pull request #47 from mholt/smsbackuprestore

Data source: smsbackuprestore
This commit is contained in:
Matt Holt
2019-07-14 14:40:40 -06:00
committed by GitHub
6 changed files with 556 additions and 30 deletions

View File

@@ -20,6 +20,7 @@ import (
_ "github.com/mholt/timeliner/datasources/googlelocation"
_ "github.com/mholt/timeliner/datasources/googlephotos"
_ "github.com/mholt/timeliner/datasources/instagram"
"github.com/mholt/timeliner/datasources/smsbackuprestore"
"github.com/mholt/timeliner/datasources/twitter"
)
@@ -35,6 +36,8 @@ func init() {
flag.BoolVar(&twitterRetweets, "twitter-retweets", twitterRetweets, "Twitter: include retweets")
flag.BoolVar(&twitterReplies, "twitter-replies", twitterReplies, "Twitter: include replies that are not just replies to self")
flag.StringVar(&phoneDefaultRegion, "phone-default-region", phoneDefaultRegion, "SMS Backup & Restore: default region")
}
func main() {
@@ -117,6 +120,8 @@ func main() {
case *twitter.Client:
v.Retweets = twitterRetweets
v.Replies = twitterReplies
case *smsbackuprestore.Client:
v.DefaultRegion = phoneDefaultRegion
}
clients = append(clients, wc)
@@ -296,4 +301,6 @@ var (
twitterRetweets bool
twitterReplies bool
phoneDefaultRegion string = "US"
)

View File

@@ -0,0 +1,197 @@
package smsbackuprestore
import (
"encoding/base64"
"fmt"
"io"
"strings"
"time"
"github.com/mholt/timeliner"
)
// MMS represents a multimedia message, decoded from an <mms>
// element of the backup XML. Each exported field is filled from
// the XML attribute or child element named in its struct tag;
// attribute values are kept as the raw strings found in the file.
type MMS struct {
CommonSMSandMMSFields
Rr string `xml:"rr,attr"`
Sub string `xml:"sub,attr"`
CtT string `xml:"ct_t,attr"`
ReadStatus string `xml:"read_status,attr"`
Seen string `xml:"seen,attr"`
MsgBox string `xml:"msg_box,attr"`
SubCs string `xml:"sub_cs,attr"`
RespSt string `xml:"resp_st,attr"`
RetrSt string `xml:"retr_st,attr"`
DTm string `xml:"d_tm,attr"`
TextOnly string `xml:"text_only,attr"`
Exp string `xml:"exp,attr"`
MID string `xml:"m_id,attr"`
St string `xml:"st,attr"`
RetrTxtCs string `xml:"retr_txt_cs,attr"`
RetrTxt string `xml:"retr_txt,attr"`
Creator string `xml:"creator,attr"`
MSize string `xml:"m_size,attr"`
RptA string `xml:"rpt_a,attr"`
CtCls string `xml:"ct_cls,attr"`
Pri string `xml:"pri,attr"`
TrID string `xml:"tr_id,attr"` // combined with the date by ID to form the item ID
RespTxt string `xml:"resp_txt,attr"`
CtL string `xml:"ct_l,attr"`
MCls string `xml:"m_cls,attr"`
DRpt string `xml:"d_rpt,attr"`
V string `xml:"v,attr"`
MType string `xml:"m_type,attr"`
Parts Parts `xml:"parts"` // the text and media parts of the message
Addrs Addresses `xml:"addrs"` // the sender and recipient addresses
// client is the Client that listed this message; Owner uses it
// to standardize phone numbers. It is not part of the XML.
client *Client
}
// ID builds the item ID for this message by joining its
// millisecond date and its tr_id attribute with an underscore.
func (m MMS) ID() string {
	date, trID := m.Date, m.TrID
	return fmt.Sprintf("%d_%s", date, trID)
}
// Timestamp converts the message's date, which is stored as
// Unix time in milliseconds, into a time.Time.
func (m MMS) Timestamp() time.Time {
	nanos := m.Date * int64(time.Millisecond)
	return time.Unix(0, nanos)
}
// Class reports the item class of a multimedia message,
// which is always timeliner.ClassMessage.
func (m MMS) Class() timeliner.ItemClass {
return timeliner.ClassMessage
}
// Owner reports the phone number of the message's sender, taken
// from the first address whose type marks it as the sender. The
// number is standardized when possible; otherwise the address is
// returned as it appears in the export. The name is always nil
// because the export does not tie contact names to individual
// addresses. Both results are nil if no sender address exists.
func (m MMS) Owner() (number *string, name *string) {
	for _, candidate := range m.Addrs.Addr {
		if candidate.Type != mmsAddrTypeSender {
			continue
		}

		// TODO: Get sender name... for group texts this is tricky/impossible, since order varies
		// TODO: If there is only one other contact on the message (other than the account owner's number), we can probably assume the contact name is theirs.
		raw := candidate.Address
		if e164, err := m.client.standardizePhoneNumber(raw); err == nil {
			return &e164, nil
		}

		// standardizing failed; fall back to the number as exported
		return &raw, nil
	}
	return nil, nil
}
// DataText concatenates, in order, the text of every text/plain
// part that has a non-negative sequence number. Parts whose text
// attribute is empty or the literal "null" contribute nothing.
// It returns nil if no text was collected; the error is always nil.
func (m MMS) DataText() (*string, error) {
	var sb strings.Builder
	for _, p := range m.Parts.Part {
		if p.Seq < 0 || p.ContentType != "text/plain" {
			continue
		}
		if p.AttrText == "" || p.AttrText == "null" {
			continue
		}
		sb.WriteString(p.AttrText)
	}
	if sb.Len() == 0 {
		return nil, nil
	}
	joined := sb.String()
	return &joined, nil
}
// DataFileName reports the file name of the first media part
// (image or video) that has a non-negative sequence number, or
// nil if the message has no such part.
func (m MMS) DataFileName() *string {
	for _, p := range m.Parts.Part {
		if p.Seq >= 0 && isMediaContentType(p.ContentType) {
			name := p.Filename
			return &name
		}
	}
	return nil
}
// DataFileReader opens the first media part (image or video)
// that has a non-negative sequence number. The part's data
// attribute is base64 in the standard encoding, so the returned
// reader decodes it on the fly. Both results are nil if the
// message has no such part.
func (m MMS) DataFileReader() (io.ReadCloser, error) {
	for _, p := range m.Parts.Part {
		if p.Seq < 0 || !isMediaContentType(p.ContentType) {
			continue
		}
		decoder := base64.NewDecoder(base64.StdEncoding, strings.NewReader(p.Data))
		return timeliner.FakeCloser(decoder), nil
	}
	return nil, nil
}
// DataFileHash always returns nil; the part attributes decoded
// from the export do not include a hash of the media data.
func (m MMS) DataFileHash() []byte {
return nil
}
// DataFileMIMEType reports the content type of the first media
// part (image or video) that has a non-negative sequence number,
// or nil if the message has no such part.
//
// Parts with a negative sequence number are skipped so that the
// MIME type always describes the same part that DataFileName and
// DataFileReader select.
func (m MMS) DataFileMIMEType() *string {
	for _, part := range m.Parts.Part {
		if part.Seq < 0 {
			continue
		}
		if isMediaContentType(part.ContentType) {
			return &part.ContentType
		}
	}
	return nil
}
// Metadata always returns nil and a nil error; no metadata
// is derived from multimedia messages.
func (m MMS) Metadata() (*timeliner.Metadata, error) {
return nil, nil
}
// Location always returns nil and a nil error; no location
// is derived from multimedia messages.
func (m MMS) Location() (*timeliner.Location, error) {
return nil, nil
}
// Parts holds the parts of an MMS, decoded from the <parts>
// element and its <part> children.
type Parts struct {
Text string `xml:",chardata"` // character data found directly inside <parts>
Part []Part `xml:"part"` // one entry per <part> child element
}
// Part is a single part of an MMS, decoded from a <part> element.
// Text parts carry their content in AttrText, while media parts
// carry theirs in Data.
type Part struct {
Text string `xml:",chardata"`
Seq int `xml:"seq,attr"` // parts with a negative value are skipped when reading text and files
ContentType string `xml:"ct,attr"` // MIME type of the part, e.g. "text/plain"
Name string `xml:"name,attr"`
Charset string `xml:"chset,attr"`
Cd string `xml:"cd,attr"`
Fn string `xml:"fn,attr"`
Cid string `xml:"cid,attr"`
Filename string `xml:"cl,attr"` // reported by MMS.DataFileName for media parts
CttS string `xml:"ctt_s,attr"`
CttT string `xml:"ctt_t,attr"`
AttrText string `xml:"text,attr"` // may be the literal string "null", which MMS.DataText ignores
Data string `xml:"data,attr"` // base64 (standard encoding); decoded by MMS.DataFileReader
}
// Addresses holds the addresses of everyone on an MMS, sender
// and recipients alike, decoded from the <addrs> element.
type Addresses struct {
Text string `xml:",chardata"` // character data found directly inside <addrs>
Addr []Address `xml:"addr"` // one entry per <addr> child element
}
// Address is a sender or recipient of the MMS, decoded from
// an <addr> element.
type Address struct {
Text string `xml:",chardata"`
Address string `xml:"address,attr"` // the phone number as written in the export (not standardized)
Type int `xml:"type,attr"` // 151 = recipient, 137 = sender
Charset string `xml:"charset,attr"`
}
// isMediaContentType reports whether ct names an image or video
// MIME type, i.e. whether it begins with "image/" or "video/".
// The comparison ignores letter case because MIME type names are
// case-insensitive, so "IMAGE/JPEG" counts as media too.
func isMediaContentType(ct string) bool {
	for _, prefix := range []string{"image/", "video/"} {
		// compare only the leading bytes so no lowercased copy of ct is needed
		if len(ct) >= len(prefix) && strings.EqualFold(ct[:len(prefix)], prefix) {
			return true
		}
	}
	return false
}

View File

@@ -0,0 +1,130 @@
package smsbackuprestore
import (
"encoding/xml"
"fmt"
"io"
"strings"
"time"
"github.com/mholt/timeliner"
)
// Smses is the root <smses> element of a backup file; it holds
// every SMS and MMS in the export. The type was generated
// 2019-07-10 using an export from SMS Backup & Restore
// v10.05.602 (previous versions have a bug with emoji encodings).
type Smses struct {
XMLName xml.Name `xml:"smses"`
Text string `xml:",chardata"`
Count int `xml:"count,attr"`
BackupSet string `xml:"backup_set,attr"` // UUID
BackupDate int64 `xml:"backup_date,attr"` // unix timestamp in milliseconds
SMS []SMS `xml:"sms"` // one entry per <sms> child element
MMS []MMS `xml:"mms"` // one entry per <mms> child element
}
// CommonSMSandMMSFields are the fields that both
// SMS and MMS share in common. The struct is embedded
// in both types, so these fields are decoded from the
// attributes of the <sms> or <mms> element itself.
type CommonSMSandMMSFields struct {
Text string `xml:",chardata"`
Address string `xml:"address,attr"` // phone number as written in the export (not standardized)
Date int64 `xml:"date,attr"` // unix timestamp in milliseconds
Read int `xml:"read,attr"`
Locked int `xml:"locked,attr"`
DateSent int64 `xml:"date_sent,attr"` // unix timestamp in (SMS: milliseconds, MMS: seconds)
SubID int `xml:"sub_id,attr"`
ReadableDate string `xml:"readable_date,attr"` // format: "Oct 20, 2017 12:35:30 PM"
ContactName string `xml:"contact_name,attr"` // might be "(Unknown)"
}
// SMS represents a simple text message, decoded from an
// <sms> element of the backup XML.
type SMS struct {
CommonSMSandMMSFields
Protocol int `xml:"protocol,attr"`
Type int `xml:"type,attr"` // 1 = received, 2 = sent
Subject string `xml:"subject,attr"`
Body string `xml:"body,attr"` // the message text; also hashed by ID to build the item ID
Toa string `xml:"toa,attr"`
ScToa string `xml:"sc_toa,attr"`
ServiceCenter string `xml:"service_center,attr"`
Status int `xml:"status,attr"`
// client is the Client that listed this message; Owner uses it
// to find the account owner's number and to standardize phone
// numbers. It is not part of the XML.
client *Client
}
// ID builds an identifier for this text message. The export
// has no message IDs, so one is made by joining the message's
// millisecond timestamp and a fast hash of its body with an
// underscore.
func (s SMS) ID() string {
	bodyHash := fastHash(s.Body)
	return fmt.Sprintf("%d_%s", s.Date, bodyHash)
}
// Timestamp converts the message's date, which is stored as
// Unix time in milliseconds, into a time.Time.
func (s SMS) Timestamp() time.Time {
	nanos := s.Date * int64(time.Millisecond)
	return time.Unix(0, nanos)
}
// Class reports the item class of a text message,
// which is always timeliner.ClassMessage.
func (s SMS) Class() timeliner.ItemClass {
return timeliner.ClassMessage
}
// Owner reports who sent the message. For a sent message that
// is the account owner's user ID, with no name. For a received
// message it is the message's address, standardized when
// possible, along with the contact name unless that name is
// empty or "(Unknown)". Any other message type yields nil for
// both results.
func (s SMS) Owner() (number *string, name *string) {
	if s.Type == smsTypeSent {
		return &s.client.account.UserID, nil
	}
	if s.Type != smsTypeReceived {
		return nil, nil
	}

	if s.ContactName != "" && s.ContactName != "(Unknown)" {
		name = &s.ContactName
	}

	if e164, err := s.client.standardizePhoneNumber(s.Address); err == nil {
		return &e164, name
	}

	// standardizing failed; fall back to the number as exported
	return &s.Address, name
}
// DataText returns the message body with leading and trailing
// whitespace removed, or nil if nothing remains after trimming.
// The error is always nil.
func (s SMS) DataText() (*string, error) {
	if trimmed := strings.TrimSpace(s.Body); trimmed != "" {
		return &trimmed, nil
	}
	return nil, nil
}
// DataFileName always returns nil; a simple text
// message has no attached file.
func (s SMS) DataFileName() *string {
return nil
}
// DataFileReader always returns a nil reader and a nil error;
// a simple text message has no attached file to read.
func (s SMS) DataFileReader() (io.ReadCloser, error) {
return nil, nil
}
// DataFileHash always returns nil; a simple text
// message has no attached file to hash.
func (s SMS) DataFileHash() []byte {
return nil
}
// DataFileMIMEType always returns nil; a simple text
// message has no attached file.
func (s SMS) DataFileMIMEType() *string {
return nil
}
// Metadata always returns nil and a nil error; no metadata
// is derived from text messages.
func (s SMS) Metadata() (*timeliner.Metadata, error) {
return nil, nil
}
// Location always returns nil and a nil error; no location
// is derived from text messages.
func (s SMS) Location() (*timeliner.Location, error) {
return nil, nil
}

View File

@@ -0,0 +1,148 @@
// Package smsbackuprestore implements a Timeliner data source for
// the Android SMS Backup & Restore app by SyncTech:
// https://synctech.com.au/sms-backup-restore/
package smsbackuprestore
import (
"context"
"encoding/xml"
"fmt"
"hash/fnv"
"log"
"os"
"github.com/mholt/timeliner"
"github.com/ttacon/libphonenumber"
)
// Data source name and ID. Both are used to populate the
// timeliner.DataSource that this package registers.
const (
DataSourceName = "SMS Backup & Restore"
DataSourceID = "smsbackuprestore"
)
// dataSource describes this data source to Timeliner. It is
// registered by this package's init function. NewClient returns
// a Client bound to the given account; DefaultRegion is left
// unset and is expected to be filled in by the caller.
var dataSource = timeliner.DataSource{
ID: DataSourceID,
Name: DataSourceName,
NewClient: func(acc timeliner.Account) (timeliner.Client, error) {
return &Client{account: acc}, nil
},
}
// init registers this package's data source with Timeliner.
// If registration fails the error is logged and the program exits.
func init() {
	if err := timeliner.RegisterDataSource(dataSource); err != nil {
		log.Fatal(err)
	}
}
// Client implements the timeliner.Client interface for
// SMS Backup & Restore export files.
type Client struct {
// DefaultRegion is the region to assume for phone
// numbers that do not have an explicit country
// calling code. This value should be the ISO
// 3166-1 alpha-2 standard region code.
DefaultRegion string
// account is the account this client was created for. Its
// UserID is treated as the owner's phone number, and ListItems
// overwrites it with the standardized form of that number.
account timeliner.Account
}
// ListItems reads the backup file named by opt.Filename and sends
// one item graph per SMS and MMS to itemChan, SMS first. itemChan
// is always closed before ListItems returns.
//
// Before reading, the account's user ID (the owner's phone number)
// is replaced with its standardized form so it can be compared
// with the numbers found in the messages. For an MMS with more
// than two addresses, every participant who is neither the sender
// nor the account owner is recorded as carbon-copied on the item.
//
// An error is returned if opt.Filename is empty, if the owner's
// phone number cannot be standardized, or if the file cannot be
// opened or decoded. If ctx is cancelled while items are being
// sent, ListItems stops and returns ctx.Err() rather than blocking
// on a consumer that may no longer be receiving.
func (c *Client) ListItems(ctx context.Context, itemChan chan<- *timeliner.ItemGraph, opt timeliner.Options) error {
	defer close(itemChan)

	if opt.Filename == "" {
		return fmt.Errorf("filename is required")
	}

	// ensure the client's phone number is standardized
	// TODO: It would be better to have a hook in the account creation process to be able to do this
	ownerPhoneNum, err := c.standardizePhoneNumber(c.account.UserID)
	if err != nil {
		return fmt.Errorf("standardizing client phone number '%s': %v", c.account.UserID, err)
	}
	c.account.UserID = ownerPhoneNum

	xmlFile, err := os.Open(opt.Filename)
	if err != nil {
		return err
	}
	defer xmlFile.Close()

	var data Smses
	dec := xml.NewDecoder(xmlFile)
	err = dec.Decode(&data)
	if err != nil {
		return fmt.Errorf("decoding XML file: %v", err)
	}

	// send delivers ig to the consumer, giving up as soon as
	// ctx is cancelled so that a stopped consumer cannot make
	// this goroutine block forever
	send := func(ig *timeliner.ItemGraph) error {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case itemChan <- ig:
			return nil
		}
	}

	for _, sms := range data.SMS {
		sms.client = c
		if err := send(timeliner.NewItemGraph(sms)); err != nil {
			return err
		}
	}

	for _, mms := range data.MMS {
		mms.client = c

		ig := timeliner.NewItemGraph(mms)

		// add relations to make sure other participants in a group text
		// are recorded; necessary if more than two participants
		if len(mms.Addrs.Addr) > 2 {
			if ownerNum, _ := mms.Owner(); ownerNum != nil {
				for _, addr := range mms.Addrs.Addr {
					participantNum, err := c.standardizePhoneNumber(addr.Address)
					if err != nil {
						participantNum = addr.Address // oh well
					}
					// if this participant is not the owner of the message or
					// the account owner, then it must be another group member
					if participantNum != *ownerNum && participantNum != c.account.UserID {
						ig.Relations = append(ig.Relations, timeliner.RawRelation{
							FromItemID:     mms.ID(),
							ToPersonUserID: participantNum,
							Relation:       timeliner.RelCCed,
						})
					}
				}
			}
		}

		if err := send(ig); err != nil {
			return err
		}
	}

	return nil
}
// fastHash computes the 32-bit FNV-1a hash of input and returns
// it as lowercase hexadecimal without leading zeros. It is meant
// for cheap fingerprinting only; do not use it for cryptographic
// purposes.
func fastHash(input string) string {
	hasher := fnv.New32a()
	// Write on a hash.Hash never returns an error
	_, _ = hasher.Write([]byte(input))
	sum := hasher.Sum32()
	return fmt.Sprintf("%x", sum)
}
// standardizePhoneNumber parses number and returns it in E164
// format. Numbers that lack an explicit region/country code are
// interpreted using the country code for c.DefaultRegion. If the
// number cannot be parsed, the parse error is returned along with
// an empty string.
//
// We chose E164 because that's what Twilio uses.
func (c *Client) standardizePhoneNumber(number string) (string, error) {
	parsed, parseErr := libphonenumber.Parse(number, c.DefaultRegion)
	if parseErr != nil {
		return "", parseErr
	}
	e164 := libphonenumber.Format(parsed, libphonenumber.E164)
	return e164, nil
}
// Values of the "type" attribute found in the export.
const (
// SMS.Type values: the direction of a text message.
smsTypeReceived = 1
smsTypeSent = 2
// Address.Type values: the role of an address on an MMS.
mmsAddrTypeRecipient = 151
mmsAddrTypeSender = 137
)

View File

@@ -141,6 +141,7 @@ const (
ClassLocation
ClassEmail
ClassPrivateMessage
ClassMessage
)
// These are the standard relationships that Timeliner
@@ -148,9 +149,10 @@ const (
// required, but it makes it easier to translate them to
// human-friendly phrases when visualizing the timeline.
var (
RelReplyTo = Relation{Label: "reply_to", Bidirectional: false} // "<from> is in reply to <to>"
RelAttached = Relation{Label: "attached", Bidirectional: true} // "<to|from> is attached to <from|to>"
RelQuotes = Relation{Label: "quotes", Bidirectional: false} // "<from> quotes <to>"
RelReplyTo = Relation{Label: "reply_to", Bidirectional: false} // "<from> is in reply to <to>"
RelAttached = Relation{Label: "attached", Bidirectional: true} // "<to|from> is attached to <from|to>"
RelQuotes = Relation{Label: "quotes", Bidirectional: false} // "<from> quotes <to>"
RelCCed = Relation{Label: "carbon_copied", Bidirectional: false} // "<from_item> is carbon-copied to <to_person>"
)
// ItemRow has the structure of an item's row in our DB.
@@ -276,15 +278,20 @@ func (ig *ItemGraph) Connect(node *ItemGraph, rel Relation) {
}
// RawRelation represents a relationship between
// two items from the same data source (but not
// necessarily the same accounts; assuming that
// a data source's item IDs are globally unique
// across accounts). The item IDs should be those
// which are assigned/provided by the data source,
// NOT a database row ID.
// two items or people (or both) from the same
// data source (but not necessarily the same
// accounts; we assume that a data source's item
// IDs are globally unique across accounts).
// The item IDs should be those which are
// assigned/provided by the data source, NOT a
// database row ID. Likewise, the persons' user
// IDs should be the IDs of the user as associated
// with the data source, NOT their row IDs.
type RawRelation struct {
FromItemID string
ToItemID string
FromItemID string
ToItemID string
FromPersonUserID string
ToPersonUserID string
Relation
}

View File

@@ -139,30 +139,54 @@ func (wc *WrappedClient) processItemGraph(ig *ItemGraph, state *recursiveState)
// process raw relations, if any
for _, rr := range ig.Relations {
// get each item's row ID from their data source item ID
fromItemRowID, err := wc.itemRowIDFromOriginalID(rr.FromItemID)
if err == sql.ErrNoRows {
continue // item does not exist in timeline; skip this relation
var fromItemRowID, toItemRowID, fromPersonRowID, toPersonRowID *int64
var err error
if rr.FromItemID != "" {
// get each item's row ID from their data source item ID
fromItemRowID, err = wc.itemRowIDFromOriginalID(rr.FromItemID)
if err == sql.ErrNoRows {
continue // item does not exist in timeline; skip this relation
}
if err != nil {
return 0, fmt.Errorf("querying 'from' item row ID: %v", err)
}
}
if err != nil {
return 0, fmt.Errorf("querying 'from' item row ID: %v", err)
if rr.ToItemID != "" {
toItemRowID, err = wc.itemRowIDFromOriginalID(rr.ToItemID)
if err == sql.ErrNoRows {
continue // item does not exist in timeline; skip this relation
}
if err != nil {
return 0, fmt.Errorf("querying 'to' item row ID: %v", err)
}
}
toItemRowID, err := wc.itemRowIDFromOriginalID(rr.ToItemID)
if err == sql.ErrNoRows {
continue // item does not exist in timeline; skip this relation
if rr.FromPersonUserID != "" {
fromPersonRowID, err = wc.personRowIDFromUserID(rr.FromPersonUserID)
if err == sql.ErrNoRows {
continue // person does not exist in timeline; skip this relation
}
if err != nil {
return 0, fmt.Errorf("querying 'from' person row ID: %v", err)
}
}
if err != nil {
return 0, fmt.Errorf("querying 'to' item row ID: %v", err)
if rr.ToPersonUserID != "" {
toPersonRowID, err = wc.personRowIDFromUserID(rr.ToPersonUserID)
if err == sql.ErrNoRows {
continue // person does not exist in timeline; skip this relation
}
if err != nil {
return 0, fmt.Errorf("querying 'to' person row ID: %v", err)
}
}
// store the relation
_, err = wc.tl.db.Exec(`INSERT OR IGNORE INTO relationships
(from_item_id, to_item_id, directed, label)
VALUES (?, ?, ?, ?)`,
fromItemRowID, toItemRowID, rr.Bidirectional, rr.Label)
(from_person_id, from_item_id, to_person_id, to_item_id, directed, label)
VALUES (?, ?, ?, ?, ?, ?)`,
fromPersonRowID, fromItemRowID, toPersonRowID, toItemRowID, !rr.Bidirectional, rr.Label)
if err != nil {
return 0, fmt.Errorf("storing raw item relationship: %v (from_item=%d to_item=%d directed=%t label=%v)",
err, fromItemRowID, toItemRowID, !rr.Bidirectional, rr.Label)
return 0, fmt.Errorf("storing raw item relationship: %v (from_person=%d from_item=%d to_person=%d to_item=%d directed=%t label=%v)",
err, fromPersonRowID, fromItemRowID, toPersonRowID, toItemRowID, !rr.Bidirectional, rr.Label)
}
}
@@ -543,8 +567,9 @@ func (wc *WrappedClient) loadItemRow(accountID int64, originalID string) (ItemRo
// itemRowIDFromOriginalID returns the row ID of the item that
// has the given original ID (the ID assigned by the data source)
// within the data source of wc. If the item does not exist,
// sql.ErrNoRows will be returned.
func (wc *WrappedClient) itemRowIDFromOriginalID(originalID string) (int64, error) {
// sql.ErrNoRows will be returned. A pointer is returned because
// the column is nullable in the DB.
func (wc *WrappedClient) itemRowIDFromOriginalID(originalID string) (*int64, error) {
var rowID int64
err := wc.tl.db.QueryRow(`SELECT items.id
FROM items, accounts
@@ -552,7 +577,19 @@ func (wc *WrappedClient) itemRowIDFromOriginalID(originalID string) (int64, erro
AND accounts.data_source_id=?
AND items.account_id = accounts.id
LIMIT 1`, originalID, wc.ds.ID).Scan(&rowID)
return rowID, err
return &rowID, err
}
// personRowIDFromUserID returns a person's row ID from the user ID
// associated with the data source of wc. If the person does not exist,
// sql.ErrNoRows will be returned. A pointer is returned because the
// column is nullable in the DB.
//
// The returned pointer is nil whenever the error is non-nil, so a
// caller that overlooks the error cannot mistake the zero value
// for a real row ID.
func (wc *WrappedClient) personRowIDFromUserID(userID string) (*int64, error) {
	var rowID int64
	err := wc.tl.db.QueryRow(`SELECT person_id FROM person_identities
WHERE data_source_id=? AND user_id=? LIMIT 1`,
		wc.ds.ID, userID).Scan(&rowID)
	if err != nil {
		return nil, err
	}
	return &rowID, nil
}
// itemLocks is used to ensure that an item