mirror of
https://github.com/mholt/timeliner.git
synced 2026-01-06 04:40:18 -06:00
424 lines
13 KiB
Go
424 lines
13 KiB
Go
package timeliner
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/gob"
|
|
"io"
|
|
"time"
|
|
)
|
|
|
|
// Item is the central concept of a piece of content
|
|
// from a service or data source. Take note of which
|
|
// methods are required to return non-empty values.
|
|
//
|
|
// The actual content of an item is stored either in
|
|
// the database or on disk as a file. Generally,
|
|
// content that is text-encoded can and should be
|
|
// stored in the database where it will be indexed.
|
|
// However, if the item's content (for example, the
|
|
// bytes of a photo or video) are not text or if the
|
|
// text is too large to store well in a database (for
|
|
// example, an entire novel), it should be stored
|
|
// on disk, and this interface has methods to
|
|
// accommodate both. Note that an item may have both
|
|
// text and non-text content, too: for example, photos
|
|
// and videos may have descriptions that are as much
|
|
// "content" as the media iteself. One part of an item
|
|
// is not mutually exclusive with any other.
|
|
type Item interface {
|
|
// The unique ID of the item assigned by the service.
|
|
// If the service does not assign one, then invent
|
|
// one such that the ID is unique to the content or
|
|
// substance of the item (for example, an ID derived
|
|
// from timestamp or from the actual content of the
|
|
// item -- whatever makes it unique). The ID need
|
|
// only be unique for the account it is associated
|
|
// with, although more unique is, of course, acceptable.
|
|
//
|
|
// REQUIRED.
|
|
ID() string
|
|
|
|
// The originating timestamp of the item, which
|
|
// may be different from when the item was posted
|
|
// or created. For example, a photo may be taken
|
|
// one day but uploaded a week later. Prefer the
|
|
// time when the original item content was captured.
|
|
//
|
|
// REQUIRED.
|
|
Timestamp() time.Time
|
|
|
|
// A classification of the item's kind.
|
|
//
|
|
// REQUIRED.
|
|
Class() ItemClass
|
|
|
|
// The user/account ID of the owner or
|
|
// originator of the content, along with their
|
|
// username or real name. The ID is used to
|
|
// relate the item with the person behind it;
|
|
// the name is used to make the person
|
|
// recognizable to the human reader. If the
|
|
// ID is nil, the current account owner will
|
|
// be assumed. (Use the ID as given by the
|
|
// data source.) If the data source only
|
|
// provides a name but no ID, you may return
|
|
// the name as the ID with the understanding
|
|
// that a different name will be counted as a
|
|
// different person. You may also return the
|
|
// name as the name and leave the ID nil and
|
|
// have correct results if it is safe to assume
|
|
// the name belongs to the current account owner.
|
|
Owner() (id *string, name *string)
|
|
|
|
// Returns the text of the item, if any.
|
|
// This field is indexed in the DB, so don't
|
|
// use for unimportant metadata or huge
|
|
// swaths of text; if there is a large
|
|
// amount of text, use an item file instead.
|
|
DataText() (*string, error)
|
|
|
|
// For primary content which is not text or
|
|
// which is too large to be stored well in a
|
|
// database, the content can be downloaded
|
|
// into a file. If so, the following methods
|
|
// should return the necessary information,
|
|
// if available from the service, so that a
|
|
// data file can be obtained, stored, and
|
|
// later read successfully.
|
|
//
|
|
// DataFileName returns the filename (NOT full
|
|
// path or URL) of the file; prefer the original
|
|
// filename if it originated as a file. If the
|
|
// filename is not unique on disk when downloaded,
|
|
// it will be made unique by modifying it. If
|
|
// this value is nil/empty, a filename will be
|
|
// generated from the item's other data.
|
|
//
|
|
// DataFileReader returns a way to read the data.
|
|
// It will be closed when the read is completed.
|
|
//
|
|
// DataFileHash returns the checksum of the
|
|
// content as provided by the service. If the
|
|
// service (or data source) does not provide a
|
|
// hash, leave this field empty, but note that
|
|
// later it will be impossible to efficiently
|
|
// know whether the content has changed on the
|
|
// service from what is stored locally.
|
|
//
|
|
// DataFileMIMEType returns the MIME type of
|
|
// the data file, if known.
|
|
DataFileName() *string
|
|
DataFileReader() (io.ReadCloser, error)
|
|
DataFileHash() []byte
|
|
DataFileMIMEType() *string
|
|
|
|
// Metadata returns any optional metadata.
|
|
// Feel free to leave as many fields empty
|
|
// as you'd like: the less fields that are
|
|
// filled out, the smaller the storage size.
|
|
// Metadata is not indexed by the DB but is
|
|
// rendered in projections and queries
|
|
// according to the item's classification.
|
|
Metadata() (*Metadata, error)
|
|
|
|
// Location returns an item's location,
|
|
// if known. For now, only Earth
|
|
// coordinates are accepted, but we can
|
|
// improve this later.
|
|
Location() (*Location, error)
|
|
}
|
|
|
|
// ItemClass identifies the kind of content an item holds.
type ItemClass int

// The known item classes. Values are assigned in declaration
// order starting at zero, so ClassUnknown is the zero value of
// ItemClass.
//
// NOTE(review): ItemRow carries a Class field, so these numbers
// are presumably persisted; if so, new classes should be added
// at the end rather than inserted -- confirm.
const (
	ClassUnknown ItemClass = iota
	ClassImage
	ClassVideo
	ClassAudio
	ClassPost
	ClassLocation
	ClassEmail
	ClassPrivateMessage
	ClassMessage
)
|
|
|
|
// These are the standard relationships that Timeliner
|
|
// recognizes. Using these known relationships is not
|
|
// required, but it makes it easier to translate them to
|
|
// human-friendly phrases when visualizing the timeline.
|
|
var (
|
|
RelReplyTo = Relation{Label: "reply_to", Bidirectional: false} // "<from> is in reply to <to>"
|
|
RelAttached = Relation{Label: "attached", Bidirectional: true} // "<to|from> is attached to <from|to>"
|
|
RelQuotes = Relation{Label: "quotes", Bidirectional: false} // "<from> quotes <to>"
|
|
RelCCed = Relation{Label: "carbon_copied", Bidirectional: false} // "<from_item> is carbon-copied to <to_person>"
|
|
)
|
|
|
|
// ItemRow has the structure of an item's row in our DB.
|
|
type ItemRow struct {
|
|
ID int64
|
|
AccountID int64
|
|
OriginalID string
|
|
PersonID int64
|
|
Timestamp time.Time
|
|
Stored time.Time
|
|
Modified *time.Time
|
|
Class ItemClass
|
|
MIMEType *string
|
|
DataText *string
|
|
DataFile *string
|
|
DataHash *string // base64-encoded SHA-256
|
|
Metadata *Metadata
|
|
Location
|
|
|
|
metaGob []byte // use Metadata.(encode/decode)
|
|
}
|
|
|
|
// Location holds the coordinates of a place as a latitude and
// longitude pair. Both fields are pointers.
// NOTE(review): a nil field presumably means the coordinate is
// unknown -- confirm against the code that reads these.
type Location struct {
	Latitude  *float64
	Longitude *float64
}
|
|
|
|
// ItemGraph is an item with optional connections to other items.
|
|
// All ItemGraph values should be pointers to ensure consistency.
|
|
// The usual weird/fun thing about representing graph data structures
|
|
// in memory is that a graph is a node, and a node is a graph. 🤓
|
|
type ItemGraph struct {
|
|
// The node item. This can be nil, but note that
|
|
// Edges will not be traversed if Node is nil,
|
|
// because there must be a node on both ends of
|
|
// an edge.
|
|
//
|
|
// Optional.
|
|
Node Item
|
|
|
|
// Edges are represented as 1:many relations
|
|
// to other "graphs" (nodes in the graph).
|
|
// Fill this out to add multiple items to the
|
|
// timeline at once, while drawing the
|
|
// designated relationships between them.
|
|
// Useful when processing related items in
|
|
// batches.
|
|
//
|
|
// Directional relationships go from Node to
|
|
// the map key.
|
|
//
|
|
// If the items involved in a relationship are
|
|
// not efficiently available at the same time
|
|
// (i.e. if loading both items involved in the
|
|
// relationship would take a non-trivial amount
|
|
// of time or API calls), you can use the
|
|
// Relations field instead, but only after the
|
|
// items have been added to the timeline.
|
|
//
|
|
// Optional.
|
|
Edges map[*ItemGraph][]Relation
|
|
|
|
// If items in the graph belong to a collection,
|
|
// specify them here. If the collection does not
|
|
// exist (by row ID or AccountID+OriginalID), it
|
|
// will be created. If it already exists, the
|
|
// collection in the DB will be unioned with the
|
|
// collection specified here. Collections are
|
|
// processed regardless of Node and Edges.
|
|
//
|
|
// Optional.
|
|
Collections []Collection
|
|
|
|
// Relationships between existing items in the
|
|
// timeline can be represented here in a list
|
|
// of item IDs that are connected by a label.
|
|
// This field is useful when relationships and
|
|
// the items involved in them are not discovered
|
|
// at the same time. Relations in this list will
|
|
// be added to the timeline, joined by the item
|
|
// IDs described in the RawRelations, only if
|
|
// the items having those IDs (as provided by
|
|
// the data source; we're not talking about DB
|
|
// row IDs here) already exist in the timeline.
|
|
// In other words, this is a best-effort field;
|
|
// useful for forming relationships of existing
|
|
// items, but without access to the actual items
|
|
// themselves. If you have the items involved in
|
|
// the relationships, use Edges instead.
|
|
//
|
|
// Optional.
|
|
Relations []RawRelation
|
|
}
|
|
|
|
// NewItemGraph returns a new node/graph.
|
|
func NewItemGraph(node Item) *ItemGraph {
|
|
return &ItemGraph{
|
|
Node: node,
|
|
Edges: make(map[*ItemGraph][]Relation),
|
|
}
|
|
}
|
|
|
|
// Add adds item to the graph ig by making an edge described
|
|
// by rel from the node ig to a new node for item.
|
|
//
|
|
// This method is for simple inserts, where the only thing to add
|
|
// to the graph at this moment is a single item, since the graph
|
|
// it inserts contains only a single node populated by item. To
|
|
// add a full graph with multiple items (i.e. a graph with edges),
|
|
// call ig.Connect directly.
|
|
func (ig *ItemGraph) Add(item Item, rel Relation) {
|
|
ig.Connect(NewItemGraph(item), rel)
|
|
}
|
|
|
|
// Connect is a simple convenience function that adds a graph (node)
|
|
// to ig by an edge described by rel.
|
|
func (ig *ItemGraph) Connect(node *ItemGraph, rel Relation) {
|
|
if ig.Edges == nil {
|
|
ig.Edges = make(map[*ItemGraph][]Relation)
|
|
}
|
|
ig.Edges[node] = append(ig.Edges[node], rel)
|
|
}
|
|
|
|
// RawRelation represents a relationship between
|
|
// two items or people (or both) from the same
|
|
// data source (but not necessarily the same
|
|
// accounts; we assume that a data source's item
|
|
// IDs are globally unique across accounts).
|
|
// The item IDs should be those which are
|
|
// assigned/provided by the data source, NOT a
|
|
// database row ID. Likewise, the persons' user
|
|
// IDs should be the IDs of the user as associated
|
|
// with the data source, NOT their row IDs.
|
|
type RawRelation struct {
|
|
FromItemID string
|
|
ToItemID string
|
|
FromPersonUserID string
|
|
ToPersonUserID string
|
|
Relation
|
|
}
|
|
|
|
// Relation describes how two nodes in a graph relate to each
// other; it is essentially an edge on a graph, identified by a
// label and marked as either bidirectional or directional.
type Relation struct {
	Label         string
	Bidirectional bool
}
|
|
|
|
// Collection represents a group of items.
|
|
type Collection struct {
|
|
// The ID of the collection as given
|
|
// by the service; for example, the
|
|
// album ID. If the service does not
|
|
// provide an ID for the collection,
|
|
// invent one such that the next time
|
|
// the collection is encountered and
|
|
// processed, its ID will be the same.
|
|
// An ID is necessary here to ensure
|
|
// uniqueness.
|
|
//
|
|
// REQUIRED.
|
|
OriginalID string
|
|
|
|
// The name of the collection as
|
|
// given by the service; for example,
|
|
// the album title.
|
|
//
|
|
// Optional.
|
|
Name *string
|
|
|
|
// The description, caption, or any
|
|
// other relevant text describing
|
|
// the collection.
|
|
//
|
|
// Optional.
|
|
Description *string
|
|
|
|
// The items for the collection;
|
|
// if ordering is significant,
|
|
// specify each item's Position
|
|
// field; the order of elememts
|
|
// of this slice will not be
|
|
// considered important.
|
|
Items []CollectionItem
|
|
}
|
|
|
|
// CollectionItem represents an item
|
|
// stored in a collection.
|
|
type CollectionItem struct {
|
|
// The item to add to the collection.
|
|
Item Item
|
|
|
|
// Specify if ordering is important.
|
|
Position int
|
|
|
|
// Used when processing; this will
|
|
// store the row ID of the item
|
|
// after the item has been inserted
|
|
// into the DB.
|
|
itemRowID int64
|
|
}
|
|
|
|
// Metadata is a unified structure for storing item metadata in
// the DB. Its fields are grouped below by the kind of item they
// describe.
type Metadata struct {
	// ServiceHash is a hash or etag provided by the service
	// that makes it easy to know whether the item has changed.
	ServiceHash []byte

	// Locations.
	LocationAccuracy int
	Altitude         int // meters
	AltitudeAccuracy int
	Heading          int // degrees
	Velocity         int

	// GeneralArea is a natural language description of a
	// location.
	GeneralArea string

	// Photos and videos.
	EXIF map[string]interface{}
	// TODO: Should we have some of the "most important" EXIF fields explicitly here?

	Width  int
	Height int

	// TODO: Google Photos (how many of these belong in EXIF?)
	CameraMake      string
	CameraModel     string
	FocalLength     float64
	ApertureFNumber float64
	ISOEquivalent   int
	ExposureTime    time.Duration

	// FPS is the frame rate in frames per second.
	FPS float64

	// Posts (Facebook so far).
	Link        string
	Description string
	Name        string
	ParentID    string
	StatusType  string
	Type        string

	// Shares is also known as "Retweets" or "Reshares".
	Shares int
	Likes  int
}
|
|
|
|
func (m *Metadata) encode() ([]byte, error) {
|
|
// then encode the actual data, and trim off
|
|
// schema from the beginning
|
|
buf := new(bytes.Buffer)
|
|
err := gob.NewEncoder(buf).Encode(m)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return buf.Bytes()[len(metadataGobPrefix):], nil
|
|
}
|
|
|
|
func (m *Metadata) decode(b []byte) error {
|
|
if b == nil {
|
|
return nil
|
|
}
|
|
fullGob := append(metadataGobPrefix, b...)
|
|
return gob.NewDecoder(bytes.NewReader(fullGob)).Decode(m)
|
|
}
|
|
|
|
// metadataGobPrefix holds the leading bytes that encode strips
// from every Metadata gob and that decode puts back in front of
// the stored bytes before decoding. It is declared here without
// a value.
// NOTE(review): presumably assigned once during package
// initialization from the gob encoding of a Metadata value --
// confirm where it is set, and that it is never modified
// afterwards.
var metadataGobPrefix []byte
|