Files
timeliner/itemgraph.go

424 lines
13 KiB
Go

package timeliner
import (
"bytes"
"encoding/gob"
"io"
"time"
)
// Item is the central concept of a piece of content
// from a service or data source. Take note of which
// methods are required to return non-empty values;
// they are marked REQUIRED below (ID, Timestamp, and
// Class).
//
// The actual content of an item is stored either in
// the database or on disk as a file. Generally,
// content that is text-encoded can and should be
// stored in the database where it will be indexed.
// However, if the item's content (for example, the
// bytes of a photo or video) is not text or if the
// text is too large to store well in a database (for
// example, an entire novel), it should be stored
// on disk, and this interface has methods to
// accommodate both. Note that an item may have both
// text and non-text content, too: for example, photos
// and videos may have descriptions that are as much
// "content" as the media itself. One part of an item
// is not mutually exclusive with any other.
type Item interface {
// The unique ID of the item assigned by the service.
// If the service does not assign one, then invent
// one such that the ID is unique to the content or
// substance of the item (for example, an ID derived
// from timestamp or from the actual content of the
// item -- whatever makes it unique). The ID need
// only be unique for the account it is associated
// with, although more unique is, of course, acceptable.
//
// REQUIRED.
ID() string
// The originating timestamp of the item, which
// may be different from when the item was posted
// or created. For example, a photo may be taken
// one day but uploaded a week later. Prefer the
// time when the original item content was captured.
//
// REQUIRED.
Timestamp() time.Time
// A classification of the item's kind; one of the
// Class* constants.
//
// REQUIRED.
Class() ItemClass
// The user/account ID of the owner or
// originator of the content, along with their
// username or real name. The ID is used to
// relate the item with the person behind it;
// the name is used to make the person
// recognizable to the human reader. If the
// ID is nil, the current account owner will
// be assumed. (Use the ID as given by the
// data source.) If the data source only
// provides a name but no ID, you may return
// the name as the ID with the understanding
// that a different name will be counted as a
// different person. You may also return the
// name as the name and leave the ID nil and
// have correct results if it is safe to assume
// the name belongs to the current account owner.
Owner() (id *string, name *string)
// Returns the text of the item, if any.
// This field is indexed in the DB, so don't
// use it for unimportant metadata or huge
// swaths of text; if there is a large
// amount of text, use an item file instead.
DataText() (*string, error)
// For primary content which is not text or
// which is too large to be stored well in a
// database, the content can be downloaded
// into a file. If so, the following methods
// should return the necessary information,
// if available from the service, so that a
// data file can be obtained, stored, and
// later read successfully.
//
// DataFileName returns the filename (NOT full
// path or URL) of the file; prefer the original
// filename if it originated as a file. If the
// filename is not unique on disk when downloaded,
// it will be made unique by modifying it. If
// this value is nil/empty, a filename will be
// generated from the item's other data.
//
// DataFileReader returns a way to read the data.
// It will be closed when the read is completed.
//
// DataFileHash returns the checksum of the
// content as provided by the service. If the
// service (or data source) does not provide a
// hash, leave this field empty, but note that
// later it will be impossible to efficiently
// know whether the content has changed on the
// service from what is stored locally.
//
// DataFileMIMEType returns the MIME type of
// the data file, if known.
DataFileName() *string
DataFileReader() (io.ReadCloser, error)
DataFileHash() []byte
DataFileMIMEType() *string
// Metadata returns any optional metadata.
// Feel free to leave as many fields empty
// as you'd like: the fewer fields that are
// filled out, the smaller the storage size.
// Metadata is not indexed by the DB but is
// rendered in projections and queries
// according to the item's classification.
Metadata() (*Metadata, error)
// Location returns an item's location,
// if known. For now, only Earth
// coordinates are accepted, but we can
// improve this later.
Location() (*Location, error)
}
// ItemClass classifies an item. It is an integer
// enumeration; the Class* constants below are the
// known values.
type ItemClass int
// Various classes of items. The values are assigned
// sequentially by iota starting at 0, so ClassUnknown
// is the zero value of ItemClass and the numeric value
// of every other class depends on its position in
// this list.
//
// NOTE(review): ItemRow carries a Class field, so these
// numbers are presumably persisted; if that is the case,
// new classes should be appended rather than inserted
// or reordered -- confirm against the storage code.
const (
ClassUnknown ItemClass = iota
ClassImage
ClassVideo
ClassAudio
ClassPost
ClassLocation
ClassEmail
ClassPrivateMessage
ClassMessage
)
// These are the standard relationships that Timeliner
// recognizes. Using these known relationships is not
// required, but it makes it easier to translate them to
// human-friendly phrases when visualizing the timeline.
//
// The trailing comment on each line gives the phrase the
// relation stands for; "from" is the node the edge starts
// at and "to" is the node it points to.
//
// These are package-level variables (Go has no struct
// constants), so they can technically be reassigned;
// treat them as read-only.
var (
RelReplyTo = Relation{Label: "reply_to", Bidirectional: false} // "<from> is in reply to <to>"
RelAttached = Relation{Label: "attached", Bidirectional: true} // "<to|from> is attached to <from|to>"
RelQuotes = Relation{Label: "quotes", Bidirectional: false} // "<from> quotes <to>"
RelCCed = Relation{Label: "carbon_copied", Bidirectional: false} // "<from_item> is carbon-copied to <to_person>"
)
// ItemRow has the structure of an item's row in our DB.
//
// Pointer-typed fields can be nil. Location is embedded,
// so its Latitude and Longitude fields are promoted onto
// ItemRow.
//
// NOTE(review): the code that reads and writes rows is not
// visible here. The field notes below that are marked
// "presumably" are inferred from names and from the Item
// interface -- confirm them against the schema.
type ItemRow struct {
// presumably the DB row ID of the item
ID int64
AccountID int64
// presumably the ID assigned by the data source (cf. Item.ID)
OriginalID string
PersonID int64
Timestamp time.Time
Stored time.Time
Modified *time.Time
Class ItemClass
MIMEType *string
DataText *string
DataFile *string
DataHash *string // base64-encoded SHA-256
Metadata *Metadata
Location
// presumably the gob-encoded form of Metadata as stored in the row
metaGob []byte // use Metadata.(encode/decode)
}
// Location contains location information as a
// latitude/longitude pair. Both fields are pointers,
// so each one can be nil independently of the other.
//
// NOTE(review): nil presumably means "unknown", and the
// values are presumably decimal degrees -- confirm.
type Location struct {
Latitude *float64
Longitude *float64
}
// ItemGraph is an item with optional connections to other items.
// All ItemGraph values should be pointers to ensure consistency
// (Edges is keyed by *ItemGraph, so node identity is pointer identity).
// The usual weird/fun thing about representing graph data structures
// in memory is that a graph is a node, and a node is a graph. 🤓
//
// Use NewItemGraph to create one with Edges already allocated, and
// Add or Connect to attach further nodes.
type ItemGraph struct {
// The node item. This can be nil, but note that
// Edges will not be traversed if Node is nil,
// because there must be a node on both ends of
// an edge.
//
// Optional.
Node Item
// Edges are represented as 1:many relations
// to other "graphs" (nodes in the graph).
// Fill this out to add multiple items to the
// timeline at once, while drawing the
// designated relationships between them.
// Useful when processing related items in
// batches.
//
// Directional relationships go from Node to
// the map key.
//
// If the items involved in a relationship are
// not efficiently available at the same time
// (i.e. if loading both items involved in the
// relationship would take a non-trivial amount
// of time or API calls), you can use the
// Relations field instead, but only after the
// items have been added to the timeline.
//
// Optional.
Edges map[*ItemGraph][]Relation
// If items in the graph belong to a collection,
// specify them here. If the collection does not
// exist (by row ID or AccountID+OriginalID), it
// will be created. If it already exists, the
// collection in the DB will be unioned with the
// collection specified here. Collections are
// processed regardless of Node and Edges.
//
// Optional.
Collections []Collection
// Relationships between existing items in the
// timeline can be represented here in a list
// of item IDs that are connected by a label.
// This field is useful when relationships and
// the items involved in them are not discovered
// at the same time. Relations in this list will
// be added to the timeline, joined by the item
// IDs described in each RawRelation, only if
// the items having those IDs (as provided by
// the data source; we're not talking about DB
// row IDs here) already exist in the timeline.
// In other words, this is a best-effort field;
// useful for forming relationships of existing
// items, but without access to the actual items
// themselves. If you have the items involved in
// the relationships, use Edges instead.
//
// Optional.
Relations []RawRelation
}
// NewItemGraph allocates a graph whose node is the given item.
// The returned graph has an empty, non-nil Edges map, so edges
// can be written to it directly; every other field is left at
// its zero value. node may be nil.
func NewItemGraph(node Item) *ItemGraph {
	graph := new(ItemGraph)
	graph.Node = node
	graph.Edges = map[*ItemGraph][]Relation{}
	return graph
}
// Add wraps item in a brand-new single-node graph and links
// that graph to ig with an edge described by rel (the edge
// runs from ig to the new node).
//
// Use this for simple inserts where one lone item is being
// attached. When the thing being attached is itself a graph
// with its own edges, build it separately and hand it to
// ig.Connect instead.
func (ig *ItemGraph) Add(item Item, rel Relation) {
	leaf := NewItemGraph(item)
	ig.Connect(leaf, rel)
}
// Connect links the graph (node) to ig with an edge described
// by rel. The Edges map is created on first use, so Connect is
// safe on a zero-value ItemGraph. Connecting the same node more
// than once accumulates the relations, in call order, under
// that node's key.
func (ig *ItemGraph) Connect(node *ItemGraph, rel Relation) {
	if ig.Edges == nil {
		// very first edge: nothing can be recorded for node yet
		ig.Edges = map[*ItemGraph][]Relation{node: {rel}}
		return
	}
	existing := ig.Edges[node]
	ig.Edges[node] = append(existing, rel)
}
// RawRelation represents a relationship between
// two items or people (or both) from the same
// data source (but not necessarily the same
// accounts; we assume that a data source's item
// IDs are globally unique across accounts).
// The item IDs should be those which are
// assigned/provided by the data source, NOT a
// database row ID. Likewise, the persons' user
// IDs should be the IDs of the user as associated
// with the data source, NOT their row IDs.
//
// Relation is embedded, so Label and Bidirectional
// are available directly on a RawRelation.
//
// NOTE(review): presumably only the ID fields that
// apply to a given relationship are filled in and
// the others are left empty -- confirm against the
// code that processes ItemGraph.Relations.
type RawRelation struct {
FromItemID string
ToItemID string
FromPersonUserID string
ToPersonUserID string
Relation
}
// Relation describes how two nodes in a graph are related.
// It's essentially an edge on a graph. See RelReplyTo,
// RelAttached, RelQuotes and RelCCed for the standard ones.
type Relation struct {
// Label names the kind of relationship, for example
// "reply_to" or "attached".
Label string
// Bidirectional is true when the relationship reads the
// same in both directions; when false, it is directed
// from the "from" node to the "to" node.
Bidirectional bool
}
// Collection represents a group of items,
// for example a photo album.
type Collection struct {
// The ID of the collection as given
// by the service; for example, the
// album ID. If the service does not
// provide an ID for the collection,
// invent one such that the next time
// the collection is encountered and
// processed, its ID will be the same.
// An ID is necessary here to ensure
// uniqueness.
//
// REQUIRED.
OriginalID string
// The name of the collection as
// given by the service; for example,
// the album title.
//
// Optional.
Name *string
// The description, caption, or any
// other relevant text describing
// the collection.
//
// Optional.
Description *string
// The items for the collection;
// if ordering is significant,
// specify each item's Position
// field; the order of elements
// of this slice will not be
// considered important.
Items []CollectionItem
}
// CollectionItem represents an item
// stored in a collection, together
// with its (optional) position within
// that collection.
type CollectionItem struct {
// The item to add to the collection.
Item Item
// Specify if ordering is important.
Position int
// Used when processing; this will
// store the row ID of the item
// after the item has been inserted
// into the DB. It is unexported, so
// code outside this package cannot
// set it.
itemRowID int64
}
// Metadata is a unified structure for storing
// item metadata in the DB. It is serialized with
// encoding/gob by the encode and decode methods.
// The fields are grouped by the kind of item they
// typically describe; leave unused fields at their
// zero value.
//
// NOTE(review): encode/decode strip and re-attach a
// fixed gob prefix (metadataGobPrefix), so changing
// the fields of this struct presumably affects
// whether previously stored metadata can still be
// decoded -- confirm before adding, removing or
// retyping fields.
type Metadata struct {
// A hash or etag provided by the service to
// make it easy to know if it has changed
ServiceHash []byte
// Locations
LocationAccuracy int
Altitude int // meters
AltitudeAccuracy int
Heading int // degrees
Velocity int
GeneralArea string // natural language description of a location
// Photos and videos
//
// EXIF values are interface-typed; gob can only encode
// concrete types it knows about, so anything stored here
// beyond basic types must be registered with gob.Register.
EXIF map[string]interface{}
// TODO: Should we have some of the "most important" EXIF fields explicitly here?
Width int
Height int
// TODO: Google Photos (how many of these belong in EXIF?)
CameraMake string
CameraModel string
FocalLength float64
ApertureFNumber float64
ISOEquivalent int
ExposureTime time.Duration
FPS float64 // Frames Per Second
// Posts (Facebook so far)
Link string
Description string
Name string
ParentID string
StatusType string
Type string
Shares int // aka "Retweets" or "Reshares"
Likes int
}
// encode serializes m with encoding/gob and returns the result
// with the first len(metadataGobPrefix) bytes removed. That
// leading portion is the schema part of the gob stream, so it
// is dropped here and put back by decode.
//
// A nil receiver yields (nil, nil) instead of panicking inside
// gob (which refuses to encode nil pointers); this pairs with
// decode, which treats a nil input as "nothing to decode".
//
// The returned slice assumes every encoding of a Metadata
// starts with metadataGobPrefix; the bytes are not compared.
func (m *Metadata) encode() ([]byte, error) {
	if m == nil {
		return nil, nil
	}

	var encoded bytes.Buffer
	if err := gob.NewEncoder(&encoded).Encode(m); err != nil {
		return nil, err
	}

	// keep only what follows the shared schema prefix
	return encoded.Bytes()[len(metadataGobPrefix):], nil
}
// decode fills m from b, where b is the output of encode: a
// gob stream with metadataGobPrefix removed from the front.
// The prefix is put back before the bytes are handed to gob.
//
// A nil b means there is nothing to decode; m is left as-is
// and nil is returned. Any error from the gob decoder is
// returned unchanged.
func (m *Metadata) decode(b []byte) error {
	if b == nil {
		return nil
	}

	// Assemble the full stream in a buffer of our own. Appending
	// straight onto metadataGobPrefix would write into that
	// slice's spare capacity whenever it has any, i.e. into
	// package-level state shared by every caller.
	fullGob := make([]byte, 0, len(metadataGobPrefix)+len(b))
	fullGob = append(fullGob, metadataGobPrefix...)
	fullGob = append(fullGob, b...)

	return gob.NewDecoder(bytes.NewReader(fullGob)).Decode(m)
}
// metadataGobPrefix is the leading portion of a gob-encoded
// Metadata that (*Metadata).encode strips from its output and
// (*Metadata).decode prepends to its input.
//
// NOTE(review): nothing in the visible code assigns this
// variable; it is presumably populated once at start-up
// elsewhere in the package. While it is nil, encode and
// decode operate on complete gob streams -- confirm.
var metadataGobPrefix []byte