photo-index: index photos by source too (#2833)

Fixes https://github.com/attic-labs/attic/issues/19
This commit is contained in:
Aaron Boodman
2016-11-15 14:20:17 -08:00
committed by GitHub
parent e730306bd2
commit 7f760a9ba3
5 changed files with 68 additions and 23 deletions
+25 -7
View File
@@ -79,11 +79,13 @@ func index() (win bool) {
d.CheckErrorNoUsage(err)
byDate := types.NewGraphBuilder(db, types.MapKind, true)
byTag := types.NewGraphBuilder(db, types.MapKind, true)
byFace := types.NewGraphBuilder(db, types.MapKind, true)
bySource := types.NewGraphBuilder(db, types.MapKind, true)
byTag := types.NewGraphBuilder(db, types.MapKind, true)
tagCounts := map[types.String]int{}
faceCounts := map[types.String]int{}
sourceCounts := map[types.String]int{}
tagCounts := map[types.String]int{}
countsMtx := sync.Mutex{}
addToIndex := func(p Photo, cv types.Value) {
@@ -127,6 +129,17 @@ func index() (win bool) {
}
}
// Index by source, then date. The sources are read by unmarshalling cv into
// a struct with a Sources field; if that fails, the photo is simply left out
// of the by-source index and contributes nothing to the source counts.
moreSources := map[types.String]int{}
var ws struct {
	Sources []string
}
if err = marshal.Unmarshal(cv, &ws); err == nil {
	for _, s := range ws.Sources {
		source := types.String(s)
		bySource.SetInsert([]types.Value{source, types.Number(d)}, cv)
		// Tally this photo against its source. moreSources is merged into
		// sourceCounts under countsMtx further down; without this increment
		// the merge loop has nothing to add and sourcesByCount stays empty.
		moreSources[source]++
	}
}
countsMtx.Lock()
for tag, count := range moreTags {
tagCounts[tag] += count
@@ -134,6 +147,9 @@ func index() (win bool) {
for face, count := range moreFaces {
faceCounts[face] += count
}
for source, count := range moreSources {
sourceCounts[source] += count
}
countsMtx.Unlock()
}
@@ -171,11 +187,13 @@ func index() (win bool) {
}
outDS, err = db.Commit(outDS, types.NewStruct("", types.StructData{
"byDate": byDate.Build(),
"byTag": byTag.Build(),
"byFace": byFace.Build(),
"tagsByCount": stringsByCount(db, tagCounts),
"facesByCount": stringsByCount(db, faceCounts),
"byDate": byDate.Build(),
"byFace": byFace.Build(),
"bySource": bySource.Build(),
"byTag": byTag.Build(),
"facesByCount": stringsByCount(db, faceCounts),
"sourcesByCount": stringsByCount(db, sourceCounts),
"tagsByCount": stringsByCount(db, tagCounts),
}), datas.CommitOptions{
Meta: types.NewStruct("", types.StructData{
"date": types.String(time.Now().Format(time.RFC3339)),
+11 -5
View File
@@ -25,12 +25,16 @@ const args = argv
.usage(
'Indexes Photo objects out of slurped Dropbox metadata.\n' +
'See dropbox/slurp for how to get an access token.\n\n' +
'Usage: node . --access-token=<token> <in-object> <out-dataset>')
'Usage: node . [flags] <in-object> <out-dataset>')
.option('access-token', {
describe: 'Dropbox oauth access token',
type: 'string',
demand: true,
})
.option('source-tags', {
describe: 'comma-separated list of source tags to write into created photos',
type: 'string',
})
.demand(2)
.argv;
@@ -82,6 +86,7 @@ async function main(): Promise<void> {
if (!input) {
throw `Input spec ${args._[0]} does not exist`;
}
const sourceTags = new Set(args['source-tags'] ? args['source-tags'].split(',') : []);
const outSpec = DatasetSpec.parse(args._[1]);
const [outDB, output] = outSpec.dataset();
let result = Promise.resolve(new Set());
@@ -92,12 +97,13 @@ async function main(): Promise<void> {
const resources = getResources(v);
const photo: Object = {
id: `https://github.com/attic-labs/noms/samples/js/dropbox/find-photos#${v.id}`,
title: v.name,
tags: new Set(),
sizes: await getSizes(resources),
resources: resources,
dateTaken: newDate(v.media_info.metadata.time_taken),
dateUpdated: newDate(v.server_modified),
resources: resources,
sizes: await getSizes(resources),
sources: sourceTags,
tags: new Set(),
title: v.name,
};
if (isSubtype(hasLocation, getTypeOfValue(v.media_info.metadata))) {
+10 -4
View File
@@ -26,6 +26,10 @@ const args = argv
'Finds photos in slurped Facebook metadata\n\n' +
'Usage: node . <in-path> <out-dataset>')
.demand(2)
.option('source-tags', {
describe: 'comma-separated list of source tags to write into created photos',
type: 'string',
})
.argv;
main().catch(ex => {
@@ -82,6 +86,7 @@ async function main(): Promise<void> {
if (!input) {
throw `Invalid input spec: ${inSpec.toString()}`;
}
const sourceTags = new Set(args['source-tags'] ? args['source-tags'].split(',') : []);
const outSpec = DatasetSpec.parse(args._[1]);
const [outDB, output] = outSpec.dataset();
let result = Promise.resolve(new Set());
@@ -91,12 +96,13 @@ async function main(): Promise<void> {
if (v instanceof Struct && isSubtype(photoType, v.type)) {
const photo: Object = {
id: `https://github.com/attic-labs/noms/samples/js/fb/find-photos#${v.id}`,
title: v.name || '',
sizes: await getSizes(v),
resources: await getResources(v),
tags: new Set(), // fb has 'tags', but they are actually people not textual tags
datePublished: new NomsDate({nsSinceEpoch: v.created_time * 1e9}),
dateUpdated: new NomsDate({nsSinceEpoch: v.updated_time * 1e9}),
resources: await getResources(v),
sizes: await getSizes(v),
sources: sourceTags,
tags: new Set(), // fb has 'tags', but they are actually people not textual tags
title: v.name || '',
};
if (isSubtype(placeType, v.type)) {
photo.geoposition = getGeo(v);
+11 -4
View File
@@ -26,6 +26,10 @@ const args = argv
'Indexes Photo objects out of slurped Flickr metadata\n\n' +
'Usage: node . <in-object> <out-dataset>')
.demand(2)
.option('source-tags', {
describe: 'comma-separated list of source tags to write into created photos',
type: 'string',
})
.argv;
const sizes = ['t', 's', 'm', 'l', 'o'];
@@ -88,6 +92,8 @@ async function main(): Promise<void> {
if (!input) {
throw `Input spec ${args._[0]} does not exist`;
}
const sourceTags = new Set(args['source-tags'] ? args['source-tags'].split(',') : []);
const outSpec = DatasetSpec.parse(args._[1]);
const [outDB, output] = outSpec.dataset();
let result = Promise.resolve(new Set());
@@ -97,12 +103,13 @@ async function main(): Promise<void> {
if (isSubtype(imageType, getTypeOfValue(v))) {
const photo: Object = {
id: `https://github.com/attic-labs/noms/samples/js/flickr/find-photos#${v.id}`,
title: v.title,
tags: new Set(v.tags ? v.tags.split(' ') : []),
sizes: getSizes(v),
resources: getResources(v),
datePublished: newDate(Number(v.dateupload) * nsInSecond),
dateUpdated: newDate(Number(v.lastupdate) * nsInSecond),
resources: getResources(v),
sizes: getSizes(v),
sources: sourceTags,
tags: new Set(v.tags ? v.tags.split(' ') : []),
title: v.title,
};
if (!v.datetakenunknown) {
+11 -3
View File
@@ -25,8 +25,12 @@ import {
const args = argv
.usage(
'Finds Noms Photo objects from output of picasa/slurp\n\n' +
'Usage: node . <in-path> <out-dataset>')
'Usage: node . [flags] <in-path> <out-dataset>')
.demand(2)
.option('source-tags', {
describe: 'comma-separated list of source tags to write into created photos',
type: 'string',
})
.argv;
main().catch(ex => {
@@ -92,10 +96,13 @@ async function main(): Promise<void> {
if (!pinnedSpec) {
throw `Invalid input dataset: ${inSpec.path.dataset}`;
}
const [db, input] = await pinnedSpec.value();
if (!input) {
throw `Invalid input spec: ${inSpec.toString()}`;
}
const sourceTags = new Set(args['source-tags'] ? args['source-tags'].split(',') : []);
const outSpec = DatasetSpec.parse(args._[1]);
const [outDB, output] = outSpec.dataset();
const result = [];
@@ -109,10 +116,11 @@ async function main(): Promise<void> {
const photo: Object = {
id: 'https://github.com/attic-labs/noms/samples/js/picasa/find-photos' +
`#${v.gphotoQ24id.Q24t}`,
sizes: await getSizes(resources),
resources: resources,
datePublished: getDate(Date.parse(v.published.Q24t)),
dateUpdated: getDate(Date.parse(v.updated.Q24t)),
resources: resources,
sizes: await getSizes(resources),
sources: sourceTags,
};
if (isSubtype(hasTitle, v.type)) {