Add photo-index: a simple photo indexer. For now only indexes by tag. (#2610)

Add photo-index: a simple photo indexer. For now only indexes by tag.

Will add indexing by face/geo in subsequent patches.
This commit is contained in:
Aaron Boodman
2016-09-27 10:50:37 -07:00
committed by GitHub
parent 181f549179
commit 362a5630d9
6 changed files with 297 additions and 1 deletions
+4 -1
View File
@@ -235,7 +235,10 @@ func structDecoder(t reflect.Type) decoderFunc {
}
d = func(v types.Value, rv reflect.Value) {
s := v.(types.Struct)
s, ok := v.(types.Struct)
if !ok {
panic(&UnmarshalTypeMismatchError{v, rv.Type(), ", expected struct"})
}
// If the name is empty then the Go struct has to be anonymous.
if !strings.EqualFold(s.Type().Desc.(types.StructDesc).Name, name) {
panic(&UnmarshalTypeMismatchError{v, rv.Type(), ", names do not match"})
+1
View File
@@ -221,6 +221,7 @@ func TestDecodeTypeMismatch(t *testing.T) {
X int
}
var s S
assertDecodeErrorMessage(t, types.String("hi!"), &s, "Cannot unmarshal String into Go value of type marshal.S, expected struct")
assertDecodeErrorMessage(t, types.NewStruct("S", types.StructData{
"x": types.String("hi"),
}), &s, "Cannot unmarshal String into Go value of type int")
+33
View File
@@ -383,6 +383,39 @@ func MakeMapType(keyType, valType *Type) *Type {
return staticTypeCache.getCompoundType(MapKind, keyType, valType)
}
// fieldSorter implements sort.Interface over two parallel slices: names[i] is
// a field name and types[i] is the type of that same field. Sorting orders
// the names ascending and moves every type together with its name.
type fieldSorter struct {
	names []string
	types []*Type
}

// Len returns the number of fields held by the sorter.
func (s *fieldSorter) Len() int { return len(s.names) }

// Swap exchanges the fields at positions i and j in both slices so that a
// name and its type always stay at the same index.
func (s *fieldSorter) Swap(i, j int) {
	names, types := s.names, s.types
	names[i], names[j] = names[j], names[i]
	types[i], types[j] = types[j], types[i]
}

// Less reports whether the name at position i sorts strictly before the name
// at position j.
func (s *fieldSorter) Less(i, j int) bool {
	return s.names[j] > s.names[i]
}
// FieldMap maps a struct field name to the type of that field. It is the
// input accepted by MakeStructTypeFromFields.
type FieldMap map[string]*Type
// MakeStructTypeFromFields builds a struct type called name from a map of
// field name to field type. Go map iteration order is unspecified, so the
// fields are sorted by name before being handed to MakeStructType.
func MakeStructTypeFromFields(name string, fields FieldMap) *Type {
	count := len(fields)
	sorter := &fieldSorter{
		names: make([]string, count),
		types: make([]*Type, count),
	}

	// Fill the two parallel slices; index idx of each describes one field.
	idx := 0
	for fieldName, fieldType := range fields {
		sorter.names[idx] = fieldName
		sorter.types[idx] = fieldType
		idx++
	}

	sort.Sort(sorter)
	return MakeStructType(name, sorter.names, sorter.types)
}
func MakeStructType(name string, fieldNames []string, fieldTypes []*Type) *Type {
staticTypeCache.Lock()
defer staticTypeCache.Unlock()
+16
View File
@@ -238,3 +238,19 @@ func TestInvalidCyclesAndUnions(t *testing.T) {
[]*Type{MakeStructType("A", []string{"a"}, []*Type{MakeCycleType(1)})})
})
}
// TestMakeStructTypeFromFields checks that a struct type built from a field
// map keeps its name, has one field per map entry, and reports for every
// field name the very same type that was supplied.
func TestMakeStructTypeFromFields(t *testing.T) {
	assert := assert.New(t)

	fields := map[string]*Type{
		"str":    StringType,
		"number": NumberType,
		"bool":   BoolType,
	}

	structType := MakeStructTypeFromFields("Thing", fields)
	desc := structType.Desc.(StructDesc)

	assert.Equal("Thing", desc.Name)
	assert.Equal(3, desc.Len())
	for fieldName, want := range fields {
		// Pointer identity: the exact *Type passed in must come back out.
		assert.True(want == desc.Field(fieldName))
	}
}
+144
View File
@@ -0,0 +1,144 @@
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0
package main
import (
"fmt"
"os"
"path"
"github.com/attic-labs/noms/go/datas"
"github.com/attic-labs/noms/go/spec"
"github.com/attic-labs/noms/go/types"
"github.com/attic-labs/noms/go/walk"
flag "github.com/juju/gnuflag"
)
// main runs the indexer and exits with status 1 when index reports failure.
func main() {
	if index() {
		return
	}
	os.Exit(1)
}
// index parses the command line, resolves every input path in the database
// named by -db, walks the resolved values looking for Photo structs, and
// commits a struct holding a "byDate" and a "byTag" index to the dataset named
// by -out-ds.
//
// Every failure is reported on stderr. The result is true only when the index
// was committed successfully.
func index() (win bool) {
	var dbStr = flag.String("db", "", "input database spec")
	var outDSStr = flag.String("out-ds", "", "output dataset to write to - if empty, defaults to input dataset")
	var parallelism = flag.Int("parallelism", 16, "number of parallel goroutines to search")

	flag.Usage = usage
	flag.Parse(false)

	// At least one input path is required.
	if flag.NArg() == 0 {
		flag.Usage()
		return false
	}

	db, err := spec.GetDatabase(*dbStr)
	if err != nil {
		// Report the database spec that failed, not the first input path.
		fmt.Fprintf(os.Stderr, "Invalid input database '%s': %s\n", *dbStr, err)
		return false
	}
	defer db.Close()

	if !datas.DatasetFullRe.MatchString(*outDSStr) {
		fmt.Fprintf(os.Stderr, "Invalid output dataset name: %s\n", *outDSStr)
		return false
	}
	outDS := db.GetDataset(*outDSStr)

	// Resolve all inputs up front so that a bad path aborts before any walking.
	inputs := make([]types.Value, 0, flag.NArg())
	for i := 0; i < flag.NArg(); i++ {
		p, err := spec.NewAbsolutePath(flag.Arg(i))
		if err != nil {
			fmt.Fprintf(os.Stderr, "Invalid input path '%s', error: %s\n", flag.Arg(i), err)
			return false
		}

		v := p.Resolve(db)
		if v == nil {
			fmt.Fprintf(os.Stderr, "Input path '%s' does not exist in '%s'\n", flag.Arg(i), *dbStr)
			return false
		}
		inputs = append(inputs, v)
	}

	sizeType := types.MakeStructTypeFromFields("", types.FieldMap{
		"width":  types.NumberType,
		"height": types.NumberType,
	})
	dateType := types.MakeStructTypeFromFields("Date", types.FieldMap{
		"nsSinceEpoch": types.NumberType,
	})

	// A photo is accepted with or without "dateTaken": build the struct type
	// once without the field, add the field to the same map, build it again,
	// and match against the union of the two.
	fields := types.FieldMap{
		"sizes":         types.MakeMapType(sizeType, types.StringType),
		"tags":          types.MakeSetType(types.StringType),
		"title":         types.StringType,
		"datePublished": dateType,
		"dateUpdated":   dateType,
	}
	photoType := types.MakeStructTypeFromFields("Photo", fields)
	fields["dateTaken"] = dateType
	photoType = types.MakeUnionType(photoType, types.MakeStructTypeFromFields("Photo", fields))

	byDate := types.NewGraphBuilder(db, types.MapKind, true)
	byTag := types.NewGraphBuilder(db, types.MapKind, true)

	for _, v := range inputs {
		// NOTE(review): with parallelism > 1 this callback is presumably invoked
		// from several goroutines - confirm GraphBuilder.SetInsert tolerates that.
		walk.SomeP(v, db, func(cv types.Value, _ *types.Ref) (stop bool) {
			if !types.IsSubtype(photoType, cv.Type()) {
				return false
			}
			s := cv.(types.Struct)

			// Prefer to sort by the actual date the photo was taken, but if it's not
			// available, use the date it was published instead.
			ds, ok := s.MaybeGet("dateTaken")
			if !ok {
				ds = s.Get("datePublished")
			}

			// Sort by most recent by negating the timestamp.
			d := ds.(types.Struct).Get("nsSinceEpoch").(types.Number)
			d = types.Number(-float64(d))

			byDate.SetInsert([]types.Value{d}, cv)
			s.Get("tags").(types.Set).IterAll(func(t types.Value) {
				byTag.SetInsert([]types.Value{t, d}, cv)
			})

			// Can't be any photos inside photos, so we can save a little bit here.
			return true
		}, *parallelism)
	}

	_, err = db.CommitValue(outDS, types.NewStruct("", types.StructData{
		"byDate": byDate.Build(),
		"byTag":  byTag.Build(),
	}))
	if err != nil {
		fmt.Fprintf(os.Stderr, "Could not commit: %s\n", err)
		return false
	}

	return true
}
// usage writes the command's help text to stderr: a one-line description, the
// invocation synopsis (using the base name of os.Args[0]), a legend for the
// placeholders in that synopsis, and finally the registered flag defaults.
func usage() {
	fmt.Fprintf(os.Stderr, "photo-index indexes photos by common attributes\n\n")
	fmt.Fprintf(os.Stderr, "Usage: %s -db=<db-spec> -out-ds=<name> [input-paths...]\n\n", path.Base(os.Args[0]))
	// The legend names match the placeholders used in the synopsis above.
	fmt.Fprintf(os.Stderr, " <db-spec> : Database to work with\n")
	fmt.Fprintf(os.Stderr, " <name> : Dataset to write index to\n")
	fmt.Fprintf(os.Stderr, " [input-paths...] : One or more paths within <db-spec> to crawl\n\n")
	// Fprintf with an explicit "\n\n" emits the same bytes as the former
	// Fprintln("Flags:\n") without tripping vet's redundant-newline check.
	fmt.Fprintf(os.Stderr, "Flags:\n\n")
	flag.PrintDefaults()
}
+99
View File
@@ -0,0 +1,99 @@
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0
package main
import (
"fmt"
"testing"
"github.com/attic-labs/noms/go/marshal"
"github.com/attic-labs/noms/go/spec"
"github.com/attic-labs/noms/go/types"
"github.com/attic-labs/noms/go/util/clienttest"
"github.com/attic-labs/testify/suite"
)
// TestBasics is the go test entry point; it hands a fresh testSuite to the
// suite runner.
func TestBasics(t *testing.T) {
	suite.Run(t, new(testSuite))
}
// testSuite is the suite run by TestBasics. It declares no members of its own;
// everything its tests use (LdbDir, MustRun, the assertion helpers) comes from
// the embedded clienttest.ClientTestSuite.
type testSuite struct {
	clienttest.ClientTestSuite
}
// TestWin commits five generated photos to the "test" dataset, runs the
// indexer over it with output dataset "idx", and verifies both indexes.
//
// Photo n carries the tags tag0..tag(n-1) and is taken at timestamp n*10, so
// the date index must hold one photo under each of 0, -10, ..., -40 and the
// tag index must hold 4-i dates under tag<i> for i in 0..3.
func (s *testSuite) TestWin() {
	sp := fmt.Sprintf("ldb:%s::test", s.LdbDir)
	db, ds, err := spec.GetDataset(sp)
	s.NoError(err)

	type Date struct {
		NsSinceEpoch int
	}

	type Photo struct {
		Title string
		Tags  types.Set
		Sizes map[struct {
			Width  int
			Height int
		}]string
		DateTaken     Date
		DatePublished Date
		DateUpdated   Date
	}

	// getTags returns the set {tag0, ..., tag(n-1)}.
	getTags := func(n int) types.Set {
		s := types.NewSet()
		for i := 0; i < n; i++ {
			s = s.Insert(types.String(fmt.Sprintf("tag%d", i)))
		}
		return s
	}

	// getPhoto returns photo number n with n tags and dates derived from n.
	getPhoto := func(n int) Photo {
		return Photo{
			Title: fmt.Sprintf("photo %d", n),
			Tags:  getTags(n),
			Sizes: map[struct{ Width, Height int }]string{
				{100, 100}: "100.jpg"},
			DateTaken:     Date{n * 10},
			DatePublished: Date{n*10 + 1},
			DateUpdated:   Date{n*10 + 2},
		}
	}

	photos := []Photo{}
	for i := 0; i < 5; i++ {
		photos = append(photos, getPhoto(i))
	}

	v, err := marshal.Marshal(photos)
	s.NoError(err)
	ds, err = db.CommitValue(ds, v)
	s.NoError(err)
	// Release the database before the indexer opens it.
	db.Close()

	_, _ = s.MustRun(main, []string{"--out-ds", "idx", "--db", s.LdbDir, "test"})

	db, ds, err = spec.GetDataset(fmt.Sprintf("%s::idx", s.LdbDir))
	s.NoError(err)
	defer db.Close()

	var idx struct {
		ByDate map[int]types.Set
		ByTag  map[string]map[int]types.Set
	}
	s.NoError(marshal.Unmarshal(ds.HeadValue(), &idx))

	// The indexer negates timestamps, hence the -i*10 keys.
	s.Equal(5, len(idx.ByDate))
	for i := 0; i < 5; i++ {
		s.Equal(uint64(1), idx.ByDate[-i*10].Len())
		p := idx.ByDate[-i*10].First().(types.Struct)
		s.Equal(fmt.Sprintf("photo %d", i), string(p.Get("title").(types.String)))
	}

	// Tags tag0..tag3 exist; tag<i> is carried by photos i+1..4, i.e. 4-i photos.
	s.Equal(4, len(idx.ByTag))
	for i := 0; i < 4; i++ {
		k := fmt.Sprintf("tag%d", i)
		v := idx.ByTag[k]
		s.Equal(4-i, len(v))
	}
}