mirror of
https://github.com/dolthub/dolt.git
synced 2026-05-11 19:11:10 -05:00
Add photo-index: a simple photo indexer. For now only indexes by tag. (#2610)
Add photo-index: a simple photo indexer. For now only indexes by tag. Will add indexing by face/geo in subsequent patches.
This commit is contained in:
@@ -235,7 +235,10 @@ func structDecoder(t reflect.Type) decoderFunc {
|
||||
}
|
||||
|
||||
d = func(v types.Value, rv reflect.Value) {
|
||||
s := v.(types.Struct)
|
||||
s, ok := v.(types.Struct)
|
||||
if !ok {
|
||||
panic(&UnmarshalTypeMismatchError{v, rv.Type(), ", expected struct"})
|
||||
}
|
||||
// If the name is empty then the Go struct has to be anonymous.
|
||||
if !strings.EqualFold(s.Type().Desc.(types.StructDesc).Name, name) {
|
||||
panic(&UnmarshalTypeMismatchError{v, rv.Type(), ", names do not match"})
|
||||
|
||||
@@ -221,6 +221,7 @@ func TestDecodeTypeMismatch(t *testing.T) {
|
||||
X int
|
||||
}
|
||||
var s S
|
||||
assertDecodeErrorMessage(t, types.String("hi!"), &s, "Cannot unmarshal String into Go value of type marshal.S, expected struct")
|
||||
assertDecodeErrorMessage(t, types.NewStruct("S", types.StructData{
|
||||
"x": types.String("hi"),
|
||||
}), &s, "Cannot unmarshal String into Go value of type int")
|
||||
|
||||
@@ -383,6 +383,39 @@ func MakeMapType(keyType, valType *Type) *Type {
|
||||
return staticTypeCache.getCompoundType(MapKind, keyType, valType)
|
||||
}
|
||||
|
||||
type fieldSorter struct {
|
||||
names []string
|
||||
types []*Type
|
||||
}
|
||||
|
||||
func (fs *fieldSorter) Len() int {
|
||||
return len(fs.names)
|
||||
}
|
||||
|
||||
func (fs *fieldSorter) Swap(i, j int) {
|
||||
fs.names[i], fs.names[j] = fs.names[j], fs.names[i]
|
||||
fs.types[i], fs.types[j] = fs.types[j], fs.types[i]
|
||||
}
|
||||
|
||||
func (fs *fieldSorter) Less(i, j int) bool {
|
||||
return fs.names[i] < fs.names[j]
|
||||
}
|
||||
|
||||
type FieldMap map[string]*Type
|
||||
|
||||
func MakeStructTypeFromFields(name string, fields FieldMap) *Type {
|
||||
// I'm the computer
|
||||
names := make([]string, 0, len(fields))
|
||||
types := make([]*Type, 0, len(fields))
|
||||
for k, v := range fields {
|
||||
names = append(names, k)
|
||||
types = append(types, v)
|
||||
}
|
||||
fs := fieldSorter{names, types}
|
||||
sort.Sort(&fs)
|
||||
return MakeStructType(name, names, types)
|
||||
}
|
||||
|
||||
func MakeStructType(name string, fieldNames []string, fieldTypes []*Type) *Type {
|
||||
staticTypeCache.Lock()
|
||||
defer staticTypeCache.Unlock()
|
||||
|
||||
@@ -238,3 +238,19 @@ func TestInvalidCyclesAndUnions(t *testing.T) {
|
||||
[]*Type{MakeStructType("A", []string{"a"}, []*Type{MakeCycleType(1)})})
|
||||
})
|
||||
}
|
||||
|
||||
func TestMakeStructTypeFromFields(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
fields := map[string]*Type{
|
||||
"str": StringType,
|
||||
"number": NumberType,
|
||||
"bool": BoolType,
|
||||
}
|
||||
desc := MakeStructTypeFromFields("Thing", fields).Desc.(StructDesc)
|
||||
assert.Equal("Thing", desc.Name)
|
||||
assert.Equal(3, desc.Len())
|
||||
for k, v := range fields {
|
||||
f := desc.Field(k)
|
||||
assert.True(v == f)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,144 @@
|
||||
// Copyright 2016 Attic Labs, Inc. All rights reserved.
|
||||
// Licensed under the Apache License, version 2.0:
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
|
||||
"github.com/attic-labs/noms/go/datas"
|
||||
"github.com/attic-labs/noms/go/spec"
|
||||
"github.com/attic-labs/noms/go/types"
|
||||
"github.com/attic-labs/noms/go/walk"
|
||||
flag "github.com/juju/gnuflag"
|
||||
)
|
||||
|
||||
func main() {
|
||||
if !index() {
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
func index() (win bool) {
|
||||
var dbStr = flag.String("db", "", "input database spec")
|
||||
var outDSStr = flag.String("out-ds", "", "output dataset to write to - if empty, defaults to input dataset")
|
||||
var parallelism = flag.Int("parallelism", 16, "number of parallel goroutines to search")
|
||||
|
||||
flag.Usage = usage
|
||||
flag.Parse(false)
|
||||
|
||||
if flag.NArg() == 0 {
|
||||
flag.Usage()
|
||||
return
|
||||
}
|
||||
|
||||
if flag.NArg() == 0 {
|
||||
fmt.Fprintln(os.Stderr, "Need at least one dataset to index")
|
||||
return
|
||||
}
|
||||
|
||||
db, err := spec.GetDatabase(*dbStr)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Invalid input database '%s': %s\n", flag.Arg(0), err)
|
||||
return
|
||||
}
|
||||
defer db.Close()
|
||||
|
||||
var outDS datas.Dataset
|
||||
if !datas.DatasetFullRe.MatchString(*outDSStr) {
|
||||
fmt.Fprintf(os.Stderr, "Invalid output dataset name: %s\n", *outDSStr)
|
||||
return
|
||||
} else {
|
||||
outDS = db.GetDataset(*outDSStr)
|
||||
}
|
||||
|
||||
inputs := []types.Value{}
|
||||
for i := 0; i < flag.NArg(); i++ {
|
||||
p, err := spec.NewAbsolutePath(flag.Arg(i))
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Invalid input path '%s', error: %s\n", flag.Arg(i), err)
|
||||
return
|
||||
}
|
||||
|
||||
v := p.Resolve(db)
|
||||
if v == nil {
|
||||
fmt.Fprintf(os.Stderr, "Input path '%s' does not exist in '%s'", flag.Arg(i), *dbStr)
|
||||
return
|
||||
}
|
||||
|
||||
inputs = append(inputs, v)
|
||||
continue
|
||||
}
|
||||
|
||||
sizeType := types.MakeStructTypeFromFields("", types.FieldMap{
|
||||
"width": types.NumberType,
|
||||
"height": types.NumberType,
|
||||
})
|
||||
dateType := types.MakeStructTypeFromFields("Date", types.FieldMap{
|
||||
"nsSinceEpoch": types.NumberType,
|
||||
})
|
||||
fields := types.FieldMap{
|
||||
"sizes": types.MakeMapType(sizeType, types.StringType),
|
||||
"tags": types.MakeSetType(types.StringType),
|
||||
"title": types.StringType,
|
||||
"datePublished": dateType,
|
||||
"dateUpdated": dateType,
|
||||
}
|
||||
photoType := types.MakeStructTypeFromFields("Photo", fields)
|
||||
fields["dateTaken"] = dateType
|
||||
photoType = types.MakeUnionType(photoType, types.MakeStructTypeFromFields("Photo", fields))
|
||||
|
||||
byDate := types.NewGraphBuilder(db, types.MapKind, true)
|
||||
byTag := types.NewGraphBuilder(db, types.MapKind, true)
|
||||
|
||||
for _, v := range inputs {
|
||||
walk.SomeP(v, db, func(cv types.Value, _ *types.Ref) (stop bool) {
|
||||
if types.IsSubtype(photoType, cv.Type()) {
|
||||
s := cv.(types.Struct)
|
||||
// Prefer to sort by the actual date the photo was taken, but if it's not
|
||||
// available, use the date it was published instead.
|
||||
ds, ok := s.MaybeGet("dateTaken")
|
||||
if !ok {
|
||||
ds = s.Get("datePublished")
|
||||
}
|
||||
|
||||
// Sort by most recent by negating the timestamp.
|
||||
d := ds.(types.Struct).Get("nsSinceEpoch").(types.Number)
|
||||
d = types.Number(-float64(d))
|
||||
|
||||
byDate.SetInsert([]types.Value{d}, cv)
|
||||
s.Get("tags").(types.Set).IterAll(func(t types.Value) {
|
||||
byTag.SetInsert([]types.Value{t, d}, cv)
|
||||
})
|
||||
// Can't be any photos inside photos, so we can save a little bit here.
|
||||
stop = true
|
||||
}
|
||||
return
|
||||
}, *parallelism)
|
||||
}
|
||||
|
||||
outDS, err = db.CommitValue(outDS, types.NewStruct("", types.StructData{
|
||||
"byDate": byDate.Build(),
|
||||
"byTag": byTag.Build(),
|
||||
}))
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Could not commit: %s\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
win = true
|
||||
return
|
||||
}
|
||||
|
||||
func usage() {
|
||||
fmt.Fprintf(os.Stderr, "photo-index indexes photos by common attributes\n\n")
|
||||
fmt.Fprintf(os.Stderr, "Usage: %s -db=<db-spec> -out-ds=<name> [input-paths...]\n\n", path.Base(os.Args[0]))
|
||||
fmt.Fprintf(os.Stderr, " <db> : Database to work with\n")
|
||||
fmt.Fprintf(os.Stderr, " <out-ds> : Dataset to write index to\n")
|
||||
fmt.Fprintf(os.Stderr, " [input-paths...] : One or more paths within <db-spec> to crawl\n\n")
|
||||
fmt.Fprintln(os.Stderr, "Flags:\n")
|
||||
flag.PrintDefaults()
|
||||
}
|
||||
@@ -0,0 +1,99 @@
|
||||
// Copyright 2016 Attic Labs, Inc. All rights reserved.
|
||||
// Licensed under the Apache License, version 2.0:
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/attic-labs/noms/go/marshal"
|
||||
"github.com/attic-labs/noms/go/spec"
|
||||
"github.com/attic-labs/noms/go/types"
|
||||
"github.com/attic-labs/noms/go/util/clienttest"
|
||||
"github.com/attic-labs/testify/suite"
|
||||
)
|
||||
|
||||
func TestBasics(t *testing.T) {
|
||||
suite.Run(t, &testSuite{})
|
||||
}
|
||||
|
||||
type testSuite struct {
|
||||
clienttest.ClientTestSuite
|
||||
}
|
||||
|
||||
func (s *testSuite) TestWin() {
|
||||
sp := fmt.Sprintf("ldb:%s::test", s.LdbDir)
|
||||
db, ds, _ := spec.GetDataset(sp)
|
||||
|
||||
type Date struct {
|
||||
NsSinceEpoch int
|
||||
}
|
||||
|
||||
type Photo struct {
|
||||
Title string
|
||||
Tags types.Set
|
||||
Sizes map[struct {
|
||||
Width int
|
||||
Height int
|
||||
}]string
|
||||
DateTaken Date
|
||||
DatePublished Date
|
||||
DateUpdated Date
|
||||
}
|
||||
|
||||
getTags := func(n int) types.Set {
|
||||
s := types.NewSet()
|
||||
for i := 0; i < n; i++ {
|
||||
s = s.Insert(types.String(fmt.Sprintf("tag%d", i)))
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
getPhoto := func(n int) Photo {
|
||||
return Photo{
|
||||
Title: fmt.Sprintf("photo %d", n),
|
||||
Tags: getTags(n),
|
||||
Sizes: map[struct{ Width, Height int }]string{
|
||||
{100, 100}: "100.jpg"},
|
||||
DateTaken: Date{n * 10},
|
||||
DatePublished: Date{n*10 + 1},
|
||||
DateUpdated: Date{n*10 + 2},
|
||||
}
|
||||
}
|
||||
|
||||
photos := []Photo{}
|
||||
for i := 0; i < 5; i++ {
|
||||
photos = append(photos, getPhoto(i))
|
||||
}
|
||||
|
||||
v, err := marshal.Marshal(photos)
|
||||
s.NoError(err)
|
||||
ds, err = db.CommitValue(ds, v)
|
||||
s.NoError(err)
|
||||
db.Close()
|
||||
|
||||
_, _ = s.MustRun(main, []string{"--out-ds", "idx", "--db", s.LdbDir, "test"})
|
||||
|
||||
db, ds, _ = spec.GetDataset(fmt.Sprintf("%s::idx", s.LdbDir))
|
||||
var idx struct {
|
||||
ByDate map[int]types.Set
|
||||
ByTag map[string]map[int]types.Set
|
||||
}
|
||||
marshal.Unmarshal(ds.HeadValue(), &idx)
|
||||
|
||||
s.Equal(5, len(idx.ByDate))
|
||||
for i := 0; i < 5; i++ {
|
||||
s.Equal(uint64(1), idx.ByDate[-i*10].Len())
|
||||
p := idx.ByDate[-i*10].First().(types.Struct)
|
||||
s.Equal(fmt.Sprintf("photo %d", i), string(p.Get("title").(types.String)))
|
||||
}
|
||||
|
||||
s.Equal(4, len(idx.ByTag))
|
||||
for i := 1; i < 5; i++ {
|
||||
k := fmt.Sprintf("tag%d", i)
|
||||
v := idx.ByTag[k]
|
||||
s.Equal(4-i, len(v))
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user