photo-index: output PhotoGroup instead of Photo (#2902)

photo-index: output PhotoGroup instead of Photo (#2902)
This commit is contained in:
Aaron Boodman
2016-12-06 16:21:02 -08:00
committed by GitHub
parent 0750459e4e
commit 8d4ff605f5
4 changed files with 133 additions and 112 deletions

View File

@@ -30,7 +30,7 @@ type Date struct {
type Photo struct {
Id string
DateTaken Date
DateTaken Date `noms:",omitempty"`
}
type PhotoGroup struct {
@@ -124,11 +124,9 @@ func buildDateIndex(db types.ValueReadWriter, inputs []types.Value) types.Map {
var p Photo
if err := marshal.Unmarshal(cv, &p); err == nil {
stop = true
if p.DateTaken.NsSinceEpoch != 0 {
indexBuilder.SetInsert(
[]types.Value{types.Number(float64(p.DateTaken.NsSinceEpoch))},
cv)
}
indexBuilder.SetInsert(
[]types.Value{types.Number(float64(p.DateTaken.NsSinceEpoch))},
cv)
}
return
})
@@ -151,7 +149,7 @@ func buildGroups(db types.ValueReadWriter, thresh int, byDate types.Map) types.L
}
flush := func() {
if group != nil && group.Photos.Len() > 0 {
if group != nil {
v, err := marshal.Marshal(*group)
d.Chk.NoError(err)
vals <- v
@@ -163,10 +161,17 @@ func buildGroups(db types.ValueReadWriter, thresh int, byDate types.Map) types.L
byDate.IterAll(func(key, s types.Value) {
s.(types.Set).IterAll(func(val types.Value) {
dt := float64(key.(types.Number))
if (dt - lastTime) > float64(thresh*1e6) {
if dt == 0 {
// If date is not known, then the photo is in its own group
flush()
startGroup(val)
flush()
} else if (dt - lastTime) > float64(thresh*1e6) {
// Otherwise, if we've surpassed the threshold, start a new group
flush()
startGroup(val)
} else {
// Otherwise, add to the existing group
group.Photos = group.Photos.Insert(val)
}
lastTime = dt

View File

@@ -57,21 +57,28 @@ func (s *testSuite) TestBasic() {
DateTaken: Date{NsSinceEpoch: float64(55 * 1e9)},
}),
// No dupes, so it doesn't end up in a group
// No dupes
marshal.MustMarshal(Photo{
Id: "48",
DateTaken: Date{NsSinceEpoch: float64(61 * 1e9)},
}),
// Zero date taken, so it doesn't end up in a group
// If the DateTaken is zero, it should end up in its own group
marshal.MustMarshal(Photo{
Id: "49",
DateTaken: Date{NsSinceEpoch: float64(0)},
}),
marshal.MustMarshal(Photo{
Id: "50",
DateTaken: Date{NsSinceEpoch: float64(0)},
}),
// No date taken, so it doesn't end up in a group
// If the DateTaken is not present, it should end up in its own group
types.NewStruct("Photo", types.StructData{
"Id": types.String("50"),
"id": types.String("51"),
}),
types.NewStruct("Photo", types.StructData{
"id": types.String("52"),
}),
)
@@ -90,15 +97,29 @@ func (s *testSuite) TestBasic() {
err = marshal.Unmarshal(sp.GetDataset().HeadValue(), &result)
s.NoError(err)
s.Equal(2, len(result.Groups))
expectedGroups := map[string]map[string]bool{
"44": map[string]bool{"45": true, "43": true, "42": true},
"46": map[string]bool{"47": true},
"48": nil,
"49": nil,
"50": nil,
"51": nil,
"52": nil,
}
s.Equal("44", result.Groups[0].Cover.Id)
s.Equal(3, len(result.Groups[0].Photos))
s.Equal("45", result.Groups[0].Photos[0].Id)
s.Equal("43", result.Groups[0].Photos[1].Id)
s.Equal("42", result.Groups[0].Photos[2].Id)
// Check every returned group against expectedGroups. Matched photos and
// groups are deleted from the expectation maps as they are seen, so anything
// left over afterwards was expected but never produced.
for _, g := range result.Groups {
	exp, ok := expectedGroups[g.Cover.Id]
	s.True(ok, "Group cover %s not expected", g.Cover.Id)
	for _, p := range g.Photos {
		// exp may be a nil map (groups with no extra photos); lookups and
		// deletes on a nil map are safe in Go.
		if _, ok = exp[p.Id]; ok {
			delete(exp, p.Id)
		} else {
			// Fail does not treat its first argument as a format string; the
			// format and its operands must be passed as the trailing
			// msgAndArgs for the ids to show up in the failure output.
			s.Fail("Unexpected photo in group", "Photo %s not expected in group %s", p.Id, g.Cover.Id)
		}
	}
	s.Equal(0, len(exp), "Some expected photos not found in group %s: %+v", g.Cover.Id, exp)
	delete(expectedGroups, g.Cover.Id)
}
s.Equal("46", result.Groups[1].Cover.Id)
s.Equal(1, len(result.Groups[1].Photos))
s.Equal("47", result.Groups[1].Photos[0].Id)
s.Equal(0, len(expectedGroups), "Some expected groups not found in result: %+v", expectedGroups)
}

View File

@@ -9,13 +9,11 @@ import (
"math"
"os"
"path"
"sync"
"time"
"github.com/attic-labs/noms/go/config"
"github.com/attic-labs/noms/go/d"
"github.com/attic-labs/noms/go/datas"
"github.com/attic-labs/noms/go/hash"
"github.com/attic-labs/noms/go/marshal"
"github.com/attic-labs/noms/go/spec"
"github.com/attic-labs/noms/go/types"
@@ -32,23 +30,39 @@ func main() {
}
type Photo struct {
Id string
Sizes map[struct{ Width, Height int }]string
Id string
Sizes map[struct{ Width, Height int }]string
DateTaken Date `noms:",omitempty"`
DatePublished Date `noms:",omitempty"`
DateUpdated Date `noms:",omitempty"`
Tags []string `noms:",omitempty"`
Sources []string `noms:",omitempty"`
Original types.Struct `noms:",original"`
Faces []struct {
Name string
X, Y, W, H float32
} `noms:",omitempty"`
}
type PhotoGroup struct {
Cover Photo
Photos types.Set
Id string
Cover Photo
Photos []Photo
Original types.Struct `noms:",original"`
}
// Date wraps a single timestamp value, NsSinceEpoch (presumably nanoseconds
// since the Unix epoch, going by the field name).
type Date struct {
	NsSinceEpoch float64
}

// IsEmpty reports whether the date is unset, i.e. whether NsSinceEpoch is
// exactly zero.
func (date Date) IsEmpty() bool {
	return date.NsSinceEpoch == 0
}
func index() (win bool) {
var dbStr = flag.String("db", "", "input database spec")
var groupsStr = flag.String("groups", "", "path within db to look for PhotoGroup structs")
var outDSStr = flag.String("out-ds", "", "output dataset to write to - if empty, defaults to input dataset")
var indexCovers = flag.Bool("index-covers", false, "the resulting index will contain only the cover Photo, not the entire PhotoGroup")
verbose.RegisterVerboseFlags(flag.CommandLine)
flag.Usage = usage
@@ -86,101 +100,65 @@ func index() (win bool) {
faceCounts := map[types.String]int{}
sourceCounts := map[types.String]int{}
tagCounts := map[types.String]int{}
countsMtx := sync.Mutex{}
addToIndex := func(p Photo, cv types.Value) {
d := math.MaxFloat64
var dt struct{ DateTaken Date }
var dp struct{ DatePublished Date }
var du struct{ DateUpdated Date }
if err := marshal.Unmarshal(cv, &dt); err == nil {
d = -dt.DateTaken.NsSinceEpoch
} else if err := marshal.Unmarshal(cv, &dp); err == nil {
d = -dp.DatePublished.NsSinceEpoch
} else if err := marshal.Unmarshal(cv, &du); err == nil {
d = -du.DateUpdated.NsSinceEpoch
addToIndex := func(gb *types.GraphBuilder, path []types.Value, pg PhotoGroup) {
if *indexCovers {
gb.SetInsert(path, pg.Cover.Original)
} else {
gb.SetInsert(path, pg.Original)
}
}
addToIndexes := func(pg PhotoGroup) {
d := math.MaxFloat64
if !pg.Cover.DateTaken.IsEmpty() {
d = pg.Cover.DateTaken.NsSinceEpoch
} else if !pg.Cover.DatePublished.IsEmpty() {
d = pg.Cover.DatePublished.NsSinceEpoch
} else if !pg.Cover.DateUpdated.IsEmpty() {
d = pg.Cover.DateUpdated.NsSinceEpoch
}
d = -d
// Index by date
byDate.SetInsert([]types.Value{types.Number(d)}, cv)
addToIndex(byDate, []types.Value{types.Number(d)}, pg)
allPhotos := []Photo{pg.Cover}
if !*indexCovers {
allPhotos = append(allPhotos, pg.Photos...)
}
// Index by tag, then date
moreTags := map[types.String]int{}
var wt struct{ Tags []string }
if err = marshal.Unmarshal(cv, &wt); err == nil {
for _, t := range wt.Tags {
byTag.SetInsert([]types.Value{types.String(t), types.Number(d)}, cv)
moreTags[types.String(t)]++
for _, p := range allPhotos {
for _, t := range p.Tags {
addToIndex(byTag, []types.Value{types.String(t), types.Number(d)}, pg)
tagCounts[types.String(t)]++
}
}
// Index by face, then date
moreFaces := map[types.String]int{}
var wf struct {
Faces []struct {
Name string
X, Y, W, H float32
}
}
if err = marshal.Unmarshal(cv, &wf); err == nil {
for _, f := range wf.Faces {
byFace.SetInsert([]types.Value{types.String(f.Name), types.Number(d)}, cv)
moreFaces[types.String(f.Name)]++
for _, p := range allPhotos {
for _, f := range p.Faces {
addToIndex(byFace, []types.Value{types.String(f.Name), types.Number(d)}, pg)
faceCounts[types.String(f.Name)]++
}
}
// Index by source, then date
moreSources := map[types.String]int{}
var ws struct {
Sources []string
}
if err = marshal.Unmarshal(cv, &ws); err == nil {
for _, s := range ws.Sources {
bySource.SetInsert([]types.Value{types.String(s), types.Number(d)}, cv)
for _, p := range allPhotos {
for _, s := range p.Sources {
addToIndex(bySource, []types.Value{types.String(s), types.Number(d)}, pg)
sourceCounts[types.String(s)]++
}
}
countsMtx.Lock()
for tag, count := range moreTags {
tagCounts[tag] += count
}
for face, count := range moreFaces {
faceCounts[face] += count
}
for source, count := range moreSources {
sourceCounts[source] += count
}
countsMtx.Unlock()
}
groups := []types.Value{}
inGroups := map[hash.Hash]struct{}{}
if *groupsStr != "" {
groups, err = spec.ReadAbsolutePaths(db, *groupsStr)
d.CheckErrorNoUsage(err)
walk.WalkValues(groups[0], db, func(cv types.Value) (stop bool) {
var pg PhotoGroup
if err := marshal.Unmarshal(cv, &pg); err == nil {
stop = true
// TODO: Don't need to do this second arg separately when decoder can catch full value.
addToIndex(pg.Cover, cv.(types.Struct).Get("cover"))
inGroups[cv.(types.Struct).Get("cover").Hash()] = struct{}{}
pg.Photos.IterAll(func(cv types.Value) {
inGroups[cv.Hash()] = struct{}{}
})
}
return
})
}
for _, v := range inputs {
walk.WalkValues(v, db, func(cv types.Value) (stop bool) {
var p Photo
if _, ok := inGroups[cv.Hash()]; ok {
var pg PhotoGroup
if err := marshal.Unmarshal(cv, &pg); err == nil {
stop = true
} else if err := marshal.Unmarshal(cv, &p); err == nil {
stop = true
addToIndex(p, cv)
addToIndexes(pg)
}
return
})

View File

@@ -24,10 +24,6 @@ type testSuite struct {
}
func (s *testSuite) TestWin() {
sp, err := spec.ForDataset(fmt.Sprintf("ldb:%s::test", s.LdbDir))
s.NoError(err)
defer sp.Close()
type Face struct {
Name string
X, Y, W, H int
@@ -51,6 +47,12 @@ func (s *testSuite) TestWin() {
DateUpdated Date
}
type PhotoGroup struct {
Id string
Cover Photo
Photos []Photo
}
getTags := func(n int) types.Set {
s := types.NewSet()
for i := 0; i < n; i++ {
@@ -86,12 +88,23 @@ func (s *testSuite) TestWin() {
}
}
photos := []Photo{}
for i := 0; i < 5; i++ {
photos = append(photos, getPhoto(i))
getPhotoGroup := func(n int) PhotoGroup {
return PhotoGroup{
Id: fmt.Sprintf("pg%d", n),
Cover: getPhoto(n),
}
}
v, err := marshal.Marshal(photos)
groups := []PhotoGroup{}
for i := 0; i < 5; i++ {
groups = append(groups, getPhotoGroup(i))
}
sp, err := spec.ForDataset(fmt.Sprintf("ldb:%s::test", s.LdbDir))
s.NoError(err)
defer sp.Close()
v, err := marshal.Marshal(groups)
s.NoError(err)
_, err = sp.GetDatabase().CommitValue(sp.GetDataset(), v)
s.NoError(err)
@@ -113,8 +126,12 @@ func (s *testSuite) TestWin() {
s.Equal(5, len(idx.ByDate))
for i := 0; i < 5; i++ {
s.Equal(uint64(1), idx.ByDate[-i*10].Len())
p := idx.ByDate[-i*10].First().(types.Struct)
k := -i * 10
if k == 0 {
k = -1
}
s.Equal(uint64(1), idx.ByDate[k].Len())
p := idx.ByDate[k].First().(types.Struct).Get("cover").(types.Struct)
s.Equal(fmt.Sprintf("photo %d", i), string(p.Get("title").(types.String)))
}