Merge remote-tracking branch 'origin/main' into aaron/database-cleanup

This commit is contained in:
Aaron Son
2022-02-15 12:18:14 -08:00
109 changed files with 3956 additions and 1821 deletions

View File

@@ -167,7 +167,8 @@ func (se *SqlEngine) NewContext(ctx context.Context) (*sql.Context, error) {
}
func (se *SqlEngine) NewDoltSession(ctx context.Context, mysqlSess *sql.BaseSession) (*dsess.DoltSession, error) {
return se.dsessFactory(ctx, mysqlSess, se.engine.Analyzer.Catalog.AllDatabases())
tempCtx := sql.NewContext(ctx, sql.WithSession(mysqlSess))
return se.dsessFactory(ctx, mysqlSess, se.engine.Analyzer.Catalog.AllDatabases(tempCtx))
}
// GetReturnFormat() returns the printing format the engine is associated with.

View File

@@ -0,0 +1,167 @@
// Copyright 2021 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package commands
import (
"context"
"fmt"
"io"
"math"
"path/filepath"
"github.com/dolthub/dolt/go/cmd/dolt/cli"
"github.com/dolthub/dolt/go/cmd/dolt/errhand"
"github.com/dolthub/dolt/go/libraries/doltcore/dbfactory"
"github.com/dolthub/dolt/go/libraries/doltcore/env"
"github.com/dolthub/dolt/go/libraries/utils/argparser"
"github.com/dolthub/dolt/go/libraries/utils/filesys"
"github.com/dolthub/dolt/go/store/nbs"
)
// tableFileIndexFlag enables measuring distribution error in table file
// chunk indexes.
const tableFileIndexFlag = "index"

// InspectCmd inspects a Dolt database and collects statistics about it.
type InspectCmd struct {
}

// Name returns the name of the Dolt cli command. This is what is used on the
// command line to invoke the command.
func (cmd InspectCmd) Name() string {
	return "inspect"
}

// Hidden returns true because this command is excluded from the help text.
func (cmd InspectCmd) Hidden() bool {
	return true
}

// RequiresRepo returns true because this command must be run from within a
// data repository directory.
func (cmd InspectCmd) RequiresRepo() bool {
	return true
}
// Description returns a short, human-readable description of the command.
func (cmd InspectCmd) Description() string {
	desc := "Inspects a Dolt Database and collects stats."
	return desc
}
// CreateMarkdown is a no-op for this hidden command: no markdown help text is
// written to wr, and nil is always returned.
func (cmd InspectCmd) CreateMarkdown(wr io.Writer, commandStr string) error {
	return nil
}
// ArgParser returns the argument parser for this command. It supports a
// single flag that enables measuring chunk index distribution error.
func (cmd InspectCmd) ArgParser() *argparser.ArgParser {
	parser := argparser.NewArgParser()
	parser.SupportsFlag(tableFileIndexFlag, "i", "Measure distribution error in table file chunk indexes.")
	return parser
}
// Exec executes the command. Currently the only supported mode is the
// --index flag, which measures chunk index distribution error; with no flags
// the command is a no-op and exits successfully.
func (cmd InspectCmd) Exec(ctx context.Context, commandStr string, args []string, dEnv *env.DoltEnv) int {
	parser := cmd.ArgParser()
	docs := cli.GetCommandDocumentation(commandStr, cli.CommandDocumentationContent{}, parser)
	help, usage := cli.HelpAndUsagePrinters(docs)
	apr := cli.ParseArgsOrDie(parser, args, help)

	var verr errhand.VerboseError
	if apr.Contains(tableFileIndexFlag) {
		verr = cmd.measureChunkIndexDistribution(ctx, dEnv)
	}
	return HandleVErrAndExitCode(verr, usage)
}
// measureChunkIndexDistribution walks every table file in the database's new
// and old generations, prints a per-file summary of how far the prefix-based
// ordinal guess deviates from each chunk's actual index position, and finally
// prints the average guess error across all files.
func (cmd InspectCmd) measureChunkIndexDistribution(ctx context.Context, dEnv *env.DoltEnv) errhand.VerboseError {
	newGen := filepath.Join(dEnv.GetDoltDir(), dbfactory.DataDir)
	oldGen := filepath.Join(newGen, "oldgen")
	itr, err := NewTableFileIter([]string{newGen, oldGen}, dEnv.FS)
	if err != nil {
		return errhand.VerboseErrorFromError(err)
	}

	sumErr, sumCnt := 0.0, 0
	for {
		// next() returns an empty path when the iterator is exhausted.
		path, _ := itr.next()
		if path == "" {
			break
		}
		summary, err := cmd.processTableFile(path, dEnv.FS)
		if err != nil {
			return errhand.VerboseErrorFromError(err)
		}
		sumErr += summary.sumErr
		sumCnt += int(summary.count)
		cli.Println(summary.format())
	}

	// Guard against a zero divisor when no table files (or only empty
	// indexes) were found; previously this printed NaN/+Inf.
	if sumCnt == 0 {
		cli.Println("no table file chunks found")
		return nil
	}
	cli.Printf("average guess error: %f\n", sumErr/float64(sumCnt))
	return nil
}
// processTableFile opens the table file at path, reads its chunk index
// prefixes, and returns a summary of how far each prefix's guessed ordinal
// deviates from its actual position.
//
// The named return values allow the deferred Close to surface a close error
// when the body itself succeeded.
func (cmd InspectCmd) processTableFile(path string, fs filesys.Filesys) (sum *chunkIndexSummary, err error) {
	var rdr io.ReadCloser
	rdr, err = fs.OpenForRead(path)
	if err != nil {
		return sum, err
	}
	defer func() {
		cerr := rdr.Close()
		if err == nil {
			err = cerr
		}
	}()

	// GetTableIndexPrefixes needs random access; fail cleanly rather than
	// panicking if the filesystem hands back a non-seekable reader.
	seeker, ok := rdr.(io.ReadSeeker)
	if !ok {
		return sum, fmt.Errorf("table file %s: reader does not support seeking", path)
	}

	var prefixes []uint64
	prefixes, err = nbs.GetTableIndexPrefixes(seeker)
	if err != nil {
		return sum, err
	}

	sum = &chunkIndexSummary{
		file:  path,
		count: uint32(len(prefixes)),
	}
	for i, prefix := range prefixes {
		sum.addPrefix(i, prefix)
	}
	return
}
// chunkIndexSummary accumulates guess-error statistics for the chunk index
// of a single table file.
type chunkIndexSummary struct {
	file   string  // path of the table file
	count  uint32  // number of chunk prefixes in the index
	//errs []float64
	sumErr float64 // total absolute guess error across all prefixes
	maxErr float64 // largest single-prefix guess error observed
}

// format renders the summary as one human-readable line.
func (s *chunkIndexSummary) format() string {
	return fmt.Sprintf("file: %s \t count: %d sum error: %f \t max error: %f ",
		s.file, s.count, s.sumErr, s.maxErr)
}
// addPrefix folds the guess error for the i'th prefix into the running
// totals, updating both the sum and the maximum error seen so far.
func (s *chunkIndexSummary) addPrefix(i int, prefix uint64) {
	guess := nbs.GuessPrefixOrdinal(prefix, s.count)
	diff := math.Abs(float64(i - guess))
	s.sumErr += diff
	if diff > s.maxErr {
		s.maxErr = diff
	}
}

View File

@@ -38,18 +38,24 @@ const (
mergesParam = "merges"
minParentsParam = "min-parents"
parentsParam = "parents"
decorateParam = "decorate"
oneLineParam = "oneline"
)
type logOpts struct {
numLines int
showParents bool
minParents int
decoration string
oneLine bool
}
type logNode struct {
commitMeta *doltdb.CommitMeta
commitHash hash.Hash
parentHashes []hash.Hash
branchNames []string
isHead bool
}
var logDocs = cli.CommandDocumentationContent{
@@ -91,6 +97,8 @@ func (cmd LogCmd) ArgParser() *argparser.ArgParser {
ap.SupportsInt(minParentsParam, "", "parent_count", "The minimum number of parents a commit must have to be included in the log.")
ap.SupportsFlag(mergesParam, "", "Equivalent to min-parents == 2, this will limit the log to commits with 2 or more parents.")
ap.SupportsFlag(parentsParam, "", "Shows all parents of each commit in the log.")
ap.SupportsString(decorateParam, "", "decorate_fmt", "Shows refs next to commits. Valid options are short, full, no, and auto")
ap.SupportsFlag(oneLineParam, "", "Shows logs in a compact format.")
return ap
}
@@ -114,10 +122,19 @@ func (cmd LogCmd) logWithLoggerFunc(ctx context.Context, commandStr string, args
minParents = 2
}
decorateOption := apr.GetValueOrDefault(decorateParam, "auto")
switch decorateOption {
case "short", "full", "auto", "no":
default:
cli.PrintErrln(color.HiRedString("fatal: invalid --decorate option: " + decorateOption))
return 1
}
opts := logOpts{
numLines: apr.GetIntOrDefault(numLinesParam, -1),
showParents: apr.Contains(parentsParam),
minParents: minParents,
oneLine: apr.Contains(oneLineParam),
decoration: decorateOption,
}
// Just dolt log
@@ -152,6 +169,53 @@ func logCommits(ctx context.Context, dEnv *env.DoltEnv, cs *doltdb.CommitSpec, o
return 1
}
cHashToRefs := map[hash.Hash][]string{}
// Get all branches
branches, err := dEnv.DoltDB.GetBranchesWithHashes(ctx)
if err != nil {
cli.PrintErrln(color.HiRedString("Fatal error: cannot get Branch information."))
return 1
}
for _, b := range branches {
refName := b.Ref.String()
if opts.decoration != "full" {
refName = b.Ref.GetPath() // trim out "refs/heads/"
}
refName = fmt.Sprintf("\033[32;1m%s\033[0m", refName) // branch names are bright green (32;1m)
cHashToRefs[b.Hash] = append(cHashToRefs[b.Hash], refName)
}
// Get all remote branches
remotes, err := dEnv.DoltDB.GetRemotesWithHashes(ctx)
if err != nil {
cli.PrintErrln(color.HiRedString("Fatal error: cannot get Branch information."))
return 1
}
for _, r := range remotes {
refName := r.Ref.String()
if opts.decoration != "full" {
refName = r.Ref.GetPath() // trim out "refs/remotes/"
}
refName = fmt.Sprintf("\033[31;1m%s\033[0m", refName) // remote names are bright red (31;1m)
cHashToRefs[r.Hash] = append(cHashToRefs[r.Hash], refName)
}
// Get all tags
tags, err := dEnv.DoltDB.GetTagsWithHashes(ctx)
if err != nil {
cli.PrintErrln(color.HiRedString("Fatal error: cannot get Tag information."))
return 1
}
for _, t := range tags {
refName := t.Ref.String()
if opts.decoration != "full" {
refName = t.Ref.GetPath() // trim out "refs/tags/"
}
refName = fmt.Sprintf("\033[33;1mtag: %s\033[0m", refName) // tags names are bright yellow (33;1m)
cHashToRefs[t.Hash] = append(cHashToRefs[t.Hash], refName)
}
h, err := commit.HashOf()
if err != nil {
@@ -195,7 +259,12 @@ func logCommits(ctx context.Context, dEnv *env.DoltEnv, cs *doltdb.CommitSpec, o
return 1
}
commitsInfo = append(commitsInfo, logNode{meta, cmHash, pHashes})
commitsInfo = append(commitsInfo, logNode{
commitMeta: meta,
commitHash: cmHash,
parentHashes: pHashes,
branchNames: cHashToRefs[cmHash],
isHead: cmHash == h})
}
logToStdOut(opts, commitsInfo)
@@ -293,7 +362,10 @@ func logTableCommits(ctx context.Context, dEnv *env.DoltEnv, opts logOpts, cs *d
return err
}
commitsInfo = append(commitsInfo, logNode{meta, prevHash, ph})
commitsInfo = append(commitsInfo, logNode{
commitMeta: meta,
commitHash: prevHash,
parentHashes: ph})
numLines--
}
@@ -307,6 +379,84 @@ func logTableCommits(ctx context.Context, dEnv *env.DoltEnv, opts logOpts, cs *d
return nil
}
// logRefs writes the ref decoration for a commit: a yellow-parenthesized,
// comma-separated list of the branch/remote/tag names pointing at it,
// prefixed with "HEAD -> " when the commit is the current head. Nothing is
// written for commits with no associated refs.
func logRefs(pager *outputpager.Pager, comm logNode) {
	if len(comm.branchNames) == 0 {
		return
	}

	pager.Writer.Write([]byte("\033[33m(\033[0m"))
	if comm.isHead {
		pager.Writer.Write([]byte("\033[36;1mHEAD -> \033[0m"))
	}
	// Separate names with a Dim Yellow comma.
	pager.Writer.Write([]byte(strings.Join(comm.branchNames, "\033[33m, \033[0m")))
	pager.Writer.Write([]byte("\033[33m) \033[0m"))
}
// logCompact writes one line per commit: the (optionally parent-expanded)
// commit hash, optional ref decoration, and the commit message collapsed
// onto a single line.
func logCompact(pager *outputpager.Pager, opts logOpts, commits []logNode) {
	for _, comm := range commits {
		if len(comm.parentHashes) < opts.minParents {
			return
		}

		chStr := comm.commitHash.String()
		if opts.showParents {
			for _, h := range comm.parentHashes {
				chStr += " " + h.String()
			}
		}

		// TODO: use short hash instead
		// Write commit hash
		pager.Writer.Write([]byte(fmt.Sprintf("\033[33m%s \033[0m", chStr)))
		if opts.decoration != "no" {
			logRefs(pager, comm)
		}

		// Write the description directly: passing it through Sprintf as a
		// format string would mangle any '%' in the commit message.
		formattedDesc := strings.Replace(comm.commitMeta.Description, "\n", " ", -1) + "\n"
		pager.Writer.Write([]byte(formattedDesc))
	}
}
// logDefault writes the full multi-line log entry for each commit: hash,
// optional ref decoration, merge parents, author, date, and the indented
// commit message.
func logDefault(pager *outputpager.Pager, opts logOpts, commits []logNode) {
	for _, comm := range commits {
		if len(comm.parentHashes) < opts.minParents {
			return
		}

		chStr := comm.commitHash.String()
		if opts.showParents {
			for _, h := range comm.parentHashes {
				chStr += " " + h.String()
			}
		}

		// Write commit hash in Dim Yellow (33m)
		pager.Writer.Write([]byte(fmt.Sprintf("\033[33mcommit %s \033[0m", chStr)))

		// Show decoration
		if opts.decoration != "no" {
			logRefs(pager, comm)
		}

		if len(comm.parentHashes) > 1 {
			pager.Writer.Write([]byte("\nMerge:"))
			for _, h := range comm.parentHashes {
				pager.Writer.Write([]byte(" " + h.String()))
			}
		}

		pager.Writer.Write([]byte(fmt.Sprintf("\nAuthor: %s <%s>", comm.commitMeta.Name, comm.commitMeta.Email)))
		timeStr := comm.commitMeta.FormatTS()
		pager.Writer.Write([]byte(fmt.Sprintf("\nDate: %s", timeStr)))

		// Write the description directly: using it as a Sprintf format
		// string would mangle any '%' in the commit message.
		formattedDesc := "\n\n\t" + strings.Replace(comm.commitMeta.Description, "\n", "\n\t", -1) + "\n\n"
		pager.Writer.Write([]byte(formattedDesc))
	}
}
func logToStdOut(opts logOpts, commits []logNode) {
if cli.ExecuteWithStdioRestored == nil {
return
@@ -314,35 +464,10 @@ func logToStdOut(opts logOpts, commits []logNode) {
cli.ExecuteWithStdioRestored(func() {
pager := outputpager.Start()
defer pager.Stop()
for _, comm := range commits {
if len(comm.parentHashes) < opts.minParents {
return
}
chStr := comm.commitHash.String()
if opts.showParents {
for _, h := range comm.parentHashes {
chStr += " " + h.String()
}
}
pager.Writer.Write([]byte(fmt.Sprintf("\033[1;33mcommit %s \033[0m", chStr)))
if len(comm.parentHashes) > 1 {
pager.Writer.Write([]byte(fmt.Sprintf("\nMerge:")))
for _, h := range comm.parentHashes {
pager.Writer.Write([]byte(fmt.Sprintf(" " + h.String())))
}
}
pager.Writer.Write([]byte(fmt.Sprintf("\nAuthor: %s <%s>", comm.commitMeta.Name, comm.commitMeta.Email)))
timeStr := comm.commitMeta.FormatTS()
pager.Writer.Write([]byte(fmt.Sprintf("\nDate: %s", timeStr)))
formattedDesc := "\n\n\t" + strings.Replace(comm.commitMeta.Description, "\n", "\n\t", -1) + "\n\n"
pager.Writer.Write([]byte(fmt.Sprintf(formattedDesc)))
if opts.oneLine {
logCompact(pager, opts, commits)
} else {
logDefault(pager, opts, commits)
}
})
}

View File

@@ -45,8 +45,15 @@ var loginDocs = cli.CommandDocumentationContent{
Synopsis: []string{"[{{.LessThan}}creds{{.GreaterThan}}]"},
}
// InstallsSignalHandlers returns true: LoginCmd doesn't handle its own
// signals, but the global context should be canceled when SIGINT is received.
func (cmd LoginCmd) InstallsSignalHandlers() bool {
	return true
}

// LoginCmd implements the `dolt login` command.
type LoginCmd struct{}

// NOTE(review): this assertion checks SqlCmd rather than LoginCmd — given the
// method above, it presumably was meant to read
// `var _ cli.SignalCommand = LoginCmd{}`; confirm intent.
var _ cli.SignalCommand = SqlCmd{}
// Name returns the name of the Dolt cli command. This is what is used on the command line to invoke the command
func (cmd LoginCmd) Name() string {
return "login"

View File

@@ -200,6 +200,10 @@ func NewTableFileIter(dirs []string, fs filesys.Filesys) (*TableFileIter, error)
}
func (itr *TableFileIter) next() (string, time.Time) {
if itr.pos >= len(itr.files) {
return "", time.Time{}
}
curr := itr.files[itr.pos]
itr.pos++

View File

@@ -996,7 +996,7 @@ func processQuery(ctx *sql.Context, query string, se *engine.SqlEngine) (sql.Sch
if err != nil {
return nil, nil, err
}
_, err = sql.RowIterToRows(ctx, ri)
_, err = sql.RowIterToRows(ctx, nil, ri)
if err != nil {
return nil, nil, err
}
@@ -1007,7 +1007,7 @@ func processQuery(ctx *sql.Context, query string, se *engine.SqlEngine) (sql.Sch
if err != nil {
return nil, nil, err
}
_, err = sql.RowIterToRows(ctx, ri)
_, err = sql.RowIterToRows(ctx, nil, ri)
if err != nil {
return nil, nil, err
}
@@ -1017,7 +1017,7 @@ func processQuery(ctx *sql.Context, query string, se *engine.SqlEngine) (sql.Sch
if err != nil {
return nil, nil, err
}
_, err = sql.RowIterToRows(ctx, ri)
_, err = sql.RowIterToRows(ctx, nil, ri)
if err != nil {
return nil, nil, err
}

View File

@@ -51,7 +51,7 @@ import (
)
const (
Version = "0.36.1"
Version = "0.36.2"
)
var dumpDocsCommand = &commands.DumpDocsCmd{}
@@ -97,6 +97,7 @@ var doltCommand = cli.NewSubCommandHandler("dolt", "it's git for data", []cli.Co
commands.RootsCmd{},
commands.VersionCmd{VersionStr: Version},
commands.DumpCmd{},
commands.InspectCmd{},
dumpDocsCommand,
dumpZshCommand,
})

View File

@@ -13,14 +13,13 @@ require (
github.com/bcicen/jstream v1.0.0
github.com/boltdb/bolt v1.3.1
github.com/cenkalti/backoff v2.2.1+incompatible
github.com/codahale/blake2 v0.0.0-20150924215134-8d10d0420cbf
github.com/denisbrodbeck/machineid v1.0.1
github.com/dolthub/dolt/go/gen/proto/dolt/services/eventsapi v0.0.0-20201005193433-3ee972b1d078
github.com/dolthub/fslock v0.0.3
github.com/dolthub/ishell v0.0.0-20220112232610-14e753f0f371
github.com/dolthub/mmap-go v1.0.4-0.20201107010347-f9f2a9588a66
github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81
github.com/dolthub/vitess v0.0.0-20220205072827-9c6acb39686a
github.com/dolthub/vitess v0.0.0-20220207220721-35d6793fac38
github.com/dustin/go-humanize v1.0.0
github.com/fatih/color v1.9.0
github.com/flynn-archive/go-shlex v0.0.0-20150515145356-3f9db97f8568
@@ -69,7 +68,7 @@ require (
)
require (
github.com/dolthub/go-mysql-server v0.11.1-0.20220208174427-9756c7d7167f
github.com/dolthub/go-mysql-server v0.11.1-0.20220215141938-c484f95c3408
github.com/google/flatbuffers v2.0.5+incompatible
github.com/kch42/buzhash v0.0.0-20160816060738-9bdec3dec7c6
github.com/prometheus/client_golang v1.11.0

View File

@@ -142,8 +142,6 @@ github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDk
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk=
github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa/go.mod h1:zn76sxSg3SzpJ0PPJaLDCu+Bu0Lg3sKTORVIj19EIF8=
github.com/codahale/blake2 v0.0.0-20150924215134-8d10d0420cbf h1:5ZeQB3mThuz5C2MSER6T5GdtXTF9CMMk42F9BOyRsEQ=
github.com/codahale/blake2 v0.0.0-20150924215134-8d10d0420cbf/go.mod h1:BO2rLUAZMrpgh6GBVKi0Gjdqw2MgCtJrtmUdDeZRKjY=
github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd/go.mod h1:sE/e/2PUdi/liOCUjSTXgM1o87ZssimdTWN964YiIeI=
github.com/colinmarc/hdfs/v2 v2.1.1/go.mod h1:M3x+k8UKKmxtFu++uAZ0OtDU8jR3jnaZIAc6yK4Ue0c=
github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk=
@@ -172,8 +170,8 @@ github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZm
github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no=
github.com/dolthub/fslock v0.0.3 h1:iLMpUIvJKMKm92+N1fmHVdxJP5NdyDK5bK7z7Ba2s2U=
github.com/dolthub/fslock v0.0.3/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0=
github.com/dolthub/go-mysql-server v0.11.1-0.20220208174427-9756c7d7167f h1:bbLVyFtC7Wm2q1QZZGFUSyclY9nKUTsQ+Sn3ywkwpOs=
github.com/dolthub/go-mysql-server v0.11.1-0.20220208174427-9756c7d7167f/go.mod h1:X2i6+DzsBgl5uDu1dzNayauCEZFUE+qIEriSv4M8v3s=
github.com/dolthub/go-mysql-server v0.11.1-0.20220215141938-c484f95c3408 h1:+pG8pYVBWPfDtAF1YMGfvyHvfjDrOjii+AkzNcZLBTA=
github.com/dolthub/go-mysql-server v0.11.1-0.20220215141938-c484f95c3408/go.mod h1:fa5urhUZz6+UWMVrTcgxl/INnDGaqmkl89V/4mocqrY=
github.com/dolthub/ishell v0.0.0-20220112232610-14e753f0f371 h1:oyPHJlzumKta1vnOQqUnfdz+pk3EmnHS3Nd0cCT0I2g=
github.com/dolthub/ishell v0.0.0-20220112232610-14e753f0f371/go.mod h1:dhGBqcCEfK5kuFmeO5+WOx3hqc1k3M29c1oS/R7N4ms=
github.com/dolthub/jsonpath v0.0.0-20210609232853-d49537a30474 h1:xTrR+l5l+1Lfq0NvhiEsctylXinUMFhhsqaEcl414p8=
@@ -182,8 +180,8 @@ github.com/dolthub/mmap-go v1.0.4-0.20201107010347-f9f2a9588a66 h1:WRPDbpJWEnPxP
github.com/dolthub/mmap-go v1.0.4-0.20201107010347-f9f2a9588a66/go.mod h1:N5ZIbMGuDUpTpOFQ7HcsN6WSIpTGQjHP+Mz27AfmAgk=
github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81 h1:7/v8q9XGFa6q5Ap4Z/OhNkAMBaK5YeuEzwJt+NZdhiE=
github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81/go.mod h1:siLfyv2c92W1eN/R4QqG/+RjjX5W2+gCTRjZxBjI3TY=
github.com/dolthub/vitess v0.0.0-20220205072827-9c6acb39686a h1:+61CpK9SwG/QFNE+vn6Fxk00GRQgtR+CA6Nvsr87y8g=
github.com/dolthub/vitess v0.0.0-20220205072827-9c6acb39686a/go.mod h1:qpZ4j0dval04OgZJ5fyKnlniSFUosTH280pdzUjUJig=
github.com/dolthub/vitess v0.0.0-20220207220721-35d6793fac38 h1:qUbVRsX2CPyjj/uLrPu9L69rGiYRb5vwzw7PC5c/Wh8=
github.com/dolthub/vitess v0.0.0-20220207220721-35d6793fac38/go.mod h1:qpZ4j0dval04OgZJ5fyKnlniSFUosTH280pdzUjUJig=
github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=

View File

@@ -23,6 +23,7 @@ import (
"github.com/dolthub/go-mysql-server/sql"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/zap/buffer"
"github.com/dolthub/dolt/go/libraries/doltcore/dbfactory"
@@ -128,13 +129,17 @@ func TestPushOnWriteHook(t *testing.T) {
t.Run("replicate to remote", func(t *testing.T) {
srcCommit, err := ddb.Commit(context.Background(), valHash, ref.NewBranchRef(defaultBranch), meta)
require.NoError(t, err)
ds, err := ddb.db.GetDataset(ctx, "refs/heads/main")
require.NoError(t, err)
err = hook.Execute(ctx, ds, ddb.db)
assert.NoError(t, err)
require.NoError(t, err)
cs, _ = NewCommitSpec(defaultBranch)
destCommit, err := destDB.Resolve(context.Background(), cs, nil)
require.NoError(t, err)
srcHash, _ := srcCommit.HashOf()
destHash, _ := destCommit.HashOf()
assert.Equal(t, srcHash, destHash)
@@ -252,12 +257,15 @@ func TestAsyncPushOnWrite(t *testing.T) {
meta, err = NewCommitMeta(committerName, committerEmail, "Sample data")
if err != nil {
t.Error("Failed to commit")
t.Error("Failed to create CommitMeta")
}
_, err = ddb.Commit(context.Background(), valHash, ref.NewBranchRef(defaultBranch), meta)
require.NoError(t, err)
ds, err := ddb.db.GetDataset(ctx, "refs/heads/main")
require.NoError(t, err)
err = hook.Execute(ctx, ds, ddb.db)
require.NoError(t, err)
}
})
}

View File

@@ -803,6 +803,31 @@ func (ddb *DoltDB) GetTags(ctx context.Context) ([]ref.DoltRef, error) {
return ddb.GetRefsOfType(ctx, tagsRefFilter)
}
// TagWithHash pairs a tag ref with the hash of the commit it points to.
type TagWithHash struct {
	Ref  ref.DoltRef
	Hash hash.Hash
}

// GetTagsWithHashes returns a list of objects containing TagRefs with their
// associated Commit's hash.
func (ddb *DoltDB) GetTagsWithHashes(ctx context.Context) ([]TagWithHash, error) {
	var tags []TagWithHash
	err := ddb.VisitRefsOfType(ctx, tagsRefFilter, func(r ref.DoltRef, v types.Value) error {
		tr, ok := r.(ref.TagRef)
		if !ok {
			// Non-tag refs are skipped without error.
			return nil
		}
		tag, err := ddb.ResolveTag(ctx, tr)
		if err != nil {
			return err
		}
		h, err := tag.Commit.HashOf()
		if err != nil {
			return err
		}
		tags = append(tags, TagWithHash{r, h})
		return nil
	})
	return tags, err
}
var workspacesRefFilter = map[ref.RefType]struct{}{ref.WorkspaceRefType: {}}
// GetWorkspaces returns a list of all workspaces in the database.
@@ -817,6 +842,22 @@ func (ddb *DoltDB) GetRemoteRefs(ctx context.Context) ([]ref.DoltRef, error) {
return ddb.GetRefsOfType(ctx, remotesRefFilter)
}
// RemoteWithHash pairs a remote ref with the hash its value targets.
type RemoteWithHash struct {
	Ref  ref.DoltRef
	Hash hash.Hash
}

// GetRemotesWithHashes returns each remote ref in the database paired with
// the target hash of the value it references.
func (ddb *DoltDB) GetRemotesWithHashes(ctx context.Context) ([]RemoteWithHash, error) {
	var remotes []RemoteWithHash
	err := ddb.VisitRefsOfType(ctx, remotesRefFilter, func(r ref.DoltRef, v types.Value) error {
		if target, ok := v.(types.Ref); ok {
			remotes = append(remotes, RemoteWithHash{r, target.TargetHash()})
		}
		return nil
	})
	return remotes, err
}
// GetHeadRefs returns a list of all refs that point to a Commit
func (ddb *DoltDB) GetHeadRefs(ctx context.Context) ([]ref.DoltRef, error) {
return ddb.GetRefsOfType(ctx, ref.HeadRefTypes)
@@ -1163,6 +1204,9 @@ func (ddb *DoltDB) GC(ctx context.Context, uncommitedVals ...hash.Hash) error {
}
datasets, err := ddb.db.Datasets(ctx)
if err != nil {
return err
}
newGen := hash.NewHashSet(uncommitedVals...)
oldGen := make(hash.HashSet)
err = datasets.IterAll(ctx, func(key, value types.Value) error {

View File

@@ -70,6 +70,11 @@ func (c *ChannelRowSource) WithChildren(children ...sql.Node) (sql.Node, error)
return c, nil
}
// CheckPrivileges implements the sql.Node interface. It always returns true:
// no privilege checks are applied to this node.
func (c *ChannelRowSource) CheckPrivileges(ctx *sql.Context, opChecker sql.PrivilegedOperationChecker) bool {
	return true
}
// channelRowIter wraps the channel under the sql.RowIter interface
type channelRowIter struct {
rowChannel chan sql.Row

View File

@@ -298,7 +298,7 @@ func (s *SqlEngineTableWriter) createTable() error {
return err
}
analyzedQueryProcess := analyzer.StripQueryProcess(analyzed.(*plan.QueryProcess))
analyzedQueryProcess := analyzer.StripPassthroughNodes(analyzed.(*plan.QueryProcess))
ri, err := analyzedQueryProcess.RowIter(s.sqlCtx, nil)
if err != nil {
@@ -342,7 +342,7 @@ func (s *SqlEngineTableWriter) createInsertImportNode(source chan sql.Row, ignor
return nil, err
}
analyzed = analyzer.StripQueryProcess(analyzed)
analyzed = analyzer.StripPassthroughNodes(analyzed)
// Get the first insert (wrapped with the error handler)
plan.Inspect(analyzed, func(node sql.Node) bool {

View File

@@ -20,6 +20,7 @@ import (
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/dolt/go/store/geometry"
"github.com/dolthub/dolt/go/store/types"
)
@@ -108,9 +109,9 @@ func (ti *linestringType) Equals(other TypeInfo) bool {
// FormatValue implements TypeInfo interface.
func (ti *linestringType) FormatValue(v types.Value) (*string, error) {
if val, ok := v.(types.Linestring); ok {
buf := make([]byte, types.EWKBHeaderSize+types.LengthSize+types.PointDataSize*len(val.Points))
types.WriteEWKBHeader(val, buf[:types.EWKBHeaderSize])
types.WriteEWKBLineData(val, buf[types.EWKBHeaderSize:])
buf := make([]byte, geometry.EWKBHeaderSize+types.LengthSize+geometry.PointSize*len(val.Points))
types.WriteEWKBHeader(val, buf[:geometry.EWKBHeaderSize])
types.WriteEWKBLineData(val, buf[geometry.EWKBHeaderSize:])
resStr := string(buf)
return &resStr, nil
}

View File

@@ -20,6 +20,7 @@ import (
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/dolt/go/store/geometry"
"github.com/dolthub/dolt/go/store/types"
)
@@ -102,9 +103,9 @@ func (ti *pointType) Equals(other TypeInfo) bool {
// FormatValue implements TypeInfo interface.
func (ti *pointType) FormatValue(v types.Value) (*string, error) {
if val, ok := v.(types.Point); ok {
buf := make([]byte, types.EWKBHeaderSize+types.PointDataSize)
types.WriteEWKBHeader(val, buf[:types.EWKBHeaderSize])
types.WriteEWKBPointData(val, buf[types.EWKBHeaderSize:])
buf := make([]byte, geometry.EWKBHeaderSize+geometry.PointSize)
types.WriteEWKBHeader(val, buf[:geometry.EWKBHeaderSize])
types.WriteEWKBPointData(val, buf[geometry.EWKBHeaderSize:])
resStr := string(buf)
return &resStr, nil
}

View File

@@ -20,6 +20,7 @@ import (
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/dolt/go/store/geometry"
"github.com/dolthub/dolt/go/store/types"
)
@@ -108,13 +109,13 @@ func (ti *polygonType) Equals(other TypeInfo) bool {
// FormatValue implements TypeInfo interface.
func (ti *polygonType) FormatValue(v types.Value) (*string, error) {
if val, ok := v.(types.Polygon); ok {
size := types.EWKBHeaderSize + types.LengthSize
size := geometry.EWKBHeaderSize + types.LengthSize
for _, l := range val.Lines {
size += types.LengthSize + types.PointDataSize*len(l.Points)
size += types.LengthSize + geometry.PointSize*len(l.Points)
}
buf := make([]byte, size)
types.WriteEWKBHeader(val, buf[:types.EWKBHeaderSize])
types.WriteEWKBPolyData(val, buf[types.EWKBHeaderSize:])
types.WriteEWKBHeader(val, buf[:geometry.EWKBHeaderSize])
types.WriteEWKBPolyData(val, buf[geometry.EWKBHeaderSize:])
resStr := string(buf)
return &resStr, nil
}

View File

@@ -18,8 +18,6 @@ import (
"context"
"fmt"
"math"
"os"
"sync"
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/vitess/go/sqltypes"
@@ -27,24 +25,6 @@ import (
"github.com/dolthub/dolt/go/store/types"
)
const spatialTypesFeatureFlagKey = "DOLT_ENABLE_SPATIAL_TYPES"
// use SpatialTypesEnabled() to check, don't access directly
var spatialTypesFeatureFlag = false
func init() {
// set the spatial types feature flag to true if the env var is set
if v, ok := os.LookupEnv(spatialTypesFeatureFlagKey); ok && v != "" {
spatialTypesFeatureFlag = true
}
}
var spatialTypesLock = &sync.RWMutex{}
func SpatialTypesEnabled() bool {
return spatialTypesFeatureFlag
}
type Identifier string
const (
@@ -266,17 +246,6 @@ func FromSqlType(sqlType sql.Type) (TypeInfo, error) {
// FromTypeParams constructs a TypeInfo from the given identifier and parameters.
func FromTypeParams(id Identifier, params map[string]string) (TypeInfo, error) {
if SpatialTypesEnabled() {
switch id {
case PointTypeIdentifier:
return PointType, nil
case LinestringTypeIdentifier:
return LinestringType, nil
case PolygonTypeIdentifier:
return PolygonType, nil
}
}
switch id {
case BitTypeIdentifier:
return CreateBitTypeFromParams(params)
@@ -298,6 +267,12 @@ func FromTypeParams(id Identifier, params map[string]string) (TypeInfo, error) {
return CreateIntTypeFromParams(params)
case JSONTypeIdentifier:
return JSONType, nil
case PointTypeIdentifier:
return PointType, nil
case LinestringTypeIdentifier:
return LinestringType, nil
case PolygonTypeIdentifier:
return PolygonType, nil
case SetTypeIdentifier:
return CreateSetTypeFromParams(params)
case TimeTypeIdentifier:

View File

@@ -29,15 +29,6 @@ import (
"github.com/dolthub/dolt/go/store/types"
)
func testWithSpatialTypesEnabled(cb func()) {
spatialTypesLock.Lock()
defer spatialTypesLock.Unlock()
spatialTypesFeatureFlag = true
cb()
spatialTypesFeatureFlag = false
}
func TestTypeInfoSuite(t *testing.T) {
typeInfoArrays, validTypeValues := generateTypeInfoArrays(t)
t.Run("VerifyArray", func(t *testing.T) {
@@ -234,11 +225,9 @@ func testTypeInfoGetTypeParams(t *testing.T, tiArrays [][]TypeInfo) {
ti.GetTypeIdentifier() == LinestringTypeIdentifier ||
ti.GetTypeIdentifier() == PolygonTypeIdentifier {
t.Run(ti.String(), func(t *testing.T) {
testWithSpatialTypesEnabled(func() {
newTi, err := FromTypeParams(ti.GetTypeIdentifier(), ti.GetTypeParams())
require.NoError(t, err)
require.True(t, ti.Equals(newTi), "%v\n%v", ti.String(), newTi.String())
})
newTi, err := FromTypeParams(ti.GetTypeIdentifier(), ti.GetTypeParams())
require.NoError(t, err)
require.True(t, ti.Equals(newTi), "%v\n%v", ti.String(), newTi.String())
})
} else {
t.Run(ti.String(), func(t *testing.T) {

View File

@@ -22,6 +22,7 @@ import (
"time"
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/grant_tables"
"github.com/dolthub/vitess/go/vt/proto/query"
"gopkg.in/src-d/go-errors.v1"
@@ -59,8 +60,15 @@ type SqlDatabase interface {
func DbsAsDSQLDBs(dbs []sql.Database) []SqlDatabase {
dsqlDBs := make([]SqlDatabase, 0, len(dbs))
for _, db := range dbs {
sqlDb, ok := db.(SqlDatabase)
if !ok {
var sqlDb SqlDatabase
if sqlDatabase, ok := db.(SqlDatabase); ok {
sqlDb = sqlDatabase
} else if privDatabase, ok := db.(grant_tables.PrivilegedDatabase); ok {
if sqlDatabase, ok := privDatabase.Unwrap().(SqlDatabase); ok {
sqlDb = sqlDatabase
}
}
if sqlDb == nil {
continue
}
switch v := sqlDb.(type) {

View File

@@ -98,7 +98,7 @@ func (p DoltDatabaseProvider) WithDbFactoryUrl(url string) DoltDatabaseProvider
return p
}
func (p DoltDatabaseProvider) Database(name string) (db sql.Database, err error) {
func (p DoltDatabaseProvider) Database(ctx *sql.Context, name string) (db sql.Database, err error) {
name = strings.ToLower(name)
var ok bool
p.mu.RLock()
@@ -108,7 +108,7 @@ func (p DoltDatabaseProvider) Database(name string) (db sql.Database, err error)
return db, nil
}
db, _, ok, err = p.databaseForRevision(context.Background(), name)
db, _, ok, err = p.databaseForRevision(ctx, name)
if err != nil {
return nil, err
}
@@ -128,12 +128,12 @@ func (p DoltDatabaseProvider) Database(name string) (db sql.Database, err error)
}
func (p DoltDatabaseProvider) HasDatabase(name string) bool {
_, err := p.Database(name)
func (p DoltDatabaseProvider) HasDatabase(ctx *sql.Context, name string) bool {
_, err := p.Database(ctx, name)
return err == nil
}
func (p DoltDatabaseProvider) AllDatabases() (all []sql.Database) {
func (p DoltDatabaseProvider) AllDatabases(ctx *sql.Context) (all []sql.Database) {
p.mu.RLock()
defer p.mu.RUnlock()
@@ -286,7 +286,7 @@ func (p DoltDatabaseProvider) RevisionDbState(ctx context.Context, revDB string)
return init, nil
}
func (p DoltDatabaseProvider) Function(name string) (sql.Function, error) {
func (p DoltDatabaseProvider) Function(ctx *sql.Context, name string) (sql.Function, error) {
fn, ok := p.functions[strings.ToLower(name)]
if !ok {
return nil, sql.ErrFunctionNotFound.New(name)

View File

@@ -39,7 +39,7 @@ func init() {
sql.SystemVariables.AddSystemVariables([]sql.SystemVariable{
{ // If true, causes a Dolt commit to occur when you commit a transaction.
Name: DoltCommitOnTransactionCommit,
Scope: sql.SystemVariableScope_Session,
Scope: sql.SystemVariableScope_Both,
Dynamic: true,
SetVarHintApplies: false,
Type: sql.NewSystemBoolType(DoltCommitOnTransactionCommit),

View File

@@ -73,14 +73,20 @@ func TestSingleScript(t *testing.T) {
var scripts = []enginetest.ScriptTest{
{
Name: "CrossDB Queries",
Name: "insert into sparse auto_increment table",
SetUpScript: []string{
"create table mytable (i bigint primary key, s varchar(200));",
"create table auto (pk int primary key auto_increment)",
"insert into auto values (10), (20), (30)",
"insert into auto values (NULL)",
"insert into auto values (40)",
"insert into auto values (0)",
},
Assertions: []enginetest.ScriptTestAssertion{
{
Query: "ALTER TABLE mytable ADD COLUMN s2 TEXT COMMENT 'hello' AFTER i",
Expected: nil,
Query: "select * from auto order by 1",
Expected: []sql.Row{
{10}, {20}, {30}, {31}, {40}, {41},
},
},
},
},
@@ -213,6 +219,14 @@ func TestScripts(t *testing.T) {
enginetest.TestScripts(t, newDoltHarness(t).WithSkippedQueries(skipped))
}
func TestUserPrivileges(t *testing.T) {
enginetest.TestUserPrivileges(t, newDoltHarness(t))
}
func TestUserAuthentication(t *testing.T) {
enginetest.TestUserAuthentication(t, newDoltHarness(t))
}
func TestComplexIndexQueries(t *testing.T) {
enginetest.TestComplexIndexQueries(t, newDoltHarness(t))
}
@@ -303,6 +317,22 @@ func TestVersionedViews(t *testing.T) {
enginetest.TestVersionedViews(t, newDoltHarness(t))
}
func TestWindowFunctions(t *testing.T) {
enginetest.TestWindowFunctions(t, newDoltHarness(t))
}
func TestWindowRowFrames(t *testing.T) {
enginetest.TestWindowRowFrames(t, newDoltHarness(t))
}
func TestWindowRangeFrames(t *testing.T) {
enginetest.TestWindowRangeFrames(t, newDoltHarness(t))
}
func TestNamedWindows(t *testing.T) {
enginetest.TestNamedWindows(t, newDoltHarness(t))
}
func TestNaturalJoin(t *testing.T) {
enginetest.TestNaturalJoin(t, newDoltHarness(t))
}

View File

@@ -51,6 +51,7 @@ type DoltHarness struct {
var _ enginetest.Harness = (*DoltHarness)(nil)
var _ enginetest.SkippingHarness = (*DoltHarness)(nil)
var _ enginetest.ClientHarness = (*DoltHarness)(nil)
var _ enginetest.IndexHarness = (*DoltHarness)(nil)
var _ enginetest.VersionedDBHarness = (*DoltHarness)(nil)
var _ enginetest.ForeignKeyHarness = (*DoltHarness)(nil)
@@ -144,32 +145,36 @@ func (d *DoltHarness) Parallelism() int {
}
func (d *DoltHarness) NewContext() *sql.Context {
return sql.NewContext(
context.Background(),
sql.WithSession(d.session))
return sql.NewContext(context.Background(), sql.WithSession(d.session))
}
func (d *DoltHarness) NewContextWithClient(client sql.Client) *sql.Context {
return sql.NewContext(context.Background(), sql.WithSession(d.newSessionWithClient(client)))
}
func (d *DoltHarness) NewSession() *sql.Context {
d.session = d.newSessionWithClient(sql.Client{Address: "localhost", User: "root"})
return d.NewContext()
}
func (d *DoltHarness) newSessionWithClient(client sql.Client) *dsess.DoltSession {
states := make([]dsess.InitialDbState, len(d.databases))
for i, db := range d.databases {
states[i] = getDbState(d.t, db, d.env)
}
dbs := dsqleDBsAsSqlDBs(d.databases)
pro := d.NewDatabaseProvider(dbs...)
localConfig := d.env.Config.WriteableConfig()
var err error
d.session, err = dsess.NewDoltSession(
dSession, err := dsess.NewDoltSession(
enginetest.NewContext(d),
enginetest.NewBaseSession(),
sql.NewBaseSessionWithClientServer("address", client, 1),
pro.(dsess.RevisionDatabaseProvider),
localConfig,
states...,
)
require.NoError(d.t, err)
return d.NewContext()
return dSession
}
func (d *DoltHarness) SupportsNativeIndexCreation() bool {
@@ -224,6 +229,9 @@ func (d *DoltHarness) NewReadOnlyDatabases(names ...string) (dbs []sql.ReadOnlyD
}
func (d *DoltHarness) NewDatabaseProvider(dbs ...sql.Database) sql.MutableDatabaseProvider {
if d.env == nil {
d.env = dtestutils.CreateTestEnv()
}
mrEnv, err := env.DoltEnvAsMultiEnv(context.Background(), d.env)
require.NoError(d.t, err)
pro := sqle.NewDoltDatabaseProvider(d.env.Config, mrEnv.FileSystem(), dbs...)
@@ -303,7 +311,7 @@ func (d *DoltHarness) SnapshotTable(db sql.VersionedDatabase, name string, asOf
_, iter, err := e.Query(ctx,
"set @@"+dsess.HeadKey(db.Name())+" = COMMIT('-m', 'test commit');")
require.NoError(d.t, err)
_, err = sql.RowIterToRows(ctx, iter)
_, err = sql.RowIterToRows(ctx, nil, iter)
require.NoError(d.t, err)
headHash, err := ctx.GetSessionVariable(ctx, dsess.HeadKey(db.Name()))
@@ -318,7 +326,7 @@ func (d *DoltHarness) SnapshotTable(db sql.VersionedDatabase, name string, asOf
_, iter, err = e.Query(ctx,
query)
require.NoError(d.t, err)
_, err = sql.RowIterToRows(ctx, iter)
_, err = sql.RowIterToRows(ctx, nil, iter)
require.NoError(d.t, err)
return nil

View File

@@ -478,7 +478,7 @@ func isBindingCut(cut sql.RangeCut) bool {
func tupleFromKeys(keys sql.Row, tb *val.TupleBuilder) (val.Tuple, error) {
for i, v := range keys {
tb.PutField(i, v)
PutField(tb, i, v)
}
return tb.BuildPermissive(sharePool), nil
}

View File

@@ -1327,7 +1327,7 @@ func TestMergeableIndexes(t *testing.T) {
_, iter, err := engine.Query(sqlCtx, query)
require.NoError(t, err)
res, err := sql.RowIterToRows(sqlCtx, iter)
res, err := sql.RowIterToRows(sqlCtx, nil, iter)
require.NoError(t, err)
if assert.Equal(t, len(test.pks), len(res)) {
@@ -1543,7 +1543,7 @@ func TestMergeableIndexesNulls(t *testing.T) {
_, iter, err := engine.Query(sqlCtx, query)
require.NoError(t, err)
res, err := sql.RowIterToRows(sqlCtx, iter)
res, err := sql.RowIterToRows(sqlCtx, nil, iter)
require.NoError(t, err)
if assert.Equal(t, len(test.pks), len(res)) {
for i, pk := range test.pks {

View File

@@ -0,0 +1,223 @@
// Copyright 2022 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package index
import (
"encoding/json"
"fmt"
"time"
"github.com/dolthub/go-mysql-server/sql"
geo "github.com/dolthub/dolt/go/store/geometry"
"github.com/dolthub/dolt/go/store/val"
)
// GetField reads the value from the ith field of the Tuple as an interface{}.
// The field's encoding (td.Types[i].Enc) selects the typed accessor used to
// decode it. The accessors signal a NULL field via ok == false, in which case
// GetField returns (nil, nil). An unrecognized encoding panics.
func GetField(td val.TupleDesc, i int, tup val.Tuple) (v interface{}, err error) {
	var ok bool
	switch td.Types[i].Enc {
	case val.Int8Enc:
		v, ok = td.GetInt8(i, tup)
	case val.Uint8Enc:
		v, ok = td.GetUint8(i, tup)
	case val.Int16Enc:
		v, ok = td.GetInt16(i, tup)
	case val.Uint16Enc:
		v, ok = td.GetUint16(i, tup)
	case val.Int32Enc:
		v, ok = td.GetInt32(i, tup)
	case val.Uint32Enc:
		v, ok = td.GetUint32(i, tup)
	case val.Int64Enc:
		v, ok = td.GetInt64(i, tup)
	case val.Uint64Enc:
		v, ok = td.GetUint64(i, tup)
	case val.Float32Enc:
		v, ok = td.GetFloat32(i, tup)
	case val.Float64Enc:
		v, ok = td.GetFloat64(i, tup)
	case val.DecimalEnc:
		v, ok = td.GetDecimal(i, tup)
	case val.TimeEnc:
		v, ok = td.GetSqlTime(i, tup)
	case val.YearEnc:
		v, ok = td.GetYear(i, tup)
	case val.TimestampEnc, val.DateEnc, val.DatetimeEnc:
		v, ok = td.GetTimestamp(i, tup)
	case val.StringEnc:
		v, ok = td.GetString(i, tup)
	case val.BytesEnc:
		v, ok = td.GetBytes(i, tup)
	case val.JSONEnc:
		// JSON is stored as marshaled bytes; unmarshal into a sql.JSONDocument.
		var buf []byte
		buf, ok = td.GetJSON(i, tup)
		if ok {
			var doc sql.JSONDocument
			err = json.Unmarshal(buf, &doc.Val)
			v = doc
		}
	case val.GeometryEnc:
		// Geometry is stored in EWKB form; decode to a sql geometry value.
		var buf []byte
		buf, ok = td.GetGeometry(i, tup)
		if ok {
			v = deserializeGeometry(buf)
		}
	default:
		panic("unknown val.encoding")
	}
	// !ok means the field was NULL: return (nil, nil). If decoding failed
	// (err != nil), return (nil, err) rather than a partial value.
	if !ok || err != nil {
		return nil, err
	}
	return v, err
}
// PutField writes an interface{} to the ith field of the Tuple being built.
// A nil v leaves the field unset (NULL). The dynamic type of v must match the
// field's encoding: integer encodings accept any Go integer type (converted
// via convInt/convUint); Float32/Float64 expect exactly float32/float64;
// Decimal and Time expect string; Year expects int16; Date/Datetime/Timestamp
// expect time.Time; Bytes accepts []byte or string; JSON expects
// sql.JSONDocument; Geometry expects a sql geometry value. A mismatched type
// panics via the type assertion; an unknown encoding panics explicitly.
func PutField(tb *val.TupleBuilder, i int, v interface{}) error {
	if v == nil {
		return nil // NULL
	}

	enc := tb.Desc.Types[i].Enc
	switch enc {
	case val.Int8Enc:
		tb.PutInt8(i, int8(convInt(v)))
	case val.Uint8Enc:
		tb.PutUint8(i, uint8(convUint(v)))
	case val.Int16Enc:
		tb.PutInt16(i, int16(convInt(v)))
	case val.Uint16Enc:
		tb.PutUint16(i, uint16(convUint(v)))
	case val.Int32Enc:
		tb.PutInt32(i, int32(convInt(v)))
	case val.Uint32Enc:
		tb.PutUint32(i, uint32(convUint(v)))
	case val.Int64Enc:
		tb.PutInt64(i, int64(convInt(v)))
	case val.Uint64Enc:
		tb.PutUint64(i, uint64(convUint(v)))
	case val.Float32Enc:
		tb.PutFloat32(i, v.(float32))
	case val.Float64Enc:
		tb.PutFloat64(i, v.(float64))
	case val.DecimalEnc:
		tb.PutDecimal(i, v.(string))
	case val.TimeEnc:
		tb.PutSqlTime(i, v.(string))
	case val.YearEnc:
		tb.PutYear(i, v.(int16))
	case val.DateEnc, val.DatetimeEnc, val.TimestampEnc:
		tb.PutTimestamp(i, v.(time.Time))
	case val.StringEnc:
		tb.PutString(i, v.(string))
	case val.BytesEnc:
		// Accept string as a convenience; store as raw bytes either way.
		if s, ok := v.(string); ok {
			v = []byte(s)
		}
		tb.PutBytes(i, v.([]byte))
	case val.GeometryEnc:
		tb.PutGeometry(i, serializeGeometry(v))
	case val.JSONEnc:
		// JSON documents are stored as their marshaled bytes; marshaling is
		// the only fallible step in this function.
		buf, err := json.Marshal(v.(sql.JSONDocument).Val)
		if err != nil {
			return err
		}
		tb.PutJSON(i, buf)
	default:
		panic(fmt.Sprintf("unknown encoding %v %v", enc, v))
	}
	return nil
}
// convInt normalizes any Go integer value to the machine int type.
// Non-integer inputs panic.
func convInt(v interface{}) int {
	if n, ok := v.(int); ok {
		return n
	}
	switch n := v.(type) {
	case int8:
		return int(n)
	case uint8:
		return int(n)
	case int16:
		return int(n)
	case uint16:
		return int(n)
	case int32:
		return int(n)
	case uint32:
		return int(n)
	case int64:
		return int(n)
	case uint64:
		return int(n)
	}
	panic("impossible conversion")
}
// convUint normalizes any Go integer value to the machine uint type.
// Non-integer inputs panic.
func convUint(v interface{}) uint {
	if n, ok := v.(uint); ok {
		return n
	}
	switch n := v.(type) {
	case int:
		return uint(n)
	case int8:
		return uint(n)
	case uint8:
		return uint(n)
	case int16:
		return uint(n)
	case uint16:
		return uint(n)
	case int32:
		return uint(n)
	case uint32:
		return uint(n)
	case int64:
		return uint(n)
	case uint64:
		return uint(n)
	}
	panic("impossible conversion")
}
// deserializeGeometry decodes an EWKB-encoded geometry value into the
// corresponding sql type (sql.Point, sql.Linestring, or sql.Polygon).
// Panics on an unrecognized geometry type tag.
func deserializeGeometry(buf []byte) (v interface{}) {
	// The endianness flag from the header is discarded; decoding downstream
	// assumes little-endian payloads. NOTE(review): confirm big-endian EWKB
	// is never stored.
	srid, _, typ := geo.ParseEWKBHeader(buf)
	buf = buf[geo.EWKBHeaderSize:]
	switch typ {
	case geo.PointType:
		v = geo.DeserializePoint(buf, srid)
	case geo.LinestringType:
		v = geo.DeserializeLinestring(buf, srid)
	case geo.PolygonType:
		// NOTE(review): DeserializePolygon takes (srid, buf) — argument order
		// reversed relative to its Point/Linestring siblings; consider unifying.
		v = geo.DeserializePolygon(srid, buf)
	default:
		panic(fmt.Sprintf("unknown geometry type %d", typ))
	}
	return
}
// serializeGeometry encodes a sql geometry value (Point, Linestring, or
// Polygon) into its EWKB byte representation. Any other type panics.
func serializeGeometry(v interface{}) []byte {
	if p, ok := v.(sql.Point); ok {
		return geo.SerializePoint(p)
	}
	if l, ok := v.(sql.Linestring); ok {
		return geo.SerializeLinestring(l)
	}
	if poly, ok := v.(sql.Polygon); ok {
		return geo.SerializePolygon(poly)
	}
	panic(fmt.Sprintf("unknown geometry %v", v))
}

View File

@@ -0,0 +1,198 @@
// Copyright 2022 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package index
import (
"encoding/json"
"math"
"testing"
"time"
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/expression/function"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/dolthub/dolt/go/store/pool"
"github.com/dolthub/dolt/go/store/val"
)
// prollyFieldTest describes one round-trip case: a value and the prolly
// field type it is stored as.
type prollyFieldTest struct {
	name  string      // subtest name
	value interface{} // value written via PutField and expected back from GetField
	typ   val.Type    // field type/encoding under test
}
// TestRoundTripProllyFields verifies that every supported field encoding
// survives a PutField/GetField round trip unchanged, including NULL, all
// integer widths, floats, strings, bytes, temporal types, JSON, and the
// three geometry types.
func TestRoundTripProllyFields(t *testing.T) {
	tests := []prollyFieldTest{
		{
			name: "null",
			typ: val.Type{
				Enc:      val.Int8Enc,
				Nullable: true,
			},
			value: nil,
		},
		{
			name:  "int8",
			typ:   val.Type{Enc: val.Int8Enc},
			value: int8(-42),
		},
		{
			name:  "uint8",
			typ:   val.Type{Enc: val.Uint8Enc},
			value: uint8(42),
		},
		{
			name:  "int16",
			typ:   val.Type{Enc: val.Int16Enc},
			value: int16(-42),
		},
		{
			name:  "uint16",
			typ:   val.Type{Enc: val.Uint16Enc},
			value: uint16(42),
		},
		{
			name:  "int32",
			typ:   val.Type{Enc: val.Int32Enc},
			value: int32(-42),
		},
		{
			name:  "uint32",
			typ:   val.Type{Enc: val.Uint32Enc},
			value: uint32(42),
		},
		{
			name:  "int64",
			typ:   val.Type{Enc: val.Int64Enc},
			value: int64(-42),
		},
		{
			name:  "uint64",
			typ:   val.Type{Enc: val.Uint64Enc},
			value: uint64(42),
		},
		{
			name:  "float32",
			typ:   val.Type{Enc: val.Float32Enc},
			value: float32(math.Pi),
		},
		{
			name:  "float64",
			typ:   val.Type{Enc: val.Float64Enc},
			value: float64(-math.Pi),
		},
		{
			name:  "string",
			typ:   val.Type{Enc: val.StringEnc},
			value: "lorem ipsum",
		},
		{
			name:  "bytes",
			typ:   val.Type{Enc: val.BytesEnc},
			value: []byte("lorem ipsum"),
		},
		{
			name:  "year",
			typ:   val.Type{Enc: val.YearEnc},
			value: int16(2022),
		},
		// NOTE(review): temporal cases use time.Now() — assumes the encoding
		// preserves the full precision of the wall clock; confirm if flaky.
		{
			name:  "date",
			typ:   val.Type{Enc: val.DateEnc},
			value: time.Now().UTC(),
		},
		{
			name:  "datetime",
			typ:   val.Type{Enc: val.DatetimeEnc},
			value: time.Now().UTC(),
		},
		{
			name:  "timestamp",
			typ:   val.Type{Enc: val.TimestampEnc},
			value: time.Now().UTC(),
		},
		{
			name:  "json",
			typ:   val.Type{Enc: val.JSONEnc},
			value: mustParseJson(t, `{"a": 1, "b": false}`),
		},
		{
			name:  "point",
			typ:   val.Type{Enc: val.GeometryEnc},
			value: mustParseGeometryType(t, "POINT(1 2)"),
		},
		{
			name:  "linestring",
			typ:   val.Type{Enc: val.GeometryEnc},
			value: mustParseGeometryType(t, "LINESTRING(1 2,3 4)"),
		},
		{
			name:  "polygon",
			typ:   val.Type{Enc: val.GeometryEnc},
			value: mustParseGeometryType(t, "POLYGON((0 0,1 1,1 0,0 0))"),
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			testRoundTripProllyFields(t, test)
		})
	}
}
var testPool = pool.NewBuffPool()
// testRoundTripProllyFields writes test.value into a one-field tuple with
// PutField, reads it back with GetField, and asserts the value survives the
// round trip unchanged.
func testRoundTripProllyFields(t *testing.T, test prollyFieldTest) {
	desc := val.NewTupleDescriptor(test.typ)
	builder := val.NewTupleBuilder(desc)

	err := PutField(builder, 0, test.value)
	assert.NoError(t, err)

	tup := builder.Build(testPool)

	v, err := GetField(desc, 0, tup)
	assert.NoError(t, err)
	assert.Equal(t, test.value, v)
}
// mustParseGeometryType parses a WKT geometry literal (POINT, LINESTRING, or
// POLYGON) into the corresponding sql value, failing the test on any parse
// error. SRID 0 and the default coordinate order are used.
func mustParseGeometryType(t *testing.T, s string) (v interface{}) {
	// Determine type, and get data
	geomType, data, err := function.ParseWKTHeader(s)
	require.NoError(t, err)

	srid, order := uint32(0), false
	switch geomType {
	case "point":
		v, err = function.WKTToPoint(data, srid, order)
	case "linestring":
		v, err = function.WKTToLine(data, srid, order)
	case "polygon":
		v, err = function.WKTToPoly(data, srid, order)
	default:
		// Unreachable for the literals used in this file's tests.
		panic("unknown geometry type")
	}
	require.NoError(t, err)
	return
}
// mustParseJson unmarshals s into a sql.JSONDocument, failing the test on
// invalid JSON.
func mustParseJson(t *testing.T, s string) sql.JSONDocument {
	var parsed interface{}
	require.NoError(t, json.Unmarshal([]byte(s), &parsed))
	return sql.JSONDocument{Val: parsed}
}

View File

@@ -137,20 +137,26 @@ func (p prollyIndexIter) queueRows(ctx context.Context) error {
}
}
func (p prollyIndexIter) rowFromTuples(key, value val.Tuple, r sql.Row) {
func (p prollyIndexIter) rowFromTuples(key, value val.Tuple, r sql.Row) (err error) {
keyDesc, valDesc := p.primary.Descriptors()
for keyIdx, rowIdx := range p.keyMap {
if rowIdx == -1 {
continue
}
r[rowIdx] = keyDesc.GetField(keyIdx, key)
r[rowIdx], err = GetField(keyDesc, keyIdx, key)
if err != nil {
return err
}
}
for valIdx, rowIdx := range p.valMap {
if rowIdx == -1 {
continue
}
r[rowIdx] = valDesc.GetField(valIdx, value)
r[rowIdx], err = GetField(valDesc, valIdx, value)
if err != nil {
return err
}
}
return
@@ -241,18 +247,23 @@ func (p prollyCoveringIndexIter) Next(ctx *sql.Context) (sql.Row, error) {
}
r := make(sql.Row, len(p.keyMap))
p.writeRowFromTuples(k, v, r)
if err := p.writeRowFromTuples(k, v, r); err != nil {
return nil, err
}
return r, nil
}
func (p prollyCoveringIndexIter) writeRowFromTuples(key, value val.Tuple, r sql.Row) {
func (p prollyCoveringIndexIter) writeRowFromTuples(key, value val.Tuple, r sql.Row) (err error) {
for to := range p.keyMap {
from := p.keyMap.MapOrdinal(to)
if from == -1 {
continue
}
r[to] = p.keyDesc.GetField(from, key)
r[to], err = GetField(p.keyDesc, from, key)
if err != nil {
return err
}
}
for to := range p.valMap {
@@ -260,7 +271,10 @@ func (p prollyCoveringIndexIter) writeRowFromTuples(key, value val.Tuple, r sql.
if from == -1 {
continue
}
r[to] = p.valDesc.GetField(from, value)
r[to], err = GetField(p.valDesc, from, value)
if err != nil {
return err
}
}
return

View File

@@ -22,12 +22,11 @@ import (
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/dolt/go/libraries/doltcore/schema"
"github.com/dolthub/dolt/go/store/pool"
"github.com/dolthub/dolt/go/store/prolly"
"github.com/dolthub/dolt/go/store/val"
)
type sqlRowIter struct {
type prollyRowIter struct {
ctx context.Context
iter prolly.MapRangeIter
@@ -38,7 +37,7 @@ type sqlRowIter struct {
rowLen int
}
var _ sql.RowIter = sqlRowIter{}
var _ sql.RowIter = prollyRowIter{}
func NewProllyRowIter(ctx context.Context, sch schema.Schema, rows prolly.Map, rng prolly.Range, projections []string) (sql.RowIter, error) {
if schema.IsKeyless(sch) {
@@ -71,7 +70,7 @@ func rowIterFromMapIter(
kd, vd := m.Descriptors()
return sqlRowIter{
return prollyRowIter{
ctx: ctx,
iter: iter,
keyDesc: kd,
@@ -110,7 +109,7 @@ func projectionMappings(sch schema.Schema, projs []string) (keyMap, valMap val.O
return
}
func (it sqlRowIter) Next(ctx *sql.Context) (sql.Row, error) {
func (it prollyRowIter) Next(ctx *sql.Context) (sql.Row, error) {
key, value, err := it.iter.Next(it.ctx)
if err != nil {
return nil, err
@@ -122,20 +121,24 @@ func (it sqlRowIter) Next(ctx *sql.Context) (sql.Row, error) {
if rowIdx == -1 {
continue
}
row[rowIdx] = it.keyDesc.GetField(keyIdx, key)
row[rowIdx], err = GetField(it.keyDesc, keyIdx, key)
if err != nil {
return nil, err
}
}
for valIdx, rowIdx := range it.valProj {
if rowIdx == -1 {
continue
}
row[rowIdx] = it.valDesc.GetField(valIdx, value)
row[rowIdx], err = GetField(it.valDesc, valIdx, value)
if err != nil {
return nil, err
}
}
return row, nil
}
func (it sqlRowIter) Close(ctx *sql.Context) error {
func (it prollyRowIter) Close(ctx *sql.Context) error {
return nil
}
var shimPool = pool.NewBuffPool()

View File

@@ -133,7 +133,7 @@ func innerInit(h *DoltHarness, dEnv *env.DoltEnv) error {
ctx := dsql.NewTestSQLCtx(context.Background())
h.sess = ctx.Session.(*dsess.DoltSession)
dbs := h.engine.Analyzer.Catalog.AllDatabases()
dbs := h.engine.Analyzer.Catalog.AllDatabases(ctx)
dsqlDBs := make([]dsql.Database, len(dbs))
for i, db := range dbs {
dsqlDB := db.(dsql.Database)

View File

@@ -46,7 +46,7 @@ func GetCreateTableStmt(ctx *sql.Context, engine *sqle.Engine, tableName string)
if err != nil {
return "", err
}
rows, err := sql.RowIterToRows(ctx, rowIter)
rows, err := sql.RowIterToRows(ctx, nil, rowIter)
if err != nil {
return "", err
}

View File

@@ -24,6 +24,7 @@ import (
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
"github.com/dolthub/dolt/go/libraries/doltcore/schema"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/globalstate"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/index"
"github.com/dolthub/dolt/go/store/pool"
"github.com/dolthub/dolt/go/store/prolly"
"github.com/dolthub/dolt/go/store/val"
@@ -284,7 +285,7 @@ func (m prollyIndexWriter) Map(ctx context.Context) (prolly.Map, error) {
func (m prollyIndexWriter) Insert(ctx *sql.Context, sqlRow sql.Row) error {
for to := range m.keyMap {
from := m.keyMap.MapOrdinal(to)
m.keyBld.PutField(to, sqlRow[from])
index.PutField(m.keyBld, to, sqlRow[from])
}
k := m.keyBld.Build(sharePool)
@@ -297,7 +298,7 @@ func (m prollyIndexWriter) Insert(ctx *sql.Context, sqlRow sql.Row) error {
for to := range m.valMap {
from := m.valMap.MapOrdinal(to)
m.valBld.PutField(to, sqlRow[from])
index.PutField(m.valBld, to, sqlRow[from])
}
v := m.valBld.Build(sharePool)
@@ -307,7 +308,7 @@ func (m prollyIndexWriter) Insert(ctx *sql.Context, sqlRow sql.Row) error {
func (m prollyIndexWriter) Delete(ctx *sql.Context, sqlRow sql.Row) error {
for to := range m.keyMap {
from := m.keyMap.MapOrdinal(to)
m.keyBld.PutField(to, sqlRow[from])
index.PutField(m.keyBld, to, sqlRow[from])
}
k := m.keyBld.Build(sharePool)
@@ -317,7 +318,7 @@ func (m prollyIndexWriter) Delete(ctx *sql.Context, sqlRow sql.Row) error {
func (m prollyIndexWriter) Update(ctx *sql.Context, oldRow sql.Row, newRow sql.Row) error {
for to := range m.keyMap {
from := m.keyMap.MapOrdinal(to)
m.keyBld.PutField(to, oldRow[from])
index.PutField(m.keyBld, to, oldRow[from])
}
oldKey := m.keyBld.Build(sharePool)
@@ -329,7 +330,7 @@ func (m prollyIndexWriter) Update(ctx *sql.Context, oldRow sql.Row, newRow sql.R
for to := range m.keyMap {
from := m.keyMap.MapOrdinal(to)
m.keyBld.PutField(to, newRow[from])
index.PutField(m.keyBld, to, newRow[from])
}
newKey := m.keyBld.Build(sharePool)
@@ -342,7 +343,7 @@ func (m prollyIndexWriter) Update(ctx *sql.Context, oldRow sql.Row, newRow sql.R
for to := range m.valMap {
from := m.valMap.MapOrdinal(to)
m.valBld.PutField(to, newRow[from])
index.PutField(m.valBld, to, newRow[from])
}
v := m.valBld.Build(sharePool)
@@ -350,26 +351,30 @@ func (m prollyIndexWriter) Update(ctx *sql.Context, oldRow sql.Row, newRow sql.R
}
func (m prollyIndexWriter) primaryKeyError(ctx context.Context, key val.Tuple) error {
existing := make(sql.Row, len(m.keyMap)+len(m.valMap))
dupe := make(sql.Row, len(m.keyMap)+len(m.valMap))
_ = m.mut.Get(ctx, key, func(key, value val.Tuple) (err error) {
kd := m.keyBld.Desc
for from := range m.keyMap {
to := m.keyMap.MapOrdinal(from)
existing[to] = kd.GetField(from, key)
if dupe[to], err = index.GetField(kd, from, key); err != nil {
return err
}
}
vd := m.valBld.Desc
for from := range m.valMap {
to := m.valMap.MapOrdinal(from)
existing[to] = vd.GetField(from, value)
if dupe[to], err = index.GetField(vd, from, value); err != nil {
return err
}
}
return
})
s := m.keyBld.Desc.Format(key)
return sql.NewUniqueKeyErr(s, true, existing)
return sql.NewUniqueKeyErr(s, true, dupe)
}
func ordinalMappingsFromSchema(from sql.Schema, to schema.Schema) (km, vm val.OrdinalMapping) {

View File

@@ -0,0 +1,52 @@
// Copyright 2022 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package memprof
import (
"context"
"flag"
"os"
"testing"
"github.com/dolthub/dolt/go/libraries/doltcore/dbfactory"
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
"github.com/dolthub/dolt/go/libraries/utils/filesys"
"github.com/dolthub/dolt/go/store/types"
)
var loc = flag.String("doltDir", "", "Directory of dolt database")
var urlStr string
var ddb *doltdb.DoltDB
// TestMain parses the -doltDir flag and derives the on-disk database URL
// consumed by the benchmarks in this package before running them.
func TestMain(m *testing.M) {
	flag.Parse()
	// NOTE(review): plain concatenation assumes *loc ends with a path
	// separator (or DoltDataDir begins with one) — confirm against how
	// -doltDir is supplied.
	urlStr = "file://" + *loc + dbfactory.DoltDataDir
	code := m.Run()
	os.Exit(code)
}
// BenchmarkLoadDoltDBMemory loads a DoltDB from the directory supplied via
// -doltDir. It is unconditionally skipped (b.SkipNow below) and so only runs
// when that line is removed — presumably intended for manual memory profiling.
func BenchmarkLoadDoltDBMemory(b *testing.B) {
	b.SkipNow()
	for i := 0; i < b.N; i++ {
		ctx := context.Background()
		var err error
		// Assigns to the package-level ddb — presumably to keep the loaded
		// database reachable for memory profiles; confirm intent.
		ddb, err = doltdb.LoadDoltDB(ctx, types.Format_Default, urlStr, filesys.LocalFS)
		if err != nil {
			b.Fatalf("failed to load doltdb, err: %s", err.Error())
		}
	}
}

View File

@@ -0,0 +1,81 @@
// Copyright 2022 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package geometry
import (
"encoding/binary"
"math"
"github.com/dolthub/go-mysql-server/sql"
)
// ParseEWKBHeader converts the header portion of an EWKB byte array into its
// SRID, endianness flag, and geometry type.
// Layout: 4-byte little-endian SRID, 1-byte byte-order marker, 4-byte
// little-endian geometry type tag.
func ParseEWKBHeader(buf []byte) (srid uint32, bigEndian bool, typ uint32) {
	srid = binary.LittleEndian.Uint32(buf[0:SRIDSize]) // first 4 bytes: SRID, always little-endian
	bigEndian = buf[SRIDSize] == 0                     // byte-order marker: 0 = big-endian, 1 = little-endian (WKB convention)
	typ = binary.LittleEndian.Uint32(buf[SRIDSize+EndianSize : EWKBHeaderSize]) // next 4 bytes: geometry type
	// NOTE(review): coordinate/type decoding in this package assumes
	// little-endian regardless of the bigEndian flag — confirm big-endian
	// payloads are never produced.
	return
}
// ParseEWKBPoint decodes the 16-byte EWKB point payload in buf into its
// x and y coordinates (two little-endian IEEE-754 doubles).
func ParseEWKBPoint(buf []byte) (x, y float64) {
	xBits := binary.LittleEndian.Uint64(buf[:PointSize/2])
	yBits := binary.LittleEndian.Uint64(buf[PointSize/2:])
	return math.Float64frombits(xBits), math.Float64frombits(yBits)
}
// DeserializePoint decodes an EWKB point payload into a sql.Point with the
// given SRID.
func DeserializePoint(buf []byte, srid uint32) sql.Point {
	x, y := ParseEWKBPoint(buf)
	return sql.Point{SRID: srid, X: x, Y: y}
}
// DeserializeLinestring decodes an EWKB linestring payload (count-prefixed
// point slice) into a sql.Linestring with the given SRID.
func DeserializeLinestring(buf []byte, srid uint32) sql.Linestring {
	return sql.Linestring{SRID: srid, Points: readPointSlice(buf, srid)}
}
// DeserializePolygon decodes an EWKB polygon payload (count-prefixed slice of
// linestrings) into a sql.Polygon with the given SRID.
// NOTE(review): parameter order (srid, buf) is reversed relative to
// DeserializePoint/DeserializeLinestring; kept for caller compatibility.
func DeserializePolygon(srid uint32, buf []byte) sql.Polygon {
	return sql.Polygon{SRID: srid, Lines: readLineSlice(buf, srid)}
}
func readCount(buf []byte) uint32 {
return binary.LittleEndian.Uint32(buf)
}
// readPointSlice decodes a count-prefixed slice of points from buf, tagging
// each with the given SRID.
func readPointSlice(buf []byte, srid uint32) []sql.Point {
	n := readCount(buf)
	buf = buf[CountSize:]
	points := make([]sql.Point, n)
	for i := range points {
		points[i] = DeserializePoint(buf, srid)
		buf = buf[PointSize:]
	}
	return points
}
// readLineSlice decodes a count-prefixed slice of linestrings from buf,
// tagging each with the given SRID. Layout: a 4-byte line count, then for
// each linestring a count-prefixed point slice (see readPointSlice).
//
// Bug fix: the cursor previously advanced by only the point-data size,
// omitting the 4-byte point count that prefixes every linestring, so every
// linestring after the first was read from a misaligned offset. The advance
// now includes CountSize, matching the layout written by writeLineSlice
// (whose buffer allocation reserves one count per linestring).
func readLineSlice(buf []byte, srid uint32) (lines []sql.Linestring) {
	lines = make([]sql.Linestring, readCount(buf))
	buf = buf[CountSize:]
	for i := range lines {
		lines[i].SRID = srid
		lines[i].Points = readPointSlice(buf, srid)
		// Skip this linestring's count prefix AND its point data.
		sz := CountSize + len(lines[i].Points)*PointSize
		buf = buf[sz:]
	}
	return
}

View File

@@ -0,0 +1,104 @@
// Copyright 2022 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package geometry
import (
"encoding/binary"
"math"
"github.com/dolthub/go-mysql-server/sql"
)
// Sizes (in bytes) of the components of the EWKB serialization format.
const (
	SRIDSize       = 4 // 32-bit spatial reference identifier
	EndianSize     = 1 // byte-order marker
	TypeSize       = 4 // 32-bit geometry type tag
	EWKBHeaderSize = SRIDSize + EndianSize + TypeSize

	PointSize = 16 // two IEEE-754 float64 coordinates
	CountSize = 4  // 32-bit element-count prefix
)

// Geometry type tags written into the EWKB header.
const (
	PointType      = 1
	LinestringType = 2
	PolygonType    = 3
)
// allocateBuffer returns a zeroed buffer sized for an EWKB header plus
// numPoints point payloads and numCounts count prefixes.
func allocateBuffer(numPoints, numCounts int) []byte {
	size := EWKBHeaderSize + PointSize*numPoints + CountSize*numCounts
	return make([]byte, size)
}
// WriteEWKBHeader writes the EWKB header (SRID, byte-order marker, geometry
// type) into the first EWKBHeaderSize bytes of buf.
func WriteEWKBHeader(buf []byte, srid, typ uint32) {
	binary.LittleEndian.PutUint32(buf[0:SRIDSize], srid)
	// 1 marks little-endian in the WKB byte-order convention; this package
	// always serializes little-endian.
	buf[SRIDSize] = 1
	binary.LittleEndian.PutUint32(buf[SRIDSize+EndianSize:EWKBHeaderSize], typ)
}
// WriteEWKBPointData writes x and y as two little-endian IEEE-754 doubles
// into the first PointSize bytes of buf.
func WriteEWKBPointData(buf []byte, x, y float64) {
	half := PointSize / 2
	binary.LittleEndian.PutUint64(buf[:half], math.Float64bits(x))
	binary.LittleEndian.PutUint64(buf[half:], math.Float64bits(y))
}
// SerializePoint encodes p into a freshly allocated EWKB byte slice.
func SerializePoint(p sql.Point) []byte {
	buf := allocateBuffer(1, 0)
	WriteEWKBHeader(buf[:EWKBHeaderSize], p.SRID, PointType)
	WriteEWKBPointData(buf[EWKBHeaderSize:], p.X, p.Y)
	return buf
}
// SerializeLinestring encodes l into a freshly allocated EWKB byte slice
// (header, then a count-prefixed point slice).
func SerializeLinestring(l sql.Linestring) []byte {
	buf := allocateBuffer(len(l.Points), 1)
	WriteEWKBHeader(buf[:EWKBHeaderSize], l.SRID, LinestringType)
	writePointSlice(buf[EWKBHeaderSize:], l.Points)
	return buf
}
// SerializePolygon encodes p into a freshly allocated EWKB byte slice
// (header, line count, then one count-prefixed point slice per linestring).
func SerializePolygon(p sql.Polygon) []byte {
	buf := allocateBuffer(countPoints(p), len(p.Lines)+1)
	WriteEWKBHeader(buf[:EWKBHeaderSize], p.SRID, PolygonType)
	writeLineSlice(buf[EWKBHeaderSize:], p.Lines)
	return buf
}
func writeCount(buf []byte, count uint32) {
binary.LittleEndian.PutUint32(buf, count)
}
// writePointSlice writes a count prefix followed by each point's coordinate
// data into buf.
func writePointSlice(buf []byte, points []sql.Point) {
	writeCount(buf, uint32(len(points)))
	buf = buf[CountSize:]
	for i := range points {
		WriteEWKBPointData(buf, points[i].X, points[i].Y)
		buf = buf[PointSize:]
	}
}
// writeLineSlice writes a line count followed by one count-prefixed point
// slice per linestring into buf.
//
// Bug fix: the cursor previously advanced by only the point-data size,
// omitting the 4-byte count prefix writePointSlice emits for every
// linestring, so each subsequent linestring's count overwrote the final
// 4 bytes of the preceding linestring's last point. The advance now includes
// CountSize, consistent with allocateBuffer reserving len(p.Lines)+1 counts
// for a polygon (see SerializePolygon).
func writeLineSlice(buf []byte, lines []sql.Linestring) {
	writeCount(buf, uint32(len(lines)))
	buf = buf[CountSize:]
	for _, l := range lines {
		writePointSlice(buf, l.Points)
		// Advance past this linestring's count prefix AND its point data.
		sz := CountSize + len(l.Points)*PointSize
		buf = buf[sz:]
	}
}
// countPoints returns the total number of points across all of p's
// linestrings.
func countPoints(p sql.Polygon) int {
	total := 0
	for _, line := range p.Lines {
		total += len(line.Points)
	}
	return total
}

View File

@@ -43,53 +43,59 @@ func newAWSChunkSource(ctx context.Context, ddb *ddbTableStore, s3 *s3ObjectRead
if index, found := indexCache.get(name); found {
tra := &awsTableReaderAt{al: al, ddb: ddb, s3: s3, name: name, chunkCount: chunkCount}
return &chunkSourceAdapter{newTableReader(index, tra, s3BlockSize), name}, nil
tr, err := newTableReader(index, tra, s3BlockSize)
if err != nil {
return &chunkSourceAdapter{}, err
}
return &chunkSourceAdapter{tr, name}, nil
}
}
t1 := time.Now()
indexBytes, tra, err := func() ([]byte, tableReaderAt, error) {
index, tra, err := func() (tableIndex, tableReaderAt, error) {
if al.tableMayBeInDynamo(chunkCount) {
data, err := ddb.ReadTable(ctx, name, stats)
if data == nil && err == nil { // There MUST be either data or an error
return nil, &dynamoTableReaderAt{}, errors.New("no data available")
return onHeapTableIndex{}, &dynamoTableReaderAt{}, errors.New("no data available")
}
if data != nil {
return data, &dynamoTableReaderAt{ddb: ddb, h: name}, nil
stats.IndexBytesPerRead.Sample(uint64(len(data)))
ind, err := parseTableIndexByCopy(data)
if err != nil {
return onHeapTableIndex{}, nil, err
}
return ind, &dynamoTableReaderAt{ddb: ddb, h: name}, nil
}
if _, ok := err.(tableNotInDynamoErr); !ok {
return nil, &dynamoTableReaderAt{}, err
return onHeapTableIndex{}, &dynamoTableReaderAt{}, err
}
}
size := indexSize(chunkCount) + footerSize
buff := make([]byte, size)
n, _, err := s3.ReadFromEnd(ctx, name, buff, stats)
if err != nil {
return nil, &dynamoTableReaderAt{}, err
return onHeapTableIndex{}, &dynamoTableReaderAt{}, err
}
if size != uint64(n) {
return nil, &dynamoTableReaderAt{}, errors.New("failed to read all data")
return onHeapTableIndex{}, &dynamoTableReaderAt{}, errors.New("failed to read all data")
}
return buff, &s3TableReaderAt{s3: s3, h: name}, nil
stats.IndexBytesPerRead.Sample(uint64(len(buff)))
ind, err := parseTableIndex(buff)
if err != nil {
return onHeapTableIndex{}, &dynamoTableReaderAt{}, err
}
return ind, &s3TableReaderAt{s3: s3, h: name}, nil
}()
if err != nil {
return &chunkSourceAdapter{}, err
}
stats.IndexBytesPerRead.Sample(uint64(len(indexBytes)))
stats.IndexReadLatency.SampleTimeSince(t1)
index, err := parseIndex(indexBytes)
if err != nil {
return emptyChunkSource{}, err
}
@@ -98,7 +104,11 @@ func newAWSChunkSource(ctx context.Context, ddb *ddbTableStore, s3 *s3ObjectRead
indexCache.put(name, ohi)
}
return &chunkSourceAdapter{newTableReader(index, tra, s3BlockSize), name}, nil
tr, err := newTableReader(index, tra, s3BlockSize)
if err != nil {
return &chunkSourceAdapter{}, err
}
return &chunkSourceAdapter{tr, name}, nil
}
type awsTableReaderAt struct {

View File

@@ -74,7 +74,7 @@ func TestAWSChunkSource(t *testing.T) {
t.Run("WithIndexCache", func(t *testing.T) {
assert := assert.New(t)
index, err := parseTableIndex(tableData)
index, err := parseTableIndexByCopy(tableData)
require.NoError(t, err)
cache := newIndexCache(1024)
cache.put(h, index)
@@ -98,7 +98,7 @@ func TestAWSChunkSource(t *testing.T) {
t.Run("WithIndexCache", func(t *testing.T) {
assert := assert.New(t)
index, err := parseTableIndex(tableData)
index, err := parseTableIndexByCopy(tableData)
require.NoError(t, err)
cache := newIndexCache(1024)
cache.put(h, index)

View File

@@ -546,8 +546,9 @@ func bytesToChunkSource(t *testing.T, bs ...[]byte) chunkSource {
tableSize, name, err := tw.finish()
require.NoError(t, err)
data := buff[:tableSize]
ti, err := parseTableIndex(data)
ti, err := parseTableIndexByCopy(data)
require.NoError(t, err)
rdr, err := newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize)
require.NoError(t, err)
rdr := newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize)
return chunkSourceAdapter{rdr, name}
}

View File

@@ -418,7 +418,9 @@ func TestBlockStoreConjoinOnCommit(t *testing.T) {
assertContainAll := func(t *testing.T, store chunks.ChunkStore, srcs ...chunkSource) {
rdrs := make(chunkReaderGroup, len(srcs))
for i, src := range srcs {
rdrs[i] = src.Clone()
c, err := src.Clone()
require.NoError(t, err)
rdrs[i] = c
}
chunkChan := make(chan extractRecord, mustUint32(rdrs.count()))
err := rdrs.extract(context.Background(), chunkChan)

View File

@@ -110,7 +110,11 @@ func newBSChunkSource(ctx context.Context, bs blobstore.Blobstore, name addr, ch
if index, found := indexCache.get(name); found {
bsTRA := &bsTableReaderAt{name.String(), bs}
return &chunkSourceAdapter{newTableReader(index, bsTRA, blockSize), name}, nil
tr, err := newTableReader(index, bsTRA, blockSize)
if err != nil {
return nil, err
}
return &chunkSourceAdapter{tr, name}, nil
}
}
@@ -148,7 +152,11 @@ func newBSChunkSource(ctx context.Context, bs blobstore.Blobstore, name addr, ch
indexCache.put(name, index)
}
return &chunkSourceAdapter{newTableReader(index, tra, s3BlockSize), name}, nil
tr, err := newTableReader(index, tra, s3BlockSize)
if err != nil {
return nil, err
}
return &chunkSourceAdapter{tr, name}, nil
}
func (bsp *blobstorePersister) PruneTableFiles(ctx context.Context, contents manifestContents) error {

View File

@@ -24,7 +24,7 @@ func (csa chunkSourceAdapter) hash() (addr, error) {
}
func newReaderFromIndexData(indexCache *indexCache, idxData []byte, name addr, tra tableReaderAt, blockSize uint64) (cs chunkSource, err error) {
index, err := parseTableIndex(idxData)
index, err := parseTableIndexByCopy(idxData)
if err != nil {
return nil, err
@@ -42,13 +42,21 @@ func newReaderFromIndexData(indexCache *indexCache, idxData []byte, name addr, t
indexCache.put(name, index)
}
return &chunkSourceAdapter{newTableReader(index, tra, blockSize), name}, nil
tr, err := newTableReader(index, tra, blockSize)
if err != nil {
return nil, err
}
return &chunkSourceAdapter{tr, name}, nil
}
func (csa chunkSourceAdapter) Close() error {
return csa.tableReader.Close()
}
func (csa chunkSourceAdapter) Clone() chunkSource {
return &chunkSourceAdapter{csa.tableReader.Clone(), csa.h}
func (csa chunkSourceAdapter) Clone() (chunkSource, error) {
tr, err := csa.tableReader.Clone()
if err != nil {
return &chunkSourceAdapter{}, err
}
return &chunkSourceAdapter{tr, csa.h}, nil
}

View File

@@ -35,9 +35,10 @@ func TestCmpChunkTableWriter(t *testing.T) {
require.NoError(t, err)
// Setup a TableReader to read compressed chunks out of
ti, err := parseTableIndex(buff)
ti, err := parseTableIndexByCopy(buff)
require.NoError(t, err)
tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize)
require.NoError(t, err)
tr := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize)
hashes := make(hash.HashSet)
for _, chnk := range testMDChunks {
@@ -72,9 +73,10 @@ func TestCmpChunkTableWriter(t *testing.T) {
require.NoError(t, err)
outputBuff := output.Bytes()
outputTI, err := parseTableIndex(outputBuff)
outputTI, err := parseTableIndexByCopy(outputBuff)
require.NoError(t, err)
outputTR, err := newTableReader(outputTI, tableReaderAtFromBytes(buff), fileBlockSize)
require.NoError(t, err)
outputTR := newTableReader(outputTI, tableReaderAtFromBytes(buff), fileBlockSize)
compareContentsOfTables(t, ctx, hashes, tr, outputTR)
}

View File

@@ -64,7 +64,9 @@ func makeTestSrcs(t *testing.T, tableSizes []uint32, p tablePersister) (srcs chu
}
cs, err := p.Persist(context.Background(), mt, nil, &Stats{})
require.NoError(t, err)
srcs = append(srcs, cs.Clone())
c, err := cs.Clone()
require.NoError(t, err)
srcs = append(srcs, c)
}
return
}

View File

@@ -62,7 +62,12 @@ func (m *fakeDDB) readerForTable(name addr) (chunkReader, error) {
return nil, err
}
return newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize), nil
tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize)
if err != nil {
return nil, err
}
return tr, nil
}
return nil, nil
}

View File

@@ -90,7 +90,7 @@ func (ftp *fsTablePersister) persistTable(ctx context.Context, name addr, data [
return "", ferr
}
index, ferr := parseTableIndex(data)
index, ferr := parseTableIndexByCopy(data)
if ferr != nil {
return "", ferr

View File

@@ -127,9 +127,10 @@ func TestFSTablePersisterPersist(t *testing.T) {
if assert.True(mustUint32(src.count()) > 0) {
buff, err := os.ReadFile(filepath.Join(dir, mustAddr(src.hash()).String()))
require.NoError(t, err)
ti, err := parseTableIndex(buff)
ti, err := parseTableIndexByCopy(buff)
require.NoError(t, err)
tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize)
require.NoError(t, err)
tr := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize)
assertChunksInReader(testChunks, tr, assert)
}
}
@@ -227,9 +228,10 @@ func TestFSTablePersisterConjoinAll(t *testing.T) {
if assert.True(mustUint32(src.count()) > 0) {
buff, err := os.ReadFile(filepath.Join(dir, mustAddr(src.hash()).String()))
require.NoError(t, err)
ti, err := parseTableIndex(buff)
ti, err := parseTableIndexByCopy(buff)
require.NoError(t, err)
tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize)
require.NoError(t, err)
tr := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize)
assertChunksInReader(testChunks, tr, assert)
}
@@ -265,9 +267,10 @@ func TestFSTablePersisterConjoinAllDups(t *testing.T) {
if assert.True(mustUint32(src.count()) > 0) {
buff, err := os.ReadFile(filepath.Join(dir, mustAddr(src.hash()).String()))
require.NoError(t, err)
ti, err := parseTableIndex(buff)
ti, err := parseTableIndexByCopy(buff)
require.NoError(t, err)
tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize)
require.NoError(t, err)
tr := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize)
assertChunksInReader(testChunks, tr, assert)
assert.EqualValues(reps*len(testChunks), mustUint32(tr.count()))
}

View File

@@ -0,0 +1,91 @@
// Copyright 2022 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package nbs
import (
"encoding/binary"
"errors"
"io"
)
var (
	// ErrNotEnoughBytes is returned by OffsetsReader when the underlying
	// lengths reader delivers a byte count that is not a whole number of
	// uint32 length records.
	ErrNotEnoughBytes = errors.New("reader did not return enough bytes")
)
// NewIndexTransformer returns a reader that yields a table index for
// |chunkCount| chunks: the prefix-tuple and address-suffix sections of
// |src| are passed through unchanged, while the uint32 length section
// between them is rewritten on the fly into cumulative uint64 offsets.
func NewIndexTransformer(src io.Reader, chunkCount int) io.Reader {
	limit := func(size int) io.Reader {
		return io.LimitReader(src, int64(size))
	}

	tuples := limit(chunkCount * prefixTupleSize)
	offsets := NewOffsetsReader(limit(chunkCount * lengthSize))
	suffixes := limit(chunkCount * addrSuffixSize)

	return io.MultiReader(tuples, offsets, suffixes)
}
// OffsetsReader transforms a byte stream of table file lengths
// into a byte stream of table file offsets
type OffsetsReader struct {
	// lengthsReader yields raw big-endian uint32 chunk lengths.
	lengthsReader io.Reader
	// offset is the running sum of all lengths consumed so far; it
	// carries across Read calls so offsets are cumulative.
	offset uint64
}

// NewOffsetsReader wraps |lengthsReader|, whose stream of big-endian
// uint32 lengths will be re-emitted as cumulative big-endian uint64
// offsets by Read.
func NewOffsetsReader(lengthsReader io.Reader) *OffsetsReader {
	return &OffsetsReader{
		lengthsReader: lengthsReader,
	}
}
// Read fills |p| with big-endian uint64 cumulative offsets derived from the
// uint32 lengths in tra.lengthsReader. Each call consumes len(p)/2 bytes of
// lengths and produces len(p) bytes of offsets (offsetSize == 2*lengthSize).
func (tra *OffsetsReader) Read(p []byte) (n int, err error) {
	// Read as many lengths, as offsets we can fit into p. Which is half.
	// Below assumes that lengthSize * 2 = offsetSize
	// Strategy is to first read lengths into the second half of p
	// Then, while iterating the lengths, compute the current offset,
	// and write it to the beginning of p.

	// Align p so it holds a whole number of offsets.
	rem := len(p) % offsetSize
	p = p[:len(p)-rem]

	// Read lengths into second half of p
	secondHalf := p[len(p)/2:]
	n, err = tra.lengthsReader.Read(secondHalf)
	if err != nil {
		// NOTE(review): if the underlying reader ever returns n > 0
		// together with an error (permitted by io.Reader), those bytes are
		// dropped here — confirm upstream readers never do that.
		return 0, err
	}
	if n%lengthSize != 0 {
		// NOTE(review): a legal short read that splits a uint32 record is
		// treated as corruption; sources that can short-read mid-record
		// must be buffered by the caller.
		return 0, ErrNotEnoughBytes
	}

	// Iterate lengths in second half of p while writing offsets starting from the beginning.
	// On the last iteration, we overwrite the last length with the final offset.
	// Reading lengthBytes before writing offsetBytes keeps the in-place
	// overlap between the two halves safe.
	for l, r := 0, 0; r < n; l, r = l+offsetSize, r+lengthSize {
		lengthBytes := secondHalf[r : r+lengthSize]
		length := binary.BigEndian.Uint32(lengthBytes)
		tra.offset += uint64(length)

		offsetBytes := p[l : l+offsetSize]
		binary.BigEndian.PutUint64(offsetBytes, tra.offset)
	}

	// Bytes produced are exactly twice the length bytes consumed.
	return n * 2, nil
}

View File

@@ -0,0 +1,189 @@
// Copyright 2022 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package nbs
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"math/rand"
"testing"
"github.com/stretchr/testify/require"
"github.com/dolthub/dolt/go/libraries/utils/test"
)
// minByteReader is a copy of smallerByteReader from testing/iotest
// but with a minimum read size of min bytes.
type minByteReader struct {
r io.Reader
min int
n int
off int
}
func (r *minByteReader) Read(p []byte) (int, error) {
if len(p) == 0 {
return 0, nil
}
r.n = r.min + rand.Intn(r.min*100)
n := r.n
if n > len(p) {
n = len(p)
}
n, err := r.r.Read(p[0:n])
if err != nil && err != io.EOF {
err = fmt.Errorf("Read(%d bytes at offset %d): %v", n, r.off, err)
}
r.off += n
return n, err
}
// Altered from testing/iotest.TestReader to use minByteReader
func testReader(r io.Reader, content []byte) error {
	// A non-empty reader must tolerate a zero-length Read without
	// consuming anything or reporting an error.
	if len(content) > 0 {
		cnt, zerr := r.Read(nil)
		if cnt != 0 || zerr != nil {
			return fmt.Errorf("Read(0) = %d, %v, want 0, nil", cnt, zerr)
		}
	}

	// Drain through minByteReader so the reader is exercised with varied,
	// offsetSize-or-larger request sizes.
	got, err := io.ReadAll(&minByteReader{r: r, min: offsetSize})
	if err != nil {
		return err
	}
	if !bytes.Equal(got, content) {
		return fmt.Errorf("ReadAll(varied amounts) = %q\n\twant %q", got, content)
	}

	// After the content is consumed, further reads must report EOF.
	cnt, err := r.Read(make([]byte, offsetSize))
	if cnt != 0 || err != io.EOF {
		return fmt.Errorf("Read(offsetSize) at EOF = %v, %v, want 0, EOF", cnt, err)
	}
	return nil
}
// get32Bytes serializes |src| as a contiguous run of big-endian uint32s.
func get32Bytes(src []uint32) []byte {
	out := make([]byte, len(src)*uint32Size)
	for i, v := range src {
		binary.BigEndian.PutUint32(out[i*lengthSize:(i+1)*lengthSize], v)
	}
	return out
}
// get64Bytes serializes |src| as a contiguous run of big-endian uint64s.
func get64Bytes(src []uint64) []byte {
	out := make([]byte, len(src)*uint64Size)
	for i, v := range src {
		binary.BigEndian.PutUint64(out[i*offsetSize:(i+1)*offsetSize], v)
	}
	return out
}
// randomUInt32s returns |n| pseudo-random values, each in [0, 1000).
func randomUInt32s(n int) []uint32 {
	vals := make([]uint32, n)
	for i := range vals {
		vals[i] = uint32(rand.Intn(1000))
	}
	return vals
}
// calcOffsets converts a slice of chunk lengths into the matching slice of
// cumulative end offsets: out[i] = arr[0] + ... + arr[i].
// Returns an empty slice for empty input (the original indexed out[0]
// unconditionally and panicked on a zero-length slice).
func calcOffsets(arr []uint32) []uint64 {
	out := make([]uint64, len(arr))
	if len(arr) == 0 {
		return out
	}
	out[0] = uint64(arr[0])
	for i := 1; i < len(arr); i++ {
		out[i] = out[i-1] + uint64(arr[i])
	}
	return out
}
// TestOffsetReader exercises OffsetsReader against pre-computed offsets.
func TestOffsetReader(t *testing.T) {
	testSize := rand.Intn(10) + 1
	lengths := randomUInt32s(testSize)
	offsets := calcOffsets(lengths)
	lengthBytes := get32Bytes(lengths)
	offsetBytes := get64Bytes(offsets)

	t.Run("converts lengths into offsets", func(t *testing.T) {
		lengthsReader := bytes.NewReader(lengthBytes)
		offsetReader := NewOffsetsReader(lengthsReader)
		err := testReader(offsetReader, offsetBytes)
		require.NoError(t, err)
	})

	t.Run("err not enough bytes when expected", func(t *testing.T) {
		// Truncate the stream so the final uint32 length record is incomplete.
		lengthsReader := bytes.NewReader(lengthBytes[:len(lengthBytes)-1])
		offsetReader := NewOffsetsReader(lengthsReader)
		_, err := io.ReadAll(offsetReader)
		// BUGFIX: was require.ErrorAsf(t, err, &ErrNotEnoughBytes, ...).
		// errors.As with a *error target matches ANY non-nil error and,
		// worse, assigns through the pointer — overwriting the package-level
		// sentinel. Sentinel comparison must use ErrorIs.
		require.ErrorIs(t, err, ErrNotEnoughBytes, "should return ErrNotEnoughBytes")
	})

	t.Run("fills provided buffer correctly", func(t *testing.T) {
		lengthsReader := bytes.NewReader(lengthBytes)
		offsetReader := NewOffsetsReader(lengthsReader)
		p := make([]byte, offsetSize)
		n, err := offsetReader.Read(p)
		require.NoError(t, err)
		require.Equal(t, offsetSize, n)
	})

	t.Run("works with io.ReadAll", func(t *testing.T) {
		lengthsReader := bytes.NewReader(lengthBytes[:lengthSize])
		offsetReader := NewOffsetsReader(lengthsReader)
		data, err := io.ReadAll(offsetReader)
		require.NoError(t, err)
		require.True(t, bytes.Equal(data, offsetBytes[:offsetSize]))
	})
}
// TestIndexTransformer verifies that NewIndexTransformer leaves the prefix
// tuple and suffix sections untouched while rewriting lengths as offsets.
func TestIndexTransformer(t *testing.T) {
	chunkCount := rand.Intn(10) + 1
	lengths := randomUInt32s(chunkCount)
	lengthBytes := get32Bytes(lengths)
	offsetBytes := get64Bytes(calcOffsets(lengths))
	tupleBytes := test.RandomData(chunkCount * prefixTupleSize)
	suffixBytes := test.RandomData(chunkCount * addrSuffixSize)

	concat := func(parts ...[]byte) (joined []byte) {
		for _, part := range parts {
			joined = append(joined, part...)
		}
		return joined
	}

	inBytes := concat(tupleBytes, lengthBytes, suffixBytes)
	outBytes := concat(tupleBytes, offsetBytes, suffixBytes)

	t.Run("only converts lengths into offsets", func(t *testing.T) {
		xform := NewIndexTransformer(bytes.NewBuffer(inBytes), chunkCount)
		require.NoError(t, testReader(xform, outBytes))
	})
}

View File

@@ -150,25 +150,28 @@ func TestMemTableWrite(t *testing.T) {
td1, _, err := buildTable(chunks[1:2])
require.NoError(t, err)
ti1, err := parseTableIndex(td1)
ti1, err := parseTableIndexByCopy(td1)
require.NoError(t, err)
tr1, err := newTableReader(ti1, tableReaderAtFromBytes(td1), fileBlockSize)
require.NoError(t, err)
tr1 := newTableReader(ti1, tableReaderAtFromBytes(td1), fileBlockSize)
assert.True(tr1.has(computeAddr(chunks[1])))
td2, _, err := buildTable(chunks[2:])
require.NoError(t, err)
ti2, err := parseTableIndex(td2)
ti2, err := parseTableIndexByCopy(td2)
require.NoError(t, err)
tr2, err := newTableReader(ti2, tableReaderAtFromBytes(td2), fileBlockSize)
require.NoError(t, err)
tr2 := newTableReader(ti2, tableReaderAtFromBytes(td2), fileBlockSize)
assert.True(tr2.has(computeAddr(chunks[2])))
_, data, count, err := mt.write(chunkReaderGroup{tr1, tr2}, &Stats{})
require.NoError(t, err)
assert.Equal(uint32(1), count)
ti, err := parseTableIndex(data)
ti, err := parseTableIndexByCopy(data)
require.NoError(t, err)
outReader, err := newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize)
require.NoError(t, err)
outReader := newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize)
assert.True(outReader.has(computeAddr(chunks[0])))
assert.False(outReader.has(computeAddr(chunks[1])))
assert.False(outReader.has(computeAddr(chunks[2])))

View File

@@ -105,29 +105,35 @@ func newMmapTableReader(dir string, h addr, chunkCount uint32, indexCache *index
// index. Mmap won't take an offset that's not page-aligned, so find the nearest page boundary preceding the index.
indexOffset := fi.Size() - int64(footerSize) - int64(indexSize(chunkCount))
aligned := indexOffset / mmapAlignment * mmapAlignment // Thanks, integer arithmetic!
length := int(fi.Size() - aligned)
if fi.Size()-aligned > maxInt {
err = fmt.Errorf("%s - size: %d alignment: %d> maxInt: %d", path, fi.Size(), aligned, maxInt)
return
}
var mm mmap.MMap
mm, err = mmap.MapRegion(f, int(fi.Size()-aligned), mmap.RDONLY, 0, aligned)
buff := make([]byte, indexSize(chunkCount)+footerSize)
func() {
var mm mmap.MMap
mm, err = mmap.MapRegion(f, length, mmap.RDONLY, 0, aligned)
if err != nil {
return
}
defer func() {
unmapErr := mm.Unmap()
if unmapErr != nil {
err = unmapErr
}
}()
copy(buff, mm[indexOffset-aligned:])
}()
if err != nil {
return
return onHeapTableIndex{}, err
}
defer func() {
unmapErr := mm.Unmap()
if unmapErr != nil {
err = unmapErr
}
}()
buff := []byte(mm)
ti, err = parseTableIndex(buff[indexOffset-aligned:])
ti, err = parseTableIndex(buff)
if err != nil {
return
@@ -152,8 +158,12 @@ func newMmapTableReader(dir string, h addr, chunkCount uint32, indexCache *index
return nil, errors.New("unexpected chunk count")
}
tr, err := newTableReader(index, &cacheReaderAt{path, fc}, fileBlockSize)
if err != nil {
return nil, err
}
return &mmapTableReader{
newTableReader(index, &cacheReaderAt{path, fc}, fileBlockSize),
tr,
fc,
h,
}, nil
@@ -167,8 +177,12 @@ func (mmtr *mmapTableReader) Close() error {
return mmtr.tableReader.Close()
}
func (mmtr *mmapTableReader) Clone() chunkSource {
return &mmapTableReader{mmtr.tableReader.Clone(), mmtr.fc, mmtr.h}
func (mmtr *mmapTableReader) Clone() (chunkSource, error) {
tr, err := mmtr.tableReader.Clone()
if err != nil {
return &mmapTableReader{}, err
}
return &mmapTableReader{tr, mmtr.fc, mmtr.h}, nil
}
type cacheReaderAt struct {

View File

@@ -100,9 +100,9 @@ func (ccs *persistingChunkSource) Close() error {
return nil
}
func (ccs *persistingChunkSource) Clone() chunkSource {
func (ccs *persistingChunkSource) Clone() (chunkSource, error) {
// persistingChunkSource does not own |cs| or |mt|. No need to Clone.
return ccs
return ccs, nil
}
func (ccs *persistingChunkSource) has(h addr) (bool, error) {
@@ -308,6 +308,6 @@ func (ecs emptyChunkSource) Close() error {
return nil
}
func (ecs emptyChunkSource) Clone() chunkSource {
return ecs
func (ecs emptyChunkSource) Clone() (chunkSource, error) {
return ecs, nil
}

View File

@@ -461,13 +461,17 @@ func (ftp fakeTablePersister) Persist(ctx context.Context, mt *memTable, haver c
if chunkCount > 0 {
ftp.mu.Lock()
defer ftp.mu.Unlock()
ti, err := parseTableIndex(data)
ti, err := parseTableIndexByCopy(data)
if err != nil {
return nil, err
}
ftp.sources[name] = newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize)
s, err := newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize)
if err != nil {
return emptyChunkSource{}, err
}
ftp.sources[name] = s
return chunkSourceAdapter{ftp.sources[name], name}, nil
}
}
@@ -484,13 +488,17 @@ func (ftp fakeTablePersister) ConjoinAll(ctx context.Context, sources chunkSourc
if chunkCount > 0 {
ftp.mu.Lock()
defer ftp.mu.Unlock()
ti, err := parseTableIndex(data)
ti, err := parseTableIndexByCopy(data)
if err != nil {
return nil, err
}
ftp.sources[name] = newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize)
s, err := newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize)
if err != nil {
return nil, err
}
ftp.sources[name] = s
return chunkSourceAdapter{ftp.sources[name], name}, nil
}
return emptyChunkSource{}, nil

View File

@@ -76,12 +76,16 @@ func (m *fakeS3) readerForTable(name addr) (chunkReader, error) {
m.mu.Lock()
defer m.mu.Unlock()
if buff, present := m.data[name.String()]; present {
ti, err := parseTableIndex(buff)
ti, err := parseTableIndexByCopy(buff)
if err != nil {
return nil, err
}
return newTableReader(ti, tableReaderAtFromBytes(buff), s3BlockSize), nil
tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), s3BlockSize)
if err != nil {
return nil, err
}
return tr, nil
}
return nil, nil
}
@@ -94,13 +98,17 @@ func (m *fakeS3) readerForTableWithNamespace(ns string, name addr) (chunkReader,
key = ns + "/" + key
}
if buff, present := m.data[key]; present {
ti, err := parseTableIndex(buff)
ti, err := parseTableIndexByCopy(buff)
if err != nil {
return nil, err
}
return newTableReader(ti, tableReaderAtFromBytes(buff), s3BlockSize), nil
tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), s3BlockSize)
if err != nil {
return nil, err
}
return tr, nil
}
return nil, nil
}

View File

@@ -117,7 +117,10 @@ func (nbs *NomsBlockStore) GetChunkLocations(hashes hash.HashSet) (map[hash.Hash
for _, cs := range css {
switch tr := cs.(type) {
case *mmapTableReader:
offsetRecSlice, _ := tr.findOffsets(gr)
offsetRecSlice, _, err := tr.findOffsets(gr)
if err != nil {
return err
}
if len(offsetRecSlice) > 0 {
y, ok := ranges[hash.Hash(tr.h)]
@@ -154,7 +157,10 @@ func (nbs *NomsBlockStore) GetChunkLocations(hashes hash.HashSet) (map[hash.Hash
var foundHashes []hash.Hash
for h := range hashes {
a := addr(h)
e, ok := tableIndex.Lookup(&a)
e, ok, err := tableIndex.Lookup(&a)
if err != nil {
return err
}
if ok {
foundHashes = append(foundHashes, h)
y[h] = Range{Offset: e.Offset(), Length: e.Length()}

View File

@@ -525,3 +525,15 @@ func TestNBSCommitRetainsAppendix(t *testing.T) {
assert.Equal(upstream.GetAppendixTableSpecInfo(0), newUpstream.GetTableSpecInfo(0))
assert.Equal(newUpstream.GetTableSpecInfo(0), newUpstream.GetAppendixTableSpecInfo(0))
}
func TestGuessPrefixOrdinal(t *testing.T) {
prefixes := make([]uint64, 256)
for i := range prefixes {
prefixes[i] = uint64(i << 56)
}
for i, pre := range prefixes {
guess := GuessPrefixOrdinal(pre, 256)
assert.Equal(t, i, guess)
}
}

View File

@@ -130,6 +130,7 @@ const (
uint32Size = 4
ordinalSize = uint32Size
lengthSize = uint32Size
offsetSize = uint64Size
magicNumber = "\xff\xb5\xd8\xc2\x24\x63\xee\x50"
magicNumberSize = 8 //len(magicNumber)
footerSize = uint32Size + uint64Size + magicNumberSize
@@ -238,7 +239,7 @@ type chunkReader interface {
}
type chunkReadPlanner interface {
findOffsets(reqs []getRecord) (ors offsetRecSlice, remaining bool)
findOffsets(reqs []getRecord) (ors offsetRecSlice, remaining bool, err error)
getManyAtOffsets(
ctx context.Context,
eg *errgroup.Group,
@@ -269,7 +270,7 @@ type chunkSource interface {
// cannot be |Close|d more than once, so if a |chunkSource| is being
// retained in two objects with independent life-cycle, it should be
// |Clone|d first.
Clone() chunkSource
Clone() (chunkSource, error)
}
type chunkSources []chunkSource

522
go/store/nbs/table_index.go Normal file
View File

@@ -0,0 +1,522 @@
// Copyright 2022 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package nbs
import (
"bytes"
"encoding/binary"
"errors"
"io"
"os"
"sort"
"sync/atomic"
"github.com/dolthub/mmap-go"
"github.com/dolthub/dolt/go/libraries/utils/iohelp"
)
var (
	// ErrWrongBufferSize is returned when a buffer's length/capacity does
	// not match the index size implied by the footer's chunk count.
	ErrWrongBufferSize = errors.New("buffer length and/or capacity incorrect for chunkCount specified in footer")
	// ErrWrongCopySize signals that an index copy produced fewer bytes than
	// expected. NOTE(review): not referenced in this file — confirm a caller
	// elsewhere still uses it.
	ErrWrongCopySize = errors.New("could not copy enough bytes")
)
// tableIndex is the lookup structure over a single table file's chunk
// index, mapping chunk addresses to byte ranges within the file.
type tableIndex interface {
	// ChunkCount returns the total number of chunks in the indexed file.
	ChunkCount() uint32
	// EntrySuffixMatches returns true if the entry at index |idx| matches
	// the suffix of the address |h|. Used by |Lookup| after finding
	// matching indexes based on |Prefixes|.
	EntrySuffixMatches(idx uint32, h *addr) (bool, error)
	// IndexEntry returns the |indexEntry| at |idx|. Optionally puts the
	// full address of that entry in |a| if |a| is not |nil|.
	IndexEntry(idx uint32, a *addr) (indexEntry, error)
	// Lookup returns an |indexEntry| for the chunk corresponding to the
	// provided address |h|. Second returns is |true| if an entry exists
	// and |false| otherwise.
	Lookup(h *addr) (indexEntry, bool, error)
	// Ordinals returns a slice of indexes which maps the |i|th chunk in
	// the indexed file to its corresponding entry in index. The |i|th
	// entry in the result is the |i|th chunk in the indexed file, and its
	// corresponding value in the slice is the index entry that maps to it.
	Ordinals() ([]uint32, error)
	// Prefixes returns the sorted slice of |uint64| |addr| prefixes; each
	// entry corresponds to an indexed chunk address.
	Prefixes() ([]uint64, error)
	// TableFileSize returns the total size of the indexed table file, in bytes.
	TableFileSize() uint64
	// TotalUncompressedData returns the total uncompressed data size of
	// the table file. Used for informational statistics only.
	TotalUncompressedData() uint64
	// Close releases any resources used by this tableIndex.
	Close() error
	// Clone returns a |tableIndex| with the same contents which can be
	// |Close|d independently.
	Clone() (tableIndex, error)
}
func ReadTableFooter(rd io.ReadSeeker) (chunkCount uint32, totalUncompressedData uint64, err error) {
footerSize := int64(magicNumberSize + uint64Size + uint32Size)
_, err = rd.Seek(-footerSize, io.SeekEnd)
if err != nil {
return 0, 0, err
}
footer, err := iohelp.ReadNBytes(rd, int(footerSize))
if err != nil {
return 0, 0, err
}
if string(footer[uint32Size+uint64Size:]) != magicNumber {
return 0, 0, ErrInvalidTableFile
}
chunkCount = binary.BigEndian.Uint32(footer)
totalUncompressedData = binary.BigEndian.Uint64(footer[uint32Size:])
return
}
// parses a valid nbs tableIndex from a byte stream. |buff| must end with an NBS index
// and footer and its length and capacity must match the expected indexSize for the chunkCount specified in the footer.
// Retains the buffer and does not allocate new memory except for offsets, computes on buff in place.
func parseTableIndex(buff []byte) (onHeapTableIndex, error) {
	chunkCount, totalUncompressedData, err := ReadTableFooter(bytes.NewReader(buff))
	if err != nil {
		return onHeapTableIndex{}, err
	}

	// Both length and capacity must be exact so the index can safely own
	// the buffer in place.
	expected := indexSize(chunkCount) + footerSize
	if uint64(len(buff)) != expected || uint64(cap(buff)) != expected {
		return onHeapTableIndex{}, ErrWrongBufferSize
	}

	// Strip the footer; only the index body is handed to the parser.
	return NewOnHeapTableIndex(buff[:len(buff)-footerSize], chunkCount, totalUncompressedData)
}
// parseTableIndexByCopy reads the footer, copies indexSize(chunkCount) bytes, and parses an on heap table index.
// Useful to create an onHeapTableIndex without retaining the entire underlying array of data.
func parseTableIndexByCopy(buff []byte) (onHeapTableIndex, error) {
	return ReadTableIndexByCopy(bytes.NewReader(buff))
}
// ReadTableIndexByCopy loads an index into memory from an io.ReadSeeker
// Caution: Allocates new memory for entire index
func ReadTableIndexByCopy(rd io.ReadSeeker) (onHeapTableIndex, error) {
	chunkCount, totalUncompressedData, err := ReadTableFooter(rd)
	if err != nil {
		return onHeapTableIndex{}, err
	}

	// Seek back over the index plus the footer we just read; a failure here
	// means the stream is too short to hold the advertised index.
	idxSz := int64(indexSize(chunkCount))
	if _, err = rd.Seek(-(idxSz + footerSize), io.SeekEnd); err != nil {
		return onHeapTableIndex{}, ErrInvalidTableFile
	}

	buff := make([]byte, idxSz)
	if _, err = io.ReadFull(rd, buff); err != nil {
		return onHeapTableIndex{}, err
	}

	return NewOnHeapTableIndex(buff, chunkCount, totalUncompressedData)
}
// onHeapTableIndex is a tableIndex implementation whose parsed index
// sections live in ordinary Go heap memory.
type onHeapTableIndex struct {
	// tableFileSize caches the table file's size in bytes.
	// NOTE(review): not read by any method in this file — TableFileSize()
	// recomputes from the last entry; confirm whether this field is needed.
	tableFileSize uint64
	// Tuple bytes: (addrPrefix, ordinal) pairs, sorted by prefix.
	tupleB []byte
	// Offset bytes: big-endian uint64 cumulative chunk end offsets,
	// indexed by ordinal (see NewOnHeapTableIndex).
	offsetB []byte
	// Suffix bytes: address suffixes, indexed by ordinal.
	suffixB []byte
	chunkCount            uint32
	totalUncompressedData uint64
}

var _ tableIndex = &onHeapTableIndex{}
// NewOnHeapTableIndex creates a table index given a buffer of just the table index (no footer)
func NewOnHeapTableIndex(b []byte, chunkCount uint32, totalUncompressedData uint64) (onHeapTableIndex, error) {
	tupleEnd := prefixTupleSize * chunkCount
	lengthEnd := tupleEnd + lengthSize*chunkCount

	// Carve the buffer into its three sections: prefix tuples, chunk
	// lengths, and address suffixes.
	tuples := b[:tupleEnd]
	lengths := b[tupleEnd:lengthEnd]
	suffixes := b[lengthEnd:]

	// Expand the uint32 lengths into cumulative uint64 offsets up front so
	// lookups never have to sum lengths.
	offsets := make([]byte, chunkCount*offsetSize)
	if _, err := io.ReadFull(NewOffsetsReader(bytes.NewReader(lengths)), offsets); err != nil {
		return onHeapTableIndex{}, err
	}

	// TODO: Optimize memory usage further. The lengths segment of |b| is
	// retained unnecessarily; it could hold half the offsets, with only
	// len(lengths) of extra allocation needed for the rest.
	return onHeapTableIndex{
		tupleB:                tuples,
		offsetB:               offsets,
		suffixB:               suffixes,
		chunkCount:            chunkCount,
		totalUncompressedData: totalUncompressedData,
	}, nil
}
// ChunkCount returns the total number of chunks in the indexed file.
func (ti onHeapTableIndex) ChunkCount() uint32 {
	return ti.chunkCount
}
// EntrySuffixMatches reports whether the suffix stored for entry |idx|
// equals the suffix portion of address |h|.
func (ti onHeapTableIndex) EntrySuffixMatches(idx uint32, h *addr) (bool, error) {
	off := ti.ordinalAt(idx) * addrSuffixSize
	stored := ti.suffixB[off : off+addrSuffixSize]
	return bytes.Equal(h[addrPrefixSize:], stored), nil
}
// IndexEntry returns the index entry at |idx|, optionally reconstructing
// the entry's full address into |a| when |a| is non-nil.
func (ti onHeapTableIndex) IndexEntry(idx uint32, a *addr) (indexEntry, error) {
	prefix, ord := ti.tupleAt(idx)

	if a != nil {
		binary.BigEndian.PutUint64(a[:], prefix)
		suffixOff := int64(addrSuffixSize * ord)
		copy(a[addrPrefixSize:], ti.suffixB[suffixOff:suffixOff+addrSuffixSize])
	}

	return ti.getIndexEntry(ord), nil
}
// getIndexEntry materializes the (offset, length) pair for ordinal |ord|.
// Offsets are stored cumulatively, so an entry's start is the previous
// entry's end offset (or 0 for the first chunk).
func (ti onHeapTableIndex) getIndexEntry(ord uint32) indexEntry {
	var start uint64
	if ord > 0 {
		start = ti.offsetAt(ord - 1)
	}
	end := ti.offsetAt(ord)
	return indexResult{o: start, l: uint32(end - start)}
}
// Lookup returns the index entry for |h| and whether it was found.
func (ti onHeapTableIndex) Lookup(h *addr) (indexEntry, bool, error) {
	ord, err := ti.lookupOrdinal(h)
	// lookupOrdinal signals "absent" by returning the chunk count.
	if err != nil || ord == ti.chunkCount {
		return indexResult{}, false, err
	}
	return ti.getIndexEntry(ord), true, nil
}
// lookupOrdinal returns the ordinal of |h| if present. Returns |ti.chunkCount|
// if absent.
func (ti onHeapTableIndex) lookupOrdinal(h *addr) (uint32, error) {
	prefix := h.Prefix()

	// Entries are sorted by prefix, so scan the contiguous run of entries
	// sharing |prefix| and compare suffixes.
	for idx := ti.prefixIdx(prefix); idx < ti.chunkCount; idx++ {
		if ti.prefixAt(idx) != prefix {
			break
		}
		matched, err := ti.EntrySuffixMatches(idx, h)
		if err != nil {
			return ti.chunkCount, err
		}
		if matched {
			return ti.ordinalAt(idx), nil
		}
	}

	return ti.chunkCount, nil
}
// prefixIdx returns the first position in |tr.prefixes| whose value ==
// |prefix|. Returns |tr.chunkCount| if absent
func (ti onHeapTableIndex) prefixIdx(prefix uint64) (idx uint32) {
	// NOTE: The golang impl of sort.Search is basically inlined here. This method can be called in
	// an extremely tight loop and inlining the code was a significant perf improvement.
	// Binary search over the sorted prefix tuples; invariant: every index
	// below |idx| has a prefix < |prefix|, every index at or above |j| has
	// a prefix >= |prefix|.
	idx, j := 0, ti.chunkCount
	for idx < j {
		h := idx + (j-idx)/2 // avoid overflow when computing h
		// i ≤ h < j
		if ti.prefixAt(h) < prefix {
			idx = h + 1 // preserves f(i-1) == false
		} else {
			j = h // preserves f(j) == true
		}
	}

	return
}
// tupleAt decodes the (address prefix, ordinal) pair stored at tuple
// index |idx|.
func (ti onHeapTableIndex) tupleAt(idx uint32) (prefix uint64, ord uint32) {
	pos := int64(prefixTupleSize * idx)
	tuple := ti.tupleB[pos : pos+prefixTupleSize]
	return binary.BigEndian.Uint64(tuple[:addrPrefixSize]), binary.BigEndian.Uint32(tuple[addrPrefixSize:])
}
// prefixAt returns just the uint64 address prefix of tuple |idx|.
func (ti onHeapTableIndex) prefixAt(idx uint32) uint64 {
	pos := int64(prefixTupleSize * idx)
	return binary.BigEndian.Uint64(ti.tupleB[pos : pos+addrPrefixSize])
}
// ordinalAt returns the chunk ordinal stored in tuple |idx|.
func (ti onHeapTableIndex) ordinalAt(idx uint32) uint32 {
	pos := int64(prefixTupleSize*idx) + addrPrefixSize
	return binary.BigEndian.Uint32(ti.tupleB[pos : pos+ordinalSize])
}
// offsetAt returns the cumulative end offset recorded for ordinal |ord|.
func (ti onHeapTableIndex) offsetAt(ord uint32) uint64 {
	pos := int64(offsetSize * ord)
	return binary.BigEndian.Uint64(ti.offsetB[pos : pos+offsetSize])
}
// Ordinals returns, for each tuple position, the chunk ordinal it maps to.
func (ti onHeapTableIndex) Ordinals() ([]uint32, error) {
	ords := make([]uint32, ti.chunkCount)
	for i := range ords {
		ords[i] = ti.ordinalAt(uint32(i))
	}
	return ords, nil
}
func (ti onHeapTableIndex) Prefixes() ([]uint64, error) {
p := make([]uint64, ti.chunkCount)
for i, off := uint32(0), 0; i < ti.chunkCount; i, off = i+1, off+prefixTupleSize {
b := ti.tupleB[off : off+addrPrefixSize]
p[i] = binary.BigEndian.Uint64(b)
}
return p, nil
}
// TableFileSize returns the size of the table file that this index references.
// This assumes that the index follows immediately after the last chunk in the
// file and that the last chunk in the file is in the index.
func (ti onHeapTableIndex) TableFileSize() uint64 {
if ti.chunkCount == 0 {
return footerSize
}
entry := ti.getIndexEntry(ti.chunkCount - 1)
offset, len := entry.Offset(), uint64(entry.Length())
return offset + len + indexSize(ti.chunkCount) + footerSize
}
// TotalUncompressedData returns the total uncompressed data size recorded
// in the table file footer. Informational only.
func (ti onHeapTableIndex) TotalUncompressedData() uint64 {
	return ti.totalUncompressedData
}
// Close is a no-op: an onHeapTableIndex holds no resources beyond
// garbage-collected memory.
func (ti onHeapTableIndex) Close() error {
	return nil
}
// Clone returns a tableIndex that can be Closed independently. The value
// copy shares the underlying byte slices, which the methods in this file
// only ever read (assumed immutable after construction — confirm no caller
// mutates them).
func (ti onHeapTableIndex) Clone() (tableIndex, error) {
	return ti, nil
}
// mmap table index

// mmapIndexEntry is a view over one serialized entry in the off-heap
// index: addrSuffix | big-endian uint64 offset | big-endian uint32 length.
type mmapIndexEntry []byte

// Byte positions of the offset and length fields within an entry.
const mmapIndexEntryOffsetStart = addrSuffixSize
const mmapIndexEntryLengthStart = addrSuffixSize + uint64Size

// suffix returns the stored address-suffix bytes of the entry.
func (e mmapIndexEntry) suffix() []byte {
	return e[:addrSuffixSize]
}

// Offset returns the chunk's starting byte offset in the table file.
func (e mmapIndexEntry) Offset() uint64 {
	return binary.BigEndian.Uint64(e[mmapIndexEntryOffsetStart:])
}

// Length returns the chunk's byte length.
func (e mmapIndexEntry) Length() uint32 {
	return binary.BigEndian.Uint32(e[mmapIndexEntryLengthStart:])
}
// mmapOffheapSize returns the byte size of an mmap region large enough to
// hold |chunks| index entries, rounded up to a whole number of 4096-byte
// pages.
func mmapOffheapSize(chunks int) int {
	const pageSize = 4096
	need := chunks * (addrSuffixSize + uint64Size + lengthSize)
	if rem := need % pageSize; rem != 0 {
		need += pageSize - rem
	}
	return need
}
// An mmapIndexEntry is an addrSuffix, a BigEndian uint64 for the offset and a
// BigEndian uint32 for the chunk size.
const mmapIndexEntrySize = addrSuffixSize + uint64Size + lengthSize

// mmapOrdinal pairs an index position with its chunk's file offset.
type mmapOrdinal struct {
	idx    int
	offset uint64
}

// mmapOrdinalSlice implements sort.Interface, ordering entries by their
// chunk file offset.
type mmapOrdinalSlice []mmapOrdinal

func (s mmapOrdinalSlice) Len() int           { return len(s) }
func (s mmapOrdinalSlice) Less(i, j int) bool { return s[i].offset < s[j].offset }
func (s mmapOrdinalSlice) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
// mmapTableIndex is a tableIndex whose per-entry records live in an mmap'd,
// off-heap region rather than on the Go heap.
type mmapTableIndex struct {
	chunkCount            uint32
	totalUncompressedData uint64
	fileSz                uint64
	prefixes              []uint64  // address prefixes, one per entry; prefixIdx binary-searches these, so they are assumed sorted
	data                  mmap.MMap // packed mmapIndexEntry records, mmapIndexEntrySize bytes each
	refCnt                *int32    // shared across Clones; Close unmaps |data| when it reaches zero
}
// newMmapTableIndex copies the entries of |ti| into an off-heap mmap'd region
// and returns an index backed by it. If |f| is nil an anonymous mapping is
// used; otherwise the mapping is backed by |f|. The result starts with a
// reference count of 1; Close releases it and Clone increments it.
func newMmapTableIndex(ti onHeapTableIndex, f *os.File) (mmapTableIndex, error) {
	flags := 0
	if f == nil {
		flags = mmap.ANON
	}
	arr, err := mmap.MapRegion(f, mmapOffheapSize(int(ti.chunkCount)), mmap.RDWR, flags, 0)
	if err != nil {
		return mmapTableIndex{}, err
	}
	// Pack one fixed-size record per entry: address suffix, then BigEndian
	// offset and length (see mmapIndexEntry).
	var a addr
	for i := uint32(0); i < ti.chunkCount; i++ {
		idx := i * mmapIndexEntrySize
		si := addrSuffixSize * ti.ordinalAt(i)
		copy(arr[idx:], ti.suffixB[si:si+addrSuffixSize])
		e, err := ti.IndexEntry(i, &a)
		if err != nil {
			_ = arr.Unmap() // don't leak the mapping on failure
			return mmapTableIndex{}, err
		}
		binary.BigEndian.PutUint64(arr[idx+mmapIndexEntryOffsetStart:], e.Offset())
		binary.BigEndian.PutUint32(arr[idx+mmapIndexEntryLengthStart:], e.Length())
	}
	p, err := ti.Prefixes()
	if err != nil {
		_ = arr.Unmap() // don't leak the mapping on failure
		return mmapTableIndex{}, err
	}
	refCnt := new(int32)
	*refCnt = 1
	return mmapTableIndex{
		chunkCount:            ti.chunkCount,
		totalUncompressedData: ti.totalUncompressedData,
		fileSz:                ti.TableFileSize(),
		prefixes:              p,
		data:                  arr,
		refCnt:                refCnt,
	}, nil
}
// ChunkCount returns the total number of chunks in the indexed file.
func (i mmapTableIndex) ChunkCount() uint32 {
	return i.chunkCount
}
// EntrySuffixMatches reports whether the suffix stored for entry |idx| equals
// the suffix portion of address |h|.
func (i mmapTableIndex) EntrySuffixMatches(idx uint32, h *addr) (bool, error) {
	entry := mmapIndexEntry(i.data[idx*mmapIndexEntrySize : (idx+1)*mmapIndexEntrySize])
	return bytes.Equal(entry.suffix(), h[addrPrefixSize:]), nil
}
// IndexEntry returns the entry at |idx|. If |a| is non-nil, the entry's full
// address is reassembled into it from the stored prefix and suffix.
func (i mmapTableIndex) IndexEntry(idx uint32, a *addr) (indexEntry, error) {
	start := idx * mmapIndexEntrySize
	entry := mmapIndexEntry(i.data[start : start+mmapIndexEntrySize])
	if a != nil {
		binary.BigEndian.PutUint64(a[:], i.prefixes[idx])
		copy(a[addrPrefixSize:], entry.suffix())
	}
	return entry, nil
}
// Lookup returns the index entry for address |h|; the bool reports whether an
// entry was found.
func (i mmapTableIndex) Lookup(h *addr) (indexEntry, bool, error) {
	prefix := binary.BigEndian.Uint64(h[:])
	suffix := h[addrPrefixSize:]
	// Walk the run of entries sharing |prefix|, comparing stored suffixes.
	for idx := i.prefixIdx(prefix); idx < i.chunkCount; idx++ {
		if i.prefixes[idx] != prefix {
			break
		}
		start := idx * mmapIndexEntrySize
		entry := mmapIndexEntry(i.data[start : start+mmapIndexEntrySize])
		if bytes.Equal(entry.suffix(), suffix) {
			return entry, true, nil
		}
	}
	return mmapIndexEntry{}, false, nil
}
// Ordinals maps each index entry to its position in the table file. It sorts
// the entries by file offset; position j in that ordering is the ordinal of
// the entry that landed there.
func (i mmapTableIndex) Ordinals() ([]uint32, error) {
	pairs := mmapOrdinalSlice(make([]mmapOrdinal, i.chunkCount))
	for idx := range pairs {
		off := mmapIndexEntry(i.data[idx*mmapIndexEntrySize : (idx+1)*mmapIndexEntrySize]).Offset()
		pairs[idx] = mmapOrdinal{idx: idx, offset: off}
	}
	sort.Sort(pairs)
	ordinals := make([]uint32, i.chunkCount)
	for pos, pair := range pairs {
		ordinals[pair.idx] = uint32(pos)
	}
	return ordinals, nil
}
// Prefixes returns the slice of address prefixes, one per entry. The internal
// slice is returned without copying; callers must not mutate it.
func (i mmapTableIndex) Prefixes() ([]uint64, error) {
	return i.prefixes, nil
}
// TableFileSize returns the total size of the indexed table file, in bytes,
// as computed from the on-heap index at construction time.
func (i mmapTableIndex) TableFileSize() uint64 {
	return i.fileSz
}
// TotalUncompressedData returns the total uncompressed data size of the table
// file. Used for informational statistics only.
func (i mmapTableIndex) TotalUncompressedData() uint64 {
	return i.totalUncompressedData
}
// Close decrements the reference count shared with any Clones. The mmap
// region is unmapped when the last reference is released; a negative count
// indicates a Close-after-final-Close bug and panics.
func (i mmapTableIndex) Close() error {
	cnt := atomic.AddInt32(i.refCnt, -1)
	if cnt == 0 {
		return i.data.Unmap()
	}
	if cnt < 0 {
		panic("Close() called and reduced ref count to < 0.")
	}
	return nil
}
// Clone returns a new reference to the same mmap-backed index, incrementing
// the shared reference count. Each Clone must be Closed independently.
func (i mmapTableIndex) Clone() (tableIndex, error) {
	cnt := atomic.AddInt32(i.refCnt, 1)
	if cnt == 1 {
		// Incrementing from 0 means the region was already unmapped.
		panic("Clone() called after last Close(). This index is no longer valid.")
	}
	return i, nil
}
// prefixIdx returns the first position in |i.prefixes| whose value == |prefix|,
// or |i.chunkCount| if absent. |i.prefixes| is assumed to be sorted ascending.
func (i mmapTableIndex) prefixIdx(prefix uint64) (idx uint32) {
	// NOTE: The golang impl of sort.Search is basically inlined here. This method can be called in
	// an extremely tight loop and inlining the code was a significant perf improvement.
	idx, j := 0, i.chunkCount
	for idx < j {
		h := idx + (j-idx)/2 // avoid overflow when computing h
		// i ≤ h < j
		if i.prefixes[h] < prefix {
			idx = h + 1 // preserves f(i-1) == false
		} else {
			j = h // preserves f(j) == true
		}
	}
	return
}

View File

@@ -0,0 +1,104 @@
// Copyright 2022 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package nbs
import (
"io"
"os"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestParseTableIndex parses an on-heap index from a fixture .idx file,
// checks the expected chunk count, and verifies that every entry retrieved by
// position is also found by address Lookup with matching offset and length.
func TestParseTableIndex(t *testing.T) {
	f, err := os.Open("testdata/0oa7mch34jg1rvghrnhr4shrp2fm4ftd.idx")
	require.NoError(t, err)
	defer f.Close()
	bs, err := io.ReadAll(f)
	require.NoError(t, err)
	idx, err := parseTableIndexByCopy(bs)
	require.NoError(t, err)
	defer idx.Close()
	assert.Equal(t, uint32(596), idx.ChunkCount())
	seen := make(map[addr]bool)
	for i := uint32(0); i < idx.ChunkCount(); i++ {
		var onheapaddr addr
		e, err := idx.IndexEntry(i, &onheapaddr)
		require.NoError(t, err)
		// Exercise Lookup only once per distinct address.
		if _, ok := seen[onheapaddr]; !ok {
			seen[onheapaddr] = true
			lookupe, ok, err := idx.Lookup(&onheapaddr)
			require.NoError(t, err)
			assert.True(t, ok)
			assert.Equal(t, e.Offset(), lookupe.Offset(), "%v does not match %v for address %v", e, lookupe, onheapaddr)
			assert.Equal(t, e.Length(), lookupe.Length())
		}
	}
}
// TestMMapIndex builds an mmap-backed index from an on-heap index parsed out
// of a fixture .idx file and verifies the two agree entry-by-entry
// (addresses, offsets, lengths, Lookup results) as well as on Ordinals,
// Prefixes, TableFileSize, and TotalUncompressedData.
func TestMMapIndex(t *testing.T) {
	f, err := os.Open("testdata/0oa7mch34jg1rvghrnhr4shrp2fm4ftd.idx")
	require.NoError(t, err)
	defer f.Close()
	bs, err := io.ReadAll(f)
	require.NoError(t, err)
	idx, err := parseTableIndexByCopy(bs)
	require.NoError(t, err)
	defer idx.Close()
	mmidx, err := newMmapTableIndex(idx, nil)
	require.NoError(t, err)
	defer mmidx.Close()
	assert.Equal(t, idx.ChunkCount(), mmidx.ChunkCount())
	seen := make(map[addr]bool)
	for i := uint32(0); i < idx.ChunkCount(); i++ {
		var onheapaddr addr
		onheapentry, err := idx.IndexEntry(i, &onheapaddr)
		require.NoError(t, err)
		var mmaddr addr
		mmentry, err := mmidx.IndexEntry(i, &mmaddr)
		require.NoError(t, err)
		assert.Equal(t, onheapaddr, mmaddr)
		assert.Equal(t, onheapentry.Offset(), mmentry.Offset())
		assert.Equal(t, onheapentry.Length(), mmentry.Length())
		// Exercise Lookup only once per distinct address.
		if _, ok := seen[onheapaddr]; !ok {
			seen[onheapaddr] = true
			mmentry, found, err := mmidx.Lookup(&onheapaddr)
			require.NoError(t, err)
			assert.True(t, found)
			assert.Equal(t, onheapentry.Offset(), mmentry.Offset(), "%v does not match %v for address %v", onheapentry, mmentry, onheapaddr)
			assert.Equal(t, onheapentry.Length(), mmentry.Length())
		}
		// Corrupt one suffix byte and confirm Lookup misses (skip when the
		// byte is already zero, since that would not change the address).
		wrongaddr := onheapaddr
		if wrongaddr[19] != 0 {
			wrongaddr[19] = 0
			_, found, err := mmidx.Lookup(&wrongaddr)
			require.NoError(t, err)
			assert.False(t, found)
		}
	}
	o1, err := idx.Ordinals()
	require.NoError(t, err)
	o2, err := mmidx.Ordinals()
	require.NoError(t, err)
	assert.Equal(t, o1, o2)
	p1, err := idx.Prefixes()
	require.NoError(t, err)
	p2, err := mmidx.Prefixes()
	require.NoError(t, err)
	assert.Equal(t, p1, p2)
	assert.Equal(t, idx.TableFileSize(), mmidx.TableFileSize())
	assert.Equal(t, idx.TotalUncompressedData(), mmidx.TotalUncompressedData())
}

View File

@@ -256,8 +256,14 @@ func planConjoin(sources chunkSources, stats *Stats) (plan compactionPlan, err e
return compactionPlan{}, err
}
ordinals := index.Ordinals()
prefixes := index.Prefixes()
ordinals, err := index.Ordinals()
if err != nil {
return compactionPlan{}, err
}
prefixes, err := index.Prefixes()
if err != nil {
return compactionPlan{}, err
}
// Add all the prefix tuples from this index to the list of all prefixIndexRecs, modifying the ordinals such that all entries from the 1st item in sources come after those in the 0th and so on.
for j, prefix := range prefixes {
@@ -277,15 +283,16 @@ func planConjoin(sources chunkSources, stats *Stats) (plan compactionPlan, err e
if onHeap, ok := index.(onHeapTableIndex); ok {
// TODO: copy the lengths and suffixes as a byte-copy from src BUG #3438
// Bring over the lengths block, in order
for _, length := range onHeap.lengths {
binary.BigEndian.PutUint32(plan.mergedIndex[lengthsPos:], length)
for ord := uint32(0); ord < onHeap.chunkCount; ord++ {
e := onHeap.getIndexEntry(ord)
binary.BigEndian.PutUint32(plan.mergedIndex[lengthsPos:], e.Length())
lengthsPos += lengthSize
}
// Bring over the suffixes block, in order
n := copy(plan.mergedIndex[suffixesPos:], onHeap.suffixes)
n := copy(plan.mergedIndex[suffixesPos:], onHeap.suffixB)
if n != len(onHeap.suffixes) {
if n != len(onHeap.suffixB) {
return compactionPlan{}, errors.New("failed to copy all data")
}
@@ -294,7 +301,10 @@ func planConjoin(sources chunkSources, stats *Stats) (plan compactionPlan, err e
// Build up the index one entry at a time.
var a addr
for i := 0; i < len(ordinals); i++ {
e := index.IndexEntry(uint32(i), &a)
e, err := index.IndexEntry(uint32(i), &a)
if err != nil {
return compactionPlan{}, err
}
li := lengthsPos + lengthSize*uint64(ordinals[i])
si := suffixesPos + addrSuffixSize*uint64(ordinals[i])
binary.BigEndian.PutUint32(plan.mergedIndex[li:], e.Length())

View File

@@ -45,9 +45,11 @@ func TestPlanCompaction(t *testing.T) {
}
data, name, err := buildTable(content)
require.NoError(t, err)
ti, err := parseTableIndex(data)
ti, err := parseTableIndexByCopy(data)
require.NoError(t, err)
src := chunkSourceAdapter{newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize), name}
tr, err := newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize)
require.NoError(t, err)
src := chunkSourceAdapter{tr, name}
dataLens = append(dataLens, uint64(len(data))-indexSize(mustUint32(src.count()))-footerSize)
sources = append(sources, src)
}
@@ -67,7 +69,8 @@ func TestPlanCompaction(t *testing.T) {
assert.Equal(totalChunks, idx.chunkCount)
assert.Equal(totalUnc, idx.totalUncompressedData)
tr := newTableReader(idx, tableReaderAtFromBytes(nil), fileBlockSize)
tr, err := newTableReader(idx, tableReaderAtFromBytes(nil), fileBlockSize)
require.NoError(t, err)
for _, content := range tableContents {
assertChunksInReader(content, tr, assert)
}

View File

@@ -22,20 +22,16 @@
package nbs
import (
"bytes"
"context"
"encoding/binary"
"errors"
"io"
"os"
"sort"
"sync/atomic"
"github.com/dolthub/mmap-go"
"github.com/golang/snappy"
"golang.org/x/sync/errgroup"
"github.com/dolthub/dolt/go/libraries/utils/iohelp"
"github.com/dolthub/dolt/go/store/chunks"
"github.com/dolthub/dolt/go/store/hash"
)
@@ -107,14 +103,6 @@ func init() {
// ErrInvalidTableFile is an error returned when a table file is corrupt or invalid.
var ErrInvalidTableFile = errors.New("invalid or corrupt table file")
type onHeapTableIndex struct {
chunkCount uint32
totalUncompressedData uint64
prefixes, offsets []uint64
lengths, ordinals []uint32
suffixes []byte
}
type indexEntry interface {
Offset() uint64
Length() uint32
@@ -133,181 +121,6 @@ func (ir indexResult) Length() uint32 {
return ir.l
}
// An mmapIndexEntry is an addrSuffix, a BigEndian uint64 for the offset and a
// BigEnding uint32 for the chunk size.
const mmapIndexEntrySize = addrSuffixSize + uint64Size + lengthSize
type mmapOrdinalSlice []mmapOrdinal
func (s mmapOrdinalSlice) Len() int { return len(s) }
func (s mmapOrdinalSlice) Less(i, j int) bool { return s[i].offset < s[j].offset }
func (s mmapOrdinalSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (i mmapTableIndex) Ordinals() []uint32 {
s := mmapOrdinalSlice(make([]mmapOrdinal, i.chunkCount))
for idx := 0; uint32(idx) < i.chunkCount; idx++ {
mi := idx * mmapIndexEntrySize
e := mmapIndexEntry(i.data[mi : mi+mmapIndexEntrySize])
s[idx] = mmapOrdinal{idx, e.Offset()}
}
sort.Sort(s)
res := make([]uint32, i.chunkCount)
for j, r := range s {
res[r.idx] = uint32(j)
}
return res
}
type mmapTableIndex struct {
chunkCount uint32
totalUncompressedData uint64
fileSz uint64
prefixes []uint64
data mmap.MMap
refCnt *int32
}
func (i mmapTableIndex) Prefixes() []uint64 {
return i.prefixes
}
type mmapOrdinal struct {
idx int
offset uint64
}
func (i mmapTableIndex) TableFileSize() uint64 {
return i.fileSz
}
func (i mmapTableIndex) ChunkCount() uint32 {
return i.chunkCount
}
func (i mmapTableIndex) TotalUncompressedData() uint64 {
return i.totalUncompressedData
}
func (i mmapTableIndex) Close() error {
cnt := atomic.AddInt32(i.refCnt, -1)
if cnt == 0 {
return i.data.Unmap()
}
if cnt < 0 {
panic("Close() called and reduced ref count to < 0.")
}
return nil
}
func (i mmapTableIndex) Clone() tableIndex {
cnt := atomic.AddInt32(i.refCnt, 1)
if cnt == 1 {
panic("Clone() called after last Close(). This index is no longer valid.")
}
return i
}
func (i mmapTableIndex) prefixIdx(prefix uint64) (idx uint32) {
// NOTE: The golang impl of sort.Search is basically inlined here. This method can be called in
// an extremely tight loop and inlining the code was a significant perf improvement.
idx, j := 0, i.chunkCount
for idx < j {
h := idx + (j-idx)/2 // avoid overflow when computing h
// i ≤ h < j
if i.prefixes[h] < prefix {
idx = h + 1 // preserves f(i-1) == false
} else {
j = h // preserves f(j) == true
}
}
return
}
func (i mmapTableIndex) Lookup(h *addr) (indexEntry, bool) {
prefix := binary.BigEndian.Uint64(h[:])
for idx := i.prefixIdx(prefix); idx < i.chunkCount && i.prefixes[idx] == prefix; idx++ {
mi := idx * mmapIndexEntrySize
e := mmapIndexEntry(i.data[mi : mi+mmapIndexEntrySize])
if bytes.Equal(e.suffix(), h[addrPrefixSize:]) {
return e, true
}
}
return mmapIndexEntry{}, false
}
func (i mmapTableIndex) EntrySuffixMatches(idx uint32, h *addr) bool {
mi := idx * mmapIndexEntrySize
e := mmapIndexEntry(i.data[mi : mi+mmapIndexEntrySize])
return bytes.Equal(e.suffix(), h[addrPrefixSize:])
}
func (i mmapTableIndex) IndexEntry(idx uint32, a *addr) indexEntry {
mi := idx * mmapIndexEntrySize
e := mmapIndexEntry(i.data[mi : mi+mmapIndexEntrySize])
if a != nil {
binary.BigEndian.PutUint64(a[:], i.prefixes[idx])
copy(a[addrPrefixSize:], e.suffix())
}
return e
}
type mmapIndexEntry []byte
const mmapIndexEntryOffsetStart = addrSuffixSize
const mmapIndexEntryLengthStart = addrSuffixSize + uint64Size
func (e mmapIndexEntry) suffix() []byte {
return e[:addrSuffixSize]
}
func (e mmapIndexEntry) Offset() uint64 {
return binary.BigEndian.Uint64(e[mmapIndexEntryOffsetStart:])
}
func (e mmapIndexEntry) Length() uint32 {
return binary.BigEndian.Uint32(e[mmapIndexEntryLengthStart:])
}
func mmapOffheapSize(chunks int) int {
pageSize := 4096
esz := addrSuffixSize + uint64Size + lengthSize
min := esz * chunks
if min%pageSize == 0 {
return min
} else {
return (min/pageSize + 1) * pageSize
}
}
func newMmapTableIndex(ti onHeapTableIndex, f *os.File) (mmapTableIndex, error) {
flags := 0
if f == nil {
flags = mmap.ANON
}
arr, err := mmap.MapRegion(f, mmapOffheapSize(len(ti.ordinals)), mmap.RDWR, flags, 0)
if err != nil {
return mmapTableIndex{}, err
}
for i := range ti.ordinals {
idx := i * mmapIndexEntrySize
si := addrSuffixSize * ti.ordinals[i]
copy(arr[idx:], ti.suffixes[si:si+addrSuffixSize])
binary.BigEndian.PutUint64(arr[idx+mmapIndexEntryOffsetStart:], ti.offsets[ti.ordinals[i]])
binary.BigEndian.PutUint32(arr[idx+mmapIndexEntryLengthStart:], ti.lengths[ti.ordinals[i]])
}
refCnt := new(int32)
*refCnt = 1
return mmapTableIndex{
ti.chunkCount,
ti.totalUncompressedData,
ti.TableFileSize(),
ti.Prefixes(),
arr,
refCnt,
}, nil
}
type tableReaderAt interface {
ReadAtWithStats(ctx context.Context, p []byte, off int64, stats *Stats) (n int, err error)
}
@@ -326,234 +139,22 @@ type tableReader struct {
blockSize uint64
}
type tableIndex interface {
// ChunkCount returns the total number of chunks in the indexed file.
ChunkCount() uint32
// EntrySuffixMatches returns true if the entry at index |idx| matches
// the suffix of the address |h|. Used by |Lookup| after finding
// matching indexes based on |Prefixes|.
EntrySuffixMatches(idx uint32, h *addr) bool
// IndexEntry returns the |indexEntry| at |idx|. Optionally puts the
// full address of that entry in |a| if |a| is not |nil|.
IndexEntry(idx uint32, a *addr) indexEntry
// Lookup returns an |indexEntry| for the chunk corresponding to the
// provided address |h|. Second returns is |true| if an entry exists
// and |false| otherwise.
Lookup(h *addr) (indexEntry, bool)
// Ordinals returns a slice of indexes which maps the |i|th chunk in
// the indexed file to its corresponding entry in index. The |i|th
// entry in the result is the |i|th chunk in the indexed file, and its
// corresponding value in the slice is the index entry that maps to it.
Ordinals() []uint32
// Prefixes returns the sorted slice of |uint64| |addr| prefixes; each
// entry corresponds to an indexed chunk address.
Prefixes() []uint64
// TableFileSize returns the total size of the indexed table file, in bytes.
TableFileSize() uint64
// TotalUncompressedData returns the total uncompressed data size of
// the table file. Used for informational statistics only.
TotalUncompressedData() uint64
// Close releases any resources used by this tableIndex.
Close() error
// Clone returns a |tableIndex| with the same contents which can be
// |Close|d independently.
Clone() tableIndex
}
var _ tableIndex = mmapTableIndex{}
// parses a valid nbs tableIndex from a byte stream. |buff| must end with an NBS index
// and footer, though it may contain an unspecified number of bytes before that data.
// |tableIndex| doesn't keep alive any references to |buff|.
func parseTableIndex(buff []byte) (onHeapTableIndex, error) {
return ReadTableIndex(bytes.NewReader(buff))
}
func ReadTableIndex(rd io.ReadSeeker) (onHeapTableIndex, error) {
footerSize := int64(magicNumberSize + uint64Size + uint32Size)
_, err := rd.Seek(-footerSize, io.SeekEnd)
if err != nil {
return onHeapTableIndex{}, err
}
footer, err := iohelp.ReadNBytes(rd, int(footerSize))
if err != nil {
return onHeapTableIndex{}, err
}
if string(footer[uint32Size+uint64Size:]) != magicNumber {
return onHeapTableIndex{}, ErrInvalidTableFile
}
chunkCount := binary.BigEndian.Uint32(footer)
totalUncompressedData := binary.BigEndian.Uint64(footer[uint32Size:])
// index
suffixesSize := int64(chunkCount) * addrSuffixSize
lengthsSize := int64(chunkCount) * lengthSize
tuplesSize := int64(chunkCount) * prefixTupleSize
indexSize := suffixesSize + lengthsSize + tuplesSize
_, err = rd.Seek(-(indexSize + footerSize), io.SeekEnd)
if err != nil {
return onHeapTableIndex{}, ErrInvalidTableFile
}
indexBytes, err := iohelp.ReadNBytes(rd, int(indexSize))
if err != nil {
return onHeapTableIndex{}, ErrInvalidTableFile
}
prefixes, ordinals := computePrefixes(chunkCount, indexBytes[:tuplesSize])
lengths, offsets := computeOffsets(chunkCount, indexBytes[tuplesSize:tuplesSize+lengthsSize])
suffixes := indexBytes[tuplesSize+lengthsSize:]
return onHeapTableIndex{
chunkCount, totalUncompressedData,
prefixes, offsets,
lengths, ordinals,
suffixes,
}, nil
}
func computeOffsets(count uint32, buff []byte) (lengths []uint32, offsets []uint64) {
lengths = make([]uint32, count)
offsets = make([]uint64, count)
lengths[0] = binary.BigEndian.Uint32(buff)
for i := uint64(1); i < uint64(count); i++ {
lengths[i] = binary.BigEndian.Uint32(buff[i*lengthSize:])
offsets[i] = offsets[i-1] + uint64(lengths[i-1])
}
return
}
func computePrefixes(count uint32, buff []byte) (prefixes []uint64, ordinals []uint32) {
prefixes = make([]uint64, count)
ordinals = make([]uint32, count)
for i := uint64(0); i < uint64(count); i++ {
idx := i * prefixTupleSize
prefixes[i] = binary.BigEndian.Uint64(buff[idx:])
ordinals[i] = binary.BigEndian.Uint32(buff[idx+addrPrefixSize:])
}
return
}
func (ti onHeapTableIndex) prefixIdxToOrdinal(idx uint32) uint32 {
return ti.ordinals[idx]
}
// TableFileSize returns the size of the table file that this index references.
// This assumes that the index follows immediately after the last chunk in the
// file and that the last chunk in the file is in the index.
func (ti onHeapTableIndex) TableFileSize() uint64 {
if ti.chunkCount == 0 {
return footerSize
}
len, offset := ti.offsets[ti.chunkCount-1], uint64(ti.lengths[ti.chunkCount-1])
return offset + len + indexSize(ti.chunkCount) + footerSize
}
// prefixIdx returns the first position in |tr.prefixes| whose value ==
// |prefix|. Returns |tr.chunkCount| if absent
func (ti onHeapTableIndex) prefixIdx(prefix uint64) (idx uint32) {
// NOTE: The golang impl of sort.Search is basically inlined here. This method can be called in
// an extremely tight loop and inlining the code was a significant perf improvement.
idx, j := 0, ti.chunkCount
for idx < j {
h := idx + (j-idx)/2 // avoid overflow when computing h
// i ≤ h < j
if ti.prefixes[h] < prefix {
idx = h + 1 // preserves f(i-1) == false
} else {
j = h // preserves f(j) == true
}
}
return
}
// EntrySuffixMatches returns true IFF the suffix for prefix entry |idx|
// matches the address |a|.
func (ti onHeapTableIndex) EntrySuffixMatches(idx uint32, h *addr) bool {
li := uint64(ti.ordinals[idx]) * addrSuffixSize
return bytes.Equal(h[addrPrefixSize:], ti.suffixes[li:li+addrSuffixSize])
}
// lookupOrdinal returns the ordinal of |h| if present. Returns |ti.chunkCount|
// if absent.
func (ti onHeapTableIndex) lookupOrdinal(h *addr) uint32 {
prefix := h.Prefix()
for idx := ti.prefixIdx(prefix); idx < ti.chunkCount && ti.prefixes[idx] == prefix; idx++ {
if ti.EntrySuffixMatches(idx, h) {
return ti.ordinals[idx]
}
}
return ti.chunkCount
}
func (ti onHeapTableIndex) IndexEntry(idx uint32, a *addr) indexEntry {
ord := ti.ordinals[idx]
if a != nil {
binary.BigEndian.PutUint64(a[:], ti.prefixes[idx])
li := uint64(ord) * addrSuffixSize
copy(a[addrPrefixSize:], ti.suffixes[li:li+addrSuffixSize])
}
return indexResult{ti.offsets[ord], ti.lengths[ord]}
}
func (ti onHeapTableIndex) Lookup(h *addr) (indexEntry, bool) {
ord := ti.lookupOrdinal(h)
if ord == ti.chunkCount {
return indexResult{}, false
}
return indexResult{ti.offsets[ord], ti.lengths[ord]}, true
}
func (ti onHeapTableIndex) Prefixes() []uint64 {
return ti.prefixes
}
func (ti onHeapTableIndex) Ordinals() []uint32 {
return ti.ordinals
}
func (i onHeapTableIndex) ChunkCount() uint32 {
return i.chunkCount
}
func (i onHeapTableIndex) TotalUncompressedData() uint64 {
return i.totalUncompressedData
}
func (i onHeapTableIndex) Close() error {
return nil
}
func (i onHeapTableIndex) Clone() tableIndex {
return i
}
// newTableReader parses a valid nbs table byte stream and returns a reader. buff must end with an NBS index
// and footer, though it may contain an unspecified number of bytes before that data. r should allow
// retrieving any desired range of bytes from the table.
func newTableReader(index tableIndex, r tableReaderAt, blockSize uint64) tableReader {
func newTableReader(index tableIndex, r tableReaderAt, blockSize uint64) (tableReader, error) {
p, err := index.Prefixes()
if err != nil {
return tableReader{}, err
}
return tableReader{
index,
index.Prefixes(),
p,
index.ChunkCount(),
index.TotalUncompressedData(),
r,
blockSize,
}
}, nil
}
// Scan across (logically) two ordered slices of address prefixes.
@@ -584,7 +185,11 @@ func (tr tableReader) hasMany(addrs []hasRecord) (bool, error) {
// prefixes are equal, so locate and compare against the corresponding suffix
for j := filterIdx; j < filterLen && addr.prefix == tr.prefixes[j]; j++ {
if tr.EntrySuffixMatches(j, addr.a) {
m, err := tr.EntrySuffixMatches(j, addr.a)
if err != nil {
return false, err
}
if m {
addrs[i].has = true
break
}
@@ -612,14 +217,17 @@ func (tr tableReader) index() (tableIndex, error) {
// returns true iff |h| can be found in this table.
func (tr tableReader) has(h addr) (bool, error) {
_, ok := tr.Lookup(&h)
return ok, nil
_, ok, err := tr.Lookup(&h)
return ok, err
}
// returns the storage associated with |h|, iff present. Returns nil if absent. On success,
// the returned byte slice directly references the underlying storage.
func (tr tableReader) get(ctx context.Context, h addr, stats *Stats) ([]byte, error) {
e, found := tr.Lookup(&h)
e, found, err := tr.Lookup(&h)
if err != nil {
return nil, err
}
if !found {
return nil, nil
}
@@ -746,15 +354,21 @@ func (tr tableReader) getMany(
// Pass #1: Iterate over |reqs| and |tr.prefixes| (both sorted by address) and build the set
// of table locations which must be read in order to satisfy the getMany operation.
offsetRecords, remaining := tr.findOffsets(reqs)
err := tr.getManyAtOffsets(ctx, eg, offsetRecords, found, stats)
offsetRecords, remaining, err := tr.findOffsets(reqs)
if err != nil {
return false, err
}
err = tr.getManyAtOffsets(ctx, eg, offsetRecords, found, stats)
return remaining, err
}
func (tr tableReader) getManyCompressed(ctx context.Context, eg *errgroup.Group, reqs []getRecord, found func(context.Context, CompressedChunk), stats *Stats) (bool, error) {
// Pass #1: Iterate over |reqs| and |tr.prefixes| (both sorted by address) and build the set
// of table locations which must be read in order to satisfy the getMany operation.
offsetRecords, remaining := tr.findOffsets(reqs)
err := tr.getManyCompressedAtOffsets(ctx, eg, offsetRecords, found, stats)
offsetRecords, remaining, err := tr.findOffsets(reqs)
if err != nil {
return false, err
}
err = tr.getManyCompressedAtOffsets(ctx, eg, offsetRecords, found, stats)
return remaining, err
}
@@ -867,7 +481,7 @@ func (tr tableReader) getManyAtOffsetsWithReadFunc(
// chunks remaining will be set to false upon return. If some are not here,
// then remaining will be true. The result offsetRecSlice is sorted in offset
// order.
func (tr tableReader) findOffsets(reqs []getRecord) (ors offsetRecSlice, remaining bool) {
func (tr tableReader) findOffsets(reqs []getRecord) (ors offsetRecSlice, remaining bool, err error) {
filterIdx := uint32(0)
filterLen := uint32(len(tr.prefixes))
ors = make(offsetRecSlice, 0, len(reqs))
@@ -896,9 +510,16 @@ func (tr tableReader) findOffsets(reqs []getRecord) (ors offsetRecSlice, remaini
// record all offsets within the table which contain the data required.
for j := filterIdx; j < filterLen && req.prefix == tr.prefixes[j]; j++ {
if tr.EntrySuffixMatches(j, req.a) {
m, err := tr.EntrySuffixMatches(j, req.a)
if err != nil {
return nil, false, err
}
if m {
reqs[i].found = true
entry := tr.IndexEntry(j, nil)
entry, err := tr.IndexEntry(j, nil)
if err != nil {
return nil, false, err
}
ors = append(ors, offsetRec{req.a, entry.Offset(), entry.Length()})
break
}
@@ -906,7 +527,7 @@ func (tr tableReader) findOffsets(reqs []getRecord) (ors offsetRecSlice, remaini
}
sort.Sort(ors)
return ors, remaining
return ors, remaining, nil
}
func canReadAhead(fRec offsetRec, curStart, curEnd, blockSize uint64) (newEnd uint64, canRead bool) {
@@ -933,7 +554,10 @@ func canReadAhead(fRec offsetRec, curStart, curEnd, blockSize uint64) (newEnd ui
func (tr tableReader) calcReads(reqs []getRecord, blockSize uint64) (reads int, remaining bool, err error) {
var offsetRecords offsetRecSlice
// Pass #1: Build the set of table locations which must be read in order to find all the elements of |reqs| which are present in this table.
offsetRecords, remaining = tr.findOffsets(reqs)
offsetRecords, remaining, err = tr.findOffsets(reqs)
if err != nil {
return 0, false, err
}
// Now |offsetRecords| contains all locations within the table which must
// be searched (note that there may be duplicates of a particular
@@ -997,7 +621,10 @@ func (tr tableReader) extract(ctx context.Context, chunks chan<- extractRecord)
var ors offsetRecSlice
for i := uint32(0); i < tr.chunkCount; i++ {
a := new(addr)
e := tr.IndexEntry(i, a)
e, err := tr.IndexEntry(i, a)
if err != nil {
return err
}
ors = append(ors, offsetRec{a, e.Offset(), e.Length()})
}
sort.Sort(ors)
@@ -1020,8 +647,12 @@ func (tr tableReader) Close() error {
return tr.tableIndex.Close()
}
func (tr tableReader) Clone() tableReader {
return tableReader{tr.tableIndex.Clone(), tr.prefixes, tr.chunkCount, tr.totalUncompressedData, tr.r, tr.blockSize}
func (tr tableReader) Clone() (tableReader, error) {
ti, err := tr.tableIndex.Clone()
if err != nil {
return tableReader{}, err
}
return tableReader{ti, tr.prefixes, tr.chunkCount, tr.totalUncompressedData, tr.r, tr.blockSize}, nil
}
type readerAdapter struct {

View File

@@ -15,12 +15,9 @@
package nbs
import (
"io"
"os"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestCompressedChunkIsEmpty(t *testing.T) {
@@ -32,73 +29,6 @@ func TestCompressedChunkIsEmpty(t *testing.T) {
}
}
func TestParseTableIndex(t *testing.T) {
f, err := os.Open("testdata/0oa7mch34jg1rvghrnhr4shrp2fm4ftd.idx")
require.NoError(t, err)
defer f.Close()
bs, err := io.ReadAll(f)
require.NoError(t, err)
idx, err := parseTableIndex(bs)
require.NoError(t, err)
defer idx.Close()
assert.Equal(t, uint32(596), idx.ChunkCount())
seen := make(map[addr]bool)
for i := uint32(0); i < idx.ChunkCount(); i++ {
var onheapaddr addr
e := idx.IndexEntry(i, &onheapaddr)
if _, ok := seen[onheapaddr]; !ok {
seen[onheapaddr] = true
lookupe, ok := idx.Lookup(&onheapaddr)
assert.True(t, ok)
assert.Equal(t, e.Offset(), lookupe.Offset(), "%v does not match %v for address %v", e, lookupe, onheapaddr)
assert.Equal(t, e.Length(), lookupe.Length())
}
}
}
func TestMMapIndex(t *testing.T) {
f, err := os.Open("testdata/0oa7mch34jg1rvghrnhr4shrp2fm4ftd.idx")
require.NoError(t, err)
defer f.Close()
bs, err := io.ReadAll(f)
require.NoError(t, err)
idx, err := parseTableIndex(bs)
require.NoError(t, err)
defer idx.Close()
mmidx, err := newMmapTableIndex(idx, nil)
require.NoError(t, err)
defer mmidx.Close()
assert.Equal(t, idx.ChunkCount(), mmidx.ChunkCount())
seen := make(map[addr]bool)
for i := uint32(0); i < idx.ChunkCount(); i++ {
var onheapaddr addr
onheapentry := idx.IndexEntry(i, &onheapaddr)
var mmaddr addr
mmentry := mmidx.IndexEntry(i, &mmaddr)
assert.Equal(t, onheapaddr, mmaddr)
assert.Equal(t, onheapentry.Offset(), mmentry.Offset())
assert.Equal(t, onheapentry.Length(), mmentry.Length())
if _, ok := seen[onheapaddr]; !ok {
seen[onheapaddr] = true
mmentry, found := mmidx.Lookup(&onheapaddr)
assert.True(t, found)
assert.Equal(t, onheapentry.Offset(), mmentry.Offset(), "%v does not match %v for address %v", onheapentry, mmentry, onheapaddr)
assert.Equal(t, onheapentry.Length(), mmentry.Length())
}
wrongaddr := onheapaddr
if wrongaddr[19] != 0 {
wrongaddr[19] = 0
_, found := mmidx.Lookup(&wrongaddr)
assert.False(t, found)
}
}
assert.Equal(t, idx.Ordinals(), mmidx.Ordinals())
assert.Equal(t, idx.Prefixes(), mmidx.Prefixes())
assert.Equal(t, idx.TableFileSize(), mmidx.TableFileSize())
assert.Equal(t, idx.TotalUncompressedData(), mmidx.TotalUncompressedData())
}
func TestCanReadAhead(t *testing.T) {
type expected struct {
end uint64

View File

@@ -137,7 +137,10 @@ func (ts tableSet) getMany(ctx context.Context, eg *errgroup.Group, reqs []getRe
f := func(css chunkSources) bool {
for _, haver := range css {
if rp, ok := haver.(chunkReadPlanner); ok {
offsets, remaining := rp.findOffsets(reqs)
offsets, remaining, err := rp.findOffsets(reqs)
if err != nil {
return true
}
err = rp.getManyAtOffsets(ctx, eg, offsets, found, stats)
if err != nil {
return true
@@ -165,7 +168,10 @@ func (ts tableSet) getManyCompressed(ctx context.Context, eg *errgroup.Group, re
f := func(css chunkSources) bool {
for _, haver := range css {
if rp, ok := haver.(chunkReadPlanner); ok {
offsets, remaining := rp.findOffsets(reqs)
offsets, remaining, err := rp.findOffsets(reqs)
if err != nil {
return true
}
if len(offsets) > 0 {
err = rp.getManyCompressedAtOffsets(ctx, eg, offsets, found, stats)
if err != nil {
@@ -428,7 +434,11 @@ func (ts tableSet) Rebase(ctx context.Context, specs []tableSpec, stats *Stats)
}
if cnt > 0 {
merged.novel = append(merged.novel, t.Clone())
t2, err := t.Clone()
if err != nil {
return tableSet{}, err
}
merged.novel = append(merged.novel, t2)
}
}
@@ -465,7 +475,12 @@ func (ts tableSet) Rebase(ctx context.Context, specs []tableSpec, stats *Stats)
return
}
if spec.name == h {
merged.upstream[idx] = existing.Clone()
c, err := existing.Clone()
if err != nil {
ae.SetIfError(err)
return
}
merged.upstream[idx] = c
return
}
}

View File

@@ -77,9 +77,10 @@ func TestSimple(t *testing.T) {
tableData, _, err := buildTable(chunks)
require.NoError(t, err)
ti, err := parseTableIndex(tableData)
ti, err := parseTableIndexByCopy(tableData)
require.NoError(t, err)
tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
require.NoError(t, err)
tr := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
assertChunksInReader(chunks, tr, assert)
@@ -123,9 +124,10 @@ func TestHasMany(t *testing.T) {
tableData, _, err := buildTable(chunks)
require.NoError(t, err)
ti, err := parseTableIndex(tableData)
ti, err := parseTableIndexByCopy(tableData)
require.NoError(t, err)
tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
require.NoError(t, err)
tr := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
addrs := addrSlice{computeAddr(chunks[0]), computeAddr(chunks[1]), computeAddr(chunks[2])}
hasAddrs := []hasRecord{
@@ -173,9 +175,10 @@ func TestHasManySequentialPrefix(t *testing.T) {
require.NoError(t, err)
buff = buff[:length]
ti, err := parseTableIndex(buff)
ti, err := parseTableIndexByCopy(buff)
require.NoError(t, err)
tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize)
require.NoError(t, err)
tr := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize)
hasAddrs := make([]hasRecord, 2)
// Leave out the first address
@@ -201,9 +204,10 @@ func TestGetMany(t *testing.T) {
tableData, _, err := buildTable(data)
require.NoError(t, err)
ti, err := parseTableIndex(tableData)
ti, err := parseTableIndexByCopy(tableData)
require.NoError(t, err)
tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
require.NoError(t, err)
tr := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
addrs := addrSlice{computeAddr(data[0]), computeAddr(data[1]), computeAddr(data[2])}
getBatch := []getRecord{
@@ -234,9 +238,10 @@ func TestCalcReads(t *testing.T) {
tableData, _, err := buildTable(chunks)
require.NoError(t, err)
ti, err := parseTableIndex(tableData)
ti, err := parseTableIndexByCopy(tableData)
require.NoError(t, err)
tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), 0)
require.NoError(t, err)
tr := newTableReader(ti, tableReaderAtFromBytes(tableData), 0)
addrs := addrSlice{computeAddr(chunks[0]), computeAddr(chunks[1]), computeAddr(chunks[2])}
getBatch := []getRecord{
{&addrs[0], binary.BigEndian.Uint64(addrs[0][:addrPrefixSize]), false},
@@ -270,9 +275,10 @@ func TestExtract(t *testing.T) {
tableData, _, err := buildTable(chunks)
require.NoError(t, err)
ti, err := parseTableIndex(tableData)
ti, err := parseTableIndexByCopy(tableData)
require.NoError(t, err)
tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
require.NoError(t, err)
tr := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
addrs := addrSlice{computeAddr(chunks[0]), computeAddr(chunks[1]), computeAddr(chunks[2])}
@@ -308,9 +314,10 @@ func Test65k(t *testing.T) {
tableData, _, err := buildTable(chunks)
require.NoError(t, err)
ti, err := parseTableIndex(tableData)
ti, err := parseTableIndexByCopy(tableData)
require.NoError(t, err)
tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
require.NoError(t, err)
tr := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
for i := 0; i < count; i++ {
data := dataFn(i)
@@ -360,9 +367,10 @@ func doTestNGetMany(t *testing.T, count int) {
tableData, _, err := buildTable(data)
require.NoError(t, err)
ti, err := parseTableIndex(tableData)
ti, err := parseTableIndexByCopy(tableData)
require.NoError(t, err)
tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
require.NoError(t, err)
tr := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
getBatch := make([]getRecord, len(data))
for i := 0; i < count; i++ {

View File

@@ -16,6 +16,7 @@ package nbs
import (
"io"
"math"
"github.com/dolthub/dolt/go/libraries/utils/iohelp"
@@ -24,7 +25,7 @@ import (
)
func IterChunks(rd io.ReadSeeker, cb func(chunk chunks.Chunk) (stop bool, err error)) error {
idx, err := ReadTableIndex(rd)
idx, err := ReadTableIndexByCopy(rd)
if err != nil {
return err
}
@@ -34,7 +35,10 @@ func IterChunks(rd io.ReadSeeker, cb func(chunk chunks.Chunk) (stop bool, err er
seen := make(map[addr]bool)
for i := uint32(0); i < idx.ChunkCount(); i++ {
var a addr
ie := idx.IndexEntry(i, &a)
ie, err := idx.IndexEntry(i, &a)
if err != nil {
return err
}
if _, ok := seen[a]; !ok {
seen[a] = true
chunkBytes, err := readNFrom(rd, ie.Offset(), ie.Length())
@@ -64,6 +68,26 @@ func IterChunks(rd io.ReadSeeker, cb func(chunk chunks.Chunk) (stop bool, err er
return nil
}
func GetTableIndexPrefixes(rd io.ReadSeeker) (prefixes []uint64, err error) {
idx, err := ReadTableIndexByCopy(rd)
if err != nil {
return nil, err
}
defer func() {
cerr := idx.Close()
if err == nil {
err = cerr
}
}()
return idx.Prefixes()
}
func GuessPrefixOrdinal(prefix uint64, n uint32) int {
hi := prefix >> 32
return int((hi * uint64(n)) / uint64(math.MaxUint32))
}
func readNFrom(rd io.ReadSeeker, offset uint64, length uint32) ([]byte, error) {
_, err := rd.Seek(int64(offset), io.SeekStart)

View File

@@ -1,21 +0,0 @@
This is a performance test rig for the two main types of hashing we do in NOMS - buzhash and sha1. There's also support for sha256, sha512, and blake2b hash functions for comparison.
As of May 9, these are the numbers I get on a macbook pro 3.1 GHz Intel Core i7.
- no hashing : 3500 MB/s
- sha1 only : 470 MB/s
- sha256 only : 185 MB/s
- sha512 only : 299 MB/s
- blake2b only : 604 MB/s
- bh only : 139 MB/s
- sha1 and bh : 110 MB/s
- sha256 and bh : 80 MB/s
- sha512 and bh : 96 MB/s
- blake2b and bh: 115 MB/s
I think that in the no hashing case there is some compiler optimization going
on because I note that if all I do is add a loop that reads out bytes one by
one from the slice, it drops to 1000MB/s.
One outcome of this is that there's no sense going to sha256 - we should just
jump straight to sha512.

View File

@@ -1,96 +0,0 @@
// Copyright 2019 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file incorporates work covered by the following copyright and
// permission notice:
//
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0
package main
import (
"crypto/sha1"
"crypto/sha256"
"crypto/sha512"
"fmt"
"hash"
"io"
"os"
"time"
"github.com/codahale/blake2"
humanize "github.com/dustin/go-humanize"
flag "github.com/juju/gnuflag"
"github.com/silvasur/buzhash"
)
func main() {
useSHA := flag.String("use-sha", "", "<default>=no hashing, 1=sha1, 256=sha256, 512=sha512, blake=blake2b")
useBH := flag.Bool("use-bh", false, "whether we buzhash the bytes")
flag.Parse(true)
flag.Usage = func() {
fmt.Printf("%s <big-file>\n", os.Args[0])
flag.PrintDefaults()
}
if len(flag.Args()) < 1 {
flag.Usage()
return
}
p := flag.Args()[0]
bh := buzhash.NewBuzHash(64 * 8)
f, _ := os.Open(p)
defer f.Close()
t0 := time.Now()
buf := make([]byte, 4*1024)
l := uint64(0)
var h hash.Hash
if *useSHA == "1" {
h = sha1.New()
} else if *useSHA == "256" {
h = sha256.New()
} else if *useSHA == "512" {
h = sha512.New()
} else if *useSHA == "blake" {
h = blake2.NewBlake2B()
}
for {
n, err := f.Read(buf)
l += uint64(n)
if err == io.EOF {
break
}
s := buf[:n]
if h != nil {
h.Write(s)
}
if *useBH {
bh.Write(s)
}
}
t1 := time.Now()
d := t1.Sub(t0)
fmt.Printf("Read %s in %s (%s/s)\n", humanize.Bytes(l), d, humanize.Bytes(uint64(float64(l)/d.Seconds())))
digest := []byte{}
if h != nil {
fmt.Printf("%x\n", h.Sum(digest))
}
}

View File

@@ -27,6 +27,7 @@ import (
"github.com/dolthub/dolt/go/store/val"
)
// todo(andy): randomize test seed
var testRand = rand.New(rand.NewSource(1))
func TestMap(t *testing.T) {
@@ -76,7 +77,7 @@ func makeProllyMap(t *testing.T, count int) (orderedMap, [][2]val.Tuple) {
)
tuples := randomTuplePairs(count, kd, vd)
om := prollyMapFromTuples(t, count, kd, vd, tuples)
om := prollyMapFromTuples(t, kd, vd, tuples)
return om, tuples
}
@@ -89,12 +90,12 @@ func makeProllySecondaryIndex(t *testing.T, count int) (orderedMap, [][2]val.Tup
vd := val.NewTupleDescriptor()
tuples := randomCompositeTuplePairs(count, kd, vd)
om := prollyMapFromTuples(t, count, kd, vd, tuples)
om := prollyMapFromTuples(t, kd, vd, tuples)
return om, tuples
}
func prollyMapFromTuples(t *testing.T, count int, kd, vd val.TupleDesc, tuples [][2]val.Tuple) orderedMap {
func prollyMapFromTuples(t *testing.T, kd, vd val.TupleDesc, tuples [][2]val.Tuple) orderedMap {
ctx := context.Background()
ns := newTestNodeStore()

View File

@@ -171,7 +171,7 @@ func (it *memRangeIter) iterate(context.Context) (err error) {
}
}
func (it *memRangeIter) nextMutation() (key, value val.Tuple) {
func (it *memRangeIter) nextMutation(context.Context) (key, value val.Tuple) {
key, value = it.iter.Current()
if key == nil {
return
@@ -180,10 +180,6 @@ func (it *memRangeIter) nextMutation() (key, value val.Tuple) {
return
}
func (it *memRangeIter) count() int {
return it.iter.Count()
}
func (it *memRangeIter) close() error {
return nil
}

View File

@@ -44,7 +44,7 @@ func fetchChild(ctx context.Context, ns NodeStore, ref hash.Hash) (Node, error)
}
func writeNewChild(ctx context.Context, ns NodeStore, level uint64, keys, values []nodeItem) (Node, metaPair, error) {
child := makeMapNode(ns.Pool(), level, keys, values)
child := buildMapNode(ns.Pool(), level, keys, values)
ref, err := ns.Write(ctx, child)
if err != nil {

View File

@@ -476,10 +476,10 @@ func materializeMap(t *testing.T, mut MutableMap) Map {
// ensure edits are provided in order
iter := mut.overlay.mutations()
prev, _ := iter.nextMutation()
prev, _ := iter.nextMutation(ctx)
require.NotNil(t, prev)
for {
next, _ := iter.nextMutation()
next, _ := iter.nextMutation(ctx)
if next == nil {
break
}

View File

@@ -21,21 +21,18 @@ import (
)
type mutationIter interface {
nextMutation() (key, val val.Tuple)
count() int
nextMutation(ctx context.Context) (key, value val.Tuple)
close() error
}
var _ mutationIter = &memRangeIter{}
func materializeMutations(ctx context.Context, m Map, edits mutationIter) (Map, error) {
var err error
if edits.count() == 0 {
return m, err
newKey, newValue := edits.nextMutation(ctx)
if newKey == nil {
return m, nil // no mutations
}
newKey, newValue := edits.nextMutation()
cur, err := newCursorAtItem(ctx, m.ns, m.root, nodeItem(newKey), m.searchNode)
if err != nil {
return m, err
@@ -65,11 +62,11 @@ func materializeMutations(ctx context.Context, m Map, edits mutationIter) (Map,
}
if oldValue == nil && newValue == nil {
newKey, newValue = edits.nextMutation()
newKey, newValue = edits.nextMutation(ctx)
continue // already non-present
}
if oldValue != nil && compareValues(m, newValue, oldValue) == 0 {
newKey, newValue = edits.nextMutation()
newKey, newValue = edits.nextMutation(ctx)
continue // same newValue
}
@@ -94,7 +91,7 @@ func materializeMutations(ctx context.Context, m Map, edits mutationIter) (Map,
}
}
newKey, newValue = edits.nextMutation()
newKey, newValue = edits.nextMutation(ctx)
}
m.root, err = chunker.Done(ctx)

View File

@@ -23,25 +23,68 @@ import (
"github.com/dolthub/dolt/go/gen/fb/serial"
"github.com/dolthub/dolt/go/store/hash"
"github.com/dolthub/dolt/go/store/pool"
"github.com/dolthub/dolt/go/store/val"
)
const (
maxVectorOffset = uint64(math.MaxUint16)
refSize = hash.ByteLen
// These constants are mirrored from serial.TupleMap.KeyOffsetsLength()
// and serial.TupleMap.ValueOffsetsLength() respectively.
// They are only as stable as the flatbuffers schemas that define them.
keyOffsetsVOffset = 6
valueOffsetsVOffset = 10
)
func init() {
//emptyNode = makeMapNode(sharedPool, 0, nil, nil)
}
type Node struct {
buf serial.TupleMap
cnt int
emptyNode = buildMapNode(sharedPool, 0, nil, nil)
}
var emptyNode Node
func makeMapNode(pool pool.BuffPool, level uint64, keys, values []nodeItem) (node Node) {
type Node struct {
keys, values val.SlicedBuffer
refs refBuffer
count, level int
buf serial.TupleMap
}
func mapNodeFromBytes(bb []byte) Node {
buf := serial.GetRootAsTupleMap(bb, 0)
return mapNodeFromFlatbuffer(*buf)
}
func mapNodeFromFlatbuffer(buf serial.TupleMap) Node {
keys := val.SlicedBuffer{
Buf: buf.KeyTuplesBytes(),
Offs: getKeyOffsetsVector(buf),
}
values := val.SlicedBuffer{
Buf: buf.ValueTuplesBytes(),
Offs: getValueOffsetsVector(buf),
}
refs := refBuffer{
buf: buf.RefArrayBytes(),
}
count := buf.KeyOffsetsLength() + 1
if len(keys.Buf) == 0 {
count = 0
}
return Node{
keys: keys,
values: values,
refs: refs,
count: count,
level: int(buf.TreeLevel()),
buf: buf,
}
}
func buildMapNode(pool pool.BuffPool, level uint64, keys, values []nodeItem) (node Node) {
var (
keyTups, keyOffs fb.UOffsetT
valTups, valOffs fb.UOffsetT
@@ -85,6 +128,57 @@ func makeMapNode(pool pool.BuffPool, level uint64, keys, values []nodeItem) (nod
return mapNodeFromBytes(b.FinishedBytes())
}
func (nd Node) hashOf() hash.Hash {
return hash.Of(nd.bytes())
}
func (nd Node) getKey(i int) nodeItem {
return nd.keys.GetSlice(i)
}
func (nd Node) getValue(i int) nodeItem {
if nd.leafNode() {
return nd.values.GetSlice(i)
} else {
r := nd.getRef(i)
return r[:]
}
}
func (nd Node) getRef(i int) hash.Hash {
return nd.refs.getRef(i)
}
func (nd Node) nodeCount() int {
return nd.count
}
// todo(andy): should we support this?
//func (nd Node) cumulativeCount() uint64 {
// return nd.buf.TreeCount()
//}
func (nd Node) leafNode() bool {
return nd.level == 0
}
func (nd Node) empty() bool {
return nd.bytes() == nil || nd.nodeCount() == 0
}
func (nd Node) bytes() []byte {
return nd.buf.Table().Bytes
}
type refBuffer struct {
buf []byte
}
func (rb refBuffer) getRef(i int) hash.Hash {
start, stop := i*refSize, (i+1)*refSize
return hash.New(rb.buf[start:stop])
}
func getMapBuilder(pool pool.BuffPool, sz int) *fb.Builder {
// todo(andy): initialize builder buffer from pool
return fb.NewBuilder(sz)
@@ -138,87 +232,22 @@ func writeItemOffsets(b *fb.Builder, items []nodeItem, sz int) (cnt int) {
return
}
func mapNodeFromBytes(bb []byte) Node {
buf := serial.GetRootAsTupleMap(bb, 0)
// first key offset omitted
cnt := buf.KeyOffsetsLength() + 1
if len(buf.KeyTuplesBytes()) == 0 {
cnt = 0
}
return Node{
buf: *buf,
cnt: cnt,
}
func getKeyOffsetsVector(buf serial.TupleMap) []byte {
sz := buf.KeyOffsetsLength() * 2
tab := buf.Table()
vec := tab.Offset(keyOffsetsVOffset)
start := int(tab.Vector(fb.UOffsetT(vec)))
stop := start + sz
return tab.Bytes[start:stop]
}
func (nd Node) hashOf() hash.Hash {
return hash.Of(nd.bytes())
}
func (nd Node) getKey(i int) nodeItem {
keys := nd.buf.KeyTuplesBytes()
start, stop := uint16(0), uint16(len(keys))
if i > 0 {
start = nd.buf.KeyOffsets(i - 1)
}
if i < nd.buf.KeyOffsetsLength() {
stop = nd.buf.KeyOffsets(i)
}
return keys[start:stop]
}
func (nd Node) getValue(i int) nodeItem {
if nd.leafNode() {
return nd.getValueTuple(i)
} else {
r := nd.getRef(i)
return r[:]
}
}
func (nd Node) getValueTuple(i int) nodeItem {
values := nd.buf.ValueTuplesBytes()
start, stop := uint16(0), uint16(len(values))
if i > 0 {
start = nd.buf.ValueOffsets(i - 1)
}
if i < nd.buf.ValueOffsetsLength() {
stop = nd.buf.ValueOffsets(i)
}
return values[start:stop]
}
func (nd Node) getRef(i int) hash.Hash {
refs := nd.buf.RefArrayBytes()
start, stop := i*refSize, (i+1)*refSize
return hash.New(refs[start:stop])
}
func (nd Node) level() int {
return int(nd.buf.TreeLevel())
}
func (nd Node) nodeCount() int {
return nd.cnt
}
// todo(andy): should we support this?
//func (nd Node) cumulativeCount() uint64 {
// return nd.buf.TreeCount()
//}
func (nd Node) leafNode() bool {
return nd.level() == 0
}
func (nd Node) empty() bool {
return nd.bytes() == nil || nd.nodeCount() == 0
}
func (nd Node) bytes() []byte {
return nd.buf.Table().Bytes
func getValueOffsetsVector(buf serial.TupleMap) []byte {
sz := buf.ValueOffsetsLength() * 2
tab := buf.Table()
vec := tab.Offset(valueOffsetsVOffset)
start := int(tab.Vector(fb.UOffsetT(vec)))
stop := start + sz
return tab.Bytes[start:stop]
}

View File

@@ -203,7 +203,7 @@ func (cur *nodeCursor) isLeaf() bool {
}
func (cur *nodeCursor) level() uint64 {
return uint64(cur.nd.level())
return uint64(cur.nd.level)
}
func (cur *nodeCursor) seek(ctx context.Context, item nodeItem, cb compareFn) (err error) {

View File

@@ -18,6 +18,10 @@ import (
"encoding/binary"
"math/rand"
"testing"
"unsafe"
"github.com/dolthub/dolt/go/gen/fb/serial"
"github.com/dolthub/dolt/go/store/val"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
@@ -51,8 +55,33 @@ func TestRoundTripNodeItems(t *testing.T) {
}
}
func TestGetKeyValueOffsetsVectors(t *testing.T) {
for trial := 0; trial < 100; trial++ {
keys, values := randomNodeItemPairs(t, (rand.Int()%101)+50)
require.True(t, sumSize(keys)+sumSize(values) < maxVectorOffset)
nd := newLeafNode(keys, values)
ko1, vo1 := offsetsFromSlicedBuffers(nd.keys, nd.values)
ko2, vo2 := offsetsFromFlatbuffer(nd.buf)
assert.Equal(t, len(ko1), len(ko2))
assert.Equal(t, len(ko1), len(keys)-1)
assert.Equal(t, ko1, ko2)
assert.Equal(t, len(vo1), len(vo2))
assert.Equal(t, len(vo1), len(values)-1)
assert.Equal(t, vo1, vo2)
}
}
func TestNodeSize(t *testing.T) {
sz := unsafe.Sizeof(Node{})
assert.Equal(t, 168, int(sz))
}
func newLeafNode(keys, values []nodeItem) Node {
return makeMapNode(sharedPool, 0, keys, values)
return buildMapNode(sharedPool, 0, keys, values)
}
func randomNodeItemPairs(t *testing.T, count int) (keys, values []nodeItem) {
@@ -89,3 +118,32 @@ func sumSize(items []nodeItem) (sz uint64) {
}
return
}
func offsetsFromFlatbuffer(buf serial.TupleMap) (ko, vo []uint16) {
ko = make([]uint16, buf.KeyOffsetsLength())
for i := range ko {
ko[i] = buf.KeyOffsets(i)
}
vo = make([]uint16, buf.ValueOffsetsLength())
for i := range vo {
vo[i] = buf.ValueOffsets(i)
}
return
}
func offsetsFromSlicedBuffers(keys, values val.SlicedBuffer) (ko, vo []uint16) {
ko = deserializeOffsets(keys.Offs)
vo = deserializeOffsets(values.Offs)
return
}
func deserializeOffsets(buf []byte) (offs []uint16) {
offs = make([]uint16, len(buf)/2)
for i := range offs {
start, stop := i*2, (i+1)*2
offs[i] = binary.LittleEndian.Uint16(buf[start:stop])
}
return
}

View File

@@ -138,7 +138,7 @@ func encodingFromSqlType(typ query.Type) val.Encoding {
case query.Type_YEAR:
return val.YearEnc
case query.Type_GEOMETRY:
return val.BytesEnc
return val.GeometryEnc
}
switch typ {

View File

@@ -32,7 +32,7 @@ func roundTripTreeItems(t *testing.T) {
root, items, ns := randomTree(t, 1000)
assert.NotNil(t, root)
assert.True(t, root.nodeCount() > 0)
assert.True(t, root.level() > 0)
assert.True(t, root.level > 0)
//assert.Equal(t, uint64(1000), root.cumulativeCount())
assert.Equal(t, countTree(t, ns, root), 1000)
validateTreeItems(t, ns, root, items)
@@ -40,7 +40,7 @@ func roundTripTreeItems(t *testing.T) {
root, items, ns = randomTree(t, 10_000)
assert.NotNil(t, root)
assert.True(t, root.nodeCount() > 0)
assert.True(t, root.level() > 0)
assert.True(t, root.level > 0)
//assert.Equal(t, uint64(10_000), root.cumulativeCount())
assert.Equal(t, countTree(t, ns, root), 10_000)
validateTreeItems(t, ns, root, items)
@@ -48,7 +48,7 @@ func roundTripTreeItems(t *testing.T) {
root, items, ns = randomTree(t, 100_000)
assert.NotNil(t, root)
assert.True(t, root.nodeCount() > 0)
assert.True(t, root.level() > 0)
assert.True(t, root.level > 0)
//assert.Equal(t, uint64(100_000), root.cumulativeCount())
assert.Equal(t, countTree(t, ns, root), 100_000)
validateTreeItems(t, ns, root, items)

View File

@@ -195,27 +195,21 @@ func testUpdateDiffs(t *testing.T, from Map, tups [][2]val.Tuple, numUpdates int
tups[i], tups[j] = tups[j], tups[i]
})
oldPairs := tups[:numUpdates]
sort.Slice(oldPairs, func(i, j int) bool {
return from.keyDesc.Compare(oldPairs[i][0], oldPairs[j][0]) < 0
sub := tups[:numUpdates]
sort.Slice(sub, func(i, j int) bool {
return from.keyDesc.Compare(sub[i][0], sub[j][0]) < 0
})
kd, vd := from.Descriptors()
newPairs := randomTuplePairs(numUpdates, kd, vd)
require.Equal(t, len(oldPairs), len(newPairs))
for i := range oldPairs {
// set keys for updates
newPairs[i][0] = oldPairs[i][0]
}
to := makeMapWithUpdates(t, from, newPairs...)
updates := makeUpdatesToTuples(kd, vd, sub...)
to := makeMapWithUpdates(t, from, updates...)
var cnt int
err := DiffMaps(ctx, from, to, func(ctx context.Context, diff Diff) error {
assert.Equal(t, ModifiedDiff, diff.Type)
assert.Equal(t, oldPairs[cnt][0], diff.Key)
assert.Equal(t, oldPairs[cnt][1], diff.From)
assert.Equal(t, newPairs[cnt][0], diff.Key)
assert.Equal(t, newPairs[cnt][1], diff.To)
assert.Equal(t, updates[cnt][0], diff.Key)
assert.Equal(t, updates[cnt][1], diff.From)
assert.Equal(t, updates[cnt][2], diff.To)
cnt++
return nil
})
@@ -247,6 +241,31 @@ func makeMapWithInserts(t *testing.T, m Map, inserts ...[2]val.Tuple) Map {
return mm
}
func makeMapWithUpdates(t *testing.T, m Map, updates ...[2]val.Tuple) Map {
return makeMapWithInserts(t, m, updates...)
func makeMapWithUpdates(t *testing.T, m Map, updates ...[3]val.Tuple) Map {
ctx := context.Background()
mut := m.Mutate()
for _, pair := range updates {
err := mut.Put(ctx, pair[0], pair[2])
require.NoError(t, err)
}
mm, err := mut.Map(ctx)
require.NoError(t, err)
return mm
}
func makeUpdatesToTuples(kd, vd val.TupleDesc, tuples ...[2]val.Tuple) (updates [][3]val.Tuple) {
updates = make([][3]val.Tuple, len(tuples))
valBuilder := val.NewTupleBuilder(vd)
for i := range updates {
updates[i][0] = tuples[i][0]
updates[i][1] = tuples[i][1]
updates[i][2] = randomTuple(valBuilder)
}
sort.Slice(updates, func(i, j int) bool {
return kd.Compare(updates[i][0], updates[j][0]) < 0
})
return
}

View File

@@ -0,0 +1,229 @@
// Copyright 2021 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package prolly
import (
"bytes"
"context"
"io"
"golang.org/x/sync/errgroup"
"github.com/dolthub/dolt/go/store/val"
)
const patchBufferSize = 1024
// TupleMergeFn is a callback that handles 3-way merging of tuples.
// A typical implementation will attempt a cell-wise merge of the tuples,
// or register a conflict if such a merge is not possible.
type TupleMergeFn func(left, right Diff) (Diff, bool)
// ThreeWayMerge implements a three-way merge algorithm using |base| as the common ancestor, |right| as
// the source branch, and |left| as the destination branch. Both |left| and |right| are diff'd against
// |base| to compute merge patches, but rather than applying both sets of patches to |base|, patches from
// |right| are applied directly to |left|. This reduces the amount of write work and improves performance.
// In the case that a key-value pair was modified on both |left| and |right| with different resulting
// values, the TupleMergeFn is called to perform a cell-wise merge, or to throw a conflict.
func ThreeWayMerge(ctx context.Context, left, right, base Map, cb TupleMergeFn) (final Map, err error) {
ld, err := treeDifferFromMaps(ctx, base, left)
if err != nil {
return Map{}, err
}
rd, err := treeDifferFromMaps(ctx, base, right)
if err != nil {
return Map{}, err
}
eg, ctx := errgroup.WithContext(ctx)
buf := newPatchBuffer(patchBufferSize)
// iterate |ld| and |rd| in parallel, populating |buf|
eg.Go(func() (err error) {
defer func() {
if cerr := buf.close(); err == nil {
err = cerr
}
}()
err = sendPatches(ctx, ld, rd, buf, cb)
return
})
// consume patches from |buf| and apply them to |left|
eg.Go(func() error {
final, err = materializeMutations(ctx, left, buf)
return err
})
if err = eg.Wait(); err != nil {
return Map{}, err
}
return final, nil
}
// patchBuffer implements mutationIter. It consumes Diffs
// from the parallel treeDiffers and transforms them into
// patches for the treeChunker to apply.
type patchBuffer struct {
buf chan patch
}
var _ mutationIter = patchBuffer{}
type patch [2]val.Tuple
func newPatchBuffer(sz int) patchBuffer {
return patchBuffer{buf: make(chan patch, sz)}
}
func (ps patchBuffer) sendPatch(ctx context.Context, diff Diff) error {
p := patch{diff.Key, diff.To}
select {
case <-ctx.Done():
return ctx.Err()
case ps.buf <- p:
return nil
}
}
// nextMutation implements mutationIter.
func (ps patchBuffer) nextMutation(ctx context.Context) (key, value val.Tuple) {
var p patch
select {
case p = <-ps.buf:
return p[0], p[1]
case <-ctx.Done():
return nil, nil
}
}
func (ps patchBuffer) close() error {
close(ps.buf)
return nil
}
func sendPatches(ctx context.Context, l, r treeDiffer, buf patchBuffer, cb TupleMergeFn) (err error) {
var (
left, right Diff
lok, rok = true, true
)
left, err = l.Next(ctx)
if err == io.EOF {
err, lok = nil, false
}
if err != nil {
return err
}
right, err = r.Next(ctx)
if err == io.EOF {
err, rok = nil, false
}
if err != nil {
return err
}
for lok && rok {
cmp := compareDiffKeys(left, right, l.cmp)
switch {
case cmp < 0:
// already in left
left, err = l.Next(ctx)
if err == io.EOF {
err, lok = nil, false
}
if err != nil {
return err
}
case cmp > 0:
err = buf.sendPatch(ctx, right)
if err != nil {
return err
}
right, err = r.Next(ctx)
if err == io.EOF {
err, rok = nil, false
}
if err != nil {
return err
}
case cmp == 0:
if !equalDiffVals(left, right) {
resolved, ok := cb(left, right)
if ok {
err = buf.sendPatch(ctx, resolved)
}
if err != nil {
return err
}
}
left, err = l.Next(ctx)
if err == io.EOF {
err, lok = nil, false
}
if err != nil {
return err
}
right, err = r.Next(ctx)
if err == io.EOF {
err, rok = nil, false
}
if err != nil {
return err
}
}
}
if lok {
// already in left
return nil
}
for rok {
err = buf.sendPatch(ctx, right)
if err != nil {
return err
}
right, err = r.Next(ctx)
if err == io.EOF {
err, rok = nil, false
}
if err != nil {
return err
}
}
return nil
}
func compareDiffKeys(left, right Diff, cmp compareFn) int {
return cmp(nodeItem(left.Key), nodeItem(right.Key))
}
func equalDiffVals(left, right Diff) bool {
// todo(andy): bytes must be comparable
ok := left.Type == right.Type
return ok && bytes.Equal(left.To, right.To)
}

View File

@@ -0,0 +1,272 @@
// Copyright 2021 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package prolly
import (
"context"
"fmt"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/dolthub/dolt/go/store/val"
)
func Test3WayMapMerge(t *testing.T) {
scales := []int{
10,
100,
1000,
10000,
}
kd := val.NewTupleDescriptor(
val.Type{Enc: val.Uint32Enc, Nullable: false},
)
vd := val.NewTupleDescriptor(
val.Type{Enc: val.Uint32Enc, Nullable: true},
val.Type{Enc: val.Uint32Enc, Nullable: true},
val.Type{Enc: val.Uint32Enc, Nullable: true},
)
for _, s := range scales {
name := fmt.Sprintf("test proCur map at scale %d", s)
t.Run(name, func(t *testing.T) {
t.Run("merge identical maps", func(t *testing.T) {
testEqualMapMerge(t, s)
})
t.Run("3way merge inserts", func(t *testing.T) {
for k := 0; k < 10; k++ {
testThreeWayMapMerge(t, kd, vd, s)
}
})
t.Run("tuple merge fn", func(t *testing.T) {
for k := 0; k < 10; k++ {
testTupleMergeFn(t, kd, vd, s)
}
})
})
}
}
func testEqualMapMerge(t *testing.T, sz int) {
om, _ := makeProllyMap(t, sz)
m := om.(Map)
ctx := context.Background()
mm, err := ThreeWayMerge(ctx, m, m, m, panicOnConflict)
require.NoError(t, err)
assert.NotNil(t, mm)
assert.Equal(t, m.HashOf(), mm.HashOf())
}
func testThreeWayMapMerge(t *testing.T, kd, vd val.TupleDesc, sz int) {
baseTuples, leftEdits, rightEdits := makeTuplesAndMutations(kd, vd, sz)
om := prollyMapFromTuples(t, kd, vd, baseTuples)
base := om.(Map)
left := applyMutationSet(t, base, leftEdits)
right := applyMutationSet(t, base, rightEdits)
ctx := context.Background()
final, err := ThreeWayMerge(ctx, left, right, base, panicOnConflict)
assert.NoError(t, err)
for _, add := range leftEdits.adds {
ok, err := final.Has(ctx, add[0])
assert.NoError(t, err)
assert.True(t, ok)
err = final.Get(ctx, add[0], func(key, value val.Tuple) error {
assert.Equal(t, value, add[1])
return nil
})
assert.NoError(t, err)
}
for _, add := range rightEdits.adds {
ok, err := final.Has(ctx, add[0])
assert.NoError(t, err)
assert.True(t, ok)
err = final.Get(ctx, add[0], func(key, value val.Tuple) error {
assert.Equal(t, value, add[1])
return nil
})
assert.NoError(t, err)
}
for _, del := range leftEdits.deletes {
ok, err := final.Has(ctx, del)
assert.NoError(t, err)
assert.False(t, ok)
}
for _, del := range rightEdits.deletes {
ok, err := final.Has(ctx, del)
assert.NoError(t, err)
assert.False(t, ok)
}
for _, up := range leftEdits.updates {
ok, err := final.Has(ctx, up[0])
assert.NoError(t, err)
assert.True(t, ok)
err = final.Get(ctx, up[0], func(key, value val.Tuple) error {
assert.Equal(t, value, up[1])
return nil
})
assert.NoError(t, err)
}
for _, up := range rightEdits.updates {
ok, err := final.Has(ctx, up[0])
assert.NoError(t, err)
assert.True(t, ok)
err = final.Get(ctx, up[0], func(key, value val.Tuple) error {
assert.Equal(t, value, up[1])
return nil
})
assert.NoError(t, err)
}
}
func testTupleMergeFn(t *testing.T, kd, vd val.TupleDesc, sz int) {
ctx := context.Background()
tuples := randomTuplePairs(sz, kd, vd)
om := prollyMapFromTuples(t, kd, vd, tuples)
base := om.(Map)
mutSz := sz / 10
testRand.Shuffle(len(tuples), func(i, j int) {
tuples[i], tuples[j] = tuples[j], tuples[i]
})
// make overlapping edits
left := makeUpdatesToTuples(kd, vd, tuples[:mutSz]...)
right := makeUpdatesToTuples(kd, vd, tuples[:mutSz]...)
l := base.Mutate()
for _, update := range left {
err := l.Put(ctx, update[0], update[2])
require.NoError(t, err)
}
leftMap, err := l.Map(ctx)
require.NoError(t, err)
r := base.Mutate()
for _, update := range right {
err := r.Put(ctx, update[0], update[2])
require.NoError(t, err)
}
rightMap, err := r.Map(ctx)
require.NoError(t, err)
idx := 0
final, err := ThreeWayMerge(ctx, leftMap, rightMap, base, func(l, r Diff) (merged Diff, ok bool) {
assert.Equal(t, l.Key, r.Key)
assert.Equal(t, l.From, r.From)
assert.Equal(t, l.To, left[idx][2])
assert.Equal(t, r.To, right[idx][2])
// right diff wins
merged, ok = r, true
idx++
return
})
require.NoError(t, err)
for _, update := range left {
err = final.Get(ctx, update[0], func(key, value val.Tuple) error {
assert.Equal(t, key, update[0])
assert.NotEqual(t, value, update[2])
return nil
})
require.NoError(t, err)
}
for _, update := range right {
err = final.Get(ctx, update[0], func(key, value val.Tuple) error {
assert.Equal(t, key, update[0])
assert.Equal(t, value, update[2])
return nil
})
require.NoError(t, err)
}
}
type mutationSet struct {
adds [][2]val.Tuple
deletes []val.Tuple
updates [][3]val.Tuple
}
// makeTuplesAndMutations builds a base tuple set of |sz| pairs plus two
// mutation sets whose adds, deletes, and updates are carved from disjoint
// ranges, so the left and right sides never touch the same key.
// |sz| is presumably at least 10 so that sz/10 edits of each kind fit —
// callers pass sizes satisfying editCount*4 <= sz.
func makeTuplesAndMutations(kd, vd val.TupleDesc, sz int) (base [][2]val.Tuple, left, right mutationSet) {
	editCount := sz / 10
	all := randomTuplePairs(sz+(editCount*2), kd, vd)
	base = all[:sz]

	// The tuples beyond |base| become non-overlapping adds for each side.
	left = mutationSet{
		adds:    all[sz : sz+editCount],
		deletes: make([]val.Tuple, editCount),
	}
	right = mutationSet{
		adds:    all[sz+editCount:],
		deletes: make([]val.Tuple, editCount),
	}

	// Shuffle a copy of |base| and slice out four disjoint windows:
	// left deletes, right deletes, left updates, right updates.
	shuffled := make([][2]val.Tuple, len(base))
	copy(shuffled, base)
	testRand.Shuffle(len(shuffled), func(x, y int) {
		shuffled[x], shuffled[y] = shuffled[y], shuffled[x]
	})

	for idx, p := range shuffled[:editCount] {
		left.deletes[idx] = p[0]
	}
	for idx, p := range shuffled[editCount : editCount*2] {
		right.deletes[idx] = p[0]
	}
	left.updates = makeUpdatesToTuples(kd, vd, shuffled[editCount*2:editCount*3]...)
	right.updates = makeUpdatesToTuples(kd, vd, shuffled[editCount*3:editCount*4]...)
	return
}
// applyMutationSet applies |edits| to |base| — adds, then deletes, then
// updates — and returns the resulting immutable Map.
func applyMutationSet(t *testing.T, base Map, edits mutationSet) (m Map) {
	ctx := context.Background()
	mut := base.Mutate()

	for _, pair := range edits.adds {
		require.NoError(t, mut.Put(ctx, pair[0], pair[1]))
	}
	for _, key := range edits.deletes {
		require.NoError(t, mut.Delete(ctx, key))
	}
	for _, pair := range edits.updates {
		require.NoError(t, mut.Put(ctx, pair[0], pair[1]))
	}

	var err error
	m, err = mut.Map(ctx)
	require.NoError(t, err)
	return
}
// panicOnConflict is a conflict callback for merges that are expected to
// be conflict-free; reaching it indicates the test constructed overlapping
// edits by mistake.
func panicOnConflict(left, right Diff) (Diff, bool) {
	panic("cannot merge cells")
}

View File

@@ -51,7 +51,7 @@ func countOrderedMap(t *testing.T, om orderedMap) (cnt int) {
require.NoError(t, err)
cnt++
}
return
return cnt
}
func keyDescFromMap(om orderedMap) val.TupleDesc {
@@ -77,23 +77,34 @@ func randomTuplePairs(count int, keyDesc, valDesc val.TupleDesc) (items [][2]val
items[i][1] = randomTuple(valBuilder)
}
sortTuplePairs(items, keyDesc)
dupes := make([]int, 0, count)
for {
sortTuplePairs(items, keyDesc)
for i := range items {
if i == 0 {
continue
}
if keyDesc.Compare(items[i][0], items[i-1][0]) == 0 {
dupes = append(dupes, i)
}
}
if len(dupes) == 0 {
break
}
for i := range items {
if i == 0 {
continue
}
if keyDesc.Compare(items[i][0], items[i-1][0]) == 0 {
panic("duplicate key, unlucky!")
// replace duplicates and validate again
for _, d := range dupes {
items[d][0] = randomTuple(keyBuilder)
}
dupes = dupes[:0]
}
return
return items
}
func randomCompositeTuplePairs(count int, keyDesc, valDesc val.TupleDesc) (items [][2]val.Tuple) {
// preconditions
if count%5 != 0 {
panic("expected count divisible by 5")
panic("expected empty divisible by 5")
}
if len(keyDesc.Types) < 2 {
panic("expected composite key")

View File

@@ -40,6 +40,7 @@ import (
"github.com/aws/aws-sdk-go/service/dynamodb"
"github.com/aws/aws-sdk-go/service/s3"
"github.com/dolthub/dolt/go/libraries/utils/filesys"
"github.com/dolthub/dolt/go/store/chunks"
"github.com/dolthub/dolt/go/store/d"
"github.com/dolthub/dolt/go/store/datas"
@@ -472,9 +473,27 @@ func (sp Spec) createDatabase(ctx context.Context) (datas.Database, types.ValueR
vrw := types.NewValueStore(cs)
return datas.NewTypesDatabase(vrw), vrw
case "nbs":
os.Mkdir(sp.DatabaseName, 0777)
cs, err := nbs.NewLocalStore(ctx, types.Format_Default.VersionString(), sp.DatabaseName, 1<<28)
// If the database is the oldgen database return a standard NBS store.
if strings.Contains(sp.DatabaseName, "oldgen") {
return getStandardLocalStore(ctx, sp.DatabaseName)
}
oldgenDb := filepath.Join(sp.DatabaseName, "oldgen")
err := validateDir(oldgenDb)
// If we can't validate that an oldgen db exists just use a standard local store.
if err != nil {
return getStandardLocalStore(ctx, sp.DatabaseName)
}
newGenSt, err := nbs.NewLocalStore(ctx, types.Format_Default.VersionString(), sp.DatabaseName, 1<<28)
d.PanicIfError(err)
oldGenSt, err := nbs.NewLocalStore(ctx, types.Format_Default.VersionString(), oldgenDb, 1<<28)
d.PanicIfError(err)
cs := nbs.NewGenerationalCS(oldGenSt, newGenSt)
vrw := types.NewValueStore(cs)
return datas.NewTypesDatabase(vrw), vrw
case "mem":
@@ -494,6 +513,28 @@ func (sp Spec) createDatabase(ctx context.Context) (datas.Database, types.ValueR
}
}
func getStandardLocalStore(ctx context.Context, dbName string) (datas.Database, types.ValueReadWriter) {
os.Mkdir(dbName, 0777)
cs, err := nbs.NewLocalStore(ctx, types.Format_Default.VersionString(), dbName, 1<<28)
d.PanicIfError(err)
vrw := types.NewValueStore(cs)
return datas.NewTypesDatabase(vrw), vrw
}
func validateDir(path string) error {
info, err := os.Stat(path)
if err != nil {
return err
} else if !info.IsDir() {
return filesys.ErrIsFile
}
return nil
}
func parseDatabaseSpec(spec string) (protocol, name string, err error) {
if len(spec) == 0 {
err = fmt.Errorf("empty spec")

View File

@@ -22,6 +22,8 @@ import (
"strconv"
"strings"
"github.com/dolthub/dolt/go/store/geometry"
"github.com/dolthub/dolt/go/store/hash"
)
@@ -134,7 +136,7 @@ func WriteEWKBLineData(l Linestring, buf []byte) {
binary.LittleEndian.PutUint32(buf[:LengthSize], uint32(len(l.Points)))
// Append each point
for i, p := range l.Points {
WriteEWKBPointData(p, buf[LengthSize+PointDataSize*i:LengthSize+PointDataSize*(i+1)])
WriteEWKBPointData(p, buf[LengthSize+geometry.PointSize*i:LengthSize+geometry.PointSize*(i+1)])
}
}
@@ -145,11 +147,11 @@ func (v Linestring) writeTo(w nomsWriter, nbf *NomsBinFormat) error {
}
// Allocate buffer for linestring
buf := make([]byte, EWKBHeaderSize+LengthSize+PointDataSize*len(v.Points))
buf := make([]byte, geometry.EWKBHeaderSize+LengthSize+geometry.PointSize*len(v.Points))
// Write header and data to buffer
WriteEWKBHeader(v, buf)
WriteEWKBLineData(v, buf[EWKBHeaderSize:])
WriteEWKBLineData(v, buf[geometry.EWKBHeaderSize:])
w.writeString(string(buf))
return nil
@@ -164,7 +166,7 @@ func ParseEWKBLine(buf []byte, srid uint32) Linestring {
// Parse points
points := make([]Point, numPoints)
for i := uint32(0); i < numPoints; i++ {
points[i] = ParseEWKBPoint(buf[LengthSize+PointDataSize*i:LengthSize+PointDataSize*(i+1)], srid)
points[i] = ParseEWKBPoint(buf[LengthSize+geometry.PointSize*i:LengthSize+geometry.PointSize*(i+1)], srid)
}
return Linestring{SRID: srid, Points: points}
@@ -172,20 +174,20 @@ func ParseEWKBLine(buf []byte, srid uint32) Linestring {
func readLinestring(nbf *NomsBinFormat, b *valueDecoder) (Linestring, error) {
buf := []byte(b.ReadString())
srid, _, geomType := ParseEWKBHeader(buf)
if geomType != LinestringID {
srid, _, geomType := geometry.ParseEWKBHeader(buf)
if geomType != geometry.LinestringType {
return Linestring{}, errors.New("not a linestring")
}
return ParseEWKBLine(buf[EWKBHeaderSize:], srid), nil
return ParseEWKBLine(buf[geometry.EWKBHeaderSize:], srid), nil
}
func (v Linestring) readFrom(nbf *NomsBinFormat, b *binaryNomsReader) (Value, error) {
buf := []byte(b.ReadString())
srid, _, geomType := ParseEWKBHeader(buf)
if geomType != LinestringID {
srid, _, geomType := geometry.ParseEWKBHeader(buf)
if geomType != geometry.LinestringType {
return nil, errors.New("not a linestring")
}
return ParseEWKBLine(buf[EWKBHeaderSize:], srid), nil
return ParseEWKBLine(buf[geometry.EWKBHeaderSize:], srid), nil
}
func (v Linestring) skip(nbf *NomsBinFormat, b *binaryNomsReader) {

View File

@@ -16,24 +16,13 @@ package types
import (
"context"
"encoding/binary"
"errors"
"fmt"
"math"
"strconv"
"github.com/dolthub/dolt/go/store/hash"
)
"github.com/dolthub/dolt/go/store/geometry"
const (
SRIDSize = 4
EndianSize = 1
TypeSize = 4
EWKBHeaderSize = SRIDSize + EndianSize + TypeSize
PointDataSize = 16
PointID = 1
LinestringID = 2
PolygonID = 3
"github.com/dolthub/dolt/go/store/hash"
)
// Point is a Noms Value wrapper around the primitive string type (for now).
@@ -93,29 +82,21 @@ func (v Point) valueReadWriter() ValueReadWriter {
// WriteEWKBHeader writes the SRID, endianness, and type to the byte buffer
// This function assumes v is a valid spatial type
func WriteEWKBHeader(v interface{}, buf []byte) {
// Write endianness byte (always little endian)
buf[SRIDSize] = 1
// Parse data
switch v := v.(type) {
case Point:
// Write SRID and type
binary.LittleEndian.PutUint32(buf[0:SRIDSize], v.SRID)
binary.LittleEndian.PutUint32(buf[SRIDSize+EndianSize:EWKBHeaderSize], PointID)
geometry.WriteEWKBHeader(buf, v.SRID, geometry.PointType)
case Linestring:
binary.LittleEndian.PutUint32(buf[0:SRIDSize], v.SRID)
binary.LittleEndian.PutUint32(buf[SRIDSize+EndianSize:EWKBHeaderSize], LinestringID)
geometry.WriteEWKBHeader(buf, v.SRID, geometry.LinestringType)
case Polygon:
binary.LittleEndian.PutUint32(buf[0:SRIDSize], v.SRID)
binary.LittleEndian.PutUint32(buf[SRIDSize+EndianSize:EWKBHeaderSize], PolygonID)
geometry.WriteEWKBHeader(buf, v.SRID, geometry.PolygonType)
}
}
// WriteEWKBPointData converts a Point into a byte array in EWKB format
// Very similar to function in GMS
func WriteEWKBPointData(p Point, buf []byte) {
binary.LittleEndian.PutUint64(buf[:PointDataSize/2], math.Float64bits(p.X))
binary.LittleEndian.PutUint64(buf[PointDataSize/2:], math.Float64bits(p.Y))
geometry.WriteEWKBPointData(buf, p.X, p.Y)
}
func (v Point) writeTo(w nomsWriter, nbf *NomsBinFormat) error {
@@ -126,49 +107,39 @@ func (v Point) writeTo(w nomsWriter, nbf *NomsBinFormat) error {
}
// Allocate buffer for point 4 + 1 + 4 + 16
buf := make([]byte, EWKBHeaderSize+PointDataSize)
buf := make([]byte, geometry.EWKBHeaderSize+geometry.PointSize)
// Write header and data to buffer
WriteEWKBHeader(v, buf)
WriteEWKBPointData(v, buf[EWKBHeaderSize:])
WriteEWKBPointData(v, buf[geometry.EWKBHeaderSize:])
w.writeString(string(buf))
return nil
}
// ParseEWKBHeader converts the header potion of a EWKB byte array to srid, endianness, and geometry type
func ParseEWKBHeader(buf []byte) (uint32, bool, uint32) {
srid := binary.LittleEndian.Uint32(buf[0:SRIDSize]) // First 4 bytes is SRID always in little endian
isBig := buf[SRIDSize] == 0 // Next byte is endianness
geomType := binary.LittleEndian.Uint32(buf[SRIDSize+EndianSize : EWKBHeaderSize]) // Next 4 bytes is type
return srid, isBig, geomType
}
// ParseEWKBPoint converts the data portion of a WKB point to Point
// Very similar logic to the function in GMS
func ParseEWKBPoint(buf []byte, srid uint32) Point {
// Read floats x and y
x := math.Float64frombits(binary.LittleEndian.Uint64(buf[:PointDataSize/2]))
y := math.Float64frombits(binary.LittleEndian.Uint64(buf[PointDataSize/2:]))
x, y := geometry.ParseEWKBPoint(buf)
return Point{SRID: srid, X: x, Y: y}
}
func readPoint(nbf *NomsBinFormat, b *valueDecoder) (Point, error) {
buf := []byte(b.ReadString())
srid, _, geomType := ParseEWKBHeader(buf) // Assume it's always little endian
if geomType != PointID {
srid, _, geomType := geometry.ParseEWKBHeader(buf) // Assume it's always little endian
if geomType != geometry.PointType {
return Point{}, errors.New("not a point")
}
return ParseEWKBPoint(buf[EWKBHeaderSize:], srid), nil
return ParseEWKBPoint(buf[geometry.EWKBHeaderSize:], srid), nil
}
func (v Point) readFrom(nbf *NomsBinFormat, b *binaryNomsReader) (Value, error) {
buf := []byte(b.ReadString())
srid, _, geomType := ParseEWKBHeader(buf) // Assume it's always little endian
if geomType != PointID {
srid, _, geomType := geometry.ParseEWKBHeader(buf) // Assume it's always little endian
if geomType != geometry.PointType {
return Point{}, errors.New("not a point")
}
return ParseEWKBPoint(buf[EWKBHeaderSize:], srid), nil
return ParseEWKBPoint(buf[geometry.EWKBHeaderSize:], srid), nil
}
func (v Point) skip(nbf *NomsBinFormat, b *binaryNomsReader) {

View File

@@ -22,6 +22,8 @@ import (
"strconv"
"strings"
"github.com/dolthub/dolt/go/store/geometry"
"github.com/dolthub/dolt/go/store/hash"
)
@@ -128,7 +130,7 @@ func WriteEWKBPolyData(p Polygon, buf []byte) {
// Write each line
start, stop := 0, LengthSize
for _, l := range p.Lines {
start, stop = stop, stop+LengthSize+PointDataSize*len(l.Points)
start, stop = stop, stop+LengthSize+geometry.PointSize*len(l.Points)
WriteEWKBLineData(l, buf[start:stop])
}
}
@@ -142,15 +144,15 @@ func (v Polygon) writeTo(w nomsWriter, nbf *NomsBinFormat) error {
// Calculate space for polygon buffer
size := 0
for _, l := range v.Lines {
size += LengthSize + PointDataSize*len(l.Points)
size += LengthSize + geometry.PointSize*len(l.Points)
}
// Allocate buffer for poly
buf := make([]byte, EWKBHeaderSize+LengthSize+size)
buf := make([]byte, geometry.EWKBHeaderSize+LengthSize+size)
// Write header and data to buffer
WriteEWKBHeader(v, buf)
WriteEWKBPolyData(v, buf[EWKBHeaderSize:])
WriteEWKBPolyData(v, buf[geometry.EWKBHeaderSize:])
w.writeString(string(buf))
return nil
@@ -167,7 +169,7 @@ func ParseEWKBPoly(buf []byte, srid uint32) Polygon {
lines := make([]Linestring, numLines)
for i := uint32(0); i < numLines; i++ {
lines[i] = ParseEWKBLine(buf[s:], srid)
s += LengthSize * PointDataSize * len(lines[i].Points)
s += LengthSize * geometry.PointSize * len(lines[i].Points)
}
return Polygon{SRID: srid, Lines: lines}
@@ -175,20 +177,20 @@ func ParseEWKBPoly(buf []byte, srid uint32) Polygon {
func readPolygon(nbf *NomsBinFormat, b *valueDecoder) (Polygon, error) {
buf := []byte(b.ReadString())
srid, _, geomType := ParseEWKBHeader(buf)
if geomType != PolygonID {
srid, _, geomType := geometry.ParseEWKBHeader(buf)
if geomType != geometry.PolygonType {
return Polygon{}, errors.New("not a polygon")
}
return ParseEWKBPoly(buf[EWKBHeaderSize:], srid), nil
return ParseEWKBPoly(buf[geometry.EWKBHeaderSize:], srid), nil
}
func (v Polygon) readFrom(nbf *NomsBinFormat, b *binaryNomsReader) (Value, error) {
buf := []byte(b.ReadString())
srid, _, geomType := ParseEWKBHeader(buf)
if geomType != PolygonID {
srid, _, geomType := geometry.ParseEWKBHeader(buf)
if geomType != geometry.PolygonType {
return nil, errors.New("not a polygon")
}
return ParseEWKBPoly(buf[EWKBHeaderSize:], srid), nil
return ParseEWKBPoly(buf[geometry.EWKBHeaderSize:], srid), nil
}
func (v Polygon) skip(nbf *NomsBinFormat, b *binaryNomsReader) {

View File

@@ -29,6 +29,7 @@ import (
"github.com/shopspring/decimal"
"github.com/dolthub/dolt/go/store/d"
"github.com/dolthub/dolt/go/store/geometry"
)
var ErrUnknownType = errors.New("unknown type $@")
@@ -374,27 +375,27 @@ func (r *valueDecoder) readValue(nbf *NomsBinFormat) (Value, error) {
case PointKind:
r.skipKind()
buf := []byte(r.ReadString())
srid, _, geomType := ParseEWKBHeader(buf)
if geomType != PointID {
srid, _, geomType := geometry.ParseEWKBHeader(buf)
if geomType != geometry.PointType {
return nil, ErrUnknownType
}
return ParseEWKBPoint(buf[EWKBHeaderSize:], srid), nil
return ParseEWKBPoint(buf[geometry.EWKBHeaderSize:], srid), nil
case LinestringKind:
r.skipKind()
buf := []byte(r.ReadString())
srid, _, geomType := ParseEWKBHeader(buf)
if geomType != LinestringID {
srid, _, geomType := geometry.ParseEWKBHeader(buf)
if geomType != geometry.LinestringType {
return nil, ErrUnknownType
}
return ParseEWKBLine(buf[EWKBHeaderSize:], srid), nil
return ParseEWKBLine(buf[geometry.EWKBHeaderSize:], srid), nil
case PolygonKind:
r.skipKind()
buf := []byte(r.ReadString())
srid, _, geomType := ParseEWKBHeader(buf)
if geomType != PolygonID {
srid, _, geomType := geometry.ParseEWKBHeader(buf)
if geomType != geometry.PolygonType {
return nil, ErrUnknownType
}
return ParseEWKBPoly(buf[EWKBHeaderSize:], srid), nil
return ParseEWKBPoly(buf[geometry.EWKBHeaderSize:], srid), nil
case TypeKind:
r.skipKind()
return r.readType()

View File

@@ -19,13 +19,10 @@ import (
"encoding/binary"
"math"
"time"
"github.com/shopspring/decimal"
)
type Type struct {
Enc Encoding
Coll Collation
Nullable bool
}
@@ -38,34 +35,24 @@ const (
uint16Size ByteSize = 2
int32Size ByteSize = 4
uint32Size ByteSize = 4
int48Size ByteSize = 6
uint48Size ByteSize = 6
int64Size ByteSize = 8
uint64Size ByteSize = 8
float32Size ByteSize = 4
float64Size ByteSize = 8
// todo(andy): experimental encoding
timestampSize ByteSize = 15
)
type Collation uint16
const (
ByteOrderCollation Collation = 0
timestampSize ByteSize = 8
)
type Encoding uint8
// Constant Size Encodings
const (
NullEnc Encoding = 0
Int8Enc Encoding = 1
Uint8Enc Encoding = 2
Int16Enc Encoding = 3
Uint16Enc Encoding = 4
// Int24Enc Encoding = 5
// Uint24Enc Encoding = 6
NullEnc Encoding = 0
Int8Enc Encoding = 1
Uint8Enc Encoding = 2
Int16Enc Encoding = 3
Uint16Enc Encoding = 4
Int32Enc Encoding = 7
Uint32Enc Encoding = 8
Int64Enc Encoding = 9
@@ -74,7 +61,6 @@ const (
Float64Enc Encoding = 12
// todo(andy): experimental encodings
// consolidate into one
TimestampEnc Encoding = 14
DateEnc Encoding = 15
DatetimeEnc Encoding = 16
@@ -89,9 +75,10 @@ const (
BytesEnc Encoding = 129
// todo(andy): experimental encodings
DecimalEnc Encoding = 130
JSONEnc Encoding = 131
TimeEnc Encoding = 132
DecimalEnc Encoding = 130
JSONEnc Encoding = 131
TimeEnc Encoding = 132
GeometryEnc Encoding = 133
// TODO
// BitEnc
@@ -139,93 +126,10 @@ func sizeFromType(t Type) (ByteSize, bool) {
}
}
func ReadBool(val []byte) bool {
func readBool(val []byte) bool {
expectSize(val, int8Size)
return val[0] == 1
}
func ReadInt8(val []byte) int8 {
expectSize(val, int8Size)
return int8(val[0])
}
func ReadUint8(val []byte) uint8 {
expectSize(val, uint8Size)
return val[0]
}
func ReadInt16(val []byte) int16 {
expectSize(val, int16Size)
return int16(binary.LittleEndian.Uint16(val))
}
func ReadUint16(val []byte) uint16 {
expectSize(val, uint16Size)
return binary.LittleEndian.Uint16(val)
}
func ReadInt32(val []byte) int32 {
expectSize(val, int32Size)
return int32(binary.LittleEndian.Uint32(val))
}
func ReadUint32(val []byte) uint32 {
expectSize(val, uint32Size)
return binary.LittleEndian.Uint32(val)
}
func ReadUint48(val []byte) (u uint64) {
expectSize(val, uint48Size)
var tmp [8]byte
// copy |val| to |tmp|
tmp[5], tmp[4] = val[5], val[4]
tmp[3], tmp[2] = val[3], val[2]
tmp[1], tmp[0] = val[1], val[0]
u = binary.LittleEndian.Uint64(tmp[:])
return
}
func ReadInt64(val []byte) int64 {
expectSize(val, int64Size)
return int64(binary.LittleEndian.Uint64(val))
}
func ReadUint64(val []byte) uint64 {
expectSize(val, uint64Size)
return binary.LittleEndian.Uint64(val)
}
func ReadFloat32(val []byte) float32 {
expectSize(val, float32Size)
return math.Float32frombits(ReadUint32(val))
}
func ReadFloat64(val []byte) float64 {
expectSize(val, float64Size)
return math.Float64frombits(ReadUint64(val))
}
func ReadDecimal(val []byte) decimal.Decimal {
// todo(andy): temporary lossy implementation
//return decimal.NewFromFloat(ReadFloat64(val))
return decimal.NewFromFloat(ReadFloat64(val))
}
func ReadTime(buf []byte) (t time.Time) {
expectSize(buf, timestampSize)
if err := t.UnmarshalBinary(buf); err != nil {
panic(err)
}
return t
}
func ReadString(val []byte) string {
// todo(andy): fix allocation
return string(val)
}
func readBytes(val []byte) []byte {
return val
}
func writeBool(buf []byte, val bool) {
expectSize(buf, 1)
@@ -236,94 +140,265 @@ func writeBool(buf []byte, val bool) {
}
}
func WriteInt8(buf []byte, val int8) {
// false is less that true
func compareBool(l, r bool) int {
if l == r {
return 0
}
if !l && r {
return -1
}
return 1
}
func readInt8(val []byte) int8 {
expectSize(val, int8Size)
return int8(val[0])
}
func writeInt8(buf []byte, val int8) {
expectSize(buf, int8Size)
buf[0] = byte(val)
}
func WriteUint8(buf []byte, val uint8) {
func compareInt8(l, r int8) int {
if l == r {
return 0
} else if l < r {
return -1
} else {
return 1
}
}
func readUint8(val []byte) uint8 {
expectSize(val, uint8Size)
return val[0]
}
func writeUint8(buf []byte, val uint8) {
expectSize(buf, uint8Size)
buf[0] = byte(val)
}
func WriteInt16(buf []byte, val int16) {
func compareUint8(l, r uint8) int {
if l == r {
return 0
} else if l < r {
return -1
} else {
return 1
}
}
func readInt16(val []byte) int16 {
expectSize(val, int16Size)
return int16(binary.LittleEndian.Uint16(val))
}
func writeInt16(buf []byte, val int16) {
expectSize(buf, int16Size)
binary.LittleEndian.PutUint16(buf, uint16(val))
}
func WriteUint16(buf []byte, val uint16) {
func compareInt16(l, r int16) int {
if l == r {
return 0
} else if l < r {
return -1
} else {
return 1
}
}
func readUint16(val []byte) uint16 {
expectSize(val, uint16Size)
return binary.LittleEndian.Uint16(val)
}
func writeUint16(buf []byte, val uint16) {
expectSize(buf, uint16Size)
binary.LittleEndian.PutUint16(buf, val)
}
func WriteInt32(buf []byte, val int32) {
func compareUint16(l, r uint16) int {
if l == r {
return 0
} else if l < r {
return -1
} else {
return 1
}
}
func readInt32(val []byte) int32 {
expectSize(val, int32Size)
return int32(binary.LittleEndian.Uint32(val))
}
func writeInt32(buf []byte, val int32) {
expectSize(buf, int32Size)
binary.LittleEndian.PutUint32(buf, uint32(val))
}
func WriteUint32(buf []byte, val uint32) {
func compareInt32(l, r int32) int {
if l == r {
return 0
} else if l < r {
return -1
} else {
return 1
}
}
func readUint32(val []byte) uint32 {
expectSize(val, uint32Size)
return binary.LittleEndian.Uint32(val)
}
func writeUint32(buf []byte, val uint32) {
expectSize(buf, uint32Size)
binary.LittleEndian.PutUint32(buf, val)
}
func WriteUint48(buf []byte, u uint64) {
const maxUint48 = uint64(1<<48 - 1)
expectSize(buf, uint48Size)
if u > maxUint48 {
panic("uint is greater than max uint48")
func compareUint32(l, r uint32) int {
if l == r {
return 0
} else if l < r {
return -1
} else {
return 1
}
var tmp [8]byte
binary.LittleEndian.PutUint64(tmp[:], u)
// copy |tmp| to |buf|
buf[5], buf[4] = tmp[5], tmp[4]
buf[3], buf[2] = tmp[3], tmp[2]
buf[1], buf[0] = tmp[1], tmp[0]
}
func WriteInt64(buf []byte, val int64) {
func readInt64(val []byte) int64 {
expectSize(val, int64Size)
return int64(binary.LittleEndian.Uint64(val))
}
func writeInt64(buf []byte, val int64) {
expectSize(buf, int64Size)
binary.LittleEndian.PutUint64(buf, uint64(val))
}
func WriteUint64(buf []byte, val uint64) {
func compareInt64(l, r int64) int {
if l == r {
return 0
} else if l < r {
return -1
} else {
return 1
}
}
func readUint64(val []byte) uint64 {
expectSize(val, uint64Size)
return binary.LittleEndian.Uint64(val)
}
func writeUint64(buf []byte, val uint64) {
expectSize(buf, uint64Size)
binary.LittleEndian.PutUint64(buf, val)
}
func WriteFloat32(buf []byte, val float32) {
func compareUint64(l, r uint64) int {
if l == r {
return 0
} else if l < r {
return -1
} else {
return 1
}
}
func readFloat32(val []byte) float32 {
expectSize(val, float32Size)
return math.Float32frombits(readUint32(val))
}
func writeFloat32(buf []byte, val float32) {
expectSize(buf, float32Size)
binary.LittleEndian.PutUint32(buf, math.Float32bits(val))
}
func WriteFloat64(buf []byte, val float64) {
func compareFloat32(l, r float32) int {
if l == r {
return 0
} else if l < r {
return -1
} else {
return 1
}
}
func readFloat64(val []byte) float64 {
expectSize(val, float64Size)
return math.Float64frombits(readUint64(val))
}
func writeFloat64(buf []byte, val float64) {
expectSize(buf, float64Size)
binary.LittleEndian.PutUint64(buf, math.Float64bits(val))
}
func WriteTime(buf []byte, val time.Time) {
expectSize(buf, timestampSize)
// todo(andy): fix allocation here
m, _ := val.MarshalBinary()
copy(buf, m)
}
func writeString(buf []byte, val string, coll Collation) {
expectSize(buf, ByteSize(len(val)))
copy(buf, val)
}
func writeBytes(buf, val []byte, coll Collation) {
expectSize(buf, ByteSize(len(val)))
copy(buf, val)
}
func expectSize(buf []byte, sz ByteSize) {
if ByteSize(len(buf)) != sz {
panic("byte slice is not of expected size")
func compareFloat64(l, r float64) int {
if l == r {
return 0
} else if l < r {
return -1
} else {
return 1
}
}
func readTimestamp(buf []byte) (t time.Time) {
expectSize(buf, timestampSize)
t = time.Unix(0, readInt64(buf)).UTC()
return
}
func writeTimestamp(buf []byte, val time.Time) {
expectSize(buf, timestampSize)
writeInt64(buf, val.UnixNano())
}
func compareTimestamp(l, r time.Time) int {
if l.Equal(r) {
return 0
} else if l.Before(r) {
return -1
} else {
return 1
}
}
func readString(val []byte) string {
// todo(andy): fix allocation
return string(val)
}
func writeString(buf []byte, val string) {
expectSize(buf, ByteSize(len(val)))
copy(buf, val)
}
func compareString(l, r string) int {
return bytes.Compare([]byte(l), []byte(r))
}
func readBytes(val []byte) []byte {
return val
}
func writeBytes(buf, val []byte) {
expectSize(buf, ByteSize(len(val)))
copy(buf, val)
}
func compareBytes(l, r []byte) int {
return bytes.Compare(l, r)
}
func compare(typ Type, left, right []byte) int {
// order NULLs last
if left == nil {
@@ -342,231 +417,57 @@ func compare(typ Type, left, right []byte) int {
switch typ.Enc {
case Int8Enc:
return compareInt8(ReadInt8(left), ReadInt8(right))
return compareInt8(readInt8(left), readInt8(right))
case Uint8Enc:
return compareUint8(ReadUint8(left), ReadUint8(right))
return compareUint8(readUint8(left), readUint8(right))
case Int16Enc:
return compareInt16(ReadInt16(left), ReadInt16(right))
return compareInt16(readInt16(left), readInt16(right))
case Uint16Enc:
return compareUint16(ReadUint16(left), ReadUint16(right))
return compareUint16(readUint16(left), readUint16(right))
case Int32Enc:
return compareInt32(ReadInt32(left), ReadInt32(right))
return compareInt32(readInt32(left), readInt32(right))
case Uint32Enc:
return compareUint32(ReadUint32(left), ReadUint32(right))
return compareUint32(readUint32(left), readUint32(right))
case Int64Enc:
return compareInt64(ReadInt64(left), ReadInt64(right))
return compareInt64(readInt64(left), readInt64(right))
case Uint64Enc:
return compareUint64(ReadUint64(left), ReadUint64(right))
return compareUint64(readUint64(left), readUint64(right))
case Float32Enc:
return compareFloat32(ReadFloat32(left), ReadFloat32(right))
return compareFloat32(readFloat32(left), readFloat32(right))
case Float64Enc:
return compareFloat64(ReadFloat64(left), ReadFloat64(right))
return compareFloat64(readFloat64(left), readFloat64(right))
case YearEnc:
return compareInt16(ReadInt16(left), ReadInt16(right))
return compareInt16(readInt16(left), readInt16(right))
case DateEnc, DatetimeEnc, TimestampEnc:
return compareTimestamp(ReadTime(left), ReadTime(right))
return compareTimestamp(readTimestamp(left), readTimestamp(right))
case TimeEnc:
panic("unimplemented")
case DecimalEnc:
// todo(andy): temporary Decimal implementation
fallthrough
case StringEnc:
return compareString(ReadString(left), ReadString(right), typ.Coll)
return compareString(readString(left), readString(right))
case BytesEnc:
return compareBytes(readBytes(left), readBytes(right), typ.Coll)
return compareBytes(readBytes(left), readBytes(right))
default:
panic("unknown encoding")
}
}
// false is less that true
func compareBool(l, r bool) int {
if l == r {
return 0
}
if !l && r {
return -1
}
return 1
}
func compareInt8(l, r int8) int {
if l == r {
return 0
} else if l < r {
return -1
} else {
return 1
func expectSize(buf []byte, sz ByteSize) {
if ByteSize(len(buf)) != sz {
panic("byte slice is not of expected size")
}
}
func compareUint8(l, r uint8) int {
if l == r {
return 0
} else if l < r {
return -1
} else {
return 1
func expectTrue(b bool) {
if !b {
panic("expected true")
}
}
func compareInt16(l, r int16) int {
if l == r {
return 0
} else if l < r {
return -1
} else {
return 1
func expectFalse(b bool) {
if b {
panic("expected false")
}
}
func compareUint16(l, r uint16) int {
if l == r {
return 0
} else if l < r {
return -1
} else {
return 1
}
}
func compareInt32(l, r int32) int {
if l == r {
return 0
} else if l < r {
return -1
} else {
return 1
}
}
func compareUint32(l, r uint32) int {
if l == r {
return 0
} else if l < r {
return -1
} else {
return 1
}
}
func compareInt64(l, r int64) int {
if l == r {
return 0
} else if l < r {
return -1
} else {
return 1
}
}
func compareUint64(l, r uint64) int {
if l == r {
return 0
} else if l < r {
return -1
} else {
return 1
}
}
func compareFloat32(l, r float32) int {
if l == r {
return 0
} else if l < r {
return -1
} else {
return 1
}
}
func compareFloat64(l, r float64) int {
if l == r {
return 0
} else if l < r {
return -1
} else {
return 1
}
}
func compareTimestamp(l, r time.Time) int {
if l.Equal(r) {
return 0
}
if l.Before(r) {
return -1
} else {
return 1
}
}
func compareString(l, r string, coll Collation) int {
// todo(andy): collations
return bytes.Compare([]byte(l), []byte(r))
}
func compareBytes(l, r []byte, coll Collation) int {
// todo(andy): collations
return bytes.Compare(l, r)
}
// rawCmp is an array of indexes used to perform raw Tuple comparisons.
// Under certain conditions, Tuple comparisons can be optimized by
// directly comparing Tuples as byte slices, rather than accessing
// and deserializing each field.
// If each of these conditions is met, raw comparisons can be used:
// (1) All fields in the Tuple must be non-nullable.
// (2) All fields in the Tuple must be of constant size
// (eg Ints, Uints, Floats, Time types, etc.)
//
type rawCmp []int
var rawCmpLookup = map[Encoding]rawCmp{
Int8Enc: {0},
Uint8Enc: {0},
Int16Enc: {1, 0},
Uint16Enc: {1, 0},
Int32Enc: {3, 2, 1, 0},
Uint32Enc: {3, 2, 1, 0},
Int64Enc: {7, 6, 5, 4, 3, 2, 1, 0},
Uint64Enc: {7, 6, 5, 4, 3, 2, 1, 0},
}
func compareRaw(left, right Tuple, mapping rawCmp) int {
var l, r byte
for _, idx := range mapping {
l, r = left[idx], right[idx]
if l != r {
break
}
}
if l > r {
return 1
} else if l < r {
return -1
}
return 0
}
func maybeGetRawComparison(types ...Type) rawCmp {
var raw []int
offset := 0
for _, typ := range types {
if typ.Nullable {
return nil
}
mapping, ok := rawCmpLookup[typ.Enc]
if !ok {
return nil
}
for i := range mapping {
mapping[i] += offset
}
raw = append(raw, mapping...)
offset += len(mapping)
}
return raw
}

View File

@@ -116,19 +116,19 @@ func TestCompare(t *testing.T) {
func encInt(i int64) []byte {
buf := make([]byte, 8)
WriteInt64(buf, i)
writeInt64(buf, i)
return buf
}
func encUint(u uint64) []byte {
buf := make([]byte, 8)
WriteUint64(buf, u)
writeUint64(buf, u)
return buf
}
func encFloat(f float64) []byte {
buf := make([]byte, 8)
WriteFloat64(buf, f)
writeFloat64(buf, f)
return buf
}
@@ -156,7 +156,7 @@ func roundTripBools(t *testing.T) {
integers := []bool{true, false}
for _, exp := range integers {
writeBool(buf, exp)
assert.Equal(t, exp, ReadBool(buf))
assert.Equal(t, exp, readBool(buf))
zero(buf)
}
}
@@ -166,8 +166,8 @@ func roundTripInts(t *testing.T) {
integers := []int64{-1, 0, -1, math.MaxInt8, math.MinInt8}
for _, value := range integers {
exp := int8(value)
WriteInt8(buf, exp)
assert.Equal(t, exp, ReadInt8(buf))
writeInt8(buf, exp)
assert.Equal(t, exp, readInt8(buf))
zero(buf)
}
@@ -175,8 +175,8 @@ func roundTripInts(t *testing.T) {
integers = append(integers, math.MaxInt16, math.MaxInt16)
for _, value := range integers {
exp := int16(value)
WriteInt16(buf, exp)
assert.Equal(t, exp, ReadInt16(buf))
writeInt16(buf, exp)
assert.Equal(t, exp, readInt16(buf))
zero(buf)
}
@@ -184,8 +184,8 @@ func roundTripInts(t *testing.T) {
integers = append(integers, math.MaxInt32, math.MaxInt32)
for _, value := range integers {
exp := int32(value)
WriteInt32(buf, exp)
assert.Equal(t, exp, ReadInt32(buf))
writeInt32(buf, exp)
assert.Equal(t, exp, readInt32(buf))
zero(buf)
}
@@ -193,8 +193,8 @@ func roundTripInts(t *testing.T) {
integers = append(integers, math.MaxInt64, math.MaxInt64)
for _, value := range integers {
exp := int64(value)
WriteInt64(buf, exp)
assert.Equal(t, exp, ReadInt64(buf))
writeInt64(buf, exp)
assert.Equal(t, exp, readInt64(buf))
zero(buf)
}
}
@@ -204,8 +204,8 @@ func roundTripUints(t *testing.T) {
uintegers := []uint64{0, 1, math.MaxUint8}
for _, value := range uintegers {
exp := uint8(value)
WriteUint8(buf, exp)
assert.Equal(t, exp, ReadUint8(buf))
writeUint8(buf, exp)
assert.Equal(t, exp, readUint8(buf))
zero(buf)
}
@@ -213,8 +213,8 @@ func roundTripUints(t *testing.T) {
uintegers = append(uintegers, math.MaxUint16)
for _, value := range uintegers {
exp := uint16(value)
WriteUint16(buf, exp)
assert.Equal(t, exp, ReadUint16(buf))
writeUint16(buf, exp)
assert.Equal(t, exp, readUint16(buf))
zero(buf)
}
@@ -222,8 +222,8 @@ func roundTripUints(t *testing.T) {
uintegers = append(uintegers, math.MaxUint32)
for _, value := range uintegers {
exp := uint32(value)
WriteUint32(buf, exp)
assert.Equal(t, exp, ReadUint32(buf))
writeUint32(buf, exp)
assert.Equal(t, exp, readUint32(buf))
zero(buf)
}
@@ -231,8 +231,8 @@ func roundTripUints(t *testing.T) {
uintegers = append(uintegers, math.MaxUint64)
for _, value := range uintegers {
exp := uint64(value)
WriteUint64(buf, exp)
assert.Equal(t, exp, ReadUint64(buf))
writeUint64(buf, exp)
assert.Equal(t, exp, readUint64(buf))
zero(buf)
}
}
@@ -242,8 +242,8 @@ func roundTripFloats(t *testing.T) {
floats := []float64{-1, 0, 1, math.MaxFloat32, math.SmallestNonzeroFloat32}
for _, value := range floats {
exp := float32(value)
WriteFloat32(buf, exp)
assert.Equal(t, exp, ReadFloat32(buf))
writeFloat32(buf, exp)
assert.Equal(t, exp, readFloat32(buf))
zero(buf)
}
@@ -251,8 +251,8 @@ func roundTripFloats(t *testing.T) {
floats = append(floats, math.MaxFloat64, math.SmallestNonzeroFloat64)
for _, value := range floats {
exp := float64(value)
WriteFloat64(buf, exp)
assert.Equal(t, exp, ReadFloat64(buf))
writeFloat64(buf, exp)
assert.Equal(t, exp, readFloat64(buf))
zero(buf)
}
}

View File

@@ -1,73 +0,0 @@
// Copyright 2021 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package val
// todo(andy): more ergonomic offsets
// type SlicedBuffer struct {
// buf []byte
// offs []uint16
// }
type Offsets []byte
// OffsetsSize returns the number of bytes needed to
// store |fieldCount| offsets.
func OffsetsSize(count int) ByteSize {
if count == 0 {
return 0
}
return ByteSize((count - 1) * 2)
}
// Count returns the number of offsets stored in |sl|.
func (os Offsets) Count() int {
return (len(os) / 2) + 1
}
// GetBounds returns the ith offset. |last| is the byte position
// of the _end_ of the last element.
func (os Offsets) GetBounds(i int, last ByteSize) (start, stop ByteSize) {
start = os.getOffset(i)
if os.isLastIndex(i) {
stop = last
} else {
stop = os.getOffset(i + 1)
}
return
}
// getOffset gets the byte position of the _start_ of element |i|.
func (os Offsets) getOffset(i int) ByteSize {
if i == 0 {
return 0
}
start := (i - 1) * 2
off := ReadUint16(os[start : start+2])
return ByteSize(off)
}
// Put writes offset |pos| at index |i|.
func (os Offsets) Put(i int, off ByteSize) {
if i == 0 {
return
}
start := (i - 1) * 2
WriteUint16(os[start:start+2], uint16(off))
}
// isLastIndex returns true if |i| is the last index in |sl|.
func (os Offsets) isLastIndex(i int) bool {
return len(os) == i*2
}

View File

@@ -0,0 +1,76 @@
// Copyright 2021 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package val
type SlicedBuffer struct {
Buf []byte
Offs offsets
}
func slicedTupleBuffer(tup Tuple) SlicedBuffer {
mask := tup.mask()
offStop := tup.size() - numFieldsSize - mask.size()
bufStop := offStop - offsetsSize(mask.count())
return SlicedBuffer{
Buf: tup[:bufStop],
Offs: offsets(tup[bufStop:offStop]),
}
}
// GetSlice returns the ith slice of |sb.Buf|.
func (sb SlicedBuffer) GetSlice(i int) []byte {
start := sb.Offs.getOffset(i)
stop := ByteSize(len(sb.Buf))
if !sb.isLastIndex(i) {
stop = sb.Offs.getOffset(i + 1)
}
return sb.Buf[start:stop]
}
// isLastIndex returns true if |i| is the last index in |sl|.
func (sb SlicedBuffer) isLastIndex(i int) bool {
return len(sb.Offs) == i*2
}
type offsets []byte
// offsetsSize returns the number of bytes needed to
// store |fieldCount| offsets.
func offsetsSize(count int) ByteSize {
if count == 0 {
return 0
}
return ByteSize((count - 1) * 2)
}
// getOffset gets the byte position of the _start_ of element |i|.
func (os offsets) getOffset(i int) ByteSize {
if i == 0 {
return 0
}
start := (i - 1) * 2
off := readUint16(os[start : start+2])
return ByteSize(off)
}
// putOffset writes offset |pos| at index |i|.
func (os offsets) putOffset(i int, off ByteSize) {
if i == 0 {
return
}
start := (i - 1) * 2
writeUint16(os[start:start+2], uint16(off))
}

View File

@@ -30,7 +30,7 @@ const (
// Tuples are byte slices containing field values and a footer. Tuples only
// contain Values for non-NULL Fields. Value i contains the data for ith non-
// NULL Field. Values are packed contiguously from the front of the Tuple. The
// footer contains offsets, a member mask, and a field count. Offsets enable
// footer contains offsets, a member mask, and a field count. offsets enable
// random access to Values. The member mask enables NULL-compaction for Values.
//
// Tuples read and write Values as byte slices. (De)serialization is delegated
@@ -42,10 +42,10 @@ const (
//
// Tuple:
// +---------+---------+-----+---------+---------+-------------+-------------+
// | Value 0 | Value 1 | ... | Value K | Offsets | Member Mask | Field Count |
// | Value 0 | Value 1 | ... | Value K | offsets | Member Mask | Field Count |
// +---------+---------+-----+---------+---------+-------------+-------------+
//
// Offsets:
// offsets:
// The offset array contains a uint16 for each non-NULL field after field 0.
// Offset i encodes the distance to the ith Value from the front of the Tuple.
// The size of the offset array is 2*(K-1) bytes, where K is the number of
@@ -91,7 +91,7 @@ func NewTuple(pool pool.BuffPool, values ...[]byte) Tuple {
panic("tuple data size exceeds maximum")
}
tup, offs, mask := makeTuple(pool, pos, count, len(values))
tup, offs, mask := allocateTuple(pool, pos, count, len(values))
count = 0
pos = ByteSize(0)
@@ -100,7 +100,7 @@ func NewTuple(pool pool.BuffPool, values ...[]byte) Tuple {
continue
}
mask.set(i)
offs.Put(count, pos)
offs.putOffset(count, pos)
count++
copy(tup[pos:pos+sizeOf(v)], v)
@@ -116,15 +116,15 @@ func CloneTuple(pool pool.BuffPool, tup Tuple) Tuple {
return buf
}
func makeTuple(pool pool.BuffPool, bufSz ByteSize, values, fields int) (tup Tuple, offs Offsets, ms memberMask) {
offSz := OffsetsSize(values)
func allocateTuple(pool pool.BuffPool, bufSz ByteSize, values, fields int) (tup Tuple, offs offsets, ms memberMask) {
offSz := offsetsSize(values)
maskSz := maskSize(fields)
countSz := numFieldsSize
tup = pool.Get(uint64(bufSz + offSz + maskSz + countSz))
writeFieldCount(tup, fields)
offs = Offsets(tup[bufSz : bufSz+offSz])
offs = offsets(tup[bufSz : bufSz+offSz])
ms = memberMask(tup[bufSz+offSz : bufSz+offSz+maskSz])
return
@@ -141,10 +141,7 @@ func (tup Tuple) GetField(i int) []byte {
// index to compensate for NULL fields
i = tup.fieldToValue(i)
offs, valStop := tup.offsets()
start, stop := offs.GetBounds(i, valStop)
return tup[start:stop]
return slicedTupleBuffer(tup).GetSlice(i)
}
func (tup Tuple) size() ByteSize {
@@ -157,7 +154,7 @@ func (tup Tuple) Count() int {
func (tup Tuple) fieldCount() int {
sl := tup[tup.size()-numFieldsSize:]
return int(ReadUint16(sl))
return int(readUint16(sl))
}
func (tup Tuple) valueCount() int {
@@ -170,14 +167,6 @@ func (tup Tuple) mask() memberMask {
return memberMask(tup[start:stop])
}
func (tup Tuple) offsets() (offs Offsets, valStop ByteSize) {
mask := tup.mask()
offStop := tup.size() - numFieldsSize - mask.size()
valStop = offStop - OffsetsSize(mask.count())
offs = Offsets(tup[valStop:offStop])
return
}
func (tup Tuple) fieldToValue(i int) int {
return tup.mask().countPrefix(i) - 1
}
@@ -192,5 +181,5 @@ func sizeOf(val []byte) ByteSize {
func writeFieldCount(tup Tuple, count int) {
sl := tup[len(tup)-int(numFieldsSize):]
WriteUint16(sl, uint16(count))
writeUint16(sl, uint16(count))
}

View File

@@ -15,12 +15,8 @@
package val
import (
"encoding/json"
"fmt"
"time"
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/dolt/go/store/pool"
)
@@ -86,7 +82,7 @@ func (tb *TupleBuilder) PutBool(i int, v bool) {
func (tb *TupleBuilder) PutInt8(i int, v int8) {
tb.Desc.expectEncoding(i, Int8Enc)
tb.fields[i] = tb.buf[tb.pos : tb.pos+int8Size]
WriteInt8(tb.fields[i], v)
writeInt8(tb.fields[i], v)
tb.pos += int8Size
}
@@ -94,7 +90,7 @@ func (tb *TupleBuilder) PutInt8(i int, v int8) {
func (tb *TupleBuilder) PutUint8(i int, v uint8) {
tb.Desc.expectEncoding(i, Uint8Enc)
tb.fields[i] = tb.buf[tb.pos : tb.pos+uint8Size]
WriteUint8(tb.fields[i], v)
writeUint8(tb.fields[i], v)
tb.pos += uint8Size
}
@@ -102,7 +98,7 @@ func (tb *TupleBuilder) PutUint8(i int, v uint8) {
func (tb *TupleBuilder) PutInt16(i int, v int16) {
tb.Desc.expectEncoding(i, Int16Enc)
tb.fields[i] = tb.buf[tb.pos : tb.pos+int16Size]
WriteInt16(tb.fields[i], v)
writeInt16(tb.fields[i], v)
tb.pos += int16Size
}
@@ -110,7 +106,7 @@ func (tb *TupleBuilder) PutInt16(i int, v int16) {
func (tb *TupleBuilder) PutUint16(i int, v uint16) {
tb.Desc.expectEncoding(i, Uint16Enc)
tb.fields[i] = tb.buf[tb.pos : tb.pos+uint16Size]
WriteUint16(tb.fields[i], v)
writeUint16(tb.fields[i], v)
tb.pos += uint16Size
}
@@ -118,7 +114,7 @@ func (tb *TupleBuilder) PutUint16(i int, v uint16) {
func (tb *TupleBuilder) PutInt32(i int, v int32) {
tb.Desc.expectEncoding(i, Int32Enc)
tb.fields[i] = tb.buf[tb.pos : tb.pos+int32Size]
WriteInt32(tb.fields[i], v)
writeInt32(tb.fields[i], v)
tb.pos += int32Size
}
@@ -126,7 +122,7 @@ func (tb *TupleBuilder) PutInt32(i int, v int32) {
func (tb *TupleBuilder) PutUint32(i int, v uint32) {
tb.Desc.expectEncoding(i, Uint32Enc)
tb.fields[i] = tb.buf[tb.pos : tb.pos+uint32Size]
WriteUint32(tb.fields[i], v)
writeUint32(tb.fields[i], v)
tb.pos += uint32Size
}
@@ -134,7 +130,7 @@ func (tb *TupleBuilder) PutUint32(i int, v uint32) {
func (tb *TupleBuilder) PutInt64(i int, v int64) {
tb.Desc.expectEncoding(i, Int64Enc)
tb.fields[i] = tb.buf[tb.pos : tb.pos+int64Size]
WriteInt64(tb.fields[i], v)
writeInt64(tb.fields[i], v)
tb.pos += int64Size
}
@@ -142,7 +138,7 @@ func (tb *TupleBuilder) PutInt64(i int, v int64) {
func (tb *TupleBuilder) PutUint64(i int, v uint64) {
tb.Desc.expectEncoding(i, Uint64Enc)
tb.fields[i] = tb.buf[tb.pos : tb.pos+uint64Size]
WriteUint64(tb.fields[i], v)
writeUint64(tb.fields[i], v)
tb.pos += uint64Size
}
@@ -150,7 +146,7 @@ func (tb *TupleBuilder) PutUint64(i int, v uint64) {
func (tb *TupleBuilder) PutFloat32(i int, v float32) {
tb.Desc.expectEncoding(i, Float32Enc)
tb.fields[i] = tb.buf[tb.pos : tb.pos+float32Size]
WriteFloat32(tb.fields[i], v)
writeFloat32(tb.fields[i], v)
tb.pos += float32Size
}
@@ -158,14 +154,14 @@ func (tb *TupleBuilder) PutFloat32(i int, v float32) {
func (tb *TupleBuilder) PutFloat64(i int, v float64) {
tb.Desc.expectEncoding(i, Float64Enc)
tb.fields[i] = tb.buf[tb.pos : tb.pos+float64Size]
WriteFloat64(tb.fields[i], v)
writeFloat64(tb.fields[i], v)
tb.pos += float64Size
}
func (tb *TupleBuilder) PutTimestamp(i int, v time.Time) {
tb.Desc.expectEncoding(i, DateEnc, DatetimeEnc, TimestampEnc)
tb.fields[i] = tb.buf[tb.pos : tb.pos+timestampSize]
WriteTime(tb.fields[i], v)
writeTimestamp(tb.fields[i], v)
tb.pos += timestampSize
}
@@ -174,7 +170,7 @@ func (tb *TupleBuilder) PutSqlTime(i int, v string) {
tb.Desc.expectEncoding(i, TimeEnc)
sz := ByteSize(len(v))
tb.fields[i] = tb.buf[tb.pos : tb.pos+sz]
writeString(tb.fields[i], v, tb.Desc.Types[i].Coll)
writeString(tb.fields[i], v)
tb.pos += sz
}
@@ -183,7 +179,7 @@ func (tb *TupleBuilder) PutYear(i int, v int16) {
// todo(andy): yearSize, etc?
tb.Desc.expectEncoding(i, YearEnc)
tb.fields[i] = tb.buf[tb.pos : tb.pos+int16Size]
WriteInt16(tb.fields[i], v)
writeInt16(tb.fields[i], v)
tb.pos += int16Size
}
@@ -192,7 +188,7 @@ func (tb *TupleBuilder) PutDecimal(i int, v string) {
// todo(andy): temporary implementation
sz := ByteSize(len(v))
tb.fields[i] = tb.buf[tb.pos : tb.pos+sz]
writeString(tb.fields[i], v, tb.Desc.Types[i].Coll)
writeString(tb.fields[i], v)
tb.pos += sz
}
@@ -201,7 +197,7 @@ func (tb *TupleBuilder) PutString(i int, v string) {
tb.Desc.expectEncoding(i, StringEnc)
sz := ByteSize(len(v))
tb.fields[i] = tb.buf[tb.pos : tb.pos+sz]
writeString(tb.fields[i], v, tb.Desc.Types[i].Coll)
writeString(tb.fields[i], v)
tb.pos += sz
}
@@ -210,20 +206,25 @@ func (tb *TupleBuilder) PutBytes(i int, v []byte) {
tb.Desc.expectEncoding(i, BytesEnc)
sz := ByteSize(len(v))
tb.fields[i] = tb.buf[tb.pos : tb.pos+sz]
writeBytes(tb.fields[i], v, tb.Desc.Types[i].Coll)
writeBytes(tb.fields[i], v)
tb.pos += sz
}
// PutJSON writes a []byte to the ith field of the Tuple being built.
func (tb *TupleBuilder) PutJSON(i int, v interface{}) {
func (tb *TupleBuilder) PutJSON(i int, v []byte) {
tb.Desc.expectEncoding(i, JSONEnc)
buf, err := json.Marshal(v)
if err != nil {
panic(err)
}
sz := ByteSize(len(buf))
sz := ByteSize(len(v))
tb.fields[i] = tb.buf[tb.pos : tb.pos+sz]
writeBytes(tb.fields[i], buf, tb.Desc.Types[i].Coll)
writeBytes(tb.fields[i], v)
tb.pos += sz
}
// PutGeometry writes a []byte to the ith field of the Tuple being built.
func (tb *TupleBuilder) PutGeometry(i int, v []byte) {
tb.Desc.expectEncoding(i, GeometryEnc)
sz := ByteSize(len(v))
tb.fields[i] = tb.buf[tb.pos : tb.pos+sz]
writeBytes(tb.fields[i], v)
tb.pos += sz
}
@@ -236,109 +237,6 @@ func (tb *TupleBuilder) PutRaw(i int, buf []byte) {
}
sz := ByteSize(len(buf))
tb.fields[i] = tb.buf[tb.pos : tb.pos+sz]
writeBytes(tb.fields[i], buf, tb.Desc.Types[i].Coll)
writeBytes(tb.fields[i], buf)
tb.pos += sz
}
// PutField writes an interface{} to the ith field of the Tuple being built.
func (tb *TupleBuilder) PutField(i int, v interface{}) {
if v == nil {
return // NULL
}
enc := tb.Desc.Types[i].Enc
switch enc {
case Int8Enc:
tb.PutInt8(i, int8(convInt(v)))
case Uint8Enc:
tb.PutUint8(i, uint8(convUint(v)))
case Int16Enc:
tb.PutInt16(i, int16(convInt(v)))
case Uint16Enc:
tb.PutUint16(i, uint16(convUint(v)))
case Int32Enc:
tb.PutInt32(i, int32(convInt(v)))
case Uint32Enc:
tb.PutUint32(i, uint32(convUint(v)))
case Int64Enc:
tb.PutInt64(i, int64(convInt(v)))
case Uint64Enc:
tb.PutUint64(i, uint64(convUint(v)))
case Float32Enc:
tb.PutFloat32(i, v.(float32))
case Float64Enc:
tb.PutFloat64(i, v.(float64))
case DecimalEnc:
tb.PutDecimal(i, v.(string))
case TimeEnc:
tb.PutSqlTime(i, v.(string))
case YearEnc:
tb.PutYear(i, v.(int16))
case DateEnc, DatetimeEnc, TimestampEnc:
tb.PutTimestamp(i, v.(time.Time))
case StringEnc:
tb.PutString(i, v.(string))
case BytesEnc:
if s, ok := v.(string); ok {
v = []byte(s)
}
tb.PutBytes(i, v.([]byte))
case JSONEnc:
// todo(andy): remove GMS dependency
tb.PutJSON(i, v.(sql.JSONDocument).Val)
default:
panic(fmt.Sprintf("unknown encoding %v %v", enc, v))
}
}
func convInt(v interface{}) int {
switch i := v.(type) {
case int:
return i
case int8:
return int(i)
case uint8:
return int(i)
case int16:
return int(i)
case uint16:
return int(i)
case int32:
return int(i)
case uint32:
return int(i)
case int64:
return int(i)
case uint64:
return int(i)
default:
panic("impossible conversion")
}
}
func convUint(v interface{}) uint {
switch i := v.(type) {
case uint:
return i
case int:
return uint(i)
case int8:
return uint(i)
case uint8:
return uint(i)
case int16:
return uint(i)
case uint16:
return uint(i)
case int32:
return uint(i)
case uint32:
return uint(i)
case int64:
return uint(i)
case uint64:
return uint(i)
default:
panic("impossible conversion")
}
}

View File

@@ -15,24 +15,15 @@
package val
import (
"encoding/json"
"fmt"
"strconv"
"strings"
"time"
"github.com/dolthub/go-mysql-server/sql"
)
type TupleDesc struct {
Types []Type
cmp TupleComparator
// Under certain conditions, Tuple comparisons can be
// optimized by directly comparing Tuples as byte slices,
// rather than accessing and deserializing each field.
// See definition of rawCmp for more information.
raw rawCmp
}
type TupleComparator interface {
@@ -71,7 +62,7 @@ func NewTupleDescriptorWithComparator(cmp TupleComparator, types ...Type) (td Tu
td.Types = types
td.cmp = cmp
td.raw = maybeGetRawComparison(types...)
return
}
@@ -81,13 +72,6 @@ func TupleDescriptorPrefix(td TupleDesc, count int) TupleDesc {
// Compare returns the Comaparison of |left| and |right|.
func (td TupleDesc) Compare(left, right Tuple) (cmp int) {
// todo(andy): compare raw is broken
//if td.raw != nil {
// return compareRaw(left, right, td.raw)
//} else {
// return td.cmp(left, right, td)
//}
return td.cmp.Compare(left, right, td)
}
@@ -108,7 +92,7 @@ func (td TupleDesc) GetBool(i int, tup Tuple) (v bool, ok bool) {
td.expectEncoding(i, Int8Enc)
b := tup.GetField(i)
if b != nil {
v, ok = ReadBool(b), true
v, ok = readBool(b), true
}
return
}
@@ -119,7 +103,7 @@ func (td TupleDesc) GetInt8(i int, tup Tuple) (v int8, ok bool) {
td.expectEncoding(i, Int8Enc)
b := tup.GetField(i)
if b != nil {
v, ok = ReadInt8(b), true
v, ok = readInt8(b), true
}
return
}
@@ -130,7 +114,7 @@ func (td TupleDesc) GetUint8(i int, tup Tuple) (v uint8, ok bool) {
td.expectEncoding(i, Uint8Enc)
b := tup.GetField(i)
if b != nil {
v, ok = ReadUint8(b), true
v, ok = readUint8(b), true
}
return
}
@@ -141,7 +125,7 @@ func (td TupleDesc) GetInt16(i int, tup Tuple) (v int16, ok bool) {
td.expectEncoding(i, Int16Enc)
b := tup.GetField(i)
if b != nil {
v, ok = ReadInt16(b), true
v, ok = readInt16(b), true
}
return
}
@@ -152,7 +136,7 @@ func (td TupleDesc) GetUint16(i int, tup Tuple) (v uint16, ok bool) {
td.expectEncoding(i, Uint16Enc)
b := tup.GetField(i)
if b != nil {
v, ok = ReadUint16(b), true
v, ok = readUint16(b), true
}
return
}
@@ -163,7 +147,7 @@ func (td TupleDesc) GetInt32(i int, tup Tuple) (v int32, ok bool) {
td.expectEncoding(i, Int32Enc)
b := tup.GetField(i)
if b != nil {
v, ok = ReadInt32(b), true
v, ok = readInt32(b), true
}
return
}
@@ -174,7 +158,7 @@ func (td TupleDesc) GetUint32(i int, tup Tuple) (v uint32, ok bool) {
td.expectEncoding(i, Uint32Enc)
b := tup.GetField(i)
if b != nil {
v, ok = ReadUint32(b), true
v, ok = readUint32(b), true
}
return
}
@@ -185,7 +169,7 @@ func (td TupleDesc) GetInt64(i int, tup Tuple) (v int64, ok bool) {
td.expectEncoding(i, Int64Enc)
b := tup.GetField(i)
if b != nil {
v, ok = ReadInt64(b), true
v, ok = readInt64(b), true
}
return
}
@@ -196,7 +180,7 @@ func (td TupleDesc) GetUint64(i int, tup Tuple) (v uint64, ok bool) {
td.expectEncoding(i, Uint64Enc)
b := tup.GetField(i)
if b != nil {
v, ok = ReadUint64(b), true
v, ok = readUint64(b), true
}
return
}
@@ -207,7 +191,7 @@ func (td TupleDesc) GetFloat32(i int, tup Tuple) (v float32, ok bool) {
td.expectEncoding(i, Float32Enc)
b := tup.GetField(i)
if b != nil {
v, ok = ReadFloat32(b), true
v, ok = readFloat32(b), true
}
return
}
@@ -218,7 +202,7 @@ func (td TupleDesc) GetFloat64(i int, tup Tuple) (v float64, ok bool) {
td.expectEncoding(i, Float64Enc)
b := tup.GetField(i)
if b != nil {
v, ok = ReadFloat64(b), true
v, ok = readFloat64(b), true
}
return
}
@@ -229,7 +213,7 @@ func (td TupleDesc) GetDecimal(i int, tup Tuple) (v string, ok bool) {
td.expectEncoding(i, DecimalEnc)
b := tup.GetField(i)
if b != nil {
v, ok = ReadString(b), true
v, ok = readString(b), true
}
return
}
@@ -240,7 +224,7 @@ func (td TupleDesc) GetTimestamp(i int, tup Tuple) (v time.Time, ok bool) {
td.expectEncoding(i, TimestampEnc, DateEnc, DatetimeEnc, YearEnc)
b := tup.GetField(i)
if b != nil {
v, ok = ReadTime(b), true
v, ok = readTimestamp(b), true
}
return
}
@@ -251,7 +235,7 @@ func (td TupleDesc) GetSqlTime(i int, tup Tuple) (v string, ok bool) {
td.expectEncoding(i, TimeEnc)
b := tup.GetField(i)
if b != nil {
v, ok = ReadString(b), true
v, ok = readString(b), true
}
return
}
@@ -262,7 +246,7 @@ func (td TupleDesc) GetYear(i int, tup Tuple) (v int16, ok bool) {
td.expectEncoding(i, YearEnc)
b := tup.GetField(i)
if b != nil {
v, ok = ReadInt16(b), true
v, ok = readInt16(b), true
}
return
}
@@ -273,7 +257,7 @@ func (td TupleDesc) GetString(i int, tup Tuple) (v string, ok bool) {
td.expectEncoding(i, StringEnc)
b := tup.GetField(i)
if b != nil {
v = ReadString(b)
v = readString(b)
ok = true
}
return
@@ -291,69 +275,28 @@ func (td TupleDesc) GetBytes(i int, tup Tuple) (v []byte, ok bool) {
return
}
// GetBytes reads a []byte from the ith field of the Tuple.
// GetJSON reads a []byte from the ith field of the Tuple.
// If the ith field is NULL, |ok| is set to false.
func (td TupleDesc) GetJSON(i int, tup Tuple) (v interface{}, ok bool) {
func (td TupleDesc) GetJSON(i int, tup Tuple) (v []byte, ok bool) {
td.expectEncoding(i, JSONEnc)
b := tup.GetField(i)
if b != nil {
if err := json.Unmarshal(b, &v); err != nil {
panic(err)
}
v = readBytes(b)
ok = true
}
return
}
// GetField reads the value from the ith field of the Tuple as an interface{}.
func (td TupleDesc) GetField(i int, tup Tuple) (v interface{}) {
var ok bool
switch td.Types[i].Enc {
case Int8Enc:
v, ok = td.GetInt8(i, tup)
case Uint8Enc:
v, ok = td.GetUint8(i, tup)
case Int16Enc:
v, ok = td.GetInt16(i, tup)
case Uint16Enc:
v, ok = td.GetUint16(i, tup)
case Int32Enc:
v, ok = td.GetInt32(i, tup)
case Uint32Enc:
v, ok = td.GetUint32(i, tup)
case Int64Enc:
v, ok = td.GetInt64(i, tup)
case Uint64Enc:
v, ok = td.GetUint64(i, tup)
case Float32Enc:
v, ok = td.GetFloat32(i, tup)
case Float64Enc:
v, ok = td.GetFloat64(i, tup)
case DecimalEnc:
v, ok = td.GetDecimal(i, tup)
case TimeEnc:
v, ok = td.GetSqlTime(i, tup)
case YearEnc:
v, ok = td.GetYear(i, tup)
case TimestampEnc, DateEnc, DatetimeEnc:
v, ok = td.GetTimestamp(i, tup)
case StringEnc:
v, ok = td.GetString(i, tup)
case BytesEnc:
v, ok = td.GetBytes(i, tup)
case JSONEnc:
var js interface{}
js, ok = td.GetJSON(i, tup)
if ok {
v = sql.JSONDocument{Val: js}
}
default:
panic("unknown encoding")
// GetBytes reads a []byte from the ith field of the Tuple.
// If the ith field is NULL, |ok| is set to false.
func (td TupleDesc) GetGeometry(i int, tup Tuple) (v []byte, ok bool) {
td.expectEncoding(i, GeometryEnc)
b := tup.GetField(i)
if b != nil {
v = readBytes(b)
ok = true
}
if !ok {
return nil
}
return v
return
}
func (td TupleDesc) expectEncoding(i int, encodings ...Encoding) {

View File

@@ -240,7 +240,6 @@ var CopiedNomsFiles []CopiedNomsFile = []CopiedNomsFile{
{Path: "store/nomdl/parser.go", NomsPath: "go/nomdl/parser.go", HadCopyrightNotice: true},
{Path: "store/nomdl/parser_test.go", NomsPath: "go/nomdl/parser_test.go", HadCopyrightNotice: true},
{Path: "store/perf/codec-perf-rig/main.go", NomsPath: "go/perf/codec-perf-rig/main.go", HadCopyrightNotice: true},
{Path: "store/perf/hash-perf-rig/main.go", NomsPath: "go/perf/hash-perf-rig/main.go", HadCopyrightNotice: true},
{Path: "store/perf/suite/suite.go", NomsPath: "go/perf/suite/suite.go", HadCopyrightNotice: true},
{Path: "store/perf/suite/suite_test.go", NomsPath: "go/perf/suite/suite_test.go", HadCopyrightNotice: true},
{Path: "store/sloppy/sloppy.go", NomsPath: "go/sloppy/sloppy.go", HadCopyrightNotice: true},

View File

@@ -682,3 +682,16 @@ SQL
[[ "${lines[2]}" =~ "2" ]] || false
}
@test "auto_increment: alter table add constraint for different database" {
skip "add constraint for different database fix in progress"
dolt sql <<SQL
CREATE DATABASE public;
CREATE TABLE public.test (pk integer NOT NULL, c1 integer, c2 integer);
ALTER TABLE public.test ADD CONSTRAINT serial_pk_pkey PRIMARY KEY (pk);
ALTER TABLE public.test MODIFY pk integer auto_increment;
SQL
run dolt sql -q "SHOW CREATE TABLE public.test"
[ $status -eq 0 ]
[[ "$output" =~ "NOT NULL AUTO_INCREMENT" ]] || false
}

View File

@@ -344,3 +344,28 @@ SQL
[[ "$output" =~ "| > | a | b | c |" ]] || false
}
@test "drop-create: drop table from different database" {
skip "fix not merged yet"
dolt sql <<SQL
create table test (currentId int primary key, currentText text);
insert into test values (1, 'text1'), (2, 'text2');
create schema common;
create table common.test (commonId integer, commonText text);
insert into test values (999, 'common database text1');
SQL
run dolt sql -q "select * from test"
currenttest=$output
run dolt sql -q "select * from common.test"
[[ "$output" =~ "common database text1" ]] || false
dolt sql -q "drop table common.test"
run dolt sql -q "select * from test"
[ "$output" = "$currenttest" ]
run dolt sql -q "select * from common.test"
[ "$status" -eq 1 ]
[[ "$output" =~ "table not found: test" ]] || false
}

View File

@@ -1853,3 +1853,37 @@ SQL
[[ "$output" =~ "4,5,6" ]] || false
[[ "${#lines[@]}" = "2" ]] || false
}
@test "foreign-keys: alter table add constraint for different database" {
skip "add constraint on foreign key without create index should be failing"
run dolt sql <<SQL
CREATE DATABASE public;
CREATE TABLE public.cities (pk integer NOT NULL, city varchar(255), state varchar(2));
CREATE TABLE public.states (state_id integer NOT NULL, state varchar(2));
ALTER TABLE public.cities ADD CONSTRAINT cities_pkey PRIMARY KEY (pk);
ALTER TABLE public.states ADD CONSTRAINT states_pkey PRIMARY KEY (state_id);
ALTER TABLE public.cities ADD CONSTRAINT foreign_key1 FOREIGN KEY (state) REFERENCES public.states(state)";
SQL
[ $status -eq 1 ]
[[ $output =~ "error" ]] || false
run dolt sql -q "SHOW CREATE TABLE public.cities"
[[ $output =~ "PRIMARY KEY (\`pk\`)" ]] || false
[[ ! $output =~ "CONSTRAINT" ]] || false
run dolt sql -q "SHOW CREATE TABLE public.states"
[[ $output =~ "PRIMARY KEY (\`state_id\`)" ]] || false
[[ ! $output =~ "KEY \`foreign_key1\` (\`state\`)" ]] || false
run dolt sql <<SQL
CREATE INDEX foreign_key1 ON public.states(state);
ALTER TABLE public.cities ADD CONSTRAINT foreign_key1 FOREIGN KEY (state) REFERENCES public.states(state);
SQL
[ $status -eq 0 ]
run dolt sql -q "SHOW CREATE TABLE public.cities"
[[ $output =~ "CONSTRAINT \`foreign_key1\` FOREIGN KEY (\`state\`) REFERENCES \`states\` (\`state\`)" ]] || false
run dolt sql -q "SHOW CREATE TABLE public.states"
[[ $output =~ "KEY \`foreign_key1\` (\`state\`)" ]] || false
}

View File

@@ -192,5 +192,5 @@ EOF
}
get_head_commit() {
dolt log -n 1 | grep -m 1 commit | cut -c 15-46
dolt log -n 1 | grep -m 1 commit | cut -c 13-44
}

View File

@@ -704,3 +704,40 @@ DELIM
run dolt sql -r csv -q "select * from keyless"
[ "${lines[1]}" = "0,42,2" ]
}
@test "import-create-tables: auto-increment table" {
cat <<SQL > schema.sql
CREATE TABLE test (
pk int PRIMARY KEY AUTO_INCREMENT,
v1 int
);
SQL
cat <<DELIM > data.csv
pk,v1
1,1
2,2
3,3
4,4
DELIM
run dolt table import -s schema.sql -c test data.csv
[ "$status" -eq 0 ]
[[ "$output" =~ "Rows Processed: 4, Additions: 4, Modifications: 0, Had No Effect: 0" ]] || false
[[ "$output" =~ "Import completed successfully." ]] || false
run dolt sql -r csv -q "select * from test order by pk ASC"
[ "$status" -eq 0 ]
[ "${#lines[@]}" -eq 5 ]
[ "${lines[1]}" = 1,1 ]
[ "${lines[2]}" = 2,2 ]
[ "${lines[3]}" = 3,3 ]
[ "${lines[4]}" = 4,4 ]
dolt sql -q "insert into test values (NULL, 5)"
run dolt sql -r csv -q "select * from test where pk = 5"
[ "$status" -eq 0 ]
[ "${#lines[@]}" -eq 2 ]
[ "${lines[1]}" = 5,5 ]
}

View File

@@ -2599,3 +2599,17 @@ SQL
[[ "$output" =~ "Bbbb" ]] || false
[[ "$output" =~ "bBbb" ]] || false
}
@test "index: alter table create index for different database" {
skip "create index for different database fix in progress"
dolt sql <<SQL
CREATE DATABASE public;
CREATE TABLE public.test (pk integer NOT NULL, c1 integer);
ALTER TABLE public.test ADD CONSTRAINT index_test_pkey PRIMARY KEY (pk);
CREATE INDEX index_test_c1_idx ON public.test (c1);
SQL
run dolt sql -q "show create table public.test"
[ $status -eq 0 ]
[[ "$output" =~ "KEY \`index_test_c1_idx\`" ]] || false
}

Some files were not shown because too many files have changed in this diff Show More