mirror of
https://github.com/dolthub/dolt.git
synced 2026-03-18 09:40:59 -05:00
Merge remote-tracking branch 'origin/main' into aaron/database-cleanup
This commit is contained in:
@@ -167,7 +167,8 @@ func (se *SqlEngine) NewContext(ctx context.Context) (*sql.Context, error) {
|
||||
}
|
||||
|
||||
func (se *SqlEngine) NewDoltSession(ctx context.Context, mysqlSess *sql.BaseSession) (*dsess.DoltSession, error) {
|
||||
return se.dsessFactory(ctx, mysqlSess, se.engine.Analyzer.Catalog.AllDatabases())
|
||||
tempCtx := sql.NewContext(ctx, sql.WithSession(mysqlSess))
|
||||
return se.dsessFactory(ctx, mysqlSess, se.engine.Analyzer.Catalog.AllDatabases(tempCtx))
|
||||
}
|
||||
|
||||
// GetReturnFormat() returns the printing format the engine is associated with.
|
||||
|
||||
167
go/cmd/dolt/commands/inspect.go
Normal file
167
go/cmd/dolt/commands/inspect.go
Normal file
@@ -0,0 +1,167 @@
|
||||
// Copyright 2021 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package commands
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/dolthub/dolt/go/cmd/dolt/cli"
|
||||
"github.com/dolthub/dolt/go/cmd/dolt/errhand"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/dbfactory"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/env"
|
||||
"github.com/dolthub/dolt/go/libraries/utils/argparser"
|
||||
"github.com/dolthub/dolt/go/libraries/utils/filesys"
|
||||
"github.com/dolthub/dolt/go/store/nbs"
|
||||
)
|
||||
|
||||
const tableFileIndexFlag = "index"
|
||||
|
||||
type InspectCmd struct {
|
||||
}
|
||||
|
||||
// Name is returns the name of the Dolt cli command. This is what is used on the command line to invoke the command
|
||||
func (cmd InspectCmd) Name() string {
|
||||
return "inspect"
|
||||
}
|
||||
|
||||
// Hidden should return true if this command should be hidden from the help text
|
||||
func (cmd InspectCmd) Hidden() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// RequiresRepo should return false if this interface is implemented, and the command does not have the requirement
|
||||
// that it be run from within a data repository directory
|
||||
func (cmd InspectCmd) RequiresRepo() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// Description returns a description of the command
|
||||
func (cmd InspectCmd) Description() string {
|
||||
return "Inspects a Dolt Database and collects stats."
|
||||
}
|
||||
|
||||
// CreateMarkdown creates a markdown file containing the helptext for the command at the given path
|
||||
func (cmd InspectCmd) CreateMarkdown(wr io.Writer, commandStr string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cmd InspectCmd) ArgParser() *argparser.ArgParser {
|
||||
ap := argparser.NewArgParser()
|
||||
ap.SupportsFlag(tableFileIndexFlag, "i", "Measure distribution error in table file chunk indexes.")
|
||||
return ap
|
||||
}
|
||||
|
||||
// Exec executes the command
|
||||
func (cmd InspectCmd) Exec(ctx context.Context, commandStr string, args []string, dEnv *env.DoltEnv) int {
|
||||
ap := cmd.ArgParser()
|
||||
help, usage := cli.HelpAndUsagePrinters(cli.GetCommandDocumentation(commandStr, cli.CommandDocumentationContent{}, ap))
|
||||
apr := cli.ParseArgsOrDie(ap, args, help)
|
||||
|
||||
var verr errhand.VerboseError
|
||||
if apr.Contains(tableFileIndexFlag) {
|
||||
verr = cmd.measureChunkIndexDistribution(ctx, dEnv)
|
||||
}
|
||||
|
||||
return HandleVErrAndExitCode(verr, usage)
|
||||
}
|
||||
|
||||
func (cmd InspectCmd) measureChunkIndexDistribution(ctx context.Context, dEnv *env.DoltEnv) errhand.VerboseError {
|
||||
newGen := filepath.Join(dEnv.GetDoltDir(), dbfactory.DataDir)
|
||||
oldGen := filepath.Join(newGen, "oldgen")
|
||||
|
||||
itr, err := NewTableFileIter([]string{newGen, oldGen}, dEnv.FS)
|
||||
if err != nil {
|
||||
return errhand.VerboseErrorFromError(err)
|
||||
}
|
||||
|
||||
sumErr, sumCnt := 0.0, 0
|
||||
for {
|
||||
path, _ := itr.next()
|
||||
if path == "" {
|
||||
break
|
||||
}
|
||||
|
||||
summary, err := cmd.processTableFile(path, dEnv.FS)
|
||||
if err != nil {
|
||||
return errhand.VerboseErrorFromError(err)
|
||||
}
|
||||
sumErr += summary.sumErr
|
||||
sumCnt += int(summary.count)
|
||||
|
||||
cli.Println(summary.format())
|
||||
}
|
||||
cli.Printf("average guess error: %f", sumErr/float64(sumCnt))
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cmd InspectCmd) processTableFile(path string, fs filesys.Filesys) (sum *chunkIndexSummary, err error) {
|
||||
var rdr io.ReadCloser
|
||||
rdr, err = fs.OpenForRead(path)
|
||||
if err != nil {
|
||||
return sum, err
|
||||
}
|
||||
defer func() {
|
||||
cerr := rdr.Close()
|
||||
if err == nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
|
||||
var prefixes []uint64
|
||||
prefixes, err = nbs.GetTableIndexPrefixes(rdr.(io.ReadSeeker))
|
||||
if err != nil {
|
||||
return sum, err
|
||||
}
|
||||
|
||||
sum = &chunkIndexSummary{
|
||||
file: path,
|
||||
count: uint32(len(prefixes)),
|
||||
//errs: make([]float64, 0, len(prefixes)),
|
||||
}
|
||||
|
||||
for i, prefix := range prefixes {
|
||||
sum.addPrefix(i, prefix)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
type chunkIndexSummary struct {
|
||||
file string
|
||||
count uint32
|
||||
//errs []float64
|
||||
sumErr float64
|
||||
maxErr float64
|
||||
}
|
||||
|
||||
func (s *chunkIndexSummary) format() string {
|
||||
return fmt.Sprintf("file: %s \t count: %d sum error: %f \t max error: %f ",
|
||||
s.file, s.count, s.sumErr, s.maxErr)
|
||||
}
|
||||
|
||||
func (s *chunkIndexSummary) addPrefix(i int, prefix uint64) {
|
||||
g := nbs.GuessPrefixOrdinal(prefix, s.count)
|
||||
guessErr := math.Abs(float64(i - g))
|
||||
|
||||
//s.errs = append(s.errs, guessErr)
|
||||
s.sumErr += guessErr
|
||||
if guessErr > s.maxErr {
|
||||
s.maxErr = guessErr
|
||||
}
|
||||
}
|
||||
@@ -38,18 +38,24 @@ const (
|
||||
mergesParam = "merges"
|
||||
minParentsParam = "min-parents"
|
||||
parentsParam = "parents"
|
||||
decorateParam = "decorate"
|
||||
oneLineParam = "oneline"
|
||||
)
|
||||
|
||||
type logOpts struct {
|
||||
numLines int
|
||||
showParents bool
|
||||
minParents int
|
||||
decoration string
|
||||
oneLine bool
|
||||
}
|
||||
|
||||
type logNode struct {
|
||||
commitMeta *doltdb.CommitMeta
|
||||
commitHash hash.Hash
|
||||
parentHashes []hash.Hash
|
||||
branchNames []string
|
||||
isHead bool
|
||||
}
|
||||
|
||||
var logDocs = cli.CommandDocumentationContent{
|
||||
@@ -91,6 +97,8 @@ func (cmd LogCmd) ArgParser() *argparser.ArgParser {
|
||||
ap.SupportsInt(minParentsParam, "", "parent_count", "The minimum number of parents a commit must have to be included in the log.")
|
||||
ap.SupportsFlag(mergesParam, "", "Equivalent to min-parents == 2, this will limit the log to commits with 2 or more parents.")
|
||||
ap.SupportsFlag(parentsParam, "", "Shows all parents of each commit in the log.")
|
||||
ap.SupportsString(decorateParam, "", "decorate_fmt", "Shows refs next to commits. Valid options are short, full, no, and auto")
|
||||
ap.SupportsFlag(oneLineParam, "", "Shows logs in a compact format.")
|
||||
return ap
|
||||
}
|
||||
|
||||
@@ -114,10 +122,19 @@ func (cmd LogCmd) logWithLoggerFunc(ctx context.Context, commandStr string, args
|
||||
minParents = 2
|
||||
}
|
||||
|
||||
decorateOption := apr.GetValueOrDefault(decorateParam, "auto")
|
||||
switch decorateOption {
|
||||
case "short", "full", "auto", "no":
|
||||
default:
|
||||
cli.PrintErrln(color.HiRedString("fatal: invalid --decorate option: " + decorateOption))
|
||||
return 1
|
||||
}
|
||||
opts := logOpts{
|
||||
numLines: apr.GetIntOrDefault(numLinesParam, -1),
|
||||
showParents: apr.Contains(parentsParam),
|
||||
minParents: minParents,
|
||||
oneLine: apr.Contains(oneLineParam),
|
||||
decoration: decorateOption,
|
||||
}
|
||||
|
||||
// Just dolt log
|
||||
@@ -152,6 +169,53 @@ func logCommits(ctx context.Context, dEnv *env.DoltEnv, cs *doltdb.CommitSpec, o
|
||||
return 1
|
||||
}
|
||||
|
||||
cHashToRefs := map[hash.Hash][]string{}
|
||||
|
||||
// Get all branches
|
||||
branches, err := dEnv.DoltDB.GetBranchesWithHashes(ctx)
|
||||
if err != nil {
|
||||
cli.PrintErrln(color.HiRedString("Fatal error: cannot get Branch information."))
|
||||
return 1
|
||||
}
|
||||
for _, b := range branches {
|
||||
refName := b.Ref.String()
|
||||
if opts.decoration != "full" {
|
||||
refName = b.Ref.GetPath() // trim out "refs/heads/"
|
||||
}
|
||||
refName = fmt.Sprintf("\033[32;1m%s\033[0m", refName) // branch names are bright green (32;1m)
|
||||
cHashToRefs[b.Hash] = append(cHashToRefs[b.Hash], refName)
|
||||
}
|
||||
|
||||
// Get all remote branches
|
||||
remotes, err := dEnv.DoltDB.GetRemotesWithHashes(ctx)
|
||||
if err != nil {
|
||||
cli.PrintErrln(color.HiRedString("Fatal error: cannot get Branch information."))
|
||||
return 1
|
||||
}
|
||||
for _, r := range remotes {
|
||||
refName := r.Ref.String()
|
||||
if opts.decoration != "full" {
|
||||
refName = r.Ref.GetPath() // trim out "refs/remotes/"
|
||||
}
|
||||
refName = fmt.Sprintf("\033[31;1m%s\033[0m", refName) // remote names are bright red (31;1m)
|
||||
cHashToRefs[r.Hash] = append(cHashToRefs[r.Hash], refName)
|
||||
}
|
||||
|
||||
// Get all tags
|
||||
tags, err := dEnv.DoltDB.GetTagsWithHashes(ctx)
|
||||
if err != nil {
|
||||
cli.PrintErrln(color.HiRedString("Fatal error: cannot get Tag information."))
|
||||
return 1
|
||||
}
|
||||
for _, t := range tags {
|
||||
refName := t.Ref.String()
|
||||
if opts.decoration != "full" {
|
||||
refName = t.Ref.GetPath() // trim out "refs/tags/"
|
||||
}
|
||||
refName = fmt.Sprintf("\033[33;1mtag: %s\033[0m", refName) // tags names are bright yellow (33;1m)
|
||||
cHashToRefs[t.Hash] = append(cHashToRefs[t.Hash], refName)
|
||||
}
|
||||
|
||||
h, err := commit.HashOf()
|
||||
|
||||
if err != nil {
|
||||
@@ -195,7 +259,12 @@ func logCommits(ctx context.Context, dEnv *env.DoltEnv, cs *doltdb.CommitSpec, o
|
||||
return 1
|
||||
}
|
||||
|
||||
commitsInfo = append(commitsInfo, logNode{meta, cmHash, pHashes})
|
||||
commitsInfo = append(commitsInfo, logNode{
|
||||
commitMeta: meta,
|
||||
commitHash: cmHash,
|
||||
parentHashes: pHashes,
|
||||
branchNames: cHashToRefs[cmHash],
|
||||
isHead: cmHash == h})
|
||||
}
|
||||
|
||||
logToStdOut(opts, commitsInfo)
|
||||
@@ -293,7 +362,10 @@ func logTableCommits(ctx context.Context, dEnv *env.DoltEnv, opts logOpts, cs *d
|
||||
return err
|
||||
}
|
||||
|
||||
commitsInfo = append(commitsInfo, logNode{meta, prevHash, ph})
|
||||
commitsInfo = append(commitsInfo, logNode{
|
||||
commitMeta: meta,
|
||||
commitHash: prevHash,
|
||||
parentHashes: ph})
|
||||
|
||||
numLines--
|
||||
}
|
||||
@@ -307,6 +379,84 @@ func logTableCommits(ctx context.Context, dEnv *env.DoltEnv, opts logOpts, cs *d
|
||||
return nil
|
||||
}
|
||||
|
||||
func logRefs(pager *outputpager.Pager, comm logNode) {
|
||||
// Do nothing if no associate branches
|
||||
if len(comm.branchNames) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
pager.Writer.Write([]byte("\033[33m(\033[0m"))
|
||||
if comm.isHead {
|
||||
pager.Writer.Write([]byte("\033[36;1mHEAD -> \033[0m"))
|
||||
}
|
||||
pager.Writer.Write([]byte(strings.Join(comm.branchNames, "\033[33m, \033[0m"))) // Separate with Dim Yellow comma
|
||||
pager.Writer.Write([]byte("\033[33m) \033[0m"))
|
||||
}
|
||||
|
||||
func logCompact(pager *outputpager.Pager, opts logOpts, commits []logNode) {
|
||||
for _, comm := range commits {
|
||||
if len(comm.parentHashes) < opts.minParents {
|
||||
return
|
||||
}
|
||||
|
||||
chStr := comm.commitHash.String()
|
||||
if opts.showParents {
|
||||
for _, h := range comm.parentHashes {
|
||||
chStr += " " + h.String()
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: use short hash instead
|
||||
// Write commit hash
|
||||
pager.Writer.Write([]byte(fmt.Sprintf("\033[33m%s \033[0m", chStr)))
|
||||
|
||||
if opts.decoration != "no" {
|
||||
logRefs(pager, comm)
|
||||
}
|
||||
|
||||
formattedDesc := strings.Replace(comm.commitMeta.Description, "\n", " ", -1) + "\n"
|
||||
pager.Writer.Write([]byte(fmt.Sprintf(formattedDesc)))
|
||||
}
|
||||
}
|
||||
|
||||
func logDefault(pager *outputpager.Pager, opts logOpts, commits []logNode) {
|
||||
for _, comm := range commits {
|
||||
if len(comm.parentHashes) < opts.minParents {
|
||||
return
|
||||
}
|
||||
|
||||
chStr := comm.commitHash.String()
|
||||
if opts.showParents {
|
||||
for _, h := range comm.parentHashes {
|
||||
chStr += " " + h.String()
|
||||
}
|
||||
}
|
||||
|
||||
// Write commit hash
|
||||
pager.Writer.Write([]byte(fmt.Sprintf("\033[33mcommit %s \033[0m", chStr))) // Use Dim Yellow (33m)
|
||||
|
||||
// Show decoration
|
||||
if opts.decoration != "no" {
|
||||
logRefs(pager, comm)
|
||||
}
|
||||
|
||||
if len(comm.parentHashes) > 1 {
|
||||
pager.Writer.Write([]byte(fmt.Sprintf("\nMerge:")))
|
||||
for _, h := range comm.parentHashes {
|
||||
pager.Writer.Write([]byte(fmt.Sprintf(" " + h.String())))
|
||||
}
|
||||
}
|
||||
|
||||
pager.Writer.Write([]byte(fmt.Sprintf("\nAuthor: %s <%s>", comm.commitMeta.Name, comm.commitMeta.Email)))
|
||||
|
||||
timeStr := comm.commitMeta.FormatTS()
|
||||
pager.Writer.Write([]byte(fmt.Sprintf("\nDate: %s", timeStr)))
|
||||
|
||||
formattedDesc := "\n\n\t" + strings.Replace(comm.commitMeta.Description, "\n", "\n\t", -1) + "\n\n"
|
||||
pager.Writer.Write([]byte(fmt.Sprintf(formattedDesc)))
|
||||
}
|
||||
}
|
||||
|
||||
func logToStdOut(opts logOpts, commits []logNode) {
|
||||
if cli.ExecuteWithStdioRestored == nil {
|
||||
return
|
||||
@@ -314,35 +464,10 @@ func logToStdOut(opts logOpts, commits []logNode) {
|
||||
cli.ExecuteWithStdioRestored(func() {
|
||||
pager := outputpager.Start()
|
||||
defer pager.Stop()
|
||||
|
||||
for _, comm := range commits {
|
||||
if len(comm.parentHashes) < opts.minParents {
|
||||
return
|
||||
}
|
||||
|
||||
chStr := comm.commitHash.String()
|
||||
if opts.showParents {
|
||||
for _, h := range comm.parentHashes {
|
||||
chStr += " " + h.String()
|
||||
}
|
||||
}
|
||||
|
||||
pager.Writer.Write([]byte(fmt.Sprintf("\033[1;33mcommit %s \033[0m", chStr)))
|
||||
|
||||
if len(comm.parentHashes) > 1 {
|
||||
pager.Writer.Write([]byte(fmt.Sprintf("\nMerge:")))
|
||||
for _, h := range comm.parentHashes {
|
||||
pager.Writer.Write([]byte(fmt.Sprintf(" " + h.String())))
|
||||
}
|
||||
}
|
||||
|
||||
pager.Writer.Write([]byte(fmt.Sprintf("\nAuthor: %s <%s>", comm.commitMeta.Name, comm.commitMeta.Email)))
|
||||
|
||||
timeStr := comm.commitMeta.FormatTS()
|
||||
pager.Writer.Write([]byte(fmt.Sprintf("\nDate: %s", timeStr)))
|
||||
|
||||
formattedDesc := "\n\n\t" + strings.Replace(comm.commitMeta.Description, "\n", "\n\t", -1) + "\n\n"
|
||||
pager.Writer.Write([]byte(fmt.Sprintf(formattedDesc)))
|
||||
if opts.oneLine {
|
||||
logCompact(pager, opts, commits)
|
||||
} else {
|
||||
logDefault(pager, opts, commits)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -45,8 +45,15 @@ var loginDocs = cli.CommandDocumentationContent{
|
||||
Synopsis: []string{"[{{.LessThan}}creds{{.GreaterThan}}]"},
|
||||
}
|
||||
|
||||
// The LoginCmd doesn't handle its own signals, but should stop cancel global context when receiving SIGINT signal
|
||||
func (cmd LoginCmd) InstallsSignalHandlers() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
type LoginCmd struct{}
|
||||
|
||||
var _ cli.SignalCommand = SqlCmd{}
|
||||
|
||||
// Name is returns the name of the Dolt cli command. This is what is used on the command line to invoke the command
|
||||
func (cmd LoginCmd) Name() string {
|
||||
return "login"
|
||||
|
||||
@@ -200,6 +200,10 @@ func NewTableFileIter(dirs []string, fs filesys.Filesys) (*TableFileIter, error)
|
||||
}
|
||||
|
||||
func (itr *TableFileIter) next() (string, time.Time) {
|
||||
if itr.pos >= len(itr.files) {
|
||||
return "", time.Time{}
|
||||
}
|
||||
|
||||
curr := itr.files[itr.pos]
|
||||
itr.pos++
|
||||
|
||||
|
||||
@@ -996,7 +996,7 @@ func processQuery(ctx *sql.Context, query string, se *engine.SqlEngine) (sql.Sch
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
_, err = sql.RowIterToRows(ctx, ri)
|
||||
_, err = sql.RowIterToRows(ctx, nil, ri)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
@@ -1007,7 +1007,7 @@ func processQuery(ctx *sql.Context, query string, se *engine.SqlEngine) (sql.Sch
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
_, err = sql.RowIterToRows(ctx, ri)
|
||||
_, err = sql.RowIterToRows(ctx, nil, ri)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
@@ -1017,7 +1017,7 @@ func processQuery(ctx *sql.Context, query string, se *engine.SqlEngine) (sql.Sch
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
_, err = sql.RowIterToRows(ctx, ri)
|
||||
_, err = sql.RowIterToRows(ctx, nil, ri)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
@@ -51,7 +51,7 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
Version = "0.36.1"
|
||||
Version = "0.36.2"
|
||||
)
|
||||
|
||||
var dumpDocsCommand = &commands.DumpDocsCmd{}
|
||||
@@ -97,6 +97,7 @@ var doltCommand = cli.NewSubCommandHandler("dolt", "it's git for data", []cli.Co
|
||||
commands.RootsCmd{},
|
||||
commands.VersionCmd{VersionStr: Version},
|
||||
commands.DumpCmd{},
|
||||
commands.InspectCmd{},
|
||||
dumpDocsCommand,
|
||||
dumpZshCommand,
|
||||
})
|
||||
|
||||
@@ -13,14 +13,13 @@ require (
|
||||
github.com/bcicen/jstream v1.0.0
|
||||
github.com/boltdb/bolt v1.3.1
|
||||
github.com/cenkalti/backoff v2.2.1+incompatible
|
||||
github.com/codahale/blake2 v0.0.0-20150924215134-8d10d0420cbf
|
||||
github.com/denisbrodbeck/machineid v1.0.1
|
||||
github.com/dolthub/dolt/go/gen/proto/dolt/services/eventsapi v0.0.0-20201005193433-3ee972b1d078
|
||||
github.com/dolthub/fslock v0.0.3
|
||||
github.com/dolthub/ishell v0.0.0-20220112232610-14e753f0f371
|
||||
github.com/dolthub/mmap-go v1.0.4-0.20201107010347-f9f2a9588a66
|
||||
github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81
|
||||
github.com/dolthub/vitess v0.0.0-20220205072827-9c6acb39686a
|
||||
github.com/dolthub/vitess v0.0.0-20220207220721-35d6793fac38
|
||||
github.com/dustin/go-humanize v1.0.0
|
||||
github.com/fatih/color v1.9.0
|
||||
github.com/flynn-archive/go-shlex v0.0.0-20150515145356-3f9db97f8568
|
||||
@@ -69,7 +68,7 @@ require (
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/dolthub/go-mysql-server v0.11.1-0.20220208174427-9756c7d7167f
|
||||
github.com/dolthub/go-mysql-server v0.11.1-0.20220215141938-c484f95c3408
|
||||
github.com/google/flatbuffers v2.0.5+incompatible
|
||||
github.com/kch42/buzhash v0.0.0-20160816060738-9bdec3dec7c6
|
||||
github.com/prometheus/client_golang v1.11.0
|
||||
|
||||
10
go/go.sum
10
go/go.sum
@@ -142,8 +142,6 @@ github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDk
|
||||
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
|
||||
github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk=
|
||||
github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa/go.mod h1:zn76sxSg3SzpJ0PPJaLDCu+Bu0Lg3sKTORVIj19EIF8=
|
||||
github.com/codahale/blake2 v0.0.0-20150924215134-8d10d0420cbf h1:5ZeQB3mThuz5C2MSER6T5GdtXTF9CMMk42F9BOyRsEQ=
|
||||
github.com/codahale/blake2 v0.0.0-20150924215134-8d10d0420cbf/go.mod h1:BO2rLUAZMrpgh6GBVKi0Gjdqw2MgCtJrtmUdDeZRKjY=
|
||||
github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd/go.mod h1:sE/e/2PUdi/liOCUjSTXgM1o87ZssimdTWN964YiIeI=
|
||||
github.com/colinmarc/hdfs/v2 v2.1.1/go.mod h1:M3x+k8UKKmxtFu++uAZ0OtDU8jR3jnaZIAc6yK4Ue0c=
|
||||
github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk=
|
||||
@@ -172,8 +170,8 @@ github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZm
|
||||
github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no=
|
||||
github.com/dolthub/fslock v0.0.3 h1:iLMpUIvJKMKm92+N1fmHVdxJP5NdyDK5bK7z7Ba2s2U=
|
||||
github.com/dolthub/fslock v0.0.3/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0=
|
||||
github.com/dolthub/go-mysql-server v0.11.1-0.20220208174427-9756c7d7167f h1:bbLVyFtC7Wm2q1QZZGFUSyclY9nKUTsQ+Sn3ywkwpOs=
|
||||
github.com/dolthub/go-mysql-server v0.11.1-0.20220208174427-9756c7d7167f/go.mod h1:X2i6+DzsBgl5uDu1dzNayauCEZFUE+qIEriSv4M8v3s=
|
||||
github.com/dolthub/go-mysql-server v0.11.1-0.20220215141938-c484f95c3408 h1:+pG8pYVBWPfDtAF1YMGfvyHvfjDrOjii+AkzNcZLBTA=
|
||||
github.com/dolthub/go-mysql-server v0.11.1-0.20220215141938-c484f95c3408/go.mod h1:fa5urhUZz6+UWMVrTcgxl/INnDGaqmkl89V/4mocqrY=
|
||||
github.com/dolthub/ishell v0.0.0-20220112232610-14e753f0f371 h1:oyPHJlzumKta1vnOQqUnfdz+pk3EmnHS3Nd0cCT0I2g=
|
||||
github.com/dolthub/ishell v0.0.0-20220112232610-14e753f0f371/go.mod h1:dhGBqcCEfK5kuFmeO5+WOx3hqc1k3M29c1oS/R7N4ms=
|
||||
github.com/dolthub/jsonpath v0.0.0-20210609232853-d49537a30474 h1:xTrR+l5l+1Lfq0NvhiEsctylXinUMFhhsqaEcl414p8=
|
||||
@@ -182,8 +180,8 @@ github.com/dolthub/mmap-go v1.0.4-0.20201107010347-f9f2a9588a66 h1:WRPDbpJWEnPxP
|
||||
github.com/dolthub/mmap-go v1.0.4-0.20201107010347-f9f2a9588a66/go.mod h1:N5ZIbMGuDUpTpOFQ7HcsN6WSIpTGQjHP+Mz27AfmAgk=
|
||||
github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81 h1:7/v8q9XGFa6q5Ap4Z/OhNkAMBaK5YeuEzwJt+NZdhiE=
|
||||
github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81/go.mod h1:siLfyv2c92W1eN/R4QqG/+RjjX5W2+gCTRjZxBjI3TY=
|
||||
github.com/dolthub/vitess v0.0.0-20220205072827-9c6acb39686a h1:+61CpK9SwG/QFNE+vn6Fxk00GRQgtR+CA6Nvsr87y8g=
|
||||
github.com/dolthub/vitess v0.0.0-20220205072827-9c6acb39686a/go.mod h1:qpZ4j0dval04OgZJ5fyKnlniSFUosTH280pdzUjUJig=
|
||||
github.com/dolthub/vitess v0.0.0-20220207220721-35d6793fac38 h1:qUbVRsX2CPyjj/uLrPu9L69rGiYRb5vwzw7PC5c/Wh8=
|
||||
github.com/dolthub/vitess v0.0.0-20220207220721-35d6793fac38/go.mod h1:qpZ4j0dval04OgZJ5fyKnlniSFUosTH280pdzUjUJig=
|
||||
github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
|
||||
github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
|
||||
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
|
||||
|
||||
@@ -23,6 +23,7 @@ import (
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"go.uber.org/zap/buffer"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/dbfactory"
|
||||
@@ -128,13 +129,17 @@ func TestPushOnWriteHook(t *testing.T) {
|
||||
|
||||
t.Run("replicate to remote", func(t *testing.T) {
|
||||
srcCommit, err := ddb.Commit(context.Background(), valHash, ref.NewBranchRef(defaultBranch), meta)
|
||||
require.NoError(t, err)
|
||||
|
||||
ds, err := ddb.db.GetDataset(ctx, "refs/heads/main")
|
||||
require.NoError(t, err)
|
||||
|
||||
err = hook.Execute(ctx, ds, ddb.db)
|
||||
assert.NoError(t, err)
|
||||
require.NoError(t, err)
|
||||
|
||||
cs, _ = NewCommitSpec(defaultBranch)
|
||||
destCommit, err := destDB.Resolve(context.Background(), cs, nil)
|
||||
|
||||
require.NoError(t, err)
|
||||
srcHash, _ := srcCommit.HashOf()
|
||||
destHash, _ := destCommit.HashOf()
|
||||
assert.Equal(t, srcHash, destHash)
|
||||
@@ -252,12 +257,15 @@ func TestAsyncPushOnWrite(t *testing.T) {
|
||||
|
||||
meta, err = NewCommitMeta(committerName, committerEmail, "Sample data")
|
||||
if err != nil {
|
||||
t.Error("Failed to commit")
|
||||
t.Error("Failed to create CommitMeta")
|
||||
}
|
||||
|
||||
_, err = ddb.Commit(context.Background(), valHash, ref.NewBranchRef(defaultBranch), meta)
|
||||
require.NoError(t, err)
|
||||
ds, err := ddb.db.GetDataset(ctx, "refs/heads/main")
|
||||
require.NoError(t, err)
|
||||
err = hook.Execute(ctx, ds, ddb.db)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -803,6 +803,31 @@ func (ddb *DoltDB) GetTags(ctx context.Context) ([]ref.DoltRef, error) {
|
||||
return ddb.GetRefsOfType(ctx, tagsRefFilter)
|
||||
}
|
||||
|
||||
type TagWithHash struct {
|
||||
Ref ref.DoltRef
|
||||
Hash hash.Hash
|
||||
}
|
||||
|
||||
// GetTagsWithHashes returns a list of objects containing TagRefs with their associated Commit's hash
|
||||
func (ddb *DoltDB) GetTagsWithHashes(ctx context.Context) ([]TagWithHash, error) {
|
||||
var refs []TagWithHash
|
||||
err := ddb.VisitRefsOfType(ctx, tagsRefFilter, func(r ref.DoltRef, v types.Value) error {
|
||||
if tr, ok := r.(ref.TagRef); ok {
|
||||
tag, err := ddb.ResolveTag(ctx, tr)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
h, err := tag.Commit.HashOf()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
refs = append(refs, TagWithHash{r, h})
|
||||
}
|
||||
return nil
|
||||
})
|
||||
return refs, err
|
||||
}
|
||||
|
||||
var workspacesRefFilter = map[ref.RefType]struct{}{ref.WorkspaceRefType: {}}
|
||||
|
||||
// GetWorkspaces returns a list of all workspaces in the database.
|
||||
@@ -817,6 +842,22 @@ func (ddb *DoltDB) GetRemoteRefs(ctx context.Context) ([]ref.DoltRef, error) {
|
||||
return ddb.GetRefsOfType(ctx, remotesRefFilter)
|
||||
}
|
||||
|
||||
type RemoteWithHash struct {
|
||||
Ref ref.DoltRef
|
||||
Hash hash.Hash
|
||||
}
|
||||
|
||||
func (ddb *DoltDB) GetRemotesWithHashes(ctx context.Context) ([]RemoteWithHash, error) {
|
||||
var refs []RemoteWithHash
|
||||
err := ddb.VisitRefsOfType(ctx, remotesRefFilter, func(r ref.DoltRef, v types.Value) error {
|
||||
if tr, ok := v.(types.Ref); ok {
|
||||
refs = append(refs, RemoteWithHash{r, tr.TargetHash()})
|
||||
}
|
||||
return nil
|
||||
})
|
||||
return refs, err
|
||||
}
|
||||
|
||||
// GetHeadRefs returns a list of all refs that point to a Commit
|
||||
func (ddb *DoltDB) GetHeadRefs(ctx context.Context) ([]ref.DoltRef, error) {
|
||||
return ddb.GetRefsOfType(ctx, ref.HeadRefTypes)
|
||||
@@ -1163,6 +1204,9 @@ func (ddb *DoltDB) GC(ctx context.Context, uncommitedVals ...hash.Hash) error {
|
||||
}
|
||||
|
||||
datasets, err := ddb.db.Datasets(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
newGen := hash.NewHashSet(uncommitedVals...)
|
||||
oldGen := make(hash.HashSet)
|
||||
err = datasets.IterAll(ctx, func(key, value types.Value) error {
|
||||
|
||||
@@ -70,6 +70,11 @@ func (c *ChannelRowSource) WithChildren(children ...sql.Node) (sql.Node, error)
|
||||
return c, nil
|
||||
}
|
||||
|
||||
// CheckPrivileges implements the sql.Node interface.
|
||||
func (c *ChannelRowSource) CheckPrivileges(ctx *sql.Context, opChecker sql.PrivilegedOperationChecker) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// channelRowIter wraps the channel under the sql.RowIter interface
|
||||
type channelRowIter struct {
|
||||
rowChannel chan sql.Row
|
||||
|
||||
@@ -298,7 +298,7 @@ func (s *SqlEngineTableWriter) createTable() error {
|
||||
return err
|
||||
}
|
||||
|
||||
analyzedQueryProcess := analyzer.StripQueryProcess(analyzed.(*plan.QueryProcess))
|
||||
analyzedQueryProcess := analyzer.StripPassthroughNodes(analyzed.(*plan.QueryProcess))
|
||||
|
||||
ri, err := analyzedQueryProcess.RowIter(s.sqlCtx, nil)
|
||||
if err != nil {
|
||||
@@ -342,7 +342,7 @@ func (s *SqlEngineTableWriter) createInsertImportNode(source chan sql.Row, ignor
|
||||
return nil, err
|
||||
}
|
||||
|
||||
analyzed = analyzer.StripQueryProcess(analyzed)
|
||||
analyzed = analyzer.StripPassthroughNodes(analyzed)
|
||||
|
||||
// Get the first insert (wrapped with the error handler)
|
||||
plan.Inspect(analyzed, func(node sql.Node) bool {
|
||||
|
||||
@@ -20,6 +20,7 @@ import (
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
|
||||
"github.com/dolthub/dolt/go/store/geometry"
|
||||
"github.com/dolthub/dolt/go/store/types"
|
||||
)
|
||||
|
||||
@@ -108,9 +109,9 @@ func (ti *linestringType) Equals(other TypeInfo) bool {
|
||||
// FormatValue implements TypeInfo interface.
|
||||
func (ti *linestringType) FormatValue(v types.Value) (*string, error) {
|
||||
if val, ok := v.(types.Linestring); ok {
|
||||
buf := make([]byte, types.EWKBHeaderSize+types.LengthSize+types.PointDataSize*len(val.Points))
|
||||
types.WriteEWKBHeader(val, buf[:types.EWKBHeaderSize])
|
||||
types.WriteEWKBLineData(val, buf[types.EWKBHeaderSize:])
|
||||
buf := make([]byte, geometry.EWKBHeaderSize+types.LengthSize+geometry.PointSize*len(val.Points))
|
||||
types.WriteEWKBHeader(val, buf[:geometry.EWKBHeaderSize])
|
||||
types.WriteEWKBLineData(val, buf[geometry.EWKBHeaderSize:])
|
||||
resStr := string(buf)
|
||||
return &resStr, nil
|
||||
}
|
||||
|
||||
@@ -20,6 +20,7 @@ import (
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
|
||||
"github.com/dolthub/dolt/go/store/geometry"
|
||||
"github.com/dolthub/dolt/go/store/types"
|
||||
)
|
||||
|
||||
@@ -102,9 +103,9 @@ func (ti *pointType) Equals(other TypeInfo) bool {
|
||||
// FormatValue implements TypeInfo interface.
|
||||
func (ti *pointType) FormatValue(v types.Value) (*string, error) {
|
||||
if val, ok := v.(types.Point); ok {
|
||||
buf := make([]byte, types.EWKBHeaderSize+types.PointDataSize)
|
||||
types.WriteEWKBHeader(val, buf[:types.EWKBHeaderSize])
|
||||
types.WriteEWKBPointData(val, buf[types.EWKBHeaderSize:])
|
||||
buf := make([]byte, geometry.EWKBHeaderSize+geometry.PointSize)
|
||||
types.WriteEWKBHeader(val, buf[:geometry.EWKBHeaderSize])
|
||||
types.WriteEWKBPointData(val, buf[geometry.EWKBHeaderSize:])
|
||||
resStr := string(buf)
|
||||
return &resStr, nil
|
||||
}
|
||||
|
||||
@@ -20,6 +20,7 @@ import (
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
|
||||
"github.com/dolthub/dolt/go/store/geometry"
|
||||
"github.com/dolthub/dolt/go/store/types"
|
||||
)
|
||||
|
||||
@@ -108,13 +109,13 @@ func (ti *polygonType) Equals(other TypeInfo) bool {
|
||||
// FormatValue implements TypeInfo interface.
|
||||
func (ti *polygonType) FormatValue(v types.Value) (*string, error) {
|
||||
if val, ok := v.(types.Polygon); ok {
|
||||
size := types.EWKBHeaderSize + types.LengthSize
|
||||
size := geometry.EWKBHeaderSize + types.LengthSize
|
||||
for _, l := range val.Lines {
|
||||
size += types.LengthSize + types.PointDataSize*len(l.Points)
|
||||
size += types.LengthSize + geometry.PointSize*len(l.Points)
|
||||
}
|
||||
buf := make([]byte, size)
|
||||
types.WriteEWKBHeader(val, buf[:types.EWKBHeaderSize])
|
||||
types.WriteEWKBPolyData(val, buf[types.EWKBHeaderSize:])
|
||||
types.WriteEWKBHeader(val, buf[:geometry.EWKBHeaderSize])
|
||||
types.WriteEWKBPolyData(val, buf[geometry.EWKBHeaderSize:])
|
||||
resStr := string(buf)
|
||||
return &resStr, nil
|
||||
}
|
||||
|
||||
@@ -18,8 +18,6 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"sync"
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
"github.com/dolthub/vitess/go/sqltypes"
|
||||
@@ -27,24 +25,6 @@ import (
|
||||
"github.com/dolthub/dolt/go/store/types"
|
||||
)
|
||||
|
||||
const spatialTypesFeatureFlagKey = "DOLT_ENABLE_SPATIAL_TYPES"
|
||||
|
||||
// use SpatialTypesEnabled() to check, don't access directly
|
||||
var spatialTypesFeatureFlag = false
|
||||
|
||||
func init() {
|
||||
// set the spatial types feature flag to true if the env var is set
|
||||
if v, ok := os.LookupEnv(spatialTypesFeatureFlagKey); ok && v != "" {
|
||||
spatialTypesFeatureFlag = true
|
||||
}
|
||||
}
|
||||
|
||||
var spatialTypesLock = &sync.RWMutex{}
|
||||
|
||||
func SpatialTypesEnabled() bool {
|
||||
return spatialTypesFeatureFlag
|
||||
}
|
||||
|
||||
type Identifier string
|
||||
|
||||
const (
|
||||
@@ -266,17 +246,6 @@ func FromSqlType(sqlType sql.Type) (TypeInfo, error) {
|
||||
|
||||
// FromTypeParams constructs a TypeInfo from the given identifier and parameters.
|
||||
func FromTypeParams(id Identifier, params map[string]string) (TypeInfo, error) {
|
||||
if SpatialTypesEnabled() {
|
||||
switch id {
|
||||
case PointTypeIdentifier:
|
||||
return PointType, nil
|
||||
case LinestringTypeIdentifier:
|
||||
return LinestringType, nil
|
||||
case PolygonTypeIdentifier:
|
||||
return PolygonType, nil
|
||||
}
|
||||
}
|
||||
|
||||
switch id {
|
||||
case BitTypeIdentifier:
|
||||
return CreateBitTypeFromParams(params)
|
||||
@@ -298,6 +267,12 @@ func FromTypeParams(id Identifier, params map[string]string) (TypeInfo, error) {
|
||||
return CreateIntTypeFromParams(params)
|
||||
case JSONTypeIdentifier:
|
||||
return JSONType, nil
|
||||
case PointTypeIdentifier:
|
||||
return PointType, nil
|
||||
case LinestringTypeIdentifier:
|
||||
return LinestringType, nil
|
||||
case PolygonTypeIdentifier:
|
||||
return PolygonType, nil
|
||||
case SetTypeIdentifier:
|
||||
return CreateSetTypeFromParams(params)
|
||||
case TimeTypeIdentifier:
|
||||
|
||||
@@ -29,15 +29,6 @@ import (
|
||||
"github.com/dolthub/dolt/go/store/types"
|
||||
)
|
||||
|
||||
func testWithSpatialTypesEnabled(cb func()) {
|
||||
spatialTypesLock.Lock()
|
||||
defer spatialTypesLock.Unlock()
|
||||
|
||||
spatialTypesFeatureFlag = true
|
||||
cb()
|
||||
spatialTypesFeatureFlag = false
|
||||
}
|
||||
|
||||
func TestTypeInfoSuite(t *testing.T) {
|
||||
typeInfoArrays, validTypeValues := generateTypeInfoArrays(t)
|
||||
t.Run("VerifyArray", func(t *testing.T) {
|
||||
@@ -234,11 +225,9 @@ func testTypeInfoGetTypeParams(t *testing.T, tiArrays [][]TypeInfo) {
|
||||
ti.GetTypeIdentifier() == LinestringTypeIdentifier ||
|
||||
ti.GetTypeIdentifier() == PolygonTypeIdentifier {
|
||||
t.Run(ti.String(), func(t *testing.T) {
|
||||
testWithSpatialTypesEnabled(func() {
|
||||
newTi, err := FromTypeParams(ti.GetTypeIdentifier(), ti.GetTypeParams())
|
||||
require.NoError(t, err)
|
||||
require.True(t, ti.Equals(newTi), "%v\n%v", ti.String(), newTi.String())
|
||||
})
|
||||
newTi, err := FromTypeParams(ti.GetTypeIdentifier(), ti.GetTypeParams())
|
||||
require.NoError(t, err)
|
||||
require.True(t, ti.Equals(newTi), "%v\n%v", ti.String(), newTi.String())
|
||||
})
|
||||
} else {
|
||||
t.Run(ti.String(), func(t *testing.T) {
|
||||
|
||||
@@ -22,6 +22,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
"github.com/dolthub/go-mysql-server/sql/grant_tables"
|
||||
"github.com/dolthub/vitess/go/vt/proto/query"
|
||||
"gopkg.in/src-d/go-errors.v1"
|
||||
|
||||
@@ -59,8 +60,15 @@ type SqlDatabase interface {
|
||||
func DbsAsDSQLDBs(dbs []sql.Database) []SqlDatabase {
|
||||
dsqlDBs := make([]SqlDatabase, 0, len(dbs))
|
||||
for _, db := range dbs {
|
||||
sqlDb, ok := db.(SqlDatabase)
|
||||
if !ok {
|
||||
var sqlDb SqlDatabase
|
||||
if sqlDatabase, ok := db.(SqlDatabase); ok {
|
||||
sqlDb = sqlDatabase
|
||||
} else if privDatabase, ok := db.(grant_tables.PrivilegedDatabase); ok {
|
||||
if sqlDatabase, ok := privDatabase.Unwrap().(SqlDatabase); ok {
|
||||
sqlDb = sqlDatabase
|
||||
}
|
||||
}
|
||||
if sqlDb == nil {
|
||||
continue
|
||||
}
|
||||
switch v := sqlDb.(type) {
|
||||
|
||||
@@ -98,7 +98,7 @@ func (p DoltDatabaseProvider) WithDbFactoryUrl(url string) DoltDatabaseProvider
|
||||
return p
|
||||
}
|
||||
|
||||
func (p DoltDatabaseProvider) Database(name string) (db sql.Database, err error) {
|
||||
func (p DoltDatabaseProvider) Database(ctx *sql.Context, name string) (db sql.Database, err error) {
|
||||
name = strings.ToLower(name)
|
||||
var ok bool
|
||||
p.mu.RLock()
|
||||
@@ -108,7 +108,7 @@ func (p DoltDatabaseProvider) Database(name string) (db sql.Database, err error)
|
||||
return db, nil
|
||||
}
|
||||
|
||||
db, _, ok, err = p.databaseForRevision(context.Background(), name)
|
||||
db, _, ok, err = p.databaseForRevision(ctx, name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -128,12 +128,12 @@ func (p DoltDatabaseProvider) Database(name string) (db sql.Database, err error)
|
||||
|
||||
}
|
||||
|
||||
func (p DoltDatabaseProvider) HasDatabase(name string) bool {
|
||||
_, err := p.Database(name)
|
||||
func (p DoltDatabaseProvider) HasDatabase(ctx *sql.Context, name string) bool {
|
||||
_, err := p.Database(ctx, name)
|
||||
return err == nil
|
||||
}
|
||||
|
||||
func (p DoltDatabaseProvider) AllDatabases() (all []sql.Database) {
|
||||
func (p DoltDatabaseProvider) AllDatabases(ctx *sql.Context) (all []sql.Database) {
|
||||
p.mu.RLock()
|
||||
defer p.mu.RUnlock()
|
||||
|
||||
@@ -286,7 +286,7 @@ func (p DoltDatabaseProvider) RevisionDbState(ctx context.Context, revDB string)
|
||||
return init, nil
|
||||
}
|
||||
|
||||
func (p DoltDatabaseProvider) Function(name string) (sql.Function, error) {
|
||||
func (p DoltDatabaseProvider) Function(ctx *sql.Context, name string) (sql.Function, error) {
|
||||
fn, ok := p.functions[strings.ToLower(name)]
|
||||
if !ok {
|
||||
return nil, sql.ErrFunctionNotFound.New(name)
|
||||
|
||||
@@ -39,7 +39,7 @@ func init() {
|
||||
sql.SystemVariables.AddSystemVariables([]sql.SystemVariable{
|
||||
{ // If true, causes a Dolt commit to occur when you commit a transaction.
|
||||
Name: DoltCommitOnTransactionCommit,
|
||||
Scope: sql.SystemVariableScope_Session,
|
||||
Scope: sql.SystemVariableScope_Both,
|
||||
Dynamic: true,
|
||||
SetVarHintApplies: false,
|
||||
Type: sql.NewSystemBoolType(DoltCommitOnTransactionCommit),
|
||||
|
||||
@@ -73,14 +73,20 @@ func TestSingleScript(t *testing.T) {
|
||||
|
||||
var scripts = []enginetest.ScriptTest{
|
||||
{
|
||||
Name: "CrossDB Queries",
|
||||
Name: "insert into sparse auto_increment table",
|
||||
SetUpScript: []string{
|
||||
"create table mytable (i bigint primary key, s varchar(200));",
|
||||
"create table auto (pk int primary key auto_increment)",
|
||||
"insert into auto values (10), (20), (30)",
|
||||
"insert into auto values (NULL)",
|
||||
"insert into auto values (40)",
|
||||
"insert into auto values (0)",
|
||||
},
|
||||
Assertions: []enginetest.ScriptTestAssertion{
|
||||
{
|
||||
Query: "ALTER TABLE mytable ADD COLUMN s2 TEXT COMMENT 'hello' AFTER i",
|
||||
Expected: nil,
|
||||
Query: "select * from auto order by 1",
|
||||
Expected: []sql.Row{
|
||||
{10}, {20}, {30}, {31}, {40}, {41},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -213,6 +219,14 @@ func TestScripts(t *testing.T) {
|
||||
enginetest.TestScripts(t, newDoltHarness(t).WithSkippedQueries(skipped))
|
||||
}
|
||||
|
||||
func TestUserPrivileges(t *testing.T) {
|
||||
enginetest.TestUserPrivileges(t, newDoltHarness(t))
|
||||
}
|
||||
|
||||
func TestUserAuthentication(t *testing.T) {
|
||||
enginetest.TestUserAuthentication(t, newDoltHarness(t))
|
||||
}
|
||||
|
||||
func TestComplexIndexQueries(t *testing.T) {
|
||||
enginetest.TestComplexIndexQueries(t, newDoltHarness(t))
|
||||
}
|
||||
@@ -303,6 +317,22 @@ func TestVersionedViews(t *testing.T) {
|
||||
enginetest.TestVersionedViews(t, newDoltHarness(t))
|
||||
}
|
||||
|
||||
func TestWindowFunctions(t *testing.T) {
|
||||
enginetest.TestWindowFunctions(t, newDoltHarness(t))
|
||||
}
|
||||
|
||||
func TestWindowRowFrames(t *testing.T) {
|
||||
enginetest.TestWindowRowFrames(t, newDoltHarness(t))
|
||||
}
|
||||
|
||||
func TestWindowRangeFrames(t *testing.T) {
|
||||
enginetest.TestWindowRangeFrames(t, newDoltHarness(t))
|
||||
}
|
||||
|
||||
func TestNamedWindows(t *testing.T) {
|
||||
enginetest.TestNamedWindows(t, newDoltHarness(t))
|
||||
}
|
||||
|
||||
func TestNaturalJoin(t *testing.T) {
|
||||
enginetest.TestNaturalJoin(t, newDoltHarness(t))
|
||||
}
|
||||
|
||||
@@ -51,6 +51,7 @@ type DoltHarness struct {
|
||||
|
||||
var _ enginetest.Harness = (*DoltHarness)(nil)
|
||||
var _ enginetest.SkippingHarness = (*DoltHarness)(nil)
|
||||
var _ enginetest.ClientHarness = (*DoltHarness)(nil)
|
||||
var _ enginetest.IndexHarness = (*DoltHarness)(nil)
|
||||
var _ enginetest.VersionedDBHarness = (*DoltHarness)(nil)
|
||||
var _ enginetest.ForeignKeyHarness = (*DoltHarness)(nil)
|
||||
@@ -144,32 +145,36 @@ func (d *DoltHarness) Parallelism() int {
|
||||
}
|
||||
|
||||
func (d *DoltHarness) NewContext() *sql.Context {
|
||||
return sql.NewContext(
|
||||
context.Background(),
|
||||
sql.WithSession(d.session))
|
||||
return sql.NewContext(context.Background(), sql.WithSession(d.session))
|
||||
}
|
||||
|
||||
func (d *DoltHarness) NewContextWithClient(client sql.Client) *sql.Context {
|
||||
return sql.NewContext(context.Background(), sql.WithSession(d.newSessionWithClient(client)))
|
||||
}
|
||||
|
||||
func (d *DoltHarness) NewSession() *sql.Context {
|
||||
d.session = d.newSessionWithClient(sql.Client{Address: "localhost", User: "root"})
|
||||
return d.NewContext()
|
||||
}
|
||||
|
||||
func (d *DoltHarness) newSessionWithClient(client sql.Client) *dsess.DoltSession {
|
||||
states := make([]dsess.InitialDbState, len(d.databases))
|
||||
for i, db := range d.databases {
|
||||
states[i] = getDbState(d.t, db, d.env)
|
||||
}
|
||||
dbs := dsqleDBsAsSqlDBs(d.databases)
|
||||
pro := d.NewDatabaseProvider(dbs...)
|
||||
|
||||
localConfig := d.env.Config.WriteableConfig()
|
||||
|
||||
var err error
|
||||
d.session, err = dsess.NewDoltSession(
|
||||
dSession, err := dsess.NewDoltSession(
|
||||
enginetest.NewContext(d),
|
||||
enginetest.NewBaseSession(),
|
||||
sql.NewBaseSessionWithClientServer("address", client, 1),
|
||||
pro.(dsess.RevisionDatabaseProvider),
|
||||
localConfig,
|
||||
states...,
|
||||
)
|
||||
require.NoError(d.t, err)
|
||||
|
||||
return d.NewContext()
|
||||
return dSession
|
||||
}
|
||||
|
||||
func (d *DoltHarness) SupportsNativeIndexCreation() bool {
|
||||
@@ -224,6 +229,9 @@ func (d *DoltHarness) NewReadOnlyDatabases(names ...string) (dbs []sql.ReadOnlyD
|
||||
}
|
||||
|
||||
func (d *DoltHarness) NewDatabaseProvider(dbs ...sql.Database) sql.MutableDatabaseProvider {
|
||||
if d.env == nil {
|
||||
d.env = dtestutils.CreateTestEnv()
|
||||
}
|
||||
mrEnv, err := env.DoltEnvAsMultiEnv(context.Background(), d.env)
|
||||
require.NoError(d.t, err)
|
||||
pro := sqle.NewDoltDatabaseProvider(d.env.Config, mrEnv.FileSystem(), dbs...)
|
||||
@@ -303,7 +311,7 @@ func (d *DoltHarness) SnapshotTable(db sql.VersionedDatabase, name string, asOf
|
||||
_, iter, err := e.Query(ctx,
|
||||
"set @@"+dsess.HeadKey(db.Name())+" = COMMIT('-m', 'test commit');")
|
||||
require.NoError(d.t, err)
|
||||
_, err = sql.RowIterToRows(ctx, iter)
|
||||
_, err = sql.RowIterToRows(ctx, nil, iter)
|
||||
require.NoError(d.t, err)
|
||||
|
||||
headHash, err := ctx.GetSessionVariable(ctx, dsess.HeadKey(db.Name()))
|
||||
@@ -318,7 +326,7 @@ func (d *DoltHarness) SnapshotTable(db sql.VersionedDatabase, name string, asOf
|
||||
_, iter, err = e.Query(ctx,
|
||||
query)
|
||||
require.NoError(d.t, err)
|
||||
_, err = sql.RowIterToRows(ctx, iter)
|
||||
_, err = sql.RowIterToRows(ctx, nil, iter)
|
||||
require.NoError(d.t, err)
|
||||
|
||||
return nil
|
||||
|
||||
@@ -478,7 +478,7 @@ func isBindingCut(cut sql.RangeCut) bool {
|
||||
|
||||
func tupleFromKeys(keys sql.Row, tb *val.TupleBuilder) (val.Tuple, error) {
|
||||
for i, v := range keys {
|
||||
tb.PutField(i, v)
|
||||
PutField(tb, i, v)
|
||||
}
|
||||
return tb.BuildPermissive(sharePool), nil
|
||||
}
|
||||
|
||||
@@ -1327,7 +1327,7 @@ func TestMergeableIndexes(t *testing.T) {
|
||||
|
||||
_, iter, err := engine.Query(sqlCtx, query)
|
||||
require.NoError(t, err)
|
||||
res, err := sql.RowIterToRows(sqlCtx, iter)
|
||||
res, err := sql.RowIterToRows(sqlCtx, nil, iter)
|
||||
require.NoError(t, err)
|
||||
|
||||
if assert.Equal(t, len(test.pks), len(res)) {
|
||||
@@ -1543,7 +1543,7 @@ func TestMergeableIndexesNulls(t *testing.T) {
|
||||
_, iter, err := engine.Query(sqlCtx, query)
|
||||
require.NoError(t, err)
|
||||
|
||||
res, err := sql.RowIterToRows(sqlCtx, iter)
|
||||
res, err := sql.RowIterToRows(sqlCtx, nil, iter)
|
||||
require.NoError(t, err)
|
||||
if assert.Equal(t, len(test.pks), len(res)) {
|
||||
for i, pk := range test.pks {
|
||||
|
||||
223
go/libraries/doltcore/sqle/index/prolly_fields.go
Normal file
223
go/libraries/doltcore/sqle/index/prolly_fields.go
Normal file
@@ -0,0 +1,223 @@
|
||||
// Copyright 2022 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package index
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
|
||||
geo "github.com/dolthub/dolt/go/store/geometry"
|
||||
"github.com/dolthub/dolt/go/store/val"
|
||||
)
|
||||
|
||||
// GetField reads the value from the ith field of the Tuple as an interface{}.
|
||||
func GetField(td val.TupleDesc, i int, tup val.Tuple) (v interface{}, err error) {
|
||||
var ok bool
|
||||
switch td.Types[i].Enc {
|
||||
case val.Int8Enc:
|
||||
v, ok = td.GetInt8(i, tup)
|
||||
case val.Uint8Enc:
|
||||
v, ok = td.GetUint8(i, tup)
|
||||
case val.Int16Enc:
|
||||
v, ok = td.GetInt16(i, tup)
|
||||
case val.Uint16Enc:
|
||||
v, ok = td.GetUint16(i, tup)
|
||||
case val.Int32Enc:
|
||||
v, ok = td.GetInt32(i, tup)
|
||||
case val.Uint32Enc:
|
||||
v, ok = td.GetUint32(i, tup)
|
||||
case val.Int64Enc:
|
||||
v, ok = td.GetInt64(i, tup)
|
||||
case val.Uint64Enc:
|
||||
v, ok = td.GetUint64(i, tup)
|
||||
case val.Float32Enc:
|
||||
v, ok = td.GetFloat32(i, tup)
|
||||
case val.Float64Enc:
|
||||
v, ok = td.GetFloat64(i, tup)
|
||||
case val.DecimalEnc:
|
||||
v, ok = td.GetDecimal(i, tup)
|
||||
case val.TimeEnc:
|
||||
v, ok = td.GetSqlTime(i, tup)
|
||||
case val.YearEnc:
|
||||
v, ok = td.GetYear(i, tup)
|
||||
case val.TimestampEnc, val.DateEnc, val.DatetimeEnc:
|
||||
v, ok = td.GetTimestamp(i, tup)
|
||||
case val.StringEnc:
|
||||
v, ok = td.GetString(i, tup)
|
||||
case val.BytesEnc:
|
||||
v, ok = td.GetBytes(i, tup)
|
||||
case val.JSONEnc:
|
||||
var buf []byte
|
||||
buf, ok = td.GetJSON(i, tup)
|
||||
if ok {
|
||||
var doc sql.JSONDocument
|
||||
err = json.Unmarshal(buf, &doc.Val)
|
||||
v = doc
|
||||
}
|
||||
case val.GeometryEnc:
|
||||
var buf []byte
|
||||
buf, ok = td.GetGeometry(i, tup)
|
||||
if ok {
|
||||
v = deserializeGeometry(buf)
|
||||
}
|
||||
default:
|
||||
panic("unknown val.encoding")
|
||||
}
|
||||
if !ok || err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return v, err
|
||||
}
|
||||
|
||||
// PutField writes an interface{} to the ith field of the Tuple being built.
|
||||
func PutField(tb *val.TupleBuilder, i int, v interface{}) error {
|
||||
if v == nil {
|
||||
return nil // NULL
|
||||
}
|
||||
|
||||
enc := tb.Desc.Types[i].Enc
|
||||
switch enc {
|
||||
case val.Int8Enc:
|
||||
tb.PutInt8(i, int8(convInt(v)))
|
||||
case val.Uint8Enc:
|
||||
tb.PutUint8(i, uint8(convUint(v)))
|
||||
case val.Int16Enc:
|
||||
tb.PutInt16(i, int16(convInt(v)))
|
||||
case val.Uint16Enc:
|
||||
tb.PutUint16(i, uint16(convUint(v)))
|
||||
case val.Int32Enc:
|
||||
tb.PutInt32(i, int32(convInt(v)))
|
||||
case val.Uint32Enc:
|
||||
tb.PutUint32(i, uint32(convUint(v)))
|
||||
case val.Int64Enc:
|
||||
tb.PutInt64(i, int64(convInt(v)))
|
||||
case val.Uint64Enc:
|
||||
tb.PutUint64(i, uint64(convUint(v)))
|
||||
case val.Float32Enc:
|
||||
tb.PutFloat32(i, v.(float32))
|
||||
case val.Float64Enc:
|
||||
tb.PutFloat64(i, v.(float64))
|
||||
case val.DecimalEnc:
|
||||
tb.PutDecimal(i, v.(string))
|
||||
case val.TimeEnc:
|
||||
tb.PutSqlTime(i, v.(string))
|
||||
case val.YearEnc:
|
||||
tb.PutYear(i, v.(int16))
|
||||
case val.DateEnc, val.DatetimeEnc, val.TimestampEnc:
|
||||
tb.PutTimestamp(i, v.(time.Time))
|
||||
case val.StringEnc:
|
||||
tb.PutString(i, v.(string))
|
||||
case val.BytesEnc:
|
||||
if s, ok := v.(string); ok {
|
||||
v = []byte(s)
|
||||
}
|
||||
tb.PutBytes(i, v.([]byte))
|
||||
case val.GeometryEnc:
|
||||
tb.PutGeometry(i, serializeGeometry(v))
|
||||
case val.JSONEnc:
|
||||
buf, err := json.Marshal(v.(sql.JSONDocument).Val)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
tb.PutJSON(i, buf)
|
||||
default:
|
||||
panic(fmt.Sprintf("unknown encoding %v %v", enc, v))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func convInt(v interface{}) int {
|
||||
switch i := v.(type) {
|
||||
case int:
|
||||
return i
|
||||
case int8:
|
||||
return int(i)
|
||||
case uint8:
|
||||
return int(i)
|
||||
case int16:
|
||||
return int(i)
|
||||
case uint16:
|
||||
return int(i)
|
||||
case int32:
|
||||
return int(i)
|
||||
case uint32:
|
||||
return int(i)
|
||||
case int64:
|
||||
return int(i)
|
||||
case uint64:
|
||||
return int(i)
|
||||
default:
|
||||
panic("impossible conversion")
|
||||
}
|
||||
}
|
||||
|
||||
func convUint(v interface{}) uint {
|
||||
switch i := v.(type) {
|
||||
case uint:
|
||||
return i
|
||||
case int:
|
||||
return uint(i)
|
||||
case int8:
|
||||
return uint(i)
|
||||
case uint8:
|
||||
return uint(i)
|
||||
case int16:
|
||||
return uint(i)
|
||||
case uint16:
|
||||
return uint(i)
|
||||
case int32:
|
||||
return uint(i)
|
||||
case uint32:
|
||||
return uint(i)
|
||||
case int64:
|
||||
return uint(i)
|
||||
case uint64:
|
||||
return uint(i)
|
||||
default:
|
||||
panic("impossible conversion")
|
||||
}
|
||||
}
|
||||
|
||||
func deserializeGeometry(buf []byte) (v interface{}) {
|
||||
srid, _, typ := geo.ParseEWKBHeader(buf)
|
||||
buf = buf[geo.EWKBHeaderSize:]
|
||||
switch typ {
|
||||
case geo.PointType:
|
||||
v = geo.DeserializePoint(buf, srid)
|
||||
case geo.LinestringType:
|
||||
v = geo.DeserializeLinestring(buf, srid)
|
||||
case geo.PolygonType:
|
||||
v = geo.DeserializePolygon(srid, buf)
|
||||
default:
|
||||
panic(fmt.Sprintf("unknown geometry type %d", typ))
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func serializeGeometry(v interface{}) []byte {
|
||||
switch t := v.(type) {
|
||||
case sql.Point:
|
||||
return geo.SerializePoint(t)
|
||||
case sql.Linestring:
|
||||
return geo.SerializeLinestring(t)
|
||||
case sql.Polygon:
|
||||
return geo.SerializePolygon(t)
|
||||
default:
|
||||
panic(fmt.Sprintf("unknown geometry %v", v))
|
||||
}
|
||||
}
|
||||
198
go/libraries/doltcore/sqle/index/prolly_fields_test.go
Normal file
198
go/libraries/doltcore/sqle/index/prolly_fields_test.go
Normal file
@@ -0,0 +1,198 @@
|
||||
// Copyright 2022 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package index
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"math"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
"github.com/dolthub/go-mysql-server/sql/expression/function"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/dolthub/dolt/go/store/pool"
|
||||
"github.com/dolthub/dolt/go/store/val"
|
||||
)
|
||||
|
||||
type prollyFieldTest struct {
|
||||
name string
|
||||
value interface{}
|
||||
typ val.Type
|
||||
}
|
||||
|
||||
func TestRoundTripProllyFields(t *testing.T) {
|
||||
tests := []prollyFieldTest{
|
||||
{
|
||||
name: "null",
|
||||
typ: val.Type{
|
||||
Enc: val.Int8Enc,
|
||||
Nullable: true,
|
||||
},
|
||||
value: nil,
|
||||
},
|
||||
{
|
||||
name: "int8",
|
||||
typ: val.Type{Enc: val.Int8Enc},
|
||||
value: int8(-42),
|
||||
},
|
||||
{
|
||||
name: "uint8",
|
||||
typ: val.Type{Enc: val.Uint8Enc},
|
||||
value: uint8(42),
|
||||
},
|
||||
{
|
||||
name: "int16",
|
||||
typ: val.Type{Enc: val.Int16Enc},
|
||||
value: int16(-42),
|
||||
},
|
||||
{
|
||||
name: "uint16",
|
||||
typ: val.Type{Enc: val.Uint16Enc},
|
||||
value: uint16(42),
|
||||
},
|
||||
{
|
||||
name: "int32",
|
||||
typ: val.Type{Enc: val.Int32Enc},
|
||||
value: int32(-42),
|
||||
},
|
||||
{
|
||||
name: "uint32",
|
||||
typ: val.Type{Enc: val.Uint32Enc},
|
||||
value: uint32(42),
|
||||
},
|
||||
{
|
||||
name: "int64",
|
||||
typ: val.Type{Enc: val.Int64Enc},
|
||||
value: int64(-42),
|
||||
},
|
||||
{
|
||||
name: "uint64",
|
||||
typ: val.Type{Enc: val.Uint64Enc},
|
||||
value: uint64(42),
|
||||
},
|
||||
{
|
||||
name: "float32",
|
||||
typ: val.Type{Enc: val.Float32Enc},
|
||||
value: float32(math.Pi),
|
||||
},
|
||||
{
|
||||
name: "float64",
|
||||
typ: val.Type{Enc: val.Float64Enc},
|
||||
value: float64(-math.Pi),
|
||||
},
|
||||
{
|
||||
name: "string",
|
||||
typ: val.Type{Enc: val.StringEnc},
|
||||
value: "lorem ipsum",
|
||||
},
|
||||
{
|
||||
name: "bytes",
|
||||
typ: val.Type{Enc: val.BytesEnc},
|
||||
value: []byte("lorem ipsum"),
|
||||
},
|
||||
{
|
||||
name: "year",
|
||||
typ: val.Type{Enc: val.YearEnc},
|
||||
value: int16(2022),
|
||||
},
|
||||
{
|
||||
name: "date",
|
||||
typ: val.Type{Enc: val.DateEnc},
|
||||
value: time.Now().UTC(),
|
||||
},
|
||||
{
|
||||
name: "datetime",
|
||||
typ: val.Type{Enc: val.DatetimeEnc},
|
||||
value: time.Now().UTC(),
|
||||
},
|
||||
{
|
||||
name: "timestamp",
|
||||
typ: val.Type{Enc: val.TimestampEnc},
|
||||
value: time.Now().UTC(),
|
||||
},
|
||||
{
|
||||
name: "json",
|
||||
typ: val.Type{Enc: val.JSONEnc},
|
||||
value: mustParseJson(t, `{"a": 1, "b": false}`),
|
||||
},
|
||||
{
|
||||
name: "point",
|
||||
typ: val.Type{Enc: val.GeometryEnc},
|
||||
value: mustParseGeometryType(t, "POINT(1 2)"),
|
||||
},
|
||||
{
|
||||
name: "linestring",
|
||||
typ: val.Type{Enc: val.GeometryEnc},
|
||||
value: mustParseGeometryType(t, "LINESTRING(1 2,3 4)"),
|
||||
},
|
||||
{
|
||||
name: "polygon",
|
||||
typ: val.Type{Enc: val.GeometryEnc},
|
||||
value: mustParseGeometryType(t, "POLYGON((0 0,1 1,1 0,0 0))"),
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
testRoundTripProllyFields(t, test)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
var testPool = pool.NewBuffPool()
|
||||
|
||||
func testRoundTripProllyFields(t *testing.T, test prollyFieldTest) {
|
||||
desc := val.NewTupleDescriptor(test.typ)
|
||||
builder := val.NewTupleBuilder(desc)
|
||||
|
||||
err := PutField(builder, 0, test.value)
|
||||
assert.NoError(t, err)
|
||||
|
||||
tup := builder.Build(testPool)
|
||||
|
||||
v, err := GetField(desc, 0, tup)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, test.value, v)
|
||||
}
|
||||
|
||||
func mustParseGeometryType(t *testing.T, s string) (v interface{}) {
|
||||
// Determine type, and get data
|
||||
geomType, data, err := function.ParseWKTHeader(s)
|
||||
require.NoError(t, err)
|
||||
|
||||
srid, order := uint32(0), false
|
||||
switch geomType {
|
||||
case "point":
|
||||
v, err = function.WKTToPoint(data, srid, order)
|
||||
case "linestring":
|
||||
v, err = function.WKTToLine(data, srid, order)
|
||||
case "polygon":
|
||||
v, err = function.WKTToPoly(data, srid, order)
|
||||
default:
|
||||
panic("unknown geometry type")
|
||||
}
|
||||
require.NoError(t, err)
|
||||
return
|
||||
}
|
||||
|
||||
func mustParseJson(t *testing.T, s string) sql.JSONDocument {
|
||||
var v interface{}
|
||||
err := json.Unmarshal([]byte(s), &v)
|
||||
require.NoError(t, err)
|
||||
return sql.JSONDocument{Val: v}
|
||||
}
|
||||
@@ -137,20 +137,26 @@ func (p prollyIndexIter) queueRows(ctx context.Context) error {
|
||||
}
|
||||
}
|
||||
|
||||
func (p prollyIndexIter) rowFromTuples(key, value val.Tuple, r sql.Row) {
|
||||
func (p prollyIndexIter) rowFromTuples(key, value val.Tuple, r sql.Row) (err error) {
|
||||
keyDesc, valDesc := p.primary.Descriptors()
|
||||
|
||||
for keyIdx, rowIdx := range p.keyMap {
|
||||
if rowIdx == -1 {
|
||||
continue
|
||||
}
|
||||
r[rowIdx] = keyDesc.GetField(keyIdx, key)
|
||||
r[rowIdx], err = GetField(keyDesc, keyIdx, key)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for valIdx, rowIdx := range p.valMap {
|
||||
if rowIdx == -1 {
|
||||
continue
|
||||
}
|
||||
r[rowIdx] = valDesc.GetField(valIdx, value)
|
||||
r[rowIdx], err = GetField(valDesc, valIdx, value)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
@@ -241,18 +247,23 @@ func (p prollyCoveringIndexIter) Next(ctx *sql.Context) (sql.Row, error) {
|
||||
}
|
||||
|
||||
r := make(sql.Row, len(p.keyMap))
|
||||
p.writeRowFromTuples(k, v, r)
|
||||
if err := p.writeRowFromTuples(k, v, r); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return r, nil
|
||||
}
|
||||
|
||||
func (p prollyCoveringIndexIter) writeRowFromTuples(key, value val.Tuple, r sql.Row) {
|
||||
func (p prollyCoveringIndexIter) writeRowFromTuples(key, value val.Tuple, r sql.Row) (err error) {
|
||||
for to := range p.keyMap {
|
||||
from := p.keyMap.MapOrdinal(to)
|
||||
if from == -1 {
|
||||
continue
|
||||
}
|
||||
r[to] = p.keyDesc.GetField(from, key)
|
||||
r[to], err = GetField(p.keyDesc, from, key)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for to := range p.valMap {
|
||||
@@ -260,7 +271,10 @@ func (p prollyCoveringIndexIter) writeRowFromTuples(key, value val.Tuple, r sql.
|
||||
if from == -1 {
|
||||
continue
|
||||
}
|
||||
r[to] = p.valDesc.GetField(from, value)
|
||||
r[to], err = GetField(p.valDesc, from, value)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
|
||||
@@ -22,12 +22,11 @@ import (
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/schema"
|
||||
"github.com/dolthub/dolt/go/store/pool"
|
||||
"github.com/dolthub/dolt/go/store/prolly"
|
||||
"github.com/dolthub/dolt/go/store/val"
|
||||
)
|
||||
|
||||
type sqlRowIter struct {
|
||||
type prollyRowIter struct {
|
||||
ctx context.Context
|
||||
iter prolly.MapRangeIter
|
||||
|
||||
@@ -38,7 +37,7 @@ type sqlRowIter struct {
|
||||
rowLen int
|
||||
}
|
||||
|
||||
var _ sql.RowIter = sqlRowIter{}
|
||||
var _ sql.RowIter = prollyRowIter{}
|
||||
|
||||
func NewProllyRowIter(ctx context.Context, sch schema.Schema, rows prolly.Map, rng prolly.Range, projections []string) (sql.RowIter, error) {
|
||||
if schema.IsKeyless(sch) {
|
||||
@@ -71,7 +70,7 @@ func rowIterFromMapIter(
|
||||
|
||||
kd, vd := m.Descriptors()
|
||||
|
||||
return sqlRowIter{
|
||||
return prollyRowIter{
|
||||
ctx: ctx,
|
||||
iter: iter,
|
||||
keyDesc: kd,
|
||||
@@ -110,7 +109,7 @@ func projectionMappings(sch schema.Schema, projs []string) (keyMap, valMap val.O
|
||||
return
|
||||
}
|
||||
|
||||
func (it sqlRowIter) Next(ctx *sql.Context) (sql.Row, error) {
|
||||
func (it prollyRowIter) Next(ctx *sql.Context) (sql.Row, error) {
|
||||
key, value, err := it.iter.Next(it.ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -122,20 +121,24 @@ func (it sqlRowIter) Next(ctx *sql.Context) (sql.Row, error) {
|
||||
if rowIdx == -1 {
|
||||
continue
|
||||
}
|
||||
row[rowIdx] = it.keyDesc.GetField(keyIdx, key)
|
||||
row[rowIdx], err = GetField(it.keyDesc, keyIdx, key)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
for valIdx, rowIdx := range it.valProj {
|
||||
if rowIdx == -1 {
|
||||
continue
|
||||
}
|
||||
row[rowIdx] = it.valDesc.GetField(valIdx, value)
|
||||
row[rowIdx], err = GetField(it.valDesc, valIdx, value)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return row, nil
|
||||
}
|
||||
|
||||
func (it sqlRowIter) Close(ctx *sql.Context) error {
|
||||
func (it prollyRowIter) Close(ctx *sql.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
var shimPool = pool.NewBuffPool()
|
||||
@@ -133,7 +133,7 @@ func innerInit(h *DoltHarness, dEnv *env.DoltEnv) error {
|
||||
ctx := dsql.NewTestSQLCtx(context.Background())
|
||||
h.sess = ctx.Session.(*dsess.DoltSession)
|
||||
|
||||
dbs := h.engine.Analyzer.Catalog.AllDatabases()
|
||||
dbs := h.engine.Analyzer.Catalog.AllDatabases(ctx)
|
||||
dsqlDBs := make([]dsql.Database, len(dbs))
|
||||
for i, db := range dbs {
|
||||
dsqlDB := db.(dsql.Database)
|
||||
|
||||
@@ -46,7 +46,7 @@ func GetCreateTableStmt(ctx *sql.Context, engine *sqle.Engine, tableName string)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
rows, err := sql.RowIterToRows(ctx, rowIter)
|
||||
rows, err := sql.RowIterToRows(ctx, nil, rowIter)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
@@ -24,6 +24,7 @@ import (
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/schema"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/globalstate"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/index"
|
||||
"github.com/dolthub/dolt/go/store/pool"
|
||||
"github.com/dolthub/dolt/go/store/prolly"
|
||||
"github.com/dolthub/dolt/go/store/val"
|
||||
@@ -284,7 +285,7 @@ func (m prollyIndexWriter) Map(ctx context.Context) (prolly.Map, error) {
|
||||
func (m prollyIndexWriter) Insert(ctx *sql.Context, sqlRow sql.Row) error {
|
||||
for to := range m.keyMap {
|
||||
from := m.keyMap.MapOrdinal(to)
|
||||
m.keyBld.PutField(to, sqlRow[from])
|
||||
index.PutField(m.keyBld, to, sqlRow[from])
|
||||
}
|
||||
k := m.keyBld.Build(sharePool)
|
||||
|
||||
@@ -297,7 +298,7 @@ func (m prollyIndexWriter) Insert(ctx *sql.Context, sqlRow sql.Row) error {
|
||||
|
||||
for to := range m.valMap {
|
||||
from := m.valMap.MapOrdinal(to)
|
||||
m.valBld.PutField(to, sqlRow[from])
|
||||
index.PutField(m.valBld, to, sqlRow[from])
|
||||
}
|
||||
v := m.valBld.Build(sharePool)
|
||||
|
||||
@@ -307,7 +308,7 @@ func (m prollyIndexWriter) Insert(ctx *sql.Context, sqlRow sql.Row) error {
|
||||
func (m prollyIndexWriter) Delete(ctx *sql.Context, sqlRow sql.Row) error {
|
||||
for to := range m.keyMap {
|
||||
from := m.keyMap.MapOrdinal(to)
|
||||
m.keyBld.PutField(to, sqlRow[from])
|
||||
index.PutField(m.keyBld, to, sqlRow[from])
|
||||
}
|
||||
k := m.keyBld.Build(sharePool)
|
||||
|
||||
@@ -317,7 +318,7 @@ func (m prollyIndexWriter) Delete(ctx *sql.Context, sqlRow sql.Row) error {
|
||||
func (m prollyIndexWriter) Update(ctx *sql.Context, oldRow sql.Row, newRow sql.Row) error {
|
||||
for to := range m.keyMap {
|
||||
from := m.keyMap.MapOrdinal(to)
|
||||
m.keyBld.PutField(to, oldRow[from])
|
||||
index.PutField(m.keyBld, to, oldRow[from])
|
||||
}
|
||||
oldKey := m.keyBld.Build(sharePool)
|
||||
|
||||
@@ -329,7 +330,7 @@ func (m prollyIndexWriter) Update(ctx *sql.Context, oldRow sql.Row, newRow sql.R
|
||||
|
||||
for to := range m.keyMap {
|
||||
from := m.keyMap.MapOrdinal(to)
|
||||
m.keyBld.PutField(to, newRow[from])
|
||||
index.PutField(m.keyBld, to, newRow[from])
|
||||
}
|
||||
newKey := m.keyBld.Build(sharePool)
|
||||
|
||||
@@ -342,7 +343,7 @@ func (m prollyIndexWriter) Update(ctx *sql.Context, oldRow sql.Row, newRow sql.R
|
||||
|
||||
for to := range m.valMap {
|
||||
from := m.valMap.MapOrdinal(to)
|
||||
m.valBld.PutField(to, newRow[from])
|
||||
index.PutField(m.valBld, to, newRow[from])
|
||||
}
|
||||
v := m.valBld.Build(sharePool)
|
||||
|
||||
@@ -350,26 +351,30 @@ func (m prollyIndexWriter) Update(ctx *sql.Context, oldRow sql.Row, newRow sql.R
|
||||
}
|
||||
|
||||
func (m prollyIndexWriter) primaryKeyError(ctx context.Context, key val.Tuple) error {
|
||||
existing := make(sql.Row, len(m.keyMap)+len(m.valMap))
|
||||
dupe := make(sql.Row, len(m.keyMap)+len(m.valMap))
|
||||
|
||||
_ = m.mut.Get(ctx, key, func(key, value val.Tuple) (err error) {
|
||||
kd := m.keyBld.Desc
|
||||
for from := range m.keyMap {
|
||||
to := m.keyMap.MapOrdinal(from)
|
||||
existing[to] = kd.GetField(from, key)
|
||||
if dupe[to], err = index.GetField(kd, from, key); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
vd := m.valBld.Desc
|
||||
for from := range m.valMap {
|
||||
to := m.valMap.MapOrdinal(from)
|
||||
existing[to] = vd.GetField(from, value)
|
||||
if dupe[to], err = index.GetField(vd, from, value); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return
|
||||
})
|
||||
|
||||
s := m.keyBld.Desc.Format(key)
|
||||
|
||||
return sql.NewUniqueKeyErr(s, true, existing)
|
||||
return sql.NewUniqueKeyErr(s, true, dupe)
|
||||
}
|
||||
|
||||
func ordinalMappingsFromSchema(from sql.Schema, to schema.Schema) (km, vm val.OrdinalMapping) {
|
||||
|
||||
52
go/performance/memprof/membench_test.go
Normal file
52
go/performance/memprof/membench_test.go
Normal file
@@ -0,0 +1,52 @@
|
||||
// Copyright 2022 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package memprof
|
||||
|
||||
import (
|
||||
"context"
|
||||
"flag"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/dbfactory"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
|
||||
"github.com/dolthub/dolt/go/libraries/utils/filesys"
|
||||
"github.com/dolthub/dolt/go/store/types"
|
||||
)
|
||||
|
||||
var loc = flag.String("doltDir", "", "Directory of dolt database")
|
||||
var urlStr string
|
||||
var ddb *doltdb.DoltDB
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
flag.Parse()
|
||||
|
||||
urlStr = "file://" + *loc + dbfactory.DoltDataDir
|
||||
|
||||
code := m.Run()
|
||||
os.Exit(code)
|
||||
}
|
||||
|
||||
func BenchmarkLoadDoltDBMemory(b *testing.B) {
|
||||
b.SkipNow()
|
||||
for i := 0; i < b.N; i++ {
|
||||
ctx := context.Background()
|
||||
var err error
|
||||
ddb, err = doltdb.LoadDoltDB(ctx, types.Format_Default, urlStr, filesys.LocalFS)
|
||||
if err != nil {
|
||||
b.Fatalf("failed to load doltdb, err: %s", err.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
81
go/store/geometry/read_geometry.go
Normal file
81
go/store/geometry/read_geometry.go
Normal file
@@ -0,0 +1,81 @@
|
||||
// Copyright 2022 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package geometry
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"math"
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
)
|
||||
|
||||
// ParseEWKBHeader converts the header potion of a EWKB byte array to srid, endianness, and geometry type
|
||||
func ParseEWKBHeader(buf []byte) (srid uint32, bigEndian bool, typ uint32) {
|
||||
srid = binary.LittleEndian.Uint32(buf[0:SRIDSize]) // First 4 bytes is SRID always in little endian
|
||||
bigEndian = buf[SRIDSize] == 0 // Next byte is endianness
|
||||
typ = binary.LittleEndian.Uint32(buf[SRIDSize+EndianSize : EWKBHeaderSize]) // Next 4 bytes is type
|
||||
return
|
||||
}
|
||||
|
||||
func ParseEWKBPoint(buf []byte) (x, y float64) {
|
||||
x = math.Float64frombits(binary.LittleEndian.Uint64(buf[:PointSize/2]))
|
||||
y = math.Float64frombits(binary.LittleEndian.Uint64(buf[PointSize/2:]))
|
||||
return
|
||||
}
|
||||
|
||||
func DeserializePoint(buf []byte, srid uint32) (p sql.Point) {
|
||||
p.SRID = srid
|
||||
p.X, p.Y = ParseEWKBPoint(buf)
|
||||
return
|
||||
}
|
||||
|
||||
func DeserializeLinestring(buf []byte, srid uint32) (l sql.Linestring) {
|
||||
l.SRID = srid
|
||||
l.Points = readPointSlice(buf, srid)
|
||||
return
|
||||
}
|
||||
|
||||
func DeserializePolygon(srid uint32, buf []byte) (p sql.Polygon) {
|
||||
p.SRID = srid
|
||||
p.Lines = readLineSlice(buf, srid)
|
||||
return
|
||||
}
|
||||
|
||||
func readCount(buf []byte) uint32 {
|
||||
return binary.LittleEndian.Uint32(buf)
|
||||
}
|
||||
|
||||
func readPointSlice(buf []byte, srid uint32) (points []sql.Point) {
|
||||
points = make([]sql.Point, readCount(buf))
|
||||
buf = buf[CountSize:]
|
||||
for i := range points {
|
||||
points[i].SRID = srid
|
||||
points[i].X, points[i].Y = ParseEWKBPoint(buf)
|
||||
buf = buf[PointSize:]
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func readLineSlice(buf []byte, srid uint32) (lines []sql.Linestring) {
|
||||
lines = make([]sql.Linestring, readCount(buf))
|
||||
buf = buf[CountSize:]
|
||||
for i := range lines {
|
||||
lines[i].SRID = srid
|
||||
lines[i].Points = readPointSlice(buf, srid)
|
||||
sz := len(lines[i].Points) * PointSize
|
||||
buf = buf[sz:]
|
||||
}
|
||||
return
|
||||
}
|
||||
104
go/store/geometry/write_geometry.go
Normal file
104
go/store/geometry/write_geometry.go
Normal file
@@ -0,0 +1,104 @@
|
||||
// Copyright 2022 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package geometry
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"math"
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
)
|
||||
|
||||
const (
|
||||
SRIDSize = 4
|
||||
EndianSize = 1
|
||||
TypeSize = 4
|
||||
EWKBHeaderSize = SRIDSize + EndianSize + TypeSize
|
||||
|
||||
PointSize = 16
|
||||
CountSize = 4
|
||||
)
|
||||
|
||||
const (
|
||||
PointType = 1
|
||||
LinestringType = 2
|
||||
PolygonType = 3
|
||||
)
|
||||
|
||||
func allocateBuffer(numPoints, numCounts int) []byte {
|
||||
return make([]byte, EWKBHeaderSize+PointSize*numPoints+CountSize*numCounts)
|
||||
}
|
||||
|
||||
func WriteEWKBHeader(buf []byte, srid, typ uint32) {
|
||||
binary.LittleEndian.PutUint32(buf[0:SRIDSize], srid)
|
||||
buf[SRIDSize] = 1
|
||||
binary.LittleEndian.PutUint32(buf[SRIDSize+EndianSize:EWKBHeaderSize], typ)
|
||||
}
|
||||
|
||||
func WriteEWKBPointData(buf []byte, x, y float64) {
|
||||
binary.LittleEndian.PutUint64(buf[:PointSize/2], math.Float64bits(x))
|
||||
binary.LittleEndian.PutUint64(buf[PointSize/2:], math.Float64bits(y))
|
||||
}
|
||||
|
||||
func SerializePoint(p sql.Point) (buf []byte) {
|
||||
buf = allocateBuffer(1, 0)
|
||||
WriteEWKBHeader(buf[:EWKBHeaderSize], p.SRID, PointType)
|
||||
WriteEWKBPointData(buf[EWKBHeaderSize:], p.X, p.Y)
|
||||
return
|
||||
}
|
||||
|
||||
func SerializeLinestring(l sql.Linestring) (buf []byte) {
|
||||
buf = allocateBuffer(len(l.Points), 1)
|
||||
WriteEWKBHeader(buf[:EWKBHeaderSize], l.SRID, LinestringType)
|
||||
writePointSlice(buf[EWKBHeaderSize:], l.Points)
|
||||
return
|
||||
}
|
||||
|
||||
func SerializePolygon(p sql.Polygon) (buf []byte) {
|
||||
buf = allocateBuffer(countPoints(p), len(p.Lines)+1)
|
||||
WriteEWKBHeader(buf[:EWKBHeaderSize], p.SRID, PolygonType)
|
||||
writeLineSlice(buf[EWKBHeaderSize:], p.Lines)
|
||||
return
|
||||
}
|
||||
|
||||
func writeCount(buf []byte, count uint32) {
|
||||
binary.LittleEndian.PutUint32(buf, count)
|
||||
}
|
||||
|
||||
func writePointSlice(buf []byte, points []sql.Point) {
|
||||
writeCount(buf, uint32(len(points)))
|
||||
buf = buf[CountSize:]
|
||||
for _, p := range points {
|
||||
WriteEWKBPointData(buf, p.X, p.Y)
|
||||
buf = buf[PointSize:]
|
||||
}
|
||||
}
|
||||
|
||||
func writeLineSlice(buf []byte, lines []sql.Linestring) {
|
||||
writeCount(buf, uint32(len(lines)))
|
||||
buf = buf[CountSize:]
|
||||
for _, l := range lines {
|
||||
writePointSlice(buf, l.Points)
|
||||
sz := len(l.Points) * PointSize
|
||||
buf = buf[sz:]
|
||||
}
|
||||
}
|
||||
|
||||
func countPoints(p sql.Polygon) (cnt int) {
|
||||
for _, line := range p.Lines {
|
||||
cnt += len(line.Points)
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -43,53 +43,59 @@ func newAWSChunkSource(ctx context.Context, ddb *ddbTableStore, s3 *s3ObjectRead
|
||||
|
||||
if index, found := indexCache.get(name); found {
|
||||
tra := &awsTableReaderAt{al: al, ddb: ddb, s3: s3, name: name, chunkCount: chunkCount}
|
||||
return &chunkSourceAdapter{newTableReader(index, tra, s3BlockSize), name}, nil
|
||||
tr, err := newTableReader(index, tra, s3BlockSize)
|
||||
if err != nil {
|
||||
return &chunkSourceAdapter{}, err
|
||||
}
|
||||
return &chunkSourceAdapter{tr, name}, nil
|
||||
}
|
||||
}
|
||||
|
||||
t1 := time.Now()
|
||||
indexBytes, tra, err := func() ([]byte, tableReaderAt, error) {
|
||||
index, tra, err := func() (tableIndex, tableReaderAt, error) {
|
||||
if al.tableMayBeInDynamo(chunkCount) {
|
||||
data, err := ddb.ReadTable(ctx, name, stats)
|
||||
|
||||
if data == nil && err == nil { // There MUST be either data or an error
|
||||
return nil, &dynamoTableReaderAt{}, errors.New("no data available")
|
||||
return onHeapTableIndex{}, &dynamoTableReaderAt{}, errors.New("no data available")
|
||||
}
|
||||
|
||||
if data != nil {
|
||||
return data, &dynamoTableReaderAt{ddb: ddb, h: name}, nil
|
||||
stats.IndexBytesPerRead.Sample(uint64(len(data)))
|
||||
ind, err := parseTableIndexByCopy(data)
|
||||
if err != nil {
|
||||
return onHeapTableIndex{}, nil, err
|
||||
}
|
||||
return ind, &dynamoTableReaderAt{ddb: ddb, h: name}, nil
|
||||
}
|
||||
|
||||
if _, ok := err.(tableNotInDynamoErr); !ok {
|
||||
return nil, &dynamoTableReaderAt{}, err
|
||||
return onHeapTableIndex{}, &dynamoTableReaderAt{}, err
|
||||
}
|
||||
}
|
||||
|
||||
size := indexSize(chunkCount) + footerSize
|
||||
buff := make([]byte, size)
|
||||
|
||||
n, _, err := s3.ReadFromEnd(ctx, name, buff, stats)
|
||||
|
||||
if err != nil {
|
||||
return nil, &dynamoTableReaderAt{}, err
|
||||
return onHeapTableIndex{}, &dynamoTableReaderAt{}, err
|
||||
}
|
||||
|
||||
if size != uint64(n) {
|
||||
return nil, &dynamoTableReaderAt{}, errors.New("failed to read all data")
|
||||
return onHeapTableIndex{}, &dynamoTableReaderAt{}, errors.New("failed to read all data")
|
||||
}
|
||||
|
||||
return buff, &s3TableReaderAt{s3: s3, h: name}, nil
|
||||
stats.IndexBytesPerRead.Sample(uint64(len(buff)))
|
||||
ind, err := parseTableIndex(buff)
|
||||
if err != nil {
|
||||
return onHeapTableIndex{}, &dynamoTableReaderAt{}, err
|
||||
}
|
||||
return ind, &s3TableReaderAt{s3: s3, h: name}, nil
|
||||
}()
|
||||
|
||||
if err != nil {
|
||||
return &chunkSourceAdapter{}, err
|
||||
}
|
||||
|
||||
stats.IndexBytesPerRead.Sample(uint64(len(indexBytes)))
|
||||
stats.IndexReadLatency.SampleTimeSince(t1)
|
||||
|
||||
index, err := parseIndex(indexBytes)
|
||||
|
||||
if err != nil {
|
||||
return emptyChunkSource{}, err
|
||||
}
|
||||
@@ -98,7 +104,11 @@ func newAWSChunkSource(ctx context.Context, ddb *ddbTableStore, s3 *s3ObjectRead
|
||||
indexCache.put(name, ohi)
|
||||
}
|
||||
|
||||
return &chunkSourceAdapter{newTableReader(index, tra, s3BlockSize), name}, nil
|
||||
tr, err := newTableReader(index, tra, s3BlockSize)
|
||||
if err != nil {
|
||||
return &chunkSourceAdapter{}, err
|
||||
}
|
||||
return &chunkSourceAdapter{tr, name}, nil
|
||||
}
|
||||
|
||||
type awsTableReaderAt struct {
|
||||
|
||||
@@ -74,7 +74,7 @@ func TestAWSChunkSource(t *testing.T) {
|
||||
|
||||
t.Run("WithIndexCache", func(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
index, err := parseTableIndex(tableData)
|
||||
index, err := parseTableIndexByCopy(tableData)
|
||||
require.NoError(t, err)
|
||||
cache := newIndexCache(1024)
|
||||
cache.put(h, index)
|
||||
@@ -98,7 +98,7 @@ func TestAWSChunkSource(t *testing.T) {
|
||||
|
||||
t.Run("WithIndexCache", func(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
index, err := parseTableIndex(tableData)
|
||||
index, err := parseTableIndexByCopy(tableData)
|
||||
require.NoError(t, err)
|
||||
cache := newIndexCache(1024)
|
||||
cache.put(h, index)
|
||||
|
||||
@@ -546,8 +546,9 @@ func bytesToChunkSource(t *testing.T, bs ...[]byte) chunkSource {
|
||||
tableSize, name, err := tw.finish()
|
||||
require.NoError(t, err)
|
||||
data := buff[:tableSize]
|
||||
ti, err := parseTableIndex(data)
|
||||
ti, err := parseTableIndexByCopy(data)
|
||||
require.NoError(t, err)
|
||||
rdr, err := newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize)
|
||||
require.NoError(t, err)
|
||||
rdr := newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize)
|
||||
return chunkSourceAdapter{rdr, name}
|
||||
}
|
||||
|
||||
@@ -418,7 +418,9 @@ func TestBlockStoreConjoinOnCommit(t *testing.T) {
|
||||
assertContainAll := func(t *testing.T, store chunks.ChunkStore, srcs ...chunkSource) {
|
||||
rdrs := make(chunkReaderGroup, len(srcs))
|
||||
for i, src := range srcs {
|
||||
rdrs[i] = src.Clone()
|
||||
c, err := src.Clone()
|
||||
require.NoError(t, err)
|
||||
rdrs[i] = c
|
||||
}
|
||||
chunkChan := make(chan extractRecord, mustUint32(rdrs.count()))
|
||||
err := rdrs.extract(context.Background(), chunkChan)
|
||||
|
||||
@@ -110,7 +110,11 @@ func newBSChunkSource(ctx context.Context, bs blobstore.Blobstore, name addr, ch
|
||||
|
||||
if index, found := indexCache.get(name); found {
|
||||
bsTRA := &bsTableReaderAt{name.String(), bs}
|
||||
return &chunkSourceAdapter{newTableReader(index, bsTRA, blockSize), name}, nil
|
||||
tr, err := newTableReader(index, bsTRA, blockSize)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &chunkSourceAdapter{tr, name}, nil
|
||||
}
|
||||
}
|
||||
|
||||
@@ -148,7 +152,11 @@ func newBSChunkSource(ctx context.Context, bs blobstore.Blobstore, name addr, ch
|
||||
indexCache.put(name, index)
|
||||
}
|
||||
|
||||
return &chunkSourceAdapter{newTableReader(index, tra, s3BlockSize), name}, nil
|
||||
tr, err := newTableReader(index, tra, s3BlockSize)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &chunkSourceAdapter{tr, name}, nil
|
||||
}
|
||||
|
||||
func (bsp *blobstorePersister) PruneTableFiles(ctx context.Context, contents manifestContents) error {
|
||||
|
||||
@@ -24,7 +24,7 @@ func (csa chunkSourceAdapter) hash() (addr, error) {
|
||||
}
|
||||
|
||||
func newReaderFromIndexData(indexCache *indexCache, idxData []byte, name addr, tra tableReaderAt, blockSize uint64) (cs chunkSource, err error) {
|
||||
index, err := parseTableIndex(idxData)
|
||||
index, err := parseTableIndexByCopy(idxData)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -42,13 +42,21 @@ func newReaderFromIndexData(indexCache *indexCache, idxData []byte, name addr, t
|
||||
indexCache.put(name, index)
|
||||
}
|
||||
|
||||
return &chunkSourceAdapter{newTableReader(index, tra, blockSize), name}, nil
|
||||
tr, err := newTableReader(index, tra, blockSize)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &chunkSourceAdapter{tr, name}, nil
|
||||
}
|
||||
|
||||
func (csa chunkSourceAdapter) Close() error {
|
||||
return csa.tableReader.Close()
|
||||
}
|
||||
|
||||
func (csa chunkSourceAdapter) Clone() chunkSource {
|
||||
return &chunkSourceAdapter{csa.tableReader.Clone(), csa.h}
|
||||
func (csa chunkSourceAdapter) Clone() (chunkSource, error) {
|
||||
tr, err := csa.tableReader.Clone()
|
||||
if err != nil {
|
||||
return &chunkSourceAdapter{}, err
|
||||
}
|
||||
return &chunkSourceAdapter{tr, csa.h}, nil
|
||||
}
|
||||
|
||||
@@ -35,9 +35,10 @@ func TestCmpChunkTableWriter(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
|
||||
// Setup a TableReader to read compressed chunks out of
|
||||
ti, err := parseTableIndex(buff)
|
||||
ti, err := parseTableIndexByCopy(buff)
|
||||
require.NoError(t, err)
|
||||
tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize)
|
||||
require.NoError(t, err)
|
||||
tr := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize)
|
||||
|
||||
hashes := make(hash.HashSet)
|
||||
for _, chnk := range testMDChunks {
|
||||
@@ -72,9 +73,10 @@ func TestCmpChunkTableWriter(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
|
||||
outputBuff := output.Bytes()
|
||||
outputTI, err := parseTableIndex(outputBuff)
|
||||
outputTI, err := parseTableIndexByCopy(outputBuff)
|
||||
require.NoError(t, err)
|
||||
outputTR, err := newTableReader(outputTI, tableReaderAtFromBytes(buff), fileBlockSize)
|
||||
require.NoError(t, err)
|
||||
outputTR := newTableReader(outputTI, tableReaderAtFromBytes(buff), fileBlockSize)
|
||||
|
||||
compareContentsOfTables(t, ctx, hashes, tr, outputTR)
|
||||
}
|
||||
|
||||
@@ -64,7 +64,9 @@ func makeTestSrcs(t *testing.T, tableSizes []uint32, p tablePersister) (srcs chu
|
||||
}
|
||||
cs, err := p.Persist(context.Background(), mt, nil, &Stats{})
|
||||
require.NoError(t, err)
|
||||
srcs = append(srcs, cs.Clone())
|
||||
c, err := cs.Clone()
|
||||
require.NoError(t, err)
|
||||
srcs = append(srcs, c)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
@@ -62,7 +62,12 @@ func (m *fakeDDB) readerForTable(name addr) (chunkReader, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize), nil
|
||||
tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return tr, nil
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
@@ -90,7 +90,7 @@ func (ftp *fsTablePersister) persistTable(ctx context.Context, name addr, data [
|
||||
return "", ferr
|
||||
}
|
||||
|
||||
index, ferr := parseTableIndex(data)
|
||||
index, ferr := parseTableIndexByCopy(data)
|
||||
|
||||
if ferr != nil {
|
||||
return "", ferr
|
||||
|
||||
@@ -127,9 +127,10 @@ func TestFSTablePersisterPersist(t *testing.T) {
|
||||
if assert.True(mustUint32(src.count()) > 0) {
|
||||
buff, err := os.ReadFile(filepath.Join(dir, mustAddr(src.hash()).String()))
|
||||
require.NoError(t, err)
|
||||
ti, err := parseTableIndex(buff)
|
||||
ti, err := parseTableIndexByCopy(buff)
|
||||
require.NoError(t, err)
|
||||
tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize)
|
||||
require.NoError(t, err)
|
||||
tr := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize)
|
||||
assertChunksInReader(testChunks, tr, assert)
|
||||
}
|
||||
}
|
||||
@@ -227,9 +228,10 @@ func TestFSTablePersisterConjoinAll(t *testing.T) {
|
||||
if assert.True(mustUint32(src.count()) > 0) {
|
||||
buff, err := os.ReadFile(filepath.Join(dir, mustAddr(src.hash()).String()))
|
||||
require.NoError(t, err)
|
||||
ti, err := parseTableIndex(buff)
|
||||
ti, err := parseTableIndexByCopy(buff)
|
||||
require.NoError(t, err)
|
||||
tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize)
|
||||
require.NoError(t, err)
|
||||
tr := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize)
|
||||
assertChunksInReader(testChunks, tr, assert)
|
||||
}
|
||||
|
||||
@@ -265,9 +267,10 @@ func TestFSTablePersisterConjoinAllDups(t *testing.T) {
|
||||
if assert.True(mustUint32(src.count()) > 0) {
|
||||
buff, err := os.ReadFile(filepath.Join(dir, mustAddr(src.hash()).String()))
|
||||
require.NoError(t, err)
|
||||
ti, err := parseTableIndex(buff)
|
||||
ti, err := parseTableIndexByCopy(buff)
|
||||
require.NoError(t, err)
|
||||
tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize)
|
||||
require.NoError(t, err)
|
||||
tr := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize)
|
||||
assertChunksInReader(testChunks, tr, assert)
|
||||
assert.EqualValues(reps*len(testChunks), mustUint32(tr.count()))
|
||||
}
|
||||
|
||||
91
go/store/nbs/index_transformer.go
Normal file
91
go/store/nbs/index_transformer.go
Normal file
@@ -0,0 +1,91 @@
|
||||
// Copyright 2022 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package nbs
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"io"
|
||||
)
|
||||
|
||||
var (
|
||||
ErrNotEnoughBytes = errors.New("reader did not return enough bytes")
|
||||
)
|
||||
|
||||
func NewIndexTransformer(src io.Reader, chunkCount int) io.Reader {
|
||||
tuplesSize := chunkCount * prefixTupleSize
|
||||
lengthsSize := chunkCount * lengthSize
|
||||
suffixesSize := chunkCount * addrSuffixSize
|
||||
|
||||
tupleReader := io.LimitReader(src, int64(tuplesSize))
|
||||
lengthsReader := io.LimitReader(src, int64(lengthsSize))
|
||||
suffixesReader := io.LimitReader(src, int64(suffixesSize))
|
||||
|
||||
return io.MultiReader(
|
||||
tupleReader,
|
||||
NewOffsetsReader(lengthsReader),
|
||||
suffixesReader,
|
||||
)
|
||||
}
|
||||
|
||||
// OffsetsReader transforms a byte stream of table file lengths
|
||||
// into a byte stream of table file offsets
|
||||
type OffsetsReader struct {
|
||||
lengthsReader io.Reader
|
||||
offset uint64
|
||||
}
|
||||
|
||||
func NewOffsetsReader(lengthsReader io.Reader) *OffsetsReader {
|
||||
return &OffsetsReader{
|
||||
lengthsReader: lengthsReader,
|
||||
}
|
||||
}
|
||||
|
||||
func (tra *OffsetsReader) Read(p []byte) (n int, err error) {
|
||||
|
||||
// Read as many lengths, as offsets we can fit into p. Which is half.
|
||||
// Below assumes that lengthSize * 2 = offsetSize
|
||||
|
||||
// Strategy is to first read lengths into the second half of p
|
||||
// Then, while iterating the lengths, compute the current offset,
|
||||
// and write it to the beginning of p.
|
||||
|
||||
// Align p
|
||||
rem := len(p) % offsetSize
|
||||
p = p[:len(p)-rem]
|
||||
|
||||
// Read lengths into second half of p
|
||||
secondHalf := p[len(p)/2:]
|
||||
n, err = tra.lengthsReader.Read(secondHalf)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if n%lengthSize != 0 {
|
||||
return 0, ErrNotEnoughBytes
|
||||
}
|
||||
|
||||
// Iterate lengths in second half of p while writing offsets starting from the beginning.
|
||||
// On the last iteration, we overwrite the last length with the final offset.
|
||||
for l, r := 0, 0; r < n; l, r = l+offsetSize, r+lengthSize {
|
||||
lengthBytes := secondHalf[r : r+lengthSize]
|
||||
length := binary.BigEndian.Uint32(lengthBytes)
|
||||
tra.offset += uint64(length)
|
||||
|
||||
offsetBytes := p[l : l+offsetSize]
|
||||
binary.BigEndian.PutUint64(offsetBytes, tra.offset)
|
||||
}
|
||||
|
||||
return n * 2, nil
|
||||
}
|
||||
189
go/store/nbs/index_transformer_test.go
Normal file
189
go/store/nbs/index_transformer_test.go
Normal file
@@ -0,0 +1,189 @@
|
||||
// Copyright 2022 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package nbs
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"math/rand"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/utils/test"
|
||||
)
|
||||
|
||||
// minByteReader is a copy of smallerByteReader from testing/iotest
|
||||
// but with a minimum read size of min bytes.
|
||||
|
||||
type minByteReader struct {
|
||||
r io.Reader
|
||||
min int
|
||||
|
||||
n int
|
||||
off int
|
||||
}
|
||||
|
||||
func (r *minByteReader) Read(p []byte) (int, error) {
|
||||
if len(p) == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
r.n = r.min + rand.Intn(r.min*100)
|
||||
|
||||
n := r.n
|
||||
if n > len(p) {
|
||||
n = len(p)
|
||||
}
|
||||
n, err := r.r.Read(p[0:n])
|
||||
if err != nil && err != io.EOF {
|
||||
err = fmt.Errorf("Read(%d bytes at offset %d): %v", n, r.off, err)
|
||||
}
|
||||
r.off += n
|
||||
return n, err
|
||||
}
|
||||
|
||||
// Altered from testing/iotest.TestReader to use minByteReader
|
||||
func testReader(r io.Reader, content []byte) error {
|
||||
if len(content) > 0 {
|
||||
n, err := r.Read(nil)
|
||||
if n != 0 || err != nil {
|
||||
return fmt.Errorf("Read(0) = %d, %v, want 0, nil", n, err)
|
||||
}
|
||||
}
|
||||
|
||||
data, err := io.ReadAll(&minByteReader{r: r, min: offsetSize})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !bytes.Equal(data, content) {
|
||||
return fmt.Errorf("ReadAll(varied amounts) = %q\n\twant %q", data, content)
|
||||
}
|
||||
|
||||
n, err := r.Read(make([]byte, offsetSize))
|
||||
if n != 0 || err != io.EOF {
|
||||
return fmt.Errorf("Read(offsetSize) at EOF = %v, %v, want 0, EOF", n, err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func get32Bytes(src []uint32) []byte {
|
||||
dst := make([]byte, len(src)*uint32Size)
|
||||
for i, start, end := 0, 0, lengthSize; i < len(src); i, start, end = i+1, end, end+lengthSize {
|
||||
p := dst[start:end]
|
||||
binary.BigEndian.PutUint32(p, src[i])
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
func get64Bytes(src []uint64) []byte {
|
||||
dst := make([]byte, len(src)*uint64Size)
|
||||
for i, start, end := 0, 0, offsetSize; i < len(src); i, start, end = i+1, end, end+offsetSize {
|
||||
p := dst[start:end]
|
||||
binary.BigEndian.PutUint64(p, src[i])
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
func randomUInt32s(n int) []uint32 {
|
||||
out := make([]uint32, n)
|
||||
for i := 0; i < n; i++ {
|
||||
out[i] = uint32(rand.Intn(1000))
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func calcOffsets(arr []uint32) []uint64 {
|
||||
out := make([]uint64, len(arr))
|
||||
out[0] = uint64(arr[0])
|
||||
for i := 1; i < len(arr); i++ {
|
||||
out[i] = out[i-1] + uint64(arr[i])
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func TestOffsetReader(t *testing.T) {
|
||||
testSize := rand.Intn(10) + 1
|
||||
lengths := randomUInt32s(testSize)
|
||||
offsets := calcOffsets(lengths)
|
||||
|
||||
lengthBytes := get32Bytes(lengths)
|
||||
offsetBytes := get64Bytes(offsets)
|
||||
|
||||
t.Run("converts lengths into offsets", func(t *testing.T) {
|
||||
lengthsReader := bytes.NewReader(lengthBytes)
|
||||
offsetReader := NewOffsetsReader(lengthsReader)
|
||||
|
||||
err := testReader(offsetReader, offsetBytes)
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
t.Run("err not enough bytes when expected", func(t *testing.T) {
|
||||
lengthsReader := bytes.NewReader(lengthBytes[:len(lengthBytes)-1])
|
||||
offsetReader := NewOffsetsReader(lengthsReader)
|
||||
_, err := io.ReadAll(offsetReader)
|
||||
require.ErrorAsf(t, err, &ErrNotEnoughBytes, "should return ErrNotEnoughBytes")
|
||||
})
|
||||
|
||||
t.Run("fills provided buffer correctly", func(t *testing.T) {
|
||||
lengthsReader := bytes.NewReader(lengthBytes)
|
||||
offsetReader := NewOffsetsReader(lengthsReader)
|
||||
p := make([]byte, offsetSize)
|
||||
n, err := offsetReader.Read(p)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, offsetSize, n)
|
||||
})
|
||||
|
||||
t.Run("works with io.ReadAll", func(t *testing.T) {
|
||||
lengthsReader := bytes.NewReader(lengthBytes[:lengthSize])
|
||||
offsetReader := NewOffsetsReader(lengthsReader)
|
||||
data, err := io.ReadAll(offsetReader)
|
||||
require.NoError(t, err)
|
||||
require.True(t, bytes.Equal(data, offsetBytes[:offsetSize]))
|
||||
})
|
||||
}
|
||||
|
||||
func TestIndexTransformer(t *testing.T) {
|
||||
chunkCount := rand.Intn(10) + 1
|
||||
lengths := randomUInt32s(chunkCount)
|
||||
offsets := calcOffsets(lengths)
|
||||
lengthBytes := get32Bytes(lengths)
|
||||
offsetBytes := get64Bytes(offsets)
|
||||
|
||||
tupleBytes := test.RandomData(chunkCount * prefixTupleSize)
|
||||
suffixBytes := test.RandomData(chunkCount * addrSuffixSize)
|
||||
|
||||
var inBytes []byte
|
||||
inBytes = append(inBytes, tupleBytes...)
|
||||
inBytes = append(inBytes, lengthBytes...)
|
||||
inBytes = append(inBytes, suffixBytes...)
|
||||
|
||||
var outBytes []byte
|
||||
outBytes = append(outBytes, tupleBytes...)
|
||||
outBytes = append(outBytes, offsetBytes...)
|
||||
outBytes = append(outBytes, suffixBytes...)
|
||||
|
||||
t.Run("only converts lengths into offsets", func(t *testing.T) {
|
||||
inReader := bytes.NewBuffer(inBytes)
|
||||
outReader := NewIndexTransformer(inReader, chunkCount)
|
||||
|
||||
err := testReader(outReader, outBytes)
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
}
|
||||
@@ -150,25 +150,28 @@ func TestMemTableWrite(t *testing.T) {
|
||||
|
||||
td1, _, err := buildTable(chunks[1:2])
|
||||
require.NoError(t, err)
|
||||
ti1, err := parseTableIndex(td1)
|
||||
ti1, err := parseTableIndexByCopy(td1)
|
||||
require.NoError(t, err)
|
||||
tr1, err := newTableReader(ti1, tableReaderAtFromBytes(td1), fileBlockSize)
|
||||
require.NoError(t, err)
|
||||
tr1 := newTableReader(ti1, tableReaderAtFromBytes(td1), fileBlockSize)
|
||||
assert.True(tr1.has(computeAddr(chunks[1])))
|
||||
|
||||
td2, _, err := buildTable(chunks[2:])
|
||||
require.NoError(t, err)
|
||||
ti2, err := parseTableIndex(td2)
|
||||
ti2, err := parseTableIndexByCopy(td2)
|
||||
require.NoError(t, err)
|
||||
tr2, err := newTableReader(ti2, tableReaderAtFromBytes(td2), fileBlockSize)
|
||||
require.NoError(t, err)
|
||||
tr2 := newTableReader(ti2, tableReaderAtFromBytes(td2), fileBlockSize)
|
||||
assert.True(tr2.has(computeAddr(chunks[2])))
|
||||
|
||||
_, data, count, err := mt.write(chunkReaderGroup{tr1, tr2}, &Stats{})
|
||||
require.NoError(t, err)
|
||||
assert.Equal(uint32(1), count)
|
||||
|
||||
ti, err := parseTableIndex(data)
|
||||
ti, err := parseTableIndexByCopy(data)
|
||||
require.NoError(t, err)
|
||||
outReader, err := newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize)
|
||||
require.NoError(t, err)
|
||||
outReader := newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize)
|
||||
assert.True(outReader.has(computeAddr(chunks[0])))
|
||||
assert.False(outReader.has(computeAddr(chunks[1])))
|
||||
assert.False(outReader.has(computeAddr(chunks[2])))
|
||||
|
||||
@@ -105,29 +105,35 @@ func newMmapTableReader(dir string, h addr, chunkCount uint32, indexCache *index
|
||||
// index. Mmap won't take an offset that's not page-aligned, so find the nearest page boundary preceding the index.
|
||||
indexOffset := fi.Size() - int64(footerSize) - int64(indexSize(chunkCount))
|
||||
aligned := indexOffset / mmapAlignment * mmapAlignment // Thanks, integer arithmetic!
|
||||
length := int(fi.Size() - aligned)
|
||||
|
||||
if fi.Size()-aligned > maxInt {
|
||||
err = fmt.Errorf("%s - size: %d alignment: %d> maxInt: %d", path, fi.Size(), aligned, maxInt)
|
||||
return
|
||||
}
|
||||
|
||||
var mm mmap.MMap
|
||||
mm, err = mmap.MapRegion(f, int(fi.Size()-aligned), mmap.RDONLY, 0, aligned)
|
||||
buff := make([]byte, indexSize(chunkCount)+footerSize)
|
||||
func() {
|
||||
var mm mmap.MMap
|
||||
mm, err = mmap.MapRegion(f, length, mmap.RDONLY, 0, aligned)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
defer func() {
|
||||
unmapErr := mm.Unmap()
|
||||
|
||||
if unmapErr != nil {
|
||||
err = unmapErr
|
||||
}
|
||||
}()
|
||||
copy(buff, mm[indexOffset-aligned:])
|
||||
}()
|
||||
if err != nil {
|
||||
return
|
||||
return onHeapTableIndex{}, err
|
||||
}
|
||||
|
||||
defer func() {
|
||||
unmapErr := mm.Unmap()
|
||||
|
||||
if unmapErr != nil {
|
||||
err = unmapErr
|
||||
}
|
||||
}()
|
||||
|
||||
buff := []byte(mm)
|
||||
ti, err = parseTableIndex(buff[indexOffset-aligned:])
|
||||
ti, err = parseTableIndex(buff)
|
||||
|
||||
if err != nil {
|
||||
return
|
||||
@@ -152,8 +158,12 @@ func newMmapTableReader(dir string, h addr, chunkCount uint32, indexCache *index
|
||||
return nil, errors.New("unexpected chunk count")
|
||||
}
|
||||
|
||||
tr, err := newTableReader(index, &cacheReaderAt{path, fc}, fileBlockSize)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &mmapTableReader{
|
||||
newTableReader(index, &cacheReaderAt{path, fc}, fileBlockSize),
|
||||
tr,
|
||||
fc,
|
||||
h,
|
||||
}, nil
|
||||
@@ -167,8 +177,12 @@ func (mmtr *mmapTableReader) Close() error {
|
||||
return mmtr.tableReader.Close()
|
||||
}
|
||||
|
||||
func (mmtr *mmapTableReader) Clone() chunkSource {
|
||||
return &mmapTableReader{mmtr.tableReader.Clone(), mmtr.fc, mmtr.h}
|
||||
func (mmtr *mmapTableReader) Clone() (chunkSource, error) {
|
||||
tr, err := mmtr.tableReader.Clone()
|
||||
if err != nil {
|
||||
return &mmapTableReader{}, err
|
||||
}
|
||||
return &mmapTableReader{tr, mmtr.fc, mmtr.h}, nil
|
||||
}
|
||||
|
||||
type cacheReaderAt struct {
|
||||
|
||||
@@ -100,9 +100,9 @@ func (ccs *persistingChunkSource) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ccs *persistingChunkSource) Clone() chunkSource {
|
||||
func (ccs *persistingChunkSource) Clone() (chunkSource, error) {
|
||||
// persistingChunkSource does not own |cs| or |mt|. No need to Clone.
|
||||
return ccs
|
||||
return ccs, nil
|
||||
}
|
||||
|
||||
func (ccs *persistingChunkSource) has(h addr) (bool, error) {
|
||||
@@ -308,6 +308,6 @@ func (ecs emptyChunkSource) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ecs emptyChunkSource) Clone() chunkSource {
|
||||
return ecs
|
||||
func (ecs emptyChunkSource) Clone() (chunkSource, error) {
|
||||
return ecs, nil
|
||||
}
|
||||
|
||||
@@ -461,13 +461,17 @@ func (ftp fakeTablePersister) Persist(ctx context.Context, mt *memTable, haver c
|
||||
if chunkCount > 0 {
|
||||
ftp.mu.Lock()
|
||||
defer ftp.mu.Unlock()
|
||||
ti, err := parseTableIndex(data)
|
||||
ti, err := parseTableIndexByCopy(data)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ftp.sources[name] = newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize)
|
||||
s, err := newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize)
|
||||
if err != nil {
|
||||
return emptyChunkSource{}, err
|
||||
}
|
||||
ftp.sources[name] = s
|
||||
return chunkSourceAdapter{ftp.sources[name], name}, nil
|
||||
}
|
||||
}
|
||||
@@ -484,13 +488,17 @@ func (ftp fakeTablePersister) ConjoinAll(ctx context.Context, sources chunkSourc
|
||||
if chunkCount > 0 {
|
||||
ftp.mu.Lock()
|
||||
defer ftp.mu.Unlock()
|
||||
ti, err := parseTableIndex(data)
|
||||
ti, err := parseTableIndexByCopy(data)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ftp.sources[name] = newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize)
|
||||
s, err := newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ftp.sources[name] = s
|
||||
return chunkSourceAdapter{ftp.sources[name], name}, nil
|
||||
}
|
||||
return emptyChunkSource{}, nil
|
||||
|
||||
@@ -76,12 +76,16 @@ func (m *fakeS3) readerForTable(name addr) (chunkReader, error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
if buff, present := m.data[name.String()]; present {
|
||||
ti, err := parseTableIndex(buff)
|
||||
ti, err := parseTableIndexByCopy(buff)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return newTableReader(ti, tableReaderAtFromBytes(buff), s3BlockSize), nil
|
||||
tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), s3BlockSize)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return tr, nil
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
@@ -94,13 +98,17 @@ func (m *fakeS3) readerForTableWithNamespace(ns string, name addr) (chunkReader,
|
||||
key = ns + "/" + key
|
||||
}
|
||||
if buff, present := m.data[key]; present {
|
||||
ti, err := parseTableIndex(buff)
|
||||
ti, err := parseTableIndexByCopy(buff)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return newTableReader(ti, tableReaderAtFromBytes(buff), s3BlockSize), nil
|
||||
tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), s3BlockSize)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return tr, nil
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
@@ -117,7 +117,10 @@ func (nbs *NomsBlockStore) GetChunkLocations(hashes hash.HashSet) (map[hash.Hash
|
||||
for _, cs := range css {
|
||||
switch tr := cs.(type) {
|
||||
case *mmapTableReader:
|
||||
offsetRecSlice, _ := tr.findOffsets(gr)
|
||||
offsetRecSlice, _, err := tr.findOffsets(gr)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if len(offsetRecSlice) > 0 {
|
||||
y, ok := ranges[hash.Hash(tr.h)]
|
||||
|
||||
@@ -154,7 +157,10 @@ func (nbs *NomsBlockStore) GetChunkLocations(hashes hash.HashSet) (map[hash.Hash
|
||||
var foundHashes []hash.Hash
|
||||
for h := range hashes {
|
||||
a := addr(h)
|
||||
e, ok := tableIndex.Lookup(&a)
|
||||
e, ok, err := tableIndex.Lookup(&a)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if ok {
|
||||
foundHashes = append(foundHashes, h)
|
||||
y[h] = Range{Offset: e.Offset(), Length: e.Length()}
|
||||
|
||||
@@ -525,3 +525,15 @@ func TestNBSCommitRetainsAppendix(t *testing.T) {
|
||||
assert.Equal(upstream.GetAppendixTableSpecInfo(0), newUpstream.GetTableSpecInfo(0))
|
||||
assert.Equal(newUpstream.GetTableSpecInfo(0), newUpstream.GetAppendixTableSpecInfo(0))
|
||||
}
|
||||
|
||||
func TestGuessPrefixOrdinal(t *testing.T) {
|
||||
prefixes := make([]uint64, 256)
|
||||
for i := range prefixes {
|
||||
prefixes[i] = uint64(i << 56)
|
||||
}
|
||||
|
||||
for i, pre := range prefixes {
|
||||
guess := GuessPrefixOrdinal(pre, 256)
|
||||
assert.Equal(t, i, guess)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -130,6 +130,7 @@ const (
|
||||
uint32Size = 4
|
||||
ordinalSize = uint32Size
|
||||
lengthSize = uint32Size
|
||||
offsetSize = uint64Size
|
||||
magicNumber = "\xff\xb5\xd8\xc2\x24\x63\xee\x50"
|
||||
magicNumberSize = 8 //len(magicNumber)
|
||||
footerSize = uint32Size + uint64Size + magicNumberSize
|
||||
@@ -238,7 +239,7 @@ type chunkReader interface {
|
||||
}
|
||||
|
||||
type chunkReadPlanner interface {
|
||||
findOffsets(reqs []getRecord) (ors offsetRecSlice, remaining bool)
|
||||
findOffsets(reqs []getRecord) (ors offsetRecSlice, remaining bool, err error)
|
||||
getManyAtOffsets(
|
||||
ctx context.Context,
|
||||
eg *errgroup.Group,
|
||||
@@ -269,7 +270,7 @@ type chunkSource interface {
|
||||
// cannot be |Close|d more than once, so if a |chunkSource| is being
|
||||
// retained in two objects with independent life-cycle, it should be
|
||||
// |Clone|d first.
|
||||
Clone() chunkSource
|
||||
Clone() (chunkSource, error)
|
||||
}
|
||||
|
||||
type chunkSources []chunkSource
|
||||
|
||||
522
go/store/nbs/table_index.go
Normal file
522
go/store/nbs/table_index.go
Normal file
@@ -0,0 +1,522 @@
|
||||
// Copyright 2022 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package nbs
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"io"
|
||||
"os"
|
||||
"sort"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/dolthub/mmap-go"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/utils/iohelp"
|
||||
)
|
||||
|
||||
var (
|
||||
ErrWrongBufferSize = errors.New("buffer length and/or capacity incorrect for chunkCount specified in footer")
|
||||
ErrWrongCopySize = errors.New("could not copy enough bytes")
|
||||
)
|
||||
|
||||
// tableIndex provides read access to the index section of an NBS table
// file, mapping chunk addresses to their offset and length within the file.
// Implementations in this file live on the Go heap (onHeapTableIndex) or in
// mmap'd memory (mmapTableIndex).
type tableIndex interface {
	// ChunkCount returns the total number of chunks in the indexed file.
	ChunkCount() uint32
	// EntrySuffixMatches returns true if the entry at index |idx| matches
	// the suffix of the address |h|. Used by |Lookup| after finding
	// matching indexes based on |Prefixes|.
	EntrySuffixMatches(idx uint32, h *addr) (bool, error)
	// IndexEntry returns the |indexEntry| at |idx|. Optionally puts the
	// full address of that entry in |a| if |a| is not |nil|.
	IndexEntry(idx uint32, a *addr) (indexEntry, error)
	// Lookup returns an |indexEntry| for the chunk corresponding to the
	// provided address |h|. Second returns is |true| if an entry exists
	// and |false| otherwise.
	Lookup(h *addr) (indexEntry, bool, error)
	// Ordinals returns a slice of indexes which maps the |i|th chunk in
	// the indexed file to its corresponding entry in index. The |i|th
	// entry in the result is the |i|th chunk in the indexed file, and its
	// corresponding value in the slice is the index entry that maps to it.
	Ordinals() ([]uint32, error)
	// Prefixes returns the sorted slice of |uint64| |addr| prefixes; each
	// entry corresponds to an indexed chunk address.
	Prefixes() ([]uint64, error)
	// TableFileSize returns the total size of the indexed table file, in bytes.
	TableFileSize() uint64
	// TotalUncompressedData returns the total uncompressed data size of
	// the table file. Used for informational statistics only.
	TotalUncompressedData() uint64

	// Close releases any resources used by this tableIndex.
	Close() error

	// Clone returns a |tableIndex| with the same contents which can be
	// |Close|d independently.
	Clone() (tableIndex, error)
}
|
||||
|
||||
// ReadTableFooter reads the footer of an NBS table file from |rd| and
// returns the chunk count and total uncompressed data size recorded there.
// The footer is the trailing |uint32 chunkCount | uint64 uncompressed | magicNumber|
// sequence; ErrInvalidTableFile is returned when the magic number does not
// match. On success |rd| is left positioned at the end of the file.
func ReadTableFooter(rd io.ReadSeeker) (chunkCount uint32, totalUncompressedData uint64, err error) {
	// Seek to the start of the footer, measured back from the end of the file.
	footerSize := int64(magicNumberSize + uint64Size + uint32Size)
	_, err = rd.Seek(-footerSize, io.SeekEnd)

	if err != nil {
		return 0, 0, err
	}

	footer, err := iohelp.ReadNBytes(rd, int(footerSize))

	if err != nil {
		return 0, 0, err
	}

	// The magic number occupies the final magicNumberSize bytes of the footer.
	if string(footer[uint32Size+uint64Size:]) != magicNumber {
		return 0, 0, ErrInvalidTableFile
	}

	chunkCount = binary.BigEndian.Uint32(footer)
	totalUncompressedData = binary.BigEndian.Uint64(footer[uint32Size:])

	return
}
|
||||
|
||||
// parseTableIndex parses a valid nbs tableIndex from a byte stream. |buff| must end with an NBS index
// and footer and its length and capacity must match the expected indexSize for the chunkCount specified in the footer.
// Retains the buffer and does not allocate new memory except for offsets, computes on buff in place.
func parseTableIndex(buff []byte) (onHeapTableIndex, error) {
	chunkCount, totalUncompressedData, err := ReadTableFooter(bytes.NewReader(buff))
	if err != nil {
		return onHeapTableIndex{}, err
	}
	// Enforce that |buff| is exactly one index plus footer; a larger or
	// re-sliced buffer would break the in-place segment partitioning below.
	iS := indexSize(chunkCount) + footerSize
	if uint64(len(buff)) != iS || uint64(cap(buff)) != iS {
		return onHeapTableIndex{}, ErrWrongBufferSize
	}
	// Drop the footer; NewOnHeapTableIndex expects only the index bytes.
	buff = buff[:len(buff)-footerSize]
	return NewOnHeapTableIndex(buff, chunkCount, totalUncompressedData)
}
|
||||
|
||||
// parseTableIndexByCopy reads the footer, copies indexSize(chunkCount) bytes, and parses an on heap table index.
|
||||
// Useful to create an onHeapTableIndex without retaining the entire underlying array of data.
|
||||
func parseTableIndexByCopy(buff []byte) (onHeapTableIndex, error) {
|
||||
r := bytes.NewReader(buff)
|
||||
return ReadTableIndexByCopy(r)
|
||||
}
|
||||
|
||||
// ReadTableIndexByCopy loads an index into memory from an io.ReadSeeker.
// Caution: Allocates new memory for the entire index.
func ReadTableIndexByCopy(rd io.ReadSeeker) (onHeapTableIndex, error) {
	chunkCount, totalUncompressedData, err := ReadTableFooter(rd)
	if err != nil {
		return onHeapTableIndex{}, err
	}
	// ReadTableFooter leaves |rd| at end-of-file; seek back over the footer
	// and the index itself to the start of the index bytes.
	iS := int64(indexSize(chunkCount))
	_, err = rd.Seek(-(iS + footerSize), io.SeekEnd)
	if err != nil {
		return onHeapTableIndex{}, ErrInvalidTableFile
	}
	buff := make([]byte, iS)
	_, err = io.ReadFull(rd, buff)
	if err != nil {
		return onHeapTableIndex{}, err
	}

	return NewOnHeapTableIndex(buff, chunkCount, totalUncompressedData)
}
|
||||
|
||||
// onHeapTableIndex is a tableIndex whose segments live in Go heap memory.
// Close is a no-op and Clone returns the value itself.
type onHeapTableIndex struct {
	tableFileSize uint64
	// Tuple bytes: sorted (prefix, ordinal) pairs, prefixTupleSize bytes each.
	tupleB []byte
	// Offset bytes: one big-endian uint64 cumulative end offset per chunk,
	// indexed by ordinal (see getIndexEntry / offsetAt).
	offsetB []byte
	// Suffix bytes: addrSuffixSize-byte address suffixes, indexed by ordinal.
	suffixB               []byte
	chunkCount            uint32
	totalUncompressedData uint64
}

// Compile-time interface conformance check.
var _ tableIndex = &onHeapTableIndex{}
|
||||
|
||||
// NewOnHeapTableIndex creates a table index given a buffer of just the table index (no footer).
// |b| is partitioned in place into its tuple, length, and suffix segments;
// the lengths are streamed through NewOffsetsReader to produce cumulative
// offsets, which are the only newly allocated segment.
func NewOnHeapTableIndex(b []byte, chunkCount uint32, totalUncompressedData uint64) (onHeapTableIndex, error) {
	tuples := b[:prefixTupleSize*chunkCount]
	lengths := b[prefixTupleSize*chunkCount : prefixTupleSize*chunkCount+lengthSize*chunkCount]
	suffixes := b[prefixTupleSize*chunkCount+lengthSize*chunkCount:]

	// Convert the uint32 chunk lengths into cumulative uint64 end offsets.
	lR := bytes.NewReader(lengths)
	offsets := make([]byte, chunkCount*offsetSize)
	_, err := io.ReadFull(NewOffsetsReader(lR), offsets)
	if err != nil {
		return onHeapTableIndex{}, err
	}
	/**
	TODO: Optimize memory usage further
	There's wasted space here. The lengths segment in the buffer is retained unnecessarily. We can use that space to
	store half the offsets and then allocate an additional len(lengths) to store the rest.
	*/

	return onHeapTableIndex{
		tupleB:                tuples,
		offsetB:               offsets,
		suffixB:               suffixes,
		chunkCount:            chunkCount,
		totalUncompressedData: totalUncompressedData,
	}, nil
}
|
||||
|
||||
// ChunkCount returns the total number of chunks in the indexed file.
func (ti onHeapTableIndex) ChunkCount() uint32 {
	return ti.chunkCount
}

// EntrySuffixMatches returns true if the address suffix stored for the
// entry at tuple position |idx| equals the suffix of |h|.
func (ti onHeapTableIndex) EntrySuffixMatches(idx uint32, h *addr) (bool, error) {
	// Suffixes are stored in ordinal order, so map tuple position -> ordinal.
	ord := ti.ordinalAt(idx)
	o := ord * addrSuffixSize
	b := ti.suffixB[o : o+addrSuffixSize]
	return bytes.Equal(h[addrPrefixSize:], b), nil
}

// IndexEntry returns the offset/length entry at tuple position |idx|,
// reconstructing the entry's full address into |a| when |a| is non-nil.
func (ti onHeapTableIndex) IndexEntry(idx uint32, a *addr) (entry indexEntry, err error) {
	prefix, ord := ti.tupleAt(idx)

	if a != nil {
		// Rebuild the full address from its stored prefix and suffix halves.
		binary.BigEndian.PutUint64(a[:], prefix)

		o := int64(addrSuffixSize * ord)
		b := ti.suffixB[o : o+addrSuffixSize]
		copy(a[addrPrefixSize:], b)
	}

	return ti.getIndexEntry(ord), nil
}
|
||||
|
||||
func (ti onHeapTableIndex) getIndexEntry(ord uint32) indexEntry {
|
||||
var prevOff uint64
|
||||
if ord == 0 {
|
||||
prevOff = 0
|
||||
} else {
|
||||
prevOff = ti.offsetAt(ord - 1)
|
||||
}
|
||||
ordOff := ti.offsetAt(ord)
|
||||
length := uint32(ordOff - prevOff)
|
||||
return indexResult{
|
||||
o: prevOff,
|
||||
l: length,
|
||||
}
|
||||
}
|
||||
|
||||
func (ti onHeapTableIndex) Lookup(h *addr) (indexEntry, bool, error) {
|
||||
ord, err := ti.lookupOrdinal(h)
|
||||
if err != nil {
|
||||
return indexResult{}, false, err
|
||||
}
|
||||
if ord == ti.chunkCount {
|
||||
return indexResult{}, false, nil
|
||||
}
|
||||
return ti.getIndexEntry(ord), true, nil
|
||||
}
|
||||
|
||||
// lookupOrdinal returns the ordinal of |h| if present. Returns |ti.chunkCount|
// if absent.
func (ti onHeapTableIndex) lookupOrdinal(h *addr) (uint32, error) {
	prefix := h.Prefix()

	// Tuples are sorted by prefix, so all candidates for |prefix| are
	// contiguous starting at prefixIdx; disambiguate them by suffix.
	for idx := ti.prefixIdx(prefix); idx < ti.chunkCount && ti.prefixAt(idx) == prefix; idx++ {
		m, err := ti.EntrySuffixMatches(idx, h)
		if err != nil {
			return ti.chunkCount, err
		}
		if m {
			return ti.ordinalAt(idx), nil
		}
	}

	return ti.chunkCount, nil
}

// prefixIdx returns the first position in |tr.prefixes| whose value ==
// |prefix|. Returns |tr.chunkCount| if absent.
func (ti onHeapTableIndex) prefixIdx(prefix uint64) (idx uint32) {
	// NOTE: The golang impl of sort.Search is basically inlined here. This method can be called in
	// an extremely tight loop and inlining the code was a significant perf improvement.
	idx, j := 0, ti.chunkCount
	for idx < j {
		h := idx + (j-idx)/2 // avoid overflow when computing h
		// i ≤ h < j
		if ti.prefixAt(h) < prefix {
			idx = h + 1 // preserves f(i-1) == false
		} else {
			j = h // preserves f(j) == true
		}
	}

	return
}
|
||||
|
||||
func (ti onHeapTableIndex) tupleAt(idx uint32) (prefix uint64, ord uint32) {
|
||||
off := int64(prefixTupleSize * idx)
|
||||
b := ti.tupleB[off : off+prefixTupleSize]
|
||||
|
||||
prefix = binary.BigEndian.Uint64(b[:])
|
||||
ord = binary.BigEndian.Uint32(b[addrPrefixSize:])
|
||||
return prefix, ord
|
||||
}
|
||||
|
||||
func (ti onHeapTableIndex) prefixAt(idx uint32) uint64 {
|
||||
off := int64(prefixTupleSize * idx)
|
||||
b := ti.tupleB[off : off+addrPrefixSize]
|
||||
return binary.BigEndian.Uint64(b)
|
||||
}
|
||||
|
||||
func (ti onHeapTableIndex) ordinalAt(idx uint32) uint32 {
|
||||
off := int64(prefixTupleSize*idx) + addrPrefixSize
|
||||
b := ti.tupleB[off : off+ordinalSize]
|
||||
return binary.BigEndian.Uint32(b)
|
||||
}
|
||||
|
||||
func (ti onHeapTableIndex) offsetAt(ord uint32) uint64 {
|
||||
off := int64(offsetSize * ord)
|
||||
b := ti.offsetB[off : off+offsetSize]
|
||||
return binary.BigEndian.Uint64(b)
|
||||
}
|
||||
|
||||
func (ti onHeapTableIndex) Ordinals() ([]uint32, error) {
|
||||
o := make([]uint32, ti.chunkCount)
|
||||
for i, off := uint32(0), 0; i < ti.chunkCount; i, off = i+1, off+prefixTupleSize {
|
||||
b := ti.tupleB[off+addrPrefixSize : off+prefixTupleSize]
|
||||
o[i] = binary.BigEndian.Uint32(b)
|
||||
}
|
||||
return o, nil
|
||||
}
|
||||
|
||||
func (ti onHeapTableIndex) Prefixes() ([]uint64, error) {
|
||||
p := make([]uint64, ti.chunkCount)
|
||||
for i, off := uint32(0), 0; i < ti.chunkCount; i, off = i+1, off+prefixTupleSize {
|
||||
b := ti.tupleB[off : off+addrPrefixSize]
|
||||
p[i] = binary.BigEndian.Uint64(b)
|
||||
}
|
||||
return p, nil
|
||||
}
|
||||
|
||||
// TableFileSize returns the size of the table file that this index references.
|
||||
// This assumes that the index follows immediately after the last chunk in the
|
||||
// file and that the last chunk in the file is in the index.
|
||||
func (ti onHeapTableIndex) TableFileSize() uint64 {
|
||||
if ti.chunkCount == 0 {
|
||||
return footerSize
|
||||
}
|
||||
entry := ti.getIndexEntry(ti.chunkCount - 1)
|
||||
offset, len := entry.Offset(), uint64(entry.Length())
|
||||
return offset + len + indexSize(ti.chunkCount) + footerSize
|
||||
}
|
||||
|
||||
// TotalUncompressedData returns the total uncompressed data size recorded
// in the table file footer. Used for informational statistics only.
func (ti onHeapTableIndex) TotalUncompressedData() uint64 {
	return ti.totalUncompressedData
}

// Close is a no-op: the index holds only garbage-collected heap memory.
func (ti onHeapTableIndex) Close() error {
	return nil
}

// Clone returns the receiver itself; the methods above only read its
// segments, so sharing the value is safe.
func (ti onHeapTableIndex) Clone() (tableIndex, error) {
	return ti, nil
}
|
||||
|
||||
// mmap table index
|
||||
|
||||
// mmapIndexEntry is one fixed-size entry in the mmap'd index: an
// addrSuffixSize-byte address suffix, then a big-endian uint64 offset,
// then a big-endian uint32 length.
type mmapIndexEntry []byte

const mmapIndexEntryOffsetStart = addrSuffixSize
const mmapIndexEntryLengthStart = addrSuffixSize + uint64Size

// suffix returns the address-suffix bytes of the entry.
func (e mmapIndexEntry) suffix() []byte {
	return e[:addrSuffixSize]
}

// Offset returns the entry's stored chunk offset.
func (e mmapIndexEntry) Offset() uint64 {
	return binary.BigEndian.Uint64(e[mmapIndexEntryOffsetStart:])
}

// Length returns the entry's stored chunk length.
func (e mmapIndexEntry) Length() uint32 {
	return binary.BigEndian.Uint32(e[mmapIndexEntryLengthStart:])
}
|
||||
|
||||
func mmapOffheapSize(chunks int) int {
|
||||
pageSize := 4096
|
||||
esz := addrSuffixSize + uint64Size + lengthSize
|
||||
min := esz * chunks
|
||||
if min%pageSize == 0 {
|
||||
return min
|
||||
} else {
|
||||
return (min/pageSize + 1) * pageSize
|
||||
}
|
||||
}
|
||||
|
||||
// An mmapIndexEntry is an addrSuffix, a BigEndian uint64 for the offset and a
// BigEndian uint32 for the chunk size.
const mmapIndexEntrySize = addrSuffixSize + uint64Size + lengthSize

// mmapOrdinal pairs an entry's position in the mmap'd index with its chunk
// offset; Ordinals sorts on offset to recover ordinal order.
type mmapOrdinal struct {
	idx    int
	offset uint64
}

// mmapOrdinalSlice implements sort.Interface, ordering entries by offset.
type mmapOrdinalSlice []mmapOrdinal

func (s mmapOrdinalSlice) Len() int           { return len(s) }
func (s mmapOrdinalSlice) Less(i, j int) bool { return s[i].offset < s[j].offset }
func (s mmapOrdinalSlice) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }

// mmapTableIndex is a tableIndex whose entries live in mmap'd (off-heap)
// memory. Clone/Close maintain a shared reference count; the mapping is
// unmapped when the last reference is Closed.
type mmapTableIndex struct {
	chunkCount            uint32
	totalUncompressedData uint64
	fileSz                uint64
	// prefixes is kept on-heap, mirroring the (sorted) tuple prefixes.
	prefixes []uint64
	data     mmap.MMap
	// refCnt is shared by every Clone of this index.
	refCnt *int32
}
|
||||
|
||||
// newMmapTableIndex converts an onHeapTableIndex into an mmapTableIndex,
// writing one fixed-size entry (suffix, offset, length) per chunk into an
// mmap'd region backed by |f|, or into anonymous memory when |f| is nil.
func newMmapTableIndex(ti onHeapTableIndex, f *os.File) (mmapTableIndex, error) {
	flags := 0
	if f == nil {
		// No backing file: map anonymous memory instead.
		flags = mmap.ANON
	}
	arr, err := mmap.MapRegion(f, mmapOffheapSize(int(ti.chunkCount)), mmap.RDWR, flags, 0)
	if err != nil {
		return mmapTableIndex{}, err
	}
	var a addr
	for i := uint32(0); i < ti.chunkCount; i++ {
		idx := i * mmapIndexEntrySize
		// Entries are laid out in tuple (sorted-prefix) order: copy the
		// suffix for this tuple's ordinal, then its offset and length.
		si := addrSuffixSize * ti.ordinalAt(i)
		copy(arr[idx:], ti.suffixB[si:si+addrSuffixSize])

		e, err := ti.IndexEntry(i, &a)
		if err != nil {
			return mmapTableIndex{}, err
		}
		binary.BigEndian.PutUint64(arr[idx+mmapIndexEntryOffsetStart:], e.Offset())
		binary.BigEndian.PutUint32(arr[idx+mmapIndexEntryLengthStart:], e.Length())
	}

	// Start the shared reference count at one for the returned index.
	refCnt := new(int32)
	*refCnt = 1
	p, err := ti.Prefixes()
	if err != nil {
		return mmapTableIndex{}, err
	}
	return mmapTableIndex{
		ti.chunkCount,
		ti.totalUncompressedData,
		ti.TableFileSize(),
		p,
		arr,
		refCnt,
	}, nil
}
|
||||
|
||||
// ChunkCount returns the total number of chunks in the indexed file.
func (i mmapTableIndex) ChunkCount() uint32 {
	return i.chunkCount
}

// EntrySuffixMatches reports whether the suffix stored at entry |idx|
// equals the suffix of |h|.
func (i mmapTableIndex) EntrySuffixMatches(idx uint32, h *addr) (bool, error) {
	mi := idx * mmapIndexEntrySize
	e := mmapIndexEntry(i.data[mi : mi+mmapIndexEntrySize])
	return bytes.Equal(e.suffix(), h[addrPrefixSize:]), nil
}

// IndexEntry returns the entry at position |idx|, reconstructing its full
// address into |a| when |a| is non-nil.
func (i mmapTableIndex) IndexEntry(idx uint32, a *addr) (indexEntry, error) {
	mi := idx * mmapIndexEntrySize
	e := mmapIndexEntry(i.data[mi : mi+mmapIndexEntrySize])
	if a != nil {
		binary.BigEndian.PutUint64(a[:], i.prefixes[idx])
		copy(a[addrPrefixSize:], e.suffix())
	}
	return e, nil
}

// Lookup scans the contiguous run of entries whose prefix matches |h|
// (i.prefixes is sorted) and returns the one whose suffix also matches.
func (i mmapTableIndex) Lookup(h *addr) (indexEntry, bool, error) {
	prefix := binary.BigEndian.Uint64(h[:])
	for idx := i.prefixIdx(prefix); idx < i.chunkCount && i.prefixes[idx] == prefix; idx++ {
		mi := idx * mmapIndexEntrySize
		e := mmapIndexEntry(i.data[mi : mi+mmapIndexEntrySize])
		if bytes.Equal(e.suffix(), h[addrPrefixSize:]) {
			return e, true, nil
		}
	}
	return mmapIndexEntry{}, false, nil
}

// Ordinals recovers each entry's chunk ordinal by sorting entries on their
// offsets: the entry with the smallest offset has ordinal 0, and so on.
func (i mmapTableIndex) Ordinals() ([]uint32, error) {
	s := mmapOrdinalSlice(make([]mmapOrdinal, i.chunkCount))
	for idx := 0; uint32(idx) < i.chunkCount; idx++ {
		mi := idx * mmapIndexEntrySize
		e := mmapIndexEntry(i.data[mi : mi+mmapIndexEntrySize])
		s[idx] = mmapOrdinal{idx, e.Offset()}
	}
	sort.Sort(s)
	res := make([]uint32, i.chunkCount)
	for j, r := range s {
		res[r.idx] = uint32(j)
	}
	return res, nil
}

// Prefixes returns the sorted address prefixes, one per chunk.
func (i mmapTableIndex) Prefixes() ([]uint64, error) {
	return i.prefixes, nil
}

// TableFileSize returns the total size of the indexed table file, in bytes.
func (i mmapTableIndex) TableFileSize() uint64 {
	return i.fileSz
}

// TotalUncompressedData returns the footer's total uncompressed data size.
func (i mmapTableIndex) TotalUncompressedData() uint64 {
	return i.totalUncompressedData
}

// Close decrements the shared reference count and unmaps the region when
// the last reference is released.
func (i mmapTableIndex) Close() error {
	cnt := atomic.AddInt32(i.refCnt, -1)
	if cnt == 0 {
		return i.data.Unmap()
	}
	if cnt < 0 {
		panic("Close() called and reduced ref count to < 0.")
	}
	return nil
}

// Clone increments the shared reference count and returns the same value;
// each Clone must be Closed independently.
func (i mmapTableIndex) Clone() (tableIndex, error) {
	cnt := atomic.AddInt32(i.refCnt, 1)
	if cnt == 1 {
		panic("Clone() called after last Close(). This index is no longer valid.")
	}
	return i, nil
}

// prefixIdx returns the position of the first entry whose prefix is not
// less than |prefix|, or i.chunkCount if all prefixes are smaller.
func (i mmapTableIndex) prefixIdx(prefix uint64) (idx uint32) {
	// NOTE: The golang impl of sort.Search is basically inlined here. This method can be called in
	// an extremely tight loop and inlining the code was a significant perf improvement.
	idx, j := 0, i.chunkCount
	for idx < j {
		h := idx + (j-idx)/2 // avoid overflow when computing h
		// i ≤ h < j
		if i.prefixes[h] < prefix {
			idx = h + 1 // preserves f(i-1) == false
		} else {
			j = h // preserves f(j) == true
		}
	}
	return
}
|
||||
104
go/store/nbs/table_index_test.go
Normal file
104
go/store/nbs/table_index_test.go
Normal file
@@ -0,0 +1,104 @@
|
||||
// Copyright 2022 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package nbs
|
||||
|
||||
import (
|
||||
"io"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// TestParseTableIndex parses a checked-in index fixture and verifies that
// every IndexEntry can be found again via Lookup with the same offset and
// length.
func TestParseTableIndex(t *testing.T) {
	f, err := os.Open("testdata/0oa7mch34jg1rvghrnhr4shrp2fm4ftd.idx")
	require.NoError(t, err)
	defer f.Close()
	bs, err := io.ReadAll(f)
	require.NoError(t, err)
	idx, err := parseTableIndexByCopy(bs)
	require.NoError(t, err)
	defer idx.Close()
	assert.Equal(t, uint32(596), idx.ChunkCount())
	seen := make(map[addr]bool)
	for i := uint32(0); i < idx.ChunkCount(); i++ {
		var onheapaddr addr
		e, err := idx.IndexEntry(i, &onheapaddr)
		require.NoError(t, err)
		// Check each distinct address only once.
		if _, ok := seen[onheapaddr]; !ok {
			seen[onheapaddr] = true
			lookupe, ok, err := idx.Lookup(&onheapaddr)
			require.NoError(t, err)
			assert.True(t, ok)
			assert.Equal(t, e.Offset(), lookupe.Offset(), "%v does not match %v for address %v", e, lookupe, onheapaddr)
			assert.Equal(t, e.Length(), lookupe.Length())
		}
	}
}
|
||||
|
||||
// TestMMapIndex builds an mmapTableIndex from a parsed on-heap index and
// verifies that the two report identical entries, lookups, ordinals,
// prefixes, and size statistics.
func TestMMapIndex(t *testing.T) {
	f, err := os.Open("testdata/0oa7mch34jg1rvghrnhr4shrp2fm4ftd.idx")
	require.NoError(t, err)
	defer f.Close()
	bs, err := io.ReadAll(f)
	require.NoError(t, err)
	idx, err := parseTableIndexByCopy(bs)
	require.NoError(t, err)
	defer idx.Close()
	mmidx, err := newMmapTableIndex(idx, nil)
	require.NoError(t, err)
	defer mmidx.Close()
	assert.Equal(t, idx.ChunkCount(), mmidx.ChunkCount())
	seen := make(map[addr]bool)
	for i := uint32(0); i < idx.ChunkCount(); i++ {
		var onheapaddr addr
		onheapentry, err := idx.IndexEntry(i, &onheapaddr)
		require.NoError(t, err)
		var mmaddr addr
		mmentry, err := mmidx.IndexEntry(i, &mmaddr)
		require.NoError(t, err)
		assert.Equal(t, onheapaddr, mmaddr)
		assert.Equal(t, onheapentry.Offset(), mmentry.Offset())
		assert.Equal(t, onheapentry.Length(), mmentry.Length())
		// Look up each distinct address once through the mmap index.
		if _, ok := seen[onheapaddr]; !ok {
			seen[onheapaddr] = true
			mmentry, found, err := mmidx.Lookup(&onheapaddr)
			require.NoError(t, err)
			assert.True(t, found)
			assert.Equal(t, onheapentry.Offset(), mmentry.Offset(), "%v does not match %v for address %v", onheapentry, mmentry, onheapaddr)
			assert.Equal(t, onheapentry.Length(), mmentry.Length())
		}
		// A perturbed address (byte 19 zeroed) should not be found.
		wrongaddr := onheapaddr
		if wrongaddr[19] != 0 {
			wrongaddr[19] = 0
			_, found, err := mmidx.Lookup(&wrongaddr)
			require.NoError(t, err)
			assert.False(t, found)
		}
	}
	o1, err := idx.Ordinals()
	require.NoError(t, err)
	o2, err := mmidx.Ordinals()
	require.NoError(t, err)
	assert.Equal(t, o1, o2)
	p1, err := idx.Prefixes()
	require.NoError(t, err)
	p2, err := mmidx.Prefixes()
	require.NoError(t, err)
	assert.Equal(t, p1, p2)
	assert.Equal(t, idx.TableFileSize(), mmidx.TableFileSize())
	assert.Equal(t, idx.TotalUncompressedData(), mmidx.TotalUncompressedData())
}
|
||||
@@ -256,8 +256,14 @@ func planConjoin(sources chunkSources, stats *Stats) (plan compactionPlan, err e
|
||||
return compactionPlan{}, err
|
||||
}
|
||||
|
||||
ordinals := index.Ordinals()
|
||||
prefixes := index.Prefixes()
|
||||
ordinals, err := index.Ordinals()
|
||||
if err != nil {
|
||||
return compactionPlan{}, err
|
||||
}
|
||||
prefixes, err := index.Prefixes()
|
||||
if err != nil {
|
||||
return compactionPlan{}, err
|
||||
}
|
||||
|
||||
// Add all the prefix tuples from this index to the list of all prefixIndexRecs, modifying the ordinals such that all entries from the 1st item in sources come after those in the 0th and so on.
|
||||
for j, prefix := range prefixes {
|
||||
@@ -277,15 +283,16 @@ func planConjoin(sources chunkSources, stats *Stats) (plan compactionPlan, err e
|
||||
if onHeap, ok := index.(onHeapTableIndex); ok {
|
||||
// TODO: copy the lengths and suffixes as a byte-copy from src BUG #3438
|
||||
// Bring over the lengths block, in order
|
||||
for _, length := range onHeap.lengths {
|
||||
binary.BigEndian.PutUint32(plan.mergedIndex[lengthsPos:], length)
|
||||
for ord := uint32(0); ord < onHeap.chunkCount; ord++ {
|
||||
e := onHeap.getIndexEntry(ord)
|
||||
binary.BigEndian.PutUint32(plan.mergedIndex[lengthsPos:], e.Length())
|
||||
lengthsPos += lengthSize
|
||||
}
|
||||
|
||||
// Bring over the suffixes block, in order
|
||||
n := copy(plan.mergedIndex[suffixesPos:], onHeap.suffixes)
|
||||
n := copy(plan.mergedIndex[suffixesPos:], onHeap.suffixB)
|
||||
|
||||
if n != len(onHeap.suffixes) {
|
||||
if n != len(onHeap.suffixB) {
|
||||
return compactionPlan{}, errors.New("failed to copy all data")
|
||||
}
|
||||
|
||||
@@ -294,7 +301,10 @@ func planConjoin(sources chunkSources, stats *Stats) (plan compactionPlan, err e
|
||||
// Build up the index one entry at a time.
|
||||
var a addr
|
||||
for i := 0; i < len(ordinals); i++ {
|
||||
e := index.IndexEntry(uint32(i), &a)
|
||||
e, err := index.IndexEntry(uint32(i), &a)
|
||||
if err != nil {
|
||||
return compactionPlan{}, err
|
||||
}
|
||||
li := lengthsPos + lengthSize*uint64(ordinals[i])
|
||||
si := suffixesPos + addrSuffixSize*uint64(ordinals[i])
|
||||
binary.BigEndian.PutUint32(plan.mergedIndex[li:], e.Length())
|
||||
|
||||
@@ -45,9 +45,11 @@ func TestPlanCompaction(t *testing.T) {
|
||||
}
|
||||
data, name, err := buildTable(content)
|
||||
require.NoError(t, err)
|
||||
ti, err := parseTableIndex(data)
|
||||
ti, err := parseTableIndexByCopy(data)
|
||||
require.NoError(t, err)
|
||||
src := chunkSourceAdapter{newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize), name}
|
||||
tr, err := newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize)
|
||||
require.NoError(t, err)
|
||||
src := chunkSourceAdapter{tr, name}
|
||||
dataLens = append(dataLens, uint64(len(data))-indexSize(mustUint32(src.count()))-footerSize)
|
||||
sources = append(sources, src)
|
||||
}
|
||||
@@ -67,7 +69,8 @@ func TestPlanCompaction(t *testing.T) {
|
||||
assert.Equal(totalChunks, idx.chunkCount)
|
||||
assert.Equal(totalUnc, idx.totalUncompressedData)
|
||||
|
||||
tr := newTableReader(idx, tableReaderAtFromBytes(nil), fileBlockSize)
|
||||
tr, err := newTableReader(idx, tableReaderAtFromBytes(nil), fileBlockSize)
|
||||
require.NoError(t, err)
|
||||
for _, content := range tableContents {
|
||||
assertChunksInReader(content, tr, assert)
|
||||
}
|
||||
|
||||
@@ -22,20 +22,16 @@
|
||||
package nbs
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"io"
|
||||
"os"
|
||||
"sort"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/dolthub/mmap-go"
|
||||
"github.com/golang/snappy"
|
||||
"golang.org/x/sync/errgroup"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/utils/iohelp"
|
||||
"github.com/dolthub/dolt/go/store/chunks"
|
||||
"github.com/dolthub/dolt/go/store/hash"
|
||||
)
|
||||
@@ -107,14 +103,6 @@ func init() {
|
||||
// ErrInvalidTableFile is an error returned when a table file is corrupt or invalid.
|
||||
var ErrInvalidTableFile = errors.New("invalid or corrupt table file")
|
||||
|
||||
type onHeapTableIndex struct {
|
||||
chunkCount uint32
|
||||
totalUncompressedData uint64
|
||||
prefixes, offsets []uint64
|
||||
lengths, ordinals []uint32
|
||||
suffixes []byte
|
||||
}
|
||||
|
||||
type indexEntry interface {
|
||||
Offset() uint64
|
||||
Length() uint32
|
||||
@@ -133,181 +121,6 @@ func (ir indexResult) Length() uint32 {
|
||||
return ir.l
|
||||
}
|
||||
|
||||
// An mmapIndexEntry is an addrSuffix, a BigEndian uint64 for the offset and a
|
||||
// BigEnding uint32 for the chunk size.
|
||||
const mmapIndexEntrySize = addrSuffixSize + uint64Size + lengthSize
|
||||
|
||||
type mmapOrdinalSlice []mmapOrdinal
|
||||
|
||||
func (s mmapOrdinalSlice) Len() int { return len(s) }
|
||||
func (s mmapOrdinalSlice) Less(i, j int) bool { return s[i].offset < s[j].offset }
|
||||
func (s mmapOrdinalSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
||||
|
||||
func (i mmapTableIndex) Ordinals() []uint32 {
|
||||
s := mmapOrdinalSlice(make([]mmapOrdinal, i.chunkCount))
|
||||
for idx := 0; uint32(idx) < i.chunkCount; idx++ {
|
||||
mi := idx * mmapIndexEntrySize
|
||||
e := mmapIndexEntry(i.data[mi : mi+mmapIndexEntrySize])
|
||||
s[idx] = mmapOrdinal{idx, e.Offset()}
|
||||
}
|
||||
sort.Sort(s)
|
||||
res := make([]uint32, i.chunkCount)
|
||||
for j, r := range s {
|
||||
res[r.idx] = uint32(j)
|
||||
}
|
||||
return res
|
||||
}
|
||||
|
||||
type mmapTableIndex struct {
|
||||
chunkCount uint32
|
||||
totalUncompressedData uint64
|
||||
fileSz uint64
|
||||
prefixes []uint64
|
||||
data mmap.MMap
|
||||
refCnt *int32
|
||||
}
|
||||
|
||||
func (i mmapTableIndex) Prefixes() []uint64 {
|
||||
return i.prefixes
|
||||
}
|
||||
|
||||
type mmapOrdinal struct {
|
||||
idx int
|
||||
offset uint64
|
||||
}
|
||||
|
||||
func (i mmapTableIndex) TableFileSize() uint64 {
|
||||
return i.fileSz
|
||||
}
|
||||
|
||||
func (i mmapTableIndex) ChunkCount() uint32 {
|
||||
return i.chunkCount
|
||||
}
|
||||
|
||||
func (i mmapTableIndex) TotalUncompressedData() uint64 {
|
||||
return i.totalUncompressedData
|
||||
}
|
||||
|
||||
func (i mmapTableIndex) Close() error {
|
||||
cnt := atomic.AddInt32(i.refCnt, -1)
|
||||
if cnt == 0 {
|
||||
return i.data.Unmap()
|
||||
}
|
||||
if cnt < 0 {
|
||||
panic("Close() called and reduced ref count to < 0.")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (i mmapTableIndex) Clone() tableIndex {
|
||||
cnt := atomic.AddInt32(i.refCnt, 1)
|
||||
if cnt == 1 {
|
||||
panic("Clone() called after last Close(). This index is no longer valid.")
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
func (i mmapTableIndex) prefixIdx(prefix uint64) (idx uint32) {
|
||||
// NOTE: The golang impl of sort.Search is basically inlined here. This method can be called in
|
||||
// an extremely tight loop and inlining the code was a significant perf improvement.
|
||||
idx, j := 0, i.chunkCount
|
||||
for idx < j {
|
||||
h := idx + (j-idx)/2 // avoid overflow when computing h
|
||||
// i ≤ h < j
|
||||
if i.prefixes[h] < prefix {
|
||||
idx = h + 1 // preserves f(i-1) == false
|
||||
} else {
|
||||
j = h // preserves f(j) == true
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (i mmapTableIndex) Lookup(h *addr) (indexEntry, bool) {
|
||||
prefix := binary.BigEndian.Uint64(h[:])
|
||||
for idx := i.prefixIdx(prefix); idx < i.chunkCount && i.prefixes[idx] == prefix; idx++ {
|
||||
mi := idx * mmapIndexEntrySize
|
||||
e := mmapIndexEntry(i.data[mi : mi+mmapIndexEntrySize])
|
||||
if bytes.Equal(e.suffix(), h[addrPrefixSize:]) {
|
||||
return e, true
|
||||
}
|
||||
}
|
||||
return mmapIndexEntry{}, false
|
||||
}
|
||||
|
||||
func (i mmapTableIndex) EntrySuffixMatches(idx uint32, h *addr) bool {
|
||||
mi := idx * mmapIndexEntrySize
|
||||
e := mmapIndexEntry(i.data[mi : mi+mmapIndexEntrySize])
|
||||
return bytes.Equal(e.suffix(), h[addrPrefixSize:])
|
||||
}
|
||||
|
||||
func (i mmapTableIndex) IndexEntry(idx uint32, a *addr) indexEntry {
|
||||
mi := idx * mmapIndexEntrySize
|
||||
e := mmapIndexEntry(i.data[mi : mi+mmapIndexEntrySize])
|
||||
if a != nil {
|
||||
binary.BigEndian.PutUint64(a[:], i.prefixes[idx])
|
||||
copy(a[addrPrefixSize:], e.suffix())
|
||||
}
|
||||
return e
|
||||
}
|
||||
|
||||
type mmapIndexEntry []byte
|
||||
|
||||
const mmapIndexEntryOffsetStart = addrSuffixSize
|
||||
const mmapIndexEntryLengthStart = addrSuffixSize + uint64Size
|
||||
|
||||
func (e mmapIndexEntry) suffix() []byte {
|
||||
return e[:addrSuffixSize]
|
||||
}
|
||||
|
||||
func (e mmapIndexEntry) Offset() uint64 {
|
||||
return binary.BigEndian.Uint64(e[mmapIndexEntryOffsetStart:])
|
||||
}
|
||||
|
||||
func (e mmapIndexEntry) Length() uint32 {
|
||||
return binary.BigEndian.Uint32(e[mmapIndexEntryLengthStart:])
|
||||
}
|
||||
|
||||
func mmapOffheapSize(chunks int) int {
|
||||
pageSize := 4096
|
||||
esz := addrSuffixSize + uint64Size + lengthSize
|
||||
min := esz * chunks
|
||||
if min%pageSize == 0 {
|
||||
return min
|
||||
} else {
|
||||
return (min/pageSize + 1) * pageSize
|
||||
}
|
||||
}
|
||||
|
||||
func newMmapTableIndex(ti onHeapTableIndex, f *os.File) (mmapTableIndex, error) {
|
||||
flags := 0
|
||||
if f == nil {
|
||||
flags = mmap.ANON
|
||||
}
|
||||
arr, err := mmap.MapRegion(f, mmapOffheapSize(len(ti.ordinals)), mmap.RDWR, flags, 0)
|
||||
if err != nil {
|
||||
return mmapTableIndex{}, err
|
||||
}
|
||||
for i := range ti.ordinals {
|
||||
idx := i * mmapIndexEntrySize
|
||||
si := addrSuffixSize * ti.ordinals[i]
|
||||
copy(arr[idx:], ti.suffixes[si:si+addrSuffixSize])
|
||||
binary.BigEndian.PutUint64(arr[idx+mmapIndexEntryOffsetStart:], ti.offsets[ti.ordinals[i]])
|
||||
binary.BigEndian.PutUint32(arr[idx+mmapIndexEntryLengthStart:], ti.lengths[ti.ordinals[i]])
|
||||
}
|
||||
|
||||
refCnt := new(int32)
|
||||
*refCnt = 1
|
||||
return mmapTableIndex{
|
||||
ti.chunkCount,
|
||||
ti.totalUncompressedData,
|
||||
ti.TableFileSize(),
|
||||
ti.Prefixes(),
|
||||
arr,
|
||||
refCnt,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type tableReaderAt interface {
|
||||
ReadAtWithStats(ctx context.Context, p []byte, off int64, stats *Stats) (n int, err error)
|
||||
}
|
||||
@@ -326,234 +139,22 @@ type tableReader struct {
|
||||
blockSize uint64
|
||||
}
|
||||
|
||||
type tableIndex interface {
|
||||
// ChunkCount returns the total number of chunks in the indexed file.
|
||||
ChunkCount() uint32
|
||||
// EntrySuffixMatches returns true if the entry at index |idx| matches
|
||||
// the suffix of the address |h|. Used by |Lookup| after finding
|
||||
// matching indexes based on |Prefixes|.
|
||||
EntrySuffixMatches(idx uint32, h *addr) bool
|
||||
// IndexEntry returns the |indexEntry| at |idx|. Optionally puts the
|
||||
// full address of that entry in |a| if |a| is not |nil|.
|
||||
IndexEntry(idx uint32, a *addr) indexEntry
|
||||
// Lookup returns an |indexEntry| for the chunk corresponding to the
|
||||
// provided address |h|. Second returns is |true| if an entry exists
|
||||
// and |false| otherwise.
|
||||
Lookup(h *addr) (indexEntry, bool)
|
||||
// Ordinals returns a slice of indexes which maps the |i|th chunk in
|
||||
// the indexed file to its corresponding entry in index. The |i|th
|
||||
// entry in the result is the |i|th chunk in the indexed file, and its
|
||||
// corresponding value in the slice is the index entry that maps to it.
|
||||
Ordinals() []uint32
|
||||
// Prefixes returns the sorted slice of |uint64| |addr| prefixes; each
|
||||
// entry corresponds to an indexed chunk address.
|
||||
Prefixes() []uint64
|
||||
// TableFileSize returns the total size of the indexed table file, in bytes.
|
||||
TableFileSize() uint64
|
||||
// TotalUncompressedData returns the total uncompressed data size of
|
||||
// the table file. Used for informational statistics only.
|
||||
TotalUncompressedData() uint64
|
||||
|
||||
// Close releases any resources used by this tableIndex.
|
||||
Close() error
|
||||
|
||||
// Clone returns a |tableIndex| with the same contents which can be
|
||||
// |Close|d independently.
|
||||
Clone() tableIndex
|
||||
}
|
||||
|
||||
var _ tableIndex = mmapTableIndex{}
|
||||
|
||||
// parses a valid nbs tableIndex from a byte stream. |buff| must end with an NBS index
|
||||
// and footer, though it may contain an unspecified number of bytes before that data.
|
||||
// |tableIndex| doesn't keep alive any references to |buff|.
|
||||
func parseTableIndex(buff []byte) (onHeapTableIndex, error) {
|
||||
return ReadTableIndex(bytes.NewReader(buff))
|
||||
}
|
||||
|
||||
func ReadTableIndex(rd io.ReadSeeker) (onHeapTableIndex, error) {
|
||||
footerSize := int64(magicNumberSize + uint64Size + uint32Size)
|
||||
_, err := rd.Seek(-footerSize, io.SeekEnd)
|
||||
|
||||
if err != nil {
|
||||
return onHeapTableIndex{}, err
|
||||
}
|
||||
|
||||
footer, err := iohelp.ReadNBytes(rd, int(footerSize))
|
||||
|
||||
if err != nil {
|
||||
return onHeapTableIndex{}, err
|
||||
}
|
||||
|
||||
if string(footer[uint32Size+uint64Size:]) != magicNumber {
|
||||
return onHeapTableIndex{}, ErrInvalidTableFile
|
||||
}
|
||||
|
||||
chunkCount := binary.BigEndian.Uint32(footer)
|
||||
totalUncompressedData := binary.BigEndian.Uint64(footer[uint32Size:])
|
||||
|
||||
// index
|
||||
suffixesSize := int64(chunkCount) * addrSuffixSize
|
||||
lengthsSize := int64(chunkCount) * lengthSize
|
||||
tuplesSize := int64(chunkCount) * prefixTupleSize
|
||||
indexSize := suffixesSize + lengthsSize + tuplesSize
|
||||
|
||||
_, err = rd.Seek(-(indexSize + footerSize), io.SeekEnd)
|
||||
if err != nil {
|
||||
return onHeapTableIndex{}, ErrInvalidTableFile
|
||||
}
|
||||
|
||||
indexBytes, err := iohelp.ReadNBytes(rd, int(indexSize))
|
||||
if err != nil {
|
||||
return onHeapTableIndex{}, ErrInvalidTableFile
|
||||
}
|
||||
|
||||
prefixes, ordinals := computePrefixes(chunkCount, indexBytes[:tuplesSize])
|
||||
lengths, offsets := computeOffsets(chunkCount, indexBytes[tuplesSize:tuplesSize+lengthsSize])
|
||||
suffixes := indexBytes[tuplesSize+lengthsSize:]
|
||||
|
||||
return onHeapTableIndex{
|
||||
chunkCount, totalUncompressedData,
|
||||
prefixes, offsets,
|
||||
lengths, ordinals,
|
||||
suffixes,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func computeOffsets(count uint32, buff []byte) (lengths []uint32, offsets []uint64) {
|
||||
lengths = make([]uint32, count)
|
||||
offsets = make([]uint64, count)
|
||||
|
||||
lengths[0] = binary.BigEndian.Uint32(buff)
|
||||
|
||||
for i := uint64(1); i < uint64(count); i++ {
|
||||
lengths[i] = binary.BigEndian.Uint32(buff[i*lengthSize:])
|
||||
offsets[i] = offsets[i-1] + uint64(lengths[i-1])
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func computePrefixes(count uint32, buff []byte) (prefixes []uint64, ordinals []uint32) {
|
||||
prefixes = make([]uint64, count)
|
||||
ordinals = make([]uint32, count)
|
||||
|
||||
for i := uint64(0); i < uint64(count); i++ {
|
||||
idx := i * prefixTupleSize
|
||||
prefixes[i] = binary.BigEndian.Uint64(buff[idx:])
|
||||
ordinals[i] = binary.BigEndian.Uint32(buff[idx+addrPrefixSize:])
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (ti onHeapTableIndex) prefixIdxToOrdinal(idx uint32) uint32 {
|
||||
return ti.ordinals[idx]
|
||||
}
|
||||
|
||||
// TableFileSize returns the size of the table file that this index references.
|
||||
// This assumes that the index follows immediately after the last chunk in the
|
||||
// file and that the last chunk in the file is in the index.
|
||||
func (ti onHeapTableIndex) TableFileSize() uint64 {
|
||||
if ti.chunkCount == 0 {
|
||||
return footerSize
|
||||
}
|
||||
len, offset := ti.offsets[ti.chunkCount-1], uint64(ti.lengths[ti.chunkCount-1])
|
||||
return offset + len + indexSize(ti.chunkCount) + footerSize
|
||||
}
|
||||
|
||||
// prefixIdx returns the first position in |tr.prefixes| whose value ==
|
||||
// |prefix|. Returns |tr.chunkCount| if absent
|
||||
func (ti onHeapTableIndex) prefixIdx(prefix uint64) (idx uint32) {
|
||||
// NOTE: The golang impl of sort.Search is basically inlined here. This method can be called in
|
||||
// an extremely tight loop and inlining the code was a significant perf improvement.
|
||||
idx, j := 0, ti.chunkCount
|
||||
for idx < j {
|
||||
h := idx + (j-idx)/2 // avoid overflow when computing h
|
||||
// i ≤ h < j
|
||||
if ti.prefixes[h] < prefix {
|
||||
idx = h + 1 // preserves f(i-1) == false
|
||||
} else {
|
||||
j = h // preserves f(j) == true
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// EntrySuffixMatches returns true IFF the suffix for prefix entry |idx|
|
||||
// matches the address |a|.
|
||||
func (ti onHeapTableIndex) EntrySuffixMatches(idx uint32, h *addr) bool {
|
||||
li := uint64(ti.ordinals[idx]) * addrSuffixSize
|
||||
return bytes.Equal(h[addrPrefixSize:], ti.suffixes[li:li+addrSuffixSize])
|
||||
}
|
||||
|
||||
// lookupOrdinal returns the ordinal of |h| if present. Returns |ti.chunkCount|
|
||||
// if absent.
|
||||
func (ti onHeapTableIndex) lookupOrdinal(h *addr) uint32 {
|
||||
prefix := h.Prefix()
|
||||
|
||||
for idx := ti.prefixIdx(prefix); idx < ti.chunkCount && ti.prefixes[idx] == prefix; idx++ {
|
||||
if ti.EntrySuffixMatches(idx, h) {
|
||||
return ti.ordinals[idx]
|
||||
}
|
||||
}
|
||||
|
||||
return ti.chunkCount
|
||||
}
|
||||
|
||||
func (ti onHeapTableIndex) IndexEntry(idx uint32, a *addr) indexEntry {
|
||||
ord := ti.ordinals[idx]
|
||||
if a != nil {
|
||||
binary.BigEndian.PutUint64(a[:], ti.prefixes[idx])
|
||||
li := uint64(ord) * addrSuffixSize
|
||||
copy(a[addrPrefixSize:], ti.suffixes[li:li+addrSuffixSize])
|
||||
}
|
||||
return indexResult{ti.offsets[ord], ti.lengths[ord]}
|
||||
}
|
||||
|
||||
func (ti onHeapTableIndex) Lookup(h *addr) (indexEntry, bool) {
|
||||
ord := ti.lookupOrdinal(h)
|
||||
if ord == ti.chunkCount {
|
||||
return indexResult{}, false
|
||||
}
|
||||
return indexResult{ti.offsets[ord], ti.lengths[ord]}, true
|
||||
}
|
||||
|
||||
func (ti onHeapTableIndex) Prefixes() []uint64 {
|
||||
return ti.prefixes
|
||||
}
|
||||
|
||||
func (ti onHeapTableIndex) Ordinals() []uint32 {
|
||||
return ti.ordinals
|
||||
}
|
||||
|
||||
func (i onHeapTableIndex) ChunkCount() uint32 {
|
||||
return i.chunkCount
|
||||
}
|
||||
|
||||
func (i onHeapTableIndex) TotalUncompressedData() uint64 {
|
||||
return i.totalUncompressedData
|
||||
}
|
||||
|
||||
func (i onHeapTableIndex) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (i onHeapTableIndex) Clone() tableIndex {
|
||||
return i
|
||||
}
|
||||
|
||||
// newTableReader parses a valid nbs table byte stream and returns a reader. buff must end with an NBS index
|
||||
// and footer, though it may contain an unspecified number of bytes before that data. r should allow
|
||||
// retrieving any desired range of bytes from the table.
|
||||
func newTableReader(index tableIndex, r tableReaderAt, blockSize uint64) tableReader {
|
||||
func newTableReader(index tableIndex, r tableReaderAt, blockSize uint64) (tableReader, error) {
|
||||
p, err := index.Prefixes()
|
||||
if err != nil {
|
||||
return tableReader{}, err
|
||||
}
|
||||
return tableReader{
|
||||
index,
|
||||
index.Prefixes(),
|
||||
p,
|
||||
index.ChunkCount(),
|
||||
index.TotalUncompressedData(),
|
||||
r,
|
||||
blockSize,
|
||||
}
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Scan across (logically) two ordered slices of address prefixes.
|
||||
@@ -584,7 +185,11 @@ func (tr tableReader) hasMany(addrs []hasRecord) (bool, error) {
|
||||
|
||||
// prefixes are equal, so locate and compare against the corresponding suffix
|
||||
for j := filterIdx; j < filterLen && addr.prefix == tr.prefixes[j]; j++ {
|
||||
if tr.EntrySuffixMatches(j, addr.a) {
|
||||
m, err := tr.EntrySuffixMatches(j, addr.a)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if m {
|
||||
addrs[i].has = true
|
||||
break
|
||||
}
|
||||
@@ -612,14 +217,17 @@ func (tr tableReader) index() (tableIndex, error) {
|
||||
|
||||
// returns true iff |h| can be found in this table.
|
||||
func (tr tableReader) has(h addr) (bool, error) {
|
||||
_, ok := tr.Lookup(&h)
|
||||
return ok, nil
|
||||
_, ok, err := tr.Lookup(&h)
|
||||
return ok, err
|
||||
}
|
||||
|
||||
// returns the storage associated with |h|, iff present. Returns nil if absent. On success,
|
||||
// the returned byte slice directly references the underlying storage.
|
||||
func (tr tableReader) get(ctx context.Context, h addr, stats *Stats) ([]byte, error) {
|
||||
e, found := tr.Lookup(&h)
|
||||
e, found, err := tr.Lookup(&h)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if !found {
|
||||
return nil, nil
|
||||
}
|
||||
@@ -746,15 +354,21 @@ func (tr tableReader) getMany(
|
||||
|
||||
// Pass #1: Iterate over |reqs| and |tr.prefixes| (both sorted by address) and build the set
|
||||
// of table locations which must be read in order to satisfy the getMany operation.
|
||||
offsetRecords, remaining := tr.findOffsets(reqs)
|
||||
err := tr.getManyAtOffsets(ctx, eg, offsetRecords, found, stats)
|
||||
offsetRecords, remaining, err := tr.findOffsets(reqs)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
err = tr.getManyAtOffsets(ctx, eg, offsetRecords, found, stats)
|
||||
return remaining, err
|
||||
}
|
||||
func (tr tableReader) getManyCompressed(ctx context.Context, eg *errgroup.Group, reqs []getRecord, found func(context.Context, CompressedChunk), stats *Stats) (bool, error) {
|
||||
// Pass #1: Iterate over |reqs| and |tr.prefixes| (both sorted by address) and build the set
|
||||
// of table locations which must be read in order to satisfy the getMany operation.
|
||||
offsetRecords, remaining := tr.findOffsets(reqs)
|
||||
err := tr.getManyCompressedAtOffsets(ctx, eg, offsetRecords, found, stats)
|
||||
offsetRecords, remaining, err := tr.findOffsets(reqs)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
err = tr.getManyCompressedAtOffsets(ctx, eg, offsetRecords, found, stats)
|
||||
return remaining, err
|
||||
}
|
||||
|
||||
@@ -867,7 +481,7 @@ func (tr tableReader) getManyAtOffsetsWithReadFunc(
|
||||
// chunks remaining will be set to false upon return. If some are not here,
|
||||
// then remaining will be true. The result offsetRecSlice is sorted in offset
|
||||
// order.
|
||||
func (tr tableReader) findOffsets(reqs []getRecord) (ors offsetRecSlice, remaining bool) {
|
||||
func (tr tableReader) findOffsets(reqs []getRecord) (ors offsetRecSlice, remaining bool, err error) {
|
||||
filterIdx := uint32(0)
|
||||
filterLen := uint32(len(tr.prefixes))
|
||||
ors = make(offsetRecSlice, 0, len(reqs))
|
||||
@@ -896,9 +510,16 @@ func (tr tableReader) findOffsets(reqs []getRecord) (ors offsetRecSlice, remaini
|
||||
|
||||
// record all offsets within the table which contain the data required.
|
||||
for j := filterIdx; j < filterLen && req.prefix == tr.prefixes[j]; j++ {
|
||||
if tr.EntrySuffixMatches(j, req.a) {
|
||||
m, err := tr.EntrySuffixMatches(j, req.a)
|
||||
if err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
if m {
|
||||
reqs[i].found = true
|
||||
entry := tr.IndexEntry(j, nil)
|
||||
entry, err := tr.IndexEntry(j, nil)
|
||||
if err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
ors = append(ors, offsetRec{req.a, entry.Offset(), entry.Length()})
|
||||
break
|
||||
}
|
||||
@@ -906,7 +527,7 @@ func (tr tableReader) findOffsets(reqs []getRecord) (ors offsetRecSlice, remaini
|
||||
}
|
||||
|
||||
sort.Sort(ors)
|
||||
return ors, remaining
|
||||
return ors, remaining, nil
|
||||
}
|
||||
|
||||
func canReadAhead(fRec offsetRec, curStart, curEnd, blockSize uint64) (newEnd uint64, canRead bool) {
|
||||
@@ -933,7 +554,10 @@ func canReadAhead(fRec offsetRec, curStart, curEnd, blockSize uint64) (newEnd ui
|
||||
func (tr tableReader) calcReads(reqs []getRecord, blockSize uint64) (reads int, remaining bool, err error) {
|
||||
var offsetRecords offsetRecSlice
|
||||
// Pass #1: Build the set of table locations which must be read in order to find all the elements of |reqs| which are present in this table.
|
||||
offsetRecords, remaining = tr.findOffsets(reqs)
|
||||
offsetRecords, remaining, err = tr.findOffsets(reqs)
|
||||
if err != nil {
|
||||
return 0, false, err
|
||||
}
|
||||
|
||||
// Now |offsetRecords| contains all locations within the table which must
|
||||
// be searched (note that there may be duplicates of a particular
|
||||
@@ -997,7 +621,10 @@ func (tr tableReader) extract(ctx context.Context, chunks chan<- extractRecord)
|
||||
var ors offsetRecSlice
|
||||
for i := uint32(0); i < tr.chunkCount; i++ {
|
||||
a := new(addr)
|
||||
e := tr.IndexEntry(i, a)
|
||||
e, err := tr.IndexEntry(i, a)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ors = append(ors, offsetRec{a, e.Offset(), e.Length()})
|
||||
}
|
||||
sort.Sort(ors)
|
||||
@@ -1020,8 +647,12 @@ func (tr tableReader) Close() error {
|
||||
return tr.tableIndex.Close()
|
||||
}
|
||||
|
||||
func (tr tableReader) Clone() tableReader {
|
||||
return tableReader{tr.tableIndex.Clone(), tr.prefixes, tr.chunkCount, tr.totalUncompressedData, tr.r, tr.blockSize}
|
||||
func (tr tableReader) Clone() (tableReader, error) {
|
||||
ti, err := tr.tableIndex.Clone()
|
||||
if err != nil {
|
||||
return tableReader{}, err
|
||||
}
|
||||
return tableReader{ti, tr.prefixes, tr.chunkCount, tr.totalUncompressedData, tr.r, tr.blockSize}, nil
|
||||
}
|
||||
|
||||
type readerAdapter struct {
|
||||
|
||||
@@ -15,12 +15,9 @@
|
||||
package nbs
|
||||
|
||||
import (
|
||||
"io"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestCompressedChunkIsEmpty(t *testing.T) {
|
||||
@@ -32,73 +29,6 @@ func TestCompressedChunkIsEmpty(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseTableIndex(t *testing.T) {
|
||||
f, err := os.Open("testdata/0oa7mch34jg1rvghrnhr4shrp2fm4ftd.idx")
|
||||
require.NoError(t, err)
|
||||
defer f.Close()
|
||||
bs, err := io.ReadAll(f)
|
||||
require.NoError(t, err)
|
||||
idx, err := parseTableIndex(bs)
|
||||
require.NoError(t, err)
|
||||
defer idx.Close()
|
||||
assert.Equal(t, uint32(596), idx.ChunkCount())
|
||||
seen := make(map[addr]bool)
|
||||
for i := uint32(0); i < idx.ChunkCount(); i++ {
|
||||
var onheapaddr addr
|
||||
e := idx.IndexEntry(i, &onheapaddr)
|
||||
if _, ok := seen[onheapaddr]; !ok {
|
||||
seen[onheapaddr] = true
|
||||
lookupe, ok := idx.Lookup(&onheapaddr)
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, e.Offset(), lookupe.Offset(), "%v does not match %v for address %v", e, lookupe, onheapaddr)
|
||||
assert.Equal(t, e.Length(), lookupe.Length())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMMapIndex(t *testing.T) {
|
||||
f, err := os.Open("testdata/0oa7mch34jg1rvghrnhr4shrp2fm4ftd.idx")
|
||||
require.NoError(t, err)
|
||||
defer f.Close()
|
||||
bs, err := io.ReadAll(f)
|
||||
require.NoError(t, err)
|
||||
idx, err := parseTableIndex(bs)
|
||||
require.NoError(t, err)
|
||||
defer idx.Close()
|
||||
mmidx, err := newMmapTableIndex(idx, nil)
|
||||
require.NoError(t, err)
|
||||
defer mmidx.Close()
|
||||
assert.Equal(t, idx.ChunkCount(), mmidx.ChunkCount())
|
||||
seen := make(map[addr]bool)
|
||||
for i := uint32(0); i < idx.ChunkCount(); i++ {
|
||||
var onheapaddr addr
|
||||
onheapentry := idx.IndexEntry(i, &onheapaddr)
|
||||
var mmaddr addr
|
||||
mmentry := mmidx.IndexEntry(i, &mmaddr)
|
||||
assert.Equal(t, onheapaddr, mmaddr)
|
||||
assert.Equal(t, onheapentry.Offset(), mmentry.Offset())
|
||||
assert.Equal(t, onheapentry.Length(), mmentry.Length())
|
||||
if _, ok := seen[onheapaddr]; !ok {
|
||||
seen[onheapaddr] = true
|
||||
mmentry, found := mmidx.Lookup(&onheapaddr)
|
||||
assert.True(t, found)
|
||||
assert.Equal(t, onheapentry.Offset(), mmentry.Offset(), "%v does not match %v for address %v", onheapentry, mmentry, onheapaddr)
|
||||
assert.Equal(t, onheapentry.Length(), mmentry.Length())
|
||||
}
|
||||
wrongaddr := onheapaddr
|
||||
if wrongaddr[19] != 0 {
|
||||
wrongaddr[19] = 0
|
||||
_, found := mmidx.Lookup(&wrongaddr)
|
||||
assert.False(t, found)
|
||||
}
|
||||
}
|
||||
|
||||
assert.Equal(t, idx.Ordinals(), mmidx.Ordinals())
|
||||
assert.Equal(t, idx.Prefixes(), mmidx.Prefixes())
|
||||
assert.Equal(t, idx.TableFileSize(), mmidx.TableFileSize())
|
||||
assert.Equal(t, idx.TotalUncompressedData(), mmidx.TotalUncompressedData())
|
||||
}
|
||||
|
||||
func TestCanReadAhead(t *testing.T) {
|
||||
type expected struct {
|
||||
end uint64
|
||||
|
||||
@@ -137,7 +137,10 @@ func (ts tableSet) getMany(ctx context.Context, eg *errgroup.Group, reqs []getRe
|
||||
f := func(css chunkSources) bool {
|
||||
for _, haver := range css {
|
||||
if rp, ok := haver.(chunkReadPlanner); ok {
|
||||
offsets, remaining := rp.findOffsets(reqs)
|
||||
offsets, remaining, err := rp.findOffsets(reqs)
|
||||
if err != nil {
|
||||
return true
|
||||
}
|
||||
err = rp.getManyAtOffsets(ctx, eg, offsets, found, stats)
|
||||
if err != nil {
|
||||
return true
|
||||
@@ -165,7 +168,10 @@ func (ts tableSet) getManyCompressed(ctx context.Context, eg *errgroup.Group, re
|
||||
f := func(css chunkSources) bool {
|
||||
for _, haver := range css {
|
||||
if rp, ok := haver.(chunkReadPlanner); ok {
|
||||
offsets, remaining := rp.findOffsets(reqs)
|
||||
offsets, remaining, err := rp.findOffsets(reqs)
|
||||
if err != nil {
|
||||
return true
|
||||
}
|
||||
if len(offsets) > 0 {
|
||||
err = rp.getManyCompressedAtOffsets(ctx, eg, offsets, found, stats)
|
||||
if err != nil {
|
||||
@@ -428,7 +434,11 @@ func (ts tableSet) Rebase(ctx context.Context, specs []tableSpec, stats *Stats)
|
||||
}
|
||||
|
||||
if cnt > 0 {
|
||||
merged.novel = append(merged.novel, t.Clone())
|
||||
t2, err := t.Clone()
|
||||
if err != nil {
|
||||
return tableSet{}, err
|
||||
}
|
||||
merged.novel = append(merged.novel, t2)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -465,7 +475,12 @@ func (ts tableSet) Rebase(ctx context.Context, specs []tableSpec, stats *Stats)
|
||||
return
|
||||
}
|
||||
if spec.name == h {
|
||||
merged.upstream[idx] = existing.Clone()
|
||||
c, err := existing.Clone()
|
||||
if err != nil {
|
||||
ae.SetIfError(err)
|
||||
return
|
||||
}
|
||||
merged.upstream[idx] = c
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
@@ -77,9 +77,10 @@ func TestSimple(t *testing.T) {
|
||||
|
||||
tableData, _, err := buildTable(chunks)
|
||||
require.NoError(t, err)
|
||||
ti, err := parseTableIndex(tableData)
|
||||
ti, err := parseTableIndexByCopy(tableData)
|
||||
require.NoError(t, err)
|
||||
tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
|
||||
require.NoError(t, err)
|
||||
tr := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
|
||||
|
||||
assertChunksInReader(chunks, tr, assert)
|
||||
|
||||
@@ -123,9 +124,10 @@ func TestHasMany(t *testing.T) {
|
||||
|
||||
tableData, _, err := buildTable(chunks)
|
||||
require.NoError(t, err)
|
||||
ti, err := parseTableIndex(tableData)
|
||||
ti, err := parseTableIndexByCopy(tableData)
|
||||
require.NoError(t, err)
|
||||
tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
|
||||
require.NoError(t, err)
|
||||
tr := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
|
||||
|
||||
addrs := addrSlice{computeAddr(chunks[0]), computeAddr(chunks[1]), computeAddr(chunks[2])}
|
||||
hasAddrs := []hasRecord{
|
||||
@@ -173,9 +175,10 @@ func TestHasManySequentialPrefix(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
buff = buff[:length]
|
||||
|
||||
ti, err := parseTableIndex(buff)
|
||||
ti, err := parseTableIndexByCopy(buff)
|
||||
require.NoError(t, err)
|
||||
tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize)
|
||||
require.NoError(t, err)
|
||||
tr := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize)
|
||||
|
||||
hasAddrs := make([]hasRecord, 2)
|
||||
// Leave out the first address
|
||||
@@ -201,9 +204,10 @@ func TestGetMany(t *testing.T) {
|
||||
|
||||
tableData, _, err := buildTable(data)
|
||||
require.NoError(t, err)
|
||||
ti, err := parseTableIndex(tableData)
|
||||
ti, err := parseTableIndexByCopy(tableData)
|
||||
require.NoError(t, err)
|
||||
tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
|
||||
require.NoError(t, err)
|
||||
tr := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
|
||||
|
||||
addrs := addrSlice{computeAddr(data[0]), computeAddr(data[1]), computeAddr(data[2])}
|
||||
getBatch := []getRecord{
|
||||
@@ -234,9 +238,10 @@ func TestCalcReads(t *testing.T) {
|
||||
|
||||
tableData, _, err := buildTable(chunks)
|
||||
require.NoError(t, err)
|
||||
ti, err := parseTableIndex(tableData)
|
||||
ti, err := parseTableIndexByCopy(tableData)
|
||||
require.NoError(t, err)
|
||||
tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), 0)
|
||||
require.NoError(t, err)
|
||||
tr := newTableReader(ti, tableReaderAtFromBytes(tableData), 0)
|
||||
addrs := addrSlice{computeAddr(chunks[0]), computeAddr(chunks[1]), computeAddr(chunks[2])}
|
||||
getBatch := []getRecord{
|
||||
{&addrs[0], binary.BigEndian.Uint64(addrs[0][:addrPrefixSize]), false},
|
||||
@@ -270,9 +275,10 @@ func TestExtract(t *testing.T) {
|
||||
|
||||
tableData, _, err := buildTable(chunks)
|
||||
require.NoError(t, err)
|
||||
ti, err := parseTableIndex(tableData)
|
||||
ti, err := parseTableIndexByCopy(tableData)
|
||||
require.NoError(t, err)
|
||||
tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
|
||||
require.NoError(t, err)
|
||||
tr := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
|
||||
|
||||
addrs := addrSlice{computeAddr(chunks[0]), computeAddr(chunks[1]), computeAddr(chunks[2])}
|
||||
|
||||
@@ -308,9 +314,10 @@ func Test65k(t *testing.T) {
|
||||
|
||||
tableData, _, err := buildTable(chunks)
|
||||
require.NoError(t, err)
|
||||
ti, err := parseTableIndex(tableData)
|
||||
ti, err := parseTableIndexByCopy(tableData)
|
||||
require.NoError(t, err)
|
||||
tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
|
||||
require.NoError(t, err)
|
||||
tr := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
|
||||
|
||||
for i := 0; i < count; i++ {
|
||||
data := dataFn(i)
|
||||
@@ -360,9 +367,10 @@ func doTestNGetMany(t *testing.T, count int) {
|
||||
|
||||
tableData, _, err := buildTable(data)
|
||||
require.NoError(t, err)
|
||||
ti, err := parseTableIndex(tableData)
|
||||
ti, err := parseTableIndexByCopy(tableData)
|
||||
require.NoError(t, err)
|
||||
tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
|
||||
require.NoError(t, err)
|
||||
tr := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
|
||||
|
||||
getBatch := make([]getRecord, len(data))
|
||||
for i := 0; i < count; i++ {
|
||||
|
||||
@@ -16,6 +16,7 @@ package nbs
|
||||
|
||||
import (
|
||||
"io"
|
||||
"math"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/utils/iohelp"
|
||||
|
||||
@@ -24,7 +25,7 @@ import (
|
||||
)
|
||||
|
||||
func IterChunks(rd io.ReadSeeker, cb func(chunk chunks.Chunk) (stop bool, err error)) error {
|
||||
idx, err := ReadTableIndex(rd)
|
||||
idx, err := ReadTableIndexByCopy(rd)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -34,7 +35,10 @@ func IterChunks(rd io.ReadSeeker, cb func(chunk chunks.Chunk) (stop bool, err er
|
||||
seen := make(map[addr]bool)
|
||||
for i := uint32(0); i < idx.ChunkCount(); i++ {
|
||||
var a addr
|
||||
ie := idx.IndexEntry(i, &a)
|
||||
ie, err := idx.IndexEntry(i, &a)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if _, ok := seen[a]; !ok {
|
||||
seen[a] = true
|
||||
chunkBytes, err := readNFrom(rd, ie.Offset(), ie.Length())
|
||||
@@ -64,6 +68,26 @@ func IterChunks(rd io.ReadSeeker, cb func(chunk chunks.Chunk) (stop bool, err er
|
||||
return nil
|
||||
}
|
||||
|
||||
func GetTableIndexPrefixes(rd io.ReadSeeker) (prefixes []uint64, err error) {
|
||||
idx, err := ReadTableIndexByCopy(rd)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() {
|
||||
cerr := idx.Close()
|
||||
if err == nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
|
||||
return idx.Prefixes()
|
||||
}
|
||||
|
||||
func GuessPrefixOrdinal(prefix uint64, n uint32) int {
|
||||
hi := prefix >> 32
|
||||
return int((hi * uint64(n)) / uint64(math.MaxUint32))
|
||||
}
|
||||
|
||||
func readNFrom(rd io.ReadSeeker, offset uint64, length uint32) ([]byte, error) {
|
||||
_, err := rd.Seek(int64(offset), io.SeekStart)
|
||||
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
This is a performance test rig for the two main types of hashing we do in NOMS - buzhash and sha1. There's also support for sha256, sha512, and blake2b hash functions for comparison.
|
||||
|
||||
As of May 9, these are the numbers I get on a macbook pro 3.1 GHz Intel Core i7.
|
||||
|
||||
- no hashing : 3500 MB/s
|
||||
- sha1 only : 470 MB/s
|
||||
- sha256 only : 185 MB/s
|
||||
- sha512 only : 299 MB/s
|
||||
- blake2b only : 604 MB/s
|
||||
- bh only : 139 MB/s
|
||||
- sha1 and bh : 110 MB/s
|
||||
- sha256 and bh : 80 MB/s
|
||||
- sha512 and bh : 96 MB/s
|
||||
- blake2b and bh: 115 MB/s
|
||||
|
||||
I think that in the no hashing case there is some compiler optimization going
|
||||
on because I note that if all I do is add a loop that reads out bytes one by
|
||||
one from the slice, it drops to 1000MB/s.
|
||||
|
||||
One outcome of this is that there's no sense going to sha256 - we should just
|
||||
jump straight to sha512.
|
||||
@@ -1,96 +0,0 @@
|
||||
// Copyright 2019 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// This file incorporates work covered by the following copyright and
|
||||
// permission notice:
|
||||
//
|
||||
// Copyright 2016 Attic Labs, Inc. All rights reserved.
|
||||
// Licensed under the Apache License, version 2.0:
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"crypto/sha1"
|
||||
"crypto/sha256"
|
||||
"crypto/sha512"
|
||||
"fmt"
|
||||
"hash"
|
||||
"io"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/codahale/blake2"
|
||||
humanize "github.com/dustin/go-humanize"
|
||||
flag "github.com/juju/gnuflag"
|
||||
"github.com/silvasur/buzhash"
|
||||
)
|
||||
|
||||
func main() {
|
||||
useSHA := flag.String("use-sha", "", "<default>=no hashing, 1=sha1, 256=sha256, 512=sha512, blake=blake2b")
|
||||
useBH := flag.Bool("use-bh", false, "whether we buzhash the bytes")
|
||||
flag.Parse(true)
|
||||
|
||||
flag.Usage = func() {
|
||||
fmt.Printf("%s <big-file>\n", os.Args[0])
|
||||
flag.PrintDefaults()
|
||||
}
|
||||
|
||||
if len(flag.Args()) < 1 {
|
||||
flag.Usage()
|
||||
return
|
||||
}
|
||||
|
||||
p := flag.Args()[0]
|
||||
bh := buzhash.NewBuzHash(64 * 8)
|
||||
f, _ := os.Open(p)
|
||||
defer f.Close()
|
||||
t0 := time.Now()
|
||||
buf := make([]byte, 4*1024)
|
||||
l := uint64(0)
|
||||
|
||||
var h hash.Hash
|
||||
if *useSHA == "1" {
|
||||
h = sha1.New()
|
||||
} else if *useSHA == "256" {
|
||||
h = sha256.New()
|
||||
} else if *useSHA == "512" {
|
||||
h = sha512.New()
|
||||
} else if *useSHA == "blake" {
|
||||
h = blake2.NewBlake2B()
|
||||
}
|
||||
|
||||
for {
|
||||
n, err := f.Read(buf)
|
||||
l += uint64(n)
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
s := buf[:n]
|
||||
if h != nil {
|
||||
h.Write(s)
|
||||
}
|
||||
if *useBH {
|
||||
bh.Write(s)
|
||||
}
|
||||
}
|
||||
|
||||
t1 := time.Now()
|
||||
d := t1.Sub(t0)
|
||||
fmt.Printf("Read %s in %s (%s/s)\n", humanize.Bytes(l), d, humanize.Bytes(uint64(float64(l)/d.Seconds())))
|
||||
digest := []byte{}
|
||||
if h != nil {
|
||||
fmt.Printf("%x\n", h.Sum(digest))
|
||||
}
|
||||
}
|
||||
@@ -27,6 +27,7 @@ import (
|
||||
"github.com/dolthub/dolt/go/store/val"
|
||||
)
|
||||
|
||||
// todo(andy): randomize test seed
|
||||
var testRand = rand.New(rand.NewSource(1))
|
||||
|
||||
func TestMap(t *testing.T) {
|
||||
@@ -76,7 +77,7 @@ func makeProllyMap(t *testing.T, count int) (orderedMap, [][2]val.Tuple) {
|
||||
)
|
||||
|
||||
tuples := randomTuplePairs(count, kd, vd)
|
||||
om := prollyMapFromTuples(t, count, kd, vd, tuples)
|
||||
om := prollyMapFromTuples(t, kd, vd, tuples)
|
||||
|
||||
return om, tuples
|
||||
}
|
||||
@@ -89,12 +90,12 @@ func makeProllySecondaryIndex(t *testing.T, count int) (orderedMap, [][2]val.Tup
|
||||
vd := val.NewTupleDescriptor()
|
||||
|
||||
tuples := randomCompositeTuplePairs(count, kd, vd)
|
||||
om := prollyMapFromTuples(t, count, kd, vd, tuples)
|
||||
om := prollyMapFromTuples(t, kd, vd, tuples)
|
||||
|
||||
return om, tuples
|
||||
}
|
||||
|
||||
func prollyMapFromTuples(t *testing.T, count int, kd, vd val.TupleDesc, tuples [][2]val.Tuple) orderedMap {
|
||||
func prollyMapFromTuples(t *testing.T, kd, vd val.TupleDesc, tuples [][2]val.Tuple) orderedMap {
|
||||
ctx := context.Background()
|
||||
ns := newTestNodeStore()
|
||||
|
||||
|
||||
@@ -171,7 +171,7 @@ func (it *memRangeIter) iterate(context.Context) (err error) {
|
||||
}
|
||||
}
|
||||
|
||||
func (it *memRangeIter) nextMutation() (key, value val.Tuple) {
|
||||
func (it *memRangeIter) nextMutation(context.Context) (key, value val.Tuple) {
|
||||
key, value = it.iter.Current()
|
||||
if key == nil {
|
||||
return
|
||||
@@ -180,10 +180,6 @@ func (it *memRangeIter) nextMutation() (key, value val.Tuple) {
|
||||
return
|
||||
}
|
||||
|
||||
func (it *memRangeIter) count() int {
|
||||
return it.iter.Count()
|
||||
}
|
||||
|
||||
func (it *memRangeIter) close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -44,7 +44,7 @@ func fetchChild(ctx context.Context, ns NodeStore, ref hash.Hash) (Node, error)
|
||||
}
|
||||
|
||||
func writeNewChild(ctx context.Context, ns NodeStore, level uint64, keys, values []nodeItem) (Node, metaPair, error) {
|
||||
child := makeMapNode(ns.Pool(), level, keys, values)
|
||||
child := buildMapNode(ns.Pool(), level, keys, values)
|
||||
|
||||
ref, err := ns.Write(ctx, child)
|
||||
if err != nil {
|
||||
|
||||
@@ -476,10 +476,10 @@ func materializeMap(t *testing.T, mut MutableMap) Map {
|
||||
|
||||
// ensure edits are provided in order
|
||||
iter := mut.overlay.mutations()
|
||||
prev, _ := iter.nextMutation()
|
||||
prev, _ := iter.nextMutation(ctx)
|
||||
require.NotNil(t, prev)
|
||||
for {
|
||||
next, _ := iter.nextMutation()
|
||||
next, _ := iter.nextMutation(ctx)
|
||||
if next == nil {
|
||||
break
|
||||
}
|
||||
|
||||
@@ -21,21 +21,18 @@ import (
|
||||
)
|
||||
|
||||
type mutationIter interface {
|
||||
nextMutation() (key, val val.Tuple)
|
||||
count() int
|
||||
nextMutation(ctx context.Context) (key, value val.Tuple)
|
||||
close() error
|
||||
}
|
||||
|
||||
var _ mutationIter = &memRangeIter{}
|
||||
|
||||
func materializeMutations(ctx context.Context, m Map, edits mutationIter) (Map, error) {
|
||||
var err error
|
||||
if edits.count() == 0 {
|
||||
return m, err
|
||||
newKey, newValue := edits.nextMutation(ctx)
|
||||
if newKey == nil {
|
||||
return m, nil // no mutations
|
||||
}
|
||||
|
||||
newKey, newValue := edits.nextMutation()
|
||||
|
||||
cur, err := newCursorAtItem(ctx, m.ns, m.root, nodeItem(newKey), m.searchNode)
|
||||
if err != nil {
|
||||
return m, err
|
||||
@@ -65,11 +62,11 @@ func materializeMutations(ctx context.Context, m Map, edits mutationIter) (Map,
|
||||
}
|
||||
|
||||
if oldValue == nil && newValue == nil {
|
||||
newKey, newValue = edits.nextMutation()
|
||||
newKey, newValue = edits.nextMutation(ctx)
|
||||
continue // already non-present
|
||||
}
|
||||
if oldValue != nil && compareValues(m, newValue, oldValue) == 0 {
|
||||
newKey, newValue = edits.nextMutation()
|
||||
newKey, newValue = edits.nextMutation(ctx)
|
||||
continue // same newValue
|
||||
}
|
||||
|
||||
@@ -94,7 +91,7 @@ func materializeMutations(ctx context.Context, m Map, edits mutationIter) (Map,
|
||||
}
|
||||
}
|
||||
|
||||
newKey, newValue = edits.nextMutation()
|
||||
newKey, newValue = edits.nextMutation(ctx)
|
||||
}
|
||||
|
||||
m.root, err = chunker.Done(ctx)
|
||||
|
||||
@@ -23,25 +23,68 @@ import (
|
||||
"github.com/dolthub/dolt/go/gen/fb/serial"
|
||||
"github.com/dolthub/dolt/go/store/hash"
|
||||
"github.com/dolthub/dolt/go/store/pool"
|
||||
"github.com/dolthub/dolt/go/store/val"
|
||||
)
|
||||
|
||||
const (
|
||||
maxVectorOffset = uint64(math.MaxUint16)
|
||||
refSize = hash.ByteLen
|
||||
|
||||
// These constants are mirrored from serial.TupleMap.KeyOffsetsLength()
|
||||
// and serial.TupleMap.ValueOffsetsLength() respectively.
|
||||
// They are only as stable as the flatbuffers schemas that define them.
|
||||
keyOffsetsVOffset = 6
|
||||
valueOffsetsVOffset = 10
|
||||
)
|
||||
|
||||
func init() {
|
||||
//emptyNode = makeMapNode(sharedPool, 0, nil, nil)
|
||||
}
|
||||
|
||||
type Node struct {
|
||||
buf serial.TupleMap
|
||||
cnt int
|
||||
emptyNode = buildMapNode(sharedPool, 0, nil, nil)
|
||||
}
|
||||
|
||||
var emptyNode Node
|
||||
|
||||
func makeMapNode(pool pool.BuffPool, level uint64, keys, values []nodeItem) (node Node) {
|
||||
type Node struct {
|
||||
keys, values val.SlicedBuffer
|
||||
refs refBuffer
|
||||
count, level int
|
||||
|
||||
buf serial.TupleMap
|
||||
}
|
||||
|
||||
func mapNodeFromBytes(bb []byte) Node {
|
||||
buf := serial.GetRootAsTupleMap(bb, 0)
|
||||
return mapNodeFromFlatbuffer(*buf)
|
||||
}
|
||||
|
||||
func mapNodeFromFlatbuffer(buf serial.TupleMap) Node {
|
||||
keys := val.SlicedBuffer{
|
||||
Buf: buf.KeyTuplesBytes(),
|
||||
Offs: getKeyOffsetsVector(buf),
|
||||
}
|
||||
values := val.SlicedBuffer{
|
||||
Buf: buf.ValueTuplesBytes(),
|
||||
Offs: getValueOffsetsVector(buf),
|
||||
}
|
||||
refs := refBuffer{
|
||||
buf: buf.RefArrayBytes(),
|
||||
}
|
||||
|
||||
count := buf.KeyOffsetsLength() + 1
|
||||
if len(keys.Buf) == 0 {
|
||||
count = 0
|
||||
}
|
||||
|
||||
return Node{
|
||||
keys: keys,
|
||||
values: values,
|
||||
refs: refs,
|
||||
count: count,
|
||||
level: int(buf.TreeLevel()),
|
||||
buf: buf,
|
||||
}
|
||||
}
|
||||
|
||||
func buildMapNode(pool pool.BuffPool, level uint64, keys, values []nodeItem) (node Node) {
|
||||
var (
|
||||
keyTups, keyOffs fb.UOffsetT
|
||||
valTups, valOffs fb.UOffsetT
|
||||
@@ -85,6 +128,57 @@ func makeMapNode(pool pool.BuffPool, level uint64, keys, values []nodeItem) (nod
|
||||
return mapNodeFromBytes(b.FinishedBytes())
|
||||
}
|
||||
|
||||
func (nd Node) hashOf() hash.Hash {
|
||||
return hash.Of(nd.bytes())
|
||||
}
|
||||
|
||||
func (nd Node) getKey(i int) nodeItem {
|
||||
return nd.keys.GetSlice(i)
|
||||
}
|
||||
|
||||
func (nd Node) getValue(i int) nodeItem {
|
||||
if nd.leafNode() {
|
||||
return nd.values.GetSlice(i)
|
||||
} else {
|
||||
r := nd.getRef(i)
|
||||
return r[:]
|
||||
}
|
||||
}
|
||||
|
||||
func (nd Node) getRef(i int) hash.Hash {
|
||||
return nd.refs.getRef(i)
|
||||
}
|
||||
|
||||
func (nd Node) nodeCount() int {
|
||||
return nd.count
|
||||
}
|
||||
|
||||
// todo(andy): should we support this?
|
||||
//func (nd Node) cumulativeCount() uint64 {
|
||||
// return nd.buf.TreeCount()
|
||||
//}
|
||||
|
||||
func (nd Node) leafNode() bool {
|
||||
return nd.level == 0
|
||||
}
|
||||
|
||||
func (nd Node) empty() bool {
|
||||
return nd.bytes() == nil || nd.nodeCount() == 0
|
||||
}
|
||||
|
||||
func (nd Node) bytes() []byte {
|
||||
return nd.buf.Table().Bytes
|
||||
}
|
||||
|
||||
type refBuffer struct {
|
||||
buf []byte
|
||||
}
|
||||
|
||||
func (rb refBuffer) getRef(i int) hash.Hash {
|
||||
start, stop := i*refSize, (i+1)*refSize
|
||||
return hash.New(rb.buf[start:stop])
|
||||
}
|
||||
|
||||
func getMapBuilder(pool pool.BuffPool, sz int) *fb.Builder {
|
||||
// todo(andy): initialize builder buffer from pool
|
||||
return fb.NewBuilder(sz)
|
||||
@@ -138,87 +232,22 @@ func writeItemOffsets(b *fb.Builder, items []nodeItem, sz int) (cnt int) {
|
||||
return
|
||||
}
|
||||
|
||||
func mapNodeFromBytes(bb []byte) Node {
|
||||
buf := serial.GetRootAsTupleMap(bb, 0)
|
||||
// first key offset omitted
|
||||
cnt := buf.KeyOffsetsLength() + 1
|
||||
if len(buf.KeyTuplesBytes()) == 0 {
|
||||
cnt = 0
|
||||
}
|
||||
return Node{
|
||||
buf: *buf,
|
||||
cnt: cnt,
|
||||
}
|
||||
func getKeyOffsetsVector(buf serial.TupleMap) []byte {
|
||||
sz := buf.KeyOffsetsLength() * 2
|
||||
tab := buf.Table()
|
||||
vec := tab.Offset(keyOffsetsVOffset)
|
||||
start := int(tab.Vector(fb.UOffsetT(vec)))
|
||||
stop := start + sz
|
||||
|
||||
return tab.Bytes[start:stop]
|
||||
}
|
||||
|
||||
func (nd Node) hashOf() hash.Hash {
|
||||
return hash.Of(nd.bytes())
|
||||
}
|
||||
|
||||
func (nd Node) getKey(i int) nodeItem {
|
||||
keys := nd.buf.KeyTuplesBytes()
|
||||
|
||||
start, stop := uint16(0), uint16(len(keys))
|
||||
if i > 0 {
|
||||
start = nd.buf.KeyOffsets(i - 1)
|
||||
}
|
||||
if i < nd.buf.KeyOffsetsLength() {
|
||||
stop = nd.buf.KeyOffsets(i)
|
||||
}
|
||||
|
||||
return keys[start:stop]
|
||||
}
|
||||
|
||||
func (nd Node) getValue(i int) nodeItem {
|
||||
if nd.leafNode() {
|
||||
return nd.getValueTuple(i)
|
||||
} else {
|
||||
r := nd.getRef(i)
|
||||
return r[:]
|
||||
}
|
||||
}
|
||||
|
||||
func (nd Node) getValueTuple(i int) nodeItem {
|
||||
values := nd.buf.ValueTuplesBytes()
|
||||
|
||||
start, stop := uint16(0), uint16(len(values))
|
||||
if i > 0 {
|
||||
start = nd.buf.ValueOffsets(i - 1)
|
||||
}
|
||||
if i < nd.buf.ValueOffsetsLength() {
|
||||
stop = nd.buf.ValueOffsets(i)
|
||||
}
|
||||
|
||||
return values[start:stop]
|
||||
}
|
||||
|
||||
func (nd Node) getRef(i int) hash.Hash {
|
||||
refs := nd.buf.RefArrayBytes()
|
||||
start, stop := i*refSize, (i+1)*refSize
|
||||
return hash.New(refs[start:stop])
|
||||
}
|
||||
|
||||
func (nd Node) level() int {
|
||||
return int(nd.buf.TreeLevel())
|
||||
}
|
||||
|
||||
func (nd Node) nodeCount() int {
|
||||
return nd.cnt
|
||||
}
|
||||
|
||||
// todo(andy): should we support this?
|
||||
//func (nd Node) cumulativeCount() uint64 {
|
||||
// return nd.buf.TreeCount()
|
||||
//}
|
||||
|
||||
func (nd Node) leafNode() bool {
|
||||
return nd.level() == 0
|
||||
}
|
||||
|
||||
func (nd Node) empty() bool {
|
||||
return nd.bytes() == nil || nd.nodeCount() == 0
|
||||
}
|
||||
|
||||
func (nd Node) bytes() []byte {
|
||||
return nd.buf.Table().Bytes
|
||||
func getValueOffsetsVector(buf serial.TupleMap) []byte {
|
||||
sz := buf.ValueOffsetsLength() * 2
|
||||
tab := buf.Table()
|
||||
vec := tab.Offset(valueOffsetsVOffset)
|
||||
start := int(tab.Vector(fb.UOffsetT(vec)))
|
||||
stop := start + sz
|
||||
|
||||
return tab.Bytes[start:stop]
|
||||
}
|
||||
|
||||
@@ -203,7 +203,7 @@ func (cur *nodeCursor) isLeaf() bool {
|
||||
}
|
||||
|
||||
func (cur *nodeCursor) level() uint64 {
|
||||
return uint64(cur.nd.level())
|
||||
return uint64(cur.nd.level)
|
||||
}
|
||||
|
||||
func (cur *nodeCursor) seek(ctx context.Context, item nodeItem, cb compareFn) (err error) {
|
||||
|
||||
@@ -18,6 +18,10 @@ import (
|
||||
"encoding/binary"
|
||||
"math/rand"
|
||||
"testing"
|
||||
"unsafe"
|
||||
|
||||
"github.com/dolthub/dolt/go/gen/fb/serial"
|
||||
"github.com/dolthub/dolt/go/store/val"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
@@ -51,8 +55,33 @@ func TestRoundTripNodeItems(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetKeyValueOffsetsVectors(t *testing.T) {
|
||||
for trial := 0; trial < 100; trial++ {
|
||||
keys, values := randomNodeItemPairs(t, (rand.Int()%101)+50)
|
||||
require.True(t, sumSize(keys)+sumSize(values) < maxVectorOffset)
|
||||
nd := newLeafNode(keys, values)
|
||||
|
||||
ko1, vo1 := offsetsFromSlicedBuffers(nd.keys, nd.values)
|
||||
ko2, vo2 := offsetsFromFlatbuffer(nd.buf)
|
||||
|
||||
assert.Equal(t, len(ko1), len(ko2))
|
||||
assert.Equal(t, len(ko1), len(keys)-1)
|
||||
assert.Equal(t, ko1, ko2)
|
||||
|
||||
assert.Equal(t, len(vo1), len(vo2))
|
||||
assert.Equal(t, len(vo1), len(values)-1)
|
||||
assert.Equal(t, vo1, vo2)
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
func TestNodeSize(t *testing.T) {
|
||||
sz := unsafe.Sizeof(Node{})
|
||||
assert.Equal(t, 168, int(sz))
|
||||
}
|
||||
|
||||
func newLeafNode(keys, values []nodeItem) Node {
|
||||
return makeMapNode(sharedPool, 0, keys, values)
|
||||
return buildMapNode(sharedPool, 0, keys, values)
|
||||
}
|
||||
|
||||
func randomNodeItemPairs(t *testing.T, count int) (keys, values []nodeItem) {
|
||||
@@ -89,3 +118,32 @@ func sumSize(items []nodeItem) (sz uint64) {
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func offsetsFromFlatbuffer(buf serial.TupleMap) (ko, vo []uint16) {
|
||||
ko = make([]uint16, buf.KeyOffsetsLength())
|
||||
for i := range ko {
|
||||
ko[i] = buf.KeyOffsets(i)
|
||||
}
|
||||
|
||||
vo = make([]uint16, buf.ValueOffsetsLength())
|
||||
for i := range vo {
|
||||
vo[i] = buf.ValueOffsets(i)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func offsetsFromSlicedBuffers(keys, values val.SlicedBuffer) (ko, vo []uint16) {
|
||||
ko = deserializeOffsets(keys.Offs)
|
||||
vo = deserializeOffsets(values.Offs)
|
||||
return
|
||||
}
|
||||
|
||||
func deserializeOffsets(buf []byte) (offs []uint16) {
|
||||
offs = make([]uint16, len(buf)/2)
|
||||
for i := range offs {
|
||||
start, stop := i*2, (i+1)*2
|
||||
offs[i] = binary.LittleEndian.Uint16(buf[start:stop])
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
@@ -138,7 +138,7 @@ func encodingFromSqlType(typ query.Type) val.Encoding {
|
||||
case query.Type_YEAR:
|
||||
return val.YearEnc
|
||||
case query.Type_GEOMETRY:
|
||||
return val.BytesEnc
|
||||
return val.GeometryEnc
|
||||
}
|
||||
|
||||
switch typ {
|
||||
|
||||
@@ -32,7 +32,7 @@ func roundTripTreeItems(t *testing.T) {
|
||||
root, items, ns := randomTree(t, 1000)
|
||||
assert.NotNil(t, root)
|
||||
assert.True(t, root.nodeCount() > 0)
|
||||
assert.True(t, root.level() > 0)
|
||||
assert.True(t, root.level > 0)
|
||||
//assert.Equal(t, uint64(1000), root.cumulativeCount())
|
||||
assert.Equal(t, countTree(t, ns, root), 1000)
|
||||
validateTreeItems(t, ns, root, items)
|
||||
@@ -40,7 +40,7 @@ func roundTripTreeItems(t *testing.T) {
|
||||
root, items, ns = randomTree(t, 10_000)
|
||||
assert.NotNil(t, root)
|
||||
assert.True(t, root.nodeCount() > 0)
|
||||
assert.True(t, root.level() > 0)
|
||||
assert.True(t, root.level > 0)
|
||||
//assert.Equal(t, uint64(10_000), root.cumulativeCount())
|
||||
assert.Equal(t, countTree(t, ns, root), 10_000)
|
||||
validateTreeItems(t, ns, root, items)
|
||||
@@ -48,7 +48,7 @@ func roundTripTreeItems(t *testing.T) {
|
||||
root, items, ns = randomTree(t, 100_000)
|
||||
assert.NotNil(t, root)
|
||||
assert.True(t, root.nodeCount() > 0)
|
||||
assert.True(t, root.level() > 0)
|
||||
assert.True(t, root.level > 0)
|
||||
//assert.Equal(t, uint64(100_000), root.cumulativeCount())
|
||||
assert.Equal(t, countTree(t, ns, root), 100_000)
|
||||
validateTreeItems(t, ns, root, items)
|
||||
|
||||
@@ -195,27 +195,21 @@ func testUpdateDiffs(t *testing.T, from Map, tups [][2]val.Tuple, numUpdates int
|
||||
tups[i], tups[j] = tups[j], tups[i]
|
||||
})
|
||||
|
||||
oldPairs := tups[:numUpdates]
|
||||
sort.Slice(oldPairs, func(i, j int) bool {
|
||||
return from.keyDesc.Compare(oldPairs[i][0], oldPairs[j][0]) < 0
|
||||
sub := tups[:numUpdates]
|
||||
sort.Slice(sub, func(i, j int) bool {
|
||||
return from.keyDesc.Compare(sub[i][0], sub[j][0]) < 0
|
||||
})
|
||||
|
||||
kd, vd := from.Descriptors()
|
||||
newPairs := randomTuplePairs(numUpdates, kd, vd)
|
||||
require.Equal(t, len(oldPairs), len(newPairs))
|
||||
for i := range oldPairs {
|
||||
// set keys for updates
|
||||
newPairs[i][0] = oldPairs[i][0]
|
||||
}
|
||||
to := makeMapWithUpdates(t, from, newPairs...)
|
||||
updates := makeUpdatesToTuples(kd, vd, sub...)
|
||||
to := makeMapWithUpdates(t, from, updates...)
|
||||
|
||||
var cnt int
|
||||
err := DiffMaps(ctx, from, to, func(ctx context.Context, diff Diff) error {
|
||||
assert.Equal(t, ModifiedDiff, diff.Type)
|
||||
assert.Equal(t, oldPairs[cnt][0], diff.Key)
|
||||
assert.Equal(t, oldPairs[cnt][1], diff.From)
|
||||
assert.Equal(t, newPairs[cnt][0], diff.Key)
|
||||
assert.Equal(t, newPairs[cnt][1], diff.To)
|
||||
assert.Equal(t, updates[cnt][0], diff.Key)
|
||||
assert.Equal(t, updates[cnt][1], diff.From)
|
||||
assert.Equal(t, updates[cnt][2], diff.To)
|
||||
cnt++
|
||||
return nil
|
||||
})
|
||||
@@ -247,6 +241,31 @@ func makeMapWithInserts(t *testing.T, m Map, inserts ...[2]val.Tuple) Map {
|
||||
return mm
|
||||
}
|
||||
|
||||
func makeMapWithUpdates(t *testing.T, m Map, updates ...[2]val.Tuple) Map {
|
||||
return makeMapWithInserts(t, m, updates...)
|
||||
func makeMapWithUpdates(t *testing.T, m Map, updates ...[3]val.Tuple) Map {
|
||||
ctx := context.Background()
|
||||
mut := m.Mutate()
|
||||
for _, pair := range updates {
|
||||
err := mut.Put(ctx, pair[0], pair[2])
|
||||
require.NoError(t, err)
|
||||
}
|
||||
mm, err := mut.Map(ctx)
|
||||
require.NoError(t, err)
|
||||
return mm
|
||||
}
|
||||
|
||||
func makeUpdatesToTuples(kd, vd val.TupleDesc, tuples ...[2]val.Tuple) (updates [][3]val.Tuple) {
|
||||
updates = make([][3]val.Tuple, len(tuples))
|
||||
|
||||
valBuilder := val.NewTupleBuilder(vd)
|
||||
for i := range updates {
|
||||
updates[i][0] = tuples[i][0]
|
||||
updates[i][1] = tuples[i][1]
|
||||
updates[i][2] = randomTuple(valBuilder)
|
||||
}
|
||||
|
||||
sort.Slice(updates, func(i, j int) bool {
|
||||
return kd.Compare(updates[i][0], updates[j][0]) < 0
|
||||
})
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
229
go/store/prolly/tree_merge.go
Normal file
229
go/store/prolly/tree_merge.go
Normal file
@@ -0,0 +1,229 @@
|
||||
// Copyright 2021 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package prolly
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"io"
|
||||
|
||||
"golang.org/x/sync/errgroup"
|
||||
|
||||
"github.com/dolthub/dolt/go/store/val"
|
||||
)
|
||||
|
||||
const patchBufferSize = 1024
|
||||
|
||||
// TupleMergeFn is a callback that handles 3-way merging of tuples.
|
||||
// A typical implementation will attempt a cell-wise merge of the tuples,
|
||||
// or register a conflict if such a merge is not possible.
|
||||
type TupleMergeFn func(left, right Diff) (Diff, bool)
|
||||
|
||||
// ThreeWayMerge implements a three-way merge algorithm using |base| as the common ancestor, |right| as
|
||||
// the source branch, and |left| as the destination branch. Both |left| and |right| are diff'd against
|
||||
// |base| to compute merge patches, but rather than applying both sets of patches to |base|, patches from
|
||||
// |right| are applied directly to |left|. This reduces the amount of write work and improves performance.
|
||||
// In the case that a key-value pair was modified on both |left| and |right| with different resulting
|
||||
// values, the TupleMergeFn is called to perform a cell-wise merge, or to throw a conflict.
|
||||
func ThreeWayMerge(ctx context.Context, left, right, base Map, cb TupleMergeFn) (final Map, err error) {
|
||||
ld, err := treeDifferFromMaps(ctx, base, left)
|
||||
if err != nil {
|
||||
return Map{}, err
|
||||
}
|
||||
|
||||
rd, err := treeDifferFromMaps(ctx, base, right)
|
||||
if err != nil {
|
||||
return Map{}, err
|
||||
}
|
||||
|
||||
eg, ctx := errgroup.WithContext(ctx)
|
||||
buf := newPatchBuffer(patchBufferSize)
|
||||
|
||||
// iterate |ld| and |rd| in parallel, populating |buf|
|
||||
eg.Go(func() (err error) {
|
||||
defer func() {
|
||||
if cerr := buf.close(); err == nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
err = sendPatches(ctx, ld, rd, buf, cb)
|
||||
return
|
||||
})
|
||||
|
||||
// consume patches from |buf| and apply them to |left|
|
||||
eg.Go(func() error {
|
||||
final, err = materializeMutations(ctx, left, buf)
|
||||
return err
|
||||
})
|
||||
|
||||
if err = eg.Wait(); err != nil {
|
||||
return Map{}, err
|
||||
}
|
||||
|
||||
return final, nil
|
||||
}
|
||||
|
||||
// patchBuffer implements mutationIter. It consumes Diffs
|
||||
// from the parallel treeDiffers and transforms them into
|
||||
// patches for the treeChunker to apply.
|
||||
type patchBuffer struct {
|
||||
buf chan patch
|
||||
}
|
||||
|
||||
var _ mutationIter = patchBuffer{}
|
||||
|
||||
type patch [2]val.Tuple
|
||||
|
||||
func newPatchBuffer(sz int) patchBuffer {
|
||||
return patchBuffer{buf: make(chan patch, sz)}
|
||||
}
|
||||
|
||||
func (ps patchBuffer) sendPatch(ctx context.Context, diff Diff) error {
|
||||
p := patch{diff.Key, diff.To}
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case ps.buf <- p:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// nextMutation implements mutationIter.
|
||||
func (ps patchBuffer) nextMutation(ctx context.Context) (key, value val.Tuple) {
|
||||
var p patch
|
||||
select {
|
||||
case p = <-ps.buf:
|
||||
return p[0], p[1]
|
||||
case <-ctx.Done():
|
||||
return nil, nil
|
||||
}
|
||||
}
|
||||
|
||||
func (ps patchBuffer) close() error {
|
||||
close(ps.buf)
|
||||
return nil
|
||||
}
|
||||
|
||||
func sendPatches(ctx context.Context, l, r treeDiffer, buf patchBuffer, cb TupleMergeFn) (err error) {
|
||||
var (
|
||||
left, right Diff
|
||||
lok, rok = true, true
|
||||
)
|
||||
|
||||
left, err = l.Next(ctx)
|
||||
if err == io.EOF {
|
||||
err, lok = nil, false
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
right, err = r.Next(ctx)
|
||||
if err == io.EOF {
|
||||
err, rok = nil, false
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for lok && rok {
|
||||
cmp := compareDiffKeys(left, right, l.cmp)
|
||||
|
||||
switch {
|
||||
case cmp < 0:
|
||||
// already in left
|
||||
left, err = l.Next(ctx)
|
||||
if err == io.EOF {
|
||||
err, lok = nil, false
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
case cmp > 0:
|
||||
err = buf.sendPatch(ctx, right)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
right, err = r.Next(ctx)
|
||||
if err == io.EOF {
|
||||
err, rok = nil, false
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
case cmp == 0:
|
||||
if !equalDiffVals(left, right) {
|
||||
resolved, ok := cb(left, right)
|
||||
if ok {
|
||||
err = buf.sendPatch(ctx, resolved)
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
left, err = l.Next(ctx)
|
||||
if err == io.EOF {
|
||||
err, lok = nil, false
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
right, err = r.Next(ctx)
|
||||
if err == io.EOF {
|
||||
err, rok = nil, false
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if lok {
|
||||
// already in left
|
||||
return nil
|
||||
}
|
||||
|
||||
for rok {
|
||||
err = buf.sendPatch(ctx, right)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
right, err = r.Next(ctx)
|
||||
if err == io.EOF {
|
||||
err, rok = nil, false
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func compareDiffKeys(left, right Diff, cmp compareFn) int {
|
||||
return cmp(nodeItem(left.Key), nodeItem(right.Key))
|
||||
}
|
||||
|
||||
func equalDiffVals(left, right Diff) bool {
|
||||
// todo(andy): bytes must be comparable
|
||||
ok := left.Type == right.Type
|
||||
return ok && bytes.Equal(left.To, right.To)
|
||||
}
|
||||
272
go/store/prolly/tree_merge_test.go
Normal file
272
go/store/prolly/tree_merge_test.go
Normal file
@@ -0,0 +1,272 @@
|
||||
// Copyright 2021 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package prolly
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/dolthub/dolt/go/store/val"
|
||||
)
|
||||
|
||||
func Test3WayMapMerge(t *testing.T) {
|
||||
scales := []int{
|
||||
10,
|
||||
100,
|
||||
1000,
|
||||
10000,
|
||||
}
|
||||
|
||||
kd := val.NewTupleDescriptor(
|
||||
val.Type{Enc: val.Uint32Enc, Nullable: false},
|
||||
)
|
||||
vd := val.NewTupleDescriptor(
|
||||
val.Type{Enc: val.Uint32Enc, Nullable: true},
|
||||
val.Type{Enc: val.Uint32Enc, Nullable: true},
|
||||
val.Type{Enc: val.Uint32Enc, Nullable: true},
|
||||
)
|
||||
|
||||
for _, s := range scales {
|
||||
name := fmt.Sprintf("test proCur map at scale %d", s)
|
||||
t.Run(name, func(t *testing.T) {
|
||||
t.Run("merge identical maps", func(t *testing.T) {
|
||||
testEqualMapMerge(t, s)
|
||||
})
|
||||
t.Run("3way merge inserts", func(t *testing.T) {
|
||||
for k := 0; k < 10; k++ {
|
||||
testThreeWayMapMerge(t, kd, vd, s)
|
||||
}
|
||||
})
|
||||
t.Run("tuple merge fn", func(t *testing.T) {
|
||||
for k := 0; k < 10; k++ {
|
||||
testTupleMergeFn(t, kd, vd, s)
|
||||
}
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func testEqualMapMerge(t *testing.T, sz int) {
|
||||
om, _ := makeProllyMap(t, sz)
|
||||
m := om.(Map)
|
||||
ctx := context.Background()
|
||||
mm, err := ThreeWayMerge(ctx, m, m, m, panicOnConflict)
|
||||
require.NoError(t, err)
|
||||
assert.NotNil(t, mm)
|
||||
assert.Equal(t, m.HashOf(), mm.HashOf())
|
||||
}
|
||||
|
||||
func testThreeWayMapMerge(t *testing.T, kd, vd val.TupleDesc, sz int) {
|
||||
baseTuples, leftEdits, rightEdits := makeTuplesAndMutations(kd, vd, sz)
|
||||
om := prollyMapFromTuples(t, kd, vd, baseTuples)
|
||||
|
||||
base := om.(Map)
|
||||
left := applyMutationSet(t, base, leftEdits)
|
||||
right := applyMutationSet(t, base, rightEdits)
|
||||
|
||||
ctx := context.Background()
|
||||
final, err := ThreeWayMerge(ctx, left, right, base, panicOnConflict)
|
||||
assert.NoError(t, err)
|
||||
|
||||
for _, add := range leftEdits.adds {
|
||||
ok, err := final.Has(ctx, add[0])
|
||||
assert.NoError(t, err)
|
||||
assert.True(t, ok)
|
||||
err = final.Get(ctx, add[0], func(key, value val.Tuple) error {
|
||||
assert.Equal(t, value, add[1])
|
||||
return nil
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
for _, add := range rightEdits.adds {
|
||||
ok, err := final.Has(ctx, add[0])
|
||||
assert.NoError(t, err)
|
||||
assert.True(t, ok)
|
||||
err = final.Get(ctx, add[0], func(key, value val.Tuple) error {
|
||||
assert.Equal(t, value, add[1])
|
||||
return nil
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
|
||||
for _, del := range leftEdits.deletes {
|
||||
ok, err := final.Has(ctx, del)
|
||||
assert.NoError(t, err)
|
||||
assert.False(t, ok)
|
||||
}
|
||||
for _, del := range rightEdits.deletes {
|
||||
ok, err := final.Has(ctx, del)
|
||||
assert.NoError(t, err)
|
||||
assert.False(t, ok)
|
||||
}
|
||||
|
||||
for _, up := range leftEdits.updates {
|
||||
ok, err := final.Has(ctx, up[0])
|
||||
assert.NoError(t, err)
|
||||
assert.True(t, ok)
|
||||
err = final.Get(ctx, up[0], func(key, value val.Tuple) error {
|
||||
assert.Equal(t, value, up[1])
|
||||
return nil
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
for _, up := range rightEdits.updates {
|
||||
ok, err := final.Has(ctx, up[0])
|
||||
assert.NoError(t, err)
|
||||
assert.True(t, ok)
|
||||
err = final.Get(ctx, up[0], func(key, value val.Tuple) error {
|
||||
assert.Equal(t, value, up[1])
|
||||
return nil
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
}
|
||||
|
||||
func testTupleMergeFn(t *testing.T, kd, vd val.TupleDesc, sz int) {
|
||||
ctx := context.Background()
|
||||
tuples := randomTuplePairs(sz, kd, vd)
|
||||
om := prollyMapFromTuples(t, kd, vd, tuples)
|
||||
base := om.(Map)
|
||||
|
||||
mutSz := sz / 10
|
||||
testRand.Shuffle(len(tuples), func(i, j int) {
|
||||
tuples[i], tuples[j] = tuples[j], tuples[i]
|
||||
})
|
||||
|
||||
// make overlapping edits
|
||||
left := makeUpdatesToTuples(kd, vd, tuples[:mutSz]...)
|
||||
right := makeUpdatesToTuples(kd, vd, tuples[:mutSz]...)
|
||||
|
||||
l := base.Mutate()
|
||||
for _, update := range left {
|
||||
err := l.Put(ctx, update[0], update[2])
|
||||
require.NoError(t, err)
|
||||
}
|
||||
leftMap, err := l.Map(ctx)
|
||||
require.NoError(t, err)
|
||||
|
||||
r := base.Mutate()
|
||||
for _, update := range right {
|
||||
err := r.Put(ctx, update[0], update[2])
|
||||
require.NoError(t, err)
|
||||
}
|
||||
rightMap, err := r.Map(ctx)
|
||||
require.NoError(t, err)
|
||||
|
||||
idx := 0
|
||||
final, err := ThreeWayMerge(ctx, leftMap, rightMap, base, func(l, r Diff) (merged Diff, ok bool) {
|
||||
assert.Equal(t, l.Key, r.Key)
|
||||
assert.Equal(t, l.From, r.From)
|
||||
|
||||
assert.Equal(t, l.To, left[idx][2])
|
||||
assert.Equal(t, r.To, right[idx][2])
|
||||
|
||||
// right diff wins
|
||||
merged, ok = r, true
|
||||
idx++
|
||||
return
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
for _, update := range left {
|
||||
err = final.Get(ctx, update[0], func(key, value val.Tuple) error {
|
||||
assert.Equal(t, key, update[0])
|
||||
assert.NotEqual(t, value, update[2])
|
||||
return nil
|
||||
})
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
for _, update := range right {
|
||||
err = final.Get(ctx, update[0], func(key, value val.Tuple) error {
|
||||
assert.Equal(t, key, update[0])
|
||||
assert.Equal(t, value, update[2])
|
||||
return nil
|
||||
})
|
||||
require.NoError(t, err)
|
||||
}
|
||||
}
|
||||
|
||||
type mutationSet struct {
|
||||
adds [][2]val.Tuple
|
||||
deletes []val.Tuple
|
||||
updates [][3]val.Tuple
|
||||
}
|
||||
|
||||
func makeTuplesAndMutations(kd, vd val.TupleDesc, sz int) (base [][2]val.Tuple, left, right mutationSet) {
|
||||
mutSz := sz / 10
|
||||
totalSz := sz + (mutSz * 2)
|
||||
tuples := randomTuplePairs(totalSz, kd, vd)
|
||||
|
||||
base = tuples[:sz]
|
||||
|
||||
left = mutationSet{
|
||||
adds: tuples[sz : sz+mutSz],
|
||||
deletes: make([]val.Tuple, mutSz),
|
||||
}
|
||||
right = mutationSet{
|
||||
adds: tuples[sz+mutSz:],
|
||||
deletes: make([]val.Tuple, mutSz),
|
||||
}
|
||||
|
||||
edits := make([][2]val.Tuple, len(base))
|
||||
copy(edits, base)
|
||||
testRand.Shuffle(len(edits), func(i, j int) {
|
||||
edits[i], edits[j] = edits[j], edits[i]
|
||||
})
|
||||
|
||||
for i, pair := range edits[:mutSz] {
|
||||
left.deletes[i] = pair[0]
|
||||
}
|
||||
for i, pair := range edits[mutSz : mutSz*2] {
|
||||
right.deletes[i] = pair[0]
|
||||
}
|
||||
|
||||
left.updates = makeUpdatesToTuples(kd, vd, edits[mutSz*2:mutSz*3]...)
|
||||
right.updates = makeUpdatesToTuples(kd, vd, edits[mutSz*3:mutSz*4]...)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func applyMutationSet(t *testing.T, base Map, edits mutationSet) (m Map) {
|
||||
ctx := context.Background()
|
||||
mut := base.Mutate()
|
||||
|
||||
var err error
|
||||
for _, add := range edits.adds {
|
||||
err = mut.Put(ctx, add[0], add[1])
|
||||
require.NoError(t, err)
|
||||
}
|
||||
for _, del := range edits.deletes {
|
||||
err = mut.Delete(ctx, del)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
for _, up := range edits.updates {
|
||||
err = mut.Put(ctx, up[0], up[1])
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
m, err = mut.Map(ctx)
|
||||
require.NoError(t, err)
|
||||
return
|
||||
}
|
||||
|
||||
func panicOnConflict(left, right Diff) (Diff, bool) {
|
||||
panic("cannot merge cells")
|
||||
}
|
||||
@@ -51,7 +51,7 @@ func countOrderedMap(t *testing.T, om orderedMap) (cnt int) {
|
||||
require.NoError(t, err)
|
||||
cnt++
|
||||
}
|
||||
return
|
||||
return cnt
|
||||
}
|
||||
|
||||
func keyDescFromMap(om orderedMap) val.TupleDesc {
|
||||
@@ -77,23 +77,34 @@ func randomTuplePairs(count int, keyDesc, valDesc val.TupleDesc) (items [][2]val
|
||||
items[i][1] = randomTuple(valBuilder)
|
||||
}
|
||||
|
||||
sortTuplePairs(items, keyDesc)
|
||||
dupes := make([]int, 0, count)
|
||||
for {
|
||||
sortTuplePairs(items, keyDesc)
|
||||
for i := range items {
|
||||
if i == 0 {
|
||||
continue
|
||||
}
|
||||
if keyDesc.Compare(items[i][0], items[i-1][0]) == 0 {
|
||||
dupes = append(dupes, i)
|
||||
}
|
||||
}
|
||||
if len(dupes) == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
for i := range items {
|
||||
if i == 0 {
|
||||
continue
|
||||
}
|
||||
if keyDesc.Compare(items[i][0], items[i-1][0]) == 0 {
|
||||
panic("duplicate key, unlucky!")
|
||||
// replace duplicates and validate again
|
||||
for _, d := range dupes {
|
||||
items[d][0] = randomTuple(keyBuilder)
|
||||
}
|
||||
dupes = dupes[:0]
|
||||
}
|
||||
return
|
||||
return items
|
||||
}
|
||||
|
||||
func randomCompositeTuplePairs(count int, keyDesc, valDesc val.TupleDesc) (items [][2]val.Tuple) {
|
||||
// preconditions
|
||||
if count%5 != 0 {
|
||||
panic("expected count divisible by 5")
|
||||
panic("expected empty divisible by 5")
|
||||
}
|
||||
if len(keyDesc.Types) < 2 {
|
||||
panic("expected composite key")
|
||||
|
||||
@@ -40,6 +40,7 @@ import (
|
||||
"github.com/aws/aws-sdk-go/service/dynamodb"
|
||||
"github.com/aws/aws-sdk-go/service/s3"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/utils/filesys"
|
||||
"github.com/dolthub/dolt/go/store/chunks"
|
||||
"github.com/dolthub/dolt/go/store/d"
|
||||
"github.com/dolthub/dolt/go/store/datas"
|
||||
@@ -472,9 +473,27 @@ func (sp Spec) createDatabase(ctx context.Context) (datas.Database, types.ValueR
|
||||
vrw := types.NewValueStore(cs)
|
||||
return datas.NewTypesDatabase(vrw), vrw
|
||||
case "nbs":
|
||||
os.Mkdir(sp.DatabaseName, 0777)
|
||||
cs, err := nbs.NewLocalStore(ctx, types.Format_Default.VersionString(), sp.DatabaseName, 1<<28)
|
||||
// If the database is the oldgen database return a standard NBS store.
|
||||
if strings.Contains(sp.DatabaseName, "oldgen") {
|
||||
return getStandardLocalStore(ctx, sp.DatabaseName)
|
||||
}
|
||||
|
||||
oldgenDb := filepath.Join(sp.DatabaseName, "oldgen")
|
||||
|
||||
err := validateDir(oldgenDb)
|
||||
// If we can't validate that an oldgen db exists just use a standard local store.
|
||||
if err != nil {
|
||||
return getStandardLocalStore(ctx, sp.DatabaseName)
|
||||
}
|
||||
|
||||
newGenSt, err := nbs.NewLocalStore(ctx, types.Format_Default.VersionString(), sp.DatabaseName, 1<<28)
|
||||
d.PanicIfError(err)
|
||||
|
||||
oldGenSt, err := nbs.NewLocalStore(ctx, types.Format_Default.VersionString(), oldgenDb, 1<<28)
|
||||
d.PanicIfError(err)
|
||||
|
||||
cs := nbs.NewGenerationalCS(oldGenSt, newGenSt)
|
||||
|
||||
vrw := types.NewValueStore(cs)
|
||||
return datas.NewTypesDatabase(vrw), vrw
|
||||
case "mem":
|
||||
@@ -494,6 +513,28 @@ func (sp Spec) createDatabase(ctx context.Context) (datas.Database, types.ValueR
|
||||
}
|
||||
}
|
||||
|
||||
func getStandardLocalStore(ctx context.Context, dbName string) (datas.Database, types.ValueReadWriter) {
|
||||
os.Mkdir(dbName, 0777)
|
||||
|
||||
cs, err := nbs.NewLocalStore(ctx, types.Format_Default.VersionString(), dbName, 1<<28)
|
||||
d.PanicIfError(err)
|
||||
|
||||
vrw := types.NewValueStore(cs)
|
||||
return datas.NewTypesDatabase(vrw), vrw
|
||||
}
|
||||
|
||||
func validateDir(path string) error {
|
||||
info, err := os.Stat(path)
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
} else if !info.IsDir() {
|
||||
return filesys.ErrIsFile
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func parseDatabaseSpec(spec string) (protocol, name string, err error) {
|
||||
if len(spec) == 0 {
|
||||
err = fmt.Errorf("empty spec")
|
||||
|
||||
@@ -22,6 +22,8 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/dolthub/dolt/go/store/geometry"
|
||||
|
||||
"github.com/dolthub/dolt/go/store/hash"
|
||||
)
|
||||
|
||||
@@ -134,7 +136,7 @@ func WriteEWKBLineData(l Linestring, buf []byte) {
|
||||
binary.LittleEndian.PutUint32(buf[:LengthSize], uint32(len(l.Points)))
|
||||
// Append each point
|
||||
for i, p := range l.Points {
|
||||
WriteEWKBPointData(p, buf[LengthSize+PointDataSize*i:LengthSize+PointDataSize*(i+1)])
|
||||
WriteEWKBPointData(p, buf[LengthSize+geometry.PointSize*i:LengthSize+geometry.PointSize*(i+1)])
|
||||
}
|
||||
}
|
||||
|
||||
@@ -145,11 +147,11 @@ func (v Linestring) writeTo(w nomsWriter, nbf *NomsBinFormat) error {
|
||||
}
|
||||
|
||||
// Allocate buffer for linestring
|
||||
buf := make([]byte, EWKBHeaderSize+LengthSize+PointDataSize*len(v.Points))
|
||||
buf := make([]byte, geometry.EWKBHeaderSize+LengthSize+geometry.PointSize*len(v.Points))
|
||||
|
||||
// Write header and data to buffer
|
||||
WriteEWKBHeader(v, buf)
|
||||
WriteEWKBLineData(v, buf[EWKBHeaderSize:])
|
||||
WriteEWKBLineData(v, buf[geometry.EWKBHeaderSize:])
|
||||
|
||||
w.writeString(string(buf))
|
||||
return nil
|
||||
@@ -164,7 +166,7 @@ func ParseEWKBLine(buf []byte, srid uint32) Linestring {
|
||||
// Parse points
|
||||
points := make([]Point, numPoints)
|
||||
for i := uint32(0); i < numPoints; i++ {
|
||||
points[i] = ParseEWKBPoint(buf[LengthSize+PointDataSize*i:LengthSize+PointDataSize*(i+1)], srid)
|
||||
points[i] = ParseEWKBPoint(buf[LengthSize+geometry.PointSize*i:LengthSize+geometry.PointSize*(i+1)], srid)
|
||||
}
|
||||
|
||||
return Linestring{SRID: srid, Points: points}
|
||||
@@ -172,20 +174,20 @@ func ParseEWKBLine(buf []byte, srid uint32) Linestring {
|
||||
|
||||
func readLinestring(nbf *NomsBinFormat, b *valueDecoder) (Linestring, error) {
|
||||
buf := []byte(b.ReadString())
|
||||
srid, _, geomType := ParseEWKBHeader(buf)
|
||||
if geomType != LinestringID {
|
||||
srid, _, geomType := geometry.ParseEWKBHeader(buf)
|
||||
if geomType != geometry.LinestringType {
|
||||
return Linestring{}, errors.New("not a linestring")
|
||||
}
|
||||
return ParseEWKBLine(buf[EWKBHeaderSize:], srid), nil
|
||||
return ParseEWKBLine(buf[geometry.EWKBHeaderSize:], srid), nil
|
||||
}
|
||||
|
||||
func (v Linestring) readFrom(nbf *NomsBinFormat, b *binaryNomsReader) (Value, error) {
|
||||
buf := []byte(b.ReadString())
|
||||
srid, _, geomType := ParseEWKBHeader(buf)
|
||||
if geomType != LinestringID {
|
||||
srid, _, geomType := geometry.ParseEWKBHeader(buf)
|
||||
if geomType != geometry.LinestringType {
|
||||
return nil, errors.New("not a linestring")
|
||||
}
|
||||
return ParseEWKBLine(buf[EWKBHeaderSize:], srid), nil
|
||||
return ParseEWKBLine(buf[geometry.EWKBHeaderSize:], srid), nil
|
||||
}
|
||||
|
||||
func (v Linestring) skip(nbf *NomsBinFormat, b *binaryNomsReader) {
|
||||
|
||||
@@ -16,24 +16,13 @@ package types
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"strconv"
|
||||
|
||||
"github.com/dolthub/dolt/go/store/hash"
|
||||
)
|
||||
"github.com/dolthub/dolt/go/store/geometry"
|
||||
|
||||
const (
|
||||
SRIDSize = 4
|
||||
EndianSize = 1
|
||||
TypeSize = 4
|
||||
EWKBHeaderSize = SRIDSize + EndianSize + TypeSize
|
||||
PointDataSize = 16
|
||||
PointID = 1
|
||||
LinestringID = 2
|
||||
PolygonID = 3
|
||||
"github.com/dolthub/dolt/go/store/hash"
|
||||
)
|
||||
|
||||
// Point is a Noms Value wrapper around the primitive string type (for now).
|
||||
@@ -93,29 +82,21 @@ func (v Point) valueReadWriter() ValueReadWriter {
|
||||
// WriteEWKBHeader writes the SRID, endianness, and type to the byte buffer
|
||||
// This function assumes v is a valid spatial type
|
||||
func WriteEWKBHeader(v interface{}, buf []byte) {
|
||||
// Write endianness byte (always little endian)
|
||||
buf[SRIDSize] = 1
|
||||
|
||||
// Parse data
|
||||
switch v := v.(type) {
|
||||
case Point:
|
||||
// Write SRID and type
|
||||
binary.LittleEndian.PutUint32(buf[0:SRIDSize], v.SRID)
|
||||
binary.LittleEndian.PutUint32(buf[SRIDSize+EndianSize:EWKBHeaderSize], PointID)
|
||||
geometry.WriteEWKBHeader(buf, v.SRID, geometry.PointType)
|
||||
case Linestring:
|
||||
binary.LittleEndian.PutUint32(buf[0:SRIDSize], v.SRID)
|
||||
binary.LittleEndian.PutUint32(buf[SRIDSize+EndianSize:EWKBHeaderSize], LinestringID)
|
||||
geometry.WriteEWKBHeader(buf, v.SRID, geometry.LinestringType)
|
||||
case Polygon:
|
||||
binary.LittleEndian.PutUint32(buf[0:SRIDSize], v.SRID)
|
||||
binary.LittleEndian.PutUint32(buf[SRIDSize+EndianSize:EWKBHeaderSize], PolygonID)
|
||||
geometry.WriteEWKBHeader(buf, v.SRID, geometry.PolygonType)
|
||||
}
|
||||
}
|
||||
|
||||
// WriteEWKBPointData converts a Point into a byte array in EWKB format
|
||||
// Very similar to function in GMS
|
||||
func WriteEWKBPointData(p Point, buf []byte) {
|
||||
binary.LittleEndian.PutUint64(buf[:PointDataSize/2], math.Float64bits(p.X))
|
||||
binary.LittleEndian.PutUint64(buf[PointDataSize/2:], math.Float64bits(p.Y))
|
||||
geometry.WriteEWKBPointData(buf, p.X, p.Y)
|
||||
}
|
||||
|
||||
func (v Point) writeTo(w nomsWriter, nbf *NomsBinFormat) error {
|
||||
@@ -126,49 +107,39 @@ func (v Point) writeTo(w nomsWriter, nbf *NomsBinFormat) error {
|
||||
}
|
||||
|
||||
// Allocate buffer for point 4 + 1 + 4 + 16
|
||||
buf := make([]byte, EWKBHeaderSize+PointDataSize)
|
||||
buf := make([]byte, geometry.EWKBHeaderSize+geometry.PointSize)
|
||||
|
||||
// Write header and data to buffer
|
||||
WriteEWKBHeader(v, buf)
|
||||
WriteEWKBPointData(v, buf[EWKBHeaderSize:])
|
||||
WriteEWKBPointData(v, buf[geometry.EWKBHeaderSize:])
|
||||
|
||||
w.writeString(string(buf))
|
||||
return nil
|
||||
}
|
||||
|
||||
// ParseEWKBHeader converts the header potion of a EWKB byte array to srid, endianness, and geometry type
|
||||
func ParseEWKBHeader(buf []byte) (uint32, bool, uint32) {
|
||||
srid := binary.LittleEndian.Uint32(buf[0:SRIDSize]) // First 4 bytes is SRID always in little endian
|
||||
isBig := buf[SRIDSize] == 0 // Next byte is endianness
|
||||
geomType := binary.LittleEndian.Uint32(buf[SRIDSize+EndianSize : EWKBHeaderSize]) // Next 4 bytes is type
|
||||
return srid, isBig, geomType
|
||||
}
|
||||
|
||||
// ParseEWKBPoint converts the data portion of a WKB point to Point
|
||||
// Very similar logic to the function in GMS
|
||||
func ParseEWKBPoint(buf []byte, srid uint32) Point {
|
||||
// Read floats x and y
|
||||
x := math.Float64frombits(binary.LittleEndian.Uint64(buf[:PointDataSize/2]))
|
||||
y := math.Float64frombits(binary.LittleEndian.Uint64(buf[PointDataSize/2:]))
|
||||
x, y := geometry.ParseEWKBPoint(buf)
|
||||
return Point{SRID: srid, X: x, Y: y}
|
||||
}
|
||||
|
||||
func readPoint(nbf *NomsBinFormat, b *valueDecoder) (Point, error) {
|
||||
buf := []byte(b.ReadString())
|
||||
srid, _, geomType := ParseEWKBHeader(buf) // Assume it's always little endian
|
||||
if geomType != PointID {
|
||||
srid, _, geomType := geometry.ParseEWKBHeader(buf) // Assume it's always little endian
|
||||
if geomType != geometry.PointType {
|
||||
return Point{}, errors.New("not a point")
|
||||
}
|
||||
return ParseEWKBPoint(buf[EWKBHeaderSize:], srid), nil
|
||||
return ParseEWKBPoint(buf[geometry.EWKBHeaderSize:], srid), nil
|
||||
}
|
||||
|
||||
func (v Point) readFrom(nbf *NomsBinFormat, b *binaryNomsReader) (Value, error) {
|
||||
buf := []byte(b.ReadString())
|
||||
srid, _, geomType := ParseEWKBHeader(buf) // Assume it's always little endian
|
||||
if geomType != PointID {
|
||||
srid, _, geomType := geometry.ParseEWKBHeader(buf) // Assume it's always little endian
|
||||
if geomType != geometry.PointType {
|
||||
return Point{}, errors.New("not a point")
|
||||
}
|
||||
return ParseEWKBPoint(buf[EWKBHeaderSize:], srid), nil
|
||||
return ParseEWKBPoint(buf[geometry.EWKBHeaderSize:], srid), nil
|
||||
}
|
||||
|
||||
func (v Point) skip(nbf *NomsBinFormat, b *binaryNomsReader) {
|
||||
|
||||
@@ -22,6 +22,8 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/dolthub/dolt/go/store/geometry"
|
||||
|
||||
"github.com/dolthub/dolt/go/store/hash"
|
||||
)
|
||||
|
||||
@@ -128,7 +130,7 @@ func WriteEWKBPolyData(p Polygon, buf []byte) {
|
||||
// Write each line
|
||||
start, stop := 0, LengthSize
|
||||
for _, l := range p.Lines {
|
||||
start, stop = stop, stop+LengthSize+PointDataSize*len(l.Points)
|
||||
start, stop = stop, stop+LengthSize+geometry.PointSize*len(l.Points)
|
||||
WriteEWKBLineData(l, buf[start:stop])
|
||||
}
|
||||
}
|
||||
@@ -142,15 +144,15 @@ func (v Polygon) writeTo(w nomsWriter, nbf *NomsBinFormat) error {
|
||||
// Calculate space for polygon buffer
|
||||
size := 0
|
||||
for _, l := range v.Lines {
|
||||
size += LengthSize + PointDataSize*len(l.Points)
|
||||
size += LengthSize + geometry.PointSize*len(l.Points)
|
||||
}
|
||||
|
||||
// Allocate buffer for poly
|
||||
buf := make([]byte, EWKBHeaderSize+LengthSize+size)
|
||||
buf := make([]byte, geometry.EWKBHeaderSize+LengthSize+size)
|
||||
|
||||
// Write header and data to buffer
|
||||
WriteEWKBHeader(v, buf)
|
||||
WriteEWKBPolyData(v, buf[EWKBHeaderSize:])
|
||||
WriteEWKBPolyData(v, buf[geometry.EWKBHeaderSize:])
|
||||
|
||||
w.writeString(string(buf))
|
||||
return nil
|
||||
@@ -167,7 +169,7 @@ func ParseEWKBPoly(buf []byte, srid uint32) Polygon {
|
||||
lines := make([]Linestring, numLines)
|
||||
for i := uint32(0); i < numLines; i++ {
|
||||
lines[i] = ParseEWKBLine(buf[s:], srid)
|
||||
s += LengthSize * PointDataSize * len(lines[i].Points)
|
||||
s += LengthSize * geometry.PointSize * len(lines[i].Points)
|
||||
}
|
||||
|
||||
return Polygon{SRID: srid, Lines: lines}
|
||||
@@ -175,20 +177,20 @@ func ParseEWKBPoly(buf []byte, srid uint32) Polygon {
|
||||
|
||||
func readPolygon(nbf *NomsBinFormat, b *valueDecoder) (Polygon, error) {
|
||||
buf := []byte(b.ReadString())
|
||||
srid, _, geomType := ParseEWKBHeader(buf)
|
||||
if geomType != PolygonID {
|
||||
srid, _, geomType := geometry.ParseEWKBHeader(buf)
|
||||
if geomType != geometry.PolygonType {
|
||||
return Polygon{}, errors.New("not a polygon")
|
||||
}
|
||||
return ParseEWKBPoly(buf[EWKBHeaderSize:], srid), nil
|
||||
return ParseEWKBPoly(buf[geometry.EWKBHeaderSize:], srid), nil
|
||||
}
|
||||
|
||||
func (v Polygon) readFrom(nbf *NomsBinFormat, b *binaryNomsReader) (Value, error) {
|
||||
buf := []byte(b.ReadString())
|
||||
srid, _, geomType := ParseEWKBHeader(buf)
|
||||
if geomType != PolygonID {
|
||||
srid, _, geomType := geometry.ParseEWKBHeader(buf)
|
||||
if geomType != geometry.PolygonType {
|
||||
return nil, errors.New("not a polygon")
|
||||
}
|
||||
return ParseEWKBPoly(buf[EWKBHeaderSize:], srid), nil
|
||||
return ParseEWKBPoly(buf[geometry.EWKBHeaderSize:], srid), nil
|
||||
}
|
||||
|
||||
func (v Polygon) skip(nbf *NomsBinFormat, b *binaryNomsReader) {
|
||||
|
||||
@@ -29,6 +29,7 @@ import (
|
||||
"github.com/shopspring/decimal"
|
||||
|
||||
"github.com/dolthub/dolt/go/store/d"
|
||||
"github.com/dolthub/dolt/go/store/geometry"
|
||||
)
|
||||
|
||||
var ErrUnknownType = errors.New("unknown type $@")
|
||||
@@ -374,27 +375,27 @@ func (r *valueDecoder) readValue(nbf *NomsBinFormat) (Value, error) {
|
||||
case PointKind:
|
||||
r.skipKind()
|
||||
buf := []byte(r.ReadString())
|
||||
srid, _, geomType := ParseEWKBHeader(buf)
|
||||
if geomType != PointID {
|
||||
srid, _, geomType := geometry.ParseEWKBHeader(buf)
|
||||
if geomType != geometry.PointType {
|
||||
return nil, ErrUnknownType
|
||||
}
|
||||
return ParseEWKBPoint(buf[EWKBHeaderSize:], srid), nil
|
||||
return ParseEWKBPoint(buf[geometry.EWKBHeaderSize:], srid), nil
|
||||
case LinestringKind:
|
||||
r.skipKind()
|
||||
buf := []byte(r.ReadString())
|
||||
srid, _, geomType := ParseEWKBHeader(buf)
|
||||
if geomType != LinestringID {
|
||||
srid, _, geomType := geometry.ParseEWKBHeader(buf)
|
||||
if geomType != geometry.LinestringType {
|
||||
return nil, ErrUnknownType
|
||||
}
|
||||
return ParseEWKBLine(buf[EWKBHeaderSize:], srid), nil
|
||||
return ParseEWKBLine(buf[geometry.EWKBHeaderSize:], srid), nil
|
||||
case PolygonKind:
|
||||
r.skipKind()
|
||||
buf := []byte(r.ReadString())
|
||||
srid, _, geomType := ParseEWKBHeader(buf)
|
||||
if geomType != PolygonID {
|
||||
srid, _, geomType := geometry.ParseEWKBHeader(buf)
|
||||
if geomType != geometry.PolygonType {
|
||||
return nil, ErrUnknownType
|
||||
}
|
||||
return ParseEWKBPoly(buf[EWKBHeaderSize:], srid), nil
|
||||
return ParseEWKBPoly(buf[geometry.EWKBHeaderSize:], srid), nil
|
||||
case TypeKind:
|
||||
r.skipKind()
|
||||
return r.readType()
|
||||
|
||||
@@ -19,13 +19,10 @@ import (
|
||||
"encoding/binary"
|
||||
"math"
|
||||
"time"
|
||||
|
||||
"github.com/shopspring/decimal"
|
||||
)
|
||||
|
||||
type Type struct {
|
||||
Enc Encoding
|
||||
Coll Collation
|
||||
Nullable bool
|
||||
}
|
||||
|
||||
@@ -38,34 +35,24 @@ const (
|
||||
uint16Size ByteSize = 2
|
||||
int32Size ByteSize = 4
|
||||
uint32Size ByteSize = 4
|
||||
int48Size ByteSize = 6
|
||||
uint48Size ByteSize = 6
|
||||
int64Size ByteSize = 8
|
||||
uint64Size ByteSize = 8
|
||||
float32Size ByteSize = 4
|
||||
float64Size ByteSize = 8
|
||||
|
||||
// todo(andy): experimental encoding
|
||||
timestampSize ByteSize = 15
|
||||
)
|
||||
|
||||
type Collation uint16
|
||||
|
||||
const (
|
||||
ByteOrderCollation Collation = 0
|
||||
timestampSize ByteSize = 8
|
||||
)
|
||||
|
||||
type Encoding uint8
|
||||
|
||||
// Constant Size Encodings
|
||||
const (
|
||||
NullEnc Encoding = 0
|
||||
Int8Enc Encoding = 1
|
||||
Uint8Enc Encoding = 2
|
||||
Int16Enc Encoding = 3
|
||||
Uint16Enc Encoding = 4
|
||||
// Int24Enc Encoding = 5
|
||||
// Uint24Enc Encoding = 6
|
||||
NullEnc Encoding = 0
|
||||
Int8Enc Encoding = 1
|
||||
Uint8Enc Encoding = 2
|
||||
Int16Enc Encoding = 3
|
||||
Uint16Enc Encoding = 4
|
||||
Int32Enc Encoding = 7
|
||||
Uint32Enc Encoding = 8
|
||||
Int64Enc Encoding = 9
|
||||
@@ -74,7 +61,6 @@ const (
|
||||
Float64Enc Encoding = 12
|
||||
|
||||
// todo(andy): experimental encodings
|
||||
// consolidate into one
|
||||
TimestampEnc Encoding = 14
|
||||
DateEnc Encoding = 15
|
||||
DatetimeEnc Encoding = 16
|
||||
@@ -89,9 +75,10 @@ const (
|
||||
BytesEnc Encoding = 129
|
||||
|
||||
// todo(andy): experimental encodings
|
||||
DecimalEnc Encoding = 130
|
||||
JSONEnc Encoding = 131
|
||||
TimeEnc Encoding = 132
|
||||
DecimalEnc Encoding = 130
|
||||
JSONEnc Encoding = 131
|
||||
TimeEnc Encoding = 132
|
||||
GeometryEnc Encoding = 133
|
||||
|
||||
// TODO
|
||||
// BitEnc
|
||||
@@ -139,93 +126,10 @@ func sizeFromType(t Type) (ByteSize, bool) {
|
||||
}
|
||||
}
|
||||
|
||||
func ReadBool(val []byte) bool {
|
||||
func readBool(val []byte) bool {
|
||||
expectSize(val, int8Size)
|
||||
return val[0] == 1
|
||||
}
|
||||
func ReadInt8(val []byte) int8 {
|
||||
expectSize(val, int8Size)
|
||||
return int8(val[0])
|
||||
}
|
||||
|
||||
func ReadUint8(val []byte) uint8 {
|
||||
expectSize(val, uint8Size)
|
||||
return val[0]
|
||||
}
|
||||
|
||||
func ReadInt16(val []byte) int16 {
|
||||
expectSize(val, int16Size)
|
||||
return int16(binary.LittleEndian.Uint16(val))
|
||||
}
|
||||
|
||||
func ReadUint16(val []byte) uint16 {
|
||||
expectSize(val, uint16Size)
|
||||
return binary.LittleEndian.Uint16(val)
|
||||
}
|
||||
|
||||
func ReadInt32(val []byte) int32 {
|
||||
expectSize(val, int32Size)
|
||||
return int32(binary.LittleEndian.Uint32(val))
|
||||
}
|
||||
|
||||
func ReadUint32(val []byte) uint32 {
|
||||
expectSize(val, uint32Size)
|
||||
return binary.LittleEndian.Uint32(val)
|
||||
}
|
||||
|
||||
func ReadUint48(val []byte) (u uint64) {
|
||||
expectSize(val, uint48Size)
|
||||
var tmp [8]byte
|
||||
// copy |val| to |tmp|
|
||||
tmp[5], tmp[4] = val[5], val[4]
|
||||
tmp[3], tmp[2] = val[3], val[2]
|
||||
tmp[1], tmp[0] = val[1], val[0]
|
||||
u = binary.LittleEndian.Uint64(tmp[:])
|
||||
return
|
||||
}
|
||||
|
||||
func ReadInt64(val []byte) int64 {
|
||||
expectSize(val, int64Size)
|
||||
return int64(binary.LittleEndian.Uint64(val))
|
||||
}
|
||||
|
||||
func ReadUint64(val []byte) uint64 {
|
||||
expectSize(val, uint64Size)
|
||||
return binary.LittleEndian.Uint64(val)
|
||||
}
|
||||
|
||||
func ReadFloat32(val []byte) float32 {
|
||||
expectSize(val, float32Size)
|
||||
return math.Float32frombits(ReadUint32(val))
|
||||
}
|
||||
|
||||
func ReadFloat64(val []byte) float64 {
|
||||
expectSize(val, float64Size)
|
||||
return math.Float64frombits(ReadUint64(val))
|
||||
}
|
||||
|
||||
func ReadDecimal(val []byte) decimal.Decimal {
|
||||
// todo(andy): temporary lossy implementation
|
||||
//return decimal.NewFromFloat(ReadFloat64(val))
|
||||
return decimal.NewFromFloat(ReadFloat64(val))
|
||||
}
|
||||
|
||||
func ReadTime(buf []byte) (t time.Time) {
|
||||
expectSize(buf, timestampSize)
|
||||
if err := t.UnmarshalBinary(buf); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
func ReadString(val []byte) string {
|
||||
// todo(andy): fix allocation
|
||||
return string(val)
|
||||
}
|
||||
|
||||
func readBytes(val []byte) []byte {
|
||||
return val
|
||||
}
|
||||
|
||||
func writeBool(buf []byte, val bool) {
|
||||
expectSize(buf, 1)
|
||||
@@ -236,94 +140,265 @@ func writeBool(buf []byte, val bool) {
|
||||
}
|
||||
}
|
||||
|
||||
func WriteInt8(buf []byte, val int8) {
|
||||
// false is less that true
|
||||
// compareBool orders booleans with false sorting before true.
// Returns -1, 0, or 1 following the usual comparator convention.
func compareBool(l, r bool) int {
	switch {
	case l == r:
		return 0
	case r:
		// l is false, r is true: false < true.
		return -1
	default:
		return 1
	}
}
|
||||
|
||||
func readInt8(val []byte) int8 {
|
||||
expectSize(val, int8Size)
|
||||
return int8(val[0])
|
||||
}
|
||||
|
||||
func writeInt8(buf []byte, val int8) {
|
||||
expectSize(buf, int8Size)
|
||||
buf[0] = byte(val)
|
||||
}
|
||||
|
||||
func WriteUint8(buf []byte, val uint8) {
|
||||
// compareInt8 returns -1, 0, or 1 if |l| is less than, equal to,
// or greater than |r|, respectively.
func compareInt8(l, r int8) int {
	switch {
	case l < r:
		return -1
	case l > r:
		return 1
	default:
		return 0
	}
}
|
||||
|
||||
func readUint8(val []byte) uint8 {
|
||||
expectSize(val, uint8Size)
|
||||
return val[0]
|
||||
}
|
||||
|
||||
func writeUint8(buf []byte, val uint8) {
|
||||
expectSize(buf, uint8Size)
|
||||
buf[0] = byte(val)
|
||||
}
|
||||
|
||||
func WriteInt16(buf []byte, val int16) {
|
||||
// compareUint8 returns -1, 0, or 1 if |l| is less than, equal to,
// or greater than |r|, respectively.
func compareUint8(l, r uint8) int {
	switch {
	case l < r:
		return -1
	case l > r:
		return 1
	default:
		return 0
	}
}
|
||||
|
||||
func readInt16(val []byte) int16 {
|
||||
expectSize(val, int16Size)
|
||||
return int16(binary.LittleEndian.Uint16(val))
|
||||
}
|
||||
|
||||
func writeInt16(buf []byte, val int16) {
|
||||
expectSize(buf, int16Size)
|
||||
binary.LittleEndian.PutUint16(buf, uint16(val))
|
||||
}
|
||||
|
||||
func WriteUint16(buf []byte, val uint16) {
|
||||
// compareInt16 returns -1, 0, or 1 if |l| is less than, equal to,
// or greater than |r|, respectively.
func compareInt16(l, r int16) int {
	switch {
	case l < r:
		return -1
	case l > r:
		return 1
	default:
		return 0
	}
}
|
||||
|
||||
func readUint16(val []byte) uint16 {
|
||||
expectSize(val, uint16Size)
|
||||
return binary.LittleEndian.Uint16(val)
|
||||
}
|
||||
|
||||
func writeUint16(buf []byte, val uint16) {
|
||||
expectSize(buf, uint16Size)
|
||||
binary.LittleEndian.PutUint16(buf, val)
|
||||
}
|
||||
|
||||
func WriteInt32(buf []byte, val int32) {
|
||||
// compareUint16 returns -1, 0, or 1 if |l| is less than, equal to,
// or greater than |r|, respectively.
func compareUint16(l, r uint16) int {
	switch {
	case l < r:
		return -1
	case l > r:
		return 1
	default:
		return 0
	}
}
|
||||
|
||||
func readInt32(val []byte) int32 {
|
||||
expectSize(val, int32Size)
|
||||
return int32(binary.LittleEndian.Uint32(val))
|
||||
}
|
||||
|
||||
func writeInt32(buf []byte, val int32) {
|
||||
expectSize(buf, int32Size)
|
||||
binary.LittleEndian.PutUint32(buf, uint32(val))
|
||||
}
|
||||
|
||||
func WriteUint32(buf []byte, val uint32) {
|
||||
// compareInt32 returns -1, 0, or 1 if |l| is less than, equal to,
// or greater than |r|, respectively.
func compareInt32(l, r int32) int {
	switch {
	case l < r:
		return -1
	case l > r:
		return 1
	default:
		return 0
	}
}
|
||||
|
||||
func readUint32(val []byte) uint32 {
|
||||
expectSize(val, uint32Size)
|
||||
return binary.LittleEndian.Uint32(val)
|
||||
}
|
||||
|
||||
func writeUint32(buf []byte, val uint32) {
|
||||
expectSize(buf, uint32Size)
|
||||
binary.LittleEndian.PutUint32(buf, val)
|
||||
}
|
||||
|
||||
func WriteUint48(buf []byte, u uint64) {
|
||||
const maxUint48 = uint64(1<<48 - 1)
|
||||
|
||||
expectSize(buf, uint48Size)
|
||||
if u > maxUint48 {
|
||||
panic("uint is greater than max uint48")
|
||||
func compareUint32(l, r uint32) int {
|
||||
if l == r {
|
||||
return 0
|
||||
} else if l < r {
|
||||
return -1
|
||||
} else {
|
||||
return 1
|
||||
}
|
||||
var tmp [8]byte
|
||||
binary.LittleEndian.PutUint64(tmp[:], u)
|
||||
// copy |tmp| to |buf|
|
||||
buf[5], buf[4] = tmp[5], tmp[4]
|
||||
buf[3], buf[2] = tmp[3], tmp[2]
|
||||
buf[1], buf[0] = tmp[1], tmp[0]
|
||||
}
|
||||
|
||||
func WriteInt64(buf []byte, val int64) {
|
||||
func readInt64(val []byte) int64 {
|
||||
expectSize(val, int64Size)
|
||||
return int64(binary.LittleEndian.Uint64(val))
|
||||
}
|
||||
|
||||
func writeInt64(buf []byte, val int64) {
|
||||
expectSize(buf, int64Size)
|
||||
binary.LittleEndian.PutUint64(buf, uint64(val))
|
||||
}
|
||||
|
||||
func WriteUint64(buf []byte, val uint64) {
|
||||
// compareInt64 returns -1, 0, or 1 if |l| is less than, equal to,
// or greater than |r|, respectively.
func compareInt64(l, r int64) int {
	switch {
	case l < r:
		return -1
	case l > r:
		return 1
	default:
		return 0
	}
}
|
||||
|
||||
func readUint64(val []byte) uint64 {
|
||||
expectSize(val, uint64Size)
|
||||
return binary.LittleEndian.Uint64(val)
|
||||
}
|
||||
|
||||
func writeUint64(buf []byte, val uint64) {
|
||||
expectSize(buf, uint64Size)
|
||||
binary.LittleEndian.PutUint64(buf, val)
|
||||
}
|
||||
|
||||
func WriteFloat32(buf []byte, val float32) {
|
||||
// compareUint64 returns -1, 0, or 1 if |l| is less than, equal to,
// or greater than |r|, respectively.
func compareUint64(l, r uint64) int {
	switch {
	case l < r:
		return -1
	case l > r:
		return 1
	default:
		return 0
	}
}
|
||||
|
||||
func readFloat32(val []byte) float32 {
|
||||
expectSize(val, float32Size)
|
||||
return math.Float32frombits(readUint32(val))
|
||||
}
|
||||
|
||||
func writeFloat32(buf []byte, val float32) {
|
||||
expectSize(buf, float32Size)
|
||||
binary.LittleEndian.PutUint32(buf, math.Float32bits(val))
|
||||
}
|
||||
|
||||
func WriteFloat64(buf []byte, val float64) {
|
||||
// compareFloat32 returns -1, 0, or 1 if |l| is less than, equal to,
// or greater than |r|, respectively. Note: like the IEEE-754 `<`
// operator, NaN operands fall through to the "greater" branch.
func compareFloat32(l, r float32) int {
	switch {
	case l < r:
		return -1
	case l == r:
		return 0
	default:
		return 1
	}
}
|
||||
|
||||
func readFloat64(val []byte) float64 {
|
||||
expectSize(val, float64Size)
|
||||
return math.Float64frombits(readUint64(val))
|
||||
}
|
||||
|
||||
func writeFloat64(buf []byte, val float64) {
|
||||
expectSize(buf, float64Size)
|
||||
binary.LittleEndian.PutUint64(buf, math.Float64bits(val))
|
||||
}
|
||||
|
||||
func WriteTime(buf []byte, val time.Time) {
|
||||
expectSize(buf, timestampSize)
|
||||
// todo(andy): fix allocation here
|
||||
m, _ := val.MarshalBinary()
|
||||
copy(buf, m)
|
||||
}
|
||||
|
||||
func writeString(buf []byte, val string, coll Collation) {
|
||||
expectSize(buf, ByteSize(len(val)))
|
||||
copy(buf, val)
|
||||
}
|
||||
|
||||
func writeBytes(buf, val []byte, coll Collation) {
|
||||
expectSize(buf, ByteSize(len(val)))
|
||||
copy(buf, val)
|
||||
}
|
||||
|
||||
func expectSize(buf []byte, sz ByteSize) {
|
||||
if ByteSize(len(buf)) != sz {
|
||||
panic("byte slice is not of expected size")
|
||||
func compareFloat64(l, r float64) int {
|
||||
if l == r {
|
||||
return 0
|
||||
} else if l < r {
|
||||
return -1
|
||||
} else {
|
||||
return 1
|
||||
}
|
||||
}
|
||||
|
||||
func readTimestamp(buf []byte) (t time.Time) {
|
||||
expectSize(buf, timestampSize)
|
||||
t = time.Unix(0, readInt64(buf)).UTC()
|
||||
return
|
||||
}
|
||||
|
||||
func writeTimestamp(buf []byte, val time.Time) {
|
||||
expectSize(buf, timestampSize)
|
||||
writeInt64(buf, val.UnixNano())
|
||||
}
|
||||
|
||||
func compareTimestamp(l, r time.Time) int {
|
||||
if l.Equal(r) {
|
||||
return 0
|
||||
} else if l.Before(r) {
|
||||
return -1
|
||||
} else {
|
||||
return 1
|
||||
}
|
||||
}
|
||||
|
||||
func readString(val []byte) string {
|
||||
// todo(andy): fix allocation
|
||||
return string(val)
|
||||
}
|
||||
|
||||
func writeString(buf []byte, val string) {
|
||||
expectSize(buf, ByteSize(len(val)))
|
||||
copy(buf, val)
|
||||
}
|
||||
|
||||
// compareString returns -1, 0, or 1 if |l| is lexicographically less
// than, equal to, or greater than |r| (bytewise comparison).
//
// Go's native string comparison operators compare bytes exactly like
// bytes.Compare, but without the two per-call []byte conversions the
// previous implementation allocated on this hot comparison path.
func compareString(l, r string) int {
	if l < r {
		return -1
	} else if l > r {
		return 1
	}
	return 0
}
|
||||
|
||||
func readBytes(val []byte) []byte {
|
||||
return val
|
||||
}
|
||||
|
||||
func writeBytes(buf, val []byte) {
|
||||
expectSize(buf, ByteSize(len(val)))
|
||||
copy(buf, val)
|
||||
}
|
||||
|
||||
func compareBytes(l, r []byte) int {
|
||||
return bytes.Compare(l, r)
|
||||
}
|
||||
|
||||
func compare(typ Type, left, right []byte) int {
|
||||
// order NULLs last
|
||||
if left == nil {
|
||||
@@ -342,231 +417,57 @@ func compare(typ Type, left, right []byte) int {
|
||||
|
||||
switch typ.Enc {
|
||||
case Int8Enc:
|
||||
return compareInt8(ReadInt8(left), ReadInt8(right))
|
||||
return compareInt8(readInt8(left), readInt8(right))
|
||||
case Uint8Enc:
|
||||
return compareUint8(ReadUint8(left), ReadUint8(right))
|
||||
return compareUint8(readUint8(left), readUint8(right))
|
||||
case Int16Enc:
|
||||
return compareInt16(ReadInt16(left), ReadInt16(right))
|
||||
return compareInt16(readInt16(left), readInt16(right))
|
||||
case Uint16Enc:
|
||||
return compareUint16(ReadUint16(left), ReadUint16(right))
|
||||
return compareUint16(readUint16(left), readUint16(right))
|
||||
case Int32Enc:
|
||||
return compareInt32(ReadInt32(left), ReadInt32(right))
|
||||
return compareInt32(readInt32(left), readInt32(right))
|
||||
case Uint32Enc:
|
||||
return compareUint32(ReadUint32(left), ReadUint32(right))
|
||||
return compareUint32(readUint32(left), readUint32(right))
|
||||
case Int64Enc:
|
||||
return compareInt64(ReadInt64(left), ReadInt64(right))
|
||||
return compareInt64(readInt64(left), readInt64(right))
|
||||
case Uint64Enc:
|
||||
return compareUint64(ReadUint64(left), ReadUint64(right))
|
||||
return compareUint64(readUint64(left), readUint64(right))
|
||||
case Float32Enc:
|
||||
return compareFloat32(ReadFloat32(left), ReadFloat32(right))
|
||||
return compareFloat32(readFloat32(left), readFloat32(right))
|
||||
case Float64Enc:
|
||||
return compareFloat64(ReadFloat64(left), ReadFloat64(right))
|
||||
return compareFloat64(readFloat64(left), readFloat64(right))
|
||||
case YearEnc:
|
||||
return compareInt16(ReadInt16(left), ReadInt16(right))
|
||||
return compareInt16(readInt16(left), readInt16(right))
|
||||
case DateEnc, DatetimeEnc, TimestampEnc:
|
||||
return compareTimestamp(ReadTime(left), ReadTime(right))
|
||||
return compareTimestamp(readTimestamp(left), readTimestamp(right))
|
||||
case TimeEnc:
|
||||
panic("unimplemented")
|
||||
case DecimalEnc:
|
||||
// todo(andy): temporary Decimal implementation
|
||||
fallthrough
|
||||
case StringEnc:
|
||||
return compareString(ReadString(left), ReadString(right), typ.Coll)
|
||||
return compareString(readString(left), readString(right))
|
||||
case BytesEnc:
|
||||
return compareBytes(readBytes(left), readBytes(right), typ.Coll)
|
||||
return compareBytes(readBytes(left), readBytes(right))
|
||||
default:
|
||||
panic("unknown encoding")
|
||||
}
|
||||
}
|
||||
|
||||
// false is less that true
|
||||
func compareBool(l, r bool) int {
|
||||
if l == r {
|
||||
return 0
|
||||
}
|
||||
if !l && r {
|
||||
return -1
|
||||
}
|
||||
return 1
|
||||
}
|
||||
|
||||
func compareInt8(l, r int8) int {
|
||||
if l == r {
|
||||
return 0
|
||||
} else if l < r {
|
||||
return -1
|
||||
} else {
|
||||
return 1
|
||||
func expectSize(buf []byte, sz ByteSize) {
|
||||
if ByteSize(len(buf)) != sz {
|
||||
panic("byte slice is not of expected size")
|
||||
}
|
||||
}
|
||||
|
||||
func compareUint8(l, r uint8) int {
|
||||
if l == r {
|
||||
return 0
|
||||
} else if l < r {
|
||||
return -1
|
||||
} else {
|
||||
return 1
|
||||
func expectTrue(b bool) {
|
||||
if !b {
|
||||
panic("expected true")
|
||||
}
|
||||
}
|
||||
|
||||
func compareInt16(l, r int16) int {
|
||||
if l == r {
|
||||
return 0
|
||||
} else if l < r {
|
||||
return -1
|
||||
} else {
|
||||
return 1
|
||||
func expectFalse(b bool) {
|
||||
if b {
|
||||
panic("expected false")
|
||||
}
|
||||
}
|
||||
|
||||
func compareUint16(l, r uint16) int {
|
||||
if l == r {
|
||||
return 0
|
||||
} else if l < r {
|
||||
return -1
|
||||
} else {
|
||||
return 1
|
||||
}
|
||||
}
|
||||
|
||||
func compareInt32(l, r int32) int {
|
||||
if l == r {
|
||||
return 0
|
||||
} else if l < r {
|
||||
return -1
|
||||
} else {
|
||||
return 1
|
||||
}
|
||||
}
|
||||
|
||||
func compareUint32(l, r uint32) int {
|
||||
if l == r {
|
||||
return 0
|
||||
} else if l < r {
|
||||
return -1
|
||||
} else {
|
||||
return 1
|
||||
}
|
||||
}
|
||||
|
||||
func compareInt64(l, r int64) int {
|
||||
if l == r {
|
||||
return 0
|
||||
} else if l < r {
|
||||
return -1
|
||||
} else {
|
||||
return 1
|
||||
}
|
||||
}
|
||||
|
||||
func compareUint64(l, r uint64) int {
|
||||
if l == r {
|
||||
return 0
|
||||
} else if l < r {
|
||||
return -1
|
||||
} else {
|
||||
return 1
|
||||
}
|
||||
}
|
||||
|
||||
func compareFloat32(l, r float32) int {
|
||||
if l == r {
|
||||
return 0
|
||||
} else if l < r {
|
||||
return -1
|
||||
} else {
|
||||
return 1
|
||||
}
|
||||
}
|
||||
|
||||
// compareFloat64 returns -1, 0, or 1 if |l| is less than, equal to,
// or greater than |r|, respectively. Note: like the IEEE-754 `<`
// operator, NaN operands fall through to the "greater" branch.
func compareFloat64(l, r float64) int {
	switch {
	case l < r:
		return -1
	case l == r:
		return 0
	default:
		return 1
	}
}
|
||||
|
||||
func compareTimestamp(l, r time.Time) int {
|
||||
if l.Equal(r) {
|
||||
return 0
|
||||
}
|
||||
if l.Before(r) {
|
||||
return -1
|
||||
} else {
|
||||
return 1
|
||||
}
|
||||
}
|
||||
|
||||
func compareString(l, r string, coll Collation) int {
|
||||
// todo(andy): collations
|
||||
return bytes.Compare([]byte(l), []byte(r))
|
||||
}
|
||||
|
||||
func compareBytes(l, r []byte, coll Collation) int {
|
||||
// todo(andy): collations
|
||||
return bytes.Compare(l, r)
|
||||
}
|
||||
|
||||
// rawCmp is an array of indexes used to perform raw Tuple comparisons.
|
||||
// Under certain conditions, Tuple comparisons can be optimized by
|
||||
// directly comparing Tuples as byte slices, rather than accessing
|
||||
// and deserializing each field.
|
||||
// If each of these conditions is met, raw comparisons can be used:
|
||||
// (1) All fields in the Tuple must be non-nullable.
|
||||
// (2) All fields in the Tuple must be of constant size
|
||||
// (eg Ints, Uints, Floats, Time types, etc.)
|
||||
//
|
||||
type rawCmp []int
|
||||
|
||||
var rawCmpLookup = map[Encoding]rawCmp{
|
||||
Int8Enc: {0},
|
||||
Uint8Enc: {0},
|
||||
Int16Enc: {1, 0},
|
||||
Uint16Enc: {1, 0},
|
||||
Int32Enc: {3, 2, 1, 0},
|
||||
Uint32Enc: {3, 2, 1, 0},
|
||||
Int64Enc: {7, 6, 5, 4, 3, 2, 1, 0},
|
||||
Uint64Enc: {7, 6, 5, 4, 3, 2, 1, 0},
|
||||
}
|
||||
|
||||
func compareRaw(left, right Tuple, mapping rawCmp) int {
|
||||
var l, r byte
|
||||
for _, idx := range mapping {
|
||||
l, r = left[idx], right[idx]
|
||||
if l != r {
|
||||
break
|
||||
}
|
||||
}
|
||||
if l > r {
|
||||
return 1
|
||||
} else if l < r {
|
||||
return -1
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func maybeGetRawComparison(types ...Type) rawCmp {
|
||||
var raw []int
|
||||
offset := 0
|
||||
for _, typ := range types {
|
||||
if typ.Nullable {
|
||||
return nil
|
||||
}
|
||||
|
||||
mapping, ok := rawCmpLookup[typ.Enc]
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
for i := range mapping {
|
||||
mapping[i] += offset
|
||||
}
|
||||
raw = append(raw, mapping...)
|
||||
offset += len(mapping)
|
||||
}
|
||||
return raw
|
||||
}
|
||||
|
||||
@@ -116,19 +116,19 @@ func TestCompare(t *testing.T) {
|
||||
|
||||
func encInt(i int64) []byte {
|
||||
buf := make([]byte, 8)
|
||||
WriteInt64(buf, i)
|
||||
writeInt64(buf, i)
|
||||
return buf
|
||||
}
|
||||
|
||||
func encUint(u uint64) []byte {
|
||||
buf := make([]byte, 8)
|
||||
WriteUint64(buf, u)
|
||||
writeUint64(buf, u)
|
||||
return buf
|
||||
}
|
||||
|
||||
func encFloat(f float64) []byte {
|
||||
buf := make([]byte, 8)
|
||||
WriteFloat64(buf, f)
|
||||
writeFloat64(buf, f)
|
||||
return buf
|
||||
}
|
||||
|
||||
@@ -156,7 +156,7 @@ func roundTripBools(t *testing.T) {
|
||||
integers := []bool{true, false}
|
||||
for _, exp := range integers {
|
||||
writeBool(buf, exp)
|
||||
assert.Equal(t, exp, ReadBool(buf))
|
||||
assert.Equal(t, exp, readBool(buf))
|
||||
zero(buf)
|
||||
}
|
||||
}
|
||||
@@ -166,8 +166,8 @@ func roundTripInts(t *testing.T) {
|
||||
integers := []int64{-1, 0, -1, math.MaxInt8, math.MinInt8}
|
||||
for _, value := range integers {
|
||||
exp := int8(value)
|
||||
WriteInt8(buf, exp)
|
||||
assert.Equal(t, exp, ReadInt8(buf))
|
||||
writeInt8(buf, exp)
|
||||
assert.Equal(t, exp, readInt8(buf))
|
||||
zero(buf)
|
||||
}
|
||||
|
||||
@@ -175,8 +175,8 @@ func roundTripInts(t *testing.T) {
|
||||
integers = append(integers, math.MaxInt16, math.MaxInt16)
|
||||
for _, value := range integers {
|
||||
exp := int16(value)
|
||||
WriteInt16(buf, exp)
|
||||
assert.Equal(t, exp, ReadInt16(buf))
|
||||
writeInt16(buf, exp)
|
||||
assert.Equal(t, exp, readInt16(buf))
|
||||
zero(buf)
|
||||
}
|
||||
|
||||
@@ -184,8 +184,8 @@ func roundTripInts(t *testing.T) {
|
||||
integers = append(integers, math.MaxInt32, math.MaxInt32)
|
||||
for _, value := range integers {
|
||||
exp := int32(value)
|
||||
WriteInt32(buf, exp)
|
||||
assert.Equal(t, exp, ReadInt32(buf))
|
||||
writeInt32(buf, exp)
|
||||
assert.Equal(t, exp, readInt32(buf))
|
||||
zero(buf)
|
||||
}
|
||||
|
||||
@@ -193,8 +193,8 @@ func roundTripInts(t *testing.T) {
|
||||
integers = append(integers, math.MaxInt64, math.MaxInt64)
|
||||
for _, value := range integers {
|
||||
exp := int64(value)
|
||||
WriteInt64(buf, exp)
|
||||
assert.Equal(t, exp, ReadInt64(buf))
|
||||
writeInt64(buf, exp)
|
||||
assert.Equal(t, exp, readInt64(buf))
|
||||
zero(buf)
|
||||
}
|
||||
}
|
||||
@@ -204,8 +204,8 @@ func roundTripUints(t *testing.T) {
|
||||
uintegers := []uint64{0, 1, math.MaxUint8}
|
||||
for _, value := range uintegers {
|
||||
exp := uint8(value)
|
||||
WriteUint8(buf, exp)
|
||||
assert.Equal(t, exp, ReadUint8(buf))
|
||||
writeUint8(buf, exp)
|
||||
assert.Equal(t, exp, readUint8(buf))
|
||||
zero(buf)
|
||||
}
|
||||
|
||||
@@ -213,8 +213,8 @@ func roundTripUints(t *testing.T) {
|
||||
uintegers = append(uintegers, math.MaxUint16)
|
||||
for _, value := range uintegers {
|
||||
exp := uint16(value)
|
||||
WriteUint16(buf, exp)
|
||||
assert.Equal(t, exp, ReadUint16(buf))
|
||||
writeUint16(buf, exp)
|
||||
assert.Equal(t, exp, readUint16(buf))
|
||||
zero(buf)
|
||||
}
|
||||
|
||||
@@ -222,8 +222,8 @@ func roundTripUints(t *testing.T) {
|
||||
uintegers = append(uintegers, math.MaxUint32)
|
||||
for _, value := range uintegers {
|
||||
exp := uint32(value)
|
||||
WriteUint32(buf, exp)
|
||||
assert.Equal(t, exp, ReadUint32(buf))
|
||||
writeUint32(buf, exp)
|
||||
assert.Equal(t, exp, readUint32(buf))
|
||||
zero(buf)
|
||||
}
|
||||
|
||||
@@ -231,8 +231,8 @@ func roundTripUints(t *testing.T) {
|
||||
uintegers = append(uintegers, math.MaxUint64)
|
||||
for _, value := range uintegers {
|
||||
exp := uint64(value)
|
||||
WriteUint64(buf, exp)
|
||||
assert.Equal(t, exp, ReadUint64(buf))
|
||||
writeUint64(buf, exp)
|
||||
assert.Equal(t, exp, readUint64(buf))
|
||||
zero(buf)
|
||||
}
|
||||
}
|
||||
@@ -242,8 +242,8 @@ func roundTripFloats(t *testing.T) {
|
||||
floats := []float64{-1, 0, 1, math.MaxFloat32, math.SmallestNonzeroFloat32}
|
||||
for _, value := range floats {
|
||||
exp := float32(value)
|
||||
WriteFloat32(buf, exp)
|
||||
assert.Equal(t, exp, ReadFloat32(buf))
|
||||
writeFloat32(buf, exp)
|
||||
assert.Equal(t, exp, readFloat32(buf))
|
||||
zero(buf)
|
||||
}
|
||||
|
||||
@@ -251,8 +251,8 @@ func roundTripFloats(t *testing.T) {
|
||||
floats = append(floats, math.MaxFloat64, math.SmallestNonzeroFloat64)
|
||||
for _, value := range floats {
|
||||
exp := float64(value)
|
||||
WriteFloat64(buf, exp)
|
||||
assert.Equal(t, exp, ReadFloat64(buf))
|
||||
writeFloat64(buf, exp)
|
||||
assert.Equal(t, exp, readFloat64(buf))
|
||||
zero(buf)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,73 +0,0 @@
|
||||
// Copyright 2021 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package val
|
||||
|
||||
// todo(andy): more ergonomic offsets
|
||||
// type SlicedBuffer struct {
|
||||
// buf []byte
|
||||
// offs []uint16
|
||||
// }
|
||||
|
||||
type Offsets []byte
|
||||
|
||||
// OffsetsSize returns the number of bytes needed to
|
||||
// store |fieldCount| offsets.
|
||||
func OffsetsSize(count int) ByteSize {
|
||||
if count == 0 {
|
||||
return 0
|
||||
}
|
||||
return ByteSize((count - 1) * 2)
|
||||
}
|
||||
|
||||
// Count returns the number of offsets stored in |sl|.
|
||||
func (os Offsets) Count() int {
|
||||
return (len(os) / 2) + 1
|
||||
}
|
||||
|
||||
// GetBounds returns the ith offset. |last| is the byte position
|
||||
// of the _end_ of the last element.
|
||||
func (os Offsets) GetBounds(i int, last ByteSize) (start, stop ByteSize) {
|
||||
start = os.getOffset(i)
|
||||
if os.isLastIndex(i) {
|
||||
stop = last
|
||||
} else {
|
||||
stop = os.getOffset(i + 1)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// getOffset gets the byte position of the _start_ of element |i|.
|
||||
func (os Offsets) getOffset(i int) ByteSize {
|
||||
if i == 0 {
|
||||
return 0
|
||||
}
|
||||
start := (i - 1) * 2
|
||||
off := ReadUint16(os[start : start+2])
|
||||
return ByteSize(off)
|
||||
}
|
||||
|
||||
// Put writes offset |pos| at index |i|.
|
||||
func (os Offsets) Put(i int, off ByteSize) {
|
||||
if i == 0 {
|
||||
return
|
||||
}
|
||||
start := (i - 1) * 2
|
||||
WriteUint16(os[start:start+2], uint16(off))
|
||||
}
|
||||
|
||||
// isLastIndex returns true if |i| is the last index in |sl|.
|
||||
func (os Offsets) isLastIndex(i int) bool {
|
||||
return len(os) == i*2
|
||||
}
|
||||
76
go/store/val/sliced_buffer.go
Normal file
76
go/store/val/sliced_buffer.go
Normal file
@@ -0,0 +1,76 @@
|
||||
// Copyright 2021 Dolthub, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package val
|
||||
|
||||
type SlicedBuffer struct {
|
||||
Buf []byte
|
||||
Offs offsets
|
||||
}
|
||||
|
||||
func slicedTupleBuffer(tup Tuple) SlicedBuffer {
|
||||
mask := tup.mask()
|
||||
offStop := tup.size() - numFieldsSize - mask.size()
|
||||
bufStop := offStop - offsetsSize(mask.count())
|
||||
|
||||
return SlicedBuffer{
|
||||
Buf: tup[:bufStop],
|
||||
Offs: offsets(tup[bufStop:offStop]),
|
||||
}
|
||||
}
|
||||
|
||||
// GetSlice returns the ith slice of |sb.Buf|.
|
||||
func (sb SlicedBuffer) GetSlice(i int) []byte {
|
||||
start := sb.Offs.getOffset(i)
|
||||
stop := ByteSize(len(sb.Buf))
|
||||
if !sb.isLastIndex(i) {
|
||||
stop = sb.Offs.getOffset(i + 1)
|
||||
}
|
||||
return sb.Buf[start:stop]
|
||||
}
|
||||
|
||||
// isLastIndex returns true if |i| is the last index in |sl|.
|
||||
func (sb SlicedBuffer) isLastIndex(i int) bool {
|
||||
return len(sb.Offs) == i*2
|
||||
}
|
||||
|
||||
type offsets []byte
|
||||
|
||||
// offsetsSize returns the number of bytes needed to
|
||||
// store |fieldCount| offsets.
|
||||
func offsetsSize(count int) ByteSize {
|
||||
if count == 0 {
|
||||
return 0
|
||||
}
|
||||
return ByteSize((count - 1) * 2)
|
||||
}
|
||||
|
||||
// getOffset gets the byte position of the _start_ of element |i|.
|
||||
func (os offsets) getOffset(i int) ByteSize {
|
||||
if i == 0 {
|
||||
return 0
|
||||
}
|
||||
start := (i - 1) * 2
|
||||
off := readUint16(os[start : start+2])
|
||||
return ByteSize(off)
|
||||
}
|
||||
|
||||
// putOffset writes offset |pos| at index |i|.
|
||||
func (os offsets) putOffset(i int, off ByteSize) {
|
||||
if i == 0 {
|
||||
return
|
||||
}
|
||||
start := (i - 1) * 2
|
||||
writeUint16(os[start:start+2], uint16(off))
|
||||
}
|
||||
@@ -30,7 +30,7 @@ const (
|
||||
// Tuples are byte slices containing field values and a footer. Tuples only
|
||||
// contain Values for non-NULL Fields. Value i contains the data for ith non-
|
||||
// NULL Field. Values are packed contiguously from the front of the Tuple. The
|
||||
// footer contains offsets, a member mask, and a field count. Offsets enable
|
||||
// footer contains offsets, a member mask, and a field count. offsets enable
|
||||
// random access to Values. The member mask enables NULL-compaction for Values.
|
||||
//
|
||||
// Tuples read and write Values as byte slices. (De)serialization is delegated
|
||||
@@ -42,10 +42,10 @@ const (
|
||||
//
|
||||
// Tuple:
|
||||
// +---------+---------+-----+---------+---------+-------------+-------------+
|
||||
// | Value 0 | Value 1 | ... | Value K | Offsets | Member Mask | Field Count |
|
||||
// | Value 0 | Value 1 | ... | Value K | offsets | Member Mask | Field Count |
|
||||
// +---------+---------+-----+---------+---------+-------------+-------------+
|
||||
//
|
||||
// Offsets:
|
||||
// offsets:
|
||||
// The offset array contains a uint16 for each non-NULL field after field 0.
|
||||
// Offset i encodes the distance to the ith Value from the front of the Tuple.
|
||||
// The size of the offset array is 2*(K-1) bytes, where K is the number of
|
||||
@@ -91,7 +91,7 @@ func NewTuple(pool pool.BuffPool, values ...[]byte) Tuple {
|
||||
panic("tuple data size exceeds maximum")
|
||||
}
|
||||
|
||||
tup, offs, mask := makeTuple(pool, pos, count, len(values))
|
||||
tup, offs, mask := allocateTuple(pool, pos, count, len(values))
|
||||
|
||||
count = 0
|
||||
pos = ByteSize(0)
|
||||
@@ -100,7 +100,7 @@ func NewTuple(pool pool.BuffPool, values ...[]byte) Tuple {
|
||||
continue
|
||||
}
|
||||
mask.set(i)
|
||||
offs.Put(count, pos)
|
||||
offs.putOffset(count, pos)
|
||||
count++
|
||||
|
||||
copy(tup[pos:pos+sizeOf(v)], v)
|
||||
@@ -116,15 +116,15 @@ func CloneTuple(pool pool.BuffPool, tup Tuple) Tuple {
|
||||
return buf
|
||||
}
|
||||
|
||||
func makeTuple(pool pool.BuffPool, bufSz ByteSize, values, fields int) (tup Tuple, offs Offsets, ms memberMask) {
|
||||
offSz := OffsetsSize(values)
|
||||
func allocateTuple(pool pool.BuffPool, bufSz ByteSize, values, fields int) (tup Tuple, offs offsets, ms memberMask) {
|
||||
offSz := offsetsSize(values)
|
||||
maskSz := maskSize(fields)
|
||||
countSz := numFieldsSize
|
||||
|
||||
tup = pool.Get(uint64(bufSz + offSz + maskSz + countSz))
|
||||
|
||||
writeFieldCount(tup, fields)
|
||||
offs = Offsets(tup[bufSz : bufSz+offSz])
|
||||
offs = offsets(tup[bufSz : bufSz+offSz])
|
||||
ms = memberMask(tup[bufSz+offSz : bufSz+offSz+maskSz])
|
||||
|
||||
return
|
||||
@@ -141,10 +141,7 @@ func (tup Tuple) GetField(i int) []byte {
|
||||
// index to compensate for NULL fields
|
||||
i = tup.fieldToValue(i)
|
||||
|
||||
offs, valStop := tup.offsets()
|
||||
start, stop := offs.GetBounds(i, valStop)
|
||||
|
||||
return tup[start:stop]
|
||||
return slicedTupleBuffer(tup).GetSlice(i)
|
||||
}
|
||||
|
||||
func (tup Tuple) size() ByteSize {
|
||||
@@ -157,7 +154,7 @@ func (tup Tuple) Count() int {
|
||||
|
||||
func (tup Tuple) fieldCount() int {
|
||||
sl := tup[tup.size()-numFieldsSize:]
|
||||
return int(ReadUint16(sl))
|
||||
return int(readUint16(sl))
|
||||
}
|
||||
|
||||
func (tup Tuple) valueCount() int {
|
||||
@@ -170,14 +167,6 @@ func (tup Tuple) mask() memberMask {
|
||||
return memberMask(tup[start:stop])
|
||||
}
|
||||
|
||||
func (tup Tuple) offsets() (offs Offsets, valStop ByteSize) {
|
||||
mask := tup.mask()
|
||||
offStop := tup.size() - numFieldsSize - mask.size()
|
||||
valStop = offStop - OffsetsSize(mask.count())
|
||||
offs = Offsets(tup[valStop:offStop])
|
||||
return
|
||||
}
|
||||
|
||||
func (tup Tuple) fieldToValue(i int) int {
|
||||
return tup.mask().countPrefix(i) - 1
|
||||
}
|
||||
@@ -192,5 +181,5 @@ func sizeOf(val []byte) ByteSize {
|
||||
|
||||
func writeFieldCount(tup Tuple, count int) {
|
||||
sl := tup[len(tup)-int(numFieldsSize):]
|
||||
WriteUint16(sl, uint16(count))
|
||||
writeUint16(sl, uint16(count))
|
||||
}
|
||||
|
||||
@@ -15,12 +15,8 @@
|
||||
package val
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
|
||||
"github.com/dolthub/dolt/go/store/pool"
|
||||
)
|
||||
|
||||
@@ -86,7 +82,7 @@ func (tb *TupleBuilder) PutBool(i int, v bool) {
|
||||
func (tb *TupleBuilder) PutInt8(i int, v int8) {
|
||||
tb.Desc.expectEncoding(i, Int8Enc)
|
||||
tb.fields[i] = tb.buf[tb.pos : tb.pos+int8Size]
|
||||
WriteInt8(tb.fields[i], v)
|
||||
writeInt8(tb.fields[i], v)
|
||||
tb.pos += int8Size
|
||||
}
|
||||
|
||||
@@ -94,7 +90,7 @@ func (tb *TupleBuilder) PutInt8(i int, v int8) {
|
||||
func (tb *TupleBuilder) PutUint8(i int, v uint8) {
|
||||
tb.Desc.expectEncoding(i, Uint8Enc)
|
||||
tb.fields[i] = tb.buf[tb.pos : tb.pos+uint8Size]
|
||||
WriteUint8(tb.fields[i], v)
|
||||
writeUint8(tb.fields[i], v)
|
||||
tb.pos += uint8Size
|
||||
}
|
||||
|
||||
@@ -102,7 +98,7 @@ func (tb *TupleBuilder) PutUint8(i int, v uint8) {
|
||||
func (tb *TupleBuilder) PutInt16(i int, v int16) {
|
||||
tb.Desc.expectEncoding(i, Int16Enc)
|
||||
tb.fields[i] = tb.buf[tb.pos : tb.pos+int16Size]
|
||||
WriteInt16(tb.fields[i], v)
|
||||
writeInt16(tb.fields[i], v)
|
||||
tb.pos += int16Size
|
||||
}
|
||||
|
||||
@@ -110,7 +106,7 @@ func (tb *TupleBuilder) PutInt16(i int, v int16) {
|
||||
func (tb *TupleBuilder) PutUint16(i int, v uint16) {
|
||||
tb.Desc.expectEncoding(i, Uint16Enc)
|
||||
tb.fields[i] = tb.buf[tb.pos : tb.pos+uint16Size]
|
||||
WriteUint16(tb.fields[i], v)
|
||||
writeUint16(tb.fields[i], v)
|
||||
tb.pos += uint16Size
|
||||
}
|
||||
|
||||
@@ -118,7 +114,7 @@ func (tb *TupleBuilder) PutUint16(i int, v uint16) {
|
||||
func (tb *TupleBuilder) PutInt32(i int, v int32) {
|
||||
tb.Desc.expectEncoding(i, Int32Enc)
|
||||
tb.fields[i] = tb.buf[tb.pos : tb.pos+int32Size]
|
||||
WriteInt32(tb.fields[i], v)
|
||||
writeInt32(tb.fields[i], v)
|
||||
tb.pos += int32Size
|
||||
}
|
||||
|
||||
@@ -126,7 +122,7 @@ func (tb *TupleBuilder) PutInt32(i int, v int32) {
|
||||
func (tb *TupleBuilder) PutUint32(i int, v uint32) {
|
||||
tb.Desc.expectEncoding(i, Uint32Enc)
|
||||
tb.fields[i] = tb.buf[tb.pos : tb.pos+uint32Size]
|
||||
WriteUint32(tb.fields[i], v)
|
||||
writeUint32(tb.fields[i], v)
|
||||
tb.pos += uint32Size
|
||||
}
|
||||
|
||||
@@ -134,7 +130,7 @@ func (tb *TupleBuilder) PutUint32(i int, v uint32) {
|
||||
func (tb *TupleBuilder) PutInt64(i int, v int64) {
|
||||
tb.Desc.expectEncoding(i, Int64Enc)
|
||||
tb.fields[i] = tb.buf[tb.pos : tb.pos+int64Size]
|
||||
WriteInt64(tb.fields[i], v)
|
||||
writeInt64(tb.fields[i], v)
|
||||
tb.pos += int64Size
|
||||
}
|
||||
|
||||
@@ -142,7 +138,7 @@ func (tb *TupleBuilder) PutInt64(i int, v int64) {
|
||||
func (tb *TupleBuilder) PutUint64(i int, v uint64) {
|
||||
tb.Desc.expectEncoding(i, Uint64Enc)
|
||||
tb.fields[i] = tb.buf[tb.pos : tb.pos+uint64Size]
|
||||
WriteUint64(tb.fields[i], v)
|
||||
writeUint64(tb.fields[i], v)
|
||||
tb.pos += uint64Size
|
||||
}
|
||||
|
||||
@@ -150,7 +146,7 @@ func (tb *TupleBuilder) PutUint64(i int, v uint64) {
|
||||
func (tb *TupleBuilder) PutFloat32(i int, v float32) {
|
||||
tb.Desc.expectEncoding(i, Float32Enc)
|
||||
tb.fields[i] = tb.buf[tb.pos : tb.pos+float32Size]
|
||||
WriteFloat32(tb.fields[i], v)
|
||||
writeFloat32(tb.fields[i], v)
|
||||
tb.pos += float32Size
|
||||
}
|
||||
|
||||
@@ -158,14 +154,14 @@ func (tb *TupleBuilder) PutFloat32(i int, v float32) {
|
||||
func (tb *TupleBuilder) PutFloat64(i int, v float64) {
|
||||
tb.Desc.expectEncoding(i, Float64Enc)
|
||||
tb.fields[i] = tb.buf[tb.pos : tb.pos+float64Size]
|
||||
WriteFloat64(tb.fields[i], v)
|
||||
writeFloat64(tb.fields[i], v)
|
||||
tb.pos += float64Size
|
||||
}
|
||||
|
||||
func (tb *TupleBuilder) PutTimestamp(i int, v time.Time) {
|
||||
tb.Desc.expectEncoding(i, DateEnc, DatetimeEnc, TimestampEnc)
|
||||
tb.fields[i] = tb.buf[tb.pos : tb.pos+timestampSize]
|
||||
WriteTime(tb.fields[i], v)
|
||||
writeTimestamp(tb.fields[i], v)
|
||||
tb.pos += timestampSize
|
||||
}
|
||||
|
||||
@@ -174,7 +170,7 @@ func (tb *TupleBuilder) PutSqlTime(i int, v string) {
|
||||
tb.Desc.expectEncoding(i, TimeEnc)
|
||||
sz := ByteSize(len(v))
|
||||
tb.fields[i] = tb.buf[tb.pos : tb.pos+sz]
|
||||
writeString(tb.fields[i], v, tb.Desc.Types[i].Coll)
|
||||
writeString(tb.fields[i], v)
|
||||
tb.pos += sz
|
||||
}
|
||||
|
||||
@@ -183,7 +179,7 @@ func (tb *TupleBuilder) PutYear(i int, v int16) {
|
||||
// todo(andy): yearSize, etc?
|
||||
tb.Desc.expectEncoding(i, YearEnc)
|
||||
tb.fields[i] = tb.buf[tb.pos : tb.pos+int16Size]
|
||||
WriteInt16(tb.fields[i], v)
|
||||
writeInt16(tb.fields[i], v)
|
||||
tb.pos += int16Size
|
||||
}
|
||||
|
||||
@@ -192,7 +188,7 @@ func (tb *TupleBuilder) PutDecimal(i int, v string) {
|
||||
// todo(andy): temporary implementation
|
||||
sz := ByteSize(len(v))
|
||||
tb.fields[i] = tb.buf[tb.pos : tb.pos+sz]
|
||||
writeString(tb.fields[i], v, tb.Desc.Types[i].Coll)
|
||||
writeString(tb.fields[i], v)
|
||||
tb.pos += sz
|
||||
}
|
||||
|
||||
@@ -201,7 +197,7 @@ func (tb *TupleBuilder) PutString(i int, v string) {
|
||||
tb.Desc.expectEncoding(i, StringEnc)
|
||||
sz := ByteSize(len(v))
|
||||
tb.fields[i] = tb.buf[tb.pos : tb.pos+sz]
|
||||
writeString(tb.fields[i], v, tb.Desc.Types[i].Coll)
|
||||
writeString(tb.fields[i], v)
|
||||
tb.pos += sz
|
||||
}
|
||||
|
||||
@@ -210,20 +206,25 @@ func (tb *TupleBuilder) PutBytes(i int, v []byte) {
|
||||
tb.Desc.expectEncoding(i, BytesEnc)
|
||||
sz := ByteSize(len(v))
|
||||
tb.fields[i] = tb.buf[tb.pos : tb.pos+sz]
|
||||
writeBytes(tb.fields[i], v, tb.Desc.Types[i].Coll)
|
||||
writeBytes(tb.fields[i], v)
|
||||
tb.pos += sz
|
||||
}
|
||||
|
||||
// PutJSON writes a []byte to the ith field of the Tuple being built.
|
||||
func (tb *TupleBuilder) PutJSON(i int, v interface{}) {
|
||||
func (tb *TupleBuilder) PutJSON(i int, v []byte) {
|
||||
tb.Desc.expectEncoding(i, JSONEnc)
|
||||
buf, err := json.Marshal(v)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
sz := ByteSize(len(buf))
|
||||
sz := ByteSize(len(v))
|
||||
tb.fields[i] = tb.buf[tb.pos : tb.pos+sz]
|
||||
writeBytes(tb.fields[i], buf, tb.Desc.Types[i].Coll)
|
||||
writeBytes(tb.fields[i], v)
|
||||
tb.pos += sz
|
||||
}
|
||||
|
||||
// PutGeometry writes a []byte to the ith field of the Tuple being built.
|
||||
func (tb *TupleBuilder) PutGeometry(i int, v []byte) {
|
||||
tb.Desc.expectEncoding(i, GeometryEnc)
|
||||
sz := ByteSize(len(v))
|
||||
tb.fields[i] = tb.buf[tb.pos : tb.pos+sz]
|
||||
writeBytes(tb.fields[i], v)
|
||||
tb.pos += sz
|
||||
}
|
||||
|
||||
@@ -236,109 +237,6 @@ func (tb *TupleBuilder) PutRaw(i int, buf []byte) {
|
||||
}
|
||||
sz := ByteSize(len(buf))
|
||||
tb.fields[i] = tb.buf[tb.pos : tb.pos+sz]
|
||||
writeBytes(tb.fields[i], buf, tb.Desc.Types[i].Coll)
|
||||
writeBytes(tb.fields[i], buf)
|
||||
tb.pos += sz
|
||||
}
|
||||
|
||||
// PutField writes an interface{} to the ith field of the Tuple being built.
|
||||
func (tb *TupleBuilder) PutField(i int, v interface{}) {
|
||||
if v == nil {
|
||||
return // NULL
|
||||
}
|
||||
|
||||
enc := tb.Desc.Types[i].Enc
|
||||
switch enc {
|
||||
case Int8Enc:
|
||||
tb.PutInt8(i, int8(convInt(v)))
|
||||
case Uint8Enc:
|
||||
tb.PutUint8(i, uint8(convUint(v)))
|
||||
case Int16Enc:
|
||||
tb.PutInt16(i, int16(convInt(v)))
|
||||
case Uint16Enc:
|
||||
tb.PutUint16(i, uint16(convUint(v)))
|
||||
case Int32Enc:
|
||||
tb.PutInt32(i, int32(convInt(v)))
|
||||
case Uint32Enc:
|
||||
tb.PutUint32(i, uint32(convUint(v)))
|
||||
case Int64Enc:
|
||||
tb.PutInt64(i, int64(convInt(v)))
|
||||
case Uint64Enc:
|
||||
tb.PutUint64(i, uint64(convUint(v)))
|
||||
case Float32Enc:
|
||||
tb.PutFloat32(i, v.(float32))
|
||||
case Float64Enc:
|
||||
tb.PutFloat64(i, v.(float64))
|
||||
case DecimalEnc:
|
||||
tb.PutDecimal(i, v.(string))
|
||||
case TimeEnc:
|
||||
tb.PutSqlTime(i, v.(string))
|
||||
case YearEnc:
|
||||
tb.PutYear(i, v.(int16))
|
||||
case DateEnc, DatetimeEnc, TimestampEnc:
|
||||
tb.PutTimestamp(i, v.(time.Time))
|
||||
case StringEnc:
|
||||
tb.PutString(i, v.(string))
|
||||
case BytesEnc:
|
||||
if s, ok := v.(string); ok {
|
||||
v = []byte(s)
|
||||
}
|
||||
tb.PutBytes(i, v.([]byte))
|
||||
case JSONEnc:
|
||||
// todo(andy): remove GMS dependency
|
||||
tb.PutJSON(i, v.(sql.JSONDocument).Val)
|
||||
default:
|
||||
panic(fmt.Sprintf("unknown encoding %v %v", enc, v))
|
||||
}
|
||||
}
|
||||
|
||||
func convInt(v interface{}) int {
|
||||
switch i := v.(type) {
|
||||
case int:
|
||||
return i
|
||||
case int8:
|
||||
return int(i)
|
||||
case uint8:
|
||||
return int(i)
|
||||
case int16:
|
||||
return int(i)
|
||||
case uint16:
|
||||
return int(i)
|
||||
case int32:
|
||||
return int(i)
|
||||
case uint32:
|
||||
return int(i)
|
||||
case int64:
|
||||
return int(i)
|
||||
case uint64:
|
||||
return int(i)
|
||||
default:
|
||||
panic("impossible conversion")
|
||||
}
|
||||
}
|
||||
|
||||
func convUint(v interface{}) uint {
|
||||
switch i := v.(type) {
|
||||
case uint:
|
||||
return i
|
||||
case int:
|
||||
return uint(i)
|
||||
case int8:
|
||||
return uint(i)
|
||||
case uint8:
|
||||
return uint(i)
|
||||
case int16:
|
||||
return uint(i)
|
||||
case uint16:
|
||||
return uint(i)
|
||||
case int32:
|
||||
return uint(i)
|
||||
case uint32:
|
||||
return uint(i)
|
||||
case int64:
|
||||
return uint(i)
|
||||
case uint64:
|
||||
return uint(i)
|
||||
default:
|
||||
panic("impossible conversion")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,24 +15,15 @@
|
||||
package val
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
)
|
||||
|
||||
type TupleDesc struct {
|
||||
Types []Type
|
||||
cmp TupleComparator
|
||||
|
||||
// Under certain conditions, Tuple comparisons can be
|
||||
// optimized by directly comparing Tuples as byte slices,
|
||||
// rather than accessing and deserializing each field.
|
||||
// See definition of rawCmp for more information.
|
||||
raw rawCmp
|
||||
}
|
||||
|
||||
type TupleComparator interface {
|
||||
@@ -71,7 +62,7 @@ func NewTupleDescriptorWithComparator(cmp TupleComparator, types ...Type) (td Tu
|
||||
|
||||
td.Types = types
|
||||
td.cmp = cmp
|
||||
td.raw = maybeGetRawComparison(types...)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
@@ -81,13 +72,6 @@ func TupleDescriptorPrefix(td TupleDesc, count int) TupleDesc {
|
||||
|
||||
// Compare returns the Comaparison of |left| and |right|.
|
||||
func (td TupleDesc) Compare(left, right Tuple) (cmp int) {
|
||||
// todo(andy): compare raw is broken
|
||||
//if td.raw != nil {
|
||||
// return compareRaw(left, right, td.raw)
|
||||
//} else {
|
||||
// return td.cmp(left, right, td)
|
||||
//}
|
||||
|
||||
return td.cmp.Compare(left, right, td)
|
||||
}
|
||||
|
||||
@@ -108,7 +92,7 @@ func (td TupleDesc) GetBool(i int, tup Tuple) (v bool, ok bool) {
|
||||
td.expectEncoding(i, Int8Enc)
|
||||
b := tup.GetField(i)
|
||||
if b != nil {
|
||||
v, ok = ReadBool(b), true
|
||||
v, ok = readBool(b), true
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -119,7 +103,7 @@ func (td TupleDesc) GetInt8(i int, tup Tuple) (v int8, ok bool) {
|
||||
td.expectEncoding(i, Int8Enc)
|
||||
b := tup.GetField(i)
|
||||
if b != nil {
|
||||
v, ok = ReadInt8(b), true
|
||||
v, ok = readInt8(b), true
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -130,7 +114,7 @@ func (td TupleDesc) GetUint8(i int, tup Tuple) (v uint8, ok bool) {
|
||||
td.expectEncoding(i, Uint8Enc)
|
||||
b := tup.GetField(i)
|
||||
if b != nil {
|
||||
v, ok = ReadUint8(b), true
|
||||
v, ok = readUint8(b), true
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -141,7 +125,7 @@ func (td TupleDesc) GetInt16(i int, tup Tuple) (v int16, ok bool) {
|
||||
td.expectEncoding(i, Int16Enc)
|
||||
b := tup.GetField(i)
|
||||
if b != nil {
|
||||
v, ok = ReadInt16(b), true
|
||||
v, ok = readInt16(b), true
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -152,7 +136,7 @@ func (td TupleDesc) GetUint16(i int, tup Tuple) (v uint16, ok bool) {
|
||||
td.expectEncoding(i, Uint16Enc)
|
||||
b := tup.GetField(i)
|
||||
if b != nil {
|
||||
v, ok = ReadUint16(b), true
|
||||
v, ok = readUint16(b), true
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -163,7 +147,7 @@ func (td TupleDesc) GetInt32(i int, tup Tuple) (v int32, ok bool) {
|
||||
td.expectEncoding(i, Int32Enc)
|
||||
b := tup.GetField(i)
|
||||
if b != nil {
|
||||
v, ok = ReadInt32(b), true
|
||||
v, ok = readInt32(b), true
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -174,7 +158,7 @@ func (td TupleDesc) GetUint32(i int, tup Tuple) (v uint32, ok bool) {
|
||||
td.expectEncoding(i, Uint32Enc)
|
||||
b := tup.GetField(i)
|
||||
if b != nil {
|
||||
v, ok = ReadUint32(b), true
|
||||
v, ok = readUint32(b), true
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -185,7 +169,7 @@ func (td TupleDesc) GetInt64(i int, tup Tuple) (v int64, ok bool) {
|
||||
td.expectEncoding(i, Int64Enc)
|
||||
b := tup.GetField(i)
|
||||
if b != nil {
|
||||
v, ok = ReadInt64(b), true
|
||||
v, ok = readInt64(b), true
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -196,7 +180,7 @@ func (td TupleDesc) GetUint64(i int, tup Tuple) (v uint64, ok bool) {
|
||||
td.expectEncoding(i, Uint64Enc)
|
||||
b := tup.GetField(i)
|
||||
if b != nil {
|
||||
v, ok = ReadUint64(b), true
|
||||
v, ok = readUint64(b), true
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -207,7 +191,7 @@ func (td TupleDesc) GetFloat32(i int, tup Tuple) (v float32, ok bool) {
|
||||
td.expectEncoding(i, Float32Enc)
|
||||
b := tup.GetField(i)
|
||||
if b != nil {
|
||||
v, ok = ReadFloat32(b), true
|
||||
v, ok = readFloat32(b), true
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -218,7 +202,7 @@ func (td TupleDesc) GetFloat64(i int, tup Tuple) (v float64, ok bool) {
|
||||
td.expectEncoding(i, Float64Enc)
|
||||
b := tup.GetField(i)
|
||||
if b != nil {
|
||||
v, ok = ReadFloat64(b), true
|
||||
v, ok = readFloat64(b), true
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -229,7 +213,7 @@ func (td TupleDesc) GetDecimal(i int, tup Tuple) (v string, ok bool) {
|
||||
td.expectEncoding(i, DecimalEnc)
|
||||
b := tup.GetField(i)
|
||||
if b != nil {
|
||||
v, ok = ReadString(b), true
|
||||
v, ok = readString(b), true
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -240,7 +224,7 @@ func (td TupleDesc) GetTimestamp(i int, tup Tuple) (v time.Time, ok bool) {
|
||||
td.expectEncoding(i, TimestampEnc, DateEnc, DatetimeEnc, YearEnc)
|
||||
b := tup.GetField(i)
|
||||
if b != nil {
|
||||
v, ok = ReadTime(b), true
|
||||
v, ok = readTimestamp(b), true
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -251,7 +235,7 @@ func (td TupleDesc) GetSqlTime(i int, tup Tuple) (v string, ok bool) {
|
||||
td.expectEncoding(i, TimeEnc)
|
||||
b := tup.GetField(i)
|
||||
if b != nil {
|
||||
v, ok = ReadString(b), true
|
||||
v, ok = readString(b), true
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -262,7 +246,7 @@ func (td TupleDesc) GetYear(i int, tup Tuple) (v int16, ok bool) {
|
||||
td.expectEncoding(i, YearEnc)
|
||||
b := tup.GetField(i)
|
||||
if b != nil {
|
||||
v, ok = ReadInt16(b), true
|
||||
v, ok = readInt16(b), true
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -273,7 +257,7 @@ func (td TupleDesc) GetString(i int, tup Tuple) (v string, ok bool) {
|
||||
td.expectEncoding(i, StringEnc)
|
||||
b := tup.GetField(i)
|
||||
if b != nil {
|
||||
v = ReadString(b)
|
||||
v = readString(b)
|
||||
ok = true
|
||||
}
|
||||
return
|
||||
@@ -291,69 +275,28 @@ func (td TupleDesc) GetBytes(i int, tup Tuple) (v []byte, ok bool) {
|
||||
return
|
||||
}
|
||||
|
||||
// GetBytes reads a []byte from the ith field of the Tuple.
|
||||
// GetJSON reads a []byte from the ith field of the Tuple.
|
||||
// If the ith field is NULL, |ok| is set to false.
|
||||
func (td TupleDesc) GetJSON(i int, tup Tuple) (v interface{}, ok bool) {
|
||||
func (td TupleDesc) GetJSON(i int, tup Tuple) (v []byte, ok bool) {
|
||||
td.expectEncoding(i, JSONEnc)
|
||||
b := tup.GetField(i)
|
||||
if b != nil {
|
||||
if err := json.Unmarshal(b, &v); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
v = readBytes(b)
|
||||
ok = true
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// GetField reads the value from the ith field of the Tuple as an interface{}.
|
||||
func (td TupleDesc) GetField(i int, tup Tuple) (v interface{}) {
|
||||
var ok bool
|
||||
switch td.Types[i].Enc {
|
||||
case Int8Enc:
|
||||
v, ok = td.GetInt8(i, tup)
|
||||
case Uint8Enc:
|
||||
v, ok = td.GetUint8(i, tup)
|
||||
case Int16Enc:
|
||||
v, ok = td.GetInt16(i, tup)
|
||||
case Uint16Enc:
|
||||
v, ok = td.GetUint16(i, tup)
|
||||
case Int32Enc:
|
||||
v, ok = td.GetInt32(i, tup)
|
||||
case Uint32Enc:
|
||||
v, ok = td.GetUint32(i, tup)
|
||||
case Int64Enc:
|
||||
v, ok = td.GetInt64(i, tup)
|
||||
case Uint64Enc:
|
||||
v, ok = td.GetUint64(i, tup)
|
||||
case Float32Enc:
|
||||
v, ok = td.GetFloat32(i, tup)
|
||||
case Float64Enc:
|
||||
v, ok = td.GetFloat64(i, tup)
|
||||
case DecimalEnc:
|
||||
v, ok = td.GetDecimal(i, tup)
|
||||
case TimeEnc:
|
||||
v, ok = td.GetSqlTime(i, tup)
|
||||
case YearEnc:
|
||||
v, ok = td.GetYear(i, tup)
|
||||
case TimestampEnc, DateEnc, DatetimeEnc:
|
||||
v, ok = td.GetTimestamp(i, tup)
|
||||
case StringEnc:
|
||||
v, ok = td.GetString(i, tup)
|
||||
case BytesEnc:
|
||||
v, ok = td.GetBytes(i, tup)
|
||||
case JSONEnc:
|
||||
var js interface{}
|
||||
js, ok = td.GetJSON(i, tup)
|
||||
if ok {
|
||||
v = sql.JSONDocument{Val: js}
|
||||
}
|
||||
default:
|
||||
panic("unknown encoding")
|
||||
// GetBytes reads a []byte from the ith field of the Tuple.
|
||||
// If the ith field is NULL, |ok| is set to false.
|
||||
func (td TupleDesc) GetGeometry(i int, tup Tuple) (v []byte, ok bool) {
|
||||
td.expectEncoding(i, GeometryEnc)
|
||||
b := tup.GetField(i)
|
||||
if b != nil {
|
||||
v = readBytes(b)
|
||||
ok = true
|
||||
}
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
return v
|
||||
return
|
||||
}
|
||||
|
||||
func (td TupleDesc) expectEncoding(i int, encodings ...Encoding) {
|
||||
|
||||
@@ -240,7 +240,6 @@ var CopiedNomsFiles []CopiedNomsFile = []CopiedNomsFile{
|
||||
{Path: "store/nomdl/parser.go", NomsPath: "go/nomdl/parser.go", HadCopyrightNotice: true},
|
||||
{Path: "store/nomdl/parser_test.go", NomsPath: "go/nomdl/parser_test.go", HadCopyrightNotice: true},
|
||||
{Path: "store/perf/codec-perf-rig/main.go", NomsPath: "go/perf/codec-perf-rig/main.go", HadCopyrightNotice: true},
|
||||
{Path: "store/perf/hash-perf-rig/main.go", NomsPath: "go/perf/hash-perf-rig/main.go", HadCopyrightNotice: true},
|
||||
{Path: "store/perf/suite/suite.go", NomsPath: "go/perf/suite/suite.go", HadCopyrightNotice: true},
|
||||
{Path: "store/perf/suite/suite_test.go", NomsPath: "go/perf/suite/suite_test.go", HadCopyrightNotice: true},
|
||||
{Path: "store/sloppy/sloppy.go", NomsPath: "go/sloppy/sloppy.go", HadCopyrightNotice: true},
|
||||
|
||||
@@ -682,3 +682,16 @@ SQL
|
||||
[[ "${lines[2]}" =~ "2" ]] || false
|
||||
}
|
||||
|
||||
@test "auto_increment: alter table add constraint for different database" {
|
||||
skip "add constraint for different database fix in progress"
|
||||
dolt sql <<SQL
|
||||
CREATE DATABASE public;
|
||||
CREATE TABLE public.test (pk integer NOT NULL, c1 integer, c2 integer);
|
||||
ALTER TABLE public.test ADD CONSTRAINT serial_pk_pkey PRIMARY KEY (pk);
|
||||
ALTER TABLE public.test MODIFY pk integer auto_increment;
|
||||
SQL
|
||||
|
||||
run dolt sql -q "SHOW CREATE TABLE public.test"
|
||||
[ $status -eq 0 ]
|
||||
[[ "$output" =~ "NOT NULL AUTO_INCREMENT" ]] || false
|
||||
}
|
||||
|
||||
@@ -344,3 +344,28 @@ SQL
|
||||
[[ "$output" =~ "| > | a | b | c |" ]] || false
|
||||
}
|
||||
|
||||
@test "drop-create: drop table from different database" {
|
||||
skip "fix not merged yet"
|
||||
dolt sql <<SQL
|
||||
create table test (currentId int primary key, currentText text);
|
||||
insert into test values (1, 'text1'), (2, 'text2');
|
||||
create schema common;
|
||||
create table common.test (commonId integer, commonText text);
|
||||
insert into test values (999, 'common database text1');
|
||||
SQL
|
||||
|
||||
run dolt sql -q "select * from test"
|
||||
currenttest=$output
|
||||
|
||||
run dolt sql -q "select * from common.test"
|
||||
[[ "$output" =~ "common database text1" ]] || false
|
||||
|
||||
dolt sql -q "drop table common.test"
|
||||
|
||||
run dolt sql -q "select * from test"
|
||||
[ "$output" = "$currenttest" ]
|
||||
|
||||
run dolt sql -q "select * from common.test"
|
||||
[ "$status" -eq 1 ]
|
||||
[[ "$output" =~ "table not found: test" ]] || false
|
||||
}
|
||||
|
||||
@@ -1853,3 +1853,37 @@ SQL
|
||||
[[ "$output" =~ "4,5,6" ]] || false
|
||||
[[ "${#lines[@]}" = "2" ]] || false
|
||||
}
|
||||
|
||||
@test "foreign-keys: alter table add constraint for different database" {
|
||||
skip "add constraint on foreign key without create index should be failing"
|
||||
run dolt sql <<SQL
|
||||
CREATE DATABASE public;
|
||||
CREATE TABLE public.cities (pk integer NOT NULL, city varchar(255), state varchar(2));
|
||||
CREATE TABLE public.states (state_id integer NOT NULL, state varchar(2));
|
||||
ALTER TABLE public.cities ADD CONSTRAINT cities_pkey PRIMARY KEY (pk);
|
||||
ALTER TABLE public.states ADD CONSTRAINT states_pkey PRIMARY KEY (state_id);
|
||||
ALTER TABLE public.cities ADD CONSTRAINT foreign_key1 FOREIGN KEY (state) REFERENCES public.states(state)";
|
||||
SQL
|
||||
[ $status -eq 1 ]
|
||||
[[ $output =~ "error" ]] || false
|
||||
|
||||
run dolt sql -q "SHOW CREATE TABLE public.cities"
|
||||
[[ $output =~ "PRIMARY KEY (\`pk\`)" ]] || false
|
||||
[[ ! $output =~ "CONSTRAINT" ]] || false
|
||||
|
||||
run dolt sql -q "SHOW CREATE TABLE public.states"
|
||||
[[ $output =~ "PRIMARY KEY (\`state_id\`)" ]] || false
|
||||
[[ ! $output =~ "KEY \`foreign_key1\` (\`state\`)" ]] || false
|
||||
|
||||
run dolt sql <<SQL
|
||||
CREATE INDEX foreign_key1 ON public.states(state);
|
||||
ALTER TABLE public.cities ADD CONSTRAINT foreign_key1 FOREIGN KEY (state) REFERENCES public.states(state);
|
||||
SQL
|
||||
[ $status -eq 0 ]
|
||||
|
||||
run dolt sql -q "SHOW CREATE TABLE public.cities"
|
||||
[[ $output =~ "CONSTRAINT \`foreign_key1\` FOREIGN KEY (\`state\`) REFERENCES \`states\` (\`state\`)" ]] || false
|
||||
|
||||
run dolt sql -q "SHOW CREATE TABLE public.states"
|
||||
[[ $output =~ "KEY \`foreign_key1\` (\`state\`)" ]] || false
|
||||
}
|
||||
|
||||
@@ -192,5 +192,5 @@ EOF
|
||||
}
|
||||
|
||||
get_head_commit() {
|
||||
dolt log -n 1 | grep -m 1 commit | cut -c 15-46
|
||||
dolt log -n 1 | grep -m 1 commit | cut -c 13-44
|
||||
}
|
||||
|
||||
@@ -704,3 +704,40 @@ DELIM
|
||||
run dolt sql -r csv -q "select * from keyless"
|
||||
[ "${lines[1]}" = "0,42,2" ]
|
||||
}
|
||||
|
||||
@test "import-create-tables: auto-increment table" {
|
||||
cat <<SQL > schema.sql
|
||||
CREATE TABLE test (
|
||||
pk int PRIMARY KEY AUTO_INCREMENT,
|
||||
v1 int
|
||||
);
|
||||
SQL
|
||||
|
||||
cat <<DELIM > data.csv
|
||||
pk,v1
|
||||
1,1
|
||||
2,2
|
||||
3,3
|
||||
4,4
|
||||
DELIM
|
||||
|
||||
run dolt table import -s schema.sql -c test data.csv
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ "Rows Processed: 4, Additions: 4, Modifications: 0, Had No Effect: 0" ]] || false
|
||||
[[ "$output" =~ "Import completed successfully." ]] || false
|
||||
|
||||
run dolt sql -r csv -q "select * from test order by pk ASC"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${#lines[@]}" -eq 5 ]
|
||||
[ "${lines[1]}" = 1,1 ]
|
||||
[ "${lines[2]}" = 2,2 ]
|
||||
[ "${lines[3]}" = 3,3 ]
|
||||
[ "${lines[4]}" = 4,4 ]
|
||||
|
||||
dolt sql -q "insert into test values (NULL, 5)"
|
||||
|
||||
run dolt sql -r csv -q "select * from test where pk = 5"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "${#lines[@]}" -eq 2 ]
|
||||
[ "${lines[1]}" = 5,5 ]
|
||||
}
|
||||
|
||||
@@ -2599,3 +2599,17 @@ SQL
|
||||
[[ "$output" =~ "Bbbb" ]] || false
|
||||
[[ "$output" =~ "bBbb" ]] || false
|
||||
}
|
||||
|
||||
@test "index: alter table create index for different database" {
|
||||
skip "create index for different database fix in progress"
|
||||
dolt sql <<SQL
|
||||
CREATE DATABASE public;
|
||||
CREATE TABLE public.test (pk integer NOT NULL, c1 integer);
|
||||
ALTER TABLE public.test ADD CONSTRAINT index_test_pkey PRIMARY KEY (pk);
|
||||
CREATE INDEX index_test_c1_idx ON public.test (c1);
|
||||
SQL
|
||||
|
||||
run dolt sql -q "show create table public.test"
|
||||
[ $status -eq 0 ]
|
||||
[[ "$output" =~ "KEY \`index_test_c1_idx\`" ]] || false
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user