mirror of
https://github.com/dolthub/dolt.git
synced 2026-03-17 09:53:59 -05:00
227 lines
6.1 KiB
Go
227 lines
6.1 KiB
Go
// Copyright 2019 Dolthub, Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package pull
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"io"
|
|
|
|
"github.com/cenkalti/backoff"
|
|
"golang.org/x/sync/errgroup"
|
|
"golang.org/x/sync/semaphore"
|
|
|
|
"github.com/dolthub/dolt/go/libraries/utils/iohelp"
|
|
"github.com/dolthub/dolt/go/store/chunks"
|
|
"github.com/dolthub/dolt/go/store/hash"
|
|
"github.com/dolthub/dolt/go/store/nbs"
|
|
)
|
|
|
|
func Clone(ctx context.Context, srcCS, sinkCS chunks.ChunkStore, eventCh chan<- TableFileEvent) error {
|
|
srcTS, srcOK := srcCS.(nbs.TableFileStore)
|
|
|
|
if !srcOK {
|
|
return errors.New("src db is not a Table File Store")
|
|
}
|
|
|
|
size, err := srcTS.Size(ctx)
|
|
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if size == 0 {
|
|
return ErrNoData
|
|
}
|
|
|
|
sinkTS, sinkOK := sinkCS.(nbs.TableFileStore)
|
|
|
|
if !sinkOK {
|
|
return errors.New("sink db is not a Table File Store")
|
|
}
|
|
|
|
return clone(ctx, srcTS, sinkTS, eventCh)
|
|
}
|
|
|
|
// CloneTableFileEvent identifies the kind of progress notification carried by
// a TableFileEvent during a clone.
type CloneTableFileEvent int

// Lifecycle events for table files during a clone, in the order they occur.
// The iota constants are explicitly typed as CloneTableFileEvent so they match
// the TableFileEvent.EventType field rather than being untyped ints.
const (
	// Listed is reported once, with the full set of table files to download.
	Listed CloneTableFileEvent = iota
	// DownloadStart is reported when the download of a table file begins.
	DownloadStart
	// DownloadStats is reported periodically with read statistics for an
	// in-flight table file download.
	DownloadStats
	// DownloadSuccess is reported when a table file finishes downloading.
	DownloadSuccess
	// DownloadFailed is reported when a table file download returns an error.
	DownloadFailed
)
|
|
|
|
// TableFileEvent is a progress notification sent on the clone event channel.
// TableFiles holds the file(s) the event refers to; Stats is populated only
// for DownloadStats events.
type TableFileEvent struct {
	EventType  CloneTableFileEvent
	TableFiles []nbs.TableFile
	Stats      []iohelp.ReadStats
}
|
|
|
|
// mapTableFiles returns the list of all fileIDs for the table files, and a map from fileID to nbs.TableFile
|
|
func mapTableFiles(tblFiles []nbs.TableFile) ([]string, map[string]nbs.TableFile, map[string]int) {
|
|
fileIds := make([]string, len(tblFiles))
|
|
fileIDtoTblFile := make(map[string]nbs.TableFile)
|
|
fileIDtoNumChunks := make(map[string]int)
|
|
|
|
for i, tblFile := range tblFiles {
|
|
fileIDtoTblFile[tblFile.FileID()] = tblFile
|
|
fileIds[i] = tblFile.FileID()
|
|
fileIDtoNumChunks[tblFile.FileID()] = tblFile.NumChunks()
|
|
}
|
|
|
|
return fileIds, fileIDtoTblFile, fileIDtoNumChunks
|
|
}
|
|
|
|
// concurrentTableFileDownloads bounds how many table files are downloaded in
// parallel during a clone.
const concurrentTableFileDownloads = 3
|
|
|
|
// clone downloads every table file in srcTS into sinkTS, reporting progress
// on eventCh, then records the downloaded files in the sink manifest and sets
// the sink's root chunk to the source's root. Failed passes are retried as
// long as each pass completes at least one additional file; after maxAttempts
// consecutive passes with no progress, the last error is returned.
func clone(ctx context.Context, srcTS, sinkTS nbs.TableFileStore, eventCh chan<- TableFileEvent) error {
	root, sourceFiles, appendixFiles, err := srcTS.Sources(ctx)
	if err != nil {
		return err
	}

	tblFiles := filterAppendicesFromSourceFiles(appendixFiles, sourceFiles)
	// report is a nil-safe send; progress reporting is optional.
	report := func(e TableFileEvent) {
		if eventCh != nil {
			eventCh <- e
		}
	}

	// Initializes the list of fileIDs we are going to download, and the map of fileIDToTF. If this clone takes a long
	// time some of the urls within the nbs.TableFiles will expire and fail to download. At that point we will retrieve
	// the sources again, and update the fileIDToTF map with updated info, but not change the files we are downloading.
	desiredFiles, fileIDToTF, fileIDToNumChunks := mapTableFiles(tblFiles)
	// completed[i] is set once desiredFiles[i] has been written to the sink;
	// retry passes skip completed entries.
	completed := make([]bool, len(desiredFiles))

	report(TableFileEvent{EventType: Listed, TableFiles: tblFiles})

	// download runs one pass over the not-yet-completed files, fetching up to
	// concurrentTableFileDownloads of them at a time, and returns the first
	// error encountered (nil when every remaining file succeeded).
	download := func(ctx context.Context) error {
		sem := semaphore.NewWeighted(concurrentTableFileDownloads)
		eg, ctx := errgroup.WithContext(ctx)
		for i := 0; i < len(desiredFiles); i++ {
			if completed[i] {
				continue
			}
			if err := sem.Acquire(ctx, 1); err != nil {
				// The errgroup ctx has been canceled. We will
				// return the error from wg.Wait() below.
				break
			}
			// Capture a per-iteration copy of i for the goroutine below.
			idx := i
			eg.Go(func() (err error) {
				defer sem.Release(1)

				fileID := desiredFiles[idx]
				tblFile, ok := fileIDToTF[fileID]
				if !ok {
					// conjoin happened during clone
					return backoff.Permanent(errors.New("table file not found. please try again"))
				}

				report(TableFileEvent{EventType: DownloadStart, TableFiles: []nbs.TableFile{tblFile}})
				err = sinkTS.WriteTableFile(ctx, tblFile.FileID(), tblFile.NumChunks(), nil, func() (io.ReadCloser, uint64, error) {
					rd, contentLength, err := tblFile.Open(ctx)
					if err != nil {
						return nil, 0, err
					}
					// Wrap the reader so read statistics are reported while
					// the file streams into the sink.
					rdStats := iohelp.NewReaderWithStats(rd, int64(contentLength))

					rdStats.Start(func(s iohelp.ReadStats) {
						report(TableFileEvent{
							EventType:  DownloadStats,
							TableFiles: []nbs.TableFile{tblFile},
							Stats:      []iohelp.ReadStats{s},
						})
					})

					return rdStats, contentLength, nil
				})
				if err != nil {
					report(TableFileEvent{EventType: DownloadFailed, TableFiles: []nbs.TableFile{tblFile}})
					return err
				}

				report(TableFileEvent{EventType: DownloadSuccess, TableFiles: []nbs.TableFile{tblFile}})
				completed[idx] = true
				return nil
			})
		}

		return eg.Wait()
	}

	const maxAttempts = 3
	previousCompletedCnt := 0
	failureCount := 0

	// madeProgress reports whether any additional file has completed since
	// the previous pass, updating the running count when one has.
	madeProgress := func() bool {
		currentCompletedCnt := 0
		for _, b := range completed {
			if b {
				currentCompletedCnt++
			}
		}
		if currentCompletedCnt == previousCompletedCnt {
			return false
		} else {
			previousCompletedCnt = currentCompletedCnt
			return true
		}
	}

	// keep going as long as progress is being made. If progress is not made retry up to maxAttempts times.
	for {
		err = download(ctx)
		if err == nil {
			break
		}
		if permanent, ok := err.(*backoff.PermanentError); ok {
			return permanent.Err
		} else if madeProgress() {
			failureCount = 0
		} else {
			failureCount++
		}
		if failureCount >= maxAttempts {
			return err
		}
		// Re-fetch the sources to refresh possibly-expired table file URLs.
		// Only fileIDToTF is replaced; desiredFiles and fileIDToNumChunks
		// keep the set captured at the start of the clone.
		if _, sourceFiles, appendixFiles, err = srcTS.Sources(ctx); err != nil {
			return err
		} else {
			tblFiles = filterAppendicesFromSourceFiles(appendixFiles, sourceFiles)
			_, fileIDToTF, _ = mapTableFiles(tblFiles)
		}
	}

	// NOTE(review): AddTableFilesToManifest's result is discarded here —
	// confirm whether it can return an error that should be checked.
	sinkTS.AddTableFilesToManifest(ctx, fileIDToNumChunks)
	return sinkTS.SetRootChunk(ctx, root, hash.Hash{})
}
|
|
|
|
func filterAppendicesFromSourceFiles(appendixFiles []nbs.TableFile, sourceFiles []nbs.TableFile) []nbs.TableFile {
|
|
if len(appendixFiles) == 0 {
|
|
return sourceFiles
|
|
}
|
|
tblFiles := make([]nbs.TableFile, 0)
|
|
_, appendixMap, _ := mapTableFiles(appendixFiles)
|
|
for _, sf := range sourceFiles {
|
|
if _, ok := appendixMap[sf.FileID()]; !ok {
|
|
tblFiles = append(tblFiles, sf)
|
|
}
|
|
}
|
|
return tblFiles
|
|
}
|