Merge pull request #2822 from dolthub/dhruv/clone-stats

go/cmd/dolt: Add download statistics to dolt clone
This commit is contained in:
Dhruv Sringari
2022-02-17 16:35:28 -08:00
committed by GitHub
14 changed files with 151 additions and 42 deletions

17
go/Godeps/LICENSES generated
View File

@@ -4348,6 +4348,23 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
= LICENSE 57fe71a587ee8b02f137ebdb714a7c17cfbf692b75c4b13189f9f9e2 =
================================================================================
================================================================================
= github.com/gosuri/uilive licensed under: =
MIT License
===========
Copyright (c) 2015, Greg Osuri
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
= LICENSE cde7aa271cbea2eab88298ec101c4d0acf5d175e056bdba8f573ca79 =
================================================================================
================================================================================
= github.com/hashicorp/golang-lru licensed under: =

View File

@@ -70,6 +70,7 @@ require (
require (
github.com/dolthub/go-mysql-server v0.11.1-0.20220217185408-f043853f3722
github.com/google/flatbuffers v2.0.5+incompatible
github.com/gosuri/uilive v0.0.4
github.com/kch42/buzhash v0.0.0-20160816060738-9bdec3dec7c6
github.com/prometheus/client_golang v1.11.0
github.com/shirou/gopsutil/v3 v3.22.1

View File

@@ -381,6 +381,8 @@ github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoA
github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/gostaticanalysis/analysisutil v0.0.0-20190318220348-4088753ea4d3/go.mod h1:eEOZF4jCKGi+aprrirO9e7WKB3beBRtWgqGunKl6pKE=
github.com/gostaticanalysis/analysisutil v0.0.3/go.mod h1:eEOZF4jCKGi+aprrirO9e7WKB3beBRtWgqGunKl6pKE=
github.com/gosuri/uilive v0.0.4 h1:hUEBpQDj8D8jXgtCdBu7sWsy5sbW/5GhuO8KBwJ2jyY=
github.com/gosuri/uilive v0.0.4/go.mod h1:V/epo5LjjlDE5RJUcqx8dbw+zc93y5Ya3yg8tfZ74VI=
github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs=
github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs=
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk=

View File

@@ -20,16 +20,21 @@ import (
"fmt"
"os"
"path/filepath"
"sort"
"sync"
"github.com/dolthub/dolt/go/cmd/dolt/cli"
"github.com/dustin/go-humanize"
"github.com/gosuri/uilive"
"github.com/dolthub/dolt/go/libraries/doltcore/dbfactory"
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
"github.com/dolthub/dolt/go/libraries/doltcore/env"
"github.com/dolthub/dolt/go/libraries/doltcore/ref"
"github.com/dolthub/dolt/go/libraries/utils/filesys"
"github.com/dolthub/dolt/go/libraries/utils/iohelp"
"github.com/dolthub/dolt/go/libraries/utils/strhelp"
"github.com/dolthub/dolt/go/store/datas/pull"
"github.com/dolthub/dolt/go/store/nbs"
"github.com/dolthub/dolt/go/store/types"
)
@@ -93,34 +98,67 @@ func cloneProg(eventCh <-chan pull.TableFileEvent) {
chunks int64
chunksDownloading int64
chunksDownloaded int64
cliPos int
currStats = make(map[string]iohelp.ReadStats)
tableFiles = make(map[string]*nbs.TableFile)
)
cliPos = cli.DeleteAndPrint(cliPos, "Retrieving remote information.")
writer := uilive.New()
writer.Start()
fmt.Fprintf(writer, "Retrieving remote information.\n")
writer.Stop()
for tblFEvt := range eventCh {
switch tblFEvt.EventType {
case pull.Listed:
for _, tf := range tblFEvt.TableFiles {
c := tf
tableFiles[c.FileID()] = &c
chunks += int64(tf.NumChunks())
}
case pull.DownloadStart:
for _, tf := range tblFEvt.TableFiles {
chunksDownloading += int64(tf.NumChunks())
}
case pull.DownloadStats:
for i, s := range tblFEvt.Stats {
tf := tblFEvt.TableFiles[i]
currStats[tf.FileID()] = s
}
case pull.DownloadSuccess:
for _, tf := range tblFEvt.TableFiles {
chunksDownloading -= int64(tf.NumChunks())
chunksDownloaded += int64(tf.NumChunks())
delete(currStats, tf.FileID())
}
case pull.DownloadFailed:
// Ignore for now and output errors on the main thread
for _, tf := range tblFEvt.TableFiles {
delete(currStats, tf.FileID())
}
}
str := fmt.Sprintf("%s of %s chunks complete. %s chunks being downloaded currently.", strhelp.CommaIfy(chunksDownloaded), strhelp.CommaIfy(chunks), strhelp.CommaIfy(chunksDownloading))
cliPos = cli.DeleteAndPrint(cliPos, str)
// Starting and stopping for each event seems to be less jumpy
writer.Start()
fmt.Fprintf(writer, "%s of %s chunks complete. %s chunks being downloaded currently.\n",
strhelp.CommaIfy(chunksDownloaded), strhelp.CommaIfy(chunks), strhelp.CommaIfy(chunksDownloading))
for _, fileId := range sortedKeys(currStats) {
s := currStats[fileId]
bps := float64(s.Read) / s.Elapsed.Seconds()
rate := humanize.Bytes(uint64(bps)) + "/s"
fmt.Fprintf(writer.Newline(), "Downloading file: %s (%s chunks) - %.2f%% downloaded, %s, \n",
fileId, strhelp.CommaIfy(int64((*tableFiles[fileId]).NumChunks())), s.Percent*100, rate)
}
writer.Stop()
}
}
cli.Println()
// sortedKeys returns the keys of |m| in ascending lexicographic order so
// that iteration over the stats map produces deterministic display output.
func sortedKeys(m map[string]iohelp.ReadStats) []string {
	fileIds := make([]string, 0, len(m))
	for fileId := range m {
		fileIds = append(fileIds, fileId)
	}
	sort.Sort(sort.StringSlice(fileIds))
	return fileIds
}
func CloneRemote(ctx context.Context, srcDB *doltdb.DoltDB, remoteName, branch string, dEnv *env.DoltEnv) error {

View File

@@ -1334,7 +1334,7 @@ func sanitizeSignedUrl(url string) string {
}
// Open returns an io.ReadCloser which can be used to read the bytes of a table file.
func (drtf DoltRemoteTableFile) Open(ctx context.Context) (io.ReadCloser, error) {
func (drtf DoltRemoteTableFile) Open(ctx context.Context) (io.ReadCloser, uint64, error) {
if drtf.info.RefreshAfter != nil && drtf.info.RefreshAfter.AsTime().After(time.Now()) {
resp, err := drtf.dcs.csClient.RefreshTableFileUrl(ctx, drtf.info.RefreshRequest)
if err == nil {
@@ -1345,20 +1345,20 @@ func (drtf DoltRemoteTableFile) Open(ctx context.Context) (io.ReadCloser, error)
req, err := http.NewRequestWithContext(ctx, http.MethodGet, drtf.info.Url, nil)
if err != nil {
return nil, err
return nil, 0, err
}
resp, err := drtf.dcs.httpFetcher.Do(req)
if err != nil {
return nil, err
return nil, 0, err
}
if resp.StatusCode/100 != 2 {
defer resp.Body.Close()
body := make([]byte, 4096)
n, _ := io.ReadFull(resp.Body, body)
return nil, fmt.Errorf("%w: status code: %d;\nurl: %s\n\nbody:\n\n%s\n", ErrRemoteTableFileGet, resp.StatusCode, sanitizeSignedUrl(drtf.info.Url), string(body[0:n]))
return nil, 0, fmt.Errorf("%w: status code: %d;\nurl: %s\n\nbody:\n\n%s\n", ErrRemoteTableFileGet, resp.StatusCode, sanitizeSignedUrl(drtf.info.Url), string(body[0:n]))
}
return resp.Body, nil
return resp.Body, uint64(resp.ContentLength), nil
}

View File

@@ -55,7 +55,10 @@ func (rws *ReaderWithStats) Start(updateFunc func(ReadStats)) {
case <-timer.C:
read := atomic.LoadUint64(&rws.read)
elapsed := time.Since(rws.start)
percent := float64(read) / float64(rws.size)
var percent float64
if rws.size != 0 {
percent = float64(read) / float64(rws.size)
}
updateFunc(ReadStats{Read: read, Elapsed: elapsed, Percent: percent})
timer.Reset(updateFrequency)
}
@@ -63,12 +66,17 @@ func (rws *ReaderWithStats) Start(updateFunc func(ReadStats)) {
}()
}
func (rws *ReaderWithStats) Stop() {
// Stop shuts down the ReaderWithStats and closes the wrapped reader when it
// is an io.Closer, returning that close error (nil otherwise). This is
// deliberately named Stop rather than Close: ReaderWithStats is sometimes
// used as an http.Request body, and http.Request closes any body that
// implements io.Closer — we want Start/Stop lifetime fully under our control.
func (rws *ReaderWithStats) Stop() error {
	close(rws.closeCh)
	closer, ok := rws.rd.(io.Closer)
	if !ok {
		return nil
	}
	return closer.Close()
}
func (rws *ReaderWithStats) Read(p []byte) (int, error) {

View File

@@ -17,12 +17,12 @@ package pull
import (
"context"
"errors"
"io"
"github.com/cenkalti/backoff"
"golang.org/x/sync/errgroup"
"golang.org/x/sync/semaphore"
"github.com/dolthub/dolt/go/libraries/utils/iohelp"
"github.com/dolthub/dolt/go/store/chunks"
"github.com/dolthub/dolt/go/store/hash"
"github.com/dolthub/dolt/go/store/nbs"
@@ -59,6 +59,7 @@ type CloneTableFileEvent int
const (
Listed = iota
DownloadStart
DownloadStats
DownloadSuccess
DownloadFailed
)
@@ -66,6 +67,7 @@ const (
type TableFileEvent struct {
EventType CloneTableFileEvent
TableFiles []nbs.TableFile
Stats []iohelp.ReadStats
}
// mapTableFiles returns the list of all fileIDs for the table files, and a map from fileID to nbs.TableFile
@@ -83,8 +85,8 @@ func mapTableFiles(tblFiles []nbs.TableFile) ([]string, map[string]nbs.TableFile
return fileIds, fileIDtoTblFile, fileIDtoNumChunks
}
func CloseWithErr(c io.Closer, err *error) {
e := c.Close()
func stopWithErr(stats *iohelp.ReaderWithStats, err *error) {
e := stats.Stop()
if *err == nil && e != nil {
*err = e
}
@@ -111,7 +113,7 @@ func clone(ctx context.Context, srcTS, sinkTS nbs.TableFileStore, eventCh chan<-
desiredFiles, fileIDToTF, fileIDToNumChunks := mapTableFiles(tblFiles)
completed := make([]bool, len(desiredFiles))
report(TableFileEvent{Listed, tblFiles})
report(TableFileEvent{EventType: Listed, TableFiles: tblFiles})
download := func(ctx context.Context) error {
sem := semaphore.NewWeighted(concurrentTableFileDownloads)
@@ -136,20 +138,29 @@ func clone(ctx context.Context, srcTS, sinkTS nbs.TableFileStore, eventCh chan<-
return backoff.Permanent(errors.New("table file not found. please try again"))
}
var rd io.ReadCloser
if rd, err = tblFile.Open(ctx); err != nil {
return err
}
defer CloseWithErr(rd, &err)
report(TableFileEvent{DownloadStart, []nbs.TableFile{tblFile}})
err = sinkTS.WriteTableFile(ctx, tblFile.FileID(), tblFile.NumChunks(), rd, 0, nil)
rd, contentLength, err := tblFile.Open(ctx)
if err != nil {
report(TableFileEvent{DownloadFailed, []nbs.TableFile{tblFile}})
return err
}
rdStats := iohelp.NewReaderWithStats(rd, int64(contentLength))
defer stopWithErr(rdStats, &err)
rdStats.Start(func(s iohelp.ReadStats) {
report(TableFileEvent{
EventType: DownloadStats,
TableFiles: []nbs.TableFile{tblFile},
Stats: []iohelp.ReadStats{s},
})
})
report(TableFileEvent{EventType: DownloadStart, TableFiles: []nbs.TableFile{tblFile}})
err = sinkTS.WriteTableFile(ctx, tblFile.FileID(), tblFile.NumChunks(), rdStats, contentLength, nil)
if err != nil {
report(TableFileEvent{EventType: DownloadFailed, TableFiles: []nbs.TableFile{tblFile}})
return err
}
report(TableFileEvent{DownloadSuccess, []nbs.TableFile{tblFile}})
report(TableFileEvent{EventType: DownloadSuccess, TableFiles: []nbs.TableFile{tblFile}})
completed[idx] = true
return nil
})

View File

@@ -385,8 +385,8 @@ func (ttf *TestFailingTableFile) NumChunks() int {
return ttf.numChunks
}
func (ttf *TestFailingTableFile) Open(ctx context.Context) (io.ReadCloser, error) {
return io.NopCloser(bytes.NewReader([]byte{0x00})), errors.New("this is a test error")
// Open simulates a failing table-file open: it returns a readable one-byte
// body and a content length of 1 together with a non-nil error, so tests can
// exercise callers' error-handling paths.
func (ttf *TestFailingTableFile) Open(ctx context.Context) (io.ReadCloser, uint64, error) {
	return io.NopCloser(bytes.NewReader([]byte{0x00})), 1, errors.New("this is a test error")
}
type TestTableFile struct {
@@ -403,8 +403,8 @@ func (ttf *TestTableFile) NumChunks() int {
return ttf.numChunks
}
func (ttf *TestTableFile) Open(ctx context.Context) (io.ReadCloser, error) {
return io.NopCloser(bytes.NewReader(ttf.data)), nil
// Open returns a reader over the table file's in-memory data along with the
// content length in bytes and a nil error.
func (ttf *TestTableFile) Open(ctx context.Context) (io.ReadCloser, uint64, error) {
	return io.NopCloser(bytes.NewReader(ttf.data)), uint64(len(ttf.data)), nil
}
type TestTableFileWriter struct {

View File

@@ -223,7 +223,7 @@ func (p *Puller) uploadTempTableFile(ctx context.Context, ae *atomicerr.AtomicEr
}))
})
defer func() {
fWithStats.Stop()
_ = fWithStats.Stop()
go func() {
_ = file.Remove(tmpTblFile.path)

View File

@@ -226,6 +226,20 @@ func (ccs *persistingChunkSource) reader(ctx context.Context) (io.Reader, error)
return ccs.cs.reader(ctx)
}
// size blocks until any pending persist completes, then reports the total
// byte size of the underlying chunk source. Returns ErrNoChunkSource if no
// chunk source materialized after waiting.
func (ccs *persistingChunkSource) size() (uint64, error) {
	if err := ccs.wait(); err != nil {
		return 0, err
	}
	cs := ccs.cs
	if cs == nil {
		return 0, ErrNoChunkSource
	}
	return cs.size()
}
func (ccs *persistingChunkSource) calcReads(reqs []getRecord, blockSize uint64) (reads int, remaining bool, err error) {
err = ccs.wait()
@@ -296,6 +310,10 @@ func (ecs emptyChunkSource) reader(context.Context) (io.Reader, error) {
return &bytes.Buffer{}, nil
}
// size reports zero bytes: an empty chunk source has no chunk data, index,
// or footer.
func (ecs emptyChunkSource) size() (uint64, error) {
	return 0, nil
}
func (ecs emptyChunkSource) calcReads(reqs []getRecord, blockSize uint64) (reads int, remaining bool, err error) {
return 0, true, nil
}

View File

@@ -1135,7 +1135,7 @@ func (nbs *NomsBlockStore) StatsSummary() string {
// tableFile is our implementation of TableFile.
type tableFile struct {
info TableSpecInfo
open func(ctx context.Context) (io.ReadCloser, error)
open func(ctx context.Context) (io.ReadCloser, uint64, error)
}
// FileID gets the id of the file
@@ -1148,8 +1148,8 @@ func (tf tableFile) NumChunks() int {
return int(tf.info.GetChunkCount())
}
// Open returns an io.ReadCloser which can be used to read the bytes of a table file.
func (tf tableFile) Open(ctx context.Context) (io.ReadCloser, error) {
// Open returns an io.ReadCloser which can be used to read the bytes of a table file and the content length in bytes.
func (tf tableFile) Open(ctx context.Context) (io.ReadCloser, uint64, error) {
return tf.open(ctx)
}
@@ -1210,13 +1210,18 @@ func getTableFiles(css map[addr]chunkSource, contents manifestContents, numSpecs
func newTableFile(cs chunkSource, info tableSpec) tableFile {
return tableFile{
info: info,
open: func(ctx context.Context) (io.ReadCloser, error) {
open: func(ctx context.Context) (io.ReadCloser, uint64, error) {
r, err := cs.reader(ctx)
if err != nil {
return nil, err
return nil, 0, err
}
return io.NopCloser(r), nil
s, err := cs.size()
if err != nil {
return nil, 0, err
}
return io.NopCloser(r), s, nil
},
}
}

View File

@@ -94,8 +94,9 @@ func TestNBSAsTableFileStore(t *testing.T) {
expected, ok := fileToData[fileID]
require.True(t, ok)
rd, err := src.Open(context.Background())
rd, contentLength, err := src.Open(context.Background())
require.NoError(t, err)
require.Equal(t, len(expected), int(contentLength))
data, err := io.ReadAll(rd)
require.NoError(t, err)

View File

@@ -263,6 +263,8 @@ type chunkSource interface {
// opens a Reader to the first byte of the chunkData segment of this table.
reader(context.Context) (io.Reader, error)
// size returns the total size of the chunkSource: chunks, index, and footer
size() (uint64, error)
index() (tableIndex, error)
// Clone returns a |chunkSource| with the same contents as the
@@ -283,8 +285,9 @@ type TableFile interface {
// NumChunks returns the number of chunks in a table file
NumChunks() int
// Open returns an io.ReadCloser which can be used to read the bytes of a table file.
Open(ctx context.Context) (io.ReadCloser, error)
// Open returns an io.ReadCloser which can be used to read the bytes of a table file. The total length of the
// table file in bytes can be optionally returned.
Open(ctx context.Context) (io.ReadCloser, uint64, error)
}
// Describes what is possible to do with TableFiles in a TableFileStore.

View File

@@ -643,6 +643,11 @@ func (tr tableReader) reader(ctx context.Context) (io.Reader, error) {
return io.LimitReader(&readerAdapter{tr.r, 0, ctx}, int64(i.TableFileSize())), nil
}
// size returns the total size in bytes of the table file backing this
// reader: chunk data, index, and footer.
func (tr tableReader) size() (uint64, error) {
	// Propagate index errors instead of discarding them; the original
	// `i, _ := tr.index()` would call TableFileSize on an invalid index
	// while still reporting success to the caller.
	i, err := tr.index()
	if err != nil {
		return 0, err
	}
	return i.TableFileSize(), nil
}
func (tr tableReader) Close() error {
return tr.tableIndex.Close()
}