Merge pull request #4267 from tanyongzhi/790-diff-skinny

Add dolt diff --skinny flag
This commit is contained in:
Jason Fulghum
2022-09-12 11:35:16 -07:00
committed by GitHub
2 changed files with 212 additions and 2 deletions

View File

@@ -60,6 +60,7 @@ const (
limitParam = "limit"
SQLFlag = "sql"
CachedFlag = "cached"
SkinnyFlag = "skinny"
)
var diffDocs = cli.CommandDocumentationContent{
@@ -96,6 +97,7 @@ type diffArgs struct {
tableSet *set.StrSet
limit int
where string
skinny bool
}
type DiffCmd struct{}
@@ -129,6 +131,7 @@ func (cmd DiffCmd) ArgParser() *argparser.ArgParser {
ap.SupportsString(whereParam, "", "column", "filters columns based on values in the diff. See {{.EmphasisLeft}}dolt diff --help{{.EmphasisRight}} for details.")
ap.SupportsInt(limitParam, "", "record_count", "limits to the first N diffs.")
ap.SupportsFlag(CachedFlag, "c", "Show only the unstaged data changes.")
ap.SupportsFlag(SkinnyFlag, "sk", "Shows only primary key columns and any columns with data changes.")
return ap
}
@@ -184,6 +187,8 @@ func parseDiffArgs(ctx context.Context, dEnv *env.DoltEnv, apr *argparser.ArgPar
dArgs.diffParts = Summary
}
dArgs.skinny = apr.Contains(SkinnyFlag)
f := apr.GetValueOrDefault(FormatFlag, "tabular")
switch strings.ToLower(f) {
case "tabular":
@@ -612,8 +617,47 @@ func diffRows(
}
defer rowIter.Close(sqlCtx)
defer rowWriter.Close(ctx)
err = writeDiffResults(sqlCtx, sch, unionSch, rowIter, rowWriter)
var modifiedColNames map[string]bool
if dArgs.skinny {
modifiedColNames, err = getModifiedCols(sqlCtx, rowIter, unionSch, sch)
if err != nil {
return errhand.BuildDError("Error running diff query:\n%s", query).AddCause(err).Build()
}
// instantiate a new schema that only contains the columns with changes
var filteredUnionSch sql.Schema
for _, s := range unionSch {
for colName := range modifiedColNames {
if s.Name == colName {
filteredUnionSch = append(filteredUnionSch, s)
}
}
}
// instantiate a new RowWriter with the new schema that only contains the columns with changes
rowWriter, err = dw.RowWriter(ctx, td, filteredUnionSch)
if err != nil {
return errhand.VerboseErrorFromError(err)
}
defer rowWriter.Close(ctx)
// reset the row iterator
err = rowIter.Close(sqlCtx)
if err != nil {
return errhand.BuildDError("Error closing row iterator:\n%s", query).AddCause(err).Build()
}
_, rowIter, err = se.Query(sqlCtx, query)
defer rowIter.Close(sqlCtx)
if sql.ErrSyntaxError.Is(err) {
return errhand.BuildDError("Failed to parse diff query. Invalid where clause?\nDiff query: %s", query).AddCause(err).Build()
} else if err != nil {
return errhand.BuildDError("Error running diff query:\n%s", query).AddCause(err).Build()
}
}
err = writeDiffResults(sqlCtx, sch, unionSch, rowIter, rowWriter, modifiedColNames, dArgs.skinny)
if err != nil {
return errhand.BuildDError("Error running diff query:\n%s", query).AddCause(err).Build()
}
@@ -657,6 +701,8 @@ func writeDiffResults(
targetSch sql.Schema,
iter sql.RowIter,
writer diff.SqlRowDiffWriter,
modifiedColNames map[string]bool,
filterChangedCols bool,
) error {
ds, err := newDiffSplitter(diffQuerySch, targetSch)
if err != nil {
@@ -666,7 +712,7 @@ func writeDiffResults(
for {
r, err := iter.Next(ctx)
if err == io.EOF {
return writer.Close(ctx)
return nil
} else if err != nil {
return err
}
@@ -676,6 +722,28 @@ func writeDiffResults(
return err
}
if filterChangedCols {
var filteredOldRow, filteredNewRow rowDiff
for i, changeType := range newRow.colDiffs {
if (changeType == diff.Added|diff.Removed) || modifiedColNames[targetSch[i].Name] {
if i < len(oldRow.row) {
filteredOldRow.row = append(filteredOldRow.row, oldRow.row[i])
filteredOldRow.colDiffs = append(filteredOldRow.colDiffs, oldRow.colDiffs[i])
filteredOldRow.rowDiff = oldRow.rowDiff
}
if i < len(newRow.row) {
filteredNewRow.row = append(filteredNewRow.row, newRow.row[i])
filteredNewRow.colDiffs = append(filteredNewRow.colDiffs, newRow.colDiffs[i])
filteredNewRow.rowDiff = newRow.rowDiff
}
}
}
oldRow = filteredOldRow
newRow = filteredNewRow
}
if oldRow.row != nil {
err := writer.WriteRow(ctx, oldRow.row, oldRow.rowDiff, oldRow.colDiffs)
if err != nil {
@@ -691,3 +759,46 @@ func writeDiffResults(
}
}
}
// getModifiedCols reads every row from iter and returns the set of column names (as a map whose values are always
// true) that a skinny diff should display: every primary key column of unionSch, plus every column whose change
// type is anything other than diff.None in at least one row.
// In the case where rows are added or removed, all columns will be included.
//
// unionSch refers to a joint schema between the schema before and after any schema changes pertaining to the diff,
// while diffQuerySch refers to the schema returned by the "dolt_diff" sql query.
//
// The iterator is read until io.EOF and is not closed here. If building the diff splitter, reading a row, or
// splitting a row fails, that error is returned together with a nil map.
func getModifiedCols(
	ctx *sql.Context,
	iter sql.RowIter,
	unionSch sql.Schema,
	diffQuerySch sql.Schema,
) (map[string]bool, error) {
	// The splitter is built only from the two schemas, so create it once instead of once per row.
	ds, err := newDiffSplitter(diffQuerySch, unionSch)
	if err != nil {
		return nil, err
	}

	modifiedColNames := make(map[string]bool)
	for {
		r, err := iter.Next(ctx)
		if err == io.EOF {
			return modifiedColNames, nil
		}
		if err != nil {
			// Any other iterator error must stop the scan; continuing would split a row that was never returned.
			return nil, err
		}

		oldRow, newRow, err := ds.splitDiffResultRow(r)
		if err != nil {
			return nil, err
		}

		// Both halves of the diff are inspected: a change may be recorded on only one of them.
		for i, changeType := range newRow.colDiffs {
			if changeType != diff.None || unionSch[i].PrimaryKey {
				modifiedColNames[unionSch[i].Name] = true
			}
		}
		for i, changeType := range oldRow.colDiffs {
			if changeType != diff.None || unionSch[i].PrimaryKey {
				modifiedColNames[unionSch[i].Name] = true
			}
		}
	}
}

View File

@@ -720,6 +720,105 @@ SQL
[[ "$output" = 'UPDATE `t` SET `val1`=30,`val3`=4 WHERE `pk`=1;' ]] || false
}
@test "diff: skinny flag only shows row changed without schema changes" {
    # Step 1: commit a table with a single inserted row.
    dolt sql -q "CREATE TABLE t(pk int primary key, val1 int, val2 int)"
    dolt add .
    dolt sql -q "INSERT INTO t VALUES (1, 1, 1)"
    dolt commit -am "cm1"

    # The skinny data diff against the previous commit is expected to mention all three columns.
    run dolt diff --skinny --data HEAD~1
    [ "$status" -eq 0 ]
    for col in pk val1 val2; do
        [[ "$output" =~ "$col" ]] || false
    done

    # Step 2: two further commits that each update only val1.
    dolt sql -q "UPDATE t SET val1=2 WHERE pk=1"
    dolt commit -am "cm2"
    dolt sql -q "UPDATE t SET val1=3 WHERE pk=1"
    dolt commit -am "cm3"

    # Now the skinny diff is expected to mention pk and val1 but not the untouched val2.
    run dolt diff --skinny HEAD~1
    [ "$status" -eq 0 ]
    for col in pk val1; do
        [[ "$output" =~ "$col" ]] || false
    done
    [[ ! "$output" =~ 'val2' ]] || false
}
@test "diff: skinny flag only shows row changed when both schema (column added) and data is changed (row updated)" {
    # Step 1: commit a table holding two rows.
    dolt sql -q "create table t(pk int primary key, val1 int, val2 int)"
    dolt add .
    dolt sql -q "INSERT INTO t VALUES (1, 1, 1)"
    dolt sql -q "INSERT INTO t VALUES (2, 2, 2)"
    dolt commit -am "cm1"

    # The skinny data diff against the previous commit is expected to mention all three columns.
    run dolt diff --skinny --data HEAD~1
    [ "$status" -eq 0 ]
    for col in pk val1 val2; do
        [[ "$output" =~ "$col" ]] || false
    done

    # Step 2: update val1, add a new column val3 and populate it, all in one commit.
    dolt sql -q "UPDATE t SET val1=3 WHERE pk=1"
    dolt sql -q "ALTER TABLE t ADD val3 int "
    dolt sql -q "UPDATE t SET val3=4 WHERE pk=1"
    dolt commit -am "cm2"

    # Expected: pk, val1 and the new val3 are mentioned; the untouched val2 is not.
    run dolt diff --skinny --data HEAD~1
    [ "$status" -eq 0 ]
    [[ ! "$output" =~ 'val2' ]] || false
    for col in pk val1 val3; do
        [[ "$output" =~ "$col" ]] || false
    done
}
@test "diff: skinny flag only shows row changed when both schema (column dropped) and data is changed (row updated)" {
    # Step 1: commit a table holding two rows, including a varchar column "s".
    dolt sql -q "create table t(pk int primary key, val1 int, s varchar(255))"
    dolt add .
    dolt sql -q "INSERT INTO t VALUES (1, 1, 'bla')"
    dolt sql -q "INSERT INTO t VALUES (2, 2, 'bla2')"
    dolt commit -am "cm1"

    # The skinny data diff against the previous commit is expected to mention all three columns.
    # Matching the single letter 's' would succeed on almost any output, so the presence of
    # column "s" is verified through its distinctive values instead.
    run dolt diff --skinny --data HEAD~1
    [ "$status" -eq 0 ]
    [[ "$output" =~ 'pk' ]] || false
    [[ "$output" =~ 'val1' ]] || false
    [[ "$output" =~ 'bla' ]] || false

    # Step 2: drop column "s" and update val1 on both rows in one commit.
    dolt sql -q "ALTER TABLE t DROP COLUMN s"
    dolt sql -q "UPDATE t SET val1=3 WHERE pk=1"
    dolt sql -q "UPDATE t SET val1=4 WHERE pk=2"
    dolt commit -am "cm2"

    # Expected: the dropped column still takes part in the diff, so the old values of "s"
    # ('bla' / 'bla2') appear alongside pk and val1.
    run dolt diff --skinny --data HEAD~1
    [ "$status" -eq 0 ]
    [[ "$output" =~ 'pk' ]] || false
    [[ "$output" =~ 'val1' ]] || false
    [[ "$output" =~ 'bla' ]] || false
}
@test "diff: skinny flag only shows row changed when data is changed (row deleted)" {
    # Step 1: commit a table holding two rows.
    dolt sql -q "create table t(pk int primary key, val1 int, val2 int)"
    dolt add .
    dolt sql -q "INSERT INTO t VALUES (1, 1, 1)"
    dolt sql -q "INSERT INTO t VALUES (2, 2, 2)"
    dolt commit -am "cm1"

    # The skinny data diff against the previous commit is expected to mention all three columns.
    run dolt diff --skinny --data HEAD~1
    [ "$status" -eq 0 ]
    for col in pk val1 val2; do
        [[ "$output" =~ "$col" ]] || false
    done

    # Step 2: delete one row and commit.
    dolt sql -q "DELETE FROM t WHERE pk=1"
    dolt commit -am "cm2"

    # A removed row is expected to be shown with every column.
    run dolt diff --skinny --data HEAD~1
    [ "$status" -eq 0 ]
    for col in pk val1 val2; do
        [[ "$output" =~ "$col" ]] || false
    done
}
@test "diff: keyless sql diffs" {
dolt sql -q "create table t(pk int, val int)"