From b2bc73274f3c68b231b4ae0daf86189c2f2f7a8c Mon Sep 17 00:00:00 2001 From: elianddb Date: Thu, 12 Feb 2026 13:56:16 -0800 Subject: [PATCH] fix func docs --- .../doltcore/merge/merge_prolly_rows.go | 4 +- .../sqle/dtables/conflicts_tables_prolly.go | 6 +- .../dtables/constraint_violations_prolly.go | 39 ++++---- .../sqle/enginetest/dolt_queries_merge.go | 53 ++--------- go/store/prolly/artifact_map.go | 88 +++++++++++-------- 5 files changed, 84 insertions(+), 106 deletions(-) diff --git a/go/libraries/doltcore/merge/merge_prolly_rows.go b/go/libraries/doltcore/merge/merge_prolly_rows.go index c52cbb3ce6..73f504c0f4 100644 --- a/go/libraries/doltcore/merge/merge_prolly_rows.go +++ b/go/libraries/doltcore/merge/merge_prolly_rows.go @@ -845,8 +845,8 @@ func (uv uniqValidator) validateDiff(ctx *sql.Context, diff tree.ThreeWayDiff) ( return violations, err } -// deleteArtifact deletes all unique constraint violation artifacts for the row identified by |key| -// and returns whether at least one was deleted. +// deleteArtifact deletes all unique constraint violation artifacts for the row identified by |key| and returns whether +// at least one was deleted. func (uv uniqValidator) deleteArtifact(ctx context.Context, key val.Tuple) (bool, error) { n, err := uv.edits.DeleteConstraintViolationsForRow(ctx, key, prolly.ArtifactTypeUniqueKeyViol) if err != nil { diff --git a/go/libraries/doltcore/sqle/dtables/conflicts_tables_prolly.go b/go/libraries/doltcore/sqle/dtables/conflicts_tables_prolly.go index 1b9c487b62..db96b2cf93 100644 --- a/go/libraries/doltcore/sqle/dtables/conflicts_tables_prolly.go +++ b/go/libraries/doltcore/sqle/dtables/conflicts_tables_prolly.go @@ -639,8 +639,9 @@ func newProllyConflictDeleter(ct ProllyConflictsTable) *prollyConflictDeleter { } } +// Delete deletes the conflict artifact for the row |r|. The artifact key is: source PK, commit hash, artifact type, +// violation-info hash (zeros for merge conflicts). func (cd *prollyConflictDeleter) Delete(ctx *sql.Context, r sql.Row) (err error) { - // Artifact key is: source PK, commit hash, artifact type, disambiguator (zeros for conflicts). numSourceKeyFields := cd.kd.Count() - 3 if !schema.IsKeyless(cd.ct.ourSch) { err = cd.putPrimaryKeys(ctx, r, numSourceKeyFields) @@ -668,8 +669,9 @@ func (cd *prollyConflictDeleter) Delete(ctx *sql.Context, r sql.Row) (err error) return nil } +// putPrimaryKeys writes the first |numSourceKeyFields| key columns from row |r| into the tuple builder. It reads key +// columns from either base, ours, or theirs depending on which are non-nil in |r|. func (cd *prollyConflictDeleter) putPrimaryKeys(ctx *sql.Context, r sql.Row, numSourceKeyFields int) error { - // Get key columns from either base, ours, or theirs. o := func() int { if o := 1; r[o] != nil { return o diff --git a/go/libraries/doltcore/sqle/dtables/constraint_violations_prolly.go b/go/libraries/doltcore/sqle/dtables/constraint_violations_prolly.go index 8060d0b1a5..a1c5a74324 100644 --- a/go/libraries/doltcore/sqle/dtables/constraint_violations_prolly.go +++ b/go/libraries/doltcore/sqle/dtables/constraint_violations_prolly.go @@ -320,36 +320,35 @@ type prollyCVDeleter struct { var _ sql.RowDeleter = (*prollyCVDeleter)(nil) -// fillArtifactKeyPrefix writes the common key prefix (pk columns, root hash, artifact type) into kb from row r. -// Used when building both new-format and legacy artifact keys so the same logic is not duplicated. -func fillArtifactKeyPrefix(ctx *sql.Context, ns tree.NodeStore, kb *val.TupleBuilder, r sql.Row, numPks int, h hash.Hash, artType prolly.ArtifactType) error { +// fillArtifactKeyPrefix writes the common key prefix (pk columns, root hash, artifact type) into |keyBuilder| from row +// |r| for the given |numPks|, |rootHash|, and |artifactType|. Used when building new-format and legacy artifact keys. +func fillArtifactKeyPrefix(ctx *sql.Context, nodeStore tree.NodeStore, keyBuilder *val.TupleBuilder, r sql.Row, numPks int, rootHash hash.Hash, artifactType prolly.ArtifactType) error { for i := 0; i < numPks; i++ { - if err := tree.PutField(ctx, ns, kb, i, r[i+2]); err != nil { + if err := tree.PutField(ctx, nodeStore, keyBuilder, i, r[i+2]); err != nil { return err } } - kb.PutCommitAddr(numPks, h) - kb.PutUint8(numPks+1, uint8(artType)) + keyBuilder.PutCommitAddr(numPks, rootHash) + keyBuilder.PutUint8(numPks+1, uint8(artifactType)) return nil } -// Delete implements the interface sql.RowDeleter. -// Builds both the new-format key (with disambiguator) and the legacy key (without) so that -// artifacts written before the multiple-violations-per-row change remain deletable. +// Delete implements [sql.RowDeleter]. It builds both the new-format key (with violation-info hash) and the +// legacy key (without) so that artifacts written before the multiple-violations-per-row change remain deletable. func (d *prollyCVDeleter) Delete(ctx *sql.Context, r sql.Row) error { numPks := d.kd.Count() - 3 - ns := d.cvt.artM.NodeStore() - h := hash.Parse(r[0].(string)) - artType := merge.UnmapCVType(r[1]) + nodeStore := d.cvt.artM.NodeStore() + rootHash := hash.Parse(r[0].(string)) + artifactType := merge.UnmapCVType(r[1]) - if err := fillArtifactKeyPrefix(ctx, ns, d.kb, r, numPks, h, artType); err != nil { + if err := fillArtifactKeyPrefix(ctx, nodeStore, d.kb, r, numPks, rootHash, artifactType); err != nil { return err } - vinfoBytes, err := json.Marshal(r[len(r)-1]) + violationInfoBytes, err := json.Marshal(r[len(r)-1]) if err != nil { return err } - d.kb.PutByteString(numPks+2, prolly.ConstraintViolationDisambiguator(vinfoBytes)) + d.kb.PutByteString(numPks+2, prolly.ConstraintViolationInfoHash(violationInfoBytes)) newKey, err := d.kb.Build(d.pool) if err != nil { @@ -359,17 +358,17 @@ func (d *prollyCVDeleter) Delete(ctx *sql.Context, r sql.Row) error { return err } - // Legacy key (no disambiguator) for artifacts stored before multiple violations per row. + // Legacy key (no violation-info hash) for artifacts stored before multiple violations per row. legacyDesc := d.kd.PrefixDesc(d.kd.Count() - 1) - legKb := val.NewTupleBuilder(legacyDesc, ns) - if err := fillArtifactKeyPrefix(ctx, ns, legKb, r, numPks, h, artType); err != nil { + legacyKeyBuilder := val.NewTupleBuilder(legacyDesc, nodeStore) + if err := fillArtifactKeyPrefix(ctx, nodeStore, legacyKeyBuilder, r, numPks, rootHash, artifactType); err != nil { return err } - legKey, err := legKb.Build(d.pool) + legacyKey, err := legacyKeyBuilder.Build(d.pool) if err != nil { return err } - return d.ed.Delete(ctx, legKey) + return d.ed.Delete(ctx, legacyKey) } // StatementBegin implements the interface sql.TableEditor. Currently a no-op. diff --git a/go/libraries/doltcore/sqle/enginetest/dolt_queries_merge.go b/go/libraries/doltcore/sqle/enginetest/dolt_queries_merge.go index f3cab1980d..abdeea568c 100644 --- a/go/libraries/doltcore/sqle/enginetest/dolt_queries_merge.go +++ b/go/libraries/doltcore/sqle/enginetest/dolt_queries_merge.go @@ -4337,7 +4337,8 @@ var MergeArtifactsScripts = []queries.ScriptTest{ }, }, { - Name: "Multiple foreign key violations for a given row reported (issue #6329)", + // https://github.com/dolthub/dolt/issues/6329 + Name: "Multiple foreign key violations for a given row reported", SetUpScript: []string{ "SET dolt_force_transaction_commit = on;", ` @@ -4388,7 +4389,8 @@ var MergeArtifactsScripts = []queries.ScriptTest{ }, }, { - Name: "Multiple unique key violations for a given row reported (issue #6329)", + // https://github.com/dolthub/dolt/issues/6329 + Name: "Multiple unique key violations for a given row reported", SetUpScript: []string{ "SET dolt_force_transaction_commit = on;", "CREATE table t (pk int PRIMARY KEY, col1 int UNIQUE, col2 int UNIQUE);", @@ -4527,43 +4529,6 @@ var MergeArtifactsScripts = []queries.ScriptTest{ }, { // https://github.com/dolthub/dolt/issues/6329 - Name: "merge reports multiple unique key violations for the same row", - SetUpScript: []string{ - "SET dolt_force_transaction_commit = on;", - "CREATE TABLE t (pk int primary key, col1 int not null, col2 int not null, unique key unique1 (col1), unique key unique2 (col2));", - "INSERT INTO t VALUES (1, 2, 3);", - "CALL DOLT_COMMIT('-Am', 'adding table t on main branch');", - "CALL DOLT_BRANCH('other');", - "UPDATE t SET col1 = 0, col2 = 0;", - "CALL DOLT_COMMIT('-am', 'changing row 1 values on main branch');", - "CALL DOLT_CHECKOUT('other');", - "INSERT INTO t VALUES (100, 0, 0);", - "CALL DOLT_COMMIT('-am', 'adding a row on branch other');", - "CALL DOLT_CHECKOUT('main');", - }, - Assertions: []queries.ScriptTestAssertion{ - { - Query: "CALL DOLT_MERGE('other');", - Expected: []sql.Row{{"", 0, 1, "conflicts found"}}, - }, - { - Query: "SELECT * FROM dolt_constraint_violations;", - Expected: []sql.Row{{"t", uint64(4)}}, - }, - { - Query: "SELECT violation_type, pk, col1, col2, violation_info FROM dolt_constraint_violations_t ORDER BY pk, CAST(violation_info as CHAR);", - Expected: []sql.Row{ - {"unique index", 1, 0, 0, merge.UniqCVMeta{Columns: []string{"col1"}, Name: "unique1"}}, - {"unique index", 1, 0, 0, merge.UniqCVMeta{Columns: []string{"col2"}, Name: "unique2"}}, - {"unique index", 100, 0, 0, merge.UniqCVMeta{Columns: []string{"col1"}, Name: "unique1"}}, - {"unique index", 100, 0, 0, merge.UniqCVMeta{Columns: []string{"col2"}, Name: "unique2"}}, - }, - }, - }, - }, - { - // Violation system tables (dolt_constraint_violations and dolt_constraint_violations_) - // must support multiple violations for the same row and report them all. Name: "violation system table supports multiple violations per row", SetUpScript: []string{ "SET dolt_force_transaction_commit = on;", @@ -4592,12 +4557,12 @@ var MergeArtifactsScripts = []queries.ScriptTest{ Expected: []sql.Row{{4}}, }, { - Query: "SELECT pk, violation_type, a, b FROM dolt_constraint_violations_t ORDER BY pk, CAST(violation_info AS CHAR);", + Query: "SELECT * FROM dolt_constraint_violations_t ORDER BY pk, CAST(violation_info AS CHAR);", Expected: []sql.Row{ - {1, "unique index", 0, 0}, - {1, "unique index", 0, 0}, - {2, "unique index", 0, 0}, - {2, "unique index", 0, 0}, + {doltCommit, "unique index", 1, 0, 0, merge.UniqCVMeta{Name: "ua", Columns: []string{"a"}}}, + {doltCommit, "unique index", 1, 0, 0, merge.UniqCVMeta{Name: "ub", Columns: []string{"b"}}}, + {doltCommit, "unique index", 2, 0, 0, merge.UniqCVMeta{Name: "ua", Columns: []string{"a"}}}, + {doltCommit, "unique index", 2, 0, 0, merge.UniqCVMeta{Name: "ub", Columns: []string{"b"}}}, }, }, { diff --git a/go/store/prolly/artifact_map.go b/go/store/prolly/artifact_map.go index 65fcb1c3c1..ae2150b642 100644 --- a/go/store/prolly/artifact_map.go +++ b/go/store/prolly/artifact_map.go @@ -17,12 +17,14 @@ package prolly import ( "bytes" "context" - "crypto/sha256" + "encoding/binary" "encoding/json" "fmt" "io" "strings" + "github.com/zeebo/xxh3" + "github.com/dolthub/dolt/go/store/hash" "github.com/dolthub/dolt/go/store/pool" "github.com/dolthub/dolt/go/store/prolly/message" @@ -48,9 +50,17 @@ const ( const ( artifactMapPendingBufferSize = 650_000 - // cvDisambiguatorSize is the byte length of the constraint-violation disambiguator - // appended to artifact keys so multiple violations of the same type for the same row can coexist. - cvDisambiguatorSize = hash.ByteLen + + // violationInfoHashSize is the byte length of the violation-info hash suffix in artifact keys. + // We use [hash.ByteLen] (20) so the fourth key field matches the commit-hash field size and the + // key descriptor stays uniform. The hash is derived from the violation info via xxh3 so + // multiple violations of the same type for the same row get distinct keys. + violationInfoHashSize = hash.ByteLen + + // violationInfoHashSeed is used for the second xxh3 call when building the 20-byte suffix. + // It is the 64-bit golden-ratio constant (2^64/φ) so the extra 4 bytes come from a different + // hash than the first 16, improving distribution across the full key suffix. + violationInfoHashSeed = uint64(0x9e3779b97f4a7c15) ) type ArtifactMap struct { @@ -344,27 +354,28 @@ type ArtifactsEditor struct { } // BuildArtifactKey builds the artifact map key from |srcKey|, |srcRootish|, |artType|, and an optional -// |cvDisambiguator|. For constraint violations, pass the first cvDisambiguatorSize bytes of sha256(v_info) -// so multiple violations of the same type for the same row get distinct keys. For conflicts, pass nil -// (zeros are used). Old keys without a disambiguator are still valid when reading. -func (wr *ArtifactsEditor) BuildArtifactKey(_ context.Context, srcKey val.Tuple, srcRootish hash.Hash, artType ArtifactType, cvDisambiguator []byte) (val.Tuple, error) { +// |violationInfoHash|. For foreign key, unique index, check constraint, and not null violations pass +// [ConstraintViolationInfoHash] with |meta.VInfo| so multiple violations of the same type for the same row get distinct +// keys. For conflicts pass nil (zeros). Old keys without the violation-info hash suffix are still valid. +func (wr *ArtifactsEditor) BuildArtifactKey(_ context.Context, srcKey val.Tuple, srcRootish hash.Hash, artType ArtifactType, violationInfoHash []byte) (val.Tuple, error) { for i := 0; i < srcKey.Count(); i++ { wr.artKB.PutRaw(i, srcKey.GetField(i)) } wr.artKB.PutCommitAddr(srcKey.Count(), srcRootish) wr.artKB.PutUint8(srcKey.Count()+1, uint8(artType)) - if len(cvDisambiguator) >= cvDisambiguatorSize { - wr.artKB.PutByteString(srcKey.Count()+2, cvDisambiguator[:cvDisambiguatorSize]) + if len(violationInfoHash) >= violationInfoHashSize { + wr.artKB.PutByteString(srcKey.Count()+2, violationInfoHash[:violationInfoHashSize]) } else { - wr.artKB.PutByteString(srcKey.Count()+2, make([]byte, cvDisambiguatorSize)) + wr.artKB.PutByteString(srcKey.Count()+2, make([]byte, violationInfoHashSize)) } return wr.artKB.Build(wr.pool) } -// Add adds an artifact entry. The key includes |srcKey|, |srcRootish|, |artType|, and |cvDisambiguator|. -// For constraint violations use ConstraintViolationDisambiguator(meta.VInfo); for conflicts pass nil. -func (wr *ArtifactsEditor) Add(ctx context.Context, srcKey val.Tuple, srcRootish hash.Hash, artType ArtifactType, meta []byte, cvDisambiguator []byte) error { - key, err := wr.BuildArtifactKey(ctx, srcKey, srcRootish, artType, cvDisambiguator) +// Add adds an artifact entry. The key includes |srcKey|, |srcRootish|, |artType|, and |violationInfoHash|. +// For foreign key, unique index, check constraint, and not null violations use [ConstraintViolationInfoHash] with +// |meta.VInfo|; for conflicts pass nil. +func (wr *ArtifactsEditor) Add(ctx context.Context, srcKey val.Tuple, srcRootish hash.Hash, artType ArtifactType, meta []byte, violationInfoHash []byte) error { + key, err := wr.BuildArtifactKey(ctx, srcKey, srcRootish, artType, violationInfoHash) if err != nil { return err } @@ -378,11 +389,10 @@ func (wr *ArtifactsEditor) Add(ctx context.Context, srcKey val.Tuple, srcRootish return wr.mut.Put(ctx, key, value) } -// ReplaceConstraintViolation replaces constraint violations that match the -// given one but have a different commit hash. If no existing violation exists, -// the given will be inserted. Returns true if a violation was replaced. If an -// existing violation exists but has a different |meta.VInfo| value then -// ErrMergeArtifactCollision is a returned. +// ReplaceConstraintViolation replaces constraint violations that match the given one but have a different +// commit hash. If no existing violation exists, the given will be inserted. If an existing violation +// has the same |meta.Value| but a different |meta.VInfo|, a new artifact is added (distinct key via +// violation-info hash) instead of replacing. Same value and same |meta.VInfo| causes replace. func (wr *ArtifactsEditor) ReplaceConstraintViolation(ctx context.Context, srcKey val.Tuple, srcRootish hash.Hash, artType ArtifactType, meta ConstraintViolationMeta) error { itr, err := wr.mut.IterRange(ctx, PrefixRange(ctx, srcKey, wr.srcKeyDesc)) if err != nil { @@ -416,10 +426,10 @@ func (wr *ArtifactsEditor) ReplaceConstraintViolation(ctx context.Context, srcKe if bytes.Equal(currMeta.Value, meta.Value) { if !bytes.Equal(currMeta.VInfo, meta.VInfo) { // Different violation (e.g. different unique index) for the same row; we add another - // artifact with a distinct key via the disambiguator, so do not treat as collision. + // artifact with a distinct key via the violation-info hash, so do not treat as collision. continue } - // Same violation identity (same value and v_info); replace by deleting and re-adding below. + // Same violation identity (same value and violation info); replace by deleting and re-adding below. err = wr.Delete(ctx, art.ArtKey) if err != nil { return err @@ -434,8 +444,8 @@ func (wr *ArtifactsEditor) ReplaceConstraintViolation(ctx context.Context, srcKe if err != nil { return err } - disambiguator := ConstraintViolationDisambiguator(meta.VInfo) - err = wr.Add(ctx, srcKey, srcRootish, artType, d, disambiguator) + violationInfoHash := ConstraintViolationInfoHash(meta.VInfo) + err = wr.Add(ctx, srcKey, srcRootish, artType, d, violationInfoHash) if err != nil { return err } @@ -443,17 +453,21 @@ func (wr *ArtifactsEditor) ReplaceConstraintViolation(ctx context.Context, srcKe return nil } -// ConstraintViolationDisambiguator returns the first cvDisambiguatorSize bytes of sha256(vInfo) -// to uniquely identify a constraint violation in the artifact key so multiple violations of the -// same type for the same row can coexist. -func ConstraintViolationDisambiguator(vInfo []byte) []byte { - sum := sha256.Sum256(vInfo) - return sum[:cvDisambiguatorSize] +// ConstraintViolationInfoHash returns violationInfoHashSize bytes derived from |violationInfo| for use as the key +// suffix so multiple violations of the same type for the same row get distinct artifact keys. +func ConstraintViolationInfoHash(violationInfo []byte) []byte { + out := make([]byte, violationInfoHashSize) + h128 := xxh3.Hash128(violationInfo).Bytes() + copy(out, h128[:]) + h64 := xxh3.HashSeed(violationInfo, violationInfoHashSeed) + var tmp [8]byte + binary.BigEndian.PutUint64(tmp[:], h64) + copy(out[16:], tmp[4:8]) + return out } -// DeleteConstraintViolationsForRow deletes every constraint-violation artifact for the row |srcKey| -// with type |artType|. It returns the number of artifacts deleted. Used when resolving all -// violations for a row (e.g. after fixing multiple unique violations on that row). +// DeleteConstraintViolationsForRow deletes every constraint-violation artifact for the row |srcKey| with type +// |artType|. It returns the number of artifacts deleted. Used when resolving all violations for a row. func (wr *ArtifactsEditor) DeleteConstraintViolationsForRow(ctx context.Context, srcKey val.Tuple, artType ArtifactType) (int, error) { itr, err := wr.mut.IterRange(ctx, PrefixRange(ctx, srcKey, wr.srcKeyDesc)) if err != nil { @@ -667,10 +681,8 @@ type Artifact struct { } func mergeArtifactsDescriptorsFromSource(srcKd *val.TupleDesc) (kd, vd *val.TupleDesc) { - // Artifact key is: source PK, commit hash, artifact type, then a fixed-size - // disambiguator (hash of violation info) so multiple violations of the same type - // for the same row can coexist. Old keys without the disambiguator are still - // read correctly; new writes always include it. + // Artifact key is: source PK, commit hash, artifact type, then a fixed-size violation-info hash so multiple + // violations of the same type for the same row can coexist. keyTypes := srcKd.Types // source branch commit hash @@ -679,7 +691,7 @@ func mergeArtifactsDescriptorsFromSource(srcKd *val.TupleDesc) (kd, vd *val.Tupl // artifact type keyTypes = append(keyTypes, val.Type{Enc: val.Uint8Enc, Nullable: false}) - // constraint-violation disambiguator: first cvDisambiguatorSize bytes of sha256(v_info) + // violation-info hash: violationInfoHashSize bytes derived from violation info (for distinct keys per row) keyTypes = append(keyTypes, val.Type{Enc: val.ByteStringEnc, Nullable: false}) // json blob data