go/libraries/utils/earl: Add special handling for AWS URLs with non-compliant host components so that we can bump the Golang version.

This commit is contained in:
Aaron Son
2025-10-14 11:55:58 +02:00
parent 3afbafa004
commit 06a1e9bb3a
8 changed files with 80 additions and 7 deletions

View File

@@ -85,9 +85,9 @@ jobs:
run: |
latest=$(git rev-parse HEAD)
echo "commitish=$latest" >> $GITHUB_OUTPUT
GO_BUILD_VERSION=1.25.1 go/utils/publishrelease/buildpgobinaries.sh
GO_BUILD_VERSION=1.25.3 go/utils/publishrelease/buildpgobinaries.sh
env:
GO_BUILD_VERSION: "1.25.1"
GO_BUILD_VERSION: "1.25.3"
PROFILE: ${{ format('{0}/dolt-cpu-profile.pprof', github.workspace) }}
- name: Create Release
id: create_release

View File

@@ -199,4 +199,4 @@ require (
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect
)
go 1.25.1
go 1.25.3

View File

@@ -19,7 +19,9 @@ import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/dolthub/dolt/go/libraries/utils/earl"
"github.com/dolthub/dolt/go/store/types"
)
@@ -32,3 +34,10 @@ func TestCreateMemDB(t *testing.T) {
assert.NotNil(t, vrw)
assert.NotNil(t, ns)
}
// TestAWSScheme verifies that earl.Parse accepts an aws:// URL whose host
// component is a bracketed "bucket:table" pair. The parsed URL is expected to
// report the "aws" scheme, and Hostname() is expected to return the pair
// without the surrounding brackets.
func TestAWSScheme(t *testing.T) {
	const rawURL = "aws://[hosted-doltdb-prod-instance-backups:hosted-doltdb-prod-instance-backups]/5ebe7f6e-7c4a-437a-b684-6aa6cdeb63df/backups/20251014T083342.813/b1625a79-0ae3-4a83-89a7-8715317c4cea/us-jails"
	const wantScheme = "aws"
	const wantHostname = "hosted-doltdb-prod-instance-backups:hosted-doltdb-prod-instance-backups"

	parsed, parseErr := earl.Parse(rawURL)
	// Parsing must succeed before the fields below are meaningful.
	require.NoError(t, parseErr)

	assert.Equal(t, wantScheme, parsed.Scheme)
	assert.Equal(t, wantHostname, parsed.Hostname())
}

View File

@@ -15,6 +15,8 @@
package earl
import (
"errors"
"fmt"
"net/url"
"regexp"
"strconv"
@@ -91,7 +93,47 @@ func parse(urlStr string) (*url.URL, error) {
Path: urlStr[strIdx+3:],
}, nil
}
if strings.Index(urlStr, "://") == -1 {
// XXX: This is a kludge to support AWS remote URLs. These URLs use a non-standard syntax to specify the s3 bucket and dynamodb table names, and they look like:
// aws://[s3_bucket_name:dynamodb_table_name]/path/to/files/in/s3/and/db/key/in/dynamo
//
// This was supported by Go url.Parse until 1.25.2, where validation was added to the bracketed hostname component:
// https://github.com/golang/go/issues/75678
//
// Here we explicitly kludge around the aws schema in a hard-coded way. Pretty gross for now.
if strings.HasPrefix(urlStr, "aws://[") {
hostStart := 7
hostEnd := hostStart + strings.Index(urlStr[hostStart:], "]")
if hostEnd == hostStart-1 {
return nil, errors.New("could not parse aws schema url: expected aws://[s3_bucket:dynamodb_table] but did not find closing bracket.")
}
host := urlStr[hostStart:hostEnd]
hostColon := strings.Index(host, ":")
if hostColon == -1 {
return nil, errors.New("could not parse aws schema url: expected aws://[s3_bucket:dynamodb_table] but did not find colon introducting dynamodb_table.")
}
rawBucketName := host[:hostColon]
rawTableName := host[hostColon+1:]
// For full compliance with previous behavior, we pass both components through url.Parse as hostnames to get the same escape handling as we used to have.
parsedBucketName, err := url.Parse("http://" + rawBucketName)
if err != nil {
return nil, fmt.Errorf("could not parse aws s3 bucket name as hostname: %w", err)
}
parsedTableName, err := url.Parse("http://" + rawTableName)
if err != nil {
return nil, fmt.Errorf("could not parse aws dynamodb table name as hostname: %w", err)
}
returnedHost := "[" + parsedBucketName.Host + ":" + parsedTableName.Host + "]"
// Here we parse the original urlStr but with the host component replaced by a hard coded compliant value. We then replace the Host in the *URL we return.
parsed, err := url.Parse("aws://hostname" + urlStr[hostEnd+1:])
if err != nil {
return nil, fmt.Errorf("could not parse aws url: %w", err)
}
parsed.Host = returnedHost
return parsed, nil
} else if strings.Index(urlStr, "://") == -1 {
u, err := url.Parse("http://" + urlStr)
if err == nil && isValidHost(u.Host) {

View File

@@ -200,6 +200,25 @@ func TestParse(t *testing.T) {
},
false,
},
{
"aws://[bucketname]/path/to/files",
url.URL{},
true,
},
{
"aws://[bucketname:tablename",
url.URL{},
true,
},
{
"aws://[bucketname:tablename]/path/to/files",
url.URL{
Scheme: "aws",
Host: "[bucketname:tablename]",
Path: "/path/to/files",
},
false,
},
{
"file://C:/Users/name/datasets",
url.URL{

View File

@@ -42,6 +42,7 @@ import (
"github.com/oracle/oci-go-sdk/v65/objectstorage"
"github.com/dolthub/dolt/go/libraries/utils/awsrefreshcreds"
"github.com/dolthub/dolt/go/libraries/utils/earl"
"github.com/dolthub/dolt/go/libraries/utils/filesys"
"github.com/dolthub/dolt/go/store/chunks"
"github.com/dolthub/dolt/go/store/d"
@@ -330,7 +331,9 @@ func (sp Spec) NewChunkStore(ctx context.Context) chunks.ChunkStore {
func parseAWSSpec(ctx context.Context, awsURL string, options SpecOptions) chunks.ChunkStore {
fmt.Println(awsURL, options)
u, _ := url.Parse(awsURL)
// earl has special handling for aws:// urls.
u, err := earl.Parse(awsURL)
d.PanicIfError(err)
parts := strings.SplitN(u.Hostname(), ":", 2) // [table] [, bucket]?
d.PanicIfFalse(len(parts) == 2)

View File

@@ -6,7 +6,7 @@ set -o pipefail
script_dir=$(dirname "$0")
cd $script_dir/../..
GO_BUILD_VERSION=1.25.1
GO_BUILD_VERSION=1.25.3
if (( $# != 1 )); then
echo "usage: build.sh linux-arm64|linux-amd64|darwin-arm64|darwin-amd64|windows-amd64"

View File

@@ -1,6 +1,6 @@
module github.com/dolthub/dolt/integration-tests/go-sql-server-driver
go 1.25.1
go 1.25.3
require (
github.com/dolthub/dolt/go v0.40.4