added new encodings for year and date

This commit is contained in:
Andy Arthur
2022-05-24 15:49:36 -07:00
parent 3263672122
commit 2e7a81437b
8 changed files with 318 additions and 87 deletions

View File

@@ -51,12 +51,14 @@ func GetField(td val.TupleDesc, i int, tup val.Tuple) (v interface{}, err error)
v, ok = td.GetFloat64(i, tup)
case val.DecimalEnc:
v, ok = td.GetDecimal(i, tup)
case val.TimeEnc:
v, ok = td.GetSqlTime(i, tup)
case val.YearEnc:
v, ok = td.GetYear(i, tup)
case val.TimestampEnc, val.DateEnc, val.DatetimeEnc:
v, ok = td.GetTimestamp(i, tup)
case val.DateEnc:
v, ok = td.GetDate(i, tup)
case val.TimeEnc:
v, ok = td.GetSqlTime(i, tup)
case val.DatetimeEnc:
v, ok = td.GetDatetime(i, tup)
case val.StringEnc:
v, ok = td.GetString(i, tup)
case val.ByteStringEnc:
@@ -116,12 +118,14 @@ func PutField(tb *val.TupleBuilder, i int, v interface{}) error {
tb.PutFloat64(i, v.(float64))
case val.DecimalEnc:
tb.PutDecimal(i, v.(string))
case val.TimeEnc:
tb.PutSqlTime(i, v.(string))
case val.YearEnc:
tb.PutYear(i, v.(int16))
case val.DateEnc, val.DatetimeEnc, val.TimestampEnc:
tb.PutTimestamp(i, v.(time.Time))
case val.DateEnc:
tb.PutDate(i, v.(time.Time))
case val.TimeEnc:
tb.PutSqlTime(i, v.(string))
case val.DatetimeEnc:
tb.PutDatetime(i, v.(time.Time))
case val.StringEnc:
tb.PutString(i, v.(string))
case val.ByteStringEnc:

View File

@@ -27,6 +27,7 @@ import (
)
func init() {
// todo: multiple query types can map to a single encoding
encodingToType[val.Int8Enc] = query.Type_INT8
encodingToType[val.Uint8Enc] = query.Type_UINT8
encodingToType[val.Int16Enc] = query.Type_INT16
@@ -38,9 +39,7 @@ func init() {
encodingToType[val.Float32Enc] = query.Type_FLOAT32
encodingToType[val.Float64Enc] = query.Type_FLOAT64
encodingToType[val.DecimalEnc] = query.Type_DECIMAL
encodingToType[val.TimeEnc] = query.Type_TIME
encodingToType[val.YearEnc] = query.Type_YEAR
encodingToType[val.TimestampEnc] = query.Type_TIMESTAMP
encodingToType[val.DateEnc] = query.Type_TIMESTAMP
encodingToType[val.DatetimeEnc] = query.Type_TIMESTAMP
encodingToType[val.StringEnc] = query.Type_VARCHAR

View File

@@ -134,18 +134,20 @@ func encodingFromSqlType(typ query.Type) val.Encoding {
switch typ {
case query.Type_DECIMAL:
return val.DecimalEnc
case query.Type_DATE:
return val.DateEnc
case query.Type_DATETIME:
return val.DatetimeEnc
case query.Type_TIME:
return val.TimeEnc
case query.Type_TIMESTAMP:
return val.TimestampEnc
case query.Type_YEAR:
return val.YearEnc
case query.Type_GEOMETRY:
return val.GeometryEnc
case query.Type_BIT:
return val.Uint64Enc
case query.Type_BLOB:
return val.ByteStringEnc
case query.Type_TEXT:
return val.StringEnc
case query.Type_ENUM:
return val.StringEnc
case query.Type_SET:
return val.StringEnc
case query.Type_JSON:
return val.JSONEnc
}
switch typ {
@@ -173,26 +175,24 @@ func encodingFromSqlType(typ query.Type) val.Encoding {
return val.Float32Enc
case query.Type_FLOAT64:
return val.Float64Enc
case query.Type_BIT:
return val.Uint64Enc
case query.Type_YEAR:
return val.YearEnc
case query.Type_DATE:
return val.DateEnc
case query.Type_TIME:
return val.TimeEnc
case query.Type_TIMESTAMP:
return val.DatetimeEnc
case query.Type_DATETIME:
return val.DatetimeEnc
case query.Type_BINARY:
return val.ByteStringEnc
case query.Type_VARBINARY:
return val.ByteStringEnc
case query.Type_BLOB:
return val.ByteStringEnc
case query.Type_CHAR:
return val.StringEnc
case query.Type_VARCHAR:
return val.StringEnc
case query.Type_TEXT:
return val.StringEnc
case query.Type_JSON:
return val.JSONEnc
case query.Type_ENUM:
return val.StringEnc
case query.Type_SET:
return val.StringEnc
default:
panic(fmt.Sprintf("unknown encoding %v", typ))
}

View File

@@ -44,9 +44,12 @@ const (
float32Size ByteSize = 4
float64Size ByteSize = 8
// todo(andy): experimental encoding
timestampSize ByteSize = 8
hash128Size ByteSize = 16
hash128Size ByteSize = 16
yearSize ByteSize = 1
dateSize ByteSize = 4
//timeSize ByteSize = 8
datetimeSize ByteSize = 8
)
// Encoding is a one-byte tag naming the binary encoding of a field.
type Encoding uint8
@@ -65,12 +68,12 @@ const (
Float32Enc Encoding = 11
Float64Enc Encoding = 12
// todo(andy): experimental encodings
TimestampEnc Encoding = 14
DateEnc Encoding = 15
DatetimeEnc Encoding = 16
YearEnc Encoding = 17
Hash128Enc Encoding = 18
Hash128Enc Encoding = 13
YearEnc Encoding = 14
DateEnc Encoding = 15
// TimeEnc Encoding = 16
DatetimeEnc Encoding = 17
sentinel Encoding = 127
)
@@ -123,10 +126,14 @@ func sizeFromType(t Type) (ByteSize, bool) {
return float32Size, true
case Float64Enc:
return float64Size, true
case DateEnc, DatetimeEnc, TimestampEnc:
return timestampSize, true
case YearEnc:
return int16Size, true
return yearSize, true
case DateEnc:
return dateSize, true
//case TimeEnc:
// return timeSize, true
case DatetimeEnc:
return datetimeSize, true
case Hash128Enc:
return hash128Size, true
default:
@@ -359,18 +366,69 @@ func compareFloat64(l, r float64) int {
}
}
func readTimestamp(buf []byte) (t time.Time) {
expectSize(buf, timestampSize)
// readString returns the byte string held in val as a Go string.
func readString(val []byte) string {
	// todo(andy): fix allocation
	raw := readByteString(val)
	return string(raw)
}
// minYear is the base year: readYear and writeYear store a year as a
// single-byte offset from this value.
const minYear int16 = 1901
// readYear decodes a year that was stored as a one-byte offset from minYear.
func readYear(val []byte) int16 {
	expectSize(val, yearSize)
	offset := readUint8(val)
	return minYear + int16(offset)
}
// writeYear stores val in buf as a one-byte offset from minYear.
//
// NOTE: the conversion to uint8 truncates, so a year outside
// [minYear, minYear+255] is not representable and wraps silently.
func writeYear(buf []byte, val int16) {
	expectSize(buf, yearSize)
	offset := val - minYear
	writeUint8(buf, uint8(offset))
}
// compareYear orders two decoded years by delegating to compareInt16.
func compareYear(l, r int16) int {
	order := compareInt16(l, r)
	return order
}
// Bit layout of a date packed into a uint32, adapted from:
// https://dev.mysql.com/doc/internals/en/date-and-time-data-type-representation.html
//
//	bits 16 and up: year
//	bits 8-15:      month
//	bits 0-7:       day
const (
	yearShift  uint32 = 16
	monthShift uint32 = 8
	monthMask  uint32 = 0xFF << monthShift
	dayMask    uint32 = 0xFF
)
// readDate unpacks a uint32-encoded date into a midnight UTC time.Time.
// The year, month and day are extracted with the shift/mask constants.
func readDate(val []byte) (date time.Time) {
	expectSize(val, dateSize)
	packed := readUint32(val)
	year := int(packed >> yearShift)
	month := time.Month((packed & monthMask) >> monthShift)
	day := int(packed & dayMask)
	date = time.Date(year, month, day, 0, 0, 0, 0, time.UTC)
	return date
}
// writeDate packs the calendar date of val (year, month, day as reported in
// val's own location) into a uint32 and writes it to buf. The time-of-day
// portion of val is discarded.
func writeDate(buf []byte, val time.Time) {
	expectSize(buf, dateSize)
	year, month, day := val.Date()
	// The three fields occupy disjoint bit ranges, so OR-ing them together
	// composes the packed value.
	packed := uint32(year<<yearShift) | uint32(month<<monthShift) | uint32(day)
	writeUint32(buf, packed)
}
// compareDate orders two decoded dates by delegating to compareDatetime.
func compareDate(l, r time.Time) int {
	order := compareDatetime(l, r)
	return order
}
// readDatetime decodes an int64 count of nanoseconds since the Unix epoch
// into a UTC time.Time.
func readDatetime(buf []byte) (t time.Time) {
	expectSize(buf, datetimeSize)
	nanos := readInt64(buf)
	return time.Unix(0, nanos).UTC()
}
func writeTimestamp(buf []byte, val time.Time) {
expectSize(buf, timestampSize)
// writeDatetime stores val in buf as an int64 count of nanoseconds since the
// Unix epoch.
//
// NOTE(review): Time.UnixNano is only well defined for instants that fit in
// an int64 of nanoseconds (roughly years 1678-2262) — confirm callers never
// pass datetimes outside that window.
func writeDatetime(buf []byte, val time.Time) {
	expectSize(buf, datetimeSize)
	nanos := val.UnixNano()
	writeInt64(buf, nanos)
}
func compareTimestamp(l, r time.Time) int {
func compareDatetime(l, r time.Time) int {
if l.Equal(r) {
return 0
} else if l.Before(r) {
@@ -380,11 +438,6 @@ func compareTimestamp(l, r time.Time) int {
}
}
// readString returns the byte string held in val as a Go string.
func readString(val []byte) string {
	// todo(andy): fix allocation
	raw := readByteString(val)
	return string(raw)
}
// writeString writes the bytes of val into buf via writeByteString.
func writeString(buf []byte, val string) {
	raw := []byte(val)
	writeByteString(buf, raw)
}

View File

@@ -17,6 +17,7 @@ package val
import (
"math"
"testing"
"time"
"github.com/stretchr/testify/assert"
)
@@ -27,7 +28,7 @@ func TestCompare(t *testing.T) {
l, r []byte
cmp int
}{
// ints
// int
{
typ: Type{Enc: Int64Enc},
l: encInt(0), r: encInt(0),
@@ -43,7 +44,7 @@ func TestCompare(t *testing.T) {
l: encInt(1), r: encInt(0),
cmp: 1,
},
// uints
// uint
{
typ: Type{Enc: Uint64Enc},
l: encUint(0), r: encUint(0),
@@ -59,7 +60,7 @@ func TestCompare(t *testing.T) {
l: encUint(1), r: encUint(0),
cmp: 1,
},
// floats
// float
{
typ: Type{Enc: Float64Enc},
l: encFloat(0), r: encFloat(0),
@@ -75,7 +76,58 @@ func TestCompare(t *testing.T) {
l: encFloat(1), r: encFloat(0),
cmp: 1,
},
// strings
// year
{
typ: Type{Enc: YearEnc},
l: encYear(2022), r: encYear(2022),
cmp: 0,
},
{
typ: Type{Enc: YearEnc},
l: encYear(2022), r: encYear(1999),
cmp: 1,
},
{
typ: Type{Enc: YearEnc},
l: encYear(2000), r: encYear(2022),
cmp: -1,
},
// date
{
typ: Type{Enc: DateEnc},
l: encDate(2022, 05, 24), r: encDate(2022, 05, 24),
cmp: 0,
},
{
typ: Type{Enc: DateEnc},
l: encDate(2022, 12, 24), r: encDate(2022, 05, 24),
cmp: 1,
},
{
typ: Type{Enc: DateEnc},
l: encDate(1999, 04, 24), r: encDate(2022, 05, 24),
cmp: -1,
},
// datetime
{
typ: Type{Enc: DatetimeEnc},
l: encDatetime(time.Date(1999, 11, 01, 01, 01, 01, 00, time.UTC)),
r: encDatetime(time.Date(1999, 11, 01, 01, 01, 01, 00, time.UTC)),
cmp: 0,
},
{
typ: Type{Enc: DatetimeEnc},
l: encDatetime(time.Date(2000, 11, 01, 01, 01, 01, 00, time.UTC)),
r: encDatetime(time.Date(1999, 11, 01, 01, 01, 01, 00, time.UTC)),
cmp: 1,
},
{
typ: Type{Enc: DatetimeEnc},
l: encDatetime(time.Date(1999, 11, 01, 01, 01, 01, 00, time.UTC)),
r: encDatetime(time.Date(2000, 11, 01, 01, 01, 01, 00, time.UTC)),
cmp: -1,
},
// string
{
typ: Type{Enc: StringEnc},
l: encStr(""), r: encStr(""),
@@ -110,24 +162,27 @@ func TestCompare(t *testing.T) {
for _, test := range tests {
act := compare(test.typ, test.l, test.r)
assert.Equal(t, test.cmp, act)
assert.Equal(t, test.cmp, act, "expected %s %s %s ",
formatValue(test.typ.Enc, test.l),
fmtComparator(test.cmp),
formatValue(test.typ.Enc, test.r))
}
}
func encInt(i int64) []byte {
buf := make([]byte, 8)
buf := make([]byte, uint64Size)
writeInt64(buf, i)
return buf
}
func encUint(u uint64) []byte {
buf := make([]byte, 8)
buf := make([]byte, int64Size)
writeUint64(buf, u)
return buf
}
// encFloat returns the encoded form of f in a freshly allocated buffer.
func encFloat(f float64) []byte {
	// Declare the buffer exactly once, sized for a float64.
	buf := make([]byte, float64Size)
	writeFloat64(buf, f)
	return buf
}
@@ -138,6 +193,26 @@ func encStr(s string) []byte {
return buf
}
// encYear returns the encoded form of year y in a freshly allocated buffer.
func encYear(y int16) []byte {
	out := make([]byte, yearSize)
	writeYear(out, y)
	return out
}
// encDate returns the encoded form of the calendar date y-m-d in a freshly
// allocated buffer.
func encDate(y, m, d int) []byte {
	// Build the date directly. Offsetting the zero time.Time with AddDate
	// starts from 0001-01-01, which shifts every component by one
	// (e.g. 2022, 5, 24 would become 2023-06-25).
	date := time.Date(y, time.Month(m), d, 0, 0, 0, 0, time.UTC)
	buf := make([]byte, dateSize)
	writeDate(buf, date)
	return buf
}
// encDatetime returns the encoded form of dt in a freshly allocated buffer.
func encDatetime(dt time.Time) []byte {
	out := make([]byte, datetimeSize)
	writeDatetime(out, dt)
	return out
}
func TestCodecRoundTrip(t *testing.T) {
t.Run("round trip bool", func(t *testing.T) {
roundTripBools(t)
@@ -151,6 +226,15 @@ func TestCodecRoundTrip(t *testing.T) {
t.Run("round trip floats", func(t *testing.T) {
roundTripFloats(t)
})
t.Run("round trip years", func(t *testing.T) {
roundTripYears(t)
})
t.Run("round trip dates", func(t *testing.T) {
roundTripDates(t)
})
t.Run("round trip datetimes", func(t *testing.T) {
roundTripDatetimes(t)
})
}
func roundTripBools(t *testing.T) {
@@ -259,8 +343,65 @@ func roundTripFloats(t *testing.T) {
}
}
// roundTripYears writes each sample year, reads it back, and asserts the
// decoded value equals the input. The buffer is cleared between samples.
func roundTripYears(t *testing.T) {
	buf := make([]byte, yearSize)
	for _, want := range []int16{1901, 2022, 2155} {
		writeYear(buf, want)
		got := readYear(buf)
		assert.Equal(t, want, got)
		zero(buf)
	}
}
// roundTripDates writes each sample date, reads it back, and asserts the
// decoded value equals the input. The buffer is cleared between samples.
func roundTripDates(t *testing.T) {
	samples := []time.Time{
		testDate(1000, 01, 01),
		testDate(2022, 05, 24),
		testDate(9999, 12, 31),
	}
	buf := make([]byte, dateSize)
	for _, want := range samples {
		writeDate(buf, want)
		got := readDate(buf)
		assert.Equal(t, want, got)
		zero(buf)
	}
}
// roundTripDatetimes writes the current UTC time, reads it back, and asserts
// the decoded value equals the input.
func roundTripDatetimes(t *testing.T) {
	buf := make([]byte, datetimeSize)
	for _, want := range []time.Time{time.Now().UTC()} {
		writeDatetime(buf, want)
		got := readDatetime(buf)
		assert.Equal(t, want, got)
		zero(buf)
	}
}
// testDate builds a midnight UTC time.Time for the calendar date y-m-d.
func testDate(y, m, d int) (date time.Time) {
	date = time.Date(y, time.Month(m), d, 0, 0, 0, 0, time.UTC)
	return date
}
// zero overwrites every byte of buf with 0.
func zero(buf []byte) {
	for i := 0; i < len(buf); i++ {
		buf[i] = 0
	}
}
// fmtComparator renders a three-way comparison result as "=", "<" or ">".
func fmtComparator(c int) string {
	switch {
	case c == 0:
		return "="
	case c < 0:
		return "<"
	default:
		return ">"
	}
}

View File

@@ -157,11 +157,19 @@ func (tb *TupleBuilder) PutFloat64(i int, v float64) {
tb.pos += float64Size
}
func (tb *TupleBuilder) PutTimestamp(i int, v time.Time) {
tb.Desc.expectEncoding(i, DateEnc, DatetimeEnc, TimestampEnc)
tb.fields[i] = tb.buf[tb.pos : tb.pos+timestampSize]
writeTimestamp(tb.fields[i], v)
tb.pos += timestampSize
// PutYear writes a year to the ith field of the Tuple being built. The year
// is encoded by writeYear into a field of yearSize bytes.
func (tb *TupleBuilder) PutYear(i int, v int16) {
	tb.Desc.expectEncoding(i, YearEnc)
	tb.fields[i] = tb.buf[tb.pos : tb.pos+yearSize]
	writeYear(tb.fields[i], v)
	// Advance by the width actually written. Advancing by int16Size would
	// leave an unused gap after every year field.
	tb.pos += yearSize
}
// PutDate writes a date to the ith field of the Tuple being built.
func (tb *TupleBuilder) PutDate(i int, v time.Time) {
	tb.Desc.expectEncoding(i, DateEnc)
	end := tb.pos + dateSize
	field := tb.buf[tb.pos:end]
	tb.fields[i] = field
	writeDate(field, v)
	tb.pos = end
}
// PutSqlTime writes a string to the ith field of the Tuple being built.
@@ -173,13 +181,11 @@ func (tb *TupleBuilder) PutSqlTime(i int, v string) {
tb.pos += sz
}
// PutYear writes an int16-encoded year to the ith field of the Tuple being built.
func (tb *TupleBuilder) PutYear(i int, v int16) {
// todo(andy): yearSize, etc?
tb.Desc.expectEncoding(i, YearEnc)
tb.fields[i] = tb.buf[tb.pos : tb.pos+int16Size]
writeInt16(tb.fields[i], v)
tb.pos += int16Size
// PutDatetime writes a datetime to the ith field of the Tuple being built.
func (tb *TupleBuilder) PutDatetime(i int, v time.Time) {
	tb.Desc.expectEncoding(i, DatetimeEnc)
	end := tb.pos + datetimeSize
	field := tb.buf[tb.pos:end]
	tb.fields[i] = field
	writeDatetime(field, v)
	tb.pos = end
}
func (tb *TupleBuilder) PutDecimal(i int, v string) {

View File

@@ -91,11 +91,13 @@ func compare(typ Type, left, right []byte) int {
case Float64Enc:
return compareFloat64(readFloat64(left), readFloat64(right))
case YearEnc:
return compareInt16(readInt16(left), readInt16(right))
case DateEnc, DatetimeEnc, TimestampEnc:
return compareTimestamp(readTimestamp(left), readTimestamp(right))
return compareYear(readYear(left), readYear(right))
case DateEnc:
return compareDate(readDate(left), readDate(right))
case TimeEnc:
panic("unimplemented")
case DatetimeEnc:
return compareDatetime(readDatetime(left), readDatetime(right))
case DecimalEnc:
// todo(andy): temporary Decimal implementation
fallthrough

View File

@@ -250,13 +250,24 @@ func (td TupleDesc) GetDecimal(i int, tup Tuple) (v string, ok bool) {
return
}
// GetTimestamp reads a time.Time from the ith field of the Tuple.
// GetYear reads an int16 from the ith field of the Tuple.
// If the ith field is NULL, |ok| is set to false.
func (td TupleDesc) GetTimestamp(i int, tup Tuple) (v time.Time, ok bool) {
td.expectEncoding(i, TimestampEnc, DateEnc, DatetimeEnc, YearEnc)
func (td TupleDesc) GetYear(i int, tup Tuple) (v int16, ok bool) {
	td.expectEncoding(i, YearEnc)
	b := td.GetField(i, tup)
	if b != nil {
		// Year fields are yearSize bytes wide and hold an offset from
		// minYear, so they must be decoded with readYear; readInt16
		// expects a two-byte field and would skip the offset.
		v, ok = readYear(b), true
	}
	return
}
// GetDate reads a time.Time from the ith field of the Tuple.
// If the ith field is NULL, |ok| is set to false.
func (td TupleDesc) GetDate(i int, tup Tuple) (v time.Time, ok bool) {
	td.expectEncoding(i, DateEnc)
	field := td.GetField(i, tup)
	if field == nil {
		return v, false
	}
	return readDate(field), true
}
@@ -272,13 +283,13 @@ func (td TupleDesc) GetSqlTime(i int, tup Tuple) (v string, ok bool) {
return
}
// GetYear reads an int16 from the ith field of the Tuple.
// GetDatetime reads a time.Time from the ith field of the Tuple.
// If the ith field is NULL, |ok| is set to false.
func (td TupleDesc) GetYear(i int, tup Tuple) (v int16, ok bool) {
td.expectEncoding(i, YearEnc)
func (td TupleDesc) GetDatetime(i int, tup Tuple) (v time.Time, ok bool) {
	td.expectEncoding(i, DatetimeEnc)
	b := td.GetField(i, tup)
	if b != nil {
		// Decode only with readDatetime; the field holds an encoded
		// datetime, not an int16.
		v, ok = readDatetime(b), true
	}
	return
}
@@ -375,9 +386,11 @@ func (td TupleDesc) FormatValue(i int, value []byte) string {
if value == nil {
return "NULL"
}
return formatValue(td.Types[i].Enc, value)
}
func formatValue(enc Encoding, value []byte) string {
// todo(andy): complete cases
switch td.Types[i].Enc {
switch enc {
case Int8Enc:
v := readInt8(value)
return strconv.Itoa(int(v))
@@ -408,6 +421,19 @@ func (td TupleDesc) FormatValue(i int, value []byte) string {
case Float64Enc:
v := readFloat64(value)
return fmt.Sprintf("%f", v)
case YearEnc:
v := readYear(value)
return strconv.Itoa(int(v))
case DateEnc:
v := readDate(value)
return v.Format("2006-01-02")
//case TimeEnc:
// // todo(andy)
// v := readTime(value)
// return v
case DatetimeEnc:
v := readDatetime(value)
return v.Format(time.RFC3339)
case StringEnc:
return readString(value)
case ByteStringEnc: