diff --git a/go/libraries/utils/file/mmap.go b/go/libraries/utils/file/mmap.go index fd1ef44afa..1016f9ec28 100644 --- a/go/libraries/utils/file/mmap.go +++ b/go/libraries/utils/file/mmap.go @@ -15,11 +15,14 @@ package file import ( + "github.com/edsrzf/mmap-go" + "encoding/binary" "errors" "fmt" "io" "os" + "runtime" ) const ( @@ -27,24 +30,23 @@ const ( uint32Size = 4 ) -type FileReaderAt interface { - io.ReaderAt - io.Closer - GetUint64(offset int64) uint64 - GetUint32(offset int64) uint32 +// MmapData holds both the page-aligned mapped region, and the actual requested data range +type MmapData struct { + data []byte + originalData mmap.MMap } var mmapAlignment = int64(os.Getpagesize()) -func (m mmapData) GetUint64(offset int64) uint64 { +func (m MmapData) GetUint64(offset int64) uint64 { return binary.BigEndian.Uint64(m.data[offset : offset+uint64Size]) } -func (m mmapData) GetUint32(offset int64) uint32 { +func (m MmapData) GetUint32(offset int64) uint32 { return binary.BigEndian.Uint32(m.data[offset : offset+uint32Size]) } -func (m *mmapData) ReadAt(p []byte, off int64) (int, error) { +func (m *MmapData) ReadAt(p []byte, off int64) (int, error) { if m.data == nil { return 0, errors.New("mmap: closed") } @@ -57,3 +59,35 @@ func (m *mmapData) ReadAt(p []byte, off int64) (int, error) { } return n, nil } + +func Mmap(file *os.File, offset int64, length int) (reader *MmapData, err error) { + // Align offset to page boundary + alignedOffset := offset & ^(mmapAlignment - 1) + adjustment := offset - alignedOffset + adjustedLength := length + int(adjustment) + + // Map the region + mappedData, err := mmap.MapRegion(file, adjustedLength, mmap.RDONLY, 0, alignedOffset) + if err != nil { + return &MmapData{}, err + } + + // Return the adjusted slice starting at the actual offset + reader = &MmapData{ + data: mappedData[adjustment : adjustment+int64(length)], + originalData: mappedData, + } + + runtime.SetFinalizer(reader, (*MmapData).Close) + return reader, err +} + +func (m *MmapData) Close() error { + if m.data == nil { + return nil + } + m.data = nil + originalData := m.originalData + m.originalData = nil + return originalData.Unmap() +} diff --git a/go/libraries/utils/file/mmap_unix.go b/go/libraries/utils/file/mmap_unix.go deleted file mode 100644 index 67baad4fe5..0000000000 --- a/go/libraries/utils/file/mmap_unix.go +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright 2025 Dolthub, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build linux || darwin -// +build linux darwin - -package file - -// Why not use go's mmap package? -// mmap doesn't support mapping in only part of a file. - -import ( - "os" - "runtime" - "syscall" -) - -// mmapData holds both the page-aligned mapped region, and the actual requested data range -type mmapData struct { - data []byte - originalData []byte -} - -// mmap creates a memory-mapped region of the file -func Mmap(file *os.File, offset, length int64) (reader FileReaderAt, err error) { - // Align offset to page boundary - alignedOffset := offset & ^(mmapAlignment - 1) - adjustment := offset - alignedOffset - adjustedLength := length + adjustment - - // Map the region - data, err := syscall.Mmap(int(file.Fd()), alignedOffset, int(adjustedLength), syscall.PROT_READ, syscall.MAP_SHARED) - if err != nil { - return nil, err - } - - // Return the adjusted slice starting at the actual offset - reader = &mmapData{ - data: data[adjustment : adjustment+length], - originalData: data, - } - runtime.SetFinalizer(reader, FileReaderAt.Close) - return reader, err -} - -func (m *mmapData) Close() error { - if m.data == nil { - return nil - } - m.data = nil - originalData := m.originalData - m.originalData = nil - return syscall.Munmap(originalData) -} diff --git a/go/libraries/utils/file/mmap_windows.go b/go/libraries/utils/file/mmap_windows.go deleted file mode 100644 index b030d1ba87..0000000000 --- a/go/libraries/utils/file/mmap_windows.go +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright 2025 Dolthub, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build windows -// +build windows - -package file - -import ( - "os" - "runtime" - "syscall" - "unsafe" -) - -// mmapData holds both the page-aligned mapped region, and the actual requested data range -type mmapData struct { - data []byte - mapView uintptr // a pointer the start of the page-aligned mapped region -} - -func (r *mmapData) Close() error { - if r.data == nil { - return nil - } else if len(r.data) == 0 { - r.data = nil - return nil - } - r.data = nil - runtime.SetFinalizer(r, nil) - return syscall.UnmapViewOfFile(r.mapView) -} - -// Open memory-maps the named file for reading. -func Mmap(file *os.File, offset, length int64) (reader FileReaderAt, err error) { - fi, err := file.Stat() - if err != nil { - return nil, err - } - - fileSize := fi.Size() - if fileSize == 0 || length == 0 { - // If the requested range is 0 bytes, we don't need to make the syscall, or set the finalizer. - return &mmapData{}, nil - } - - // Align offset to allocation granularity (64KB on Windows) - alignedOffset := offset & ^(mmapAlignment - 1) - adjustment := offset - alignedOffset - adjustedLength := length + adjustment - - fileSizeLow, fileSizeHigh := uint32(fileSize), uint32(fileSize>>32) - fmap, err := syscall.CreateFileMapping(syscall.Handle(file.Fd()), nil, syscall.PAGE_READONLY, fileSizeHigh, fileSizeLow, nil) - if err != nil { - return nil, err - } - - defer syscall.CloseHandle(fmap) - offsetLow, offsetHigh := uint32(alignedOffset), uint32(alignedOffset>>32) - mapView, err := syscall.MapViewOfFile(fmap, syscall.FILE_MAP_READ, offsetHigh, offsetLow, uintptr(adjustedLength)) - if err != nil { - return nil, err - } - - data := (*[1 << 30]byte)(unsafe.Pointer(mapView))[adjustment : adjustment+length] - - reader = &mmapData{ - data: data, - mapView: mapView, - } - runtime.SetFinalizer(reader, FileReaderAt.Close) - - return reader, nil -} diff --git a/go/store/nbs/mmap_index_reader.go b/go/store/nbs/mmap_index_reader.go index 4c25f45e56..264a0125f4 100644 --- a/go/store/nbs/mmap_index_reader.go +++ b/go/store/nbs/mmap_index_reader.go @@ -50,7 +50,7 @@ type archiveIndexReader interface { // mmapIndexReader lazily loads archive index data from a memory-mapped file. type mmapIndexReader struct { - data file.FileReaderAt + data *file.MmapData indexSize uint64 byteSpanCount uint32 chunkCount uint32 @@ -74,7 +74,7 @@ func newMmapIndexReader(fileHandle *os.File, footer archiveFooter) (*mmapIndexRe suffixesOffset := chunkRefsOffset + int64(footer.chunkCount)*2*int64(uint32Size) // Memory map the entire index section - mappedData, err := file.Mmap(fileHandle, int64(indexSpan.offset), int64(indexSpan.length)) + mappedData, err := file.Mmap(fileHandle, int64(indexSpan.offset), int(indexSpan.length)) if err != nil { return nil, fmt.Errorf("failed to mmap index: %w", err) }