Use edsrzf/mmap-go for mmapped files instead of rolling our own implementation.

This commit is contained in:
Nick Tobey
2025-07-28 17:58:08 -04:00
parent aa9e68d884
commit d74299efa5
4 changed files with 44 additions and 160 deletions

View File

@@ -15,11 +15,14 @@
package file
import (
"github.com/edsrzf/mmap-go"
"encoding/binary"
"errors"
"fmt"
"io"
"os"
"runtime"
)
const (
@@ -27,24 +30,23 @@ const (
uint32Size = 4
)
type FileReaderAt interface {
io.ReaderAt
io.Closer
GetUint64(offset int64) uint64
GetUint32(offset int64) uint32
// MmapData is a view over a memory-mapped region of a file. It holds both the
// page-aligned mapped region, and the actual requested data range.
type MmapData struct {
	// data is the byte range the caller asked for; Mmap sets it to a sub-slice
	// of originalData. A nil data marks the value as closed.
	data []byte
	// originalData is the whole page-aligned mapping; Close unmaps it.
	originalData mmap.MMap
}
// mmapAlignment is the memory page size reported by os.Getpagesize. Mmap rounds
// requested offsets down to a multiple of this value using a bit mask, which
// assumes the value is a power of two.
var mmapAlignment = int64(os.Getpagesize())
func (m mmapData) GetUint64(offset int64) uint64 {
// GetUint64 decodes a big-endian uint64 from the uint64Size bytes of the mapped
// data that begin at offset. It panics if that range lies outside the data.
func (m MmapData) GetUint64(offset int64) uint64 {
	end := offset + uint64Size
	return binary.BigEndian.Uint64(m.data[offset:end])
}
func (m mmapData) GetUint32(offset int64) uint32 {
// GetUint32 decodes a big-endian uint32 from the uint32Size bytes of the mapped
// data that begin at offset. It panics if that range lies outside the data.
func (m MmapData) GetUint32(offset int64) uint32 {
	end := offset + uint32Size
	return binary.BigEndian.Uint32(m.data[offset:end])
}
func (m *mmapData) ReadAt(p []byte, off int64) (int, error) {
func (m *MmapData) ReadAt(p []byte, off int64) (int, error) {
if m.data == nil {
return 0, errors.New("mmap: closed")
}
@@ -57,3 +59,35 @@ func (m *mmapData) ReadAt(p []byte, off int64) (int, error) {
}
return n, nil
}
// Mmap memory-maps length bytes of file, starting at offset, for reading and
// returns a view over exactly that range.
//
// A mapping has to begin on a page boundary, so offset is rounded down to a
// multiple of mmapAlignment and the mapped length is extended by the same
// amount; the returned MmapData exposes only the requested bytes. A finalizer
// is registered so the mapping is released even if the caller never calls
// Close.
//
// offset and length must be non-negative. On failure the error is non-nil and
// the returned MmapData is empty and must not be used.
func Mmap(file *os.File, offset int64, length int) (reader *MmapData, err error) {
	// Reject negative arguments up front. A negative length would make the
	// slice expression below inverted, which panics after the region has
	// already been mapped.
	if offset < 0 || length < 0 {
		return &MmapData{}, fmt.Errorf("mmap: invalid range: offset %d, length %d", offset, length)
	}

	// Align offset to page boundary
	alignedOffset := offset & ^(mmapAlignment - 1)
	adjustment := offset - alignedOffset
	adjustedLength := length + int(adjustment)

	// Map the region
	mappedData, err := mmap.MapRegion(file, adjustedLength, mmap.RDONLY, 0, alignedOffset)
	if err != nil {
		return &MmapData{}, err
	}

	// Return the adjusted slice starting at the actual offset
	reader = &MmapData{
		data:         mappedData[adjustment : adjustment+int64(length)],
		originalData: mappedData,
	}
	runtime.SetFinalizer(reader, (*MmapData).Close)
	return reader, nil
}
// Close unmaps the underlying region and clears both slices so the value reads
// as closed afterwards. Calling Close on a value whose data is already nil is a
// no-op that returns nil.
func (m *MmapData) Close() error {
	if m.data != nil {
		region := m.originalData
		m.data, m.originalData = nil, nil
		return region.Unmap()
	}
	return nil
}

View File

@@ -1,65 +0,0 @@
// Copyright 2025 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build linux || darwin
// +build linux darwin
package file
// Why not use Go's mmap package?
// It doesn't support mapping only part of a file.
import (
"os"
"runtime"
"syscall"
)
// mmapData pairs the byte range a caller asked for with the page-aligned
// region that was actually mapped to provide it.
type mmapData struct {
	// data is the requested range; originalData is the whole mapping behind it.
	data, originalData []byte
}
// Mmap creates a read-only, shared memory-mapped view of length bytes of file
// starting at offset.
//
// The mapping has to begin on a page boundary, so offset is rounded down to a
// multiple of mmapAlignment and the mapped length is extended by the same
// amount; the returned reader exposes only the requested bytes. A finalizer is
// registered so the mapping is released even if the caller never calls Close.
//
// offset and length must be non-negative; otherwise syscall.EINVAL is returned.
func Mmap(file *os.File, offset, length int64) (reader FileReaderAt, err error) {
	// Reject negative arguments up front. A negative length would make the
	// slice expression below inverted, which panics after the region has
	// already been mapped.
	if offset < 0 || length < 0 {
		return nil, syscall.EINVAL
	}

	// Align offset to page boundary
	alignedOffset := offset & ^(mmapAlignment - 1)
	adjustment := offset - alignedOffset
	adjustedLength := length + adjustment

	// Map the region
	data, err := syscall.Mmap(int(file.Fd()), alignedOffset, int(adjustedLength), syscall.PROT_READ, syscall.MAP_SHARED)
	if err != nil {
		return nil, err
	}

	// Return the adjusted slice starting at the actual offset
	reader = &mmapData{
		data:         data[adjustment : adjustment+length],
		originalData: data,
	}
	runtime.SetFinalizer(reader, FileReaderAt.Close)
	return reader, nil
}
// Close unmaps the underlying region and clears both slices so the value reads
// as closed afterwards. Calling Close when data is already nil is a no-op that
// returns nil.
func (m *mmapData) Close() error {
	if m.data != nil {
		region := m.originalData
		m.data, m.originalData = nil, nil
		return syscall.Munmap(region)
	}
	return nil
}

View File

@@ -1,85 +0,0 @@
// Copyright 2025 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build windows
// +build windows
package file
import (
"os"
"runtime"
"syscall"
"unsafe"
)
// mmapData holds both the page-aligned mapped region, and the actual requested data range
type mmapData struct {
	data    []byte  // the requested byte range; nil once the value has been closed
	mapView uintptr // a pointer to the start of the page-aligned mapped region
}
// Close releases the mapped view, if there is one, and marks the reader as
// closed by clearing data. A reader whose data is nil or empty is only cleared:
// no view is unmapped and the finalizer is left untouched.
func (r *mmapData) Close() error {
	// len of a nil slice is 0, so this one guard covers both the already-closed
	// case and the empty-range case.
	if len(r.data) == 0 {
		r.data = nil
		return nil
	}
	r.data = nil
	runtime.SetFinalizer(r, nil)
	return syscall.UnmapViewOfFile(r.mapView)
}
// Mmap memory-maps length bytes of file, starting at offset, for reading.
//
// The view has to begin on an aligned offset, so offset is rounded down to a
// multiple of mmapAlignment and the mapped length is extended by the same
// amount; the returned reader exposes only the requested bytes. A finalizer is
// registered so the view is released even if the caller never calls Close.
//
// If the file is empty or length is 0, no mapping is created and an empty
// reader is returned. offset and length must be non-negative; otherwise
// syscall.EINVAL is returned.
func Mmap(file *os.File, offset, length int64) (reader FileReaderAt, err error) {
	// Reject negative arguments up front instead of letting them reach the
	// mapping call or the slice expression below.
	if offset < 0 || length < 0 {
		return nil, syscall.EINVAL
	}

	fi, err := file.Stat()
	if err != nil {
		return nil, err
	}

	fileSize := fi.Size()
	if fileSize == 0 || length == 0 {
		// If the requested range is 0 bytes, we don't need to make the syscall, or set the finalizer.
		return &mmapData{}, nil
	}

	// Align offset down to a multiple of mmapAlignment.
	// NOTE(review): MapViewOfFile expects the offset to be a multiple of the
	// system allocation granularity (typically 64KB on Windows) — confirm that
	// mmapAlignment holds that value on this platform.
	alignedOffset := offset & ^(mmapAlignment - 1)
	adjustment := offset - alignedOffset
	adjustedLength := length + adjustment

	fileSizeLow, fileSizeHigh := uint32(fileSize), uint32(fileSize>>32)
	fmap, err := syscall.CreateFileMapping(syscall.Handle(file.Fd()), nil, syscall.PAGE_READONLY, fileSizeHigh, fileSizeLow, nil)
	if err != nil {
		return nil, err
	}
	// The mapping handle is only needed to create the view.
	defer syscall.CloseHandle(fmap)

	offsetLow, offsetHigh := uint32(alignedOffset), uint32(alignedOffset>>32)
	mapView, err := syscall.MapViewOfFile(fmap, syscall.FILE_MAP_READ, offsetHigh, offsetLow, uintptr(adjustedLength))
	if err != nil {
		return nil, err
	}

	// Build the slice from the view's actual length. Casting to a fixed
	// 1 GiB array type would panic for larger views and would leave the slice
	// capacity reaching past the end of the mapping.
	view := unsafe.Slice((*byte)(unsafe.Pointer(mapView)), adjustedLength)

	reader = &mmapData{
		data:    view[adjustment:],
		mapView: mapView,
	}
	runtime.SetFinalizer(reader, FileReaderAt.Close)
	return reader, nil
}

View File

@@ -50,7 +50,7 @@ type archiveIndexReader interface {
// mmapIndexReader lazily loads archive index data from a memory-mapped file.
type mmapIndexReader struct {
data file.FileReaderAt
data *file.MmapData
indexSize uint64
byteSpanCount uint32
chunkCount uint32
@@ -74,7 +74,7 @@ func newMmapIndexReader(fileHandle *os.File, footer archiveFooter) (*mmapIndexRe
suffixesOffset := chunkRefsOffset + int64(footer.chunkCount)*2*int64(uint32Size)
// Memory map the entire index section
mappedData, err := file.Mmap(fileHandle, int64(indexSpan.offset), int64(indexSpan.length))
mappedData, err := file.Mmap(fileHandle, int64(indexSpan.offset), int(indexSpan.length))
if err != nil {
return nil, fmt.Errorf("failed to mmap index: %w", err)
}