Add memory mapped file implementation that allows for mapping a region of a file.

This commit is contained in:
Nick Tobey
2025-07-24 02:19:48 -07:00
committed by nick
parent c65f0869e3
commit f58fd327ab
3 changed files with 226 additions and 0 deletions
+59
View File
@@ -0,0 +1,59 @@
// Copyright 2025 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package file
import (
"encoding/binary"
"errors"
"fmt"
"io"
"os"
)
// Byte widths of the fixed-size unsigned integers decoded by GetUint64 and
// GetUint32. They are left untyped so they combine with int64 offsets.
const (
	uint32Size = 4
	uint64Size = 8
)
// FileReaderAt is a closable random-access reader that can additionally
// decode fixed-width unsigned integers located at a given byte offset.
type FileReaderAt interface {
	io.Closer
	io.ReaderAt

	// GetUint32 returns the 32-bit unsigned integer stored at offset.
	GetUint32(offset int64) uint32
	// GetUint64 returns the 64-bit unsigned integer stored at offset.
	GetUint64(offset int64) uint64
}
// mmapAlignment is the operating system's memory page size as reported by
// os.Getpagesize. Mmap rounds a requested file offset down to a multiple of
// this value before mapping.
var mmapAlignment = int64(os.Getpagesize())
// GetUint64 decodes the big-endian 64-bit unsigned integer held in the
// uint64Size bytes of the mapped data that start at offset.
//
// No explicit bounds check is performed: an offset outside the backing slice
// triggers a runtime panic from the slice expression.
func (m mmapData) GetUint64(offset int64) uint64 {
	end := offset + uint64Size
	word := m.data[offset:end]
	return binary.BigEndian.Uint64(word)
}
// GetUint32 decodes the big-endian 32-bit unsigned integer held in the
// uint32Size bytes of the mapped data that start at offset.
//
// No explicit bounds check is performed: an offset outside the backing slice
// triggers a runtime panic from the slice expression.
func (m mmapData) GetUint32(offset int64) uint32 {
	end := offset + uint32Size
	word := m.data[offset:end]
	return binary.BigEndian.Uint32(word)
}
// ReadAt copies bytes from the mapped data, starting at off, into p and
// reports how many bytes were copied.
//
// It returns an error if the reader has been closed (its data is nil) or if
// off is negative or greater than the length of the data. When fewer than
// len(p) bytes are available the copied count is returned with io.EOF.
func (m *mmapData) ReadAt(p []byte, off int64) (int, error) {
	src := m.data
	if src == nil {
		return 0, errors.New("mmap: closed")
	}
	if size := int64(len(src)); off < 0 || off > size {
		return 0, fmt.Errorf("mmap: invalid ReadAt offset %d", off)
	}
	copied := copy(p, src[off:])
	if copied == len(p) {
		return copied, nil
	}
	// copy never returns more than len(p), so reaching here means a short read.
	return copied, io.EOF
}
+82
View File
@@ -0,0 +1,82 @@
// Copyright 2025 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build linux || darwin
// +build linux darwin
package file
// Why not use Go's mmap package?
// It doesn't support mapping only part of a file.
import (
"errors"
"fmt"
"io"
"os"
"runtime"
"syscall"
)
// mmapData holds both the page-aligned mapped region, and the actual requested data range
type mmapData struct {
	// data is the caller-requested byte range, a sub-slice of originalData.
	// It is set to nil once the reader has been closed.
	data []byte
	// originalData is the full page-aligned mapping returned by syscall.Mmap;
	// it is the slice handed to syscall.Munmap on Close.
	originalData []byte
}
// Mmap memory-maps the byte range [offset, offset+length) of file for reading
// and returns a reader over exactly that range.
//
// The mapping itself starts at offset rounded down to a multiple of
// mmapAlignment, because the mmap system call needs an aligned file offset;
// the extra leading bytes are kept only so the region can be unmapped later.
//
// A negative offset or length is rejected with an error. A zero length returns
// an empty reader without making the system call, mirroring the Windows
// implementation. Callers should Close the returned reader; a finalizer
// releases the mapping as a fallback if they do not.
func Mmap(file *os.File, offset, length int64) (reader FileReaderAt, err error) {
	// Without this check a negative length can survive the system call (the
	// alignment adjustment is added to it) and then panic when slicing below,
	// leaking the mapping.
	if offset < 0 || length < 0 {
		return nil, fmt.Errorf("mmap: invalid offset %d or length %d", offset, length)
	}
	if length == 0 {
		// The mmap system call fails for zero-length mappings, and there is
		// nothing to map, so no finalizer is needed either.
		return &mmapData{}, nil
	}

	// Align offset to page boundary.
	alignedOffset := offset & ^(mmapAlignment - 1)
	adjustment := offset - alignedOffset
	adjustedLength := length + adjustment
	// Guard the int conversion below against overflow and 32-bit truncation.
	if adjustedLength < 0 || int64(int(adjustedLength)) != adjustedLength {
		return nil, fmt.Errorf("mmap: length %d is too large to map", length)
	}

	// Map the region.
	data, err := syscall.Mmap(int(file.Fd()), alignedOffset, int(adjustedLength), syscall.PROT_READ, syscall.MAP_SHARED)
	if err != nil {
		return nil, err
	}

	// Expose only the bytes from the actual offset onward; data is exactly
	// adjustedLength long, so the tail after adjustment is length bytes.
	m := &mmapData{
		data:         data[adjustment:],
		originalData: data,
	}
	runtime.SetFinalizer(m, (*mmapData).Close)
	return m, nil
}
// Close releases the mapped region and marks the reader as closed.
//
// It is safe to call more than once: after the first call data is nil and
// later calls return nil immediately. The finalizer is cleared, as the Windows
// implementation does, since there is nothing left for it to release. A reader
// that holds no mapped region is closed without calling syscall.Munmap, which
// would otherwise reject the empty slice.
func (m *mmapData) Close() error {
	if m.data == nil {
		return nil
	}
	region := m.originalData
	m.data, m.originalData = nil, nil
	runtime.SetFinalizer(m, nil)
	if len(region) == 0 {
		// Nothing was mapped, so there is nothing to unmap.
		return nil
	}
	return syscall.Munmap(region)
}
// ReadAt copies bytes from the mapped data, starting at off, into p and
// returns the number of bytes copied.
//
// A closed reader (nil data) and an offset that is negative or beyond the end
// of the data are reported as errors. A read that cannot fill p returns the
// partial count together with io.EOF.
//
// NOTE(review): a ReadAt method with the same receiver also appears in the
// platform-independent file of this change; confirm only one of the two is
// compiled on linux/darwin.
func (m *mmapData) ReadAt(p []byte, off int64) (int, error) {
	switch {
	case m.data == nil:
		return 0, errors.New("mmap: closed")
	case off < 0, off > int64(len(m.data)):
		return 0, fmt.Errorf("mmap: invalid ReadAt offset %d", off)
	}

	var err error
	n := copy(p, m.data[off:])
	if n < len(p) {
		err = io.EOF
	}
	return n, err
}
+85
View File
@@ -0,0 +1,85 @@
// Copyright 2025 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build windows
// +build windows
package file
import (
"os"
"runtime"
"syscall"
"unsafe"
)
// mmapData holds both the page-aligned mapped region, and the actual requested data range
type mmapData struct {
	// data is the caller-requested byte range inside the mapped view.
	// It is set to nil once the reader has been closed.
	data []byte
	// mapView is the address of the start of the aligned mapped view, as
	// returned by syscall.MapViewOfFile; it is passed to UnmapViewOfFile on Close.
	mapView uintptr
}
// Close unmaps the view backing the reader and marks it closed.
//
// A reader whose data is already nil is treated as closed and nothing is
// done. A reader with non-nil but empty data is marked closed without calling
// UnmapViewOfFile. Otherwise the finalizer is cleared and the view is unmapped.
func (m *mmapData) Close() error {
	switch {
	case m.data == nil:
		return nil
	case len(m.data) == 0:
		m.data = nil
		return nil
	}
	m.data = nil
	runtime.SetFinalizer(m, nil)
	return syscall.UnmapViewOfFile(m.mapView)
}
// Mmap memory-maps the byte range [offset, offset+length) of file for reading
// and returns a reader over exactly that range.
//
// The view itself starts at offset rounded down to the Windows allocation
// granularity, because MapViewOfFile rejects offsets that are not a multiple
// of it. A negative offset or length is rejected with syscall.EINVAL. If the
// file is empty or length is zero, an empty reader is returned without making
// any mapping calls. Callers should Close the returned reader; a finalizer
// unmaps the view as a fallback if they do not.
func Mmap(file *os.File, offset, length int64) (reader FileReaderAt, err error) {
	if offset < 0 || length < 0 {
		return nil, syscall.EINVAL
	}

	fi, err := file.Stat()
	if err != nil {
		return nil, err
	}

	fileSize := fi.Size()
	if fileSize == 0 || length == 0 {
		// If the requested range is 0 bytes, we don't need to make the syscall, or set the finalizer.
		return &mmapData{}, nil
	}

	// MapViewOfFile needs the file offset aligned to the allocation
	// granularity, which is 64KB on Windows and larger than the page size
	// reported by os.Getpagesize, so page alignment alone is not sufficient.
	const allocationGranularity = int64(64 << 10)
	alignedOffset := offset &^ (allocationGranularity - 1)
	adjustment := offset - alignedOffset
	adjustedLength := length + adjustment
	// Guard the int/uintptr conversions below against overflow and truncation.
	if adjustedLength < 0 || int64(int(adjustedLength)) != adjustedLength {
		return nil, syscall.EINVAL
	}

	fileSizeLow, fileSizeHigh := uint32(fileSize), uint32(fileSize>>32)
	fmap, err := syscall.CreateFileMapping(syscall.Handle(file.Fd()), nil, syscall.PAGE_READONLY, fileSizeHigh, fileSizeLow, nil)
	if err != nil {
		return nil, err
	}
	// The view keeps the mapping alive, so the mapping handle can be released on return.
	defer syscall.CloseHandle(fmap)

	offsetLow, offsetHigh := uint32(alignedOffset), uint32(alignedOffset>>32)
	mapView, err := syscall.MapViewOfFile(fmap, syscall.FILE_MAP_READ, offsetHigh, offsetLow, uintptr(adjustedLength))
	if err != nil {
		return nil, err
	}

	// Build a slice whose length and capacity are exactly the mapped view, so
	// large regions are not limited by a fixed-size array type and no slice
	// expression can reach past the mapped memory.
	view := unsafe.Slice((*byte)(unsafe.Pointer(mapView)), int(adjustedLength))
	m := &mmapData{
		data:    view[adjustment:],
		mapView: mapView,
	}
	runtime.SetFinalizer(m, (*mmapData).Close)
	return m, nil
}