Allow rendering a text preview using custom fonts

2026-02-18 03:18:52 -06:00 · 2021-10-21 16:01:30 +02:00
parent 54d450eddd
commit 38acfb1dbd
6 changed files with 620 additions and 61 deletions
--- a/thumbnails/pkg/config/config.go
+++ b/thumbnails/pkg/config/config.go
@@ -66,6 +66,7 @@ type Thumbnail struct {
 	CS3AllowInsecure    bool              `ocisConfig:"cs3_allow_insecure"`
 	RevaGateway         string            `ocisConfig:"reva_gateway"`
 	WebdavNamespace     string            `ocisConfig:"webdav_namespace"`
+	FontMapFile         string            `ocisConfig:"font_map_file"`
 }

 // New initializes a new configuration with or without defaults.
--- a/thumbnails/pkg/config/mappings.go
+++ b/thumbnails/pkg/config/mappings.go
@@ -91,6 +91,10 @@ func structMappings(cfg *Config) []shared.EnvBinding {
 			EnvVars:     []string{"THUMBNAILS_GRPC_NAMESPACE"},
 			Destination: &cfg.Server.Namespace,
 		},
+		{
+			EnvVars:     []string{"THUMBNAILS_TXT_FONTMAP_FILE"},
+			Destination: &cfg.Thumbnail.FontMapFile,
+		},
 		{
 			EnvVars:     []string{"THUMBNAILS_FILESYSTEMSTORAGE_ROOT"},
 			Destination: &cfg.Thumbnail.FileSystemStorage.RootDirectory,
--- a/thumbnails/pkg/preprocessor/fontloader.go
+++ b/thumbnails/pkg/preprocessor/fontloader.go
@@ -0,0 +1,188 @@
+package preprocessor
+
+import (
+	"encoding/json"
+	"os"
+	"path/filepath"
+	"time"
+
+	"github.com/owncloud/ocis/ocis-pkg/sync"
+	"golang.org/x/image/font"
+	"golang.org/x/image/font/gofont/goregular"
+	"golang.org/x/image/font/opentype"
+)
+
+// FontMap maps a script name to the font file to be used for that script.
+// DefaultFont is used when there isn't a matching script in the map.
+//
+// For cases like Japanese where multiple scripts are used, we rely on the text
+// analyzer to use the script which is unique to Japanese (Hiragana or Katakana)
+// even if it has to overwrite the "official" detected script (Han). This means
+// that "Han" should be used just for Chinese while "Hiragana" and "Katakana"
+// should be used for Japanese.
+type FontMap struct {
+	FontMap     map[string]string `json:"fontMap"`
+	DefaultFont string            `json:"defaultFont"`
+}
+
+// FontMapData holds the FontMap loaded from a font map file (in FMap) and the
+// location of that file (in FLoc).
+type FontMapData struct {
+	FMap *FontMap
+	FLoc string
+}
+
+// LoadedFace contains the location of the font file used (FontFile) and the
+// loaded face (font.Face) ready to be used.
+type LoadedFace struct {
+	FontFile string
+	Face     font.Face
+}
+
+// FontLoader loads and caches font faces. Use NewFontLoader to get an instance.
+type FontLoader struct {
+	faceCache   sync.Cache
+	fontMapData *FontMapData
+	faceOpts    *opentype.FaceOptions
+}
+
+// NewFontLoader creates a new FontLoader based on the fontMapFile. The
+// FaceOptions will be the same for all the fonts loaded by this instance.
+// Note that only the fonts described in the fontMapFile will be used. If the
+// fontMapFile is empty, no file is read and an empty FontMap is used.
+//
+// The fontMapFile has the following structure
+//	{
+//		"fontMap": {
+//			"Han": "packaged/myFont-CJK.otf",
+//			"Arabic": "packaged/myFont-Arab.otf",
+//			"Latin": "/fonts/regular/myFont.otf"
+//		},
+//		"defaultFont": "/fonts/regular/myFont.otf"
+//	}
+//
+// The fontMapFile contains paths to where the fonts are located in the FS.
+// Absolute paths can be used as shown above. If a relative path is used,
+// it will be relative to the fontMapFile location. This should make the
+// packaging easier since all the fonts can be placed in the same directory
+// where the fontMapFile is, or in inner directories.
+//
+// An error is returned if the fontMapFile can't be opened or decoded.
+func NewFontLoader(fontMapFile string, faceOpts *opentype.FaceOptions) (*FontLoader, error) {
+	mapData := &FontMapData{
+		FMap: &FontMap{},
+		FLoc: fontMapFile,
+	}
+
+	// without a fontMapFile there is nothing to decode: keep the empty FontMap
+	if fontMapFile != "" {
+		fd, err := os.Open(fontMapFile)
+		if err != nil {
+			return nil, err
+		}
+		defer fd.Close()
+
+		if err := json.NewDecoder(fd).Decode(mapData.FMap); err != nil {
+			return nil, err
+		}
+	}
+
+	loader := &FontLoader{
+		faceCache:   sync.NewCache(5),
+		fontMapData: mapData,
+		faceOpts:    faceOpts,
+	}
+	return loader, nil
+}
+
+// LoadFaceForScript loads and returns the font face to be used for that script
+// according to the FontMap set when the FontLoader was created. If the script
+// doesn't have an associated font, the default font of the FontMap will be
+// used; if there is no default font either, the embedded Go regular font is
+// used. Note that the default font might not be able to handle properly the
+// script.
+//
+// Loaded faces are kept in the loader's cache, keyed by the resolved font file.
+func (fl *FontLoader) LoadFaceForScript(script string) (*LoadedFace, error) {
+	fMap := fl.fontMapData.FMap
+
+	fontFile, mapped := fMap.FontMap[script]
+	if !mapped {
+		fontFile = fMap.DefaultFont
+	}
+
+	// relative paths are resolved against the directory of the font map file
+	if fontFile != "" && !filepath.IsAbs(fontFile) {
+		fontFile = filepath.Join(filepath.Dir(fl.fontMapData.FLoc), fontFile)
+	}
+
+	// reuse the face if it's still cached
+	if entry := fl.faceCache.Load(fontFile); entry != nil {
+		return entry.V.(*LoadedFace), nil
+	}
+
+	// no font file configured -> use the embedded Go regular font
+	fontData := goregular.TTF
+	if fontFile != "" {
+		// opentype.ParseReaderAt seems to require to keep the file opened
+		// so read the font file into memory
+		fileData, err := os.ReadFile(fontFile)
+		if err != nil {
+			return nil, err
+		}
+		fontData = fileData
+	}
+
+	parsedFont, err := opentype.Parse(fontData)
+	if err != nil {
+		return nil, err
+	}
+
+	face, err := opentype.NewFace(parsedFont, fl.faceOpts)
+	if err != nil {
+		return nil, err
+	}
+
+	loaded := &LoadedFace{FontFile: fontFile, Face: face}
+	fl.faceCache.Store(fontFile, loaded, time.Now().Add(10*time.Minute))
+	return loaded, nil
+}
+
+// GetFaceOptSize returns the font size (in points) of the FaceOptions used by
+// this loader. If the loader was created without FaceOptions, the opentype
+// default size (12) is returned instead of dereferencing a nil pointer.
+func (fl *FontLoader) GetFaceOptSize() float64 {
+	if fl.faceOpts == nil {
+		// opentype.NewFace falls back to its defaults for nil options
+		return 12
+	}
+	return fl.faceOpts.Size
+}
+
+// GetFaceOptDPI returns the DPI of the FaceOptions used by this loader. If the
+// loader was created without FaceOptions, the opentype default DPI (72) is
+// returned instead of dereferencing a nil pointer.
+func (fl *FontLoader) GetFaceOptDPI() float64 {
+	if fl.faceOpts == nil {
+		// opentype.NewFace falls back to its defaults for nil options
+		return 72
+	}
+	return fl.faceOpts.DPI
+}
+
+// GetScriptList returns the scripts that have a font in the FontMap, plus the
+// "Common" and "Inherited" scripts if they aren't part of the FontMap already.
+// The order of the returned list isn't guaranteed.
+func (fl *FontLoader) GetScriptList() []string {
+	fontMap := fl.fontMapData.FMap.FontMap
+
+	scripts := make([]string, 0, len(fontMap)+2)
+	for script := range fontMap {
+		scripts = append(scripts, script)
+	}
+
+	// these two scripts are always part of the list
+	for _, extra := range [...]string{"Common", "Inherited"} {
+		if _, inFontMap := fontMap[extra]; !inFontMap {
+			scripts = append(scripts, extra)
+		}
+	}
+	return scripts
+}
--- a/thumbnails/pkg/preprocessor/preprocessor.go
+++ b/thumbnails/pkg/preprocessor/preprocessor.go
@@ -2,21 +2,17 @@ package preprocessor

 import (
 	"bufio"
-	"github.com/golang/freetype"
-	"github.com/golang/freetype/truetype"
-	"github.com/pkg/errors"
-	"golang.org/x/image/font"
-	"golang.org/x/image/font/gofont/goregular"
 	"image"
 	"image/draw"
 	"io"
+	"math"
 	"mime"
 	"strings"
-)

-const (
-	fontSize         = 12
-	spacing  float64 = 1.5
+	"github.com/pkg/errors"
+	"golang.org/x/image/font"
+	"golang.org/x/image/font/opentype"
+	"golang.org/x/image/math/fixed"
 )

 type FileConverter interface {
@@ -33,73 +29,160 @@ func (i ImageDecoder) Convert(r io.Reader) (image.Image, error) {
 	return img, nil
 }

-type TxtToImageConverter struct{}
+type TxtToImageConverter struct {
+	fontLoader *FontLoader // provides the font faces used to draw the text
+}

 func (t TxtToImageConverter) Convert(r io.Reader) (image.Image, error) {
 	img := image.NewRGBA(image.Rect(0, 0, 640, 480))
-	draw.Draw(img, img.Bounds(), image.White, image.Point{}, draw.Src)

-	c := freetype.NewContext()
-	// Ignoring the error since we are using the embedded Golang font.
-	// This shouldn't return an error.
-	f, _ := truetype.Parse(goregular.TTF)
-	c.SetFont(f)
-	c.SetFontSize(fontSize)
-	c.SetClip(img.Bounds())
-	c.SetDst(img)
-	c.SetSrc(image.Black)
-	c.SetHinting(font.HintingFull)
-	pt := freetype.Pt(10, 10+int(c.PointToFixed(fontSize)>>6))
+	imgBounds := img.Bounds()
+	draw.Draw(img, imgBounds, image.White, image.Point{}, draw.Src)
+
+	fontSizeAsInt := int(math.Ceil(t.fontLoader.GetFaceOptSize()))
+	margin := 10
+	minX := fixed.I(imgBounds.Min.X + margin)
+	maxX := fixed.I(imgBounds.Max.X - margin)
+	maxY := fixed.I(imgBounds.Max.Y - margin)
+	initialPoint := fixed.P(imgBounds.Min.X+margin, imgBounds.Min.Y+margin+fontSizeAsInt)
+	canvas := &font.Drawer{
+		Dst: img,
+		Src: image.Black,
+		Dot: initialPoint,
+	}
+
+	scriptList := t.fontLoader.GetScriptList()
+	textAnalyzer := NewTextAnalyzer(scriptList)
+	taOpts := AnalysisOpts{
+		UseMergeMap: true,
+		MergeMap:    DefaultMergeMap,
+	}

 	scanner := bufio.NewScanner(r)
+Scan: // Label for the scanner loop, so we can break it easily
 	for scanner.Scan() {
 		txt := scanner.Text()
-		cs := chunks(txt, 80)
-		for _, s := range cs {
-			_, err := c.DrawString(strings.TrimSpace(s), pt)
-			if err != nil {
-				return nil, err
-			}
-			pt.Y += c.PointToFixed(fontSize * spacing)
-			if pt.Y.Round() >= img.Bounds().Dy() {
-				return img, scanner.Err()
+		height := fixed.I(fontSizeAsInt) // reset to default height
+		if txt != "" {
+			textResult := textAnalyzer.AnalyzeString(txt, taOpts)
+			textResult.MergeCommon(DefaultMergeMap)
+
+			for _, sRange := range textResult.ScriptRanges {
+				targetFontFace, _ := t.fontLoader.LoadFaceForScript(sRange.TargetScript)
+				// if the target script is "_unknown" it's expected that the loaded face
+				// uses the default font
+				faceHeight := targetFontFace.Face.Metrics().Height
+				if faceHeight > height {
+					height = faceHeight
+				}
+
+				canvas.Face = targetFontFace.Face
+				initialByte := sRange.Low
+				for _, sRangeSpace := range sRange.Spaces {
+					if canvas.Dot.Y > maxY {
+						break Scan
+					}
+					drawWord(canvas, textResult.Text[initialByte:sRangeSpace], minX, maxX, height, maxY, true)
+					initialByte = sRangeSpace
+				}
+				if initialByte <= sRange.High {
+					// some bytes left to be written
+					if canvas.Dot.Y > maxY {
+						break Scan
+					}
+					drawWord(canvas, textResult.Text[initialByte:sRange.High+1], minX, maxX, height, maxY, len(sRange.Spaces) > 0)
+				}
 			}
 		}
+		canvas.Dot.X = minX
+		canvas.Dot.Y += height.Mul(fixed.Int26_6(1<<6 + 1<<5)) // height * 1.5

+		if canvas.Dot.Y > maxY {
+			break
+		}
 	}
 	return img, scanner.Err()
 }

-// Code from https://stackoverflow.com/a/61469854
-// Written By Igor Mikushkin
-func chunks(s string, chunkSize int) []string {
-	if chunkSize >= len(s) {
-		return []string{s}
-	}
-	var chunks []string
-	chunk := make([]rune, chunkSize)
-	length := 0
-	for _, r := range s {
-		chunk[length] = r
-		length++
-		if length == chunkSize {
-			chunks = append(chunks, string(chunk))
-			length = 0
+// drawWord draws the word in the canvas. minX and maxX define the drawable
+// range (X axis) where the word can be drawn (in case the word is too big and
+// doesn't fit in the canvas), and incY defines the increment in the Y axis if
+// we need to draw the word in a new line. Drawing stops once a wrap moves the
+// dot past maxY; goToNewLine allows moving the whole word to a new line.
+// Note that the word will likely start with a white space char
+func drawWord(canvas *font.Drawer, word string, minX, maxX, incY, maxY fixed.Int26_6, goToNewLine bool) {
+	bbox, _ := canvas.BoundString(word)
+	if bbox.Max.X <= maxX {
+		// word fits in the current line
+		canvas.DrawString(word)
+	} else {
+		// word doesn't fit -> retry in a new line
+		trimmedWord := strings.TrimSpace(word)
+		oldDot := canvas.Dot
+
+		canvas.Dot.X = minX
+		canvas.Dot.Y += incY
+		bbox2, _ := canvas.BoundString(trimmedWord)
+		if goToNewLine && bbox2.Max.X <= maxX {
+			if canvas.Dot.Y > maxY {
+				// Don't draw if we're over the Y limit
+				return
+			}
+			canvas.DrawString(trimmedWord)
+		} else {
+			// word doesn't fit in a new line -> draw as many chars as possible
+			canvas.Dot = oldDot
+			for _, char := range trimmedWord {
+				charBytes := []byte(string(char))
+				bbox3, _ := canvas.BoundBytes(charBytes)
+				if bbox3.Max.X > maxX {
+					canvas.Dot.X = minX
+					canvas.Dot.Y += incY
+					if canvas.Dot.Y > maxY {
+						// Don't draw if we're over the Y limit
+						return
+					}
+				}
+				canvas.DrawBytes(charBytes)
+			}
 		}
 	}
-	if length > 0 {
-		chunks = append(chunks, string(chunk[:length]))
-	}
-	return chunks
 }

-func ForType(mimeType string) FileConverter {
+func ForType(mimeType string, opts map[string]interface{}) FileConverter {
 	// We can ignore the error here because we parse it in IsMimeTypeSupported before and if it fails
 	// return the service call. So we should only get here when the mimeType parses fine.
 	mimeType, _, _ = mime.ParseMediaType(mimeType)
 	switch mimeType {
 	case "text/plain":
-		return TxtToImageConverter{}
+		fontFileMap := ""
+		fontFaceOpts := &opentype.FaceOptions{
+			Size:    12,
+			DPI:     72,
+			Hinting: font.HintingNone,
+		}
+
+		if optedFontFileMap, ok := opts["fontFileMap"]; ok {
+			if stringFontFileMap, ok := optedFontFileMap.(string); ok {
+				fontFileMap = stringFontFileMap
+			}
+		}
+
+		if optedFontFaceOpts, ok := opts["fontFaceOpts"]; ok {
+			if typedFontFaceOpts, ok := optedFontFaceOpts.(*opentype.FaceOptions); ok {
+				fontFaceOpts = typedFontFaceOpts
+			}
+		}
+
+		fontLoader, err := NewFontLoader(fontFileMap, fontFaceOpts)
+		if err != nil {
+			// if couldn't create the FontLoader with the specified fontFileMap,
+			// try to use the default font
+			fontLoader, _ = NewFontLoader("", fontFaceOpts)
+		}
+		return TxtToImageConverter{
+			fontLoader: fontLoader,
+		}
 	default:
 		return ImageDecoder{}
 	}
--- a/thumbnails/pkg/preprocessor/textanalyzer.go
+++ b/thumbnails/pkg/preprocessor/textanalyzer.go
@@ -0,0 +1,269 @@
+package preprocessor
+
+import (
+	"unicode"
+)
+
+// DefaultScripts is a default list of scripts to be analyzed within a string.
+//
+// Runes whose script isn't present in the list given to the TextAnalyzer
+// aren't attached to the preceding script: AnalyzeString reports them in
+// their own range with the "_unknown" target script. For example, "Avestan"
+// runes (which aren't in this list) end up in an "_unknown" range.
+//
+// Punctuation symbols are usually considered part of the "Common" script
+var DefaultScripts = []string{
+	"Arabic",
+	"Common",
+	"Devanagari",
+	"Han",
+	"Hangul",
+	"Hiragana",
+	"Inherited",
+	"Katakana",
+	"Latin",
+}
+
+// MergeMap is a convenient map[string]map[string]string type used to merge
+// multiple scripts into one. This is mainly used for the Japanese language,
+// which uses the "Han", "Hiragana" and "Katakana" scripts.
+//
+// The map contains the expected previous script as first key, the expected
+// current script as second key, and the resulting script (if both keys
+// match) as value
+type MergeMap map[string]map[string]string
+
+// DefaultMergeMap is the default MergeMap containing info for the Japanese scripts
+var DefaultMergeMap = MergeMap{
+	"Han": map[string]string{
+		"Hiragana": "Hiragana",
+		"Katakana": "Katakana",
+	},
+	"Hiragana": map[string]string{
+		"Han":      "Hiragana",
+		"Katakana": "Hiragana",
+	},
+	"Katakana": map[string]string{
+		"Han":      "Katakana",
+		"Hiragana": "Hiragana",
+	},
+}
+
+// AnalysisOpts are the analysis options. MergeMap is only used if UseMergeMap is set.
+type AnalysisOpts struct {
+	UseMergeMap bool
+	MergeMap    MergeMap
+}
+
+// ScriptRange is a script range. The range should be attached to a string which
+// could contain multiple scripts. The "TargetScript" will go from bytes "Low" to
+// "High" (both inclusive), and contains a "RuneCount" number of runes or chars
+// (mostly for debugging purposes).
+// Spaces contains the byte offsets (inside the range) of the runes that are
+// considered as white space.
+type ScriptRange struct {
+	Low, High    int
+	Spaces       []int
+	TargetScript string
+	RuneCount    int
+}
+
+// TextAnalysis is the result of a text analysis. It contains the analyzed text,
+// a list of script ranges (see the ScriptRange type) and a map containing how
+// many runes have been detected for a particular script.
+type TextAnalysis struct {
+	ScriptRanges []ScriptRange
+	RuneCount    map[string]int
+	Text         string
+}
+
+// TextAnalyzer detects the scripts used in a piece of text. It contains
+// private members, so it should be created via the NewTextAnalyzer function.
+type TextAnalyzer struct {
+	scripts         map[string]*unicode.RangeTable
+	scriptListCache []string
+}
+
+// NewTextAnalyzer creates a new TextAnalyzer. A list of scripts must be
+// provided. You can use the "DefaultScripts" variable for a default list,
+// although it doesn't contain all the available scripts.
+// See the unicode.Scripts variable (in the unicode package) for a
+// full list.
+//
+// Script names that aren't present in unicode.Scripts are ignored: they have
+// no range table, and matching runes against a nil table would panic.
+func NewTextAnalyzer(scriptList []string) TextAnalyzer {
+	scriptRanges := make(map[string]*unicode.RangeTable, len(scriptList))
+	knownScripts := make([]string, 0, len(scriptList))
+	for _, script := range scriptList {
+		table, known := unicode.Scripts[script]
+		if !known {
+			// invalid script name (for example a typo in the font map)
+			continue
+		}
+		scriptRanges[script] = table
+		knownScripts = append(knownScripts, script)
+	}
+	return TextAnalyzer{
+		scripts:         scriptRanges,
+		scriptListCache: knownScripts,
+	}
+}
+
+// AnalyzeString analyzes the target string using the specified options.
+// A TextAnalysis will be returned with the result of the analysis: the string
+// is split into consecutive ScriptRanges (byte offsets), one per run of runes
+// sharing the same script. Runes that don't match any of the analyzer's
+// scripts are reported with the "_unknown" script.
+//
+// If opts.UseMergeMap is set, a script change that is listed in opts.MergeMap
+// doesn't start a new range; the current range is extended and its target
+// script is replaced with the one from the map.
+func (ta *TextAnalyzer) AnalyzeString(word string, opts AnalysisOpts) TextAnalysis {
+	analysis := TextAnalysis{
+		RuneCount: make(map[string]int),
+		Text:      word,
+	}
+	var lastRange *ScriptRange
+
+	runeCount := 0
+	for wordIndex, char := range word {
+		// if we can't match with a known script, the char is tagged as "_unknown"
+		script := "_unknown"
+		for scriptIndex, scriptFound := range ta.scriptListCache {
+			if unicode.Is(ta.scripts[scriptFound], char) {
+				if scriptIndex > 3 {
+					// we might expect more chars with the same script
+					// so move the script first to match it faster next time
+					ta.reorderScriptList(scriptFound)
+				}
+				script = scriptFound
+				// a rune belongs to a single script, so there is no need to
+				// check the remaining ones
+				break
+			}
+		}
+
+		isWhiteSpace := unicode.Is(unicode.White_Space, char)
+		if lastRange == nil {
+			runeCount = 1
+			lastRange = &ScriptRange{
+				Low:          wordIndex,
+				Spaces:       make([]int, 0),
+				TargetScript: script,
+			}
+		} else {
+			if script != lastRange.TargetScript {
+				if opts.UseMergeMap {
+					// This option mainly target japanese chars; multiple scripts can be used
+					// in the same piece of text (Han, Hiragana and Katakana)
+					// Instead of starting a new range, adjust the target script of the last range
+					if expCurrent, currentOk := opts.MergeMap[lastRange.TargetScript]; currentOk {
+						if expFinal, finalOk := expCurrent[script]; finalOk {
+							lastRange.TargetScript = expFinal
+							if isWhiteSpace {
+								lastRange.Spaces = append(lastRange.Spaces, wordIndex)
+							}
+							runeCount++
+							continue
+						}
+					}
+				}
+
+				// close the current range and start a new one for the new script
+				lastRange.High = wordIndex - 1
+				lastRange.RuneCount = runeCount
+				analysis.ScriptRanges = append(analysis.ScriptRanges, *lastRange)
+				analysis.RuneCount[lastRange.TargetScript] += runeCount
+				lastRange = &ScriptRange{
+					Low:          wordIndex,
+					Spaces:       make([]int, 0),
+					TargetScript: script,
+				}
+				runeCount = 1
+			} else {
+				runeCount++
+			}
+		}
+		if isWhiteSpace {
+			lastRange.Spaces = append(lastRange.Spaces, wordIndex)
+		}
+	}
+
+	if lastRange != nil {
+		// close the last range
+		lastRange.High = len(word) - 1
+		lastRange.RuneCount = runeCount
+		analysis.RuneCount[lastRange.TargetScript] += runeCount
+		analysis.ScriptRanges = append(analysis.ScriptRanges, *lastRange)
+	}
+	return analysis
+}
+
+// reorderScriptList moves the matched script to the first position of the
+// scriptListCache in order to speed up the next script searches. A "Latin"
+// script is expected to be surrounded by "Latin" chars, although "Common"
+// script chars might be present too. The relative order of the other scripts
+// is kept.
+func (ta *TextAnalyzer) reorderScriptList(matchedScript string) {
+	pos := -1
+	for i := range ta.scriptListCache {
+		if ta.scriptListCache[i] == matchedScript {
+			pos = i
+			break
+		}
+	}
+	if pos <= 0 {
+		// either the script isn't in the list or it's already the first element
+		return
+	}
+
+	// build a new list: matched script + scripts before it + scripts after it
+	reordered := make([]string, 0, len(ta.scriptListCache))
+	reordered = append(reordered, matchedScript)
+	reordered = append(reordered, ta.scriptListCache[:pos]...)
+	reordered = append(reordered, ta.scriptListCache[pos+1:]...)
+	ta.scriptListCache = reordered
+}
+
+// MergeCommon changes the "Common" (and "Inherited") script to the one used in
+// the previous script range. The ranges will be readjusted and merged if
+// they're adjacent. This naive approach should be good enough for normal use
+// cases
+//
+// The MergeMap is needed in case of the japanese language: the ranges
+// "Han"-"Common"-"Katakana" might be replaced to "Han"-"Hiragana"-"Katakana"
+// However, the ranges should be merged together into a big "Hiragana" range.
+// If the MergeMap isn't needed, use an empty one
+//
+// Both the per-range RuneCount and the per-script RuneCount map are kept in
+// sync with the merged ranges. The "Common" and "Inherited" entries, as well
+// as the entries that end up with no runes, are removed from the map.
+// Calling it on an analysis without ranges is a no-op for the ranges.
+func (tr *TextAnalysis) MergeCommon(mergeMap MergeMap) {
+	var finalRanges []ScriptRange
+	var previousRange *ScriptRange
+	for _, sRange := range tr.ScriptRanges {
+		if previousRange == nil {
+			previousRange = &ScriptRange{}
+			*previousRange = sRange
+			continue
+		}
+
+		if previousRange.TargetScript == sRange.TargetScript {
+			// same script: just extend the previous range
+			previousRange.High = sRange.High
+			previousRange.Spaces = append(previousRange.Spaces, sRange.Spaces...)
+			previousRange.RuneCount += sRange.RuneCount
+		} else if sRange.TargetScript == "Common" || sRange.TargetScript == "Inherited" {
+			// new range will be absorbed into the previous one
+			previousRange.High = sRange.High
+			previousRange.Spaces = append(previousRange.Spaces, sRange.Spaces...)
+			previousRange.RuneCount += sRange.RuneCount
+			tr.RuneCount[previousRange.TargetScript] += sRange.RuneCount
+			tr.RuneCount[sRange.TargetScript] -= sRange.RuneCount
+		} else if previousRange.TargetScript == "Common" || previousRange.TargetScript == "Inherited" {
+			// might happen if the text starts with a Common script: the
+			// previous range takes the script of the new one
+			tr.RuneCount[sRange.TargetScript] += previousRange.RuneCount
+			tr.RuneCount[previousRange.TargetScript] -= previousRange.RuneCount
+			previousRange.TargetScript = sRange.TargetScript
+			previousRange.High = sRange.High
+			previousRange.Spaces = append(previousRange.Spaces, sRange.Spaces...)
+			previousRange.RuneCount += sRange.RuneCount
+		} else if expFinal, finalOk := mergeMap[previousRange.TargetScript][sRange.TargetScript]; finalOk {
+			// both ranges are merged into one using the script from the merge
+			// map: move the runes of both ranges to the resulting script
+			tr.RuneCount[previousRange.TargetScript] -= previousRange.RuneCount
+			tr.RuneCount[sRange.TargetScript] -= sRange.RuneCount
+			tr.RuneCount[expFinal] += previousRange.RuneCount + sRange.RuneCount
+			previousRange.TargetScript = expFinal
+			previousRange.High = sRange.High
+			previousRange.Spaces = append(previousRange.Spaces, sRange.Spaces...)
+			previousRange.RuneCount += sRange.RuneCount
+		} else {
+			// unrelated scripts: the previous range is final, start a new one
+			finalRanges = append(finalRanges, *previousRange)
+			*previousRange = sRange
+		}
+	}
+
+	if previousRange != nil {
+		// previousRange is nil only if there weren't any ranges to merge
+		finalRanges = append(finalRanges, *previousRange)
+	}
+	tr.ScriptRanges = finalRanges
+	delete(tr.RuneCount, "Common")
+	delete(tr.RuneCount, "Inherited")
+	for index, rCount := range tr.RuneCount {
+		if rCount == 0 {
+			delete(tr.RuneCount, index)
+		}
+	}
+}
--- a/thumbnails/pkg/service/v0/service.go
+++ b/thumbnails/pkg/service/v0/service.go
@@ -41,6 +41,9 @@ func NewService(opts ...Option) v0proto.ThumbnailServiceHandler {
 		cs3Source:    options.CS3Source,
 		logger:       logger,
 		cs3Client:    options.CS3Client,
+		preprocessorOpts: PreprocessorOpts{
+			TxtFontFileMap: options.Config.Thumbnail.FontMapFile,
+		},
 	}

 	return svc
@@ -48,13 +51,18 @@ func NewService(opts ...Option) v0proto.ThumbnailServiceHandler {

 // Thumbnail implements the GRPC handler.
 type Thumbnail struct {
-	serviceID       string
-	webdavNamespace string
-	manager         thumbnail.Manager
-	webdavSource    imgsource.Source
-	cs3Source       imgsource.Source
-	logger          log.Logger
-	cs3Client       gateway.GatewayAPIClient
+	serviceID        string
+	webdavNamespace  string
+	manager          thumbnail.Manager
+	webdavSource     imgsource.Source
+	cs3Source        imgsource.Source
+	logger           log.Logger
+	cs3Client        gateway.GatewayAPIClient
+	preprocessorOpts PreprocessorOpts // options forwarded to preprocessor.ForType
+}
+
+// PreprocessorOpts holds the options passed to the preprocessors.
+// TxtFontFileMap is the font map file used to render text previews.
+type PreprocessorOpts struct {
+	TxtFontFileMap string
 }

 // GetThumbnail retrieves a thumbnail for an image
@@ -114,7 +122,10 @@ func (g Thumbnail) handleCS3Source(ctx context.Context, req *v0proto.GetThumbnai
 		return nil, merrors.InternalServerError(g.serviceID, "could not get image from source: %s", err.Error())
 	}
 	defer r.Close() // nolint:errcheck
-	pp := preprocessor.ForType(sRes.GetInfo().GetMimeType())
+	ppOpts := map[string]interface{}{
+		"fontFileMap": g.preprocessorOpts.TxtFontFileMap,
+	}
+	pp := preprocessor.ForType(sRes.GetInfo().GetMimeType(), ppOpts)
 	img, err := pp.Convert(r)
 	if img == nil || err != nil {
 		return nil, merrors.InternalServerError(g.serviceID, "could not get image")
@@ -188,7 +199,10 @@ func (g Thumbnail) handleWebdavSource(ctx context.Context, req *v0proto.GetThumb
 		return nil, merrors.InternalServerError(g.serviceID, "could not get image from source: %s", err.Error())
 	}
 	defer r.Close() // nolint:errcheck
-	pp := preprocessor.ForType(sRes.GetInfo().GetMimeType())
+	ppOpts := map[string]interface{}{
+		"fontFileMap": g.preprocessorOpts.TxtFontFileMap,
+	}
+	pp := preprocessor.ForType(sRes.GetInfo().GetMimeType(), ppOpts)
 	img, err := pp.Convert(r)
 	if img == nil || err != nil {
 		return nil, merrors.InternalServerError(g.serviceID, "could not get image")