Bump reva

This commit is contained in:
André Duffeck
2025-02-13 10:08:22 +01:00
parent 5b85029813
commit 52e61d46d1
208 changed files with 11004 additions and 5254 deletions

View File

@@ -34,8 +34,6 @@ const (
DiffInsert Operation = 1
// DiffEqual item represents an equal diff.
DiffEqual Operation = 0
// IndexSeparator is used to separate the array indexes in an index string.
IndexSeparator = ","
)
// Diff represents one diff operation
@@ -406,14 +404,11 @@ func (dmp *DiffMatchPatch) DiffLinesToRunes(text1, text2 string) ([]rune, []rune
func (dmp *DiffMatchPatch) DiffCharsToLines(diffs []Diff, lineArray []string) []Diff {
hydrated := make([]Diff, 0, len(diffs))
for _, aDiff := range diffs {
chars := strings.Split(aDiff.Text, IndexSeparator)
text := make([]string, len(chars))
runes := []rune(aDiff.Text)
text := make([]string, len(runes))
for i, r := range chars {
i1, err := strconv.Atoi(r)
if err == nil {
text[i] = lineArray[i1]
}
for i, r := range runes {
text[i] = lineArray[runeToInt(r)]
}
aDiff.Text = strings.Join(text, "")

View File

@@ -9,11 +9,16 @@
package diffmatchpatch
import (
"strconv"
"fmt"
"strings"
"unicode/utf8"
)
// Code point boundaries used when integers are mapped onto runes.
const (
	// UNICODE_INVALID_RANGE_START is the first code point of the block
	// U+D800..U+DFFF (the UTF-16 surrogates), which is not valid in UTF-8.
	UNICODE_INVALID_RANGE_START = 0xD800
	// UNICODE_INVALID_RANGE_END is the last code point of that block.
	UNICODE_INVALID_RANGE_END = 0xDFFF
	// UNICODE_INVALID_RANGE_DELTA is the number of code points in the block,
	// both ends included.
	UNICODE_INVALID_RANGE_DELTA = UNICODE_INVALID_RANGE_END - UNICODE_INVALID_RANGE_START + 1
	// UNICODE_RANGE_MAX is the largest Unicode code point.
	UNICODE_RANGE_MAX = 0x10FFFF
)
// unescaper unescapes selected chars for compatibility with JavaScript's encodeURI.
// In speed critical applications this could be dropped since the receiving application will certainly decode these fine. Note that this function is case-sensitive. Thus "%3F" would not be unescaped. But this is ok because it is only called with the output of HttpUtility.UrlEncode which returns lowercase hex. Example: "%3f" -> "?", "%24" -> "$", etc.
var unescaper = strings.NewReplacer(
@@ -93,14 +98,93 @@ func intArrayToString(ns []uint32) string {
return ""
}
indexSeparator := IndexSeparator[0]
// Appr. 3 chars per num plus the comma.
b := []byte{}
b := []rune{}
for _, n := range ns {
b = strconv.AppendInt(b, int64(n), 10)
b = append(b, indexSeparator)
b = append(b, intToRune(n))
}
b = b[:len(b)-1]
return string(b)
}
// Number of code point bits that fit into a UTF-8 sequence of the given
// length in bytes.
const (
	ONE_BYTE_BITS   = 7  // one-byte sequence
	TWO_BYTE_BITS   = 11 // two-byte sequence
	THREE_BYTE_BITS = 16 // three-byte sequence
	FOUR_BYTE_BITS  = 21 // four-byte sequence
)
// getBits extracts cnt consecutive bits from i, where from is the position of
// the lowest extracted bit (bit 0 being the least significant one). The
// extracted bits are returned right-aligned, truncated to a byte.
func getBits(i uint32, cnt byte, from byte) byte {
	mask := uint32(1)<<cnt - 1
	shifted := i >> from
	return byte(shifted & mask)
}
// intToRune converts an integer in the range 0..1112060 into a distinct,
// valid rune. runeToInt performs the reverse conversion.
//
// Integers are assigned to code points in ascending order. Code points that
// cannot be used are skipped: the block U+D800..U+DFFF, which is not valid in
// UTF-8, and U+FFFD..U+FFFF at the end of the three-byte range.
//
// It panics if i is larger than 1112060.
//
// Based on the ranges table in https://en.wikipedia.org/wiki/UTF-8
func intToRune(i uint32) rune {
	if i < (1 << ONE_BYTE_BITS) {
		return rune(i)
	}

	if i < (1 << TWO_BYTE_BITS) {
		r, size := utf8.DecodeRune([]byte{0b11000000 | getBits(i, 5, 6), 0b10000000 | getBits(i, 6, 0)})
		if size != 2 || r == utf8.RuneError {
			panic(fmt.Sprintf("Error encoding an int %d with size 2, got rune %v and size %d", i, r, size))
		}
		return r
	}

	// The three-byte tier stops 3 code points early: 65533 is U+FFFD, which is
	// utf8.RuneError itself, so decoding it cannot be told apart from a
	// failure. U+FFFD..U+FFFF are therefore left out of the mapping.
	if i < ((1 << THREE_BYTE_BITS) - UNICODE_INVALID_RANGE_DELTA - 3) {
		// Jump over the invalid block so that it is never produced.
		if i >= UNICODE_INVALID_RANGE_START {
			i += UNICODE_INVALID_RANGE_DELTA
		}

		r, size := utf8.DecodeRune([]byte{0b11100000 | getBits(i, 4, 12), 0b10000000 | getBits(i, 6, 6), 0b10000000 | getBits(i, 6, 0)})
		if size != 3 || r == utf8.RuneError {
			panic(fmt.Sprintf("Error encoding an int %d with size 3, got rune %v and size %d", i, r, size))
		}
		return r
	}

	// The four-byte tier ends at the largest code point. Anything that would
	// map beyond it is rejected by the panic at the bottom of the function.
	if i <= UNICODE_RANGE_MAX-UNICODE_INVALID_RANGE_DELTA-3 {
		// Account for every code point skipped by the previous tier.
		i += UNICODE_INVALID_RANGE_DELTA + 3
		r, size := utf8.DecodeRune([]byte{0b11110000 | getBits(i, 3, 18), 0b10000000 | getBits(i, 6, 12), 0b10000000 | getBits(i, 6, 6), 0b10000000 | getBits(i, 6, 0)})
		if size != 4 || r == utf8.RuneError {
			panic(fmt.Sprintf("Error encoding an int %d with size 4, got rune %v and size %d", i, r, size))
		}
		return r
	}
	panic(fmt.Sprintf("The integer %d is too large for intToRune()", i))
}
// runeToInt turns a rune produced by intToRune back into the integer it was
// created from.
//
// The rune is UTF-8 encoded and the payload bits of the resulting sequence are
// reassembled. For three- and four-byte sequences the offsets that intToRune
// added to avoid unusable code points are subtracted again.
func runeToInt(r rune) uint32 {
	if code := uint32(r); code < (1 << ONE_BYTE_BITS) {
		return code
	}

	var buf [4]byte
	switch n := utf8.EncodeRune(buf[:], r); n {
	case 2:
		return uint32(buf[0]&0x1F)<<6 | uint32(buf[1]&0x3F)
	case 3:
		v := uint32(buf[0]&0x0F)<<12 | uint32(buf[1]&0x3F)<<6 | uint32(buf[2]&0x3F)
		if v < UNICODE_INVALID_RANGE_END {
			return v
		}
		// Code points past the invalid block were shifted up by its size.
		return v - UNICODE_INVALID_RANGE_DELTA
	case 4:
		v := uint32(buf[0]&0x07)<<18 | uint32(buf[1]&0x3F)<<12 | uint32(buf[2]&0x3F)<<6 | uint32(buf[3]&0x3F)
		return v - UNICODE_INVALID_RANGE_DELTA - 3
	default:
		panic(fmt.Sprintf("Unexpected state decoding rune=%v size=%d", r, n))
	}
}