fix: Various issues with JSON import and UTF8 or very long filenames (#9850)

This commit is contained in:
Tom Moor
2025-08-05 21:15:45 -04:00
committed by GitHub
parent a48aabdd66
commit a97908518a
7 changed files with 169 additions and 30 deletions

View File

@@ -231,6 +231,7 @@ const Subtitle = styled.p<{ $small?: boolean; $selected?: boolean }>`
font-size: ${(props) => (props.$small ? 13 : 14)}px;
color: ${s("textTertiary")};
margin-top: -2px;
overflow-wrap: break-word;
`;
export const Actions = styled(Flex)<{ $selected?: boolean }>`

View File

@@ -269,7 +269,7 @@
"y-indexeddb": "^9.0.11",
"y-prosemirror": "^1.3.7",
"y-protocols": "^1.0.6",
"yauzl": "^2.10.0",
"yauzl": "^3.2.0",
"yjs": "^13.6.1",
"zod": "^3.25.76"
},

View File

@@ -132,8 +132,8 @@ export default class ZipHelper {
try {
zipfile.readEntry();
zipfile.on("entry", function (entry: Entry) {
const fileName = Buffer.from(entry.fileName).toString("utf8");
Logger.debug("utils", "Extracting zip entry", { fileName });
const filePath = Buffer.from(entry.fileName).toString("utf8");
Logger.debug("utils", "Extracting zip entry", { filePath });
const processNext = (error?: NodeJS.ErrnoException | null) => {
if (error) {
@@ -144,15 +144,15 @@ export default class ZipHelper {
zipfile.readEntry();
};
if (validateFileName(fileName)) {
Logger.warn("Invalid zip entry", { fileName });
if (validateFileName(filePath)) {
Logger.warn("Invalid zip entry", { filePath });
processNext();
return;
}
if (/\/$/.test(fileName)) {
if (/\/$/.test(filePath)) {
// directory file names end with '/'
fs.mkdirp(path.join(outputDir, fileName), (mkErr) =>
fs.mkdirp(path.join(outputDir, filePath), (mkErr) =>
processNext(mkErr)
);
} else {
@@ -163,19 +163,22 @@ export default class ZipHelper {
}
// ensure parent directory exists
fs.mkdirp(
path.join(outputDir, path.dirname(fileName)),
path.join(outputDir, path.dirname(filePath)),
function (mkErr) {
if (mkErr) {
return processNext(mkErr);
}
const fileName = trimFileAndExt(
path.basename(filePath),
MAX_FILE_NAME_LENGTH
);
const location = trimFileAndExt(
path.join(
outputDir,
trimFileAndExt(fileName, MAX_FILE_NAME_LENGTH)
),
path.join(outputDir, path.dirname(filePath), fileName),
MAX_PATH_LENGTH
);
const dest = fs
.createWriteStream(location)
.on("error", (error) => {

View File

@@ -51,6 +51,111 @@ describe("trimFileAndExt", () => {
expect(trimFileAndExt("file.txt", 6)).toBe("fi.txt");
expect(trimFileAndExt("file.txt", 8)).toBe("file.txt");
expect(trimFileAndExt("file.md", 9)).toBe("file.md");
expect(trimFileAndExt("你好.md", 2)).toBe("你.md");
expect(trimFileAndExt("你好.md", 9)).toBe("你好.md"); // No trimming needed
expect(trimFileAndExt("你好.md", 8)).toBe("你.md"); // Trim one character
});
it("should handle files with no extension", () => {
expect(trimFileAndExt("filename", 4)).toBe("file");
expect(trimFileAndExt("verylongfilename", 8)).toBe("verylong");
expect(trimFileAndExt("file", 10)).toBe("file");
});
it("should handle extensions longer than the limit", () => {
expect(trimFileAndExt("file.verylongextension", 10)).toBe("file.veryl");
expect(trimFileAndExt("a.toolongext", 5)).toBe("a.too");
});
it("should handle edge cases with very short limits", () => {
expect(trimFileAndExt("file.txt", 1)).toBe("f"); // Can only fit 1 byte
expect(trimFileAndExt("file.txt", 0)).toBe("");
expect(trimFileAndExt("file.txt", -1)).toBe("");
});
it("should handle files with multiple dots", () => {
expect(trimFileAndExt("file.name.txt", 10)).toBe("file.n.txt");
expect(trimFileAndExt("archive.tar.gz", 14)).toBe("archive.tar.gz"); // No trimming needed
expect(trimFileAndExt("archive.tar.gz", 12)).toBe("archive.t.gz"); // Trim to fit
expect(trimFileAndExt("my.file.backup.txt", 10)).toBe("my.fil.txt");
});
it("should handle empty strings", () => {
expect(trimFileAndExt("", 10)).toBe("");
expect(trimFileAndExt("", 0)).toBe("");
});
it("should handle multi-byte UTF-8 characters properly", () => {
expect(trimFileAndExt("🦄🌟.txt", 8)).toBe("🦄.txt"); // 🦄 is 4 bytes, 🌟 is 4 bytes, .txt is 4 bytes
expect(trimFileAndExt("файл.txt", 8)).toBe("фа.txt"); // Cyrillic characters (фа is 4 bytes + .txt is 4 bytes = 8 total)
expect(trimFileAndExt("测试文件.md", 10)).toBe("测试.md"); // Chinese characters
});
it("should not break UTF-8 character boundaries", () => {
// Ensure we don't cut through multi-byte characters
expect(trimFileAndExt("🦄🦄.txt", 8)).toBe("🦄.txt"); // Should not cut through second emoji (🦄 is 4 bytes + .txt is 4 bytes = 8 total)
expect(trimFileAndExt("🦄🦄.txt", 7)).toBe(".txt"); // Only 3 bytes remain for the name after the 4-byte extension, too few for a 4-byte emoji, so the name is dropped entirely
expect(trimFileAndExt("你好世界.txt", 11)).toBe("你好.txt"); // Should cut at character boundary
});
it("should handle extension-only files", () => {
expect(trimFileAndExt(".gitignore", 5)).toBe(".giti");
expect(trimFileAndExt(".env", 3)).toBe(".en");
expect(trimFileAndExt(".bashrc", 10)).toBe(".bashrc");
});
it("should handle files where extension equals or exceeds the limit", () => {
expect(trimFileAndExt("file.extension", 9)).toBe("file.exte"); // Extension is 10 bytes, limit is 9
expect(trimFileAndExt("f.verylongextension", 10)).toBe("f.verylong"); // Slice whole filename when extension too long
});
it("should preserve behavior when no trimming needed", () => {
expect(trimFileAndExt("short.txt", 100)).toBe("short.txt");
expect(trimFileAndExt("file.md", 50)).toBe("file.md");
});
it("should handle mixed ASCII and UTF-8 characters", () => {
expect(trimFileAndExt("file-测试.txt", 12)).toBe("file-测.txt");
expect(trimFileAndExt("🦄unicorn.md", 10)).toBe("🦄uni.md");
expect(trimFileAndExt("test-файл.doc", 11)).toBe("test-ф.doc");
});
it("should handle very long filenames", () => {
const longName = "a".repeat(200);
const result = trimFileAndExt(`${longName}.txt`, 50);
expect(Buffer.byteLength(result, "utf8")).toBe(50);
expect(result.endsWith(".txt")).toBe(true);
});
it("should handle filesystem limit edge cases", () => {
// Test around common filesystem limits
expect(trimFileAndExt("file.txt", 255)).toBe("file.txt"); // Common filename limit
expect(trimFileAndExt("file.txt", 4096)).toBe("file.txt"); // Common path limit
const result255 = trimFileAndExt("a".repeat(300) + ".txt", 255);
expect(Buffer.byteLength(result255, "utf8")).toBeLessThanOrEqual(255);
});
it("should never produce invalid UTF-8 sequences", () => {
const testCases = [
"🦄🦄.txt",
"файлы.doc",
"测试文件.md",
"émoji🎉.txt",
"mixed-αβγ-123.log",
];
testCases.forEach((filename) => {
for (let limit = 1; limit <= 20; limit++) {
const result = trimFileAndExt(filename, limit);
expect(result).not.toContain("\uFFFD");
expect(Buffer.byteLength(result, "utf8")).toBeLessThanOrEqual(limit);
}
});
});
it("should handle special ASCII characters", () => {
expect(trimFileAndExt("file-name_123.txt", 10)).toBe("file-n.txt");
expect(trimFileAndExt("file@domain.com.txt", 12)).toBe("file@dom.txt");
expect(trimFileAndExt("file (copy).txt", 10)).toBe("file (.txt");
});
});

View File

@@ -31,18 +31,57 @@ export function stringByteLength(str: string): number {
return Buffer.byteLength(str, "utf8");
}
/**
 * Truncate a string so that its UTF-8 encoding occupies at most `maxBytes`
 * bytes, cutting only between characters so that no multi-byte sequence is
 * ever split.
 *
 * @param str The string to truncate.
 * @param maxBytes The maximum number of UTF-8 bytes allowed in the result.
 * @returns `str` unchanged when it already fits, otherwise its longest prefix
 * that fits. An empty string when `maxBytes` is zero or negative.
 */
function sliceStringToByteLength(str: string, maxBytes: number): string {
  if (maxBytes <= 0) {
    return "";
  }

  const encoded = Buffer.from(str, "utf8");
  if (encoded.length <= maxBytes) {
    return str;
  }

  // A UTF-8 continuation byte has the bit pattern 10xxxxxx. If the byte at
  // the cut position is one, the cut would land inside a character, so step
  // back until the cut position is the first byte of a character.
  let end = Math.trunc(maxBytes);
  while (end > 0 && ((encoded[end] ?? 0) & 0xc0) === 0x80) {
    end--;
  }

  return encoded.subarray(0, end).toString("utf8");
}
/**
 * Trim a file name to a maximum byte length, retaining the extension.
 *
 * Lengths are measured in UTF-8 bytes, and trimming never splits a multi-byte
 * character. If the extension leaves no room for the name, the whole input
 * (extension included) is cut to `length` bytes instead. Otherwise the result
 * is built from `path.basename`, so any directory portion of `text` is dropped
 * when trimming occurs.
 *
 * @param text The file name to trim.
 * @param length The maximum length of the file name in bytes.
 * @returns The trimmed file name, or `text` unchanged when it already fits.
 */
export function trimFileAndExt(text: string, length: number): string {
  if (Buffer.byteLength(text, "utf8") > length) {
    const ext = path.extname(text);
    const name = path.basename(text, ext);
    const extByteLength = Buffer.byteLength(ext, "utf8");
    const availableBytesForName = length - extByteLength;

    if (availableBytesForName <= 0) {
      // If extension is too long, trim the whole filename
      return sliceStringToByteLength(text, length);
    }

    // Budget the name in bytes, not UTF-16 code units, so multi-byte names
    // cannot overshoot the limit.
    const trimmedName = sliceStringToByteLength(name, availableBytesForName);
    return trimmedName + ext;
  }

  return text;
}

View File

@@ -1,7 +1,5 @@
import { v4 } from "uuid";
import { Scope } from "@shared/types";
import { OAuthAuthentication, OAuthClient, User } from "@server/models";
import { hash } from "@server/utils/crypto";
import { OAuthInterface } from "./OAuthInterface";
import {
buildOAuthAuthentication,

View File

@@ -8161,13 +8161,6 @@ fb-watchman@^2.0.0:
dependencies:
bser "2.1.1"
fd-slicer@~1.1.0:
version "1.1.0"
resolved "https://registry.yarnpkg.com/fd-slicer/-/fd-slicer-1.1.0.tgz#25c7c89cb1f9077f8891bbe61d8f390eae256f1e"
integrity "sha1-JcfInLH5B3+IkbvmHY85Dq4lbx4= sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g=="
dependencies:
pend "~1.2.0"
fdir@^6.4.4, fdir@^6.4.6:
version "6.4.6"
resolved "https://registry.yarnpkg.com/fdir/-/fdir-6.4.6.tgz#2b268c0232697063111bbf3f64810a2a741ba281"
@@ -15228,13 +15221,13 @@ yarn-deduplicate@^6.0.2:
semver "^7.5.0"
tslib "^2.5.0"
yauzl@^2.10.0:
version "2.10.0"
resolved "https://registry.yarnpkg.com/yauzl/-/yauzl-2.10.0.tgz#c7eb17c93e112cb1086fa6d8e51fb0667b79a5f9"
integrity "sha1-x+sXyT4RLLEIb6bY5R+wZnt5pfk= sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g=="
yauzl@^3.2.0:
version "3.2.0"
resolved "https://registry.yarnpkg.com/yauzl/-/yauzl-3.2.0.tgz#7b6cb548f09a48a6177ea0be8ece48deb7da45c0"
integrity sha512-Ow9nuGZE+qp1u4JIPvg+uCiUr7xGQWdff7JQSk5VGYTAZMDe2q8lxJ10ygv10qmSj031Ty/6FNJpLO4o1Sgc+w==
dependencies:
buffer-crc32 "~0.2.3"
fd-slicer "~1.1.0"
pend "~1.2.0"
yjs@^13.6.1:
version "13.6.1"