fix: Various issues with JSON import and UTF8 or very long filenames (#9850)

2026-02-19 19:09:37 -06:00 · 2025-08-05 21:15:45 -04:00
parent a48aabdd66
commit a97908518a
7 changed files with 169 additions and 30 deletions
--- a/app/components/List/Item.tsx
+++ b/app/components/List/Item.tsx
@@ -231,6 +231,7 @@ const Subtitle = styled.p<{ $small?: boolean; $selected?: boolean }>`
  font-size: ${(props) => (props.$small ? 13 : 14)}px;
  color: ${s("textTertiary")};
  margin-top: -2px;
+  overflow-wrap: break-word;
 `;

 export const Actions = styled(Flex)<{ $selected?: boolean }>`
--- a/package.json
+++ b/package.json
@@ -269,7 +269,7 @@
    "y-indexeddb": "^9.0.11",
    "y-prosemirror": "^1.3.7",
    "y-protocols": "^1.0.6",
-    "yauzl": "^2.10.0",
+    "yauzl": "^3.2.0",
    "yjs": "^13.6.1",
    "zod": "^3.25.76"
  },
--- a/server/utils/ZipHelper.ts
+++ b/server/utils/ZipHelper.ts
@@ -132,8 +132,8 @@ export default class ZipHelper {
          try {
            zipfile.readEntry();
            zipfile.on("entry", function (entry: Entry) {
-              const fileName = Buffer.from(entry.fileName).toString("utf8");
-              Logger.debug("utils", "Extracting zip entry", { fileName });
+              const filePath = Buffer.from(entry.fileName).toString("utf8");
+              Logger.debug("utils", "Extracting zip entry", { filePath });

              const processNext = (error?: NodeJS.ErrnoException | null) => {
                if (error) {
@@ -144,15 +144,15 @@ export default class ZipHelper {
                zipfile.readEntry();
              };

-              if (validateFileName(fileName)) {
-                Logger.warn("Invalid zip entry", { fileName });
+              if (validateFileName(filePath)) {
+                Logger.warn("Invalid zip entry", { filePath });
                processNext();
                return;
              }

-              if (/\/$/.test(fileName)) {
+              if (/\/$/.test(filePath)) {
                // directory file names end with '/'
-                fs.mkdirp(path.join(outputDir, fileName), (mkErr) =>
+                fs.mkdirp(path.join(outputDir, filePath), (mkErr) =>
                  processNext(mkErr)
                );
              } else {
@@ -163,19 +163,22 @@ export default class ZipHelper {
                  }
                  // ensure parent directory exists
                  fs.mkdirp(
-                    path.join(outputDir, path.dirname(fileName)),
+                    path.join(outputDir, path.dirname(filePath)),
                    function (mkErr) {
                      if (mkErr) {
                        return processNext(mkErr);
                      }

+                      const fileName = trimFileAndExt(
+                        path.basename(filePath),
+                        MAX_FILE_NAME_LENGTH
+                      );
+
                      const location = trimFileAndExt(
-                        path.join(
-                          outputDir,
-                          trimFileAndExt(fileName, MAX_FILE_NAME_LENGTH)
-                        ),
+                        path.join(outputDir, path.dirname(filePath), fileName),
                        MAX_PATH_LENGTH
                      );
+
                      const dest = fs
                        .createWriteStream(location)
                        .on("error", (error) => {
--- a/server/utils/fs.test.ts
+++ b/server/utils/fs.test.ts
@@ -51,6 +51,111 @@ describe("trimFileAndExt", () => {
    expect(trimFileAndExt("file.txt", 6)).toBe("fi.txt");
    expect(trimFileAndExt("file.txt", 8)).toBe("file.txt");
    expect(trimFileAndExt("file.md", 9)).toBe("file.md");
-    expect(trimFileAndExt("你好.md", 2)).toBe("你.md");
+    expect(trimFileAndExt("你好.md", 9)).toBe("你好.md"); // No trimming needed
+    expect(trimFileAndExt("你好.md", 8)).toBe("你.md"); // Trim one character
+  });
+
+  it("should handle files with no extension", () => {
+    expect(trimFileAndExt("filename", 4)).toBe("file");
+    expect(trimFileAndExt("verylongfilename", 8)).toBe("verylong");
+    expect(trimFileAndExt("file", 10)).toBe("file");
+  });
+
+  it("should handle extensions longer than the limit", () => {
+    expect(trimFileAndExt("file.verylongextension", 10)).toBe("file.veryl");
+    expect(trimFileAndExt("a.toolongext", 5)).toBe("a.too");
+  });
+
+  it("should handle edge cases with very short limits", () => {
+    expect(trimFileAndExt("file.txt", 1)).toBe("f"); // Can only fit 1 byte
+    expect(trimFileAndExt("file.txt", 0)).toBe("");
+    expect(trimFileAndExt("file.txt", -1)).toBe("");
+  });
+
+  it("should handle files with multiple dots", () => {
+    expect(trimFileAndExt("file.name.txt", 10)).toBe("file.n.txt");
+    expect(trimFileAndExt("archive.tar.gz", 14)).toBe("archive.tar.gz"); // No trimming needed
+    expect(trimFileAndExt("archive.tar.gz", 12)).toBe("archive.t.gz"); // Trim to fit
+    expect(trimFileAndExt("my.file.backup.txt", 10)).toBe("my.fil.txt");
+  });
+
+  it("should handle empty strings", () => {
+    expect(trimFileAndExt("", 10)).toBe("");
+    expect(trimFileAndExt("", 0)).toBe("");
+  });
+
+  it("should handle multi-byte UTF-8 characters properly", () => {
+    expect(trimFileAndExt("🦄🌟.txt", 8)).toBe("🦄.txt"); // 🦄 is 4 bytes, 🌟 is 4 bytes, .txt is 4 bytes
+    expect(trimFileAndExt("файл.txt", 8)).toBe("фа.txt"); // Cyrillic characters (фа is 4 bytes + .txt is 4 bytes = 8 total)
+    expect(trimFileAndExt("测试文件.md", 10)).toBe("测试.md"); // Chinese characters
+  });
+
+  it("should not break UTF-8 character boundaries", () => {
+    // Ensure we don't cut through multi-byte characters
+    expect(trimFileAndExt("🦄🦄.txt", 8)).toBe("🦄.txt"); // Should not cut through second emoji (🦄 is 4 bytes + .txt is 4 bytes = 8 total)
+    expect(trimFileAndExt("🦄🦄.txt", 7)).toBe(".txt"); // Only 3 bytes remain after the 4-byte extension, so no whole emoji fits and the name part is dropped
+    expect(trimFileAndExt("你好世界.txt", 11)).toBe("你好.txt"); // Should cut at character boundary
+  });
+
+  it("should handle extension-only files", () => {
+    expect(trimFileAndExt(".gitignore", 5)).toBe(".giti");
+    expect(trimFileAndExt(".env", 3)).toBe(".en");
+    expect(trimFileAndExt(".bashrc", 10)).toBe(".bashrc");
+  });
+
+  it("should handle files where extension equals or exceeds the limit", () => {
+    expect(trimFileAndExt("file.extension", 9)).toBe("file.exte"); // Extension is 10 bytes, limit is 9
+    expect(trimFileAndExt("f.verylongextension", 10)).toBe("f.verylong"); // Slice whole filename when extension too long
+  });
+
+  it("should preserve behavior when no trimming needed", () => {
+    expect(trimFileAndExt("short.txt", 100)).toBe("short.txt");
+    expect(trimFileAndExt("file.md", 50)).toBe("file.md");
+  });
+
+  it("should handle mixed ASCII and UTF-8 characters", () => {
+    expect(trimFileAndExt("file-测试.txt", 12)).toBe("file-测.txt");
+    expect(trimFileAndExt("🦄unicorn.md", 10)).toBe("🦄uni.md");
+    expect(trimFileAndExt("test-файл.doc", 11)).toBe("test-ф.doc");
+  });
+
+  it("should handle very long filenames", () => {
+    const longName = "a".repeat(200);
+    const result = trimFileAndExt(`${longName}.txt`, 50);
+    expect(Buffer.byteLength(result, "utf8")).toBe(50);
+    expect(result.endsWith(".txt")).toBe(true);
+  });
+
+  it("should handle filesystem limit edge cases", () => {
+    // Test around common filesystem limits
+    expect(trimFileAndExt("file.txt", 255)).toBe("file.txt"); // Common filename limit
+    expect(trimFileAndExt("file.txt", 4096)).toBe("file.txt"); // Common path limit
+
+    const result255 = trimFileAndExt("a".repeat(300) + ".txt", 255);
+    expect(Buffer.byteLength(result255, "utf8")).toBeLessThanOrEqual(255);
+  });
+
+  it("should never produce invalid UTF-8 sequences", () => {
+    const testCases = [
+      "🦄🦄.txt",
+      "файлы.doc",
+      "测试文件.md",
+      "émoji🎉.txt",
+      "mixed-αβγ-123.log",
+    ];
+
+    testCases.forEach((filename) => {
+      for (let limit = 1; limit <= 20; limit++) {
+        const result = trimFileAndExt(filename, limit);
+        expect(result).not.toContain("\uFFFD");
+        expect(Buffer.byteLength(result, "utf8")).toBeLessThanOrEqual(limit);
+      }
+    });
+  });
+
+  it("should handle special ASCII characters", () => {
+    expect(trimFileAndExt("file-name_123.txt", 10)).toBe("file-n.txt");
+    expect(trimFileAndExt("file@domain.com.txt", 12)).toBe("file@dom.txt");
+    expect(trimFileAndExt("file (copy).txt", 10)).toBe("file (.txt");
  });
 });
--- a/server/utils/fs.ts
+++ b/server/utils/fs.ts
@@ -31,18 +31,57 @@ export function stringByteLength(str: string): number {
  return Buffer.byteLength(str, "utf8");
 }

+/**
+ * Safely slice a string to a maximum byte length without breaking UTF-8 characters.
+ *
+ * @param str The string to slice.
+ * @param maxBytes The maximum byte length.
+ * @returns The sliced string.
+ */
+function sliceStringToByteLength(str: string, maxBytes: number): string {
+  if (maxBytes <= 0) {
+    return "";
+  }
+
+  const buffer = Buffer.from(str, "utf8");
+  if (buffer.length <= maxBytes) {
+    return str;
+  }
+
+  // Work backwards from maxBytes to find valid UTF-8 boundary
+  for (let i = maxBytes; i > 0; i--) {
+    const slice = buffer.subarray(0, i);
+    const result = slice.toString("utf8");
+    // Check if the result round-trips correctly (no replacement characters)
+    if (Buffer.from(result, "utf8").equals(slice)) {
+      return result;
+    }
+  }
+
+  return "";
+}
+
 /**
 * Trim a file name to a maximum length, retaining the extension.
 *
 * @param text The file name to trim.
- * @param length The maximum length of the file name.
+ * @param length The maximum length of the file name in bytes.
 * @returns The trimmed file name.
 */
 export function trimFileAndExt(text: string, length: number): string {
-  if (stringByteLength(text) > length) {
+  if (Buffer.byteLength(text, "utf8") > length) {
    const ext = path.extname(text);
    const name = path.basename(text, ext);
-    return name.slice(0, length - stringByteLength(ext)) + ext;
+    const extByteLength = Buffer.byteLength(ext, "utf8");
+    const availableBytesForName = length - extByteLength;
+
+    if (availableBytesForName <= 0) {
+      // The extension alone uses up the byte budget, so truncate the whole file name (extension included)
+      return sliceStringToByteLength(text, length);
+    }
+
+    const trimmedName = sliceStringToByteLength(name, availableBytesForName);
+    return trimmedName + ext;
  }
  return text;
 }
--- a/server/utils/oauth/OAuthInterface.test.ts
+++ b/server/utils/oauth/OAuthInterface.test.ts
@@ -1,7 +1,5 @@
 import { v4 } from "uuid";
 import { Scope } from "@shared/types";
-import { OAuthAuthentication, OAuthClient, User } from "@server/models";
-import { hash } from "@server/utils/crypto";
 import { OAuthInterface } from "./OAuthInterface";
 import {
  buildOAuthAuthentication,
--- a/yarn.lock
+++ b/yarn.lock
@@ -8161,13 +8161,6 @@ fb-watchman@^2.0.0:
  dependencies:
    bser "2.1.1"

-fd-slicer@~1.1.0:
-  version "1.1.0"
-  resolved "https://registry.yarnpkg.com/fd-slicer/-/fd-slicer-1.1.0.tgz#25c7c89cb1f9077f8891bbe61d8f390eae256f1e"
-  integrity "sha1-JcfInLH5B3+IkbvmHY85Dq4lbx4= sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g=="
-  dependencies:
-    pend "~1.2.0"
-
 fdir@^6.4.4, fdir@^6.4.6:
  version "6.4.6"
  resolved "https://registry.yarnpkg.com/fdir/-/fdir-6.4.6.tgz#2b268c0232697063111bbf3f64810a2a741ba281"
@@ -15228,13 +15221,13 @@ yarn-deduplicate@^6.0.2:
    semver "^7.5.0"
    tslib "^2.5.0"

-yauzl@^2.10.0:
-  version "2.10.0"
-  resolved "https://registry.yarnpkg.com/yauzl/-/yauzl-2.10.0.tgz#c7eb17c93e112cb1086fa6d8e51fb0667b79a5f9"
-  integrity "sha1-x+sXyT4RLLEIb6bY5R+wZnt5pfk= sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g=="
+yauzl@^3.2.0:
+  version "3.2.0"
+  resolved "https://registry.yarnpkg.com/yauzl/-/yauzl-3.2.0.tgz#7b6cb548f09a48a6177ea0be8ece48deb7da45c0"
+  integrity sha512-Ow9nuGZE+qp1u4JIPvg+uCiUr7xGQWdff7JQSk5VGYTAZMDe2q8lxJ10ygv10qmSj031Ty/6FNJpLO4o1Sgc+w==
  dependencies:
    buffer-crc32 "~0.2.3"
-    fd-slicer "~1.1.0"
+    pend "~1.2.0"

 yjs@^13.6.1:
  version "13.6.1"