fix: Validate the presence of null unicode in output (#3164)

* fix: Validate the presence of null unicode in output

* chore: Update changelog and bump SDK version
This commit is contained in:
Greg Furman
2026-03-14 15:26:11 +02:00
committed by GitHub
parent 588be514f3
commit 794fbe7527
7 changed files with 49 additions and 9 deletions
+1 -1
View File
@@ -6,7 +6,7 @@ package-mode = false
[tool.poetry.dependencies]
python = "^3.10"
hatchet-sdk = "^1.28.1"
hatchet-sdk = "^1.28.2"
# LLM integrations
openai = "^1.0.0"
anthropic = "^0.39.0"
+19 -1
View File
@@ -1,6 +1,7 @@
package repository
import (
"bytes"
"encoding/json"
"fmt"
"strings"
@@ -11,7 +12,7 @@ func ValidateJSONB(jsonb []byte, fieldName string) error {
return nil
}
if strings.Contains(string(jsonb), "\\u0000") {
if !isUnicodeValid(jsonb) {
return fmt.Errorf("encoded jsonb contains invalid null character \\u0000 in field `%s`", fieldName)
}
@@ -21,3 +22,20 @@ func ValidateJSONB(jsonb []byte, fieldName string) error {
return nil
}
// isUnicodeValid reports whether the JSON document in jsonb is free of the
// null character (U+0000) in every string token the decoder yields.
//
// The payload is tokenized rather than substring-searched so that only escape
// sequences which actually decode to U+0000 are flagged; an escaped backslash
// followed by the text "u0000" decodes to ordinary characters and is accepted.
func isUnicodeValid(jsonb []byte) bool {
	const nullRune = '\u0000'

	decoder := json.NewDecoder(bytes.NewReader(jsonb))
	for tok, err := decoder.Token(); err == nil; tok, err = decoder.Token() {
		// Only string tokens can carry the null character; delimiters,
		// numbers, booleans and null are skipped.
		str, isString := tok.(string)
		if isString && strings.ContainsRune(str, nullRune) {
			return false
		}
	}

	// The token stream stopped, either at io.EOF or on a parse error. Malformed
	// input is deliberately not reported here: per the original author's note,
	// json.Valid is expected to catch invalid payloads.
	return true
}
+14 -4
View File
@@ -16,6 +16,8 @@ func TestValidateJSONB_ValidJSON(t *testing.T) {
[]byte(`true`),
[]byte(`null`),
[]byte(`[]`),
[]byte(`{"a":"\\u0000"}`),
[]byte(`{"a":"\\\\u0000"}`),
}
for _, c := range cases {
@@ -27,9 +29,17 @@ func TestValidateJSONB_ValidJSON(t *testing.T) {
func TestValidateJSONB_RejectsEncodedNull(t *testing.T) {
// This byte slice contains the literal substring `\u0000`.
b := []byte("{\"a\":\"\\u0000\"}")
if err := ValidateJSONB(b, "field"); err == nil {
t.Fatalf("expected error for encoded null, got nil")
cases := [][]byte{
[]byte(`{"a":"\u0000"}`),
[]byte(`{"a":"\\\u0000"}`),
[]byte(`{"foo\u0000":"bar"}`),
[]byte(`{"f\u0000oo":"bar"}`),
[]byte(`[{"f\u0000oo":"bar"}]`),
[]byte(`[{"a":"A","b":"B","c":"C\u0000"}]`),
}
for _, c := range cases {
if isValid := isUnicodeValid(c); isValid {
t.Fatalf("expected invalid unicode for json %q, got valid", string(c))
}
}
}
+6
View File
@@ -5,6 +5,12 @@ All notable changes to Hatchet's Python SDK will be documented in this changelog
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [1.28.2] - 2026-03-12
### Changed
- Fixes a bug where task output containing the literal text `\u0000` (an escaped backslash followed by `u0000`, rather than an actual null character) was incorrectly rejected as null unicode.
## [1.28.1] - 2026-03-05
### Changed
+3 -1
View File
@@ -43,7 +43,9 @@ def remove_null_unicode_character(
if isinstance(data, dict):
return {
key: remove_null_unicode_character(cast(Any, value), replacement)
remove_null_unicode_character(
cast(Any, key), replacement
): remove_null_unicode_character(cast(Any, value), replacement)
for key, value in data.items()
}
@@ -1,6 +1,7 @@
import asyncio
import ctypes
import functools
import re
from collections.abc import Callable
from concurrent.futures import ThreadPoolExecutor
from dataclasses import is_dataclass
@@ -537,7 +538,10 @@ class Runner:
if not serialized_output:
return None
if "\\u0000" in serialized_output:
# Checks whether a JSON-encoded null character (\u0000) is present in serialized output.
# This matches "u0000" preceded by an odd number of backslashes (i.e. an unescaped \u0000 escape),
# rejecting payloads that will decode to the null char.
if re.search(r"(?<!\\)(\\\\)*\\u0000", serialized_output):
raise IllegalTaskOutputError(dedent(f"""
Task outputs cannot contain the unicode null character \\u0000
+1 -1
View File
@@ -1,6 +1,6 @@
[tool.poetry]
name = "hatchet-sdk"
version = "1.28.1"
version = "1.28.2"
description = "This is the official Python SDK for Hatchet, a distributed, fault-tolerant task queue. The SDK allows you to easily integrate Hatchet's task scheduling and workflow orchestration capabilities into your Python applications."
authors = [
"Alexander Belanger <alexander@hatchet.run>",