mirror of
https://github.com/hatchet-dev/hatchet.git
synced 2026-04-24 02:59:55 -05:00
fix: Validate the presence of null unicode in output (#3164)
* fix: Validate the presence of null unicode in output * chore: Update changelog and bump SDK version
This commit is contained in:
@@ -6,7 +6,7 @@ package-mode = false
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.10"
|
||||
hatchet-sdk = "^1.28.1"
|
||||
hatchet-sdk = "^1.28.2"
|
||||
# LLM integrations
|
||||
openai = "^1.0.0"
|
||||
anthropic = "^0.39.0"
|
||||
|
||||
+19
-1
@@ -1,6 +1,7 @@
|
||||
package repository
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
@@ -11,7 +12,7 @@ func ValidateJSONB(jsonb []byte, fieldName string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
if strings.Contains(string(jsonb), "\\u0000") {
|
||||
if !isUnicodeValid(jsonb) {
|
||||
return fmt.Errorf("encoded jsonb contains invalid null character \\u0000 in field `%s`", fieldName)
|
||||
}
|
||||
|
||||
@@ -21,3 +22,20 @@ func ValidateJSONB(jsonb []byte, fieldName string) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// isUnicodeValid reports whether every decoded string in the JSON document
// jsonb (object keys as well as string values) is free of the null character
// U+0000.
//
// The payload is tokenized instead of being searched as raw bytes so that only
// escapes which really decode to U+0000 are rejected. The JSON text "\\u0000"
// (an escaped backslash followed by the characters u0000) decodes to the
// literal six characters \u0000 and is therefore accepted.
//
// Any tokenization error, including io.EOF at the end of the input, yields
// true: this function does not judge syntactic validity, which is expected to
// be checked separately (for example with json.Valid).
func isUnicodeValid(jsonb []byte) bool {
	dec := json.NewDecoder(bytes.NewReader(jsonb))
	// Keep numbers as json.Number. Without this the decoder converts each
	// number to float64 and returns an error for syntactically valid literals
	// that overflow (e.g. 1e999). That error would end the scan early and let
	// a null character appearing later in the document slip through.
	dec.UseNumber()

	for {
		token, err := dec.Token()
		if err != nil {
			// NOTE(gregfurman): regardless of whether io.EOF or actual parsing error,
			// just return early as json.Valid should catch invalid payload.
			return true
		}

		// Object keys and string values are both reported as plain strings.
		if s, ok := token.(string); ok && strings.ContainsRune(s, '\u0000') {
			return false
		}
	}
}
|
||||
|
||||
@@ -16,6 +16,8 @@ func TestValidateJSONB_ValidJSON(t *testing.T) {
|
||||
[]byte(`true`),
|
||||
[]byte(`null`),
|
||||
[]byte(`[]`),
|
||||
[]byte(`{"a":"\\u0000"}`),
|
||||
[]byte(`{"a":"\\\\u0000"}`),
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
@@ -27,9 +29,17 @@ func TestValidateJSONB_ValidJSON(t *testing.T) {
|
||||
|
||||
func TestValidateJSONB_RejectsEncodedNull(t *testing.T) {
|
||||
// This byte slice contains the literal substring `\u0000`.
|
||||
b := []byte("{\"a\":\"\\u0000\"}")
|
||||
|
||||
if err := ValidateJSONB(b, "field"); err == nil {
|
||||
t.Fatalf("expected error for encoded null, got nil")
|
||||
cases := [][]byte{
|
||||
[]byte(`{"a":"\u0000"}`),
|
||||
[]byte(`{"a":"\\\u0000"}`),
|
||||
[]byte(`{"foo\u0000":"bar"}`),
|
||||
[]byte(`{"f\u0000oo":"bar"}`),
|
||||
[]byte(`[{"f\u0000oo":"bar"}]`),
|
||||
[]byte(`[{"a":"A","b":"B","c":"C\u0000"}]`),
|
||||
}
|
||||
for _, c := range cases {
|
||||
if isValid := isUnicodeValid(c); isValid {
|
||||
t.Fatalf("expected invalid unicode for json %q, got valid", string(c))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,6 +5,12 @@ All notable changes to Hatchet's Python SDK will be documented in this changelog
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [1.28.2] - 2026-03-12
|
||||
|
||||
### Changed
|
||||
|
||||
- Fixes a bug where task output containing the literal text `\u0000` (a backslash followed by `u0000`, not an actual null character) was incorrectly rejected as containing the null unicode character.
|
||||
|
||||
## [1.28.1] - 2026-03-05
|
||||
|
||||
### Changed
|
||||
|
||||
@@ -43,7 +43,9 @@ def remove_null_unicode_character(
|
||||
|
||||
if isinstance(data, dict):
|
||||
return {
|
||||
key: remove_null_unicode_character(cast(Any, value), replacement)
|
||||
remove_null_unicode_character(
|
||||
cast(Any, key), replacement
|
||||
): remove_null_unicode_character(cast(Any, value), replacement)
|
||||
for key, value in data.items()
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import asyncio
|
||||
import ctypes
|
||||
import functools
|
||||
import re
|
||||
from collections.abc import Callable
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from dataclasses import is_dataclass
|
||||
@@ -537,7 +538,10 @@ class Runner:
|
||||
if not serialized_output:
|
||||
return None
|
||||
|
||||
if "\\u0000" in serialized_output:
|
||||
# Checks whether a JSON-encoded null character (\u0000) is present in serialized output.
|
||||
# This matches the literal "\u0000" preceded by an odd number of backslashes, rejecting payloads
|
||||
# that will decode to the null char.
|
||||
if re.search(r"(?<!\\)(\\\\)*\\u0000", serialized_output):
|
||||
raise IllegalTaskOutputError(dedent(f"""
|
||||
Task outputs cannot contain the unicode null character \\u0000
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "hatchet-sdk"
|
||||
version = "1.28.1"
|
||||
version = "1.28.2"
|
||||
description = "This is the official Python SDK for Hatchet, a distributed, fault-tolerant task queue. The SDK allows you to easily integrate Hatchet's task scheduling and workflow orchestration capabilities into your Python applications."
|
||||
authors = [
|
||||
"Alexander Belanger <alexander@hatchet.run>",
|
||||
|
||||
Reference in New Issue
Block a user