diff --git a/backend/python/vllm/backend.py b/backend/python/vllm/backend.py
index 023a14bcc..98ac50812 100644
--- a/backend/python/vllm/backend.py
+++ b/backend/python/vllm/backend.py
@@ -95,6 +95,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
 
         if request.Quantization != "":
             engine_args.quantization = request.Quantization
+        if request.LoadFormat != "":
+            engine_args.load_format = request.LoadFormat
         if request.GPUMemoryUtilization != 0:
             engine_args.gpu_memory_utilization = request.GPUMemoryUtilization
         if request.TrustRemoteCode:
diff --git a/core/backend/options.go b/core/backend/options.go
index 90d563e03..82c582c84 100644
--- a/core/backend/options.go
+++ b/core/backend/options.go
@@ -139,6 +139,7 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
 		DraftModel:           c.DraftModel,
 		AudioPath:            c.VallE.AudioPath,
 		Quantization:         c.Quantization,
+		LoadFormat:           c.LoadFormat,
 		GPUMemoryUtilization: c.GPUMemoryUtilization,
 		TrustRemoteCode:      c.TrustRemoteCode,
 		EnforceEager:         c.EnforceEager,
diff --git a/core/config/backend_config.go b/core/config/backend_config.go
index b386d0965..c3d1063db 100644
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -143,6 +143,7 @@ type LLMConfig struct {
 	DraftModel           string  `yaml:"draft_model"`
 	NDraft               int32   `yaml:"n_draft"`
 	Quantization         string  `yaml:"quantization"`
+	LoadFormat           string  `yaml:"load_format"`
 	GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM
 	TrustRemoteCode      bool    `yaml:"trust_remote_code"`      // vLLM
 	EnforceEager         bool    `yaml:"enforce_eager"`          // vLLM