feat: add zai/glm-4.6v image understanding support (#10267)

Fixes #10265. Thanks @liuy.
2026-06-28 15:01:41 +03:00 · 2026-02-10 10:38:09 +08:00
parent d3c71875e4
commit 33ee8bbf1d
7 changed files with 51 additions and 10 deletions
@@ -32,5 +32,22 @@ export const DEFAULT_AUDIO_MODELS: Record<string, string> = {
  openai: "gpt-4o-mini-transcribe",
  deepgram: "nova-3",
 };
+
+export const AUTO_AUDIO_KEY_PROVIDERS = ["openai", "groq", "deepgram", "google"] as const;
+export const AUTO_IMAGE_KEY_PROVIDERS = [
+  "openai",
+  "anthropic",
+  "google",
+  "minimax",
+  "zai",
+] as const;
+export const AUTO_VIDEO_KEY_PROVIDERS = ["google"] as const;
+export const DEFAULT_IMAGE_MODELS: Record<string, string> = {
+  openai: "gpt-5-mini",
+  anthropic: "claude-opus-4-6",
+  google: "gemini-3-flash-preview",
+  minimax: "MiniMax-VL-01",
+  zai: "glm-4.6v",
+};
 export const CLI_OUTPUT_MAX_BUFFER = 5 * MB;
 export const DEFAULT_MEDIA_CONCURRENCY = 2;
@@ -6,6 +6,7 @@ import { googleProvider } from "./google/index.js";
 import { groqProvider } from "./groq/index.js";
 import { minimaxProvider } from "./minimax/index.js";
 import { openaiProvider } from "./openai/index.js";
+import { zaiProvider } from "./zai/index.js";

 const PROVIDERS: MediaUnderstandingProvider[] = [
  groqProvider,
@@ -13,6 +14,7 @@ const PROVIDERS: MediaUnderstandingProvider[] = [
  googleProvider,
  anthropicProvider,
  minimaxProvider,
+  zaiProvider,
  deepgramProvider,
 ];

@@ -0,0 +1,8 @@
+import type { MediaUnderstandingProvider } from "../../types.js";
+import { describeImageWithModel } from "../image.js";
+
+export const zaiProvider: MediaUnderstandingProvider = {
+  id: "zai",
+  capabilities: ["image"],
+  describeImage: describeImageWithModel,
+};
@@ -27,8 +27,12 @@ import { logVerbose, shouldLogVerbose } from "../globals.js";
 import { runExec } from "../process/exec.js";
 import { MediaAttachmentCache, normalizeAttachments, selectAttachments } from "./attachments.js";
 import {
+  AUTO_AUDIO_KEY_PROVIDERS,
+  AUTO_IMAGE_KEY_PROVIDERS,
+  AUTO_VIDEO_KEY_PROVIDERS,
  CLI_OUTPUT_MAX_BUFFER,
  DEFAULT_AUDIO_MODELS,
+  DEFAULT_IMAGE_MODELS,
  DEFAULT_TIMEOUT_SECONDS,
 } from "./defaults.js";
 import { isMediaUnderstandingSkipError, MediaUnderstandingSkipError } from "./errors.js";
@@ -48,16 +52,6 @@ import {
 } from "./resolve.js";
 import { estimateBase64Size, resolveVideoMaxBase64Bytes } from "./video.js";

-const AUTO_AUDIO_KEY_PROVIDERS = ["openai", "groq", "deepgram", "google"] as const;
-const AUTO_IMAGE_KEY_PROVIDERS = ["openai", "anthropic", "google", "minimax"] as const;
-const AUTO_VIDEO_KEY_PROVIDERS = ["google"] as const;
-const DEFAULT_IMAGE_MODELS: Record<string, string> = {
-  openai: "gpt-5-mini",
-  anthropic: "claude-opus-4-6",
-  google: "gemini-3-flash-preview",
-  minimax: "MiniMax-VL-01",
-};
-
 export type ActiveMediaModel = {
  provider: string;
  model?: string;