chore: Run pnpm format:fix.

This commit is contained in:
cpojer
2026-01-31 21:13:13 +09:00
parent dcc2de15a6
commit 8cab78abbc
624 changed files with 10729 additions and 7514 deletions
+78 -111
View File
@@ -31,10 +31,7 @@ function resolveDefaultStorePath(config: VoiceCallConfig): string {
const existing =
[resolvedPreferred].find((dir) => {
try {
return (
fs.existsSync(path.join(dir, "calls.jsonl")) ||
fs.existsSync(dir)
);
return fs.existsSync(path.join(dir, "calls.jsonl")) || fs.existsSync(dir);
} catch {
return false;
}
@@ -62,10 +59,7 @@ export function registerVoiceCallCli(params: {
root
.command("call")
.description("Initiate an outbound voice call")
.requiredOption(
"-m, --message <text>",
"Message to speak when call connects",
)
.requiredOption("-m, --message <text>", "Message to speak when call connects")
.option(
"-t, --to <phone>",
"Phone number to call (E.164 format, uses config toNumber if not set)",
@@ -75,27 +69,23 @@ export function registerVoiceCallCli(params: {
"Call mode: notify (hangup after message) or conversation (stay open)",
"conversation",
)
.action(
async (options: { message: string; to?: string; mode?: string }) => {
const rt = await ensureRuntime();
const to = options.to ?? rt.config.toNumber;
if (!to) {
throw new Error("Missing --to and no toNumber configured");
}
const result = await rt.manager.initiateCall(to, undefined, {
message: options.message,
mode:
options.mode === "notify" || options.mode === "conversation"
? options.mode
: undefined,
});
if (!result.success) {
throw new Error(result.error || "initiate failed");
}
// eslint-disable-next-line no-console
console.log(JSON.stringify({ callId: result.callId }, null, 2));
},
);
.action(async (options: { message: string; to?: string; mode?: string }) => {
const rt = await ensureRuntime();
const to = options.to ?? rt.config.toNumber;
if (!to) {
throw new Error("Missing --to and no toNumber configured");
}
const result = await rt.manager.initiateCall(to, undefined, {
message: options.message,
mode:
options.mode === "notify" || options.mode === "conversation" ? options.mode : undefined,
});
if (!result.success) {
throw new Error(result.error || "initiate failed");
}
// eslint-disable-next-line no-console
console.log(JSON.stringify({ callId: result.callId }, null, 2));
});
root
.command("start")
@@ -107,23 +97,19 @@ export function registerVoiceCallCli(params: {
"Call mode: notify (hangup after message) or conversation (stay open)",
"conversation",
)
.action(
async (options: { to: string; message?: string; mode?: string }) => {
const rt = await ensureRuntime();
const result = await rt.manager.initiateCall(options.to, undefined, {
message: options.message,
mode:
options.mode === "notify" || options.mode === "conversation"
? options.mode
: undefined,
});
if (!result.success) {
throw new Error(result.error || "initiate failed");
}
// eslint-disable-next-line no-console
console.log(JSON.stringify({ callId: result.callId }, null, 2));
},
);
.action(async (options: { to: string; message?: string; mode?: string }) => {
const rt = await ensureRuntime();
const result = await rt.manager.initiateCall(options.to, undefined, {
message: options.message,
mode:
options.mode === "notify" || options.mode === "conversation" ? options.mode : undefined,
});
if (!result.success) {
throw new Error(result.error || "initiate failed");
}
// eslint-disable-next-line no-console
console.log(JSON.stringify({ callId: result.callId }, null, 2));
});
root
.command("continue")
@@ -132,10 +118,7 @@ export function registerVoiceCallCli(params: {
.requiredOption("--message <text>", "Message to speak")
.action(async (options: { callId: string; message: string }) => {
const rt = await ensureRuntime();
const result = await rt.manager.continueCall(
options.callId,
options.message,
);
const result = await rt.manager.continueCall(options.callId, options.message);
if (!result.success) {
throw new Error(result.error || "continue failed");
}
@@ -185,86 +168,70 @@ export function registerVoiceCallCli(params: {
root
.command("tail")
.description(
"Tail voice-call JSONL logs (prints new lines; useful during provider tests)",
)
.description("Tail voice-call JSONL logs (prints new lines; useful during provider tests)")
.option("--file <path>", "Path to calls.jsonl", resolveDefaultStorePath(config))
.option("--since <n>", "Print last N lines first", "25")
.option("--poll <ms>", "Poll interval in ms", "250")
.action(
async (options: { file: string; since?: string; poll?: string }) => {
const file = options.file;
const since = Math.max(0, Number(options.since ?? 0));
const pollMs = Math.max(50, Number(options.poll ?? 250));
.action(async (options: { file: string; since?: string; poll?: string }) => {
const file = options.file;
const since = Math.max(0, Number(options.since ?? 0));
const pollMs = Math.max(50, Number(options.poll ?? 250));
if (!fs.existsSync(file)) {
logger.error(`No log file at ${file}`);
process.exit(1);
}
if (!fs.existsSync(file)) {
logger.error(`No log file at ${file}`);
process.exit(1);
}
const initial = fs.readFileSync(file, "utf8");
const lines = initial.split("\n").filter(Boolean);
for (const line of lines.slice(Math.max(0, lines.length - since))) {
// eslint-disable-next-line no-console
console.log(line);
}
const initial = fs.readFileSync(file, "utf8");
const lines = initial.split("\n").filter(Boolean);
for (const line of lines.slice(Math.max(0, lines.length - since))) {
// eslint-disable-next-line no-console
console.log(line);
}
let offset = Buffer.byteLength(initial, "utf8");
let offset = Buffer.byteLength(initial, "utf8");
for (;;) {
try {
const stat = fs.statSync(file);
if (stat.size < offset) {
offset = 0;
}
if (stat.size > offset) {
const fd = fs.openSync(file, "r");
try {
const buf = Buffer.alloc(stat.size - offset);
fs.readSync(fd, buf, 0, buf.length, offset);
offset = stat.size;
const text = buf.toString("utf8");
for (const line of text.split("\n").filter(Boolean)) {
// eslint-disable-next-line no-console
console.log(line);
}
} finally {
fs.closeSync(fd);
}
}
} catch {
// ignore and retry
for (;;) {
try {
const stat = fs.statSync(file);
if (stat.size < offset) {
offset = 0;
}
await sleep(pollMs);
if (stat.size > offset) {
const fd = fs.openSync(file, "r");
try {
const buf = Buffer.alloc(stat.size - offset);
fs.readSync(fd, buf, 0, buf.length, offset);
offset = stat.size;
const text = buf.toString("utf8");
for (const line of text.split("\n").filter(Boolean)) {
// eslint-disable-next-line no-console
console.log(line);
}
} finally {
fs.closeSync(fd);
}
}
} catch {
// ignore and retry
}
},
);
await sleep(pollMs);
}
});
root
.command("expose")
.description("Enable/disable Tailscale serve/funnel for the webhook")
.option("--mode <mode>", "off | serve (tailnet) | funnel (public)", "funnel")
.option(
"--path <path>",
"Tailscale path to expose (recommend matching serve.path)",
)
.option("--path <path>", "Tailscale path to expose (recommend matching serve.path)")
.option("--port <port>", "Local webhook port")
.option("--serve-path <path>", "Local webhook path")
.action(
async (options: {
mode?: string;
port?: string;
path?: string;
servePath?: string;
}) => {
async (options: { mode?: string; port?: string; path?: string; servePath?: string }) => {
const mode = resolveMode(options.mode ?? "funnel");
const servePort = Number(options.port ?? config.serve.port ?? 3334);
const servePath = String(
options.servePath ?? config.serve.path ?? "/voice/webhook",
);
const tsPath = String(
options.path ?? config.tailscale?.path ?? servePath,
);
const servePath = String(options.servePath ?? config.serve.path ?? "/voice/webhook");
const tsPath = String(options.path ?? config.tailscale?.path ?? servePath);
const localUrl = `http://127.0.0.1:${servePort}`;
+1 -3
View File
@@ -2,9 +2,7 @@ import { afterEach, beforeEach, describe, expect, it } from "vitest";
import { validateProviderConfig, resolveVoiceCallConfig, type VoiceCallConfig } from "./config.js";
function createBaseConfig(
provider: "telnyx" | "twilio" | "plivo" | "mock",
): VoiceCallConfig {
function createBaseConfig(provider: "telnyx" | "twilio" | "plivo" | "mock"): VoiceCallConfig {
return {
enabled: true,
provider,
+88 -110
View File
@@ -23,12 +23,7 @@ export const E164Schema = z
* - "pairing": Unknown callers can request pairing (future)
* - "open": Accept all inbound calls (dangerous!)
*/
export const InboundPolicySchema = z.enum([
"disabled",
"allowlist",
"pairing",
"open",
]);
export const InboundPolicySchema = z.enum(["disabled", "allowlist", "pairing", "open"]);
export type InboundPolicy = z.infer<typeof InboundPolicySchema>;
// -----------------------------------------------------------------------------
@@ -37,33 +32,33 @@ export type InboundPolicy = z.infer<typeof InboundPolicySchema>;
export const TelnyxConfigSchema = z
.object({
/** Telnyx API v2 key */
apiKey: z.string().min(1).optional(),
/** Telnyx connection ID (from Call Control app) */
connectionId: z.string().min(1).optional(),
/** Public key for webhook signature verification */
publicKey: z.string().min(1).optional(),
})
/** Telnyx API v2 key */
apiKey: z.string().min(1).optional(),
/** Telnyx connection ID (from Call Control app) */
connectionId: z.string().min(1).optional(),
/** Public key for webhook signature verification */
publicKey: z.string().min(1).optional(),
})
.strict();
export type TelnyxConfig = z.infer<typeof TelnyxConfigSchema>;
export const TwilioConfigSchema = z
.object({
/** Twilio Account SID */
accountSid: z.string().min(1).optional(),
/** Twilio Auth Token */
authToken: z.string().min(1).optional(),
})
/** Twilio Account SID */
accountSid: z.string().min(1).optional(),
/** Twilio Auth Token */
authToken: z.string().min(1).optional(),
})
.strict();
export type TwilioConfig = z.infer<typeof TwilioConfigSchema>;
export const PlivoConfigSchema = z
.object({
/** Plivo Auth ID (starts with MA/SA) */
authId: z.string().min(1).optional(),
/** Plivo Auth Token */
authToken: z.string().min(1).optional(),
})
/** Plivo Auth ID (starts with MA/SA) */
authId: z.string().min(1).optional(),
/** Plivo Auth Token */
authToken: z.string().min(1).optional(),
})
.strict();
export type PlivoConfig = z.infer<typeof PlivoConfigSchema>;
@@ -190,9 +185,7 @@ export const VoiceCallTailscaleConfigSchema = z
})
.strict()
.default({ mode: "off", path: "/voice/webhook" });
export type VoiceCallTailscaleConfig = z.infer<
typeof VoiceCallTailscaleConfigSchema
>;
export type VoiceCallTailscaleConfig = z.infer<typeof VoiceCallTailscaleConfigSchema>;
// -----------------------------------------------------------------------------
// Tunnel Configuration (unified ngrok/tailscale)
@@ -207,9 +200,7 @@ export const VoiceCallTunnelConfigSchema = z
* - "tailscale-serve": Tailscale serve (private to tailnet)
* - "tailscale-funnel": Tailscale funnel (public HTTPS)
*/
provider: z
.enum(["none", "ngrok", "tailscale-serve", "tailscale-funnel"])
.default("none"),
provider: z.enum(["none", "ngrok", "tailscale-serve", "tailscale-funnel"]).default("none"),
/** ngrok auth token (optional, enables longer sessions and more features) */
ngrokAuthToken: z.string().min(1).optional(),
/** ngrok custom domain (paid feature, e.g., "myapp.ngrok.io") */
@@ -283,9 +274,7 @@ export const VoiceCallStreamingConfigSchema = z
vadThreshold: 0.5,
streamPath: "/voice/stream",
});
export type VoiceCallStreamingConfig = z.infer<
typeof VoiceCallStreamingConfigSchema
>;
export type VoiceCallStreamingConfig = z.infer<typeof VoiceCallStreamingConfigSchema>;
// -----------------------------------------------------------------------------
// Main Voice Call Configuration
@@ -293,90 +282,90 @@ export type VoiceCallStreamingConfig = z.infer<
export const VoiceCallConfigSchema = z
.object({
/** Enable voice call functionality */
enabled: z.boolean().default(false),
/** Enable voice call functionality */
enabled: z.boolean().default(false),
/** Active provider (telnyx, twilio, plivo, or mock) */
provider: z.enum(["telnyx", "twilio", "plivo", "mock"]).optional(),
/** Active provider (telnyx, twilio, plivo, or mock) */
provider: z.enum(["telnyx", "twilio", "plivo", "mock"]).optional(),
/** Telnyx-specific configuration */
telnyx: TelnyxConfigSchema.optional(),
/** Telnyx-specific configuration */
telnyx: TelnyxConfigSchema.optional(),
/** Twilio-specific configuration */
twilio: TwilioConfigSchema.optional(),
/** Twilio-specific configuration */
twilio: TwilioConfigSchema.optional(),
/** Plivo-specific configuration */
plivo: PlivoConfigSchema.optional(),
/** Plivo-specific configuration */
plivo: PlivoConfigSchema.optional(),
/** Phone number to call from (E.164) */
fromNumber: E164Schema.optional(),
/** Phone number to call from (E.164) */
fromNumber: E164Schema.optional(),
/** Default phone number to call (E.164) */
toNumber: E164Schema.optional(),
/** Default phone number to call (E.164) */
toNumber: E164Schema.optional(),
/** Inbound call policy */
inboundPolicy: InboundPolicySchema.default("disabled"),
/** Inbound call policy */
inboundPolicy: InboundPolicySchema.default("disabled"),
/** Allowlist of phone numbers for inbound calls (E.164) */
allowFrom: z.array(E164Schema).default([]),
/** Allowlist of phone numbers for inbound calls (E.164) */
allowFrom: z.array(E164Schema).default([]),
/** Greeting message for inbound calls */
inboundGreeting: z.string().optional(),
/** Greeting message for inbound calls */
inboundGreeting: z.string().optional(),
/** Outbound call configuration */
outbound: OutboundConfigSchema,
/** Outbound call configuration */
outbound: OutboundConfigSchema,
/** Maximum call duration in seconds */
maxDurationSeconds: z.number().int().positive().default(300),
/** Maximum call duration in seconds */
maxDurationSeconds: z.number().int().positive().default(300),
/** Silence timeout for end-of-speech detection (ms) */
silenceTimeoutMs: z.number().int().positive().default(800),
/** Silence timeout for end-of-speech detection (ms) */
silenceTimeoutMs: z.number().int().positive().default(800),
/** Timeout for user transcript (ms) */
transcriptTimeoutMs: z.number().int().positive().default(180000),
/** Timeout for user transcript (ms) */
transcriptTimeoutMs: z.number().int().positive().default(180000),
/** Ring timeout for outbound calls (ms) */
ringTimeoutMs: z.number().int().positive().default(30000),
/** Ring timeout for outbound calls (ms) */
ringTimeoutMs: z.number().int().positive().default(30000),
/** Maximum concurrent calls */
maxConcurrentCalls: z.number().int().positive().default(1),
/** Maximum concurrent calls */
maxConcurrentCalls: z.number().int().positive().default(1),
/** Webhook server configuration */
serve: VoiceCallServeConfigSchema,
/** Webhook server configuration */
serve: VoiceCallServeConfigSchema,
/** Tailscale exposure configuration (legacy, prefer tunnel config) */
tailscale: VoiceCallTailscaleConfigSchema,
/** Tailscale exposure configuration (legacy, prefer tunnel config) */
tailscale: VoiceCallTailscaleConfigSchema,
/** Tunnel configuration (unified ngrok/tailscale) */
tunnel: VoiceCallTunnelConfigSchema,
/** Tunnel configuration (unified ngrok/tailscale) */
tunnel: VoiceCallTunnelConfigSchema,
/** Real-time audio streaming configuration */
streaming: VoiceCallStreamingConfigSchema,
/** Real-time audio streaming configuration */
streaming: VoiceCallStreamingConfigSchema,
/** Public webhook URL override (if set, bypasses tunnel auto-detection) */
publicUrl: z.string().url().optional(),
/** Public webhook URL override (if set, bypasses tunnel auto-detection) */
publicUrl: z.string().url().optional(),
/** Skip webhook signature verification (development only, NOT for production) */
skipSignatureVerification: z.boolean().default(false),
/** Skip webhook signature verification (development only, NOT for production) */
skipSignatureVerification: z.boolean().default(false),
/** STT configuration */
stt: SttConfigSchema,
/** STT configuration */
stt: SttConfigSchema,
/** TTS override (deep-merges with core messages.tts) */
tts: TtsConfigSchema,
/** TTS override (deep-merges with core messages.tts) */
tts: TtsConfigSchema,
/** Store path for call logs */
store: z.string().optional(),
/** Store path for call logs */
store: z.string().optional(),
/** Model for generating voice responses (e.g., "anthropic/claude-sonnet-4", "openai/gpt-4o") */
responseModel: z.string().default("openai/gpt-4o-mini"),
/** Model for generating voice responses (e.g., "anthropic/claude-sonnet-4", "openai/gpt-4o") */
responseModel: z.string().default("openai/gpt-4o-mini"),
/** System prompt for voice responses */
responseSystemPrompt: z.string().optional(),
/** System prompt for voice responses */
responseSystemPrompt: z.string().optional(),
/** Timeout for response generation in ms (default 30s) */
responseTimeoutMs: z.number().int().positive().default(30000),
})
/** Timeout for response generation in ms (default 30s) */
responseTimeoutMs: z.number().int().positive().default(30000),
})
.strict();
export type VoiceCallConfig = z.infer<typeof VoiceCallConfigSchema>;
@@ -395,30 +384,23 @@ export function resolveVoiceCallConfig(config: VoiceCallConfig): VoiceCallConfig
// Telnyx
if (resolved.provider === "telnyx") {
resolved.telnyx = resolved.telnyx ?? {};
resolved.telnyx.apiKey =
resolved.telnyx.apiKey ?? process.env.TELNYX_API_KEY;
resolved.telnyx.connectionId =
resolved.telnyx.connectionId ?? process.env.TELNYX_CONNECTION_ID;
resolved.telnyx.publicKey =
resolved.telnyx.publicKey ?? process.env.TELNYX_PUBLIC_KEY;
resolved.telnyx.apiKey = resolved.telnyx.apiKey ?? process.env.TELNYX_API_KEY;
resolved.telnyx.connectionId = resolved.telnyx.connectionId ?? process.env.TELNYX_CONNECTION_ID;
resolved.telnyx.publicKey = resolved.telnyx.publicKey ?? process.env.TELNYX_PUBLIC_KEY;
}
// Twilio
if (resolved.provider === "twilio") {
resolved.twilio = resolved.twilio ?? {};
resolved.twilio.accountSid =
resolved.twilio.accountSid ?? process.env.TWILIO_ACCOUNT_SID;
resolved.twilio.authToken =
resolved.twilio.authToken ?? process.env.TWILIO_AUTH_TOKEN;
resolved.twilio.accountSid = resolved.twilio.accountSid ?? process.env.TWILIO_ACCOUNT_SID;
resolved.twilio.authToken = resolved.twilio.authToken ?? process.env.TWILIO_AUTH_TOKEN;
}
// Plivo
if (resolved.provider === "plivo") {
resolved.plivo = resolved.plivo ?? {};
resolved.plivo.authId =
resolved.plivo.authId ?? process.env.PLIVO_AUTH_ID;
resolved.plivo.authToken =
resolved.plivo.authToken ?? process.env.PLIVO_AUTH_TOKEN;
resolved.plivo.authId = resolved.plivo.authId ?? process.env.PLIVO_AUTH_ID;
resolved.plivo.authToken = resolved.plivo.authToken ?? process.env.PLIVO_AUTH_TOKEN;
}
// Tunnel Config
@@ -427,13 +409,9 @@ export function resolveVoiceCallConfig(config: VoiceCallConfig): VoiceCallConfig
allowNgrokFreeTierLoopbackBypass: false,
};
resolved.tunnel.allowNgrokFreeTierLoopbackBypass =
resolved.tunnel.allowNgrokFreeTierLoopbackBypass ||
resolved.tunnel.allowNgrokFreeTier ||
false;
resolved.tunnel.ngrokAuthToken =
resolved.tunnel.ngrokAuthToken ?? process.env.NGROK_AUTHTOKEN;
resolved.tunnel.ngrokDomain =
resolved.tunnel.ngrokDomain ?? process.env.NGROK_DOMAIN;
resolved.tunnel.allowNgrokFreeTierLoopbackBypass || resolved.tunnel.allowNgrokFreeTier || false;
resolved.tunnel.ngrokAuthToken = resolved.tunnel.ngrokAuthToken ?? process.env.NGROK_AUTHTOKEN;
resolved.tunnel.ngrokDomain = resolved.tunnel.ngrokDomain ?? process.env.NGROK_DOMAIN;
return resolved;
}
+2 -7
View File
@@ -51,10 +51,7 @@ type CoreAgentDeps = {
ensureAgentWorkspace: (params?: { dir: string }) => Promise<void>;
resolveStorePath: (store?: string, opts?: { agentId?: string }) => string;
loadSessionStore: (storePath: string) => Record<string, unknown>;
saveSessionStore: (
storePath: string,
store: Record<string, unknown>,
) => Promise<void>;
saveSessionStore: (storePath: string, store: Record<string, unknown>) => Promise<void>;
resolveSessionFilePath: (
sessionId: string,
entry: unknown,
@@ -116,9 +113,7 @@ function resolveOpenClawRoot(): string {
}
}
throw new Error(
"Unable to resolve core root. Set OPENCLAW_ROOT to the package root.",
);
throw new Error("Unable to resolve core root. Set OPENCLAW_ROOT to the package root.");
}
async function importCoreModule<T>(relativePath: string): Promise<T> {
+4 -5
View File
@@ -85,11 +85,10 @@ describe("CallManager", () => {
const manager = new CallManager(config, storePath);
manager.initialize(provider, "https://example.com/voice/webhook");
const { callId, success } = await manager.initiateCall(
"+15550000002",
undefined,
{ message: "Hello there", mode: "notify" },
);
const { callId, success } = await manager.initiateCall("+15550000002", undefined, {
message: "Hello there",
mode: "notify",
});
expect(success).toBe(true);
manager.processEvent({
+19 -62
View File
@@ -124,8 +124,7 @@ export class CallManager {
const callId = crypto.randomUUID();
const from =
this.config.fromNumber ||
(this.provider?.name === "mock" ? "+15550000000" : undefined);
this.config.fromNumber || (this.provider?.name === "mock" ? "+15550000000" : undefined);
if (!from) {
return { callId: "", success: false, error: "fromNumber not configured" };
}
@@ -157,9 +156,7 @@ export class CallManager {
if (mode === "notify" && initialMessage) {
const pollyVoice = mapVoiceToPolly(this.config.tts?.openai?.voice);
inlineTwiml = this.generateNotifyTwiml(initialMessage, pollyVoice);
console.log(
`[voice-call] Using inline TwiML for notify mode (voice: ${pollyVoice})`,
);
console.log(`[voice-call] Using inline TwiML for notify mode (voice: ${pollyVoice})`);
}
const result = await this.provider.initiateCall({
@@ -196,10 +193,7 @@ export class CallManager {
/**
* Speak to user in an active call.
*/
async speak(
callId: CallId,
text: string,
): Promise<{ success: boolean; error?: string }> {
async speak(callId: CallId, text: string): Promise<{ success: boolean; error?: string }> {
const call = this.activeCalls.get(callId);
if (!call) {
return { success: false, error: "Call not found" };
@@ -222,8 +216,7 @@ export class CallManager {
this.addTranscriptEntry(call, "bot", text);
// Play TTS
const voice =
this.provider?.name === "twilio" ? this.config.tts?.openai?.voice : undefined;
const voice = this.provider?.name === "twilio" ? this.config.tts?.openai?.voice : undefined;
await this.provider.playTts({
callId,
providerCallId: call.providerCallId,
@@ -248,9 +241,7 @@ export class CallManager {
async speakInitialMessage(providerCallId: string): Promise<void> {
const call = this.getCallByProviderCallId(providerCallId);
if (!call) {
console.warn(
`[voice-call] speakInitialMessage: no call found for ${providerCallId}`,
);
console.warn(`[voice-call] speakInitialMessage: no call found for ${providerCallId}`);
return;
}
@@ -258,9 +249,7 @@ export class CallManager {
const mode = (call.metadata?.mode as CallMode) ?? "conversation";
if (!initialMessage) {
console.log(
`[voice-call] speakInitialMessage: no initial message for ${call.callId}`,
);
console.log(`[voice-call] speakInitialMessage: no initial message for ${call.callId}`);
return;
}
@@ -270,29 +259,21 @@ export class CallManager {
this.persistCallRecord(call);
}
console.log(
`[voice-call] Speaking initial message for call ${call.callId} (mode: ${mode})`,
);
console.log(`[voice-call] Speaking initial message for call ${call.callId} (mode: ${mode})`);
const result = await this.speak(call.callId, initialMessage);
if (!result.success) {
console.warn(
`[voice-call] Failed to speak initial message: ${result.error}`,
);
console.warn(`[voice-call] Failed to speak initial message: ${result.error}`);
return;
}
// In notify mode, auto-hangup after delay
if (mode === "notify") {
const delaySec = this.config.outbound.notifyHangupDelaySec;
console.log(
`[voice-call] Notify mode: auto-hangup in ${delaySec}s for call ${call.callId}`,
);
console.log(`[voice-call] Notify mode: auto-hangup in ${delaySec}s for call ${call.callId}`);
setTimeout(async () => {
const currentCall = this.getCall(call.callId);
if (currentCall && !TerminalStates.has(currentCall.state)) {
console.log(
`[voice-call] Notify mode: hanging up call ${call.callId}`,
);
console.log(`[voice-call] Notify mode: hanging up call ${call.callId}`);
await this.endCall(call.callId);
}
}, delaySec * 1000);
@@ -368,9 +349,7 @@ export class CallManager {
return new Promise((resolve, reject) => {
const timeout = setTimeout(() => {
this.transcriptWaiters.delete(callId);
reject(
new Error(`Timed out waiting for transcript after ${timeoutMs}ms`),
);
reject(new Error(`Timed out waiting for transcript after ${timeoutMs}ms`));
}, timeoutMs);
this.transcriptWaiters.set(callId, { resolve, reject, timeout });
@@ -491,10 +470,7 @@ export class CallManager {
const normalized = from?.replace(/\D/g, "") || "";
const allowed = (allowFrom || []).some((num) => {
const normalizedAllow = num.replace(/\D/g, "");
return (
normalized.endsWith(normalizedAllow) ||
normalizedAllow.endsWith(normalized)
);
return normalized.endsWith(normalizedAllow) || normalizedAllow.endsWith(normalized);
});
const status = allowed ? "accepted" : "rejected";
console.log(
@@ -511,11 +487,7 @@ export class CallManager {
/**
* Create a call record for an inbound call.
*/
private createInboundCall(
providerCallId: string,
from: string,
to: string,
): CallRecord {
private createInboundCall(providerCallId: string, from: string, to: string): CallRecord {
const callId = crypto.randomUUID();
const callRecord: CallRecord = {
@@ -530,8 +502,7 @@ export class CallManager {
transcript: [],
processedEventIds: [],
metadata: {
initialMessage:
this.config.inboundGreeting || "Hello! How can I help you today?",
initialMessage: this.config.inboundGreeting || "Hello! How can I help you today?",
},
};
@@ -539,9 +510,7 @@ export class CallManager {
this.providerCallIdMap.set(providerCallId, callId); // Map providerCallId to internal callId
this.persistCallRecord(callRecord);
console.log(
`[voice-call] Created inbound call record: ${callId} from ${from}`,
);
console.log(`[voice-call] Created inbound call record: ${callId} from ${from}`);
return callRecord;
}
@@ -663,10 +632,7 @@ export class CallManager {
call.endReason = "error";
this.transitionState(call, "error");
this.clearMaxDurationTimer(call.callId);
this.rejectTranscriptWaiter(
call.callId,
`Call error: ${event.error}`,
);
this.rejectTranscriptWaiter(call.callId, `Call error: ${event.error}`);
this.activeCalls.delete(call.callId);
if (call.providerCallId) {
this.providerCallIdMap.delete(call.providerCallId);
@@ -680,9 +646,7 @@ export class CallManager {
private maybeSpeakInitialMessageOnAnswered(call: CallRecord): void {
const initialMessage =
typeof call.metadata?.initialMessage === "string"
? call.metadata.initialMessage.trim()
: "";
typeof call.metadata?.initialMessage === "string" ? call.metadata.initialMessage.trim() : "";
if (!initialMessage) return;
@@ -759,10 +723,7 @@ export class CallManager {
}
// States that can cycle during multi-turn conversations
private static readonly ConversationStates = new Set<CallState>([
"speaking",
"listening",
]);
private static readonly ConversationStates = new Set<CallState>(["speaking", "listening"]);
// Non-terminal state order for monotonic transitions
private static readonly StateOrder: readonly CallState[] = [
@@ -808,11 +769,7 @@ export class CallManager {
/**
* Add an entry to the call transcript.
*/
private addTranscriptEntry(
call: CallRecord,
speaker: "bot" | "user",
text: string,
): void {
private addTranscriptEntry(call: CallRecord, speaker: "bot" | "user", text: string): void {
const entry: TranscriptEntry = {
timestamp: Date.now(),
speaker,
+4 -1
View File
@@ -14,7 +14,10 @@ import {
} from "./timers.js";
import { endCall } from "./outbound.js";
function shouldAcceptInbound(config: CallManagerContext["config"], from: string | undefined): boolean {
function shouldAcceptInbound(
config: CallManagerContext["config"],
from: string | undefined,
): boolean {
const { inboundPolicy: policy, allowFrom } = config;
switch (policy) {
+14 -6
View File
@@ -1,6 +1,11 @@
import crypto from "node:crypto";
import { TerminalStates, type CallId, type CallRecord, type OutboundCallOptions } from "../types.js";
import {
TerminalStates,
type CallId,
type CallRecord,
type OutboundCallOptions,
} from "../types.js";
import type { CallMode } from "../config.js";
import { mapVoiceToPolly } from "../voice-mapping.js";
import type { CallManagerContext } from "./context.js";
@@ -8,7 +13,12 @@ import { getCallByProviderCallId } from "./lookup.js";
import { generateNotifyTwiml } from "./twiml.js";
import { addTranscriptEntry, transitionState } from "./state.js";
import { persistCallRecord } from "./store.js";
import { clearMaxDurationTimer, clearTranscriptWaiter, rejectTranscriptWaiter, waitForFinalTranscript } from "./timers.js";
import {
clearMaxDurationTimer,
clearTranscriptWaiter,
rejectTranscriptWaiter,
waitForFinalTranscript,
} from "./timers.js";
export async function initiateCall(
ctx: CallManagerContext,
@@ -38,8 +48,7 @@ export async function initiateCall(
const callId = crypto.randomUUID();
const from =
ctx.config.fromNumber ||
(ctx.provider?.name === "mock" ? "+15550000000" : undefined);
ctx.config.fromNumber || (ctx.provider?.name === "mock" ? "+15550000000" : undefined);
if (!from) {
return { callId: "", success: false, error: "fromNumber not configured" };
}
@@ -120,8 +129,7 @@ export async function speak(
addTranscriptEntry(call, "bot", text);
const voice =
ctx.provider?.name === "twilio" ? ctx.config.tts?.openai?.voice : undefined;
const voice = ctx.provider?.name === "twilio" ? ctx.config.tts?.openai?.voice : undefined;
await ctx.provider.playTts({
callId,
providerCallId: call.providerCallId,
+1 -5
View File
@@ -35,11 +35,7 @@ export function transitionState(call: CallRecord, newState: CallState): void {
}
}
export function addTranscriptEntry(
call: CallRecord,
speaker: "bot" | "user",
text: string,
): void {
export function addTranscriptEntry(call: CallRecord, speaker: "bot" | "user", text: string): void {
const entry: TranscriptEntry = {
timestamp: Date.now(),
speaker,
+1 -4
View File
@@ -67,10 +67,7 @@ export function resolveTranscriptWaiter(
waiter.resolve(transcript);
}
export function waitForFinalTranscript(
ctx: CallManagerContext,
callId: CallId,
): Promise<string> {
export function waitForFinalTranscript(ctx: CallManagerContext, callId: CallId): Promise<string> {
// Only allow one in-flight waiter per call.
rejectTranscriptWaiter(ctx, callId, "Transcript waiter replaced");
+6 -22
View File
@@ -87,10 +87,7 @@ export class MediaStreamHandler {
/**
* Handle new WebSocket connection from Twilio.
*/
private async handleConnection(
ws: WebSocket,
_request: IncomingMessage,
): Promise<void> {
private async handleConnection(ws: WebSocket, _request: IncomingMessage): Promise<void> {
let session: StreamSession | null = null;
ws.on("message", async (data: Buffer) => {
@@ -140,16 +137,11 @@ export class MediaStreamHandler {
/**
* Handle stream start event.
*/
private async handleStart(
ws: WebSocket,
message: TwilioMediaMessage,
): Promise<StreamSession> {
private async handleStart(ws: WebSocket, message: TwilioMediaMessage): Promise<StreamSession> {
const streamSid = message.streamSid || "";
const callSid = message.start?.callSid || "";
console.log(
`[MediaStream] Stream started: ${streamSid} (call: ${callSid})`,
);
console.log(`[MediaStream] Stream started: ${streamSid} (call: ${callSid})`);
// Create STT session
const sttSession = this.config.sttProvider.createSession();
@@ -181,10 +173,7 @@ export class MediaStreamHandler {
// Connect to OpenAI STT (non-blocking, log errors but don't fail the call)
sttSession.connect().catch((err) => {
console.warn(
`[MediaStream] STT connection failed (TTS still works):`,
err.message,
);
console.warn(`[MediaStream] STT connection failed (TTS still works):`, err.message);
});
return session;
@@ -252,10 +241,7 @@ export class MediaStreamHandler {
* Queue a TTS operation for sequential playback.
* Only one TTS operation plays at a time per stream to prevent overlap.
*/
async queueTts(
streamSid: string,
playFn: (signal: AbortSignal) => Promise<void>,
): Promise<void> {
async queueTts(streamSid: string, playFn: (signal: AbortSignal) => Promise<void>): Promise<void> {
const queue = this.getTtsQueue(streamSid);
let resolveEntry: () => void;
let rejectEntry: (error: unknown) => void;
@@ -292,9 +278,7 @@ export class MediaStreamHandler {
* Get active session by call ID.
*/
getSessionByCallId(callId: string): StreamSession | undefined {
return [...this.sessions.values()].find(
(session) => session.callId === callId,
);
return [...this.sessions.values()].find((session) => session.callId === callId);
}
/**
+4 -12
View File
@@ -50,9 +50,7 @@ export class MockProvider implements VoiceCallProvider {
}
}
private normalizeEvent(
evt: Partial<NormalizedEvent>,
): NormalizedEvent | null {
private normalizeEvent(evt: Partial<NormalizedEvent>): NormalizedEvent | null {
if (!evt.type || !evt.callId) return null;
const base = {
@@ -96,9 +94,7 @@ export class MockProvider implements VoiceCallProvider {
}
case "call.silence": {
const payload = evt as Partial<
NormalizedEvent & { durationMs?: number }
>;
const payload = evt as Partial<NormalizedEvent & { durationMs?: number }>;
return {
...base,
type: evt.type,
@@ -116,9 +112,7 @@ export class MockProvider implements VoiceCallProvider {
}
case "call.ended": {
const payload = evt as Partial<
NormalizedEvent & { reason?: EndReason }
>;
const payload = evt as Partial<NormalizedEvent & { reason?: EndReason }>;
return {
...base,
type: evt.type,
@@ -127,9 +121,7 @@ export class MockProvider implements VoiceCallProvider {
}
case "call.error": {
const payload = evt as Partial<
NormalizedEvent & { error?: string; retryable?: boolean }
>;
const payload = evt as Partial<NormalizedEvent & { error?: string; retryable?: boolean }>;
return {
...base,
type: evt.type,
+8 -21
View File
@@ -103,8 +103,7 @@ export class PlivoProvider implements VoiceCallProvider {
}
parseWebhookEvent(ctx: WebhookContext): ProviderWebhookParseResult {
const flow =
typeof ctx.query?.flow === "string" ? ctx.query.flow.trim() : "";
const flow = typeof ctx.query?.flow === "string" ? ctx.query.flow.trim() : "";
const parsed = this.parseBody(ctx.rawBody);
if (!parsed) {
@@ -139,9 +138,7 @@ export class PlivoProvider implements VoiceCallProvider {
if (flow === "xml-listen") {
const callId = this.getCallIdFromQuery(ctx);
const pending = callId
? this.pendingListenByCallId.get(callId)
: undefined;
const pending = callId ? this.pendingListenByCallId.get(callId) : undefined;
if (callId) this.pendingListenByCallId.delete(callId);
const actionUrl = this.buildActionUrl(ctx, {
@@ -180,10 +177,7 @@ export class PlivoProvider implements VoiceCallProvider {
};
}
private normalizeEvent(
params: URLSearchParams,
callIdOverride?: string,
): NormalizedEvent | null {
private normalizeEvent(params: URLSearchParams, callIdOverride?: string): NormalizedEvent | null {
const callUuid = params.get("CallUUID") || "";
const requestUuid = params.get("RequestUUID") || "";
@@ -329,11 +323,9 @@ export class PlivoProvider implements VoiceCallProvider {
}
async playTts(input: PlayTtsInput): Promise<void> {
const callUuid = this.requestUuidToCallUuid.get(input.providerCallId) ??
input.providerCallId;
const callUuid = this.requestUuidToCallUuid.get(input.providerCallId) ?? input.providerCallId;
const webhookBase =
this.callUuidToWebhookUrl.get(callUuid) ||
this.callIdToWebhookUrl.get(input.callId);
this.callUuidToWebhookUrl.get(callUuid) || this.callIdToWebhookUrl.get(input.callId);
if (!webhookBase) {
throw new Error("Missing webhook URL for this call (provider state missing)");
}
@@ -364,11 +356,9 @@ export class PlivoProvider implements VoiceCallProvider {
}
async startListening(input: StartListeningInput): Promise<void> {
const callUuid = this.requestUuidToCallUuid.get(input.providerCallId) ??
input.providerCallId;
const callUuid = this.requestUuidToCallUuid.get(input.providerCallId) ?? input.providerCallId;
const webhookBase =
this.callUuidToWebhookUrl.get(callUuid) ||
this.callIdToWebhookUrl.get(input.callId);
this.callUuidToWebhookUrl.get(callUuid) || this.callIdToWebhookUrl.get(input.callId);
if (!webhookBase) {
throw new Error("Missing webhook URL for this call (provider state missing)");
}
@@ -427,10 +417,7 @@ export class PlivoProvider implements VoiceCallProvider {
</Response>`;
}
private static xmlGetInputSpeech(params: {
actionUrl: string;
language?: string;
}): string {
private static xmlGetInputSpeech(params: { actionUrl: string; language?: string }): string {
const language = params.language || "en-US";
return `<?xml version="1.0" encoding="UTF-8"?>
<Response>
@@ -183,9 +183,7 @@ class OpenAIRealtimeSTTSession implements RealtimeSTTSession {
return;
}
if (
this.reconnectAttempts >= OpenAIRealtimeSTTSession.MAX_RECONNECT_ATTEMPTS
) {
if (this.reconnectAttempts >= OpenAIRealtimeSTTSession.MAX_RECONNECT_ATTEMPTS) {
console.error(
`[RealtimeSTT] Max reconnect attempts (${OpenAIRealtimeSTTSession.MAX_RECONNECT_ATTEMPTS}) reached`,
);
@@ -193,9 +191,7 @@ class OpenAIRealtimeSTTSession implements RealtimeSTTSession {
}
this.reconnectAttempts++;
const delay =
OpenAIRealtimeSTTSession.RECONNECT_DELAY_MS *
2 ** (this.reconnectAttempts - 1);
const delay = OpenAIRealtimeSTTSession.RECONNECT_DELAY_MS * 2 ** (this.reconnectAttempts - 1);
console.log(
`[RealtimeSTT] Reconnecting ${this.reconnectAttempts}/${OpenAIRealtimeSTTSession.MAX_RECONNECT_ATTEMPTS} in ${delay}ms...`,
);
+5 -10
View File
@@ -161,9 +161,7 @@ export class TelnyxProvider implements VoiceCallProvider {
let callId = "";
if (data.payload?.client_state) {
try {
callId = Buffer.from(data.payload.client_state, "base64").toString(
"utf8",
);
callId = Buffer.from(data.payload.client_state, "base64").toString("utf8");
} catch {
// Fallback if not valid Base64
callId = data.payload.client_state;
@@ -312,13 +310,10 @@ export class TelnyxProvider implements VoiceCallProvider {
* Start transcription (STT) via Telnyx.
*/
async startListening(input: StartListeningInput): Promise<void> {
await this.apiRequest(
`/calls/${input.providerCallId}/actions/transcription_start`,
{
command_id: crypto.randomUUID(),
language: input.language || "en",
},
);
await this.apiRequest(`/calls/${input.providerCallId}/actions/transcription_start`, {
command_id: crypto.randomUUID(),
language: input.language || "en",
});
}
/**
@@ -84,9 +84,7 @@ export class OpenAITTSProvider {
this.instructions = config.instructions;
if (!this.apiKey) {
throw new Error(
"OpenAI API key required (set OPENAI_API_KEY or pass apiKey)",
);
throw new Error("OpenAI API key required (set OPENAI_API_KEY or pass apiKey)");
}
}
@@ -215,11 +213,7 @@ function linearToMulaw(sample: number): number {
// Add bias and find segment
sample += BIAS;
let exponent = 7;
for (
let expMask = 0x4000;
(sample & expMask) === 0 && exponent > 0;
exponent--, expMask >>= 1
) {
for (let expMask = 0x4000; (sample & expMask) === 0 && exponent > 0; exponent--, expMask >>= 1) {
// Find the segment (exponent)
}
@@ -252,10 +246,7 @@ export function mulawToLinear(mulaw: number): number {
* Chunk audio buffer into 20ms frames for streaming.
* At 8kHz mono, 20ms = 160 samples = 160 bytes (mu-law).
*/
export function chunkAudio(
audio: Buffer,
chunkSize = 160,
): Generator<Buffer, void, unknown> {
export function chunkAudio(audio: Buffer, chunkSize = 160): Generator<Buffer, void, unknown> {
return (function* () {
for (let i = 0; i < audio.length; i += chunkSize) {
yield audio.subarray(i, Math.min(i + chunkSize, audio.length));
@@ -12,10 +12,7 @@ function createProvider(): TwilioProvider {
);
}
function createContext(
rawBody: string,
query?: WebhookContext["query"],
): WebhookContext {
function createContext(rawBody: string, query?: WebhookContext["query"]): WebhookContext {
return {
headers: {},
rawBody,
+12 -37
View File
@@ -211,22 +211,16 @@ export class TwilioProvider implements VoiceCallProvider {
/**
* Parse Twilio direction to normalized format.
*/
private static parseDirection(
direction: string | null,
): "inbound" | "outbound" | undefined {
private static parseDirection(direction: string | null): "inbound" | "outbound" | undefined {
if (direction === "inbound") return "inbound";
if (direction === "outbound-api" || direction === "outbound-dial")
return "outbound";
if (direction === "outbound-api" || direction === "outbound-dial") return "outbound";
return undefined;
}
/**
* Convert Twilio webhook params to normalized event format.
*/
private normalizeEvent(
params: URLSearchParams,
callIdOverride?: string,
): NormalizedEvent | null {
private normalizeEvent(params: URLSearchParams, callIdOverride?: string): NormalizedEvent | null {
const callSid = params.get("CallSid") || "";
const baseEvent = {
@@ -300,8 +294,7 @@ export class TwilioProvider implements VoiceCallProvider {
if (!ctx) return TwilioProvider.EMPTY_TWIML;
const params = new URLSearchParams(ctx.rawBody);
const type =
typeof ctx.query?.type === "string" ? ctx.query.type.trim() : undefined;
const type = typeof ctx.query?.type === "string" ? ctx.query.type.trim() : undefined;
const isStatusCallback = type === "status";
const callStatus = params.get("CallStatus");
const direction = params.get("Direction");
@@ -329,9 +322,7 @@ export class TwilioProvider implements VoiceCallProvider {
// Conversation mode: return streaming TwiML immediately for outbound calls.
if (isOutbound) {
const streamUrl = this.getStreamUrl();
return streamUrl
? this.getStreamConnectXml(streamUrl)
: TwilioProvider.PAUSE_TWIML;
return streamUrl ? this.getStreamConnectXml(streamUrl) : TwilioProvider.PAUSE_TWIML;
}
}
@@ -344,9 +335,7 @@ export class TwilioProvider implements VoiceCallProvider {
// For inbound calls, answer immediately with stream
if (direction === "inbound") {
const streamUrl = this.getStreamUrl();
return streamUrl
? this.getStreamConnectXml(streamUrl)
: TwilioProvider.PAUSE_TWIML;
return streamUrl ? this.getStreamConnectXml(streamUrl) : TwilioProvider.PAUSE_TWIML;
}
// For outbound calls, only connect to stream when call is in-progress
@@ -355,9 +344,7 @@ export class TwilioProvider implements VoiceCallProvider {
}
const streamUrl = this.getStreamUrl();
return streamUrl
? this.getStreamConnectXml(streamUrl)
: TwilioProvider.PAUSE_TWIML;
return streamUrl ? this.getStreamConnectXml(streamUrl) : TwilioProvider.PAUSE_TWIML;
}
/**
@@ -374,9 +361,7 @@ export class TwilioProvider implements VoiceCallProvider {
const origin = url.origin;
// Convert https:// to wss:// for WebSocket
const wsOrigin = origin
.replace(/^https:\/\//, "wss://")
.replace(/^http:\/\//, "ws://");
const wsOrigin = origin.replace(/^https:\/\//, "wss://").replace(/^http:\/\//, "ws://");
// Append the stream path
const path = this.options.streamPath.startsWith("/")
@@ -433,10 +418,7 @@ export class TwilioProvider implements VoiceCallProvider {
Timeout: "30",
};
const result = await this.apiRequest<TwilioCallResponse>(
"/Calls.json",
params,
);
const result = await this.apiRequest<TwilioCallResponse>("/Calls.json", params);
this.callWebhookUrls.set(result.sid, url.toString());
@@ -489,9 +471,7 @@ export class TwilioProvider implements VoiceCallProvider {
// Fall back to TwiML <Say> (may not work on all accounts)
const webhookUrl = this.callWebhookUrls.get(input.providerCallId);
if (!webhookUrl) {
throw new Error(
"Missing webhook URL for this call (provider state not initialized)",
);
throw new Error("Missing webhook URL for this call (provider state not initialized)");
}
console.warn(
@@ -517,10 +497,7 @@ export class TwilioProvider implements VoiceCallProvider {
* Generates audio with core TTS, converts to mu-law, and streams via WebSocket.
* Uses a queue to serialize playback and prevent overlapping audio.
*/
private async playTtsViaStream(
text: string,
streamSid: string,
): Promise<void> {
private async playTtsViaStream(text: string, streamSid: string): Promise<void> {
if (!this.ttsProvider || !this.mediaStreamHandler) {
throw new Error("TTS provider and media stream handler required");
}
@@ -556,9 +533,7 @@ export class TwilioProvider implements VoiceCallProvider {
async startListening(input: StartListeningInput): Promise<void> {
const webhookUrl = this.callWebhookUrls.get(input.providerCallId);
if (!webhookUrl) {
throw new Error(
"Missing webhook URL for this call (provider state not initialized)",
);
throw new Error("Missing webhook URL for this call (provider state not initialized)");
}
const twiml = `<?xml version="1.0" encoding="UTF-8"?>
@@ -9,19 +9,16 @@ export async function twilioApiRequest<T = unknown>(params: {
const bodyParams =
params.body instanceof URLSearchParams
? params.body
: Object.entries(params.body).reduce<URLSearchParams>(
(acc, [key, value]) => {
if (Array.isArray(value)) {
for (const entry of value) {
acc.append(key, entry);
}
} else if (typeof value === "string") {
acc.append(key, value);
: Object.entries(params.body).reduce<URLSearchParams>((acc, [key, value]) => {
if (Array.isArray(value)) {
for (const entry of value) {
acc.append(key, entry);
}
return acc;
},
new URLSearchParams(),
);
} else if (typeof value === "string") {
acc.append(key, value);
}
return acc;
}, new URLSearchParams());
const response = await fetch(`${params.baseUrl}${params.endpoint}`, {
method: "POST",
@@ -11,8 +11,7 @@ export function verifyTwilioProviderWebhook(params: {
}): WebhookVerificationResult {
const result = verifyTwilioWebhook(params.ctx, params.authToken, {
publicUrl: params.currentPublicUrl || undefined,
allowNgrokFreeTierLoopbackBypass:
params.options.allowNgrokFreeTierLoopbackBypass ?? false,
allowNgrokFreeTierLoopbackBypass: params.options.allowNgrokFreeTierLoopbackBypass ?? false,
skipVerification: params.options.skipVerification,
});
@@ -41,8 +41,7 @@ type SessionEntry = {
export async function generateVoiceResponse(
params: VoiceResponseParams,
): Promise<VoiceResponseResult> {
const { voiceConfig, callId, from, transcript, userMessage, coreConfig } =
params;
const { voiceConfig, callId, from, transcript, userMessage, coreConfig } = params;
if (!coreConfig) {
return { text: null, error: "Core config unavailable for voice response" };
@@ -54,10 +53,7 @@ export async function generateVoiceResponse(
} catch (err) {
return {
text: null,
error:
err instanceof Error
? err.message
: "Unable to load core agent dependencies",
error: err instanceof Error ? err.message : "Unable to load core agent dependencies",
};
}
const cfg = coreConfig;
@@ -95,12 +91,9 @@ export async function generateVoiceResponse(
});
// Resolve model from config
const modelRef =
voiceConfig.responseModel ||
`${deps.DEFAULT_PROVIDER}/${deps.DEFAULT_MODEL}`;
const modelRef = voiceConfig.responseModel || `${deps.DEFAULT_PROVIDER}/${deps.DEFAULT_MODEL}`;
const slashIndex = modelRef.indexOf("/");
const provider =
slashIndex === -1 ? deps.DEFAULT_PROVIDER : modelRef.slice(0, slashIndex);
const provider = slashIndex === -1 ? deps.DEFAULT_PROVIDER : modelRef.slice(0, slashIndex);
const model = slashIndex === -1 ? modelRef : modelRef.slice(slashIndex + 1);
// Resolve thinking level
@@ -118,17 +111,13 @@ export async function generateVoiceResponse(
let extraSystemPrompt = basePrompt;
if (transcript.length > 0) {
const history = transcript
.map(
(entry) =>
`${entry.speaker === "bot" ? "You" : "Caller"}: ${entry.text}`,
)
.map((entry) => `${entry.speaker === "bot" ? "You" : "Caller"}: ${entry.text}`)
.join("\n");
extraSystemPrompt = `${basePrompt}\n\nConversation so far:\n${history}`;
}
// Resolve timeout
const timeoutMs =
voiceConfig.responseTimeoutMs ?? deps.resolveAgentTimeoutMs({ cfg });
const timeoutMs = voiceConfig.responseTimeoutMs ?? deps.resolveAgentTimeoutMs({ cfg });
const runId = `voice:${callId}:${Date.now()}`;
try {
+6 -21
View File
@@ -42,9 +42,7 @@ function resolveProvider(config: VoiceCallConfig): VoiceCallProvider {
const allowNgrokFreeTierLoopbackBypass =
config.tunnel?.provider === "ngrok" &&
isLoopbackBind(config.serve?.bind) &&
(config.tunnel?.allowNgrokFreeTierLoopbackBypass ||
config.tunnel?.allowNgrokFreeTier ||
false);
(config.tunnel?.allowNgrokFreeTierLoopbackBypass || config.tunnel?.allowNgrokFreeTier || false);
switch (config.provider) {
case "telnyx":
@@ -63,9 +61,7 @@ function resolveProvider(config: VoiceCallConfig): VoiceCallProvider {
allowNgrokFreeTierLoopbackBypass,
publicUrl: config.publicUrl,
skipVerification: config.skipSignatureVerification,
streamPath: config.streaming?.enabled
? config.streaming.streamPath
: undefined,
streamPath: config.streaming?.enabled ? config.streaming.streamPath : undefined,
},
);
case "plivo":
@@ -83,9 +79,7 @@ function resolveProvider(config: VoiceCallConfig): VoiceCallProvider {
case "mock":
return new MockProvider();
default:
throw new Error(
`Unsupported voice-call provider: ${String(config.provider)}`,
);
throw new Error(`Unsupported voice-call provider: ${String(config.provider)}`);
}
}
@@ -106,9 +100,7 @@ export async function createVoiceCallRuntime(params: {
const config = resolveVoiceCallConfig(rawConfig);
if (!config.enabled) {
throw new Error(
"Voice call disabled. Enable the plugin entry in config.",
);
throw new Error("Voice call disabled. Enable the plugin entry in config.");
}
const validation = validateProviderConfig(config);
@@ -118,12 +110,7 @@ export async function createVoiceCallRuntime(params: {
const provider = resolveProvider(config);
const manager = new CallManager(config);
const webhookServer = new VoiceCallWebhookServer(
config,
manager,
provider,
coreConfig,
);
const webhookServer = new VoiceCallWebhookServer(config, manager, provider, coreConfig);
const localUrl = await webhookServer.start();
@@ -143,9 +130,7 @@ export async function createVoiceCallRuntime(params: {
publicUrl = tunnelResult?.publicUrl ?? null;
} catch (err) {
log.error(
`[voice-call] Tunnel setup failed: ${
err instanceof Error ? err.message : String(err)
}`,
`[voice-call] Tunnel setup failed: ${err instanceof Error ? err.message : String(err)}`,
);
}
}
+2 -8
View File
@@ -47,10 +47,7 @@ export function pcmToMulaw(pcm: Buffer): Buffer {
return mulaw;
}
export function convertPcmToMulaw8k(
pcm: Buffer,
inputSampleRate: number,
): Buffer {
export function convertPcmToMulaw8k(pcm: Buffer, inputSampleRate: number): Buffer {
const pcm8k = resamplePcmTo8k(pcm, inputSampleRate);
return pcmToMulaw(pcm8k);
}
@@ -58,10 +55,7 @@ export function convertPcmToMulaw8k(
/**
* Chunk audio buffer into 20ms frames for streaming (8kHz mono mu-law).
*/
export function chunkAudio(
audio: Buffer,
chunkSize = 160,
): Generator<Buffer, void, unknown> {
export function chunkAudio(audio: Buffer, chunkSize = 160): Generator<Buffer, void, unknown> {
return (function* () {
for (let i = 0; i < audio.length; i += chunkSize) {
yield audio.subarray(i, Math.min(i + chunkSize, audio.length));
+1 -4
View File
@@ -44,10 +44,7 @@ export function createTelephonyTtsProvider(params: {
};
}
function applyTtsOverride(
coreConfig: CoreConfig,
override?: VoiceCallTtsConfig,
): CoreConfig {
function applyTtsOverride(coreConfig: CoreConfig, override?: VoiceCallTtsConfig): CoreConfig {
if (!override) return coreConfig;
const base = coreConfig.messages?.tts;
+7 -23
View File
@@ -52,14 +52,7 @@ export async function startNgrokTunnel(config: {
}
// Build ngrok command args
const args = [
"http",
String(config.port),
"--log",
"stdout",
"--log-format",
"json",
];
const args = ["http", String(config.port), "--log", "stdout", "--log-format", "json"];
// Add custom domain if provided (paid ngrok feature)
if (config.domain) {
@@ -234,11 +227,9 @@ export async function startTailscaleTunnel(config: {
const localUrl = `http://127.0.0.1:${config.port}${path}`;
return new Promise((resolve, reject) => {
const proc = spawn(
"tailscale",
[config.mode, "--bg", "--yes", "--set-path", path, localUrl],
{ stdio: ["ignore", "pipe", "pipe"] },
);
const proc = spawn("tailscale", [config.mode, "--bg", "--yes", "--set-path", path, localUrl], {
stdio: ["ignore", "pipe", "pipe"],
});
const timeout = setTimeout(() => {
proc.kill("SIGKILL");
@@ -249,9 +240,7 @@ export async function startTailscaleTunnel(config: {
clearTimeout(timeout);
if (code === 0) {
const publicUrl = `https://${dnsName}${path}`;
console.log(
`[voice-call] Tailscale ${config.mode} active: ${publicUrl}`,
);
console.log(`[voice-call] Tailscale ${config.mode} active: ${publicUrl}`);
resolve({
publicUrl,
@@ -275,10 +264,7 @@ export async function startTailscaleTunnel(config: {
/**
* Stop a Tailscale serve/funnel tunnel.
*/
async function stopTailscaleTunnel(
mode: "serve" | "funnel",
path: string,
): Promise<void> {
async function stopTailscaleTunnel(mode: "serve" | "funnel", path: string): Promise<void> {
return new Promise((resolve) => {
const proc = spawn("tailscale", [mode, "off", path], {
stdio: "ignore",
@@ -299,9 +285,7 @@ async function stopTailscaleTunnel(
/**
* Start a tunnel based on configuration.
*/
export async function startTunnel(
config: TunnelConfig,
): Promise<TunnelResult | null> {
export async function startTunnel(config: TunnelConfig): Promise<TunnelResult | null> {
switch (config.provider) {
case "ngrok":
return startNgrokTunnel({
@@ -38,9 +38,7 @@ function plivoV3Signature(params: {
const sortedQuery = Array.from(queryMap.keys())
.sort()
.flatMap((k) =>
[...(queryMap.get(k) ?? [])].sort().map((v) => `${k}=${v}`),
)
.flatMap((k) => [...(queryMap.get(k) ?? [])].sort().map((v) => `${k}=${v}`))
.join("&");
const postParams = new URLSearchParams(params.postBody);
@@ -71,24 +69,17 @@ function plivoV3Signature(params: {
return canonicalizeBase64(digest);
}
function twilioSignature(params: {
authToken: string;
url: string;
postBody: string;
}): string {
function twilioSignature(params: { authToken: string; url: string; postBody: string }): string {
let dataToSign = params.url;
const sortedParams = Array.from(
new URLSearchParams(params.postBody).entries(),
).sort((a, b) => a[0].localeCompare(b[0]));
const sortedParams = Array.from(new URLSearchParams(params.postBody).entries()).sort((a, b) =>
a[0].localeCompare(b[0]),
);
for (const [key, value] of sortedParams) {
dataToSign += key + value;
}
return crypto
.createHmac("sha1", params.authToken)
.update(dataToSign)
.digest("base64");
return crypto.createHmac("sha1", params.authToken).update(dataToSign).digest("base64");
}
describe("verifyPlivoWebhook", () => {
+5 -18
View File
@@ -98,10 +98,7 @@ export function reconstructWebhookUrl(ctx: WebhookContext): string {
return `${proto}://${host}${path}`;
}
function buildTwilioVerificationUrl(
ctx: WebhookContext,
publicUrl?: string,
): string {
function buildTwilioVerificationUrl(ctx: WebhookContext, publicUrl?: string): string {
if (!publicUrl) {
return reconstructWebhookUrl(ctx);
}
@@ -186,12 +183,7 @@ export function verifyTwilioWebhook(
const params = new URLSearchParams(ctx.rawBody);
// Validate signature
const isValid = validateTwilioSignature(
authToken,
signature,
verificationUrl,
params,
);
const isValid = validateTwilioSignature(authToken, signature, verificationUrl, params);
if (isValid) {
return { ok: true, verificationUrl };
@@ -199,8 +191,7 @@ export function verifyTwilioWebhook(
// Check if this is ngrok free tier - the URL might have different format
const isNgrokFreeTier =
verificationUrl.includes(".ngrok-free.app") ||
verificationUrl.includes(".ngrok.io");
verificationUrl.includes(".ngrok-free.app") || verificationUrl.includes(".ngrok.io");
if (
isNgrokFreeTier &&
@@ -353,10 +344,7 @@ function validatePlivoV3Signature(params: {
});
const hmacBase = `${baseUrl}.${params.nonce}`;
const digest = crypto
.createHmac("sha256", params.authToken)
.update(hmacBase)
.digest("base64");
const digest = crypto.createHmac("sha256", params.authToken).update(hmacBase).digest("base64");
const expected = normalizeSignatureBase64(digest);
// Header can contain multiple signatures separated by commas.
@@ -413,8 +401,7 @@ export function verifyPlivoWebhook(
}
if (signatureV3 && nonceV3) {
const method =
ctx.method === "GET" || ctx.method === "POST" ? ctx.method : null;
const method = ctx.method === "GET" || ctx.method === "POST" ? ctx.method : null;
if (!method) {
return {
+17 -53
View File
@@ -54,13 +54,10 @@ export class VoiceCallWebhookServer {
* Initialize media streaming with OpenAI Realtime STT.
*/
private initializeMediaStreaming(): void {
const apiKey =
this.config.streaming?.openaiApiKey || process.env.OPENAI_API_KEY;
const apiKey = this.config.streaming?.openaiApiKey || process.env.OPENAI_API_KEY;
if (!apiKey) {
console.warn(
"[voice-call] Streaming enabled but no OpenAI API key found",
);
console.warn("[voice-call] Streaming enabled but no OpenAI API key found");
return;
}
@@ -74,9 +71,7 @@ export class VoiceCallWebhookServer {
const streamConfig: MediaStreamConfig = {
sttProvider,
onTranscript: (providerCallId, transcript) => {
console.log(
`[voice-call] Transcript for ${providerCallId}: ${transcript}`,
);
console.log(`[voice-call] Transcript for ${providerCallId}: ${transcript}`);
// Clear TTS queue on barge-in (user started speaking, interrupt current playback)
if (this.provider.name === "twilio") {
@@ -86,9 +81,7 @@ export class VoiceCallWebhookServer {
// Look up our internal call ID from the provider call ID
const call = this.manager.getCallByProviderCallId(providerCallId);
if (!call) {
console.warn(
`[voice-call] No active call found for provider ID: ${providerCallId}`,
);
console.warn(`[voice-call] No active call found for provider ID: ${providerCallId}`);
return;
}
@@ -106,8 +99,7 @@ export class VoiceCallWebhookServer {
// Auto-respond in conversation mode (inbound always, outbound if mode is conversation)
const callMode = call.metadata?.mode as string | undefined;
const shouldRespond =
call.direction === "inbound" || callMode === "conversation";
const shouldRespond = call.direction === "inbound" || callMode === "conversation";
if (shouldRespond) {
this.handleInboundResponse(call.callId, transcript).catch((err) => {
console.warn(`[voice-call] Failed to auto-respond:`, err);
@@ -123,15 +115,10 @@ export class VoiceCallWebhookServer {
console.log(`[voice-call] Partial for ${callId}: ${partial}`);
},
onConnect: (callId, streamSid) => {
console.log(
`[voice-call] Media stream connected: ${callId} -> ${streamSid}`,
);
console.log(`[voice-call] Media stream connected: ${callId} -> ${streamSid}`);
// Register stream with provider for TTS routing
if (this.provider.name === "twilio") {
(this.provider as TwilioProvider).registerCallStream(
callId,
streamSid,
);
(this.provider as TwilioProvider).registerCallStream(callId, streamSid);
}
// Speak initial message if one was provided when call was initiated
@@ -173,10 +160,7 @@ export class VoiceCallWebhookServer {
// Handle WebSocket upgrades for media streams
if (this.mediaStreamHandler) {
this.server.on("upgrade", (request, socket, head) => {
const url = new URL(
request.url || "/",
`http://${request.headers.host}`,
);
const url = new URL(request.url || "/", `http://${request.headers.host}`);
if (url.pathname === streamPath) {
console.log("[voice-call] WebSocket upgrade for media stream");
@@ -193,9 +177,7 @@ export class VoiceCallWebhookServer {
const url = `http://${bind}:${port}${webhookPath}`;
console.log(`[voice-call] Webhook server listening on ${url}`);
if (this.mediaStreamHandler) {
console.log(
`[voice-call] Media stream WebSocket on ws://${bind}:${port}${streamPath}`,
);
console.log(`[voice-call] Media stream WebSocket on ws://${bind}:${port}${streamPath}`);
}
resolve(url);
});
@@ -258,9 +240,7 @@ export class VoiceCallWebhookServer {
// Verify signature
const verification = this.provider.verifyWebhook(ctx);
if (!verification.ok) {
console.warn(
`[voice-call] Webhook verification failed: ${verification.reason}`,
);
console.warn(`[voice-call] Webhook verification failed: ${verification.reason}`);
res.statusCode = 401;
res.end("Unauthorized");
return;
@@ -274,10 +254,7 @@ export class VoiceCallWebhookServer {
try {
this.manager.processEvent(event);
} catch (err) {
console.error(
`[voice-call] Error processing event ${event.type}:`,
err,
);
console.error(`[voice-call] Error processing event ${event.type}:`, err);
}
}
@@ -285,9 +262,7 @@ export class VoiceCallWebhookServer {
res.statusCode = result.statusCode || 200;
if (result.providerResponseHeaders) {
for (const [key, value] of Object.entries(
result.providerResponseHeaders,
)) {
for (const [key, value] of Object.entries(result.providerResponseHeaders)) {
res.setHeader(key, value);
}
}
@@ -311,13 +286,8 @@ export class VoiceCallWebhookServer {
* Handle auto-response for inbound calls using the agent system.
* Supports tool calling for richer voice interactions.
*/
private async handleInboundResponse(
callId: string,
userMessage: string,
): Promise<void> {
console.log(
`[voice-call] Auto-responding to inbound call ${callId}: "${userMessage}"`,
);
private async handleInboundResponse(callId: string, userMessage: string): Promise<void> {
console.log(`[voice-call] Auto-responding to inbound call ${callId}: "${userMessage}"`);
// Get call context for conversation history
const call = this.manager.getCall(callId);
@@ -344,9 +314,7 @@ export class VoiceCallWebhookServer {
});
if (result.error) {
console.error(
`[voice-call] Response generation error: ${result.error}`,
);
console.error(`[voice-call] Response generation error: ${result.error}`);
return;
}
@@ -458,9 +426,7 @@ export async function cleanupTailscaleExposureRoute(opts: {
* Setup Tailscale serve/funnel for the webhook server.
* This is a helper that shells out to `tailscale serve` or `tailscale funnel`.
*/
export async function setupTailscaleExposure(
config: VoiceCallConfig,
): Promise<string | null> {
export async function setupTailscaleExposure(config: VoiceCallConfig): Promise<string | null> {
if (config.tailscale.mode === "off") {
return null;
}
@@ -479,9 +445,7 @@ export async function setupTailscaleExposure(
/**
* Cleanup Tailscale serve/funnel.
*/
export async function cleanupTailscaleExposure(
config: VoiceCallConfig,
): Promise<void> {
export async function cleanupTailscaleExposure(config: VoiceCallConfig): Promise<void> {
if (config.tailscale.mode === "off") {
return;
}