mirror of
https://github.com/farcasclaudiu/openclaw.git
synced 2026-06-29 11:02:12 +03:00
chore: Run pnpm format:fix.
This commit is contained in:
@@ -31,10 +31,7 @@ function resolveDefaultStorePath(config: VoiceCallConfig): string {
|
||||
const existing =
|
||||
[resolvedPreferred].find((dir) => {
|
||||
try {
|
||||
return (
|
||||
fs.existsSync(path.join(dir, "calls.jsonl")) ||
|
||||
fs.existsSync(dir)
|
||||
);
|
||||
return fs.existsSync(path.join(dir, "calls.jsonl")) || fs.existsSync(dir);
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
@@ -62,10 +59,7 @@ export function registerVoiceCallCli(params: {
|
||||
root
|
||||
.command("call")
|
||||
.description("Initiate an outbound voice call")
|
||||
.requiredOption(
|
||||
"-m, --message <text>",
|
||||
"Message to speak when call connects",
|
||||
)
|
||||
.requiredOption("-m, --message <text>", "Message to speak when call connects")
|
||||
.option(
|
||||
"-t, --to <phone>",
|
||||
"Phone number to call (E.164 format, uses config toNumber if not set)",
|
||||
@@ -75,27 +69,23 @@ export function registerVoiceCallCli(params: {
|
||||
"Call mode: notify (hangup after message) or conversation (stay open)",
|
||||
"conversation",
|
||||
)
|
||||
.action(
|
||||
async (options: { message: string; to?: string; mode?: string }) => {
|
||||
const rt = await ensureRuntime();
|
||||
const to = options.to ?? rt.config.toNumber;
|
||||
if (!to) {
|
||||
throw new Error("Missing --to and no toNumber configured");
|
||||
}
|
||||
const result = await rt.manager.initiateCall(to, undefined, {
|
||||
message: options.message,
|
||||
mode:
|
||||
options.mode === "notify" || options.mode === "conversation"
|
||||
? options.mode
|
||||
: undefined,
|
||||
});
|
||||
if (!result.success) {
|
||||
throw new Error(result.error || "initiate failed");
|
||||
}
|
||||
// eslint-disable-next-line no-console
|
||||
console.log(JSON.stringify({ callId: result.callId }, null, 2));
|
||||
},
|
||||
);
|
||||
.action(async (options: { message: string; to?: string; mode?: string }) => {
|
||||
const rt = await ensureRuntime();
|
||||
const to = options.to ?? rt.config.toNumber;
|
||||
if (!to) {
|
||||
throw new Error("Missing --to and no toNumber configured");
|
||||
}
|
||||
const result = await rt.manager.initiateCall(to, undefined, {
|
||||
message: options.message,
|
||||
mode:
|
||||
options.mode === "notify" || options.mode === "conversation" ? options.mode : undefined,
|
||||
});
|
||||
if (!result.success) {
|
||||
throw new Error(result.error || "initiate failed");
|
||||
}
|
||||
// eslint-disable-next-line no-console
|
||||
console.log(JSON.stringify({ callId: result.callId }, null, 2));
|
||||
});
|
||||
|
||||
root
|
||||
.command("start")
|
||||
@@ -107,23 +97,19 @@ export function registerVoiceCallCli(params: {
|
||||
"Call mode: notify (hangup after message) or conversation (stay open)",
|
||||
"conversation",
|
||||
)
|
||||
.action(
|
||||
async (options: { to: string; message?: string; mode?: string }) => {
|
||||
const rt = await ensureRuntime();
|
||||
const result = await rt.manager.initiateCall(options.to, undefined, {
|
||||
message: options.message,
|
||||
mode:
|
||||
options.mode === "notify" || options.mode === "conversation"
|
||||
? options.mode
|
||||
: undefined,
|
||||
});
|
||||
if (!result.success) {
|
||||
throw new Error(result.error || "initiate failed");
|
||||
}
|
||||
// eslint-disable-next-line no-console
|
||||
console.log(JSON.stringify({ callId: result.callId }, null, 2));
|
||||
},
|
||||
);
|
||||
.action(async (options: { to: string; message?: string; mode?: string }) => {
|
||||
const rt = await ensureRuntime();
|
||||
const result = await rt.manager.initiateCall(options.to, undefined, {
|
||||
message: options.message,
|
||||
mode:
|
||||
options.mode === "notify" || options.mode === "conversation" ? options.mode : undefined,
|
||||
});
|
||||
if (!result.success) {
|
||||
throw new Error(result.error || "initiate failed");
|
||||
}
|
||||
// eslint-disable-next-line no-console
|
||||
console.log(JSON.stringify({ callId: result.callId }, null, 2));
|
||||
});
|
||||
|
||||
root
|
||||
.command("continue")
|
||||
@@ -132,10 +118,7 @@ export function registerVoiceCallCli(params: {
|
||||
.requiredOption("--message <text>", "Message to speak")
|
||||
.action(async (options: { callId: string; message: string }) => {
|
||||
const rt = await ensureRuntime();
|
||||
const result = await rt.manager.continueCall(
|
||||
options.callId,
|
||||
options.message,
|
||||
);
|
||||
const result = await rt.manager.continueCall(options.callId, options.message);
|
||||
if (!result.success) {
|
||||
throw new Error(result.error || "continue failed");
|
||||
}
|
||||
@@ -185,86 +168,70 @@ export function registerVoiceCallCli(params: {
|
||||
|
||||
root
|
||||
.command("tail")
|
||||
.description(
|
||||
"Tail voice-call JSONL logs (prints new lines; useful during provider tests)",
|
||||
)
|
||||
.description("Tail voice-call JSONL logs (prints new lines; useful during provider tests)")
|
||||
.option("--file <path>", "Path to calls.jsonl", resolveDefaultStorePath(config))
|
||||
.option("--since <n>", "Print last N lines first", "25")
|
||||
.option("--poll <ms>", "Poll interval in ms", "250")
|
||||
.action(
|
||||
async (options: { file: string; since?: string; poll?: string }) => {
|
||||
const file = options.file;
|
||||
const since = Math.max(0, Number(options.since ?? 0));
|
||||
const pollMs = Math.max(50, Number(options.poll ?? 250));
|
||||
.action(async (options: { file: string; since?: string; poll?: string }) => {
|
||||
const file = options.file;
|
||||
const since = Math.max(0, Number(options.since ?? 0));
|
||||
const pollMs = Math.max(50, Number(options.poll ?? 250));
|
||||
|
||||
if (!fs.existsSync(file)) {
|
||||
logger.error(`No log file at ${file}`);
|
||||
process.exit(1);
|
||||
}
|
||||
if (!fs.existsSync(file)) {
|
||||
logger.error(`No log file at ${file}`);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const initial = fs.readFileSync(file, "utf8");
|
||||
const lines = initial.split("\n").filter(Boolean);
|
||||
for (const line of lines.slice(Math.max(0, lines.length - since))) {
|
||||
// eslint-disable-next-line no-console
|
||||
console.log(line);
|
||||
}
|
||||
const initial = fs.readFileSync(file, "utf8");
|
||||
const lines = initial.split("\n").filter(Boolean);
|
||||
for (const line of lines.slice(Math.max(0, lines.length - since))) {
|
||||
// eslint-disable-next-line no-console
|
||||
console.log(line);
|
||||
}
|
||||
|
||||
let offset = Buffer.byteLength(initial, "utf8");
|
||||
let offset = Buffer.byteLength(initial, "utf8");
|
||||
|
||||
for (;;) {
|
||||
try {
|
||||
const stat = fs.statSync(file);
|
||||
if (stat.size < offset) {
|
||||
offset = 0;
|
||||
}
|
||||
if (stat.size > offset) {
|
||||
const fd = fs.openSync(file, "r");
|
||||
try {
|
||||
const buf = Buffer.alloc(stat.size - offset);
|
||||
fs.readSync(fd, buf, 0, buf.length, offset);
|
||||
offset = stat.size;
|
||||
const text = buf.toString("utf8");
|
||||
for (const line of text.split("\n").filter(Boolean)) {
|
||||
// eslint-disable-next-line no-console
|
||||
console.log(line);
|
||||
}
|
||||
} finally {
|
||||
fs.closeSync(fd);
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// ignore and retry
|
||||
for (;;) {
|
||||
try {
|
||||
const stat = fs.statSync(file);
|
||||
if (stat.size < offset) {
|
||||
offset = 0;
|
||||
}
|
||||
await sleep(pollMs);
|
||||
if (stat.size > offset) {
|
||||
const fd = fs.openSync(file, "r");
|
||||
try {
|
||||
const buf = Buffer.alloc(stat.size - offset);
|
||||
fs.readSync(fd, buf, 0, buf.length, offset);
|
||||
offset = stat.size;
|
||||
const text = buf.toString("utf8");
|
||||
for (const line of text.split("\n").filter(Boolean)) {
|
||||
// eslint-disable-next-line no-console
|
||||
console.log(line);
|
||||
}
|
||||
} finally {
|
||||
fs.closeSync(fd);
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// ignore and retry
|
||||
}
|
||||
},
|
||||
);
|
||||
await sleep(pollMs);
|
||||
}
|
||||
});
|
||||
|
||||
root
|
||||
.command("expose")
|
||||
.description("Enable/disable Tailscale serve/funnel for the webhook")
|
||||
.option("--mode <mode>", "off | serve (tailnet) | funnel (public)", "funnel")
|
||||
.option(
|
||||
"--path <path>",
|
||||
"Tailscale path to expose (recommend matching serve.path)",
|
||||
)
|
||||
.option("--path <path>", "Tailscale path to expose (recommend matching serve.path)")
|
||||
.option("--port <port>", "Local webhook port")
|
||||
.option("--serve-path <path>", "Local webhook path")
|
||||
.action(
|
||||
async (options: {
|
||||
mode?: string;
|
||||
port?: string;
|
||||
path?: string;
|
||||
servePath?: string;
|
||||
}) => {
|
||||
async (options: { mode?: string; port?: string; path?: string; servePath?: string }) => {
|
||||
const mode = resolveMode(options.mode ?? "funnel");
|
||||
const servePort = Number(options.port ?? config.serve.port ?? 3334);
|
||||
const servePath = String(
|
||||
options.servePath ?? config.serve.path ?? "/voice/webhook",
|
||||
);
|
||||
const tsPath = String(
|
||||
options.path ?? config.tailscale?.path ?? servePath,
|
||||
);
|
||||
const servePath = String(options.servePath ?? config.serve.path ?? "/voice/webhook");
|
||||
const tsPath = String(options.path ?? config.tailscale?.path ?? servePath);
|
||||
|
||||
const localUrl = `http://127.0.0.1:${servePort}`;
|
||||
|
||||
|
||||
@@ -2,9 +2,7 @@ import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
|
||||
import { validateProviderConfig, resolveVoiceCallConfig, type VoiceCallConfig } from "./config.js";
|
||||
|
||||
function createBaseConfig(
|
||||
provider: "telnyx" | "twilio" | "plivo" | "mock",
|
||||
): VoiceCallConfig {
|
||||
function createBaseConfig(provider: "telnyx" | "twilio" | "plivo" | "mock"): VoiceCallConfig {
|
||||
return {
|
||||
enabled: true,
|
||||
provider,
|
||||
|
||||
@@ -23,12 +23,7 @@ export const E164Schema = z
|
||||
* - "pairing": Unknown callers can request pairing (future)
|
||||
* - "open": Accept all inbound calls (dangerous!)
|
||||
*/
|
||||
export const InboundPolicySchema = z.enum([
|
||||
"disabled",
|
||||
"allowlist",
|
||||
"pairing",
|
||||
"open",
|
||||
]);
|
||||
export const InboundPolicySchema = z.enum(["disabled", "allowlist", "pairing", "open"]);
|
||||
export type InboundPolicy = z.infer<typeof InboundPolicySchema>;
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
@@ -37,33 +32,33 @@ export type InboundPolicy = z.infer<typeof InboundPolicySchema>;
|
||||
|
||||
export const TelnyxConfigSchema = z
|
||||
.object({
|
||||
/** Telnyx API v2 key */
|
||||
apiKey: z.string().min(1).optional(),
|
||||
/** Telnyx connection ID (from Call Control app) */
|
||||
connectionId: z.string().min(1).optional(),
|
||||
/** Public key for webhook signature verification */
|
||||
publicKey: z.string().min(1).optional(),
|
||||
})
|
||||
/** Telnyx API v2 key */
|
||||
apiKey: z.string().min(1).optional(),
|
||||
/** Telnyx connection ID (from Call Control app) */
|
||||
connectionId: z.string().min(1).optional(),
|
||||
/** Public key for webhook signature verification */
|
||||
publicKey: z.string().min(1).optional(),
|
||||
})
|
||||
.strict();
|
||||
export type TelnyxConfig = z.infer<typeof TelnyxConfigSchema>;
|
||||
|
||||
export const TwilioConfigSchema = z
|
||||
.object({
|
||||
/** Twilio Account SID */
|
||||
accountSid: z.string().min(1).optional(),
|
||||
/** Twilio Auth Token */
|
||||
authToken: z.string().min(1).optional(),
|
||||
})
|
||||
/** Twilio Account SID */
|
||||
accountSid: z.string().min(1).optional(),
|
||||
/** Twilio Auth Token */
|
||||
authToken: z.string().min(1).optional(),
|
||||
})
|
||||
.strict();
|
||||
export type TwilioConfig = z.infer<typeof TwilioConfigSchema>;
|
||||
|
||||
export const PlivoConfigSchema = z
|
||||
.object({
|
||||
/** Plivo Auth ID (starts with MA/SA) */
|
||||
authId: z.string().min(1).optional(),
|
||||
/** Plivo Auth Token */
|
||||
authToken: z.string().min(1).optional(),
|
||||
})
|
||||
/** Plivo Auth ID (starts with MA/SA) */
|
||||
authId: z.string().min(1).optional(),
|
||||
/** Plivo Auth Token */
|
||||
authToken: z.string().min(1).optional(),
|
||||
})
|
||||
.strict();
|
||||
export type PlivoConfig = z.infer<typeof PlivoConfigSchema>;
|
||||
|
||||
@@ -190,9 +185,7 @@ export const VoiceCallTailscaleConfigSchema = z
|
||||
})
|
||||
.strict()
|
||||
.default({ mode: "off", path: "/voice/webhook" });
|
||||
export type VoiceCallTailscaleConfig = z.infer<
|
||||
typeof VoiceCallTailscaleConfigSchema
|
||||
>;
|
||||
export type VoiceCallTailscaleConfig = z.infer<typeof VoiceCallTailscaleConfigSchema>;
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Tunnel Configuration (unified ngrok/tailscale)
|
||||
@@ -207,9 +200,7 @@ export const VoiceCallTunnelConfigSchema = z
|
||||
* - "tailscale-serve": Tailscale serve (private to tailnet)
|
||||
* - "tailscale-funnel": Tailscale funnel (public HTTPS)
|
||||
*/
|
||||
provider: z
|
||||
.enum(["none", "ngrok", "tailscale-serve", "tailscale-funnel"])
|
||||
.default("none"),
|
||||
provider: z.enum(["none", "ngrok", "tailscale-serve", "tailscale-funnel"]).default("none"),
|
||||
/** ngrok auth token (optional, enables longer sessions and more features) */
|
||||
ngrokAuthToken: z.string().min(1).optional(),
|
||||
/** ngrok custom domain (paid feature, e.g., "myapp.ngrok.io") */
|
||||
@@ -283,9 +274,7 @@ export const VoiceCallStreamingConfigSchema = z
|
||||
vadThreshold: 0.5,
|
||||
streamPath: "/voice/stream",
|
||||
});
|
||||
export type VoiceCallStreamingConfig = z.infer<
|
||||
typeof VoiceCallStreamingConfigSchema
|
||||
>;
|
||||
export type VoiceCallStreamingConfig = z.infer<typeof VoiceCallStreamingConfigSchema>;
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Main Voice Call Configuration
|
||||
@@ -293,90 +282,90 @@ export type VoiceCallStreamingConfig = z.infer<
|
||||
|
||||
export const VoiceCallConfigSchema = z
|
||||
.object({
|
||||
/** Enable voice call functionality */
|
||||
enabled: z.boolean().default(false),
|
||||
/** Enable voice call functionality */
|
||||
enabled: z.boolean().default(false),
|
||||
|
||||
/** Active provider (telnyx, twilio, plivo, or mock) */
|
||||
provider: z.enum(["telnyx", "twilio", "plivo", "mock"]).optional(),
|
||||
/** Active provider (telnyx, twilio, plivo, or mock) */
|
||||
provider: z.enum(["telnyx", "twilio", "plivo", "mock"]).optional(),
|
||||
|
||||
/** Telnyx-specific configuration */
|
||||
telnyx: TelnyxConfigSchema.optional(),
|
||||
/** Telnyx-specific configuration */
|
||||
telnyx: TelnyxConfigSchema.optional(),
|
||||
|
||||
/** Twilio-specific configuration */
|
||||
twilio: TwilioConfigSchema.optional(),
|
||||
/** Twilio-specific configuration */
|
||||
twilio: TwilioConfigSchema.optional(),
|
||||
|
||||
/** Plivo-specific configuration */
|
||||
plivo: PlivoConfigSchema.optional(),
|
||||
/** Plivo-specific configuration */
|
||||
plivo: PlivoConfigSchema.optional(),
|
||||
|
||||
/** Phone number to call from (E.164) */
|
||||
fromNumber: E164Schema.optional(),
|
||||
/** Phone number to call from (E.164) */
|
||||
fromNumber: E164Schema.optional(),
|
||||
|
||||
/** Default phone number to call (E.164) */
|
||||
toNumber: E164Schema.optional(),
|
||||
/** Default phone number to call (E.164) */
|
||||
toNumber: E164Schema.optional(),
|
||||
|
||||
/** Inbound call policy */
|
||||
inboundPolicy: InboundPolicySchema.default("disabled"),
|
||||
/** Inbound call policy */
|
||||
inboundPolicy: InboundPolicySchema.default("disabled"),
|
||||
|
||||
/** Allowlist of phone numbers for inbound calls (E.164) */
|
||||
allowFrom: z.array(E164Schema).default([]),
|
||||
/** Allowlist of phone numbers for inbound calls (E.164) */
|
||||
allowFrom: z.array(E164Schema).default([]),
|
||||
|
||||
/** Greeting message for inbound calls */
|
||||
inboundGreeting: z.string().optional(),
|
||||
/** Greeting message for inbound calls */
|
||||
inboundGreeting: z.string().optional(),
|
||||
|
||||
/** Outbound call configuration */
|
||||
outbound: OutboundConfigSchema,
|
||||
/** Outbound call configuration */
|
||||
outbound: OutboundConfigSchema,
|
||||
|
||||
/** Maximum call duration in seconds */
|
||||
maxDurationSeconds: z.number().int().positive().default(300),
|
||||
/** Maximum call duration in seconds */
|
||||
maxDurationSeconds: z.number().int().positive().default(300),
|
||||
|
||||
/** Silence timeout for end-of-speech detection (ms) */
|
||||
silenceTimeoutMs: z.number().int().positive().default(800),
|
||||
/** Silence timeout for end-of-speech detection (ms) */
|
||||
silenceTimeoutMs: z.number().int().positive().default(800),
|
||||
|
||||
/** Timeout for user transcript (ms) */
|
||||
transcriptTimeoutMs: z.number().int().positive().default(180000),
|
||||
/** Timeout for user transcript (ms) */
|
||||
transcriptTimeoutMs: z.number().int().positive().default(180000),
|
||||
|
||||
/** Ring timeout for outbound calls (ms) */
|
||||
ringTimeoutMs: z.number().int().positive().default(30000),
|
||||
/** Ring timeout for outbound calls (ms) */
|
||||
ringTimeoutMs: z.number().int().positive().default(30000),
|
||||
|
||||
/** Maximum concurrent calls */
|
||||
maxConcurrentCalls: z.number().int().positive().default(1),
|
||||
/** Maximum concurrent calls */
|
||||
maxConcurrentCalls: z.number().int().positive().default(1),
|
||||
|
||||
/** Webhook server configuration */
|
||||
serve: VoiceCallServeConfigSchema,
|
||||
/** Webhook server configuration */
|
||||
serve: VoiceCallServeConfigSchema,
|
||||
|
||||
/** Tailscale exposure configuration (legacy, prefer tunnel config) */
|
||||
tailscale: VoiceCallTailscaleConfigSchema,
|
||||
/** Tailscale exposure configuration (legacy, prefer tunnel config) */
|
||||
tailscale: VoiceCallTailscaleConfigSchema,
|
||||
|
||||
/** Tunnel configuration (unified ngrok/tailscale) */
|
||||
tunnel: VoiceCallTunnelConfigSchema,
|
||||
/** Tunnel configuration (unified ngrok/tailscale) */
|
||||
tunnel: VoiceCallTunnelConfigSchema,
|
||||
|
||||
/** Real-time audio streaming configuration */
|
||||
streaming: VoiceCallStreamingConfigSchema,
|
||||
/** Real-time audio streaming configuration */
|
||||
streaming: VoiceCallStreamingConfigSchema,
|
||||
|
||||
/** Public webhook URL override (if set, bypasses tunnel auto-detection) */
|
||||
publicUrl: z.string().url().optional(),
|
||||
/** Public webhook URL override (if set, bypasses tunnel auto-detection) */
|
||||
publicUrl: z.string().url().optional(),
|
||||
|
||||
/** Skip webhook signature verification (development only, NOT for production) */
|
||||
skipSignatureVerification: z.boolean().default(false),
|
||||
/** Skip webhook signature verification (development only, NOT for production) */
|
||||
skipSignatureVerification: z.boolean().default(false),
|
||||
|
||||
/** STT configuration */
|
||||
stt: SttConfigSchema,
|
||||
/** STT configuration */
|
||||
stt: SttConfigSchema,
|
||||
|
||||
/** TTS override (deep-merges with core messages.tts) */
|
||||
tts: TtsConfigSchema,
|
||||
/** TTS override (deep-merges with core messages.tts) */
|
||||
tts: TtsConfigSchema,
|
||||
|
||||
/** Store path for call logs */
|
||||
store: z.string().optional(),
|
||||
/** Store path for call logs */
|
||||
store: z.string().optional(),
|
||||
|
||||
/** Model for generating voice responses (e.g., "anthropic/claude-sonnet-4", "openai/gpt-4o") */
|
||||
responseModel: z.string().default("openai/gpt-4o-mini"),
|
||||
/** Model for generating voice responses (e.g., "anthropic/claude-sonnet-4", "openai/gpt-4o") */
|
||||
responseModel: z.string().default("openai/gpt-4o-mini"),
|
||||
|
||||
/** System prompt for voice responses */
|
||||
responseSystemPrompt: z.string().optional(),
|
||||
/** System prompt for voice responses */
|
||||
responseSystemPrompt: z.string().optional(),
|
||||
|
||||
/** Timeout for response generation in ms (default 30s) */
|
||||
responseTimeoutMs: z.number().int().positive().default(30000),
|
||||
})
|
||||
/** Timeout for response generation in ms (default 30s) */
|
||||
responseTimeoutMs: z.number().int().positive().default(30000),
|
||||
})
|
||||
.strict();
|
||||
|
||||
export type VoiceCallConfig = z.infer<typeof VoiceCallConfigSchema>;
|
||||
@@ -395,30 +384,23 @@ export function resolveVoiceCallConfig(config: VoiceCallConfig): VoiceCallConfig
|
||||
// Telnyx
|
||||
if (resolved.provider === "telnyx") {
|
||||
resolved.telnyx = resolved.telnyx ?? {};
|
||||
resolved.telnyx.apiKey =
|
||||
resolved.telnyx.apiKey ?? process.env.TELNYX_API_KEY;
|
||||
resolved.telnyx.connectionId =
|
||||
resolved.telnyx.connectionId ?? process.env.TELNYX_CONNECTION_ID;
|
||||
resolved.telnyx.publicKey =
|
||||
resolved.telnyx.publicKey ?? process.env.TELNYX_PUBLIC_KEY;
|
||||
resolved.telnyx.apiKey = resolved.telnyx.apiKey ?? process.env.TELNYX_API_KEY;
|
||||
resolved.telnyx.connectionId = resolved.telnyx.connectionId ?? process.env.TELNYX_CONNECTION_ID;
|
||||
resolved.telnyx.publicKey = resolved.telnyx.publicKey ?? process.env.TELNYX_PUBLIC_KEY;
|
||||
}
|
||||
|
||||
// Twilio
|
||||
if (resolved.provider === "twilio") {
|
||||
resolved.twilio = resolved.twilio ?? {};
|
||||
resolved.twilio.accountSid =
|
||||
resolved.twilio.accountSid ?? process.env.TWILIO_ACCOUNT_SID;
|
||||
resolved.twilio.authToken =
|
||||
resolved.twilio.authToken ?? process.env.TWILIO_AUTH_TOKEN;
|
||||
resolved.twilio.accountSid = resolved.twilio.accountSid ?? process.env.TWILIO_ACCOUNT_SID;
|
||||
resolved.twilio.authToken = resolved.twilio.authToken ?? process.env.TWILIO_AUTH_TOKEN;
|
||||
}
|
||||
|
||||
// Plivo
|
||||
if (resolved.provider === "plivo") {
|
||||
resolved.plivo = resolved.plivo ?? {};
|
||||
resolved.plivo.authId =
|
||||
resolved.plivo.authId ?? process.env.PLIVO_AUTH_ID;
|
||||
resolved.plivo.authToken =
|
||||
resolved.plivo.authToken ?? process.env.PLIVO_AUTH_TOKEN;
|
||||
resolved.plivo.authId = resolved.plivo.authId ?? process.env.PLIVO_AUTH_ID;
|
||||
resolved.plivo.authToken = resolved.plivo.authToken ?? process.env.PLIVO_AUTH_TOKEN;
|
||||
}
|
||||
|
||||
// Tunnel Config
|
||||
@@ -427,13 +409,9 @@ export function resolveVoiceCallConfig(config: VoiceCallConfig): VoiceCallConfig
|
||||
allowNgrokFreeTierLoopbackBypass: false,
|
||||
};
|
||||
resolved.tunnel.allowNgrokFreeTierLoopbackBypass =
|
||||
resolved.tunnel.allowNgrokFreeTierLoopbackBypass ||
|
||||
resolved.tunnel.allowNgrokFreeTier ||
|
||||
false;
|
||||
resolved.tunnel.ngrokAuthToken =
|
||||
resolved.tunnel.ngrokAuthToken ?? process.env.NGROK_AUTHTOKEN;
|
||||
resolved.tunnel.ngrokDomain =
|
||||
resolved.tunnel.ngrokDomain ?? process.env.NGROK_DOMAIN;
|
||||
resolved.tunnel.allowNgrokFreeTierLoopbackBypass || resolved.tunnel.allowNgrokFreeTier || false;
|
||||
resolved.tunnel.ngrokAuthToken = resolved.tunnel.ngrokAuthToken ?? process.env.NGROK_AUTHTOKEN;
|
||||
resolved.tunnel.ngrokDomain = resolved.tunnel.ngrokDomain ?? process.env.NGROK_DOMAIN;
|
||||
|
||||
return resolved;
|
||||
}
|
||||
|
||||
@@ -51,10 +51,7 @@ type CoreAgentDeps = {
|
||||
ensureAgentWorkspace: (params?: { dir: string }) => Promise<void>;
|
||||
resolveStorePath: (store?: string, opts?: { agentId?: string }) => string;
|
||||
loadSessionStore: (storePath: string) => Record<string, unknown>;
|
||||
saveSessionStore: (
|
||||
storePath: string,
|
||||
store: Record<string, unknown>,
|
||||
) => Promise<void>;
|
||||
saveSessionStore: (storePath: string, store: Record<string, unknown>) => Promise<void>;
|
||||
resolveSessionFilePath: (
|
||||
sessionId: string,
|
||||
entry: unknown,
|
||||
@@ -116,9 +113,7 @@ function resolveOpenClawRoot(): string {
|
||||
}
|
||||
}
|
||||
|
||||
throw new Error(
|
||||
"Unable to resolve core root. Set OPENCLAW_ROOT to the package root.",
|
||||
);
|
||||
throw new Error("Unable to resolve core root. Set OPENCLAW_ROOT to the package root.");
|
||||
}
|
||||
|
||||
async function importCoreModule<T>(relativePath: string): Promise<T> {
|
||||
|
||||
@@ -85,11 +85,10 @@ describe("CallManager", () => {
|
||||
const manager = new CallManager(config, storePath);
|
||||
manager.initialize(provider, "https://example.com/voice/webhook");
|
||||
|
||||
const { callId, success } = await manager.initiateCall(
|
||||
"+15550000002",
|
||||
undefined,
|
||||
{ message: "Hello there", mode: "notify" },
|
||||
);
|
||||
const { callId, success } = await manager.initiateCall("+15550000002", undefined, {
|
||||
message: "Hello there",
|
||||
mode: "notify",
|
||||
});
|
||||
expect(success).toBe(true);
|
||||
|
||||
manager.processEvent({
|
||||
|
||||
@@ -124,8 +124,7 @@ export class CallManager {
|
||||
|
||||
const callId = crypto.randomUUID();
|
||||
const from =
|
||||
this.config.fromNumber ||
|
||||
(this.provider?.name === "mock" ? "+15550000000" : undefined);
|
||||
this.config.fromNumber || (this.provider?.name === "mock" ? "+15550000000" : undefined);
|
||||
if (!from) {
|
||||
return { callId: "", success: false, error: "fromNumber not configured" };
|
||||
}
|
||||
@@ -157,9 +156,7 @@ export class CallManager {
|
||||
if (mode === "notify" && initialMessage) {
|
||||
const pollyVoice = mapVoiceToPolly(this.config.tts?.openai?.voice);
|
||||
inlineTwiml = this.generateNotifyTwiml(initialMessage, pollyVoice);
|
||||
console.log(
|
||||
`[voice-call] Using inline TwiML for notify mode (voice: ${pollyVoice})`,
|
||||
);
|
||||
console.log(`[voice-call] Using inline TwiML for notify mode (voice: ${pollyVoice})`);
|
||||
}
|
||||
|
||||
const result = await this.provider.initiateCall({
|
||||
@@ -196,10 +193,7 @@ export class CallManager {
|
||||
/**
|
||||
* Speak to user in an active call.
|
||||
*/
|
||||
async speak(
|
||||
callId: CallId,
|
||||
text: string,
|
||||
): Promise<{ success: boolean; error?: string }> {
|
||||
async speak(callId: CallId, text: string): Promise<{ success: boolean; error?: string }> {
|
||||
const call = this.activeCalls.get(callId);
|
||||
if (!call) {
|
||||
return { success: false, error: "Call not found" };
|
||||
@@ -222,8 +216,7 @@ export class CallManager {
|
||||
this.addTranscriptEntry(call, "bot", text);
|
||||
|
||||
// Play TTS
|
||||
const voice =
|
||||
this.provider?.name === "twilio" ? this.config.tts?.openai?.voice : undefined;
|
||||
const voice = this.provider?.name === "twilio" ? this.config.tts?.openai?.voice : undefined;
|
||||
await this.provider.playTts({
|
||||
callId,
|
||||
providerCallId: call.providerCallId,
|
||||
@@ -248,9 +241,7 @@ export class CallManager {
|
||||
async speakInitialMessage(providerCallId: string): Promise<void> {
|
||||
const call = this.getCallByProviderCallId(providerCallId);
|
||||
if (!call) {
|
||||
console.warn(
|
||||
`[voice-call] speakInitialMessage: no call found for ${providerCallId}`,
|
||||
);
|
||||
console.warn(`[voice-call] speakInitialMessage: no call found for ${providerCallId}`);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -258,9 +249,7 @@ export class CallManager {
|
||||
const mode = (call.metadata?.mode as CallMode) ?? "conversation";
|
||||
|
||||
if (!initialMessage) {
|
||||
console.log(
|
||||
`[voice-call] speakInitialMessage: no initial message for ${call.callId}`,
|
||||
);
|
||||
console.log(`[voice-call] speakInitialMessage: no initial message for ${call.callId}`);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -270,29 +259,21 @@ export class CallManager {
|
||||
this.persistCallRecord(call);
|
||||
}
|
||||
|
||||
console.log(
|
||||
`[voice-call] Speaking initial message for call ${call.callId} (mode: ${mode})`,
|
||||
);
|
||||
console.log(`[voice-call] Speaking initial message for call ${call.callId} (mode: ${mode})`);
|
||||
const result = await this.speak(call.callId, initialMessage);
|
||||
if (!result.success) {
|
||||
console.warn(
|
||||
`[voice-call] Failed to speak initial message: ${result.error}`,
|
||||
);
|
||||
console.warn(`[voice-call] Failed to speak initial message: ${result.error}`);
|
||||
return;
|
||||
}
|
||||
|
||||
// In notify mode, auto-hangup after delay
|
||||
if (mode === "notify") {
|
||||
const delaySec = this.config.outbound.notifyHangupDelaySec;
|
||||
console.log(
|
||||
`[voice-call] Notify mode: auto-hangup in ${delaySec}s for call ${call.callId}`,
|
||||
);
|
||||
console.log(`[voice-call] Notify mode: auto-hangup in ${delaySec}s for call ${call.callId}`);
|
||||
setTimeout(async () => {
|
||||
const currentCall = this.getCall(call.callId);
|
||||
if (currentCall && !TerminalStates.has(currentCall.state)) {
|
||||
console.log(
|
||||
`[voice-call] Notify mode: hanging up call ${call.callId}`,
|
||||
);
|
||||
console.log(`[voice-call] Notify mode: hanging up call ${call.callId}`);
|
||||
await this.endCall(call.callId);
|
||||
}
|
||||
}, delaySec * 1000);
|
||||
@@ -368,9 +349,7 @@ export class CallManager {
|
||||
return new Promise((resolve, reject) => {
|
||||
const timeout = setTimeout(() => {
|
||||
this.transcriptWaiters.delete(callId);
|
||||
reject(
|
||||
new Error(`Timed out waiting for transcript after ${timeoutMs}ms`),
|
||||
);
|
||||
reject(new Error(`Timed out waiting for transcript after ${timeoutMs}ms`));
|
||||
}, timeoutMs);
|
||||
|
||||
this.transcriptWaiters.set(callId, { resolve, reject, timeout });
|
||||
@@ -491,10 +470,7 @@ export class CallManager {
|
||||
const normalized = from?.replace(/\D/g, "") || "";
|
||||
const allowed = (allowFrom || []).some((num) => {
|
||||
const normalizedAllow = num.replace(/\D/g, "");
|
||||
return (
|
||||
normalized.endsWith(normalizedAllow) ||
|
||||
normalizedAllow.endsWith(normalized)
|
||||
);
|
||||
return normalized.endsWith(normalizedAllow) || normalizedAllow.endsWith(normalized);
|
||||
});
|
||||
const status = allowed ? "accepted" : "rejected";
|
||||
console.log(
|
||||
@@ -511,11 +487,7 @@ export class CallManager {
|
||||
/**
|
||||
* Create a call record for an inbound call.
|
||||
*/
|
||||
private createInboundCall(
|
||||
providerCallId: string,
|
||||
from: string,
|
||||
to: string,
|
||||
): CallRecord {
|
||||
private createInboundCall(providerCallId: string, from: string, to: string): CallRecord {
|
||||
const callId = crypto.randomUUID();
|
||||
|
||||
const callRecord: CallRecord = {
|
||||
@@ -530,8 +502,7 @@ export class CallManager {
|
||||
transcript: [],
|
||||
processedEventIds: [],
|
||||
metadata: {
|
||||
initialMessage:
|
||||
this.config.inboundGreeting || "Hello! How can I help you today?",
|
||||
initialMessage: this.config.inboundGreeting || "Hello! How can I help you today?",
|
||||
},
|
||||
};
|
||||
|
||||
@@ -539,9 +510,7 @@ export class CallManager {
|
||||
this.providerCallIdMap.set(providerCallId, callId); // Map providerCallId to internal callId
|
||||
this.persistCallRecord(callRecord);
|
||||
|
||||
console.log(
|
||||
`[voice-call] Created inbound call record: ${callId} from ${from}`,
|
||||
);
|
||||
console.log(`[voice-call] Created inbound call record: ${callId} from ${from}`);
|
||||
return callRecord;
|
||||
}
|
||||
|
||||
@@ -663,10 +632,7 @@ export class CallManager {
|
||||
call.endReason = "error";
|
||||
this.transitionState(call, "error");
|
||||
this.clearMaxDurationTimer(call.callId);
|
||||
this.rejectTranscriptWaiter(
|
||||
call.callId,
|
||||
`Call error: ${event.error}`,
|
||||
);
|
||||
this.rejectTranscriptWaiter(call.callId, `Call error: ${event.error}`);
|
||||
this.activeCalls.delete(call.callId);
|
||||
if (call.providerCallId) {
|
||||
this.providerCallIdMap.delete(call.providerCallId);
|
||||
@@ -680,9 +646,7 @@ export class CallManager {
|
||||
|
||||
private maybeSpeakInitialMessageOnAnswered(call: CallRecord): void {
|
||||
const initialMessage =
|
||||
typeof call.metadata?.initialMessage === "string"
|
||||
? call.metadata.initialMessage.trim()
|
||||
: "";
|
||||
typeof call.metadata?.initialMessage === "string" ? call.metadata.initialMessage.trim() : "";
|
||||
|
||||
if (!initialMessage) return;
|
||||
|
||||
@@ -759,10 +723,7 @@ export class CallManager {
|
||||
}
|
||||
|
||||
// States that can cycle during multi-turn conversations
|
||||
private static readonly ConversationStates = new Set<CallState>([
|
||||
"speaking",
|
||||
"listening",
|
||||
]);
|
||||
private static readonly ConversationStates = new Set<CallState>(["speaking", "listening"]);
|
||||
|
||||
// Non-terminal state order for monotonic transitions
|
||||
private static readonly StateOrder: readonly CallState[] = [
|
||||
@@ -808,11 +769,7 @@ export class CallManager {
|
||||
/**
|
||||
* Add an entry to the call transcript.
|
||||
*/
|
||||
private addTranscriptEntry(
|
||||
call: CallRecord,
|
||||
speaker: "bot" | "user",
|
||||
text: string,
|
||||
): void {
|
||||
private addTranscriptEntry(call: CallRecord, speaker: "bot" | "user", text: string): void {
|
||||
const entry: TranscriptEntry = {
|
||||
timestamp: Date.now(),
|
||||
speaker,
|
||||
|
||||
@@ -14,7 +14,10 @@ import {
|
||||
} from "./timers.js";
|
||||
import { endCall } from "./outbound.js";
|
||||
|
||||
function shouldAcceptInbound(config: CallManagerContext["config"], from: string | undefined): boolean {
|
||||
function shouldAcceptInbound(
|
||||
config: CallManagerContext["config"],
|
||||
from: string | undefined,
|
||||
): boolean {
|
||||
const { inboundPolicy: policy, allowFrom } = config;
|
||||
|
||||
switch (policy) {
|
||||
|
||||
@@ -1,6 +1,11 @@
|
||||
import crypto from "node:crypto";
|
||||
|
||||
import { TerminalStates, type CallId, type CallRecord, type OutboundCallOptions } from "../types.js";
|
||||
import {
|
||||
TerminalStates,
|
||||
type CallId,
|
||||
type CallRecord,
|
||||
type OutboundCallOptions,
|
||||
} from "../types.js";
|
||||
import type { CallMode } from "../config.js";
|
||||
import { mapVoiceToPolly } from "../voice-mapping.js";
|
||||
import type { CallManagerContext } from "./context.js";
|
||||
@@ -8,7 +13,12 @@ import { getCallByProviderCallId } from "./lookup.js";
|
||||
import { generateNotifyTwiml } from "./twiml.js";
|
||||
import { addTranscriptEntry, transitionState } from "./state.js";
|
||||
import { persistCallRecord } from "./store.js";
|
||||
import { clearMaxDurationTimer, clearTranscriptWaiter, rejectTranscriptWaiter, waitForFinalTranscript } from "./timers.js";
|
||||
import {
|
||||
clearMaxDurationTimer,
|
||||
clearTranscriptWaiter,
|
||||
rejectTranscriptWaiter,
|
||||
waitForFinalTranscript,
|
||||
} from "./timers.js";
|
||||
|
||||
export async function initiateCall(
|
||||
ctx: CallManagerContext,
|
||||
@@ -38,8 +48,7 @@ export async function initiateCall(
|
||||
|
||||
const callId = crypto.randomUUID();
|
||||
const from =
|
||||
ctx.config.fromNumber ||
|
||||
(ctx.provider?.name === "mock" ? "+15550000000" : undefined);
|
||||
ctx.config.fromNumber || (ctx.provider?.name === "mock" ? "+15550000000" : undefined);
|
||||
if (!from) {
|
||||
return { callId: "", success: false, error: "fromNumber not configured" };
|
||||
}
|
||||
@@ -120,8 +129,7 @@ export async function speak(
|
||||
|
||||
addTranscriptEntry(call, "bot", text);
|
||||
|
||||
const voice =
|
||||
ctx.provider?.name === "twilio" ? ctx.config.tts?.openai?.voice : undefined;
|
||||
const voice = ctx.provider?.name === "twilio" ? ctx.config.tts?.openai?.voice : undefined;
|
||||
await ctx.provider.playTts({
|
||||
callId,
|
||||
providerCallId: call.providerCallId,
|
||||
|
||||
@@ -35,11 +35,7 @@ export function transitionState(call: CallRecord, newState: CallState): void {
|
||||
}
|
||||
}
|
||||
|
||||
export function addTranscriptEntry(
|
||||
call: CallRecord,
|
||||
speaker: "bot" | "user",
|
||||
text: string,
|
||||
): void {
|
||||
export function addTranscriptEntry(call: CallRecord, speaker: "bot" | "user", text: string): void {
|
||||
const entry: TranscriptEntry = {
|
||||
timestamp: Date.now(),
|
||||
speaker,
|
||||
|
||||
@@ -67,10 +67,7 @@ export function resolveTranscriptWaiter(
|
||||
waiter.resolve(transcript);
|
||||
}
|
||||
|
||||
export function waitForFinalTranscript(
|
||||
ctx: CallManagerContext,
|
||||
callId: CallId,
|
||||
): Promise<string> {
|
||||
export function waitForFinalTranscript(ctx: CallManagerContext, callId: CallId): Promise<string> {
|
||||
// Only allow one in-flight waiter per call.
|
||||
rejectTranscriptWaiter(ctx, callId, "Transcript waiter replaced");
|
||||
|
||||
|
||||
@@ -87,10 +87,7 @@ export class MediaStreamHandler {
|
||||
/**
|
||||
* Handle new WebSocket connection from Twilio.
|
||||
*/
|
||||
private async handleConnection(
|
||||
ws: WebSocket,
|
||||
_request: IncomingMessage,
|
||||
): Promise<void> {
|
||||
private async handleConnection(ws: WebSocket, _request: IncomingMessage): Promise<void> {
|
||||
let session: StreamSession | null = null;
|
||||
|
||||
ws.on("message", async (data: Buffer) => {
|
||||
@@ -140,16 +137,11 @@ export class MediaStreamHandler {
|
||||
/**
|
||||
* Handle stream start event.
|
||||
*/
|
||||
private async handleStart(
|
||||
ws: WebSocket,
|
||||
message: TwilioMediaMessage,
|
||||
): Promise<StreamSession> {
|
||||
private async handleStart(ws: WebSocket, message: TwilioMediaMessage): Promise<StreamSession> {
|
||||
const streamSid = message.streamSid || "";
|
||||
const callSid = message.start?.callSid || "";
|
||||
|
||||
console.log(
|
||||
`[MediaStream] Stream started: ${streamSid} (call: ${callSid})`,
|
||||
);
|
||||
console.log(`[MediaStream] Stream started: ${streamSid} (call: ${callSid})`);
|
||||
|
||||
// Create STT session
|
||||
const sttSession = this.config.sttProvider.createSession();
|
||||
@@ -181,10 +173,7 @@ export class MediaStreamHandler {
|
||||
|
||||
// Connect to OpenAI STT (non-blocking, log errors but don't fail the call)
|
||||
sttSession.connect().catch((err) => {
|
||||
console.warn(
|
||||
`[MediaStream] STT connection failed (TTS still works):`,
|
||||
err.message,
|
||||
);
|
||||
console.warn(`[MediaStream] STT connection failed (TTS still works):`, err.message);
|
||||
});
|
||||
|
||||
return session;
|
||||
@@ -252,10 +241,7 @@ export class MediaStreamHandler {
|
||||
* Queue a TTS operation for sequential playback.
|
||||
* Only one TTS operation plays at a time per stream to prevent overlap.
|
||||
*/
|
||||
async queueTts(
|
||||
streamSid: string,
|
||||
playFn: (signal: AbortSignal) => Promise<void>,
|
||||
): Promise<void> {
|
||||
async queueTts(streamSid: string, playFn: (signal: AbortSignal) => Promise<void>): Promise<void> {
|
||||
const queue = this.getTtsQueue(streamSid);
|
||||
let resolveEntry: () => void;
|
||||
let rejectEntry: (error: unknown) => void;
|
||||
@@ -292,9 +278,7 @@ export class MediaStreamHandler {
|
||||
* Get active session by call ID.
|
||||
*/
|
||||
getSessionByCallId(callId: string): StreamSession | undefined {
|
||||
return [...this.sessions.values()].find(
|
||||
(session) => session.callId === callId,
|
||||
);
|
||||
return [...this.sessions.values()].find((session) => session.callId === callId);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -50,9 +50,7 @@ export class MockProvider implements VoiceCallProvider {
|
||||
}
|
||||
}
|
||||
|
||||
private normalizeEvent(
|
||||
evt: Partial<NormalizedEvent>,
|
||||
): NormalizedEvent | null {
|
||||
private normalizeEvent(evt: Partial<NormalizedEvent>): NormalizedEvent | null {
|
||||
if (!evt.type || !evt.callId) return null;
|
||||
|
||||
const base = {
|
||||
@@ -96,9 +94,7 @@ export class MockProvider implements VoiceCallProvider {
|
||||
}
|
||||
|
||||
case "call.silence": {
|
||||
const payload = evt as Partial<
|
||||
NormalizedEvent & { durationMs?: number }
|
||||
>;
|
||||
const payload = evt as Partial<NormalizedEvent & { durationMs?: number }>;
|
||||
return {
|
||||
...base,
|
||||
type: evt.type,
|
||||
@@ -116,9 +112,7 @@ export class MockProvider implements VoiceCallProvider {
|
||||
}
|
||||
|
||||
case "call.ended": {
|
||||
const payload = evt as Partial<
|
||||
NormalizedEvent & { reason?: EndReason }
|
||||
>;
|
||||
const payload = evt as Partial<NormalizedEvent & { reason?: EndReason }>;
|
||||
return {
|
||||
...base,
|
||||
type: evt.type,
|
||||
@@ -127,9 +121,7 @@ export class MockProvider implements VoiceCallProvider {
|
||||
}
|
||||
|
||||
case "call.error": {
|
||||
const payload = evt as Partial<
|
||||
NormalizedEvent & { error?: string; retryable?: boolean }
|
||||
>;
|
||||
const payload = evt as Partial<NormalizedEvent & { error?: string; retryable?: boolean }>;
|
||||
return {
|
||||
...base,
|
||||
type: evt.type,
|
||||
|
||||
@@ -103,8 +103,7 @@ export class PlivoProvider implements VoiceCallProvider {
|
||||
}
|
||||
|
||||
parseWebhookEvent(ctx: WebhookContext): ProviderWebhookParseResult {
|
||||
const flow =
|
||||
typeof ctx.query?.flow === "string" ? ctx.query.flow.trim() : "";
|
||||
const flow = typeof ctx.query?.flow === "string" ? ctx.query.flow.trim() : "";
|
||||
|
||||
const parsed = this.parseBody(ctx.rawBody);
|
||||
if (!parsed) {
|
||||
@@ -139,9 +138,7 @@ export class PlivoProvider implements VoiceCallProvider {
|
||||
|
||||
if (flow === "xml-listen") {
|
||||
const callId = this.getCallIdFromQuery(ctx);
|
||||
const pending = callId
|
||||
? this.pendingListenByCallId.get(callId)
|
||||
: undefined;
|
||||
const pending = callId ? this.pendingListenByCallId.get(callId) : undefined;
|
||||
if (callId) this.pendingListenByCallId.delete(callId);
|
||||
|
||||
const actionUrl = this.buildActionUrl(ctx, {
|
||||
@@ -180,10 +177,7 @@ export class PlivoProvider implements VoiceCallProvider {
|
||||
};
|
||||
}
|
||||
|
||||
private normalizeEvent(
|
||||
params: URLSearchParams,
|
||||
callIdOverride?: string,
|
||||
): NormalizedEvent | null {
|
||||
private normalizeEvent(params: URLSearchParams, callIdOverride?: string): NormalizedEvent | null {
|
||||
const callUuid = params.get("CallUUID") || "";
|
||||
const requestUuid = params.get("RequestUUID") || "";
|
||||
|
||||
@@ -329,11 +323,9 @@ export class PlivoProvider implements VoiceCallProvider {
|
||||
}
|
||||
|
||||
async playTts(input: PlayTtsInput): Promise<void> {
|
||||
const callUuid = this.requestUuidToCallUuid.get(input.providerCallId) ??
|
||||
input.providerCallId;
|
||||
const callUuid = this.requestUuidToCallUuid.get(input.providerCallId) ?? input.providerCallId;
|
||||
const webhookBase =
|
||||
this.callUuidToWebhookUrl.get(callUuid) ||
|
||||
this.callIdToWebhookUrl.get(input.callId);
|
||||
this.callUuidToWebhookUrl.get(callUuid) || this.callIdToWebhookUrl.get(input.callId);
|
||||
if (!webhookBase) {
|
||||
throw new Error("Missing webhook URL for this call (provider state missing)");
|
||||
}
|
||||
@@ -364,11 +356,9 @@ export class PlivoProvider implements VoiceCallProvider {
|
||||
}
|
||||
|
||||
async startListening(input: StartListeningInput): Promise<void> {
|
||||
const callUuid = this.requestUuidToCallUuid.get(input.providerCallId) ??
|
||||
input.providerCallId;
|
||||
const callUuid = this.requestUuidToCallUuid.get(input.providerCallId) ?? input.providerCallId;
|
||||
const webhookBase =
|
||||
this.callUuidToWebhookUrl.get(callUuid) ||
|
||||
this.callIdToWebhookUrl.get(input.callId);
|
||||
this.callUuidToWebhookUrl.get(callUuid) || this.callIdToWebhookUrl.get(input.callId);
|
||||
if (!webhookBase) {
|
||||
throw new Error("Missing webhook URL for this call (provider state missing)");
|
||||
}
|
||||
@@ -427,10 +417,7 @@ export class PlivoProvider implements VoiceCallProvider {
|
||||
</Response>`;
|
||||
}
|
||||
|
||||
private static xmlGetInputSpeech(params: {
|
||||
actionUrl: string;
|
||||
language?: string;
|
||||
}): string {
|
||||
private static xmlGetInputSpeech(params: { actionUrl: string; language?: string }): string {
|
||||
const language = params.language || "en-US";
|
||||
return `<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Response>
|
||||
|
||||
@@ -183,9 +183,7 @@ class OpenAIRealtimeSTTSession implements RealtimeSTTSession {
|
||||
return;
|
||||
}
|
||||
|
||||
if (
|
||||
this.reconnectAttempts >= OpenAIRealtimeSTTSession.MAX_RECONNECT_ATTEMPTS
|
||||
) {
|
||||
if (this.reconnectAttempts >= OpenAIRealtimeSTTSession.MAX_RECONNECT_ATTEMPTS) {
|
||||
console.error(
|
||||
`[RealtimeSTT] Max reconnect attempts (${OpenAIRealtimeSTTSession.MAX_RECONNECT_ATTEMPTS}) reached`,
|
||||
);
|
||||
@@ -193,9 +191,7 @@ class OpenAIRealtimeSTTSession implements RealtimeSTTSession {
|
||||
}
|
||||
|
||||
this.reconnectAttempts++;
|
||||
const delay =
|
||||
OpenAIRealtimeSTTSession.RECONNECT_DELAY_MS *
|
||||
2 ** (this.reconnectAttempts - 1);
|
||||
const delay = OpenAIRealtimeSTTSession.RECONNECT_DELAY_MS * 2 ** (this.reconnectAttempts - 1);
|
||||
console.log(
|
||||
`[RealtimeSTT] Reconnecting ${this.reconnectAttempts}/${OpenAIRealtimeSTTSession.MAX_RECONNECT_ATTEMPTS} in ${delay}ms...`,
|
||||
);
|
||||
|
||||
@@ -161,9 +161,7 @@ export class TelnyxProvider implements VoiceCallProvider {
|
||||
let callId = "";
|
||||
if (data.payload?.client_state) {
|
||||
try {
|
||||
callId = Buffer.from(data.payload.client_state, "base64").toString(
|
||||
"utf8",
|
||||
);
|
||||
callId = Buffer.from(data.payload.client_state, "base64").toString("utf8");
|
||||
} catch {
|
||||
// Fallback if not valid Base64
|
||||
callId = data.payload.client_state;
|
||||
@@ -312,13 +310,10 @@ export class TelnyxProvider implements VoiceCallProvider {
|
||||
* Start transcription (STT) via Telnyx.
|
||||
*/
|
||||
async startListening(input: StartListeningInput): Promise<void> {
|
||||
await this.apiRequest(
|
||||
`/calls/${input.providerCallId}/actions/transcription_start`,
|
||||
{
|
||||
command_id: crypto.randomUUID(),
|
||||
language: input.language || "en",
|
||||
},
|
||||
);
|
||||
await this.apiRequest(`/calls/${input.providerCallId}/actions/transcription_start`, {
|
||||
command_id: crypto.randomUUID(),
|
||||
language: input.language || "en",
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -84,9 +84,7 @@ export class OpenAITTSProvider {
|
||||
this.instructions = config.instructions;
|
||||
|
||||
if (!this.apiKey) {
|
||||
throw new Error(
|
||||
"OpenAI API key required (set OPENAI_API_KEY or pass apiKey)",
|
||||
);
|
||||
throw new Error("OpenAI API key required (set OPENAI_API_KEY or pass apiKey)");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -215,11 +213,7 @@ function linearToMulaw(sample: number): number {
|
||||
// Add bias and find segment
|
||||
sample += BIAS;
|
||||
let exponent = 7;
|
||||
for (
|
||||
let expMask = 0x4000;
|
||||
(sample & expMask) === 0 && exponent > 0;
|
||||
exponent--, expMask >>= 1
|
||||
) {
|
||||
for (let expMask = 0x4000; (sample & expMask) === 0 && exponent > 0; exponent--, expMask >>= 1) {
|
||||
// Find the segment (exponent)
|
||||
}
|
||||
|
||||
@@ -252,10 +246,7 @@ export function mulawToLinear(mulaw: number): number {
|
||||
* Chunk audio buffer into 20ms frames for streaming.
|
||||
* At 8kHz mono, 20ms = 160 samples = 160 bytes (mu-law).
|
||||
*/
|
||||
export function chunkAudio(
|
||||
audio: Buffer,
|
||||
chunkSize = 160,
|
||||
): Generator<Buffer, void, unknown> {
|
||||
export function chunkAudio(audio: Buffer, chunkSize = 160): Generator<Buffer, void, unknown> {
|
||||
return (function* () {
|
||||
for (let i = 0; i < audio.length; i += chunkSize) {
|
||||
yield audio.subarray(i, Math.min(i + chunkSize, audio.length));
|
||||
|
||||
@@ -12,10 +12,7 @@ function createProvider(): TwilioProvider {
|
||||
);
|
||||
}
|
||||
|
||||
function createContext(
|
||||
rawBody: string,
|
||||
query?: WebhookContext["query"],
|
||||
): WebhookContext {
|
||||
function createContext(rawBody: string, query?: WebhookContext["query"]): WebhookContext {
|
||||
return {
|
||||
headers: {},
|
||||
rawBody,
|
||||
|
||||
@@ -211,22 +211,16 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
/**
|
||||
* Parse Twilio direction to normalized format.
|
||||
*/
|
||||
private static parseDirection(
|
||||
direction: string | null,
|
||||
): "inbound" | "outbound" | undefined {
|
||||
private static parseDirection(direction: string | null): "inbound" | "outbound" | undefined {
|
||||
if (direction === "inbound") return "inbound";
|
||||
if (direction === "outbound-api" || direction === "outbound-dial")
|
||||
return "outbound";
|
||||
if (direction === "outbound-api" || direction === "outbound-dial") return "outbound";
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert Twilio webhook params to normalized event format.
|
||||
*/
|
||||
private normalizeEvent(
|
||||
params: URLSearchParams,
|
||||
callIdOverride?: string,
|
||||
): NormalizedEvent | null {
|
||||
private normalizeEvent(params: URLSearchParams, callIdOverride?: string): NormalizedEvent | null {
|
||||
const callSid = params.get("CallSid") || "";
|
||||
|
||||
const baseEvent = {
|
||||
@@ -300,8 +294,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
if (!ctx) return TwilioProvider.EMPTY_TWIML;
|
||||
|
||||
const params = new URLSearchParams(ctx.rawBody);
|
||||
const type =
|
||||
typeof ctx.query?.type === "string" ? ctx.query.type.trim() : undefined;
|
||||
const type = typeof ctx.query?.type === "string" ? ctx.query.type.trim() : undefined;
|
||||
const isStatusCallback = type === "status";
|
||||
const callStatus = params.get("CallStatus");
|
||||
const direction = params.get("Direction");
|
||||
@@ -329,9 +322,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
// Conversation mode: return streaming TwiML immediately for outbound calls.
|
||||
if (isOutbound) {
|
||||
const streamUrl = this.getStreamUrl();
|
||||
return streamUrl
|
||||
? this.getStreamConnectXml(streamUrl)
|
||||
: TwilioProvider.PAUSE_TWIML;
|
||||
return streamUrl ? this.getStreamConnectXml(streamUrl) : TwilioProvider.PAUSE_TWIML;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -344,9 +335,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
// For inbound calls, answer immediately with stream
|
||||
if (direction === "inbound") {
|
||||
const streamUrl = this.getStreamUrl();
|
||||
return streamUrl
|
||||
? this.getStreamConnectXml(streamUrl)
|
||||
: TwilioProvider.PAUSE_TWIML;
|
||||
return streamUrl ? this.getStreamConnectXml(streamUrl) : TwilioProvider.PAUSE_TWIML;
|
||||
}
|
||||
|
||||
// For outbound calls, only connect to stream when call is in-progress
|
||||
@@ -355,9 +344,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
}
|
||||
|
||||
const streamUrl = this.getStreamUrl();
|
||||
return streamUrl
|
||||
? this.getStreamConnectXml(streamUrl)
|
||||
: TwilioProvider.PAUSE_TWIML;
|
||||
return streamUrl ? this.getStreamConnectXml(streamUrl) : TwilioProvider.PAUSE_TWIML;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -374,9 +361,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
const origin = url.origin;
|
||||
|
||||
// Convert https:// to wss:// for WebSocket
|
||||
const wsOrigin = origin
|
||||
.replace(/^https:\/\//, "wss://")
|
||||
.replace(/^http:\/\//, "ws://");
|
||||
const wsOrigin = origin.replace(/^https:\/\//, "wss://").replace(/^http:\/\//, "ws://");
|
||||
|
||||
// Append the stream path
|
||||
const path = this.options.streamPath.startsWith("/")
|
||||
@@ -433,10 +418,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
Timeout: "30",
|
||||
};
|
||||
|
||||
const result = await this.apiRequest<TwilioCallResponse>(
|
||||
"/Calls.json",
|
||||
params,
|
||||
);
|
||||
const result = await this.apiRequest<TwilioCallResponse>("/Calls.json", params);
|
||||
|
||||
this.callWebhookUrls.set(result.sid, url.toString());
|
||||
|
||||
@@ -489,9 +471,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
// Fall back to TwiML <Say> (may not work on all accounts)
|
||||
const webhookUrl = this.callWebhookUrls.get(input.providerCallId);
|
||||
if (!webhookUrl) {
|
||||
throw new Error(
|
||||
"Missing webhook URL for this call (provider state not initialized)",
|
||||
);
|
||||
throw new Error("Missing webhook URL for this call (provider state not initialized)");
|
||||
}
|
||||
|
||||
console.warn(
|
||||
@@ -517,10 +497,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
* Generates audio with core TTS, converts to mu-law, and streams via WebSocket.
|
||||
* Uses a queue to serialize playback and prevent overlapping audio.
|
||||
*/
|
||||
private async playTtsViaStream(
|
||||
text: string,
|
||||
streamSid: string,
|
||||
): Promise<void> {
|
||||
private async playTtsViaStream(text: string, streamSid: string): Promise<void> {
|
||||
if (!this.ttsProvider || !this.mediaStreamHandler) {
|
||||
throw new Error("TTS provider and media stream handler required");
|
||||
}
|
||||
@@ -556,9 +533,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
async startListening(input: StartListeningInput): Promise<void> {
|
||||
const webhookUrl = this.callWebhookUrls.get(input.providerCallId);
|
||||
if (!webhookUrl) {
|
||||
throw new Error(
|
||||
"Missing webhook URL for this call (provider state not initialized)",
|
||||
);
|
||||
throw new Error("Missing webhook URL for this call (provider state not initialized)");
|
||||
}
|
||||
|
||||
const twiml = `<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
||||
@@ -9,19 +9,16 @@ export async function twilioApiRequest<T = unknown>(params: {
|
||||
const bodyParams =
|
||||
params.body instanceof URLSearchParams
|
||||
? params.body
|
||||
: Object.entries(params.body).reduce<URLSearchParams>(
|
||||
(acc, [key, value]) => {
|
||||
if (Array.isArray(value)) {
|
||||
for (const entry of value) {
|
||||
acc.append(key, entry);
|
||||
}
|
||||
} else if (typeof value === "string") {
|
||||
acc.append(key, value);
|
||||
: Object.entries(params.body).reduce<URLSearchParams>((acc, [key, value]) => {
|
||||
if (Array.isArray(value)) {
|
||||
for (const entry of value) {
|
||||
acc.append(key, entry);
|
||||
}
|
||||
return acc;
|
||||
},
|
||||
new URLSearchParams(),
|
||||
);
|
||||
} else if (typeof value === "string") {
|
||||
acc.append(key, value);
|
||||
}
|
||||
return acc;
|
||||
}, new URLSearchParams());
|
||||
|
||||
const response = await fetch(`${params.baseUrl}${params.endpoint}`, {
|
||||
method: "POST",
|
||||
|
||||
@@ -11,8 +11,7 @@ export function verifyTwilioProviderWebhook(params: {
|
||||
}): WebhookVerificationResult {
|
||||
const result = verifyTwilioWebhook(params.ctx, params.authToken, {
|
||||
publicUrl: params.currentPublicUrl || undefined,
|
||||
allowNgrokFreeTierLoopbackBypass:
|
||||
params.options.allowNgrokFreeTierLoopbackBypass ?? false,
|
||||
allowNgrokFreeTierLoopbackBypass: params.options.allowNgrokFreeTierLoopbackBypass ?? false,
|
||||
skipVerification: params.options.skipVerification,
|
||||
});
|
||||
|
||||
|
||||
@@ -41,8 +41,7 @@ type SessionEntry = {
|
||||
export async function generateVoiceResponse(
|
||||
params: VoiceResponseParams,
|
||||
): Promise<VoiceResponseResult> {
|
||||
const { voiceConfig, callId, from, transcript, userMessage, coreConfig } =
|
||||
params;
|
||||
const { voiceConfig, callId, from, transcript, userMessage, coreConfig } = params;
|
||||
|
||||
if (!coreConfig) {
|
||||
return { text: null, error: "Core config unavailable for voice response" };
|
||||
@@ -54,10 +53,7 @@ export async function generateVoiceResponse(
|
||||
} catch (err) {
|
||||
return {
|
||||
text: null,
|
||||
error:
|
||||
err instanceof Error
|
||||
? err.message
|
||||
: "Unable to load core agent dependencies",
|
||||
error: err instanceof Error ? err.message : "Unable to load core agent dependencies",
|
||||
};
|
||||
}
|
||||
const cfg = coreConfig;
|
||||
@@ -95,12 +91,9 @@ export async function generateVoiceResponse(
|
||||
});
|
||||
|
||||
// Resolve model from config
|
||||
const modelRef =
|
||||
voiceConfig.responseModel ||
|
||||
`${deps.DEFAULT_PROVIDER}/${deps.DEFAULT_MODEL}`;
|
||||
const modelRef = voiceConfig.responseModel || `${deps.DEFAULT_PROVIDER}/${deps.DEFAULT_MODEL}`;
|
||||
const slashIndex = modelRef.indexOf("/");
|
||||
const provider =
|
||||
slashIndex === -1 ? deps.DEFAULT_PROVIDER : modelRef.slice(0, slashIndex);
|
||||
const provider = slashIndex === -1 ? deps.DEFAULT_PROVIDER : modelRef.slice(0, slashIndex);
|
||||
const model = slashIndex === -1 ? modelRef : modelRef.slice(slashIndex + 1);
|
||||
|
||||
// Resolve thinking level
|
||||
@@ -118,17 +111,13 @@ export async function generateVoiceResponse(
|
||||
let extraSystemPrompt = basePrompt;
|
||||
if (transcript.length > 0) {
|
||||
const history = transcript
|
||||
.map(
|
||||
(entry) =>
|
||||
`${entry.speaker === "bot" ? "You" : "Caller"}: ${entry.text}`,
|
||||
)
|
||||
.map((entry) => `${entry.speaker === "bot" ? "You" : "Caller"}: ${entry.text}`)
|
||||
.join("\n");
|
||||
extraSystemPrompt = `${basePrompt}\n\nConversation so far:\n${history}`;
|
||||
}
|
||||
|
||||
// Resolve timeout
|
||||
const timeoutMs =
|
||||
voiceConfig.responseTimeoutMs ?? deps.resolveAgentTimeoutMs({ cfg });
|
||||
const timeoutMs = voiceConfig.responseTimeoutMs ?? deps.resolveAgentTimeoutMs({ cfg });
|
||||
const runId = `voice:${callId}:${Date.now()}`;
|
||||
|
||||
try {
|
||||
|
||||
@@ -42,9 +42,7 @@ function resolveProvider(config: VoiceCallConfig): VoiceCallProvider {
|
||||
const allowNgrokFreeTierLoopbackBypass =
|
||||
config.tunnel?.provider === "ngrok" &&
|
||||
isLoopbackBind(config.serve?.bind) &&
|
||||
(config.tunnel?.allowNgrokFreeTierLoopbackBypass ||
|
||||
config.tunnel?.allowNgrokFreeTier ||
|
||||
false);
|
||||
(config.tunnel?.allowNgrokFreeTierLoopbackBypass || config.tunnel?.allowNgrokFreeTier || false);
|
||||
|
||||
switch (config.provider) {
|
||||
case "telnyx":
|
||||
@@ -63,9 +61,7 @@ function resolveProvider(config: VoiceCallConfig): VoiceCallProvider {
|
||||
allowNgrokFreeTierLoopbackBypass,
|
||||
publicUrl: config.publicUrl,
|
||||
skipVerification: config.skipSignatureVerification,
|
||||
streamPath: config.streaming?.enabled
|
||||
? config.streaming.streamPath
|
||||
: undefined,
|
||||
streamPath: config.streaming?.enabled ? config.streaming.streamPath : undefined,
|
||||
},
|
||||
);
|
||||
case "plivo":
|
||||
@@ -83,9 +79,7 @@ function resolveProvider(config: VoiceCallConfig): VoiceCallProvider {
|
||||
case "mock":
|
||||
return new MockProvider();
|
||||
default:
|
||||
throw new Error(
|
||||
`Unsupported voice-call provider: ${String(config.provider)}`,
|
||||
);
|
||||
throw new Error(`Unsupported voice-call provider: ${String(config.provider)}`);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -106,9 +100,7 @@ export async function createVoiceCallRuntime(params: {
|
||||
const config = resolveVoiceCallConfig(rawConfig);
|
||||
|
||||
if (!config.enabled) {
|
||||
throw new Error(
|
||||
"Voice call disabled. Enable the plugin entry in config.",
|
||||
);
|
||||
throw new Error("Voice call disabled. Enable the plugin entry in config.");
|
||||
}
|
||||
|
||||
const validation = validateProviderConfig(config);
|
||||
@@ -118,12 +110,7 @@ export async function createVoiceCallRuntime(params: {
|
||||
|
||||
const provider = resolveProvider(config);
|
||||
const manager = new CallManager(config);
|
||||
const webhookServer = new VoiceCallWebhookServer(
|
||||
config,
|
||||
manager,
|
||||
provider,
|
||||
coreConfig,
|
||||
);
|
||||
const webhookServer = new VoiceCallWebhookServer(config, manager, provider, coreConfig);
|
||||
|
||||
const localUrl = await webhookServer.start();
|
||||
|
||||
@@ -143,9 +130,7 @@ export async function createVoiceCallRuntime(params: {
|
||||
publicUrl = tunnelResult?.publicUrl ?? null;
|
||||
} catch (err) {
|
||||
log.error(
|
||||
`[voice-call] Tunnel setup failed: ${
|
||||
err instanceof Error ? err.message : String(err)
|
||||
}`,
|
||||
`[voice-call] Tunnel setup failed: ${err instanceof Error ? err.message : String(err)}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -47,10 +47,7 @@ export function pcmToMulaw(pcm: Buffer): Buffer {
|
||||
return mulaw;
|
||||
}
|
||||
|
||||
export function convertPcmToMulaw8k(
|
||||
pcm: Buffer,
|
||||
inputSampleRate: number,
|
||||
): Buffer {
|
||||
export function convertPcmToMulaw8k(pcm: Buffer, inputSampleRate: number): Buffer {
|
||||
const pcm8k = resamplePcmTo8k(pcm, inputSampleRate);
|
||||
return pcmToMulaw(pcm8k);
|
||||
}
|
||||
@@ -58,10 +55,7 @@ export function convertPcmToMulaw8k(
|
||||
/**
|
||||
* Chunk audio buffer into 20ms frames for streaming (8kHz mono mu-law).
|
||||
*/
|
||||
export function chunkAudio(
|
||||
audio: Buffer,
|
||||
chunkSize = 160,
|
||||
): Generator<Buffer, void, unknown> {
|
||||
export function chunkAudio(audio: Buffer, chunkSize = 160): Generator<Buffer, void, unknown> {
|
||||
return (function* () {
|
||||
for (let i = 0; i < audio.length; i += chunkSize) {
|
||||
yield audio.subarray(i, Math.min(i + chunkSize, audio.length));
|
||||
|
||||
@@ -44,10 +44,7 @@ export function createTelephonyTtsProvider(params: {
|
||||
};
|
||||
}
|
||||
|
||||
function applyTtsOverride(
|
||||
coreConfig: CoreConfig,
|
||||
override?: VoiceCallTtsConfig,
|
||||
): CoreConfig {
|
||||
function applyTtsOverride(coreConfig: CoreConfig, override?: VoiceCallTtsConfig): CoreConfig {
|
||||
if (!override) return coreConfig;
|
||||
|
||||
const base = coreConfig.messages?.tts;
|
||||
|
||||
@@ -52,14 +52,7 @@ export async function startNgrokTunnel(config: {
|
||||
}
|
||||
|
||||
// Build ngrok command args
|
||||
const args = [
|
||||
"http",
|
||||
String(config.port),
|
||||
"--log",
|
||||
"stdout",
|
||||
"--log-format",
|
||||
"json",
|
||||
];
|
||||
const args = ["http", String(config.port), "--log", "stdout", "--log-format", "json"];
|
||||
|
||||
// Add custom domain if provided (paid ngrok feature)
|
||||
if (config.domain) {
|
||||
@@ -234,11 +227,9 @@ export async function startTailscaleTunnel(config: {
|
||||
const localUrl = `http://127.0.0.1:${config.port}${path}`;
|
||||
|
||||
return new Promise((resolve, reject) => {
|
||||
const proc = spawn(
|
||||
"tailscale",
|
||||
[config.mode, "--bg", "--yes", "--set-path", path, localUrl],
|
||||
{ stdio: ["ignore", "pipe", "pipe"] },
|
||||
);
|
||||
const proc = spawn("tailscale", [config.mode, "--bg", "--yes", "--set-path", path, localUrl], {
|
||||
stdio: ["ignore", "pipe", "pipe"],
|
||||
});
|
||||
|
||||
const timeout = setTimeout(() => {
|
||||
proc.kill("SIGKILL");
|
||||
@@ -249,9 +240,7 @@ export async function startTailscaleTunnel(config: {
|
||||
clearTimeout(timeout);
|
||||
if (code === 0) {
|
||||
const publicUrl = `https://${dnsName}${path}`;
|
||||
console.log(
|
||||
`[voice-call] Tailscale ${config.mode} active: ${publicUrl}`,
|
||||
);
|
||||
console.log(`[voice-call] Tailscale ${config.mode} active: ${publicUrl}`);
|
||||
|
||||
resolve({
|
||||
publicUrl,
|
||||
@@ -275,10 +264,7 @@ export async function startTailscaleTunnel(config: {
|
||||
/**
|
||||
* Stop a Tailscale serve/funnel tunnel.
|
||||
*/
|
||||
async function stopTailscaleTunnel(
|
||||
mode: "serve" | "funnel",
|
||||
path: string,
|
||||
): Promise<void> {
|
||||
async function stopTailscaleTunnel(mode: "serve" | "funnel", path: string): Promise<void> {
|
||||
return new Promise((resolve) => {
|
||||
const proc = spawn("tailscale", [mode, "off", path], {
|
||||
stdio: "ignore",
|
||||
@@ -299,9 +285,7 @@ async function stopTailscaleTunnel(
|
||||
/**
|
||||
* Start a tunnel based on configuration.
|
||||
*/
|
||||
export async function startTunnel(
|
||||
config: TunnelConfig,
|
||||
): Promise<TunnelResult | null> {
|
||||
export async function startTunnel(config: TunnelConfig): Promise<TunnelResult | null> {
|
||||
switch (config.provider) {
|
||||
case "ngrok":
|
||||
return startNgrokTunnel({
|
||||
|
||||
@@ -38,9 +38,7 @@ function plivoV3Signature(params: {
|
||||
|
||||
const sortedQuery = Array.from(queryMap.keys())
|
||||
.sort()
|
||||
.flatMap((k) =>
|
||||
[...(queryMap.get(k) ?? [])].sort().map((v) => `${k}=${v}`),
|
||||
)
|
||||
.flatMap((k) => [...(queryMap.get(k) ?? [])].sort().map((v) => `${k}=${v}`))
|
||||
.join("&");
|
||||
|
||||
const postParams = new URLSearchParams(params.postBody);
|
||||
@@ -71,24 +69,17 @@ function plivoV3Signature(params: {
|
||||
return canonicalizeBase64(digest);
|
||||
}
|
||||
|
||||
function twilioSignature(params: {
|
||||
authToken: string;
|
||||
url: string;
|
||||
postBody: string;
|
||||
}): string {
|
||||
function twilioSignature(params: { authToken: string; url: string; postBody: string }): string {
|
||||
let dataToSign = params.url;
|
||||
const sortedParams = Array.from(
|
||||
new URLSearchParams(params.postBody).entries(),
|
||||
).sort((a, b) => a[0].localeCompare(b[0]));
|
||||
const sortedParams = Array.from(new URLSearchParams(params.postBody).entries()).sort((a, b) =>
|
||||
a[0].localeCompare(b[0]),
|
||||
);
|
||||
|
||||
for (const [key, value] of sortedParams) {
|
||||
dataToSign += key + value;
|
||||
}
|
||||
|
||||
return crypto
|
||||
.createHmac("sha1", params.authToken)
|
||||
.update(dataToSign)
|
||||
.digest("base64");
|
||||
return crypto.createHmac("sha1", params.authToken).update(dataToSign).digest("base64");
|
||||
}
|
||||
|
||||
describe("verifyPlivoWebhook", () => {
|
||||
|
||||
@@ -98,10 +98,7 @@ export function reconstructWebhookUrl(ctx: WebhookContext): string {
|
||||
return `${proto}://${host}${path}`;
|
||||
}
|
||||
|
||||
function buildTwilioVerificationUrl(
|
||||
ctx: WebhookContext,
|
||||
publicUrl?: string,
|
||||
): string {
|
||||
function buildTwilioVerificationUrl(ctx: WebhookContext, publicUrl?: string): string {
|
||||
if (!publicUrl) {
|
||||
return reconstructWebhookUrl(ctx);
|
||||
}
|
||||
@@ -186,12 +183,7 @@ export function verifyTwilioWebhook(
|
||||
const params = new URLSearchParams(ctx.rawBody);
|
||||
|
||||
// Validate signature
|
||||
const isValid = validateTwilioSignature(
|
||||
authToken,
|
||||
signature,
|
||||
verificationUrl,
|
||||
params,
|
||||
);
|
||||
const isValid = validateTwilioSignature(authToken, signature, verificationUrl, params);
|
||||
|
||||
if (isValid) {
|
||||
return { ok: true, verificationUrl };
|
||||
@@ -199,8 +191,7 @@ export function verifyTwilioWebhook(
|
||||
|
||||
// Check if this is ngrok free tier - the URL might have different format
|
||||
const isNgrokFreeTier =
|
||||
verificationUrl.includes(".ngrok-free.app") ||
|
||||
verificationUrl.includes(".ngrok.io");
|
||||
verificationUrl.includes(".ngrok-free.app") || verificationUrl.includes(".ngrok.io");
|
||||
|
||||
if (
|
||||
isNgrokFreeTier &&
|
||||
@@ -353,10 +344,7 @@ function validatePlivoV3Signature(params: {
|
||||
});
|
||||
|
||||
const hmacBase = `${baseUrl}.${params.nonce}`;
|
||||
const digest = crypto
|
||||
.createHmac("sha256", params.authToken)
|
||||
.update(hmacBase)
|
||||
.digest("base64");
|
||||
const digest = crypto.createHmac("sha256", params.authToken).update(hmacBase).digest("base64");
|
||||
const expected = normalizeSignatureBase64(digest);
|
||||
|
||||
// Header can contain multiple signatures separated by commas.
|
||||
@@ -413,8 +401,7 @@ export function verifyPlivoWebhook(
|
||||
}
|
||||
|
||||
if (signatureV3 && nonceV3) {
|
||||
const method =
|
||||
ctx.method === "GET" || ctx.method === "POST" ? ctx.method : null;
|
||||
const method = ctx.method === "GET" || ctx.method === "POST" ? ctx.method : null;
|
||||
|
||||
if (!method) {
|
||||
return {
|
||||
|
||||
@@ -54,13 +54,10 @@ export class VoiceCallWebhookServer {
|
||||
* Initialize media streaming with OpenAI Realtime STT.
|
||||
*/
|
||||
private initializeMediaStreaming(): void {
|
||||
const apiKey =
|
||||
this.config.streaming?.openaiApiKey || process.env.OPENAI_API_KEY;
|
||||
const apiKey = this.config.streaming?.openaiApiKey || process.env.OPENAI_API_KEY;
|
||||
|
||||
if (!apiKey) {
|
||||
console.warn(
|
||||
"[voice-call] Streaming enabled but no OpenAI API key found",
|
||||
);
|
||||
console.warn("[voice-call] Streaming enabled but no OpenAI API key found");
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -74,9 +71,7 @@ export class VoiceCallWebhookServer {
|
||||
const streamConfig: MediaStreamConfig = {
|
||||
sttProvider,
|
||||
onTranscript: (providerCallId, transcript) => {
|
||||
console.log(
|
||||
`[voice-call] Transcript for ${providerCallId}: ${transcript}`,
|
||||
);
|
||||
console.log(`[voice-call] Transcript for ${providerCallId}: ${transcript}`);
|
||||
|
||||
// Clear TTS queue on barge-in (user started speaking, interrupt current playback)
|
||||
if (this.provider.name === "twilio") {
|
||||
@@ -86,9 +81,7 @@ export class VoiceCallWebhookServer {
|
||||
// Look up our internal call ID from the provider call ID
|
||||
const call = this.manager.getCallByProviderCallId(providerCallId);
|
||||
if (!call) {
|
||||
console.warn(
|
||||
`[voice-call] No active call found for provider ID: ${providerCallId}`,
|
||||
);
|
||||
console.warn(`[voice-call] No active call found for provider ID: ${providerCallId}`);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -106,8 +99,7 @@ export class VoiceCallWebhookServer {
|
||||
|
||||
// Auto-respond in conversation mode (inbound always, outbound if mode is conversation)
|
||||
const callMode = call.metadata?.mode as string | undefined;
|
||||
const shouldRespond =
|
||||
call.direction === "inbound" || callMode === "conversation";
|
||||
const shouldRespond = call.direction === "inbound" || callMode === "conversation";
|
||||
if (shouldRespond) {
|
||||
this.handleInboundResponse(call.callId, transcript).catch((err) => {
|
||||
console.warn(`[voice-call] Failed to auto-respond:`, err);
|
||||
@@ -123,15 +115,10 @@ export class VoiceCallWebhookServer {
|
||||
console.log(`[voice-call] Partial for ${callId}: ${partial}`);
|
||||
},
|
||||
onConnect: (callId, streamSid) => {
|
||||
console.log(
|
||||
`[voice-call] Media stream connected: ${callId} -> ${streamSid}`,
|
||||
);
|
||||
console.log(`[voice-call] Media stream connected: ${callId} -> ${streamSid}`);
|
||||
// Register stream with provider for TTS routing
|
||||
if (this.provider.name === "twilio") {
|
||||
(this.provider as TwilioProvider).registerCallStream(
|
||||
callId,
|
||||
streamSid,
|
||||
);
|
||||
(this.provider as TwilioProvider).registerCallStream(callId, streamSid);
|
||||
}
|
||||
|
||||
// Speak initial message if one was provided when call was initiated
|
||||
@@ -173,10 +160,7 @@ export class VoiceCallWebhookServer {
|
||||
// Handle WebSocket upgrades for media streams
|
||||
if (this.mediaStreamHandler) {
|
||||
this.server.on("upgrade", (request, socket, head) => {
|
||||
const url = new URL(
|
||||
request.url || "/",
|
||||
`http://${request.headers.host}`,
|
||||
);
|
||||
const url = new URL(request.url || "/", `http://${request.headers.host}`);
|
||||
|
||||
if (url.pathname === streamPath) {
|
||||
console.log("[voice-call] WebSocket upgrade for media stream");
|
||||
@@ -193,9 +177,7 @@ export class VoiceCallWebhookServer {
|
||||
const url = `http://${bind}:${port}${webhookPath}`;
|
||||
console.log(`[voice-call] Webhook server listening on ${url}`);
|
||||
if (this.mediaStreamHandler) {
|
||||
console.log(
|
||||
`[voice-call] Media stream WebSocket on ws://${bind}:${port}${streamPath}`,
|
||||
);
|
||||
console.log(`[voice-call] Media stream WebSocket on ws://${bind}:${port}${streamPath}`);
|
||||
}
|
||||
resolve(url);
|
||||
});
|
||||
@@ -258,9 +240,7 @@ export class VoiceCallWebhookServer {
|
||||
// Verify signature
|
||||
const verification = this.provider.verifyWebhook(ctx);
|
||||
if (!verification.ok) {
|
||||
console.warn(
|
||||
`[voice-call] Webhook verification failed: ${verification.reason}`,
|
||||
);
|
||||
console.warn(`[voice-call] Webhook verification failed: ${verification.reason}`);
|
||||
res.statusCode = 401;
|
||||
res.end("Unauthorized");
|
||||
return;
|
||||
@@ -274,10 +254,7 @@ export class VoiceCallWebhookServer {
|
||||
try {
|
||||
this.manager.processEvent(event);
|
||||
} catch (err) {
|
||||
console.error(
|
||||
`[voice-call] Error processing event ${event.type}:`,
|
||||
err,
|
||||
);
|
||||
console.error(`[voice-call] Error processing event ${event.type}:`, err);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -285,9 +262,7 @@ export class VoiceCallWebhookServer {
|
||||
res.statusCode = result.statusCode || 200;
|
||||
|
||||
if (result.providerResponseHeaders) {
|
||||
for (const [key, value] of Object.entries(
|
||||
result.providerResponseHeaders,
|
||||
)) {
|
||||
for (const [key, value] of Object.entries(result.providerResponseHeaders)) {
|
||||
res.setHeader(key, value);
|
||||
}
|
||||
}
|
||||
@@ -311,13 +286,8 @@ export class VoiceCallWebhookServer {
|
||||
* Handle auto-response for inbound calls using the agent system.
|
||||
* Supports tool calling for richer voice interactions.
|
||||
*/
|
||||
private async handleInboundResponse(
|
||||
callId: string,
|
||||
userMessage: string,
|
||||
): Promise<void> {
|
||||
console.log(
|
||||
`[voice-call] Auto-responding to inbound call ${callId}: "${userMessage}"`,
|
||||
);
|
||||
private async handleInboundResponse(callId: string, userMessage: string): Promise<void> {
|
||||
console.log(`[voice-call] Auto-responding to inbound call ${callId}: "${userMessage}"`);
|
||||
|
||||
// Get call context for conversation history
|
||||
const call = this.manager.getCall(callId);
|
||||
@@ -344,9 +314,7 @@ export class VoiceCallWebhookServer {
|
||||
});
|
||||
|
||||
if (result.error) {
|
||||
console.error(
|
||||
`[voice-call] Response generation error: ${result.error}`,
|
||||
);
|
||||
console.error(`[voice-call] Response generation error: ${result.error}`);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -458,9 +426,7 @@ export async function cleanupTailscaleExposureRoute(opts: {
|
||||
* Setup Tailscale serve/funnel for the webhook server.
|
||||
* This is a helper that shells out to `tailscale serve` or `tailscale funnel`.
|
||||
*/
|
||||
export async function setupTailscaleExposure(
|
||||
config: VoiceCallConfig,
|
||||
): Promise<string | null> {
|
||||
export async function setupTailscaleExposure(config: VoiceCallConfig): Promise<string | null> {
|
||||
if (config.tailscale.mode === "off") {
|
||||
return null;
|
||||
}
|
||||
@@ -479,9 +445,7 @@ export async function setupTailscaleExposure(
|
||||
/**
|
||||
* Cleanup Tailscale serve/funnel.
|
||||
*/
|
||||
export async function cleanupTailscaleExposure(
|
||||
config: VoiceCallConfig,
|
||||
): Promise<void> {
|
||||
export async function cleanupTailscaleExposure(config: VoiceCallConfig): Promise<void> {
|
||||
if (config.tailscale.mode === "off") {
|
||||
return;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user