fix: guard remote media fetches with SSRF checks

This commit is contained in:
Peter Steinberger
2026-02-02 04:04:27 -08:00
parent d842b28a15
commit 81c68f582d
11 changed files with 422 additions and 241 deletions
+17 -1
View File
@@ -1,4 +1,4 @@
import { describe, expect, it } from "vitest";
import { describe, expect, it, vi } from "vitest";
import { fetchRemoteMedia } from "./fetch.js";
function makeStream(chunks: Uint8Array[]) {
@@ -14,6 +14,7 @@ function makeStream(chunks: Uint8Array[]) {
describe("fetchRemoteMedia", () => {
it("rejects when content-length exceeds maxBytes", async () => {
const lookupFn = vi.fn(async () => [{ address: "93.184.216.34", family: 4 }]);
const fetchImpl = async () =>
new Response(makeStream([new Uint8Array([1, 2, 3, 4, 5])]), {
status: 200,
@@ -25,11 +26,13 @@ describe("fetchRemoteMedia", () => {
url: "https://example.com/file.bin",
fetchImpl,
maxBytes: 4,
lookupFn,
}),
).rejects.toThrow("exceeds maxBytes");
});
it("rejects when streamed payload exceeds maxBytes", async () => {
const lookupFn = vi.fn(async () => [{ address: "93.184.216.34", family: 4 }]);
const fetchImpl = async () =>
new Response(makeStream([new Uint8Array([1, 2, 3]), new Uint8Array([4, 5, 6])]), {
status: 200,
@@ -40,7 +43,20 @@ describe("fetchRemoteMedia", () => {
url: "https://example.com/file.bin",
fetchImpl,
maxBytes: 4,
lookupFn,
}),
).rejects.toThrow("exceeds maxBytes");
});
it("blocks private IP literals before fetching", async () => {
  // Spy standing in for the fetch implementation; it must remain uncalled.
  const networkSpy = vi.fn();
  // No lookupFn is passed here: the URL host is already a loopback IP literal.
  const pending = fetchRemoteMedia({
    url: "http://127.0.0.1/secret.jpg",
    fetchImpl: networkSpy,
    maxBytes: 1024,
  });
  await expect(pending).rejects.toThrow(/private|internal|blocked/i);
  expect(networkSpy).not.toHaveBeenCalled();
});
});
+79 -61
View File
@@ -1,4 +1,6 @@
import path from "node:path";
import type { LookupFn, SsrFPolicy } from "../infra/net/ssrf.js";
import { fetchWithSsrFGuard } from "../infra/net/fetch-guard.js";
import { detectMime, extensionForMime } from "./mime.js";
type FetchMediaResult = {
@@ -26,6 +28,9 @@ type FetchMediaOptions = {
fetchImpl?: FetchLike;
filePathHint?: string;
maxBytes?: number;
maxRedirects?: number;
ssrfPolicy?: SsrFPolicy;
lookupFn?: LookupFn;
};
function stripQuotes(value: string): string {
@@ -73,83 +78,96 @@ async function readErrorBodySnippet(res: Response, maxChars = 200): Promise<stri
}
export async function fetchRemoteMedia(options: FetchMediaOptions): Promise<FetchMediaResult> {
const { url, fetchImpl, filePathHint, maxBytes } = options;
const fetcher: FetchLike | undefined = fetchImpl ?? globalThis.fetch;
if (!fetcher) {
throw new Error("fetch is not available");
}
const { url, fetchImpl, filePathHint, maxBytes, maxRedirects, ssrfPolicy, lookupFn } = options;
let res: Response;
let finalUrl = url;
let release: (() => Promise<void>) | null = null;
try {
res = await fetcher(url);
const result = await fetchWithSsrFGuard({
url,
fetchImpl,
maxRedirects,
policy: ssrfPolicy,
lookupFn,
});
res = result.response;
finalUrl = result.finalUrl;
release = result.release;
} catch (err) {
throw new MediaFetchError("fetch_failed", `Failed to fetch media from ${url}: ${String(err)}`);
}
if (!res.ok) {
const statusText = res.statusText ? ` ${res.statusText}` : "";
const redirected = res.url && res.url !== url ? ` (redirected to ${res.url})` : "";
let detail = `HTTP ${res.status}${statusText}`;
if (!res.body) {
detail = `HTTP ${res.status}${statusText}; empty response body`;
} else {
const snippet = await readErrorBodySnippet(res);
if (snippet) {
detail += `; body: ${snippet}`;
try {
if (!res.ok) {
const statusText = res.statusText ? ` ${res.statusText}` : "";
const redirected = finalUrl !== url ? ` (redirected to ${finalUrl})` : "";
let detail = `HTTP ${res.status}${statusText}`;
if (!res.body) {
detail = `HTTP ${res.status}${statusText}; empty response body`;
} else {
const snippet = await readErrorBodySnippet(res);
if (snippet) {
detail += `; body: ${snippet}`;
}
}
}
throw new MediaFetchError(
"http_error",
`Failed to fetch media from ${url}${redirected}: ${detail}`,
);
}
const contentLength = res.headers.get("content-length");
if (maxBytes && contentLength) {
const length = Number(contentLength);
if (Number.isFinite(length) && length > maxBytes) {
throw new MediaFetchError(
"max_bytes",
`Failed to fetch media from ${url}: content length ${length} exceeds maxBytes ${maxBytes}`,
"http_error",
`Failed to fetch media from ${url}${redirected}: ${detail}`,
);
}
}
const buffer = maxBytes
? await readResponseWithLimit(res, maxBytes)
: Buffer.from(await res.arrayBuffer());
let fileNameFromUrl: string | undefined;
try {
const parsed = new URL(url);
const base = path.basename(parsed.pathname);
fileNameFromUrl = base || undefined;
} catch {
// ignore parse errors; leave undefined
}
const contentLength = res.headers.get("content-length");
if (maxBytes && contentLength) {
const length = Number(contentLength);
if (Number.isFinite(length) && length > maxBytes) {
throw new MediaFetchError(
"max_bytes",
`Failed to fetch media from ${url}: content length ${length} exceeds maxBytes ${maxBytes}`,
);
}
}
const headerFileName = parseContentDispositionFileName(res.headers.get("content-disposition"));
let fileName =
headerFileName || fileNameFromUrl || (filePathHint ? path.basename(filePathHint) : undefined);
const buffer = maxBytes
? await readResponseWithLimit(res, maxBytes)
: Buffer.from(await res.arrayBuffer());
let fileNameFromUrl: string | undefined;
try {
const parsed = new URL(finalUrl);
const base = path.basename(parsed.pathname);
fileNameFromUrl = base || undefined;
} catch {
// ignore parse errors; leave undefined
}
const filePathForMime =
headerFileName && path.extname(headerFileName) ? headerFileName : (filePathHint ?? url);
const contentType = await detectMime({
buffer,
headerMime: res.headers.get("content-type"),
filePath: filePathForMime,
});
if (fileName && !path.extname(fileName) && contentType) {
const ext = extensionForMime(contentType);
if (ext) {
fileName = `${fileName}${ext}`;
const headerFileName = parseContentDispositionFileName(res.headers.get("content-disposition"));
let fileName =
headerFileName || fileNameFromUrl || (filePathHint ? path.basename(filePathHint) : undefined);
const filePathForMime =
headerFileName && path.extname(headerFileName) ? headerFileName : (filePathHint ?? finalUrl);
const contentType = await detectMime({
buffer,
headerMime: res.headers.get("content-type"),
filePath: filePathForMime,
});
if (fileName && !path.extname(fileName) && contentType) {
const ext = extensionForMime(contentType);
if (ext) {
fileName = `${fileName}${ext}`;
}
}
return {
buffer,
contentType: contentType ?? undefined,
fileName,
};
} finally {
if (release) {
await release();
}
}
return {
buffer,
contentType: contentType ?? undefined,
fileName,
};
}
async function readResponseWithLimit(res: Response, maxBytes: number): Promise<Buffer> {
+28 -70
View File
@@ -1,9 +1,4 @@
import type { Dispatcher } from "undici";
import {
closeDispatcher,
createPinnedDispatcher,
resolvePinnedHostname,
} from "../infra/net/ssrf.js";
import { fetchWithSsrFGuard } from "../infra/net/fetch-guard.js";
import { logWarn } from "../logger.js";
type CanvasModule = typeof import("@napi-rs/canvas");
@@ -112,10 +107,6 @@ export const DEFAULT_INPUT_PDF_MAX_PAGES = 4;
export const DEFAULT_INPUT_PDF_MAX_PIXELS = 4_000_000;
export const DEFAULT_INPUT_PDF_MIN_TEXT_CHARS = 200;
/**
 * Reports whether an HTTP status code is one of the redirect codes
 * 301, 302, 303, 307 or 308.
 *
 * @param status - Numeric HTTP response status.
 * @returns `true` for the five codes listed above, `false` for anything else.
 */
function isRedirectStatus(status: number): boolean {
  switch (status) {
    case 301:
    case 302:
    case 303:
    case 307:
    case 308:
      return true;
    default:
      return false;
  }
}
export function normalizeMimeType(value: string | undefined): string | undefined {
if (!value) {
return undefined;
@@ -151,72 +142,39 @@ export async function fetchWithGuard(params: {
timeoutMs: number;
maxRedirects: number;
}): Promise<InputFetchResult> {
let currentUrl = params.url;
let redirectCount = 0;
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), params.timeoutMs);
const { response, release } = await fetchWithSsrFGuard({
url: params.url,
maxRedirects: params.maxRedirects,
timeoutMs: params.timeoutMs,
headers: { "User-Agent": "OpenClaw-Gateway/1.0" },
});
try {
while (true) {
const parsedUrl = new URL(currentUrl);
if (!["http:", "https:"].includes(parsedUrl.protocol)) {
throw new Error(`Invalid URL protocol: ${parsedUrl.protocol}. Only HTTP/HTTPS allowed.`);
}
const pinned = await resolvePinnedHostname(parsedUrl.hostname);
const dispatcher = createPinnedDispatcher(pinned);
if (!response.ok) {
throw new Error(`Failed to fetch: ${response.status} ${response.statusText}`);
}
try {
const response = await fetch(parsedUrl, {
signal: controller.signal,
headers: { "User-Agent": "OpenClaw-Gateway/1.0" },
redirect: "manual",
dispatcher,
} as RequestInit & { dispatcher: Dispatcher });
if (isRedirectStatus(response.status)) {
const location = response.headers.get("location");
if (!location) {
throw new Error(`Redirect missing location header (${response.status})`);
}
redirectCount += 1;
if (redirectCount > params.maxRedirects) {
throw new Error(`Too many redirects (limit: ${params.maxRedirects})`);
}
void response.body?.cancel();
currentUrl = new URL(location, parsedUrl).toString();
continue;
}
if (!response.ok) {
throw new Error(`Failed to fetch: ${response.status} ${response.statusText}`);
}
const contentLength = response.headers.get("content-length");
if (contentLength) {
const size = parseInt(contentLength, 10);
if (size > params.maxBytes) {
throw new Error(`Content too large: ${size} bytes (limit: ${params.maxBytes} bytes)`);
}
}
const buffer = Buffer.from(await response.arrayBuffer());
if (buffer.byteLength > params.maxBytes) {
throw new Error(
`Content too large: ${buffer.byteLength} bytes (limit: ${params.maxBytes} bytes)`,
);
}
const contentType = response.headers.get("content-type") || undefined;
const parsed = parseContentType(contentType);
const mimeType = parsed.mimeType ?? "application/octet-stream";
return { buffer, mimeType, contentType };
} finally {
await closeDispatcher(dispatcher);
const contentLength = response.headers.get("content-length");
if (contentLength) {
const size = parseInt(contentLength, 10);
if (size > params.maxBytes) {
throw new Error(`Content too large: ${size} bytes (limit: ${params.maxBytes} bytes)`);
}
}
const buffer = Buffer.from(await response.arrayBuffer());
if (buffer.byteLength > params.maxBytes) {
throw new Error(
`Content too large: ${buffer.byteLength} bytes (limit: ${params.maxBytes} bytes)`,
);
}
const contentType = response.headers.get("content-type") || undefined;
const parsed = parseContentType(contentType);
const mimeType = parsed.mimeType ?? "application/octet-stream";
return { buffer, mimeType, contentType };
} finally {
clearTimeout(timeoutId);
await release();
}
}