@azzouz Thank you! That was helpful. I feel the best solution would probably be creating a new crawler that extends the Playwright crawler. But in the meantime, I'm using this:
preNavigationHooks: [
async ({ request, page, log }) => {
page.once("response", async (response) => {
const contentType = response.headers()["content-type"];
if (
contentType?.includes("application/json") ||
contentType?.includes("application/x-yaml") ||
contentType?.includes("text/yaml") ||
contentType?.includes("application/yaml")
) {
const type = contentType.includes("json") ? "json" : "yaml";
request.skipNavigation = true;
request.noRetry = true;
await new Promise(async (resolve, reject) => {
page.once("download", async (download) => {
const path = await download.path();
const content = await readFile(path, "utf8");
await dataset.pushData({
url: download.url(),
contentType,
filename: download.suggestedFilename(),
type,
content,
});
log.info(`Downloaded ${type}: ${download.url()}`);
resolve();
});
await page.waitForEvent("download");
});
}
});
},
],