gpt4 book ai didi

javascript - NodeJS Puppeteer setDownloadBehavior 问题

转载 作者:行者123 更新时间:2023-11-30 13:57:42 27 4
gpt4 key购买 nike

我正在尝试设置自定义下载路径,但 chrome 无论如何都会将文件放入典型的 Downloads 文件夹中。

const puppeteer = require('puppeteer');

// Demo script: open a page in a locally installed Chrome, request a custom
// download directory through the DevTools protocol, then click the download link.
(async function main() {
  // Launch settings: explicit Chrome binary, fixed 1920x1080 viewport,
  // visible (non-headless) window and a profile directory relative to the CWD.
  const launchOptions = {
    executablePath: 'C:/Program Files (x86)/Google/Chrome/Application/chrome.exe',
    defaultViewport: { width: 1920, height: 1080 },
    headless: false,
    userDataDir: "./user_data",
  };
  const browser = await puppeteer.launch(launchOptions);
  const page = await browser.newPage();

  // Navigate and continue as soon as the DOM content has loaded.
  const targetUrl = 'https://example.com/page-with-the-file-link';
  await page.goto(targetUrl, { waitUntil: 'domcontentloaded' });

  // Send the raw Page.setDownloadBehavior protocol command for this page.
  // NOTE(review): `_client` looks like a private Puppeteer member — confirm it
  // is available in the installed Puppeteer version.
  const downloadBehavior = {
    behavior: 'allow',
    downloadPath: 'C:/Users/Me/Downloads/custom/folder/',
  };
  await page._client.send('Page.setDownloadBehavior', downloadBehavior);

  console.log('Start downloading');

  await page.click('a.download-btn');

  // Presumably a 5000 ms pause to give the download time to finish — confirm
  // the unit against the Puppeteer version in use.
  await page.waitFor(5000);

  console.log('Complete');
  await browser.close();
}());

因此,它会忽略 downloadPath 选项并将文件放入默认的 C:/Users/Me/Downloads 文件夹。

另外,脚本也没有真正等待 5 秒:点击下载链接后,它刚来得及下载文件就立即退出了。

Start downloading
Complete
(node:51016) UnhandledPromiseRejectionWarning: Error: WebSocket is not open: readyState 3 (CLOSED)
at WebSocket.send (C:\Users\Me\Downloads\puppeteer\node_modules\ws\lib\websocket.js:329:19)
at WebSocketTransport.send (C:\Users\Me\Downloads\puppeteer\node_modules\puppeteer\lib\WebSocketTransport.js:60:14)
at Connection._rawSend (C:\Users\Me\Downloads\puppeteer\node_modules\puppeteer\lib\Connection.js:86:21)
at Connection.send (C:\Users\Me\Downloads\puppeteer\node_modules\puppeteer\lib\Connection.js:72:21)
at gracefullyCloseChrome (C:\Users\Me\Downloads\puppeteer\node_modules\puppeteer\lib\Launcher.js:194:20)
at Browser.close (C:\Users\Me\Downloads\puppeteer\node_modules\puppeteer\lib\Browser.js:255:31)
at Browser.<anonymous> (C:\Users\Me\Downloads\puppeteer\node_modules\puppeteer\lib\helper.js:112:23)
at C:\Users\Me\Downloads\puppeteer\test-download-file.js:97:18
at <anonymous>
(node:51016) UnhandledPromiseRejectionWarning: Unhandled promise rejection. This error originated either by throwing inside of an async function without a catch block, or by rejecting a promise which was not handled with .catch(). (rejection id: 3)
(node:51016) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.

为什么这个脚本在点击下载链接后就退出了?如何正确执行?有没有捕捉下载状态和监控进度的功能?

任何建议将不胜感激,谢谢!

最佳答案

如何借助变通方法(workaround)解决这个问题

enter image description here

1) 我们应该捕获/等待将出现在默认 'Downloads' 目录中的文件

/**
 * Waits until a file exists, or fails after a timeout.
 *
 * Resolves when `filePath` is already readable, or when a 'rename' event for
 * its base name is reported by a watcher on its parent directory.
 *
 * @param {string} filePath - Path of the file to wait for.
 * @param {number} timeout - Maximum time to wait, in milliseconds.
 * @returns {Promise<void>} Resolves once the file is detected; rejects on
 *   timeout or if the parent directory cannot be watched.
 */
function checkExistsWithTimeout(filePath, timeout) {
  return new Promise(function (resolve, reject) {
    let watcher = null;
    let timer = null;
    let settled = false;

    // Single exit point: settles the promise once and always releases the
    // timer and the watcher, so nothing fires (or keeps the process alive)
    // after the promise has been settled.
    function finish(err) {
      if (settled) {
        return;
      }
      settled = true;
      clearTimeout(timer);
      if (watcher) {
        watcher.close();
      }
      if (err) {
        reject(err);
      } else {
        resolve();
      }
    }

    timer = setTimeout(function () {
      finish(new Error('File did not exists and was not created during the timeout.'));
    }, timeout);

    const dir = path.dirname(filePath);
    const basename = path.basename(filePath);

    try {
      watcher = fs.watch(dir, function (eventType, filename) {
        if (eventType === 'rename' && filename === basename) {
          finish();
        }
      });
      // Without a listener an 'error' event on the watcher would be thrown
      // as an uncaught exception.
      watcher.on('error', finish);
    } catch (err) {
      // fs.watch throws when the directory cannot be watched (e.g. it does
      // not exist); previously this left the timer armed and it later
      // crashed on `watcher.close()` with `watcher` undefined.
      finish(err);
      return;
    }

    // The file may already be there before any watch event is delivered.
    fs.access(filePath, fs.constants.R_OK, function (err) {
      if (!err) {
        finish();
      }
    });
  });
}

检查文件以防万一(可选)

/**
 * Checks that a file (or directory) exists.
 *
 * @param {string} path - Path to check.
 * @returns {Promise<void>} Resolves (after logging 'File exists') when the
 *   path exists; rejects with the original `fs.access` error otherwise, so
 *   callers can still inspect `err.code`.
 */
function checkFile(path) {
  return new Promise(function (resolve, reject) {
    fs.access(path, fs.constants.F_OK, (err) => {
      if (err) {
        // Return here: previously execution fell through and still logged
        // 'File exists' for a missing file.
        reject(err);
        return;
      }

      console.log('File exists');
      resolve();
    });
  });
}

下载完成后将文件移动到我们需要的地方(可选)

/**
 * Moves (renames) a file.
 *
 * @param {string} fromPath - Current path of the file.
 * @param {string} toPath - Destination path.
 * @returns {Promise<void>} Resolves (after logging 'File moved') on success;
 *   rejects with an Error 'File did not move.' whose `cause` property holds
 *   the underlying `fs.rename` error.
 */
function moveFile(fromPath, toPath) {
  return new Promise(function (resolve, reject) {
    fs.rename(fromPath, toPath, function (err) {
      if (err) {
        // Only reject. Throwing from inside this callback (as before) is not
        // caught by the promise and terminates the process as an uncaught
        // exception.
        const moveError = new Error('File did not move.');
        moveError.cause = err;
        reject(moveError);
        return;
      }

      console.log('File moved');
      resolve();
    });
  });
}

示例

const fs = require('fs');
const path = require('path');

// Read the download button's trimmed label inside the page context.
// NOTE(review): the label is used below as the downloaded file's name —
// confirm that the site really names the file after the button text.
const fileName = await page.evaluate(
  () => document.querySelector('.download-file-btn').textContent.trim()
);

// Trigger the download through the browser itself.
await page.click('.download-file-btn');

// Wait up to 10000 ms for the file to show up, then move it to its destination.
const downloadedPath = 'C:/Users/Me/Downloads/'+fileName;
const destinationPath = 'C:/Users/me/Desktop/Videos/'+fileName;
await checkExistsWithTimeout(downloadedPath, 10000);
await moveFile(downloadedPath, destinationPath);

2) 使用'request'包从url下载文件

/**
 * Downloads `uri` into the file `filename` using the `request` package.
 *
 * A HEAD request is issued first; the body is only fetched when it answers
 * with status 200. The response content-type and content-length headers are
 * logged before the transfer starts.
 *
 * @param {string} uri - URL to download.
 * @param {string} filename - Path of the file to write.
 * @param {Function} [callback] - Optional function invoked (without
 *   arguments) after the file has been written and closed successfully.
 * @returns {Promise<void>} Resolves once the file is closed; rejects on a
 *   HEAD failure, a non-200 status, a request/stream error, a file write
 *   error, or if `callback` throws.
 */
function download(uri, filename, callback) {
  return new Promise(function (resolve, reject) {
    request.head(uri, function (err, res) {
      if (err) {
        reject(err instanceof Error ? err : new Error(err));
        return;
      }
      if (res.statusCode !== 200) {
        // Previously this path rejected with the useless message "null".
        reject(new Error('Unexpected status code ' + res.statusCode + ' for ' + uri));
        return;
      }

      console.log('content-type:', res.headers['content-type']);
      console.log('content-length:', res.headers['content-length']);

      const source = request(uri);
      const target = fs.createWriteStream(filename);
      let failed = false;

      // Shared failure path for both ends of the pipe. `pipe()` does not
      // forward errors, so each stream needs its own listener.
      function fail(streamErr) {
        if (failed) {
          return;
        }
        failed = true;
        console.log(streamErr);
        // Release the file handle; the resulting 'close' is ignored below.
        target.destroy();
        reject(streamErr);
      }

      source.on('error', fail);
      target.on('error', fail);
      target.on('close', function () {
        if (failed) {
          return;
        }
        if (typeof callback === 'function') {
          try {
            callback();
          } catch (callbackErr) {
            reject(callbackErr);
            return;
          }
        }
        resolve();
      });

      source.pipe(target);
    });
  });
}

示例

// Read the `src` of the page's `video.vjs-tech` element inside the page context.
const videoSrc = await page.evaluate(
  () => document.querySelector('video.vjs-tech').src
);

// Completion hook passed to download().
const onDownloaded = () => {
  console.log('downloaded');
};

await download(videoSrc, "C:/Users/Me/Downloads/Videos/video.mp4", onDownloaded);

在不使用 'setDownloadBehavior' 选项的情况下,这种方法对我有效;一旦使用该选项就会失败。现在,只需借助 Node 对文件系统做一点操作,我就可以点击下载按钮,或直接从某个 URL 下载文件。

希望对大家有所帮助。

关于javascript - NodeJS Puppeteer setDownloadBehavior 问题,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/56905235/

27 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com