gpt4 book ai didi

javascript - 卡在从第三帧抓取数据

转载 作者:行者123 更新时间:2023-11-30 13:56:23 26 4
gpt4 key购买 nike

我不是专业人士,只是想从一个网站上抓取一些数据。这里已经有人帮我选中了第一个“帧”(frame),但我还需要从第三个帧中抓取数据,并把第 1 帧、第 2 帧和第 3 帧的数据合并成一个结果。这是网站,这是我目前的代码:

const puppeteer = require('puppeteer');

// Launches a visible (non-headless) browser, clicks the 12th <option> inside the
// 'stanga' frame and the 1st <option> inside the 'centru' frame, then collects the
// innerText of every element matching `body > font` and resolves to that array.
let scrape = async() => {
const browser = await puppeteer.launch({
headless: false,
slowMo: 250
});
const page = await browser.newPage();
// NOTE(review): the URL is an empty string in this listing, and the option key is
// spelled `waituntil`; Puppeteer documents the key as `waitUntil` (camelCase), so
// confirm whether this spelling has any effect.
await page.goto('', {
waituntil: "networkidle0"
});
// Look up the frame named 'stanga' and the 12th option of its <select>.
const frame = await page.frames().find(f => f.name() === 'stanga');
const button = await frame.$('body > form > font > select > option:nth-child(12)');
// NOTE(review): the result of click() is not awaited; only the fixed 1000 ms wait
// below separates the click from the next frame lookup.
button.click();
await page.waitFor(1000);
// Look up the frame named 'centru' and the 1st option of its <select>.
const frame1 = await page.frames().find(a => a.name() ==='centru');
const select = await frame1.$('body > form > font > select > option:nth-child(1)');
await page.waitFor(500);
// NOTE(review): click() is not awaited here either.
select.click();
await page.waitFor(500);

// NOTE(review): this selector is evaluated on `page`, not on a frame object; the
// question asks for data from the third frame, so this call presumably needs to
// run against that frame instead — confirm.
const result = await page.$$eval("body > font", (options) => {
const timpi = options.map(option => option.innerText);

return timpi

});

// The browser is closed only when every step above succeeded.
await browser.close();
return result;
};
// Run the scraper and print whatever it resolves to.
// NOTE(review): no rejection handler is attached, so an error thrown inside
// scrape() is not handled here.
scrape().then((value) => {
console.log(value);
});

感谢您的帮助。

最佳答案

我已经修复了你的脚本:

const puppeteer = require('puppeteer');

/**
 * Opens the hard-coded page in a visible browser and collects text from its
 * three named frames.
 *
 * For the 'stanga' and 'centru' frames it clicks the first matching <option>,
 * pauses one second, and records the innerText of every option in that frame.
 * For the 'dreapta' frame it records the body text split into lines.
 * Frames with any other name are ignored.
 *
 * @returns {Promise<Array<Object<string, string[]>>>} One single-key object per
 *   scraped frame (key = frame name), in the order `page.frames()` yields them.
 * @throws Rejects with whatever Puppeteer throws (navigation failure, missing
 *   selector, ...); the browser is closed before the rejection propagates.
 */
const scrape = async () => {
  const browser = await puppeteer.launch({ headless: false });

  // try/finally guarantees the browser process is closed even when navigation
  // or one of the frame queries throws.
  try {
    const page = await browser.newPage();
    await page.goto('http://example.com/txt', { waitUntil: 'networkidle2' });

    const optionSelector = 'body > form > font > select > option';
    const expectedFrames = ['stanga', 'centru'];
    const scrapedText = [];

    // Plain timer-based pause; avoids page.waitFor, which newer Puppeteer
    // releases no longer provide.
    const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

    // Returns the innerText of every <option> matched inside the given frame.
    const getOptions = (frame) =>
      frame.$$eval(optionSelector, (options) => options.map((option) => option.innerText));

    // page.frames() is synchronous; the frames are visited in the order returned.
    for (const frame of page.frames()) {
      const name = frame.name();

      if (expectedFrames.includes(name)) {
        // Clicks the first element matching the selector, then waits a fixed
        // second before reading the options.
        await frame.click(optionSelector);
        await pause(1000);

        scrapedText.push({ [name]: await getOptions(frame) });
      } else if (name === 'dreapta') {
        const text = await frame.$eval('body', (elm) => elm.innerText);

        scrapedText.push({ [name]: text.split(/\n+/) });
      }
    }

    return scrapedText;
  } finally {
    await browser.close();
  }
};

// Run the scraper and print the collected frame data. Failures are logged and
// reflected in the exit code instead of being left as an unhandled rejection.
scrape()
  .then((value) => {
    console.log(value);
  })
  .catch((error) => {
    console.error(error);
    process.exitCode = 1;
  });

输出:

[{ 
stanga: ['Mures','A Saguna', 'A.Guttenbrun_1', ... and more items]
},
{
centru: ['[0] E3']
},
{
dreapta: ['Linia: E3','2019-07-25 23:19:40','Sosire1: 23:39','Sosire2: 23:41']
}]

关于javascript - 卡在从第三帧抓取数据,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/57207733/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com