gpt4 book ai didi

javascript - 使用 puppeteer 选择表格的第二行

转载 作者:太空宇宙 更新时间:2023-11-03 23:58:36 25 4
gpt4 key购买 nike

我正在使用 node.js 和 puppeteer 编写一个爬虫,目标是获取表格中两列(日期和描述)的数据。代码在执行到从列中获取数据的那个代码块之前都工作正常……

下面是完整代码,包括我正在抓取的页面的网址:

const fs = require('fs');
const puppeteer = require('puppeteer');

// URL the scraper opens first (passed to page.goto).
const urlConsulta = "http://www.tre-pr.jus.br/";
// The three values below are written, in this order, into the form fields
// #numUnicoSequencial, #numUnicoAno and #numUnicoOrigem before the search form is submitted.
// NOTE(review): presumably the parts of a single court process number — confirm.
const numeroProcessoSeq = "000000889";
const numeroProcessoAno = "2014";
// NOTE(review): named "Digito" but assigned to the "numUnicoOrigem" field — confirm naming.
const numeroProcessoDigito = "6160047";

/**
 * Returns a promise that resolves (with no value) after roughly `ms` milliseconds.
 * The promise never rejects.
 *
 * @param {number} ms - Delay in milliseconds, passed straight to setTimeout.
 * @returns {Promise<void>}
 */
const wait = ms => new Promise(resolve => setTimeout(resolve, ms));

/**
 * Script entry point.
 *
 * Opens `urlConsulta`, selects the 'pesquisarNumUnico' option of `#acao`, fills the three
 * process-number fields, submits the search form, then works inside the `ifr_servicos`
 * frame to reach the page that `buscaFases` scrapes. The resulting JSON string is written
 * to `teste.txt` and printed to the console.
 *
 * Any error is logged (not rethrown), and the browser is always closed afterwards.
 */
void (async () => {
  // Declared outside `try` so `finally` can close the browser even when a step fails.
  let browser;
  try {
    browser = await puppeteer.launch({
      headless: false
    });
    const page = await browser.newPage();
    await page.goto(urlConsulta);
    await page.select('#acao', 'pesquisarNumUnico');
    await page.evaluate((numeroProcessoSeq, numeroProcessoAno, numeroProcessoDigito) => {
      document.getElementById('numUnicoSequencial').value = numeroProcessoSeq;
      document.getElementById('numUnicoAno').value = numeroProcessoAno;
      document.getElementById('numUnicoOrigem').value = numeroProcessoDigito;
    }, numeroProcessoSeq, numeroProcessoAno, numeroProcessoDigito);

    // Start listening for the navigation BEFORE triggering it; awaiting the submit first
    // can miss a navigation that completes quickly and then hang until the timeout.
    await Promise.all([
      page.waitForNavigation(),
      page.$eval('form[action*="http://www.tre-pr.jus.br/@@processrequest"]', form => form.submit()),
    ]);

    const frame = page.frames().find(f => f.name() === 'ifr_servicos');
    if (!frame) {
      // Fail with a descriptive message instead of a TypeError on `undefined.click`.
      throw new Error('Frame "ifr_servicos" was not found after submitting the search form');
    }

    await frame.click('a[href*="ExibirDadosProcesso"]');
    // Fixed pause — presumably gives the frame time to load its new content.
    await wait(10000);
    await frame.click('[name*="todos"]');
    await frame.$eval('[name*="ExibirPartesProcessoZona"]', form => form.submit());
    await wait(10000);

    const fasesJson = await buscaFases(frame);
    fs.writeFile("teste.txt", fasesJson, function(err) {
      if (err) {
        return console.log(err);
      }
      console.log("The file was saved!");
    });
    console.log(fasesJson);
    // Leave the (non-headless) window visible for a while before it is closed.
    await wait(10000);
  } catch (error) {
    console.log(error);
  } finally {
    if (browser) {
      // Log a failing close instead of letting it escape as an unhandled rejection.
      await browser.close().catch(closeError => console.log(closeError));
    }
  }
})();

/**
 * Extracts the first two cells of every row of the table found inside the
 * `div[id*="conteudo"]` element of the given frame.
 *
 * Rows that do not contain both cells (for example rows made of a single cell spanning
 * several columns) are skipped instead of raising a TypeError.
 *
 * @param {{ evaluate: Function }} frame - Frame-like object whose `evaluate(fn)` runs `fn`
 *   in a context where `document` is available.
 * @returns {Promise<string>} JSON string of an array of `{ data, descricao }` objects,
 *   in row order.
 * @throws {Error} When the container or the expected table structure is missing.
 */
async function buscaFases(frame) {
  return await frame.evaluate(() => {
    const div = document.querySelector('div[id*="conteudo"]');
    // The rows are the children of the first child of the container's fifth child
    // (presumably table > tbody > tr — confirm against the page markup).
    const rowsContainer = div && div.children[4] && div.children[4].children[0];
    if (!rowsContainer) {
      throw new Error('Could not locate the table inside div[id*="conteudo"]');
    }

    const movimentosInfo = [];
    for (const row of Array.from(rowsContainer.children)) {
      const dataCell = row.querySelector("tr td:first-child");
      const descricaoCell = row.querySelector("tr td:first-child + td");
      // querySelector returns null when the row lacks the cell; skip such rows.
      if (!dataCell || !descricaoCell) {
        continue;
      }
      movimentosInfo.push({
        data: dataCell.textContent,
        descricao: descricaoCell.textContent,
      });
    }
    return JSON.stringify(movimentosInfo);
  });
}

获取数据的具体行:

// querySelector returns null when the row has no matching cell, in which case
// reading .textContent on the result throws a TypeError.
let data = row.querySelector("tr td:first-child").textContent;
let descricao = row.querySelector("tr td:first-child + td").textContent;

最佳答案

问题是并非所有 tr 都具有您期望的子元素。这可能是因为某些 td 标记带有 colspan。因此,您应该先过滤数组,把不符合条件的元素剔除。

代码

将从 let movimentosInfo = ... 开始、包含 map 函数在内的那几行代码更改为:

// Selectors for the first cell of a row and the cell immediately after it.
const dataSelector = "tr td:first-child";
const descricaoSelector = "tr td:first-child + td";

// A row qualifies only when both cells can be found in it.
const hasBothCells = row =>
  row.querySelector(dataSelector) && row.querySelector(descricaoSelector);

// Reads the text of both cells of a row that passed the check above.
const toMovimento = row => ({
  data: row.querySelector(dataSelector).textContent,
  descricao: row.querySelector(descricaoSelector).textContent,
});

// Filter first, then map, so rows without the expected cells are never dereferenced.
let movimentosInfo = rowns.filter(hasBothCells).map(toMovimento);

这添加了一个过滤器函数,该函数在映射所需元素的内容之前测试其是否存在。

关于javascript - 使用 puppeteer 选择表格的第二行,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/55872007/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com