gpt4 book ai didi

headless-browser - 从 puppeteer 的 ElementHandle 中获取选择器

转载 作者:行者123 更新时间:2023-12-04 08:30:03 25 4
gpt4 key购买 nike

我目前正在使用 Puppeteer。我注意到在 Chrome 开发者工具中,可以通过右键菜单的"复制 > 复制选择器"获取任意元素的选择器,我想知道是否也能从 Puppeteer 的 ElementHandle 中获取完整的 CSS 选择器。

例如,给定以下 html:

<body>
<h1>Main Title</h1>
</body>

然后,在 Puppeteer 中,我得到了 h1 元素:
const myElement=await page.$("h1");

我想获得 myElement ( body > h1 ) 的完整 css 选择器

最佳答案

可以使用我的一个用户脚本(userscript)中的这部分代码:

/**
 * Namespace resolver handed to `document.evaluate` by the XPath helper in
 * this script.
 *
 * The `svg` and `mathml` properties map an XPath prefix to its namespace URI.
 * `document.evaluate` only accepts a function or an object exposing
 * `lookupNamespaceURI(prefix)` as its resolver, so the map also carries that
 * method; without it any expression that uses a prefix (e.g. `//svg:path`)
 * fails as soon as the prefix has to be resolved.
 */
var xpathNamespaceResolver = {
  svg: 'http://www.w3.org/2000/svg',
  mathml: 'http://www.w3.org/1998/Math/MathML',

  /**
   * Resolves an XPath namespace prefix.
   *
   * @param {string} prefix - Prefix used in the XPath expression.
   * @returns {string|null} The namespace URI, or null for an unknown prefix.
   */
  lookupNamespaceURI: function (prefix) {
    // Own-property check keeps inherited members such as `toString` from
    // being mistaken for prefixes; the typeof check excludes this method.
    var known = Object.prototype.hasOwnProperty.call(xpathNamespaceResolver, prefix);
    var uri = known ? xpathNamespaceResolver[prefix] : null;
    return typeof uri === 'string' ? uri : null;
  }
};

/**
 * Returns the first node (in document order) matching an XPath expression.
 *
 * The expression is evaluated with `document.body` as the context node and
 * `xpathNamespaceResolver` as the namespace resolver.
 *
 * @param {string} expression - XPath expression to evaluate.
 * @returns {Node|undefined} The first matching node, or undefined when
 *   nothing matches.
 */
function getElementByXPath(expression) {
  // Only the first match is needed, so ask for a single node instead of
  // materialising a snapshot of every match.
  var result = document.evaluate(
    expression,
    document.body,
    xpathNamespaceResolver,
    XPathResult.FIRST_ORDERED_NODE_TYPE,
    null
  );
  // singleNodeValue is null when nothing matched; callers have always
  // received undefined in that case, so keep that contract.
  return result.singleNodeValue || undefined;
}

/**
 * Builds a CSS selector or an XPath expression describing the chain of
 * elements from <body> (or, when <body> is not an ancestor, from the topmost
 * element ancestor) down to a node. Every element contributes its tag name
 * plus all of its attributes.
 *
 * @param {string|Node} arg - Either a DOM node, or a piece of text; for text
 *   the first element whose text contains it is looked up through
 *   `getElementByXPath`.
 * @param {string} type - 'css' or 'xpath' (case-insensitive).
 * @returns {string|undefined} The selector/expression. Returns undefined
 *   (after logging) when `arg` is neither a string nor a node. When no
 *   element could be resolved the chain is empty, which yields '' for CSS and
 *   '//' for XPath. An unknown `type` is logged and yields ''.
 */
function retrieveCssOrXpathSelectorFromTextOrNode(arg, type) {
  const ELEMENT_NODE = 1; // value of Node.ELEMENT_NODE
  // Deliberately conservative: anything outside this shape is emitted through
  // a quoted attribute selector instead of the #id / .class shorthand.
  const SIMPLE_CSS_IDENT = /^-?[A-Za-z_][\w-]*$/;
  const ASCII_WHITESPACE = /[ \t\n\f\r]+/;
  const selectorType = type.toLowerCase();

  // XPath 1.0 string literals have no escape character, so pick whichever
  // quote the text does not contain, or stitch the pieces with concat().
  function toXPathLiteral(text) {
    if (text.indexOf("'") === -1) {
      return "'" + text + "'";
    }
    if (text.indexOf('"') === -1) {
      return '"' + text + '"';
    }
    return "concat('" + text.split("'").join("', \"'\", '") + "')";
  }

  // Single-quoted CSS string with backslashes, quotes and line breaks escaped.
  function toCssString(text) {
    const escaped = text
      .replace(/[\\']/g, '\\$&')
      .replace(/[\n\r\f]/g, (ch) => '\\' + ch.charCodeAt(0).toString(16) + ' ');
    return "'" + escaped + "'";
  }

  // CSS fragment for one attribute: #id / .class shorthand when it is safe,
  // a quoted attribute selector otherwise.
  function cssFragment(name, value) {
    if (!value) {
      return '[' + name + ']';
    }
    if (name === 'id') {
      return SIMPLE_CSS_IDENT.test(value) ? '#' + value : '[id=' + toCssString(value) + ']';
    }
    if (name === 'class') {
      const tokens = value.split(ASCII_WHITESPACE).filter(Boolean);
      if (tokens.length === 0) {
        return '[class]';
      }
      return tokens
        .map((token) => (SIMPLE_CSS_IDENT.test(token) ? '.' + token : '[class~=' + toCssString(token) + ']'))
        .join('');
    }
    return '[' + name + '=' + toCssString(value) + ']';
  }

  // XPath predicate for one attribute.
  function xpathFragment(name, value) {
    return value ? '[@' + name + '=' + toXPathLiteral(value) + ']' : '[@' + name + ']';
  }

  // Concatenated fragments for every attribute of an element; empty for an
  // unknown selector type (the error for that is reported once, below).
  function describeAttributes(element) {
    let fragments = '';
    const attrs = element.attributes;
    for (let i = 0; i < attrs.length; i++) {
      if (selectorType === 'xpath') {
        fragments += xpathFragment(attrs[i].name, attrs[i].value);
      } else if (selectorType === 'css') {
        fragments += cssFragment(attrs[i].name, attrs[i].value);
      }
    }
    return fragments;
  }

  let node;
  if (typeof arg === 'string') { // text from within the page
    node = getElementByXPath('//*[text()[contains(.,' + toXPathLiteral(arg) + ')]]');
  } else if (typeof arg === 'object') { // node argument, duck-typed on nodeType
    if (arg && arg.nodeType) {
      node = arg;
    } else {
      console.error("ERROR expected node, get object");
      return;
    }
  } else {
    console.error("ERROR expected node or string argumument");
    return;
  }

  if (!node || node.nodeType !== ELEMENT_NODE) {
    console.error('ERROR no matching node');
  }

  // Walk up from the node, collecting one entry per element. Stop after
  // <body>, or when the chain leaves the element tree (document node,
  // detached root) so that non-element ancestors are never inspected.
  const root = [];
  for (let current = node; current && current.nodeType === ELEMENT_NODE; current = current.parentNode) {
    const nodeName = current.nodeName.toLowerCase();

    let txt = '';
    if (nodeName === 'a' && selectorType === 'xpath') {
      // innerText is not available on every element type; fall back to
      // textContent rather than emitting the literal string "undefined".
      const label = current.innerText == null ? (current.textContent || '') : current.innerText;
      txt = '[text()=' + toXPathLiteral(String(label)) + ']';
    }

    root.push({ name: nodeName, attrs: describeAttributes(current), txt });

    if (nodeName === 'body') {
      break;
    }
  }

  let output = '';
  if (selectorType === 'css') {
    output = root.reverse().map((elt) => elt.name + elt.attrs).join(' > ');
  } else if (selectorType === 'xpath') {
    output = '//' + root.reverse().map((elt) => elt.name + elt.txt + elt.attrs).join('/');
  } else {
    console.error('ERROR unknown type ' + type);
  }

  return output;
}


/**
 * Console convenience: logs both the CSS selector and the XPath expression
 * computed by `retrieveCssOrXpathSelectorFromTextOrNode` for `arg`.
 *
 * Declared as a function rather than assigned to an undeclared identifier,
 * which would be an implicit global (and a ReferenceError in strict mode).
 *
 * @param {string|Node} arg - Passed through unchanged as the first argument.
 * @returns {undefined} Nothing; the results are only written to the console.
 */
function x(arg) {
  console.log("CSS\n" + retrieveCssOrXpathSelectorFromTextOrNode(arg, 'css'));
  console.log("XPath\n" + retrieveCssOrXpathSelectorFromTextOrNode(arg, 'xpath'));
}

用法:
// x() writes both selectors to the console itself and returns nothing, so
// wrapping it in console.log would only print an extra "undefined".
x(node);

在 Chrome 开发者工具中,对本页面中选中的 textarea 节点运行后的输出:
CSS
body.question-page.new-topbar[style] > div.container._full > div#content.snippet-hidden > div[itemscope][itemtype='http://schema.org/Question'] > div.inner-content.clearfix > div#mainbar[role='main'][aria-label='question and answers'] > div#answers > div#answer-49596712.answer[data-answerid='49596712'][itemscope][itemtype='http://schema.org/Answer'] > div.post-layout > div.answercell.post-layout--right > div.inline-editor[style] > form.inline-post[action='/posts/49596712/edit-submit/dbac2e78-20ec-4f98-86ee-60c3a57fb791'][method='post'][data-post-params='{"is_suggested_edit":false,"post_type":2,"owner":1,"PostId":49596712}'] > div#post-editor-49596712.post-editor.js-post-editor > div[style='position: relative;'] > div.wmd-container > textarea#wmd-input-49596712.wmd-input.processed[name='post-text'][cols='92'][rows='15'][tabindex='81'][data-min-length]

XPath
//body[@class='question-page new-topbar'][@style]/div[@class='container _full ']/div[@id='content'][@class='snippet-hidden']/div[@itemscope][@itemtype='http://schema.org/Question']/div[@class='inner-content clearfix']/div[@id='mainbar'][@role='main'][@aria-label='question and answers']/div[@id='answers']/div[@id='answer-49596712'][@class='answer'][@data-answerid='49596712'][@itemscope][@itemtype='http://schema.org/Answer']/div[@class='post-layout']/div[@class='answercell post-layout--right']/div[@class='inline-editor'][@style]/form[@class='inline-post'][@action='/posts/49596712/edit-submit/dbac2e78-20ec-4f98-86ee-60c3a57fb791'][@method='post'][@data-post-params='{"is_suggested_edit":false,"post_type":2,"owner":1,"PostId":49596712}']/div[@id='post-editor-49596712'][@class='post-editor js-post-editor']/div[@style='position: relative;']/div[@class='wmd-container']/textarea[@id='wmd-input-49596712'][@class='wmd-input processed'][@name='post-text'][@cols='92'][@rows='15'][@tabindex='81'][@data-min-length]

关于headless-browser - 从 puppeteer 的 ElementHandle 中获取选择器,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/49596638/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com