gpt4 book ai didi

algorithm - nodejs中开放图的抓取算法

转载 作者:塔克拉玛干 更新时间:2023-11-03 04:09:08 26 4
gpt4 key购买 nike

我正在尝试在 Node.js 中借助 cheerio,用下面的代码从一个 URL 抓取 Open Graph(开放图)元数据。

我需要填充下面这个对象:var result={};

  // Collect og:* <meta> entries into `result`, keyed by the second segment of
  // the property name. A key seen more than once ends up holding an array of
  // every value encountered for it.
  for (var idx = 0; idx < metalist.length; idx++) {
    var attrs = metalist[idx].attribs;

    // Skip tags for which utils.isEmpty reports a missing property or content.
    if (utils.isEmpty(attrs.property) || utils.isEmpty(attrs.content)) {
      continue;
    }
    // Only properties that begin with "og" are of interest.
    if (attrs.property.indexOf('og') != 0) {
      continue;
    }

    // "og:video:width" -> "video"; segments after the second one are ignored,
    // which is why sub-properties pile up in the same slot as their parent.
    var key = attrs.property.split(':')[1];
    var value = attrs.content;
    var current = result[key];

    if (utils.isEmpty(current)) {
      // First value for this key: store it as-is.
      result[key] = value;
    } else if (current.push) {
      // Slot already exposes push(): append to it.
      current.push(value);
    } else {
      // Second value for this key: promote the slot to a two-element array.
      result[key] = [current, value];
    }
  }

用这段代码填充 result 并将其转换为 JSON 之后,我得到如下内容:

type: "video",
image: "http://i3.ytimg.com/vi/fWNaR-rxAic/mqdefault.jpg",
video: [
"http://www.youtube.com/v/fWNaR-rxAic?version=3&amp;autohide=1",
"application/x-shockwave-flash",
"1920",
"1080"
]

但是我想要的东西是这样的:

type: "video",
image: "http://i3.ytimg.com/vi/fWNaR-rxAic/mqdefault.jpg",
video: {
"http://www.youtube.com/v/fWNaR-rxAic?version=3&amp;autohide=1",
{
type:"application/x-shockwave-flash",
width:"1920",
height:"1080"
}
}

我尝试了下面这个 if 判断,但它不起作用:

 // Attempted variant of the accumulation step: the first value is stored
 // as-is, a slot that exposes push() is appended to, and otherwise the earlier
 // value and the new one are wrapped together in an object.
 if (!utils.isEmpty(result[property])) {
   if (!result[property].push) {
     var wrapped = {};
     // NOTE(review): `name` is not declared in the code shown (the split
     // result is called `ogname` in the loop) - confirm what was intended.
     wrapped[name[2]] = content;
     wrapped[property] = result[property];

     result[property] = wrapped;
   } else {
     result[property].push(content);
   }
 } else {
   result[property] = content;
 }

我不想把所有元数据遍历两次,而且我对 JavaScript 和 Node.js 的函数还不太熟悉……有什么建议吗?谢谢!

最佳答案

这比较棘手,原因在于您希望的 og:video 显示方式。我认为无法做成那样。最简单的做法是给它分配一个标识符(例如 name),并把它与 og:video:width 放在同一级别。

示例结果

{
"type": "video.other",
"url": "http://philippeharewood.com/facebook/video.html",
"title": "Simple Plan",
"video": {
"name": "http://www.youtube.com/v/Y4MnpzG5Sqc?version=3&amp;autohide=1",
"type": "application/x-shockwave-flash",
"width": "398",
"height": "224",
"release_date": "2012-05-29T21:30"
}
}

实现方式如下:

// Fetches a page and collects its Open Graph <meta> tags into `result`.
//
// Plain tags such as og:title are stored as strings under their second
// segment ("title"). Structured tags such as og:video:width are grouped into
// an object under the root key ("video"), and the root's own value (og:video)
// is stored in that object under "name". The collected data is printed as
// pretty-printed JSON once the page has been processed.
var cheerio = require('cheerio');
var request = require('request');

var url = 'http://philippeharewood.com/facebook/video.html';

// Property names come from a remote page, so keep them away from
// Object.prototype: on a null-prototype object a key such as "__proto__" is
// just an ordinary key.
var result = Object.create(null);

request(url, function(error, response, body) {
  // Without a body there is nothing to parse; report the failure instead of
  // handing undefined to cheerio.load().
  if (error) {
    console.error('Request for ' + url + ' failed: ' + error.message);
    return;
  }

  var $ = cheerio.load(body);
  var meta = $('meta');

  // Walk the matched elements by index so that only elements are visited,
  // not the other own properties of the cheerio object.
  for (var i = 0; i < meta.length; i++) {
    var attribs = meta[i].attribs;

    // Require the "og:" prefix (with the colon) so that unrelated properties
    // that merely start with the letters "og" are not picked up.
    if (!attribs || !attribs.property || attribs.property.indexOf('og:') !== 0) {
      continue;
    }

    var og = attribs.property.split(':');
    var root = og[1];
    var content = attribs.content;
    var existing = result[root];

    if (og.length > 2) {
      // Structured property (og:<root>:<sub>): make sure the root slot is an
      // object, keeping an earlier plain value under "name".
      var set;
      if (typeof existing === 'string') {
        set = Object.create(null);
        set.name = existing;
      } else {
        set = existing || Object.create(null);
      }
      set[og[2]] = content;
      result[root] = set;
    } else if (existing && typeof existing === 'object') {
      // The root tag arrived after one of its sub-properties: keep what was
      // collected so far and record the root value under "name".
      existing.name = content;
    } else {
      result[root] = content;
    }
  }

  console.log(JSON.stringify(result, undefined, 2));
});

关于algorithm - nodejs中开放图的抓取算法,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/13050320/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com