gpt4 book ai didi

node.js - 如何修复 x-ray(NodeJS 抓取库)响应中的编码?

转载 作者:搜寻专家 更新时间:2023-10-31 22:25:52 25 4
gpt4 key购买 nike

以下脚本在我的 NodeJS 服务器中运行良好,但当我尝试抓取一些西里尔文(Cyrillic)网站时,它有时会返回如下所示编码错误的响应。

脚本

// Question snippet: scrape page metadata from `url` with x-ray.
// Each key names an output field and each value is an x-ray selector string
// (the `selector@attribute` form is x-ray syntax — see the x-ray docs).
// NOTE(review): this excerpt is truncated — the callback body and the outer
// call are never closed here.
x(url, {
name: 'title',
ogDescription: 'meta[property="og:description"]@content',
metaDescription: 'meta[name="description"]@content',
ogImage: 'meta[property="og:image"]@content',
// NOTE(review): the duplicated `name="` makes this selector look malformed;
// presumably `meta[name="twitter:image:src"]@content` was intended — confirm.
twitterImage: 'meta[name="name="twitter:image:src""]@content',
metaImage: 'meta[name="image"]@content',
headImage: 'head img@src',
contentImage_1: '.content img@src',
contentImage_2: '.image img@src'
})
// The value returned by x(...) is invoked immediately with an (err, obj) callback.
(function (err, obj) {
// NOTE(review): `err` is not checked before `obj` is read below.
// Group the scraped values into candidate lists per field; entries that did
// not match come through as `undefined` (see the sample output that follows).
var firstData = {
name: [
obj.name
],
description: [
obj.metaDescription,
obj.ogDescription,
],
image: [
obj.ogImage,
obj.twitterImage,
obj.metaImage,
obj.headImage,
obj.contentImage_1,
obj.contentImage_2
]
}

编码不正确的响应示例

firstData { name: [ '(Rock, Pop) [15LP] [24/96] Queen - Studio Collection - 2015, 
FLAC (tracks) :: RuTracker.org' ],
description:
[ 'RuTracker.org » ���������� ��� (����������� ���������) »
������� ������� (Rock, Pop) [15LP] [24/96] Queen -
Studio Collection - 2015, FLAC (tracks)',
undefined ],
image: [ undefined, undefined, undefined, undefined, undefined, undefined ] }

我该如何解决这个问题?

最佳答案

你可以像这样使用 request 作为 x-ray 的驱动程序(driver),并在其中用 iconv 对响应体(body)进行转码:

// Custom x-ray driver: fetch pages with `request` as raw bytes and transcode
// them from Windows-1251 to UTF-8 before x-ray parses the HTML.
const { Iconv } = require('iconv');

// `encoding: null` makes `request` return the body as a Buffer, so the bytes
// reach iconv untouched instead of first being decoded as (wrong) UTF-8.
const options = { encoding: null };

// Declared locally (the original leaked `iconv` as an implicit global) and
// constructed with `new Iconv(...)` rather than `new require(...)`.
const iconv = new Iconv('Windows-1251', 'utf8');

/**
 * Convert a Windows-1251 response body to a UTF-8 string.
 *
 * @param {Buffer|string|null|undefined} body - Raw response bytes, or a
 *   'binary' (latin1) string holding those bytes.
 * @returns {string|null|undefined} The decoded text; falsy input is returned
 *   unchanged.
 * @throws {Error} If iconv cannot convert the byte sequence.
 */
const conv = function (body) {
  if (!body) return body;
  const raw = Buffer.isBuffer(body) ? body : Buffer.from(body, 'binary');
  return iconv.convert(raw).toString();
};

const request = require('request').defaults(options);

/**
 * x-ray driver that downloads `context.url` and hands the transcoded HTML
 * to x-ray.
 *
 * @param {{url: string}} context - x-ray request context.
 * @param {function(?Error, string=): void} callback - Receives the request or
 *   conversion error, or the UTF-8 body on success.
 */
const driver = function driver(context, callback) {
  request(context.url, function (err, response, body) {
    if (err) return callback(err);

    let html;
    try {
      html = conv(body);
    } catch (convErr) {
      // Report conversion failures through the callback instead of throwing
      // inside the request handler, where nothing could catch them.
      return callback(convErr);
    }
    return callback(null, html);
  });
};
x.driver(driver);


// Scrape title, description and image candidates from `url`, then log them
// grouped per field. Selectors that match nothing yield `undefined` entries.
x(url, {
  name: 'title',
  ogDescription: 'meta[property="og:description"]@content',
  metaDescription: 'meta[name="description"]@content',
  ogImage: 'meta[property="og:image"]@content',
  // Fixed: the original selector had a duplicated `name="` and stray quotes.
  twitterImage: 'meta[name="twitter:image:src"]@content',
  metaImage: 'meta[name="image"]@content',
  headImage: 'head img@src',
  contentImage_1: '.content img@src',
  contentImage_2: '.image img@src',
})(function (err, obj) {
  // On failure `obj` is not usable; report the error instead of crashing
  // with a TypeError while reading its properties.
  if (err) {
    console.error(err);
    return;
  }

  const firstData = {
    name: [obj.name],
    description: [obj.metaDescription, obj.ogDescription],
    image: [
      obj.ogImage,
      obj.twitterImage,
      obj.metaImage,
      obj.headImage,
      obj.contentImage_1,
      obj.contentImage_2,
    ],
  };
  console.log(firstData);
});

关于node.js - 如何修复 x-ray(NodeJS 抓取库)响应中的编码?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/33690257/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com