gpt4 book ai didi

node.js - Zombie 错误 - 获取 http 请求时出错

转载 作者:搜寻专家 更新时间:2023-11-01 00:09:18 25 4
gpt4 key购买 nike

我正在使用 NodeJs 和 ZombieJS 在虚拟浏览器环境中获取 url 请求。

我正在使用以下代码:

// Modules used by the proxy script, followed by the state shared across requests.
var zombie = require('zombie'),
    jsdom = require('jsdom'),
    my_sandbox = require('sandbox'),
    url = require('url'),
    http = require('http'),
    request = require('request'),
    httpProxy = require('./lib/node-http-proxy'),
    // Base URL remembered from the first request. The request handler assigns
    // and reads `dest`, so the variable has to be declared under that exact
    // name; otherwise the handler creates an implicit global.
    dest = '',
    util = require('util'),
    colors = require('colors'),
    // True until the first incoming request has been handled.
    is_host = true;

// Sandbox instance; nothing else in the visible script references it.
var s = new my_sandbox();
// One Zombie browser, reused by every zombieFetching() call.
var browser = new zombie.Browser();

// Proxy server listening on port 8000. It is created with (9000, 'localhost'),
// presumably the target it forwards to (9000 is the port the HTTP server in
// this script listens on) — confirm against ./lib/node-http-proxy.
httpProxy
    .createServer(9000, 'localhost')
    .listen(8000);

/**
 * Loads `page` in the shared Zombie `browser` and logs the outcome.
 *
 * @param {string} page - URL handed to `browser.visit`.
 * @throws {Error} Raised from inside the visit callback when the visit
 *     reports an error.
 */
function zombieFetching(page) {
    browser.visit(page, { debug: false }, function (err, browser, status) {
        if (err) {
            console.log('There is an error. Fix it');
            // Throw an Error object instead of a bare message string so the
            // stack trace and error type are preserved.
            throw (err instanceof Error ? err : new Error(String(err.message || err)));
        }
        console.log('Browser visit successful');
    });
}

/**
 * HTTP server (port 9000) that fetches the requested URL with `request`,
 * replays HTML pages in Zombie via zombieFetching(), and returns the fetched
 * body to the client.
 *
 * The URL of the very first request is stored in `dest`; later requests whose
 * URL starts with "/" are resolved by prefixing them with `dest`.
 */
var server = http.createServer(function (req, res) {
    var pathname = req.url;

    if (is_host) {
        // First request: remember its URL as the base for later relative paths.
        dest = pathname;
        is_host = false;
    } else if (pathname.charAt(0) === '/') {
        console.log('new request');
        console.log(pathname);
        pathname = dest + pathname;
    }

    request.get({ uri: pathname }, function (err, response, html) {
        console.log('The pathname is:::::::::: ' + pathname);

        if (err) {
            // The upstream fetch failed: report it instead of visiting the URL
            // in Zombie and answering with an empty 200.
            console.log('Request failed: ' + err.message);
            res.statusCode = 502;
            res.end();
            return;
        }

        // Only hand HTML documents to Zombie. Feeding it images or other
        // binary resources makes its HTML parser choke on the raw bytes.
        var contentType = String(response.headers['content-type'] || '');
        if (/html/i.test(contentType)) {
            zombieFetching(pathname);
        }

        res.end(html);
    });
});

server.listen(9000);

当我尝试获取 url “www.yahoo.com” 时,看到以下错误:

/home/seed/Desktop/Cloud project/node_modules/zombie/node_modules/html5/lib/html5/tokenizer.js:62
throw(e);
^
Error: undefined: Invalid character in tag name: ��
at Object.createElement (/home/seed/Desktop/Cloud project/node_modules/zombie/node_modules/jsdom/lib/jsdom/level1/core.js:1174:13)
at TreeBuilder.createElement (/home/seed/Desktop/Cloud project/node_modules/zombie/node_modules/html5/lib/html5/treebuilder.js:29:25)
at TreeBuilder.insert_element_normal (/home/seed/Desktop/Cloud project/node_modules/zombie/node_modules/html5/lib/html5/treebuilder.js:61:21)
at TreeBuilder.insert_element (/home/seed/Desktop/Cloud project/node_modules/zombie/node_modules/html5/lib/html5/treebuilder.js:52:15)
at Object.startTagOther (/home/seed/Desktop/Cloud project/node_modules/zombie/node_modules/html5/lib/html5/parser/in_body_phase.js:483:12)
at Object.processStartTag (/home/seed/Desktop/Cloud project/node_modules/zombie/node_modules/html5/lib/html5/parser/phase.js:43:44)
at EventEmitter.do_token (/home/seed/Desktop/Cloud project/node_modules/zombie/node_modules/html5/lib/html5/parser.js:94:20)
at EventEmitter.<anonymous> (/home/seed/Desktop/Cloud project/node_modules/zombie/node_modules/html5/lib/html5/parser.js:112:30)
at EventEmitter.emit (events.js:64:17)
at EventEmitter.emitToken (/home/seed/Desktop/Cloud project/node_modules/zombie/node_modules/html5/lib/html5/tokenizer.js:84:7)

另外,日志语句如下:

The pathname is:::::::::: http://www.yahoo.com/
The pathname is:::::::::: http://l1.yimg.com/a/i/ww/news/2011/05/06/zuckhouse-sm.jpg
The pathname is:::::::::: http://l1.yimg.com/a/i/ww/news/2011/05/07/cable-sm.jpg
The pathname is:::::::::: http://l.yimg.com/a/a/1-/flash/promotions/yahoo/081120/70x50iltlb_2.jpg

Browser visit successful

Browser visit successful

Browser visit successful

Browser visit successful

The pathname is:::::::::: http://l.yimg.com/a/i/vm/2011may/bird74.jpg
The pathname is:::::::::: http://www.yahoo.com/jserror?ad=1&target=cms&data=FPAD

据我了解,前四个 get 请求是成功的。但是,我不确定为什么 zombie 正在获取无效请求:

"http://www.yahoo.com/jserror?ad=1&target=cms&data=FPAD"

另外,是什么导致了“Invalid character in tag name”(标签名称中存在无效字符)这个错误?

谢谢,索尼

最佳答案

favicon.ico 总是会被浏览器请求;Zombie 正确地模拟了这一行为。这并不是 HTTP 协议(protocol)规定的内容,而只是浏览器通常会做的事情,这样它们就能在支持它的站点的地址栏中显示那个漂亮的图标。您之所以会看到 jserror? 请求,可能是因为 Zombie 在某个时刻收到了指向该 URL 的 301(重定向)并盲目地跟随了它,或者页面上的某个其他元素引用了它。默认情况下,Zombie 的处理程序会尝试跟踪所有内容,这就是您会像浏览器一样获取到图像等资源的原因。

如果你设置 browser.debug = true,我认为你可以获得比现有日志语句更多的信息。

关于node.js - Zombie 错误 - 获取 http 请求时出错,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/5930893/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com