gpt4 book ai didi

javascript - NodeJS:http.request 的响应不符合预期

转载 作者:行者123 更新时间:2023-12-03 08:59:35 29 4
gpt4 key购买 nike

使用 http.request 时,我遇到了一些奇怪的现象。我的目的是用 NodeJS 编写一个网络爬虫,获取并解析这个网页(this webpage)的数据。

但是 http.request 返回的响应与 Chrome 渲染出来的 HTML 不一致。

这是代码。

var https = require('https');
var fs = require('fs');

// Request settings for the default.aspx page on book.flypeach.com.
var options = (function () {
    // Query-string parameters, one per line so a single value is easy to change.
    var query = [
        'ao=B2CZHTW',
        'ori=KHH',
        'des=KIX',
        'dep=2015-09-10',
        'ret=2015-09-17',
        'adt=2',
        'chd=0',
        'inf=0',
        'langculture=zh-TW',
        'bLFF=false'
    ].join('&');

    return {
        // NOTE(review): this turns off TLS certificate verification, so the
        // connection can be intercepted. Confirm it is really needed.
        rejectUnauthorized: false,
        host: 'book.flypeach.com',
        path: '/default.aspx?' + query
    };
}());

/**
 * Response handler for https.request: buffers the whole body and saves it
 * to craw.html in the current working directory.
 *
 * Chunks are kept as Buffers and joined once at the end. Appending each chunk
 * to a string decodes it on its own, so a multi-byte character that straddles
 * two chunks would be replaced by U+FFFD in the saved file.
 *
 * @param {Object} response - readable response stream emitting 'data', 'end'
 *     and 'error' events.
 */
var callback = function(response) {
    var chunks = [];

    response.on('data', function(chunk) {
        // A chunk is a string if an encoding was set on the stream; normalise
        // so Buffer.concat always receives Buffers.
        chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
    });

    // Report a broken response stream instead of letting it go unhandled.
    response.on('error', function(err) {
        console.log(err);
    });

    response.on('end', function() {
        // Write the raw bytes so the file matches what the server sent.
        fs.writeFile('craw.html', Buffer.concat(chunks), function(err) {
            if (err) return console.log(err);
        });
    });
};
// Send the GET request. Without an 'error' listener, a DNS, TLS or socket
// failure is emitted as an unhandled 'error' event and crashes the process.
https.request(options, callback)
    .on('error', function(err) {
        console.log(err);
    })
    .end();

我用 fs.writeFile 保存了输出,但其内容与 Chrome 浏览器中显示的网页不同。

更新时间:2015年9月3日

今天我尝试了phantomjs

这是我的新代码。但这也行不通。

var system = require('system');
var page = require('webpage').create();
// Page to open, assembled from the base address and its query parameters.
var url = 'https://book.flypeach.com/default.aspx?' + [
    'ao=B2CZHTW',
    'ori=KHH',
    'des=KIX',
    'dep=2015-09-10',
    'ret=2015-09-17',
    'adt=2',
    'chd=0',
    'inf=0',
    'langculture=zh-TW',
    'bLFF=false'
].join('&');

// Writes every outgoing resource request to stderr as indented JSON.
page.onResourceRequested = function (request) {
    var out = system.stderr;
    out.writeLine('= onResourceRequested()');

    var dump = JSON.stringify(request, undefined, 4);
    out.writeLine(' request: ' + dump);
};

// Writes the id, stage and full JSON of every received resource to stderr.
page.onResourceReceived = function(response) {
    var out = system.stderr;
    out.writeLine('= onResourceReceived()' );

    var id = response.id;
    var stage = response.stage;
    var dump = JSON.stringify(response);
    out.writeLine(' id: ' + id + ', stage: "' + stage + '", response: ' + dump);
};

// Announces the start of a page load and logs the URL that is being left.
page.onLoadStarted = function() {
    // Passed to page.evaluate, which runs it against the loaded document.
    var readLocation = function() {
        return window.location.href;
    };

    system.stderr.writeLine('= onLoadStarted()');
    var leaving = page.evaluate(readLocation);
    system.stderr.writeLine(' leaving url: ' + leaving);
};

// Logs the status string reported when a page load finishes.
page.onLoadFinished = function(status) {
    var out = system.stderr;
    out.writeLine('= onLoadFinished()');

    var statusLine = ' status: ' + status;
    out.writeLine(statusLine);
};

// Logs each navigation request: destination, cause, whether navigation will
// happen, and whether it came from the page's main frame.
page.onNavigationRequested = function(url, type, willNavigate, main) {
    var report = [
        '= onNavigationRequested',
        ' destination_url: ' + url,
        ' type (cause): ' + type,
        ' will navigate: ' + willNavigate,
        " from page's main frame: " + main
    ];

    report.forEach(function(line) {
        system.stderr.writeLine(line);
    });
};

// Logs the URL, error code and description of a resource that failed to load.
page.onResourceError = function(resourceError) {
    var out = system.stderr;
    var failedUrl = resourceError.url;
    var code = resourceError.errorCode;
    var reason = resourceError.errorString;

    out.writeLine('= onResourceError()');
    out.writeLine(' - unable to load url: "' + failedUrl + '"');
    out.writeLine(' - error code: ' + code + ', description: ' + reason );
};

// Logs a JavaScript error raised in the page, followed by its stack trace
// when one is available.
//   msg   - the error message
//   trace - array of frames with file, line and (optionally) function
page.onError = function(msg, trace) {
    system.stderr.writeLine('= onError()');
    var msgStack = [' ERROR: ' + msg];
    // Check the length too: an empty array is truthy and would otherwise
    // print a " TRACE:" header with nothing under it.
    if (trace && trace.length) {
        msgStack.push(' TRACE:');
        trace.forEach(function(t) {
            msgStack.push(' -> ' + t.file + ': ' + t.line + (t.function ? ' (in function "' + t.function + '")' : ''));
        });
    }
    system.stderr.writeLine(msgStack.join('\n'));
};

// Opens the target URL, prints the load status (and the page title on
// success) to stdout, then exits with 0 on success and 1 on failure.
page.open(url, function(status) {
    var loaded = status === 'success';

    console.log(status);

    // Only query the document after a successful load; the title was
    // previously read on every run and then never used.
    if (loaded) {
        var title = page.evaluate(function() {
            return document.title;
        });
        console.log('title: ' + title);
    }

    // Defer the exit to the next tick. Calling phantom.exit() directly inside
    // this callback is the commonly reported trigger for the repeated "Unsafe
    // JavaScript attempt to access frame with URL about:blank" messages on
    // PhantomJS 1.9.8.
    // NOTE(review): the "SSL handshake failed" error itself is not fixed here;
    // it presumably needs a command-line switch such as --ssl-protocol=any.
    setTimeout(function() {
        phantom.exit(loaded ? 0 : 1);
    }, 0);
});

我得到了详细的日志

= onNavigationRequested
destination_url: https://book.flypeach.com/default.aspx?ao=B2CZHTW&ori=KHH&des=KIX&dep=2015-09-10&ret=2015-09-17&adt=2&chd=0&inf=0&langculture=zh-TW&bLFF=false
type (cause): Other
will navigate: true
from page's main frame: true
= onResourceRequested()
request: {
"headers": [
{
"name": "User-Agent",
"value": "Mozilla/5.0 (Unknown; Linux x86_64) AppleWebKit/534.34 (KHTML, like Gecko) PhantomJS/1.9.8 Safari/534.34"
},
{
"name": "Accept",
"value": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
}
],
"id": 1,
"method": "GET",
"time": "2015-09-03T08:42:29.674Z",
"url": "https://book.flypeach.com/default.aspx?ao=B2CZHTW&ori=KHH&des=KIX&dep=2015-09-10&ret=2015-09-17&adt=2&chd=0&inf=0&langculture=zh-TW&bLFF=false"
}
= onLoadStarted()
leaving url: about:blank
= onResourceError()
- unable to load url: "https://book.flypeach.com/default.aspx?ao=B2CZHTW&ori=KHH&des=KIX&dep=2015-09-10&ret=2015-09-17&adt=2&chd=0&inf=0&langculture=zh-TW&bLFF=false"
- error code: 6, description: SSL handshake failed
= onResourceReceived()
id: 1, stage: "end", response: {"contentType":null,"headers":[],"id":1,"redirectURL":null,"stage":"end","status":null,"statusText":null,"time":"2015-09-03T08:42:29.845Z","url":"https://book.flypeach.com/default.aspx?ao=B2CZHTW&ori=KHH&des=KIX&dep=2015-09-10&ret=2015-09-17&adt=2&chd=0&inf=0&langculture=zh-TW&bLFF=false"}
= onLoadFinished()
status: fail
fail
Unsafe JavaScript attempt to access frame with URL about:blank from frame with URL file://server.js. Domains, protocols and ports must match.

Unsafe JavaScript attempt to access frame with URL about:blank from frame with URL file://server.js. Domains, protocols and ports must match.

Unsafe JavaScript attempt to access frame with URL about:blank from frame with URL file://server.js. Domains, protocols and ports must match.

Unsafe JavaScript attempt to access frame with URL about:blank from frame with URL file://server.js. Domains, protocols and ports must match.

Unsafe JavaScript attempt to access frame with URL about:blank from frame with URL file://server.js. Domains, protocols and ports must match.

最佳答案

需要使用一些渲染引擎,比如WebKit

尝试 phantomjs

http://phantomjs.org/ 和 https://github.com/sgentle/phantomjs-node :带命令绑定(bind)的 WebKit 渲染器及其 nodejs 模块。它与大多数网站配合良好。

关于“javascript - NodeJS:http.request 的响应不符合预期”,我们在 Stack Overflow 上找到一个类似的问题: https://stackoverflow.com/questions/32351394/

29 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com