gpt4 book ai didi

javascript - 使用 Jsoup 获取 body 标签由 JavaScript 代码填充的 HTML 内容

转载 作者:行者123 更新时间:2023-12-01 18:18:37 57 4
gpt4 key购买 nike

嗨。我尝试获取 HTML 数据,但只得到了 JavaScript 代码。我怎样才能获取 HTML 内容?我看不懂这个脚本。我尝试用 HtmlUnit 等待 JavaScript 代码加载完成,但没有成功。

我还尝试了使用 Selenium 和 JUnit 访问:https://www.sahibinden.com/ilan/emlak-is-yeri-kiralik-cumhuriyet-caddesinde-550m-kiralik-bina-792916998/detay

但我无法获取任何数据,如何访问?

<body> 
<script>(function(K){var W={},D={};var k(document.createEvent("CustomEvent")))</script>
<script>
// Interstitial bootstrap: once the 'afterReady' event fires (or after a
// 400 ms fallback timer), re-request the current URL via XHR with a set of
// recorded headers, then either redirect or reload the page depending on the
// ISTL-* headers found on the response.
(function() {
'use strict';
// Guard flag so afterReadyCb runs at most once (event and timer both call it).
var afterReadyCbCalled = false;
// Flat list of alternating header name / header value entries.
var originalHeaders = ["Accept", "text/html, image/gif, image/jpeg, *; q=.2, */*; q=.2","X-Origin-DC", "gytp","X-Forwarded-Proto", "https","X-Forwarded-For", "212.156.131.82","X-TLS-Version", "771","X-Client-SrcPort", "51979",];
// Body passed to xhr.send(); empty here.
var originalBody = "";
/**
 * Issues the follow-up GET request for location.href and dispatches the
 * 'QLpZFJdHv' event. Subsequent calls are no-ops.
 */
function afterReadyCb() {
if (afterReadyCbCalled) return;
afterReadyCbCalled = true;
var xhr = new XMLHttpRequest();
xhr.onload = function() {
// Despite its name, isValid holds the ISTL-INFINITE-LOOP response header;
// when that header is present and non-empty the handler does nothing.
var isValid = xhr.getResponseHeader("ISTL-INFINITE-LOOP");
if (isValid != null && isValid != '') return;
// A non-empty ISTL-REDIRECT-TO header names the URL to navigate to.
var a = xhr.getResponseHeader("ISTL-REDIRECT-TO");
if (a != null && a != '') {
location.replace(a);
} else {
// No redirect target: optionally rewrite the address bar to the response
// URL (when history.replaceState exists), then reload the page.
if (window.history != null && typeof history.replaceState === 'function') {
// Use xhr.responseURL when set, otherwise the ISTL-RESPONSE-URL header.
var responseURL = xhr.responseURL != null ? xhr.responseURL : xhr.getResponseHeader("ISTL-RESPONSE-URL");
if (responseURL != null && responseURL != '') {
history.replaceState(null, '', responseURL);
}
}
window.location.reload();
}
};
xhr.open("get", location.href, true);
// Walk originalHeaders two entries at a time: [i] is the name, [i + 1] the value.
for (var i = 0; i < originalHeaders.length; i += 2) {
var headerName = originalHeaders[i];
try {
xhr.setRequestHeader(headerName, originalHeaders[i + 1]);
// Any exception from setting an individual header is deliberately ignored.
} catch (e) {}
}
// Mark the request itself with ISTL-INFINITE-LOOP: 1.
// NOTE(review): presumably lets the server detect and break a reload loop — confirm.
xhr.setRequestHeader("ISTL-INFINITE-LOOP", '1');
xhr.send(originalBody);
// Dispatch a bubbling, cancelable 'QLpZFJdHv' event on the global object.
// NOTE(review): the listener for this event is not visible in this snippet.
var evt = document.createEvent('Event');
evt.initEvent('QLpZFJdHv', true, true);
dispatchEvent(evt);
}
// Run on the custom 'afterReady' event, with a 400 ms timer as a fallback.
addEventListener('afterReady', afterReadyCb, false);
setTimeout(afterReadyCb, 400);
}());
</script>
<style>
html, body {
margin: 0;
padding: 0;
background-color: white;
}
.preloader {
width: 100%;
height: 100%;
position: absolute;
left:0;
right:0;
top:0;
bottom:0;
background-image: qcQ==');
background-repeat: no-repeat;
background-position: center center;
z-index: 1;
}
</style>
<div class="preloader"></div>
<style> body { background: white;
}
</style>
</body>

最佳答案

这段代码完成了这里的工作。

// Page to fetch; its body is populated by JavaScript after the initial load.
String url = "https://www.sahibinden.com/ilan/emlak-is-yeri-kiralik-cumhuriyet-caddesinde-550m-kiralik-bina-792916998/detay";

// try-with-resources closes the HtmlUnit WebClient when the block exits.
try (final WebClient webClient = new WebClient(BrowserVersion.FIREFOX_68)) {
// Keep going when a page script raises an error instead of throwing.
webClient.getOptions().setThrowExceptionOnScriptError(false);

HtmlPage page = webClient.getPage(url);
// Give background JavaScript time to finish before reading the page
// (the argument is a timeout in milliseconds per the HtmlUnit API).
webClient.waitForBackgroundJavaScript(10_000);

// Print the page as text, a separator line, then the page as XML.
System.out.println(page.asText());
System.out.println(" ----------------------------------- ");
System.out.println(page.asXml());
}

你还缺少什么?

关于javascript - 通过javascript代码获取带有Jsoup填充body标签的Html内容,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/60320007/

57 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com