gpt4 book ai didi

javascript - Android WebView 未返回所需的 HTML

转载 作者:行者123 更新时间:2023-11-28 05:09:05 25 4
gpt4 key购买 nike

快速概述一下我正在做的事情我使用 Android Webview 渲染 JavaScript,然后从 javascript 读取 HTML 来解析它。

我目前无法从名为 Sport Chek 的网站检索 HTML。

这是我的 SportChekSearch 类的代码:

public class SportChekSearch extends SearchQuery{

public Elements finalDoc;
private ArrayList<Item> processed;
private final Handler uiHandler = new Handler();
public int status = 0;

//This basically is just so that the class knows which Activity we're working with
private Context c;

protected class JSHtmlInterface {
@android.webkit.JavascriptInterface
public void showHTML(String html) {
final String htmlContent = html;

uiHandler.post(
new Runnable() {
@Override
public void run() {
Document doc = Jsoup.parse(htmlContent);
}
}
);
}
}

/**
* Constructor method
* @param context The context taken from the webview (So that the asynctask can show progress)
*/
public SportChekSearch(Context context, String query) {

final Context c = context;

try {
final WebView browser = new WebView(c);
browser.setVisibility(View.INVISIBLE);
browser.setLayerType(View.LAYER_TYPE_NONE, null);
browser.getSettings().setJavaScriptEnabled(true);
browser.getSettings().setBlockNetworkImage(true);
browser.getSettings().setDomStorageEnabled(true);
browser.getSettings().setCacheMode(WebSettings.LOAD_NO_CACHE);
browser.getSettings().setLoadsImagesAutomatically(false);
browser.getSettings().setGeolocationEnabled(false);
browser.getSettings().setSupportZoom(false);
browser.getSettings().setUserAgentString("Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36");
browser.addJavascriptInterface(new JSHtmlInterface(), "JSBridge");

browser.setWebViewClient(
new WebViewClient() {

@Override
public void onPageStarted(WebView view, String url, Bitmap favicon) {
super.onPageStarted(view, url, favicon);
}

@Override
public void onPageFinished(WebView view, String url) {
browser.loadUrl("javascript:window.JSBridge.showHTML('<html>'+document.getElementsByTagName('html')[0].innerHTML+'</html>');");
}
}
);


browser.loadUrl("https://www.sportchek.ca/search.html#q=" + query.replaceAll(" ", "+") + "&lastVisibleProductNumber=3");
browser.loadUrl(browser.getUrl());
final String link = browser.getUrl();
new fetcher(c).execute(link);



}
catch(Exception e){
e.printStackTrace();
}

//Get the link from the WebView, and save it in a final string so it can be accessed from worker thread


}

/**
* This subclass is a worker thread meaning it does work in the background while the user interface is doing something else
* This is done to prevent "lag".
* To call this class you must write fetcher(Context c).execute(The link you want to connect to)
*
*/
class fetcher extends AsyncTask<String, Void, Elements> {

Context mContext;
ProgressDialog pdialog;

public fetcher(Context context) {
mContext = context;
}

@Override
protected void onPreExecute() {
super.onPreExecute();
pdialog = new ProgressDialog(mContext);
pdialog.setTitle(R.string.finding_results);
pdialog.setCancelable(false);
pdialog.show();
}

//This return elements because the postExecute() method needs an Elements object to parse its results
@Override
protected Elements doInBackground(String... strings) {

//You can pass in multiple strings, so this line just says to use the first string
String link = strings[0];

//For Debug Purposes, Do NOT Remove - **Important**
System.out.println("Connecting to: " + link);

try {
doc = Jsoup.connect(link)
.ignoreContentType(true)
.userAgent("Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36")
.timeout(10000)
.get();


finalDoc = doc.select("body section.product-grid-wrapper");

System.out.println(finalDoc.toString());



} catch (IOException e) {
e.printStackTrace();
}

return finalDoc;
}


@Override
protected void onPostExecute(Elements result) {


//This line clears the list of info in the Search activity
//I should probably be using a getter method but adapter is a static variable so it shouldn't matter


//parse seperates document into elements
//crunch results formats those elements into item objects
//I am saving the result of this to an ArrayList<Item> called "processed"
processed = crunchResults(result);

//For debug purposes, do NOT remove - **Important**
System.out.println(processed.size() + " results have been crunched by Sport Chek.");

//Adds all of the processed results to the list of info in Search activity
ClothingSearch.adapter.addAll(processed);


//For debug purposes, do NOt remove - **Important
System.out.println("Adapter has been notified by Sport Chek.");

//Closes the progress dialog called pdialog assigned to the AsyncTask

pdialog.dismiss();

ClothingSearch.adapter.notifyDataSetChanged();
SearchQueueHandler.makeRequest(mContext, processed, SearchQueueHandler.CLOTHING_SEARCH);




}
}



public ArrayList<Item> crunchResults(Elements e){

ArrayList<Item> results = new ArrayList<Item>();

try {

for (int i = 0; i < e.size(); i++) {

Element ele = e.get(i);


String link = "https://www.sportchek.ca" + ele.select(" a.product-grid__link").attr("href");
System.out.println("https://www.sportchek.ca" + ele.select(" a.product-grid__link").attr("href"));
String title = ele.select(" span.product-title-text").text();

String pricestring = ele.select(" span.product-price__wrap").text();
price = Double.parseDouble(pricestring.substring(pricestring.lastIndexOf("$")));
System.out.println(pricestring);

//*******************************************

String store = "Sport Chek";



//Adds the formatted item to an ArrayList of items
results.add(new Item(title, store, price, link));


//Prints the object's to String to console
//For debug purposes, do NOT remove - **Important
System.out.println(results.get(i).toString());
}
} catch (Exception a){
a.printStackTrace();
}

return results;
}

public int getStatus(){
return status;
}

}

两个相关方法是我的 AsyncTask 中的 doInBackground 和 crunchResults 方法。

这是我在实际网站上使用 Ctrl+Shift+I 得到的结果(期望结果):

Desired Result

但是当运行上面的代码并使用 println 时,这里是我为标签部分 class="product-grid-wrapper"得到的结果:

<section class="product-grid-wrapper"> 
<ul data-module-type="SearchProductGrid" class="product-grid__list product-grid__list_quickview">
<!-- #product-grid__item-template -->
</ul>
</section>

谁能帮我弄清楚为什么我没有得到我想要的结果?

感谢所有帮助

编辑:对于收集 println 数据的特定搜索,链接为 https://www.sportchek.ca/search.html#q=men+coat&lastVisibleProductNumber=3

最佳答案

看起来您实际得到的是服务器发送的实际 html,而您的“期望结果”是 JavaScript 运行后 DOM 的样子。

您的“实际”是我在 Chrome 中使用“查看源代码”时看到的结果,而您的“期望结果”是我使用 Chrome 的 DOM 检查器时看到的结果。

经过进一步检查,我发现您实际上并没有从浏览器获取 HTML,而是(间接)使用 JSoup 的 Connection 对象直接获取 HTML。不幸的是,这不会运行 Javascript。

相反,您必须在 JavaScript 运行后从 WebView 获取 HTML。有关可能的方法,请参阅 How do I get the web page contents from a WebView?

然后,使用

将从中获得的 HTML 提供给 JSoup
Jsoup.parse(html);

关于javascript - Android WebView 未返回所需的 HTML,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/41525707/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com