gpt4 book ai didi

java正则表达式解析html示例分享

转载 作者:qq735679552 更新时间:2022-09-29 22:32:09 35 4
gpt4 key购买 nike

CFSDN坚持开源创造价值,我们致力于搭建一个资源共享平台,让每一个IT人在这里找到属于你的精彩世界.

这篇CFSDN的博客文章《java正则表达式解析html示例分享》由作者收集整理,如果你对这篇文章有兴趣,记得点赞哟。

代码如下

package work;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;

public class chuanboyi { 。

 public static void main(String[] args){   // TODO Auto-generated method stub   StringBuffer html = new StringBuffer();   HttpClient httpclient = new HttpClient();   //创建GET方法实例   GetMethod getMethod = new GetMethod("//www.zzvips.com");   //使用系统提供的默认恢复策略   getMethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler());   try{    //执行GET方法    int statusCode = httpclient.executeMethod(getMethod);    if(statusCode != HttpStatus.SC_OK){     System.out.println("Method is wrong " + getMethod.getStatusLine());    }    InputStream responseBody = getMethod.getResponseBodyAsStream();    BufferedReader reader = new BufferedReader(new InputStreamReader(responseBody,"utf-8"));    String line = reader.readLine();    while(line != null){     html.append(line).append("\n");     line = reader.readLine();    }    reader.close();    //正则表达式    String regex = "<form name=\"compareForm\"[\\s\\S]+>[\\s\\S]+</form>.*<script.*>";    String regexa ="(?<=<li>)[\\s\\S]+?(?=</li>)";    Pattern pattern = Pattern.compile(regex);          Matcher m = pattern.matcher(html);          StringBuffer str = new StringBuffer();           int i = 0;           while(m.find()){           str.append(m.group());          }          pattern = Pattern.compile(regexa);          m = pattern.matcher(str);          while(m.find()){           attrs(m.group());           i++;          }          System.out.println("共有"+i+"条数据!");   }catch (HttpException e) {    // TODO: handle exception    System.out.println("Please check your provided http address!");    e.printStackTrace();   }catch (IOException e) {    // TODO: handle exception    System.out.println("the line is wrong!");    e.printStackTrace();   }finally{    getMethod.releaseConnection();//释放链接   }  }  public static void attrs(String str){   //获取url的正则表达式   String regexURL = "[a-z]+-[0-9]+\\.html";   //获取Name的正则表达式   String regexName = "(?<=title=\")[[\\w-\\s][^x00-xff]]+(?=\")";   //获取图片的正则表达式   String regexPicture = 
"images.*\\.jpg";   Pattern patternURL = Pattern.compile(regexURL);   Pattern patternName = Pattern.compile(regexName);   Pattern patternPicture = Pattern.compile(regexPicture);   Matcher mURL = patternURL.matcher(str);   Matcher mName = patternName.matcher(str);   Matcher mPicture = patternPicture.matcher(str);   if(mName.find()){    System.out.println("名字:"+mName.group());   }   if(mURL.find()){    System.out.println("链接:"+mURL.group());   }   if(mPicture.find()){    System.out.println("图片:"+mPicture.group());   }  }  } 。

  。

本篇关于《java正则表达式解析html示例分享》的文章就讲到这里了。如果你想了解更多相关内容,请搜索CFSDN的文章或继续浏览相关文章,希望大家以后支持我的博客!

35 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com