package com.tag;
import java.net.MalformedURLException;
import java.net.URL;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HeaderElement;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.methods.GetMethod;
import toptrack.tools.JQueryBase;
/**
* 得到网页编码格式
* @author dl
*/
public class JHtmlUpdateCheck {
/** Detector used to recognize the encoding of text content. */
private static cpdetector.io.CodepageDetectorProxy detector;
static {
    cpdetector.io.CodepageDetectorProxy proxy = cpdetector.io.CodepageDetectorProxy.getInstance();
    // Registration order is kept: the HTML detector first, then the JChardet facade.
    proxy.add(new cpdetector.io.HTMLCodepageDetector(false));
    proxy.add(cpdetector.io.JChardetFacade.getInstance());
    detector = proxy;
}
/**
 * Determines the character encoding of a web page.
 * <p>
 * The sources are tried in this order: the charset parameter of the HTTP
 * Content-Type response header, a {@code charset=} value found inside a
 * {@code <meta>} tag of the (lower-cased) response body, byte-level detection
 * through {@code getFileEncoding}, and finally the default {@code "GBK"}.
 *
 * @param strUrl  link of the web page to inspect
 * @param timeout connection timeout handed to the HTTP connection manager;
 *                also forwarded to {@code getFileEncoding}
 * @return the encoding name; {@code null} when the request reports status -1
 *         or when any exception occurs while fetching or parsing the page
 */
public static String getEncoding(String strUrl, int timeout) {
    HttpClient client = new HttpClient();
    client.getHttpConnectionManager().getParams().setConnectionTimeout(timeout);
    GetMethod method = new GetMethod(strUrl);
    method.setFollowRedirects(true);
    try {
        int statusCode = client.executeMethod(method);
        if (statusCode == -1) {
            return null;
        }

        // 1) Encoding announced in the HTTP header.
        String strEncoding = getContentCharSet(method.getResponseHeader("Content-Type"));
        if (strEncoding != null) {
            return strEncoding;
        }

        // 2) Encoding announced in a <meta ... charset=...> tag.
        // The body may be null when the response carries no content; skip the
        // scan in that case instead of failing with a NullPointerException.
        String body = method.getResponseBodyAsString();
        if (body != null) {
            // Fixed locale: the default locale could mangle ASCII names
            // (e.g. 'I' under a Turkish locale).
            String strHtml = body.toLowerCase(java.util.Locale.ENGLISH);
            StringBuffer strBuffer = new StringBuffer();
            // NOTE(review): getTagText presumably returns the position from
            // which the next search should resume - confirm against JQueryBase.
            int pos = JQueryBase.getTagText(strHtml, "<meta", ">", strBuffer, false, 0);
            while (strBuffer.length() > 0) {
                StringBuffer strEncodingBuffer = new StringBuffer();
                JQueryBase.getTagText(strBuffer.toString(), "charset=", "\"", strEncodingBuffer, 0);
                if (strEncodingBuffer.length() > 0) {
                    return strEncodingBuffer.toString();
                }
                strBuffer = new StringBuffer();
                pos = JQueryBase.getTagText(strHtml, "<meta", ">", strBuffer, false, pos);
            }
        }

        // 3) Encoding guessed from the raw bytes, 4) otherwise the default.
        strEncoding = getFileEncoding(strUrl, timeout);
        if (strEncoding == null) {
            strEncoding = "GBK";
        }
        return strEncoding;
    } catch (Exception e) {
        // Best-effort lookup: report the failure and signal it with null.
        System.out.println(e.getClass() + "对" + strUrl + "提取网页编码信息出错");
        return null;
    } finally {
        // Always hand the connection back, including on the exception path.
        method.releaseConnection();
    }
}
/**
 * Reads the encoding information from an HTTP content header.
 *
 * @param contentheader the HTTP header to inspect; may be {@code null}
 * @return the value of the header's {@code charset} parameter, or
 *         {@code null} when the header is {@code null}, does not consist of
 *         exactly one element, or has no {@code charset} parameter
 */
protected static String getContentCharSet(Header contentheader) {
    if (contentheader == null) {
        return null;
    }
    HeaderElement[] elements = contentheader.getElements();
    // Only a header made of exactly one element is considered.
    if (elements.length != 1) {
        return null;
    }
    NameValuePair charsetParam = elements[0].getParameterByName("charset");
    return charsetParam == null ? null : charsetParam.getValue();
}
相关专题
- Java环境安装配置 (6007篇文章)
- Java编程开发手册 (8708篇文章)
- Java网络及通讯编程 (705篇文章)
- Struts 2, spring 2, hibernate 的整合 (403次浏览)
- Java远程通讯的6种可选技术及原理 (279次浏览)
- struts2 + spring + hibernate 实现CRUD (217次浏览)
- Spring 与 Log4J 进行动态日志配置切换 (182次浏览)
- 初学者对Hibernate的学习方法 (168次浏览)
- Hibernate的映射关联关系 (154次浏览)
- Spring与struts整合开发实例(一) (111次浏览)
- Java调用SQL Server的存储过程详解 (109次浏览)
- MyEclipse搞定hibernate的web应用 (104次浏览)
- 多动鼠标少动脑,Java报表工具的简约之美 (92次浏览)



