一、说明

标准的xml头部分规范定义如下:

<?xml version="1.0" encoding="UTF-8" standalone="no"?>

下面的Java代码实现的功能是:从xml头中截取encoding属性对应的值,即文档的编码方式。

二、实现代码

package com.justin.test;

import java.io.IOException;
import java.util.StringTokenizer;
/**
 * Extracts the value of the {@code encoding} attribute from an XML
 * declaration such as
 * {@code <?xml version="1.0" encoding="UTF-8" standalone="no"?>}.
 *
 * @author Justin
 */
public class GetXmlHeadEncoded {
	/** Prefix every XML declaration must start with. */
	private static final String DECLARATION_START = "<?xml";

	/** Terminator of an XML declaration. */
	private static final String DECLARATION_END = "?>";

	/**
	 * Characters that separate attribute names from values: any whitespace
	 * (space, tab, CR, LF), the equals sign and both quote styles.
	 */
	private static final String DELIMITERS = " \t\r\n=\"'";

	/**
	 * Demo entry point: prints the encoding found in a sample XML declaration.
	 *
	 * @param args ignored
	 * @throws IOException never thrown by the current implementation; kept for
	 *                     signature compatibility
	 */
	public static void main(String[] args) throws IOException {
		String xmlHead = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>";
		System.out.println(getEncoding(xmlHead));
	}

	/**
	 * Returns the encoding named in the given XML declaration.
	 *
	 * @param xmlHead text beginning (after optional surrounding whitespace)
	 *                with an XML declaration; may be {@code null}
	 * @return the token following {@code encoding}, or {@code null} when the
	 *         input is {@code null}, does not start with {@code <?xml}, has no
	 *         closing {@code ?>}, or contains no {@code encoding} attribute
	 */
	private static String getEncoding(String xmlHead) {
		if (xmlHead == null) {
			return null;
		}
		String xml = xmlHead.trim();
		if (!xml.startsWith(DECLARATION_START)) {
			return null;
		}
		int end = xml.indexOf(DECLARATION_END);
		if (end < 0) {
			// Unterminated declaration: bail out instead of letting
			// substring(0, -1) throw StringIndexOutOfBoundsException.
			return null;
		}
		// Only look inside the declaration itself, never at the document body.
		StringTokenizer tokens = new StringTokenizer(xml.substring(0, end), DELIMITERS);
		while (tokens.hasMoreTokens()) {
			if ("encoding".equals(tokens.nextToken())) {
				// The token right after the attribute name is its value, if any.
				return tokens.hasMoreTokens() ? tokens.nextToken() : null;
			}
		}
		return null;
	}
}