工作半年了,再回头看自己的博客时感慨良多。时至今日,我完善了这个抓取邮箱的程序:如果再加上网络爬虫,完全可以靠这段程序分析网上的邮箱,从而批量发送邮件。
package grabmail; import java.io.BufferedReader; import java.io.InputStreamReader; import java.net.HttpURLConnection; import java.net.URL; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Scanner; import java.util.regex.Matcher; import java.util.regex.Pattern; public class CaptrueEmailInPage { public static List getEmail(String str){ List list = new ArrayList(); String email = ""; Pattern p = Pattern.compile("\\w+@(\\w+.)+[a-z]{2,3}"); //下面的校验太复杂,用于网页收集邮箱太耗时间,不如上面的效果好 // Pattern p = Pattern.compile("^\\s*\\w+(?:\\.{0,1}[\\w-]+)*@[a-zA-Z0-9]+(?:[-.][a-zA-Z0-9]+)*\\.[a-zA-Z]+\\s*$"); Matcher m = p.matcher(str); while(m.find()){ email = m.group(); list.add(email); System.out.println("***"+email); } return list; } public static String inputURL(){ Scanner s = new Scanner(System.in); String url = s.nextLine(); return url; } public static String captrueUrl(String url) throws Exception{ URL urlStr = new URL(url); HttpURLConnection conn = (HttpURLConnection) urlStr.openConnection(); InputStreamReader isReader = new InputStreamReader(conn.getInputStream(),"utf-8"); BufferedReader bfReader = new BufferedReader(isReader); String line = ""; StringBuffer strBuffer = new StringBuffer(); while((line = bfReader.readLine()) != null){ strBuffer.append(line); } System.out.println(strBuffer); return strBuffer.toString(); } public static void main(String args[]){ System.out.println("请输入一个网址:(如:http://www.baidu.com)"); String url = inputURL(); System.out.println(); System.out.println(); try { String str = captrueUrl(url); List list = getEmail(str); Iterator it = list.iterator(); System.out.println("抓取的邮箱的如下:"); while(it.hasNext()){ System.out.println(it.next()); } } catch (Exception e) { System.out.println("获取网址失败,原因:输入了非法网址!"); e.printStackTrace(); } } }