要在 PHP 中使用多线程,首先需要 PHP 5.3 以上版本(须为线程安全的 ZTS 构建),并安装 pthreads 扩展,这样 PHP 才能真正地支持多线程。扩展的安装方法请自行搜索。

 

安装好扩展之后,就可以使用多线程了。下面贴一段通过百度搜索结果抓取百度网盘内容的代码:


<?php
include 'include/CurlLoad.class.php'; // Project-local HTTP helper: provides CurlLoad::HtmlGet() and CurlLoad::Get_Headers().

/**
 * Scrape the Baidu Pan resources listed on one search-results page, using one thread per result.
 *
 * Resolves the result links via BaiduSRLinksGet($url, 1), starts a baidu_thread_run
 * for each link, waits for all of them, and collects every non-empty result.
 *
 * @param string $url Search-results page URL (passed straight to BaiduSRLinksGet()).
 * @return array|null On success an array with:
 *                    - 'res'    list of PanInfoGet() results (key absent when nothing was scraped),
 *                    - 'pages'  the pagination map from BaiduSRLinksGet() (null when none was found),
 *                    - 'status' the string "1".
 *                    NULL when the result list could not be obtained.
 */
function vget($url)
{
    $ret = BaiduSRLinksGet($url, 1); // Result links, resolved to their redirect targets.
    if (empty($ret) || !array_key_exists('links', $ret)) {
        return null;
    }

    // Start every worker first so the downloads overlap.
    $threads = array();
    foreach ($ret['links'] as $index => $link) {
        $threads[$index] = new baidu_thread_run($link);
        $threads[$index]->start();
    }

    $infos = array();
    foreach ($threads as $thread) {
        // join() waits for the thread to finish, so no isRunning()/usleep() polling is needed.
        if ($thread->join()) {
            $data = $thread->data;
            if ($data != null) {
                $infos['res'][] = $data;
            }
        }
    }

    // 'pages' is only set by BaiduSRLinksGet() when pagination links were matched.
    $infos['pages'] = isset($ret['pages']) ? $ret['pages'] : null;
    $infos['status'] = "1";

    return $infos;
}

/**
 * Extract the result links and pagination links from a Baidu search-results page.
 *
 * @param string $url    Search-results page URL.
 * @param int    $format 0 (default): return the links as they appear in the page;
 *                       1: replace each link with the "Location" header returned for it
 *                       (falling back to the original link when there is none).
 * @return array|null NULL when the page could not be fetched or an exception was caught.
 *                    Otherwise an array with:
 *                    - 'links' list of result URLs (absent when $format is 1 and nothing matched),
 *                    - 'pages' map of (pn / 10) => base64-encoded pagination URL fragment
 *                      (absent when no pagination links were used).
 */
function BaiduSRLinksGet($url, $format = 0)
{
    $html = CurlLoad::HtmlGet($url); // Fetch the page.
    if ($html == null) {
        return null;
    }

    try {
        // Pick the result links out of the page's embedded {"url":"..."} fragments.
        // (The quotes must not terminate the PHP string, hence the single-quoted literal.)
        $matched = preg_match_all('/"url":"(?<links>.*)"}/', $html, $rets);
        if ($matched === false || !array_key_exists('links', $rets)) {
            return null; // The regex engine failed, so there is no usable link list.
        }

        $ret = array();
        if ($format == 1) {
            foreach ($rets['links'] as $i => $link) {
                $headers = CurlLoad::Get_Headers($link, 1); // Resolve the real address via the response headers.
                if (is_array($headers) && array_key_exists('Location', $headers)) {
                    $ret['links'][$i] = $headers['Location'];
                } else {
                    // Fall back for THIS link only; replacing the whole list here would
                    // throw away the links that were already resolved.
                    $ret['links'][$i] = $link;
                }
            }
        } else {
            $ret['links'] = $rets['links'];
        }

        // Pagination links. '#' is used as the delimiter so the '/' in the path needs no escaping.
        preg_match_all('#href="?/s\?wd=site%3Apan.baidu.com%20(?<url>.+?)&ie=utf-8">#', $html, $out);
        $pageUrls = isset($out['url']) ? $out['url'] : array();

        // NOTE(review): the first and the last match are both skipped, exactly as the
        // original loop did (it dropped index 0 and then iterated up to the reduced count).
        // Presumably those are the previous/next links - confirm against a live page.
        $lastIndex = count($pageUrls) - 2;
        for ($i = 1; $i <= $lastIndex; $i++) {
            // A link without a pn parameter is filed under slot 0.
            $pn = preg_match('/&pn=(\d+)/', $pageUrls[$i], $m) ? (int) $m[1] : 0;
            $ret['pages'][(int) ($pn / 10)] = base64_encode($pageUrls[$i]);
        }

        return $ret;
    } catch (Exception $e) {
        WriteLog($e);
        return null;
    }
}

/**
 * Scrape the metadata of one Baidu Pan resource page.
 *
 * @param string $url Resource page URL.
 * @return array|null NULL when the page could not be fetched or the expected fields were
 *                    not found; otherwise an array with the keys
 *                    'name', 'size', 'user', 'date', 'number' (first match of each) and 'link' ($url).
 */
function PanInfoGet($url)
{
    $html = CurlLoad::HtmlGet($url); // Fetch the page.
    if ($html == null) {
        return null;
    }

    try {
        // NOTE(review): pattern kept byte-for-byte; it expects the five labelled fields
        // separated by single spaces - verify against the current page markup.
        $pattern = "/文件名:(?<name>.*) 文件大小:(?<size>.*) 分享者:(?<user>.*) 分享时间:(?<date>.*) 下载次数:(?<number>[0-9]+)/";
        if (!preg_match_all($pattern, $html, $ret)) {
            return null; // No match (0) or regex failure (false).
        }

        $rets = array();
        foreach (array('name', 'size', 'user', 'date', 'number') as $field) {
            $rets[$field] = $ret[$field][0];
        }
        $rets['link'] = $url;

        return $rets;
    } catch (Exception $e) {
        WriteLog($e);
        return null;
    }
}

/**
 * Append one "Warning:<timestamp>:<message>" line to ../error.log.
 *
 * @param mixed $str Message to log; anything that can be concatenated to a string
 *                   (the callers in this file pass Exception objects).
 * @return void
 */
function WriteLog($str)
{
    $line = "Warning:" . date("Y/m/d H:i:s") . ":" . $str . "\r\n";
    // LOCK_EX because several worker threads may log at the same time.
    file_put_contents("../error.log", $line, FILE_APPEND | LOCK_EX);
}

/**
 * Worker thread that scrapes a single Baidu Pan resource page.
 *
 * After the thread has been joined, the result of PanInfoGet() is available in $data.
 *
 * @author MuXi
 */
class baidu_thread_run extends Thread
{
    /** Resource page URL to scrape. */
    public $url;

    /** Result of PanInfoGet(); stays unset/NULL when the URL is empty or scraping failed. */
    public $data;

    public function __construct($url)
    {
        $this->url = $url;
    }

    /**
     * Thread entry point: scrape the page unless the URL is empty.
     */
    public function run()
    {
        $url = $this->url;
        if ($url) {
            $this->data = PanInfoGet($url);
        }
    }
}
?>