PHP很实用的HTML内容字符串截取函数

原创

ybcola 2010-07-06 17:13:27 ©著作权

©著作权归作者所有：来自51CTO博客作者ybcola的原创作品，请联系作者获取转载授权，否则将追究法律责任

/*
   含Html的内容截取函数
    copyright by http://www.xiao688.com 笑话大全爆笑原创
    author:jason
*/
/**
 * Truncate an HTML fragment to at most $maxlen characters of text content
 * while keeping the markup balanced.
 *
 * The input is split into tag tokens and text tokens. Tags are copied to the
 * output unchanged and never count toward the limit; text is counted with
 * mb_strlen() in $charset. When the limit is reached the current text token
 * is cut on a character boundary, every element that is still open is
 * closed, and processing stops.
 *
 * Notes on behavior:
 *  - Whitespace-only text between tags is dropped by the tokenizer.
 *  - Text directly inside an element whose name contains object, iframe,
 *    script or embed is never cut in the middle; the whole token is kept.
 *  - Void elements (br, img, ...), self-closing tags ("<x/>") and
 *    declarations/comments ("<!...>", "<?...>") are emitted as-is and are
 *    not tracked as open elements.
 *  - A closing tag with no matching open element is repaired by emitting an
 *    empty opening tag in front of it.
 *  - HTML entities are counted as literal characters (e.g. "&amp;" is 5).
 *
 * @param string $content HTML fragment to truncate.
 * @param int    $maxlen  Maximum number of text characters to keep.
 * @param string $charset Encoding name passed to the mb_* functions.
 * @param int    $error   Unused; kept for backward compatibility.
 * @param string $sus     Unused; kept for backward compatibility.
 *
 * @return array Two elements: [0] the resulting HTML string, [1] true when
 *               the text limit was reached, false otherwise.
 */
function htmlSubString($content,$maxlen=300,$charset="UTF8",$error=0,$sus=''){

    $content=(string)$content;
    $curlength=0;
    $Tags=array();      // stack of currently open element names
    $contents=array();  // token list: alternating tags and text runs
    $outstr='';
    $cut=false;
    $tempv='';

    // Elements that never have a closing tag and must not be put on the stack.
    $voidTags=array('area','base','br','col','embed','hr','img','input',
                    'link','meta','param','source','track','wbr');

    // Split the input into tag tokens and text tokens.
    $total=strlen($content);
    for($i=0;$i<$total;$i++){
        $letter=$content[$i];
        if($letter!='<'&&$letter!='>'){
            $tempv.=$letter;
            continue;
        }
        // Look ahead without reading past the end of the string.
        $next=($i+1<$total)?$content[$i+1]:'';
        if($letter=='<'&&$next!==' '){ // a new tag starts here
            if(trim($tempv)!=''){$contents[]=$tempv;}
            $tempv=$letter;
        }elseif($letter=='>'&&$tempv!==''&&$tempv[0]=='<'){ // the tag ends here
            $tempv.=$letter;
            $contents[]=$tempv;
            $tempv='';
        }else{ // "< " or a lone ">" is ordinary text
            $tempv.=$letter;
        }
    }
    if(trim($tempv)!==''){$contents[]=$tempv;}

    foreach($contents as $value){

        if(preg_match('/<\S[^<>]*?>/si',$value)){ // tag token

            // Malformed "</ x>" tags are passed through untouched. This must
            // be tested before the generic "</" case or it is never reached.
            if(substr($value,0,3)=='</ '){
                $outstr.=$value;
                continue;
            }

            if(substr($value,0,2)=='</'){ // closing tag
                $endTag=substr($value,2,-1);
                $endName=trim($endTag);

                // Nameless closers and closers of void elements (e.g. the
                // legacy "<embed ...></embed>") have nothing to balance.
                if($endName===''||in_array(strtolower($endName),$voidTags,true)){
                    $outstr.=$value;
                    continue;
                }

                // Find the nearest matching open element; tag names are
                // compared case-insensitively.
                $matchDepth=-1;
                for($k=count($Tags)-1;$k>=0;$k--){
                    if(strcasecmp($Tags[$k],$endName)===0){
                        $matchDepth=$k;
                        break;
                    }
                }

                if($matchDepth<0){
                    // Stray closing tag: repair it with an empty opening tag
                    // instead of closing unrelated ancestors.
                    $outstr.='<'.$endTag.'>'.$value;
                    continue;
                }

                // Close every element opened after the matching one.
                while(count($Tags)-1>$matchDepth){
                    $outstr.="</".array_pop($Tags).">";
                }
                array_pop($Tags);
                $outstr.=$value;
                continue;
            }

            // Opening tag: always emitted; pushed on the stack only if it
            // will need a closing tag later.
            $outstr.=$value;
            $second=$value[1];
            if($second==='!'||$second==='?'){
                continue; // comment, doctype or processing instruction
            }
            if(!preg_match('/^<([^\s\/>]+)/',$value,$m)){
                continue;
            }
            $tagName=$m[1];
            if(substr($value,-2)==='/>'||in_array(strtolower($tagName),$voidTags,true)){
                continue; // self-closing or void element
            }
            array_push($Tags,$tagName);

        }else{ // text token
            $valueLen=mb_strlen($value,$charset);
            $curlength+=$valueLen;

            if($maxlen>$curlength){
                $outstr.=$value;
                continue;
            }

            if($maxlen<$curlength){
                // Text inside these elements must not be cut in the middle.
                if(count($Tags)>0&&preg_match('/object|iframe|script|embed/is',$Tags[count($Tags)-1])){
                    $outstr.=$value;
                }else{
                    // Cut on a character boundary, not a byte boundary.
                    $keep=max(0,$valueLen-($curlength-$maxlen));
                    $outstr.=mb_substr($value,0,$keep,$charset);
                }
            }else{
                $outstr.=$value;
            }

            // Close everything that is still open, innermost first.
            while(count($Tags)>0){
                $outstr.="</".array_pop($Tags).">";
            }
            $cut=true;
            break;
        }
    }
    return array($outstr,$cut);
}