含Html的内容截取函数
copyright by http://www.xiao688.com 笑话大全爆笑 原创
author:jason
*/
/**
 * Truncate an HTML fragment to at most $maxlen characters of text while
 * keeping the markup balanced.
 *
 * The input is first split into "tag" and "text" segments. Tags are copied to
 * the output without counting towards the limit; text segments are measured
 * with mb_strlen(). When the limit is reached the current text segment is cut
 * (on a character boundary), every element still open is closed, and
 * processing stops.
 *
 * Behaviour worth knowing:
 *  - Text segments consisting only of whitespace are dropped and not counted.
 *  - Text directly inside an element whose name contains object, iframe,
 *    script or embed is never cut in the middle; the whole segment is kept.
 *  - The returned flag is true whenever the limit was reached, including the
 *    case where the text length equals $maxlen exactly.
 *
 * @param string $content HTML fragment to shorten.
 * @param int    $maxlen  Maximum number of text characters to keep.
 * @param string $charset Encoding name passed to the mb_* functions.
 * @param int    $error   Unused; kept for backward compatibility.
 * @param string $sus     Unused; kept for backward compatibility.
 *
 * @return array array(0 => resulting HTML string, 1 => bool, true if the limit was reached)
 */
function htmlSubString($content,$maxlen=300,$charset="UTF8",$error=0,$sus=''){
    // Elements that never have content or a closing tag, so they must not be
    // pushed on the open-tag stack. "embed" is deliberately left out: it is
    // often written with an explicit </embed> and the truncation guard below
    // treats it as a container.
    $voidTags = array(
        'area', 'base', 'br', 'col', 'hr', 'img', 'input',
        'link', 'meta', 'param', 'source', 'track', 'wbr',
    );

    $content   = (string)$content;
    $curlength = 0;        // number of text characters emitted so far
    $Tags      = array();  // stack of currently open element names
    $outstr    = '';
    $cut       = false;
    $contents  = array();  // alternating tag / text segments
    $tempv     = '';       // segment currently being collected
    $total     = strlen($content);

    // Pass 1: split the input into tag and text segments. Working byte-wise is
    // safe here because '<' and '>' never occur inside a multibyte UTF-8 sequence.
    for ($i = 0; $i < $total; $i++) {
        $letter = $content[$i];
        if ($letter != '<' && $letter != '>') {
            $tempv .= $letter;
            continue;
        }

        // Look ahead without reading past the end of the string.
        $next = ($i + 1 < $total) ? $content[$i + 1] : '';

        if ($letter == '<' && $next !== ' ') {
            // A '<' not followed by a space starts a new tag: flush pending text.
            if (trim($tempv) != '') {
                $contents[] = $tempv;
            }
            $tempv = $letter;
        } elseif ($letter == '>' && $tempv !== '' && $tempv[0] == '<') {
            // A '>' closes the tag currently being collected.
            $tempv .= $letter;
            $contents[] = $tempv;
            $tempv = '';
        } else {
            // A stray '<' or '>' is ordinary text.
            $tempv .= $letter;
        }
    }
    if (trim($tempv) !== '') {
        $contents[] = $tempv;
    }

    // Pass 2: copy segments to the output, tracking open tags and text length.
    foreach ($contents as $value) {
        if (preg_match('/<\S[^<>]*?>/si', $value)) {
            // Malformed "</ name>" tags are passed through untouched. This must
            // be tested before the generic "</" case or it can never match.
            if (substr($value, 0, 3) == '</ ') {
                $outstr .= $value;
                continue;
            }

            if (substr($value, 0, 2) == '</') {
                $endTag = trim(substr($value, 2, -1));

                // A closing tag for a void element (e.g. "</br>") has no
                // matching stack entry; emit it without unwinding the stack.
                if (in_array(strtolower($endTag), $voidTags, true)) {
                    $outstr .= $value;
                    continue;
                }

                // Closing tag with nothing open: emit a matching opener first.
                if (count($Tags) < 1) {
                    $outstr .= '<' . $endTag . '>' . $value;
                    continue;
                }

                // Close every element opened after the one being closed.
                $tagName = array_pop($Tags);
                while ($tagName !== '' && strcasecmp($tagName, $endTag) !== 0) {
                    $outstr .= '</' . $tagName . '>';
                    $tagName = (count($Tags) > 0) ? array_pop($Tags) : '';
                }
                $outstr .= $value;
                continue;
            }

            // Opening tag: the name runs up to the first whitespace, '/' or '>'.
            if (preg_match('/^<([^\s\/>]+)/', $value, $nameMatch)) {
                $tagName = $nameMatch[1];
            } else {
                $tagName = substr($value, 1, -1);
            }

            // Comments, doctypes, processing instructions, self-closing tags
            // and void elements are never closed later, so they are not stacked.
            $isDeclaration = ($value[1] === '!' || $value[1] === '?');
            $isSelfClosing = (substr($value, -2) === '/>');
            $isVoid        = in_array(strtolower($tagName), $voidTags, true);
            if (!$isDeclaration && !$isSelfClosing && !$isVoid) {
                array_push($Tags, $tagName);
            }
            $outstr .= $value;
            continue;
        }

        // Text segment.
        $before     = $curlength;
        $curlength += mb_strlen($value, $charset);

        if ($maxlen > $curlength) {
            $outstr .= $value;
            continue;
        }

        if ($maxlen < $curlength) {
            $depth = count($Tags);
            if ($depth > 0 && preg_match('/object|iframe|script|embed/is', $Tags[$depth - 1])) {
                // Content of these elements must not be cut in the middle.
                $outstr .= $value;
            } else {
                // Cut on a character boundary; the length was measured in
                // characters, so the cut has to be made in characters as well.
                $keep = $maxlen - $before;
                if ($keep < 0) {
                    $keep = 0;
                }
                $outstr .= mb_substr($value, 0, $keep, $charset);
            }
        } else {
            $outstr .= $value;
        }

        // Limit reached: close whatever is still open and stop.
        while (count($Tags) > 0) {
            $outstr .= '</' . array_pop($Tags) . '>';
        }
        $cut = true;
        break;
    }

    return array($outstr, $cut);
}