实例 3-37 将HTML 替换成文本
本实例演示了如何通过preg_replace 函数将HTML 替换成文本,如代码3-37 所示。
【代码 3-37】 preg_replace_2.php

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
 <html>
  <head>
   <title> preg_replace_2.php </title>
   <meta charset="UTF-8">
   <meta name="Author" content="">
   <meta name="Keywords" content="">
   <meta name="Description" content="">
  </head> <body>
<?php
 // Sample input: a complete HTML document held in a heredoc string.
 $document=<<<HTML
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
 <html>
  <head>
   <title> New Document </title>
   <meta charset="UTF-8">
   <meta name="Author" content="">
   <meta name="Keywords" content="">
   <meta name="Description" content="">
  </head> <body>
 <script type="text/javascript">
 //
 //
 //
 alert("Hi world");
 </script>
 Hello World!<br>
  </body>
 </html>
 HTML;
 // $document is expected to contain an HTML document.
 // This example strips HTML tags, javascript code and
 // whitespace, and converts a few common HTML entities
 // to their plain-text equivalents.
 //
 // $search and $replace are parallel arrays: the pattern at index i is
 // replaced by the string at index i, and the pairs are applied in order.
 $search = array ("'<script[^>]*?>.*?</script>'si", // strip javascript
 "'<[\/\!]*?[^<>]*?>'si", // strip HTML tags
 "'([\r\n])[\s]+'", // collapse whitespace that follows a line break
 "'&(quot|#34);'i", // replace HTML entities
 "'&(amp|#38);'i",
 "'&(lt|#60);'i",
 "'&(gt|#62);'i",
 "'&(nbsp|#160);'i",
 "'&(iexcl|#161);'i",
 "'&(cent|#162);'i",
 "'&(pound|#163);'i",
 "'&(copy|#169);'i");
 // The page is served as UTF-8, so the Latin-1 symbols are written as their
 // UTF-8 byte sequences; a bare chr(161) would emit an invalid lone byte.
 $replace = array ("",
 "",
 "\\1",
 "\"",
 "&",
 "<",
 ">",
 " ",
 "\xC2\xA1", // U+00A1 inverted exclamation mark
 "\xC2\xA2", // U+00A2 cent sign
 "\xC2\xA3", // U+00A3 pound sign
 "\xC2\xA9"); // U+00A9 copyright sign
 $text = preg_replace ($search, $replace, $document);
 // Remaining numeric entities (&#NNN;) are decoded through a callback.
 // The former "/e" modifier (evaluate the replacement as PHP code) was
 // removed in PHP 7.0 and made preg_replace() return NULL; a callback is
 // the supported replacement and avoids evaluating generated code.
 $text = preg_replace_callback ("'&#(\d+);'", static function (array $match) {
  // html_entity_decode() yields the UTF-8 character for the code point
  // and leaves invalid code points untouched.
  return html_entity_decode ('&#' . $match[1] . ';', ENT_QUOTES, 'UTF-8');
 }, $text);
 echo $text;
 ?>
 <hr><hr> </body>
 </html>

运行该页面后,浏览器收到的HTML 源码如下:

 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
 <html>
  <head>
   <title> preg_replace_2.php </title>
   <meta charset="UTF-8">
   <meta name="Author" content="">
   <meta name="Keywords" content="">
   <meta name="Description" content="">
  </head> <body>
 New Document 
 Hello World!
 <hr><hr> </body>
 </html>