CodeSwitch.class.php 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  1. <?php
  2. /**
  3. * 洛阳赤炎鹰网络科技有限公司
  4. * https://www.cyyvip.com
  5. * Copyright (c) 2022 赤店商城 All rights reserved.
  6. */
  7. // +----------------------------------------------------------------------
  8. // | ThinkPHP [ WE CAN DO IT JUST THINK IT ]
  9. // +----------------------------------------------------------------------
  10. // | Copyright (c) 2009 http://thinkphp.cn All rights reserved.
  11. // +----------------------------------------------------------------------
  12. // | Licensed ( http://www.apache.org/licenses/LICENSE-2.0 )
  13. // +----------------------------------------------------------------------
  14. // | Author: liu21st <liu21st@gmail.com>
  15. // +----------------------------------------------------------------------
  16. namespace Org\Util;
  17. class CodeSwitch {
  18. // 错误信息
  19. static private $error = array();
  20. // 提示信息
  21. static private $info = array();
  22. // 记录错误
  23. static private function error($msg) {
  24. self::$error[] = $msg;
  25. }
  26. // 记录信息
  27. static private function info($info) {
  28. self::$info[] = $info;
  29. }
  30. /**
  31. * 编码转换函数,对整个文件进行编码转换
  32. * 支持以下转换
  33. * GB2312、UTF-8 WITH BOM转换为UTF-8
  34. * UTF-8、UTF-8 WITH BOM转换为GB2312
  35. * @access public
  36. * @param string $filename 文件名
  37. * @param string $out_charset 转换后的文件编码,与iconv使用的参数一致
  38. * @return void
  39. */
  40. static function DetectAndSwitch($filename,$out_charset) {
  41. $fpr = fopen($filename,"r");
  42. $char1 = fread($fpr,1);
  43. $char2 = fread($fpr,1);
  44. $char3 = fread($fpr,1);
  45. $originEncoding = "";
  46. if($char1==chr(239) && $char2==chr(187) && $char3==chr(191))//UTF-8 WITH BOM
  47. $originEncoding = "UTF-8 WITH BOM";
  48. elseif($char1==chr(255) && $char2==chr(254))//UNICODE LE
  49. {
  50. self::error("不支持从UNICODE LE转换到UTF-8或GB编码");
  51. fclose($fpr);
  52. return;
  53. }elseif($char1==chr(254) && $char2==chr(255)){//UNICODE BE
  54. self::error("不支持从UNICODE BE转换到UTF-8或GB编码");
  55. fclose($fpr);
  56. return;
  57. }else{//没有文件头,可能是GB或UTF-8
  58. if(rewind($fpr)===false){//回到文件开始部分,准备逐字节读取判断编码
  59. self::error($filename."文件指针后移失败");
  60. fclose($fpr);
  61. return;
  62. }
  63. while(!feof($fpr)){
  64. $char = fread($fpr,1);
  65. //对于英文,GB和UTF-8都是单字节的ASCII码小于128的值
  66. if(ord($char)<128)
  67. continue;
  68. //对于汉字GB编码第一个字节是110*****第二个字节是10******(有特例,比如联字)
  69. //UTF-8编码第一个字节是1110****第二个字节是10******第三个字节是10******
  70. //按位与出来结果要跟上面非星号相同,所以应该先判断UTF-8
  71. //因为使用GB的掩码按位与,UTF-8的111得出来的也是110,所以要先判断UTF-8
  72. if((ord($char)&224)==224) {
  73. //第一个字节判断通过
  74. $char = fread($fpr,1);
  75. if((ord($char)&128)==128) {
  76. //第二个字节判断通过
  77. $char = fread($fpr,1);
  78. if((ord($char)&128)==128) {
  79. $originEncoding = "UTF-8";
  80. break;
  81. }
  82. }
  83. }
  84. if((ord($char)&192)==192) {
  85. //第一个字节判断通过
  86. $char = fread($fpr,1);
  87. if((ord($char)&128)==128) {
  88. //第二个字节判断通过
  89. $originEncoding = "GB2312";
  90. break;
  91. }
  92. }
  93. }
  94. }
  95. if(strtoupper($out_charset)==$originEncoding) {
  96. self::info("文件".$filename."转码检查完成,原始文件编码".$originEncoding);
  97. fclose($fpr);
  98. }else {
  99. //文件需要转码
  100. $originContent = "";
  101. if($originEncoding == "UTF-8 WITH BOM") {
  102. //跳过三个字节,把后面的内容复制一遍得到utf-8的内容
  103. fseek($fpr,3);
  104. $originContent = fread($fpr,filesize($filename)-3);
  105. fclose($fpr);
  106. }elseif(rewind($fpr)!=false){//不管是UTF-8还是GB2312,回到文件开始部分,读取内容
  107. $originContent = fread($fpr,filesize($filename));
  108. fclose($fpr);
  109. }else{
  110. self::error("文件编码不正确或指针后移失败");
  111. fclose($fpr);
  112. return;
  113. }
  114. //转码并保存文件
  115. $content = iconv(str_replace(" WITH BOM","",$originEncoding),strtoupper($out_charset),$originContent);
  116. $fpw = fopen($filename,"w");
  117. fwrite($fpw,$content);
  118. fclose($fpw);
  119. if($originEncoding!="")
  120. self::info("对文件".$filename."转码完成,原始文件编码".$originEncoding.",转换后文件编码".strtoupper($out_charset));
  121. elseif($originEncoding=="")
  122. self::info("文件".$filename."中没有出现中文,但是可以断定不是带BOM的UTF-8编码,没有进行编码转换,不影响使用");
  123. }
  124. }
  125. /**
  126. * 目录遍历函数
  127. * @access public
  128. * @param string $path 要遍历的目录名
  129. * @param string $mode 遍历模式,一般取FILES,这样只返回带路径的文件名
  130. * @param array $file_types 文件后缀过滤数组
  131. * @param int $maxdepth 遍历深度,-1表示遍历到最底层
  132. * @return void
  133. */
  134. static function searchdir($path,$mode = "FULL",$file_types = array(".html",".php"),$maxdepth = -1,$d = 0) {
  135. if(substr($path,strlen($path)-1) != '/')
  136. $path .= '/';
  137. $dirlist = array();
  138. if($mode != "FILES")
  139. $dirlist[] = $path;
  140. if($handle = @opendir($path)) {
  141. while(false !== ($file = readdir($handle)))
  142. {
  143. if($file != '.' && $file != '..')
  144. {
  145. $file = $path.$file ;
  146. if(!is_dir($file))
  147. {
  148. if($mode != "DIRS")
  149. {
  150. $extension = "";
  151. $extpos = strrpos($file, '.');
  152. if($extpos!==false)
  153. $extension = substr($file,$extpos,strlen($file)-$extpos);
  154. $extension=strtolower($extension);
  155. if(in_array($extension, $file_types))
  156. $dirlist[] = $file;
  157. }
  158. }
  159. elseif($d >= 0 && ($d < $maxdepth || $maxdepth < 0))
  160. {
  161. $result = self::searchdir($file.'/',$mode,$file_types,$maxdepth,$d + 1) ;
  162. $dirlist = array_merge($dirlist,$result);
  163. }
  164. }
  165. }
  166. closedir ( $handle ) ;
  167. }
  168. if($d == 0)
  169. natcasesort($dirlist);
  170. return($dirlist) ;
  171. }
  172. /**
  173. * 对整个项目目录中的PHP和HTML文件行进编码转换
  174. * @access public
  175. * @param string $app 要遍历的项目路径
  176. * @param string $mode 遍历模式,一般取FILES,这样只返回带路径的文件名
  177. * @param array $file_types 文件后缀过滤数组
  178. * @return void
  179. */
  180. static function CodingSwitch($app = "./",$charset='UTF-8',$mode = "FILES",$file_types = array(".html",".php")) {
  181. self::info("注意: 程序使用的文件编码检测算法可能对某些特殊字符不适用");
  182. $filearr = self::searchdir($app,$mode,$file_types);
  183. foreach($filearr as $file)
  184. self::DetectAndSwitch($file,$charset);
  185. }
  186. static public function getError() {
  187. return self::$error;
  188. }
  189. static public function getInfo() {
  190. return self::$info;
  191. }
  192. }