[pLog-svn] r2362 - in plog/branches/plog-1.0.2/class: data
data/utf8 template/smarty/plugins
oscar at devel.plogworld.net
oscar at devel.plogworld.net
Mon Jul 25 19:43:11 GMT 2005
Author: oscar
Date: 2005-07-25 19:43:11 +0000 (Mon, 25 Jul 2005)
New Revision: 2362
Added:
plog/branches/plog-1.0.2/class/data/utf8/
plog/branches/plog-1.0.2/class/data/utf8/utf8_funcs.php
plog/branches/plog-1.0.2/class/template/smarty/plugins/modifier.utf8_truncate.php
Log:
Added a Smarty modifier to replace "truncate", which is not aware of multi-byte encodings.
Added: plog/branches/plog-1.0.2/class/data/utf8/utf8_funcs.php
===================================================================
--- plog/branches/plog-1.0.2/class/data/utf8/utf8_funcs.php 2005-07-25 13:22:19 UTC (rev 2361)
+++ plog/branches/plog-1.0.2/class/data/utf8/utf8_funcs.php 2005-07-25 19:43:11 UTC (rev 2362)
@@ -0,0 +1,187 @@
+<?php
+
+/**
+ * utf8 related helper functions
+ * @author CB
+ * @email cb.utblog at gmail.com
+ * @homepage http://www.utblog.com/plog/CB
+ * @date 25 Jul 2005
+ */
+
+/**
+ * int utf8_isValidChar(array $inArr, int $start = 0)
+ * Checks whether a well-formed utf8 sequence starts at index $start. Only the bit patterns are checked, so overlong forms and 5/6-byte sequences are accepted.
+ * @param $inArr array holding the text one byte per element (the callers build it with preg_split('//', ...)); $start is the index of the lead byte to examine
+ * @return the number of bytes (1-6) of the utf8 char if it is valid. 0 if the array is empty or $start is negative or past the end, or -1 if the sequence is invalid or cut short
+ * @note don't use pass-by-reference for $inArr here, otherwise efficiency will decrease significantly
+ */
+function utf8_isValidChar($inArr, $start = 0)
+{
+ if(empty($inArr) || $start < 0)
+ return 0;
+ $size = count($inArr);
+ if($size <= $start)
+ return 0;
+ $inOrd = ord($inArr[$start]);
+ $us = 0; //total number of bytes the lead byte announces
+ if($inOrd <= 0x7F) { //0xxxxxxx
+ return 1;
+ } else if($inOrd >= 0xC0 && $inOrd <= 0xDF ) { //110xxxxx 10xxxxxx
+ $us = 2;
+ } else if($inOrd >= 0xE0 && $inOrd <= 0xEF ) { //1110xxxx 10xxxxxx 10xxxxxx
+ $us = 3;
+ } else if($inOrd >= 0xF0 && $inOrd <= 0xF7 ) { //11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ $us = 4;
+ } else if($inOrd >= 0xF8 && $inOrd <= 0xFB ) { //111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+ $us = 5;
+ } else if($inOrd >= 0xFC && $inOrd <= 0xFD ) { //1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+ $us = 6;
+ } else
+ return -1; //0x80-0xBF (continuation byte in lead position), 0xFE or 0xFF
+
+ if($size - $start < $us) //fewer bytes left than the lead byte announces
+ return -1;
+
+ for($i=1; $i<$us; $i++)
+ {
+ $od = ord($inArr[$start+$i]);
+ if($od <0x80 || $od > 0xBF) //every following byte must be 10xxxxxx
+ return -1;
+ }
+ return $us;
+}
+
+/**
+ * mixed utf8_substr(string $inputStr, int $start, int $length = -1, bool $ignore_error = true)
+ * @param $inputStr utf8 encoded text
+ * @param $start index of the first character to return, counted in utf8 characters; must not be negative
+ * @param $length number of utf8 characters to return. if $length < 0, returns all text beginning at $start
+ * @param $ignore_error if true (default) each invalid byte counts as one character and is returned as '?'; if false an invalid byte makes the function return false. The tail returned when $length < 0 is neither checked nor altered
+ * @return the substring, or false if $start < 0, $length == 0, $start is larger than the number of characters, or an invalid byte is met while $ignore_error is false
+ */
+function utf8_substr($inputStr, $start, $length = -1, $ignore_error = true)
+{
+ if($start<0 || $length == 0)
+ return false;
+ $rawArr = preg_split('//',$inputStr,-1, PREG_SPLIT_NO_EMPTY); //one array element per byte
+ //find start
+ $si = 0; //characters skipped so far
+ $si_single = 0; //byte index into $rawArr
+ while($si < $start)
+ {
+ $hm = utf8_isValidChar($rawArr, $si_single);
+ if($hm == -1)
+ {
+ //ignore invalid character?
+ if(!$ignore_error)
+ return false;
+ //an index is advanced instead of calling array_shift, which is very slow
+ //the invalid byte is skipped and counted as one character
+ $si++;
+ $si_single++;
+ }
+ else if($hm == 0)
+ {
+ //$start is bigger than the utf8_length of inputString
+ return false;
+ }
+ else
+ {
+ //valid character: step over all of its $hm bytes
+ $si++;
+ $si_single += $hm;
+ }
+ }
+ if($length < 0)
+ //the rest of the text begins at byte offset $si_single
+ return substr($inputStr, $si_single);
+ $retArr = array();
+ $li = 0; //characters collected so far
+ while($li < $length)
+ {
+ $hm = utf8_isValidChar($rawArr, $si_single);
+ if($hm == -1)
+ {
+ if(!$ignore_error)
+ return false;
+ $retArr[] = '?';
+ //the invalid byte was replaced by '?' above and counts as one character
+ $li++;
+ $si_single++;
+ }
+ else if($hm == 0)
+ {
+ //end of string
+ return implode('', $retArr);
+ }
+ else
+ {
+ //copy the $hm bytes of the valid character
+ for($i=0; $i<$hm; $i++) $retArr[] = $rawArr[$si_single++];
+ $li++;
+ }
+ }
+ return implode('', $retArr);
+}
+
+/**
+ * int utf8_strlen(string $inputStr, bool $ignore_error = true)
+ * @return length of the utf8 encoded string in characters (not bytes),
+ * or -1 if $ignore_error is false and $inputStr contains an invalid utf8 sequence
+ * @note if $ignore_error is true (the default value), every invalid byte is counted as ONE utf8 char
+ */
+function utf8_strlen($inputStr, $ignore_error = true)
+{
+ $rawArr = preg_split('//',$inputStr,-1, PREG_SPLIT_NO_EMPTY); //one array element per byte
+ $len = 0;
+ $si_single = 0; //byte index into $rawArr
+ while(($hm = utf8_isValidChar($rawArr, $si_single)) != 0) //0 means the end of the text was reached
+ {
+ if($hm == -1)
+ {
+ if(!$ignore_error)
+ return -1;
+ //skip the single invalid byte
+ $si_single++;
+ }
+ else
+ //valid character: step over all of its $hm bytes
+ $si_single += $hm;
+ $len++; //not part of the brace-less else above: runs for valid and invalid characters alike
+ }
+ return $len;
+}
+
+/**
+ * int utf8_proportion(string $inputStr)
+ * @param $inputStr text to examine
+ * @return percentage (0-100, truncated to an integer) of the bytes of $inputStr that belong to valid utf8 characters; 100 for an empty string
+ * @see http://www.utblog.com/plog/1/article/292
+ */
+function utf8_proportion($inputStr)
+{
+ $rawArr = preg_split('//',$inputStr,-1, PREG_SPLIT_NO_EMPTY); //one array element per byte
+ $rawLen = count($rawArr);
+ if($rawLen == 0)
+ return 100;
+ $validChars = 0; //number of bytes that are part of valid characters
+ $si_single = 0; //byte index into $rawArr
+ while(($hm = utf8_isValidChar($rawArr, $si_single)) != 0) //0 means the end of the text was reached
+ {
+ if($hm == -1)
+ {
+ //the invalid byte is skipped without being counted
+ $si_single++;
+ continue;
+ }
+ //valid character: count and step over all of its $hm bytes
+ $validChars += $hm;
+ $si_single += $hm;
+ }
+ if($validChars == $rawLen)
+ return 100;
+ else
+ return (int)($validChars*100.0/$rawLen);
+}
+
+?>
Added: plog/branches/plog-1.0.2/class/template/smarty/plugins/modifier.utf8_truncate.php
===================================================================
--- plog/branches/plog-1.0.2/class/template/smarty/plugins/modifier.utf8_truncate.php 2005-07-25 13:22:19 UTC (rev 2361)
+++ plog/branches/plog-1.0.2/class/template/smarty/plugins/modifier.utf8_truncate.php 2005-07-25 19:43:11 UTC (rev 2362)
@@ -0,0 +1,33 @@
+<?php
+include_once(PLOG_CLASS_PATH.'class/data/utf8/utf8_funcs.php'); // provides utf8_strlen() and utf8_substr(); the file lives under class/data/utf8/
+
+/**
+ * Smarty truncate modifier plugin (utf8 aware replacement for "truncate")
+ *
+ * Type: modifier<br>
+ * Name: utf8_truncate<br>
+ * @param string  $string      text to shorten; returned unchanged when it has at most $length utf8 characters
+ * @param integer $length      wanted length in utf8 characters, $etc included (if $etc alone is longer, only $etc is returned)
+ * @param string  $etc         marker appended to a shortened text
+ * @param boolean $break_words if false, trailing whitespace plus the last (possibly partial) word are removed before $etc is appended
+ * @return string the shortened text, or '' when $length is zero or negative
+ */
+function smarty_modifier_utf8_truncate($string, $length = 80, $etc = '...',
+ $break_words = false)
+{
+ if ($length <= 0)
+ return '';
+
+ if (utf8_strlen($string) > $length) {
+ $length = max(0, $length - utf8_strlen($etc)); // count $etc in characters, not bytes; never negative, because utf8_substr() returns the whole text for a negative length
+ if (!$break_words)
+ $string = preg_replace('/\s+?(\w+)?$/', '', utf8_substr($string, 0, $length+1));
+
+ return utf8_substr($string, 0, $length).$etc;
+ } else
+ return $string;
+}
+
+/* vim: set expandtab: */
+
+?>
More information about the pLog-svn
mailing list