[pLog-svn] r2362 - in plog/branches/plog-1.0.2/class: data data/utf8 template/smarty/plugins

oscar at devel.plogworld.net oscar at devel.plogworld.net
Mon Jul 25 19:43:11 GMT 2005


Author: oscar
Date: 2005-07-25 19:43:11 +0000 (Mon, 25 Jul 2005)
New Revision: 2362

Added:
   plog/branches/plog-1.0.2/class/data/utf8/
   plog/branches/plog-1.0.2/class/data/utf8/utf8_funcs.php
   plog/branches/plog-1.0.2/class/template/smarty/plugins/modifier.utf8_truncate.php
Log:
Added a Smarty modifier to replace "truncate", which is not aware of multi-byte encodings.


Added: plog/branches/plog-1.0.2/class/data/utf8/utf8_funcs.php
===================================================================
--- plog/branches/plog-1.0.2/class/data/utf8/utf8_funcs.php	2005-07-25 13:22:19 UTC (rev 2361)
+++ plog/branches/plog-1.0.2/class/data/utf8/utf8_funcs.php	2005-07-25 19:43:11 UTC (rev 2362)
@@ -0,0 +1,187 @@
+<?php
+
+/**
+ * UTF-8 related helper functions
+ * @author CB
+ * @email cb.utblog at gmail.com
+ * @homepage http://www.utblog.com/plog/CB
+ * @date 25 Jul 2005
+ */
+
+/**
+ * int utf8_isValidChar(array $inArr, int $start = 0)
+ * Checks whether a well-formed utf8 character begins at index $start of a byte array.
+ *
+ * Only the encodings allowed by RFC 3629 are accepted: 1 to 4 bytes, no overlong
+ * forms, no UTF-16 surrogates (U+D800..U+DFFF) and nothing above U+10FFFF.
+ *
+ * @param $inArr array of one-byte strings (one element per byte of the text)
+ * @param $start index in $inArr of the first byte of the character to check
+ * @return the number of bytes (1..4) used by the character if it is valid,
+ *         0 if $inArr is empty, $start is negative or $start is past the end of $inArr,
+ *         or -1 if the bytes at $start do not form a valid utf8 character (this includes
+ *         a character that is cut off by the end of the array)
+ * @note don't use pass-by-reference for $inArr here, otherwise efficiency will decrease significantly
+ */
+function utf8_isValidChar($inArr, $start = 0)
+{
+	if(empty($inArr) || $start < 0)
+		return 0;
+	$size = count($inArr);
+	if($size <= $start)
+		return 0;
+	$inOrd = ord($inArr[$start]);
+	if($inOrd <= 0x7F) //0xxxxxxx
+		return 1;
+
+	// $lo/$hi is the allowed range of the FIRST continuation byte. For some lead
+	// bytes it is narrower than 0x80..0xBF; that is what rules out overlong forms,
+	// surrogates and code points above U+10FFFF.
+	$lo = 0x80;
+	$hi = 0xBF;
+	if($inOrd >= 0xC2 && $inOrd <= 0xDF) { //110xxxxx 10xxxxxx (0xC0 and 0xC1 could only start overlong forms)
+		$us = 2;
+	} else if($inOrd >= 0xE0 && $inOrd <= 0xEF) { //1110xxxx 10xxxxxx 10xxxxxx
+		$us = 3;
+		if($inOrd == 0xE0)
+			$lo = 0xA0; // anything lower would be an overlong encoding
+		else if($inOrd == 0xED)
+			$hi = 0x9F; // anything higher would be a UTF-16 surrogate
+	} else if($inOrd >= 0xF0 && $inOrd <= 0xF4) { //11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+		$us = 4;
+		if($inOrd == 0xF0)
+			$lo = 0x90; // anything lower would be an overlong encoding
+		else if($inOrd == 0xF4)
+			$hi = 0x8F; // anything higher would be above U+10FFFF
+	} else
+		// stray continuation byte (0x80..0xBF), 0xC0/0xC1, or a lead byte above 0xF4
+		// (the old 5 and 6 byte forms are not valid utf8 any more)
+		return -1;
+
+	// the character is truncated by the end of the input
+	if($size - $start < $us)
+		return -1;
+
+	for($i=1; $i<$us; $i++)
+	{
+		$od = ord($inArr[$start+$i]);
+		if($od < $lo || $od > $hi)
+			return -1;
+		// only the first continuation byte has a restricted range
+		$lo = 0x80;
+		$hi = 0xBF;
+	}
+	return $us;
+}
+
+/**
+ * mix utf8_substr(string $inputString, int $start_index, int $length = -1, bool $ignore_invalid_utf8_char = true)
+ * Returns part of a string, counting in utf8 characters instead of bytes.
+ *
+ * @param $inputStr the string to cut
+ * @param $start index of the first character to return, counted in utf8 characters; must not be negative
+ * @param $length maximum number of utf8 characters to return. If $length < 0, everything from $start
+ *        to the end of the string is returned
+ * @param $ignore_error what to do with invalid utf8 bytes. If true (the default) every invalid byte
+ *        counts as one character and is replaced with '?' in the returned string; if false the
+ *        function gives up and returns false as soon as it meets one
+ * @return the substring (possibly the empty string '' when $start is exactly the end of the string), or
+ *         false if $start is negative, $length is 0, $start is beyond the end of the string, or an
+ *         invalid byte was found while $ignore_error is false
+ */
+function utf8_substr($inputStr, $start, $length = -1, $ignore_error = true)
+{
+	if($start<0 || $length == 0)
+		return false;
+	// one array element per byte, because the pattern has no "u" modifier
+	$rawArr = preg_split('//',$inputStr,-1, PREG_SPLIT_NO_EMPTY);
+
+	// step 1: skip $start characters; $si_single is the byte offset reached so far
+	$si_single = 0;
+	for($si = 0; $si < $start; $si++)
+	{
+		$hm = utf8_isValidChar($rawArr, $si_single);
+		if($hm == 0)
+			//$start is bigger than the utf8_length of inputString
+			return false;
+		if($hm == -1)
+		{
+			if(!$ignore_error)
+				return false;
+			// an invalid byte is skipped as if it was a one-byte character
+			$hm = 1;
+		}
+		$si_single += $hm;
+	}
+
+	// step 2: copy up to $length characters. A negative $length means "until the end of
+	// the string"; it goes through the same loop so that invalid bytes are handled
+	// exactly like in the bounded case.
+	$ret = '';
+	$li = 0;
+	while($length < 0 || $li < $length)
+	{
+		$hm = utf8_isValidChar($rawArr, $si_single);
+		if($hm == 0)
+			//end of string
+			break;
+		if($hm == -1)
+		{
+			if(!$ignore_error)
+				return false;
+			$ret .= '?';
+			$si_single++;
+		}
+		else
+		{
+			$ret .= substr($inputStr, $si_single, $hm);
+			$si_single += $hm;
+		}
+		$li++;
+	}
+	return $ret;
+}
+
+/**
+ * int utf8_strlen(string $inputString, bool $ignore_invalid_utf8_char = true)
+ * Counts the utf8 characters of a string.
+ * @param $inputStr the string to measure
+ * @param $ignore_error whether invalid utf8 bytes are tolerated (default: true)
+ * @return how many utf8 characters the string contains, or
+ * -1 if $ignore_error is false and the string contains an invalid utf8 char
+ * @note if $ignore_error is true, every invalid byte is counted as ONE utf8 char
+ */
+function utf8_strlen($inputStr, $ignore_error = true)
+{
+	// one array element per byte of the input
+	$bytes = preg_split('//',$inputStr,-1, PREG_SPLIT_NO_EMPTY);
+	$count = 0;
+	$pos = 0;
+	for(;;)
+	{
+		$step = utf8_isValidChar($bytes, $pos);
+		if($step == 0)
+			// reached the end of the input
+			break;
+		if($step == -1)
+		{
+			if(!$ignore_error)
+				return -1;
+			// step over the invalid byte and count it as one character
+			$step = 1;
+		}
+		$pos += $step;
+		$count++;
+	}
+	return $count;
+}
+
+/**
+ * int utf8_proportion(string $inputString)
+ * Tells how much of a string is made of valid utf8 characters.
+ * @param $inputStr the string to examine
+ * @return integer percentage (0..100, fraction dropped) of the bytes of $inputStr
+ *         that belong to valid utf8 characters; 100 for an empty string
+ * @see http://www.utblog.com/plog/1/article/292
+ */
+function utf8_proportion($inputStr)
+{
+	// one array element per byte of the input
+	$bytes = preg_split('//',$inputStr,-1, PREG_SPLIT_NO_EMPTY);
+	$total = count($bytes);
+	if($total == 0)
+		return 100;
+
+	$good = 0;
+	$pos = 0;
+	for(;;)
+	{
+		$step = utf8_isValidChar($bytes, $pos);
+		if($step == 0)
+			// reached the end of the input
+			break;
+		if($step == -1)
+		{
+			// invalid byte: step over it without counting it
+			$pos++;
+		}
+		else
+		{
+			// all the bytes of a valid character count as valid
+			$good += $step;
+			$pos += $step;
+		}
+	}
+
+	if($good != $total)
+		return (int)($good*100.0/$total);
+	return 100;
+}
+
+?>

Added: plog/branches/plog-1.0.2/class/template/smarty/plugins/modifier.utf8_truncate.php
===================================================================
--- plog/branches/plog-1.0.2/class/template/smarty/plugins/modifier.utf8_truncate.php	2005-07-25 13:22:19 UTC (rev 2361)
+++ plog/branches/plog-1.0.2/class/template/smarty/plugins/modifier.utf8_truncate.php	2005-07-25 19:43:11 UTC (rev 2362)
@@ -0,0 +1,33 @@
+<?php
+// utf8_funcs.php lives in class/data/utf8/, not in class/misc/
+include_once(PLOG_CLASS_PATH.'class/data/utf8/utf8_funcs.php');
+
+/**
+ * Smarty utf8-aware truncate modifier plugin
+ *
+ * Type:     modifier<br>
+ * Name:     utf8_truncate<br>
+ * Purpose:  Shorten a string to at most $length utf8 characters (not bytes),
+ *           $etc included, when the string is longer than that. Strings that
+ *           already fit are returned untouched.
+ * @param string  $string the text to shorten
+ * @param integer $length maximum number of utf8 characters of the result; 0 gives ''
+ * @param string  $etc text appended to mark the truncation
+ * @param boolean $break_words if false, a partial word left at the end of the cut is removed
+ * @return string
+ */
+function smarty_modifier_utf8_truncate($string, $length = 80, $etc = '...',
+                                  $break_words = false)
+{
+    if ($length == 0)
+        return '';
+
+    if (utf8_strlen($string) <= $length)
+        return $string;
+
+    // make room for $etc; its size is measured in characters, like everything else here
+    $length -= utf8_strlen($etc);
+
+    // no room left for any text: a negative length would make utf8_substr
+    // return the whole string instead of a shortened one
+    if ($length <= 0)
+        return $etc;
+
+    if (!$break_words)
+        // \S+ and not \w+: \w does not match the bytes of multi-byte utf8 letters,
+        // so words written with them would never be removed
+        $string = preg_replace('/\s+?(\S+)?$/', '', utf8_substr($string, 0, $length+1));
+
+    return utf8_substr($string, 0, $length).$etc;
+}
+
+/* vim: set expandtab: */
+
+?>




More information about the pLog-svn mailing list