[pLog-svn] r5091 - plog/branches/lifetype-1.2/class/data

mark at devel.lifetype.net mark at devel.lifetype.net
Thu Mar 15 01:54:57 EDT 2007


Author: mark
Date: 2007-03-15 01:54:57 -0400 (Thu, 15 Mar 2007)
New Revision: 5091

Modified:
   plog/branches/lifetype-1.2/class/data/textfilter.class.php
Log:
Fixed the htmlDecode() according to the discussion thread in svn rev. 5062 and MSN discussion with Oscar.

Modified: plog/branches/lifetype-1.2/class/data/textfilter.class.php
===================================================================
--- plog/branches/lifetype-1.2/class/data/textfilter.class.php	2007-03-14 14:28:27 UTC (rev 5090)
+++ plog/branches/lifetype-1.2/class/data/textfilter.class.php	2007-03-15 05:54:57 UTC (rev 5091)
@@ -236,12 +236,33 @@
             // replace numeric entities
             $htmlString = preg_replace('~&#x([0-9a-f]+);~ei', 'chr(hexdec("\\1"))', $htmlString);
             $htmlString = preg_replace('~&#([0-9]+);~e', 'chr(\\1)', $htmlString);
-            // replace literal entities
-            $trans_tbl = get_html_translation_table( HTML_SPECIALCHARS, $quote_style );
-            $trans_tbl = array_flip($trans_tbl);
-            $trans_tbl['&#039;'] = "'";
-            return strtr($htmlString, $trans_tbl);
-		}
+            // get the entity translation table from PHP (current encoding is ISO-8859-1)
+            $trans_table = get_html_translation_table( HTML_ENTITIES, $quote_style );
+            // when we want to decode the input string to normalized string, there are two factors 
+            // we need to take into consideration:
+            //  - Input string encoding
+            //  - MySQL default-character-set encoding
+            // No matter what the input string encoding is, the normalized text saved to MySQL should
+            // follow MySQL data validation. If we don't follow the constraint, then MySQL will raise
+            // an error for us. (It only happens in MySQL5 strict mode)
+            // Therefore, we need to check the db_character_set in our config file to see whether we should
+            // use the UTF-8 translation table or the ISO-8859-1 translation table.
+            // This should fix the CJK/UTF-8 characters broken by Jon's original modification.
+            //
+            // If possible, I really hope we can accept UTF-8 encoding only, it will make our life easier.
+            require_once( PLOG_CLASS_PATH . "class/config/configfilestorage.class.php" );
+			$config = new ConfigFileStorage();
+			if( $config->getValue( 'db_character_set' ) == 'utf8' ) {
+				// Convert the ISO-8859-1 translation table to UTF-8
+				foreach ( $trans_table as $key => $value ){
+					$new_trans_table[$value] = utf8_encode( $key );
+				}
+			} else {
+				// Keep original ISO-8859-1 translation table, just flip it
+            	$new_trans_table = array_flip($trans_table);
+			}
+            return strtr( $htmlString, $new_trans_table );
+		} 
 		
 		/**
 		 * Normalizes the given text. By 'normalizing', it means removing all html markup from the text as well
@@ -394,7 +415,7 @@
          *
          * ; / ? : @ & = + $ ,
          *
-         * It will convert accented characters such as ˆ, , ’, etc to their non-accented counterparts (a, e, i) And
+         * It will convert accented characters such as ? ? ? etc to their non-accented counterparts (a, e, i) And
          * any other non-alphanumeric character that hasn't been removed or replaced will be thrown away.
          *
          * @param string The string that we wish to convert into something that can be used as a URL
@@ -408,8 +429,8 @@
             $string = str_replace(array(';','/','?',':','@','&','=','+','$',','), '', $string);
 
             // replace some characters to similar ones
-            $search  = array(' ', 'ä', 'ö', 'ü','é','è','à','ç', 'à', 'è', 'ì',
-                             'ò', 'ù', 'á', 'é', 'í', 'ó', 'ú', 'ë', 'ï' );
+            $search  = array(' ', 'ä', 'ö', 'ü','é','è','à','ç', 'à', 'è', 'ì',
+                             'ò', 'ù', 'á', 'é', 'í', 'ó', 'ú', 'ë', 'ï' );
             lt_include( PLOG_CLASS_PATH."class/config/config.class.php" );
 			$config =& Config::getConfig();
             $separator = $config->getValue( "urlize_word_separator", URLIZE_WORD_SEPARATOR_DEFAULT );
@@ -436,7 +457,7 @@
          *
          * ; / ? : @ & = + $ ,
          *
-         * It will convert accented characters such as ˆ, , ’, etc to
+         * It will convert accented characters such as ? ? ? etc to
          * their non-accented counterparts (a, e, i) And
          * any other non-alphanumeric character that hasn't been removed
          * or replaced will be thrown away.
@@ -459,7 +480,7 @@
             // replace some characters to similar ones
             // underscores aren't allowed in domain names according to rfc specs, and
             // cause trouble in some browsers, particularly with cookies.
-            $search  = array('-', '_',' ', 'ä','ö','ü','é','è','à','ç','à','è','ì','ò','ù','á','é','í','ó','ú','ë','ï' );
+            $search  = array('-', '_',' ', 'ä','ö','ü','é','è','à','ç','à','è','ì','ò','ù','á','é','í','ó','ú','ë','ï' );
             $replace = array( $sep, $sep, $sep, 'a','o','u','e','e','a','c','a','e','i','o','u','a','e','i','o','u','e','i' );
             $string = str_replace($search, $replace, $string);
 



More information about the pLog-svn mailing list