[pLog-svn] r5091 - plog/branches/lifetype-1.2/class/data
mark at devel.lifetype.net
mark at devel.lifetype.net
Thu Mar 15 01:54:57 EDT 2007
Author: mark
Date: 2007-03-15 01:54:57 -0400 (Thu, 15 Mar 2007)
New Revision: 5091
Modified:
plog/branches/lifetype-1.2/class/data/textfilter.class.php
Log:
Fixed the htmlDecode() according to the discussion thread in svn rev. 5062 and MSN discussion with Oscar.
Modified: plog/branches/lifetype-1.2/class/data/textfilter.class.php
===================================================================
--- plog/branches/lifetype-1.2/class/data/textfilter.class.php 2007-03-14 14:28:27 UTC (rev 5090)
+++ plog/branches/lifetype-1.2/class/data/textfilter.class.php 2007-03-15 05:54:57 UTC (rev 5091)
@@ -236,12 +236,33 @@
// replace numeric entities
$htmlString = preg_replace('~&#x([0-9a-f]+);~ei', 'chr(hexdec("\\1"))', $htmlString);
$htmlString = preg_replace('~&#([0-9]+);~e', 'chr(\\1)', $htmlString);
- // replace literal entities
- $trans_tbl = get_html_translation_table( HTML_SPECIALCHARS, $quote_style );
- $trans_tbl = array_flip($trans_tbl);
- $trans_tbl['&#039;'] = "'";
- return strtr($htmlString, $trans_tbl);
- }
+ // get the entity translation table from PHP (current encoding is ISO-8859-1)
+ $trans_table = get_html_translation_table( HTML_ENTITIES, $quote_style );
+ // When we want to decode the input string to a normalized string, there are two factors
+ // we need to take into consideration:
+ // - Input string encoding
+ // - MySQL default-character-set encoding
+ // Whatever the input string encoding is, the normalized text saved to MySQL should
+ // pass MySQL's data validation. If we don't respect that constraint, MySQL will raise
+ // an error. (This only happens in MySQL 5 strict mode.)
+ // Therefore, we need to check db_character_set in our config file to see whether we should
+ // use the UTF-8 translation table or the ISO-8859-1 translation table.
+ // This should fix the CJK/UTF-8 character breakage caused by Jon's original modification.
+ //
+ // If possible, I really hope we can accept UTF-8 encoding only, it will make our life easier.
+ require_once( PLOG_CLASS_PATH . "class/config/configfilestorage.class.php" );
+ $config = new ConfigFileStorage();
+ if( $config->getValue( 'db_character_set' ) == 'utf8' ) {
+ // Convert the ISO-8859-1 translation table to UTF-8
+ foreach ( $trans_table as $key => $value ){
+ $new_trans_table[$value] = utf8_encode( $key );
+ }
+ } else {
+ // Keep original ISO-8859-1 translation table, just flip it
+ $new_trans_table = array_flip($trans_table);
+ }
+ return strtr( $htmlString, $new_trans_table );
+ }
/**
* Normalizes the given text. By 'normalizing', it means removing all html markup from the text as well
@@ -394,7 +415,7 @@
*
* ; / ? : @ & = + $ ,
*
- * It will convert accented characters such as á, é, í, etc to their non-accented counterparts (a, e, i) And
+ * It will convert accented characters such as ? ? ? etc to their non-accented counterparts (a, e, i) And
* any other non-alphanumeric character that hasn't been removed or replaced will be thrown away.
*
* @param string The string that we wish to convert into something that can be used as a URL
@@ -408,8 +429,8 @@
$string = str_replace(array(';','/','?',':','@','&','=','+','$',','), '', $string);
// replace some characters to similar ones
- $search = array(' ', 'ä', 'ö', 'ü','é','è','à','ç', 'à', 'è', 'ì',
- 'ò', 'ù', 'á', 'é', 'í', 'ó', 'ú', 'ë', 'ï' );
+ $search = array(' ', '?, '?, '?,'?,'?,'?,'?, '?, '?, '?,
+ '?, '?, '?, '?, '?, '?, '?, '?, '? );
lt_include( PLOG_CLASS_PATH."class/config/config.class.php" );
$config =& Config::getConfig();
$separator = $config->getValue( "urlize_word_separator", URLIZE_WORD_SEPARATOR_DEFAULT );
@@ -436,7 +457,7 @@
*
* ; / ? : @ & = + $ ,
*
- * It will convert accented characters such as á, é, í, etc to
+ * It will convert accented characters such as ? ? ? etc to
* their non-accented counterparts (a, e, i) And
* any other non-alphanumeric character that hasn't been removed
* or replaced will be thrown away.
@@ -459,7 +480,7 @@
// replace some characters to similar ones
// underscores aren't allowed in domain names according to rfc specs, and
// cause trouble in some browsers, particularly with cookies.
- $search = array('-', '_',' ', 'ä','ö','ü','é','è','à','ç','à','è','ì','ò','ù','á','é','í','ó','ú','ë','ï' );
+ $search = array('-', '_',' ', '?,'?,'?,'?,'?,'?,'?,'?,'?,'?,'?,'?,'?,'?,'?,'?,'?,'?,'? );
$replace = array( $sep, $sep, $sep, 'a','o','u','e','e','a','c','a','e','i','o','u','a','e','i','o','u','e','i' );
$string = str_replace($search, $replace, $string);
More information about the pLog-svn
mailing list