[pLog-svn] r3772 - in plog/trunk/class: dao data

jondaley at devel.lifetype.net jondaley at devel.lifetype.net
Sat Jul 22 20:05:25 GMT 2006


Author: jondaley
Date: 2006-07-22 20:05:25 +0000 (Sat, 22 Jul 2006)
New Revision: 3772

Modified:
   plog/trunk/class/dao/article.class.php
   plog/trunk/class/data/textfilter.class.php
Log:
added slugify, a function to make better slugs than urlize can,
since slugify uses the {postname} regexp from linkparser to remove all
characters that won't be recognized by the link parser.

updated urlize (and slugify) to remove duplicate, leading, and trailing
separator characters after doing the first replace.

This makes a topic of "La la & bu bu"
produce a slug of "la-la-bu-bu" instead of
"la-la---bu-bu"; I think it looks better.

This change was bigger/harder than I expected it to be --
what do you all think of it?

Fixes: http://bugs.lifetype.net/view.php?id=901



Modified: plog/trunk/class/dao/article.class.php
===================================================================
--- plog/trunk/class/dao/article.class.php	2006-07-22 19:47:55 UTC (rev 3771)
+++ plog/trunk/class/dao/article.class.php	2006-07-22 20:05:25 UTC (rev 3772)
@@ -949,7 +949,7 @@
 		{
 			if( $this->_slug == "" ) {
                 include_once( PLOG_CLASS_PATH.'class/data/textfilter.class.php' );
-				$slug = Textfilter::urlize( $this->getTopic());
+				$slug = Textfilter::slugify( $this->getTopic());
 			} else {
 				$slug = $this->_slug;
             }
@@ -963,10 +963,10 @@
 		 * @param slug the new post slug
 		 */
 		function setPostSlug( $slug )
-		{
+        {
             include_once( PLOG_CLASS_PATH.'class/data/textfilter.class.php' );
-			$this->_slug = Textfilter::urlize( $slug );
-		}
+            $this->_slug = Textfilter::slugify( $slug );
+        }            
 
 		/**
 		 * returns the previous article in time

Modified: plog/trunk/class/data/textfilter.class.php
===================================================================
--- plog/trunk/class/data/textfilter.class.php	2006-07-22 19:47:55 UTC (rev 3771)
+++ plog/trunk/class/data/textfilter.class.php	2006-07-22 20:05:25 UTC (rev 3772)
@@ -8,7 +8,6 @@
      */
 
 	
-	include_once( PLOG_CLASS_PATH."class/config/config.class.php" );
 	include_once( PLOG_CLASS_PATH."class/data/stringutils.class.php" );
 	
 	/**
@@ -38,10 +37,9 @@
          */
 		function TextFilter( $removeJavaScript = true )
 		{
-			
-
 			$this->removeJavaScript = $removeJavaScript;
 
+            include_once( PLOG_CLASS_PATH."class/config/config.class.php" );
 			$config =& Config::getConfig();
 			$this->htmlAllowedTags = $config->getValue( "html_allowed_tags_in_comments" );
 			$this->_smileys = $config->getValue( "smileys" );
@@ -405,15 +403,24 @@
             $string = str_replace(array(';','/','?',':','@','&','=','+','$',','), '', $string);
 
             // replace some characters to similar ones
-            $search  = array(' ', 'ä', 'ö', 'ü','é','è','à','ç', 'à', 'è', 'ì', 'ò', 'ù', 'á', 'é', 'í', 'ó', 'ú', 'ë', 'ï' );
-			include_once( PLOG_CLASS_PATH."class/config/config.class.php" );
+            $search  = array(' ', 'ä', 'ö', 'ü','é','è','à','ç', 'à', 'è', 'ì',
+                             'ò', 'ù', 'á', 'é', 'í', 'ó', 'ú', 'ë', 'ï' );
+            include_once( PLOG_CLASS_PATH."class/config/config.class.php" );
 			$config =& Config::getConfig();
-            $replace = array( $config->getValue( "urlize_word_separator", URLIZE_WORD_SEPARATOR_DEFAULT ), 'a','o','u','e','e','a','c', 'a', 'e', 'i', 'o', 'u', 'a', 'e', 'i', 'o', 'u', 'e', 'i' );
+            $separator = $config->getValue( "urlize_word_separator", URLIZE_WORD_SEPARATOR_DEFAULT );
+            
+            $replace = array( $separator , 'a','o','u','e','e','a','c', 'a', 'e', 'i',
+                              'o', 'u', 'a', 'e', 'i', 'o', 'u', 'e', 'i' );
             $string = str_replace($search, $replace, $string);
             
             // and everything that is still left that hasn't been replaced/encoded, throw it away
             $string = preg_replace( '/[^a-z0-9 _.-]/', '', $string );        
             
+                // remove doubled separators
+            $string = preg_replace("/[".$separator."]+/", $separator, $string);
+                // remove starting and trailing separator chars
+            $string = trim($string, $separator);
+
             return $string;            
         }
 		
@@ -448,6 +455,8 @@
             $string = str_replace(array(';','/','?',':','@','&','=','+','$',','), '', $string);
 
             // replace some characters to similar ones
+            // underscores aren't allowed in domain names according to rfc specs, and
+            // cause trouble in some browsers, particularly with cookies.
             $search  = array('_',' ','ä','ö','ü','é','è','à','ç','à','è','ì','ò','ù','á','é','í','ó','ú','ë','ï');
             $replace = array('-','-','a','o','u','e','e','a','c','a','e','i','o','u','a','e','i','o','u','e','i');
             $string = str_replace($search, $replace, $string);
@@ -471,22 +480,65 @@
 		function xhtmlize( $string )
 		{
 		      // use kses in the "xhtml converter" mode
-		      $config =& Config::getConfig();
-		      if( $config->getValue( "xhtml_converter_enabled" )) {
+            include_once( PLOG_CLASS_PATH."class/config/config.class.php" );
+            $config =& Config::getConfig();
+            if( $config->getValue( "xhtml_converter_enabled" )) {
                	include_once( PLOG_CLASS_PATH."class/data/kses.class.php" );		      
-		          $kses = new kses( true, $config->getValue( "xhtml_converter_aggresive_mode_enabled"));
-		          $result = $kses->Parse( $string );
-		          
-		          // if balanceTags wasn't broken, we could use it...
-		          //$result = Textfilter::balanceTags( $result );		          
-		      }
-		      else
-		          $result = $string;
-		      
-		      return $result;
+                $kses = new kses( true, $config->getValue( "xhtml_converter_aggresive_mode_enabled"));
+                $result = $kses->Parse( $string );
+                
+                    // if balanceTags wasn't broken, we could use it...
+                    //$result = Textfilter::balanceTags( $result );		          
+            }
+            else
+                $result = $string;
+            
+            return $result;
 		}
-		
+
+
 		/**
+		 * slugifies a string, which is to say that it urlizes it but
+         *  additionally only uses characters allowed in a postname, as
+         *  defined by the linkparser.
+		 *
+		 * @param string
+		 * @return the slugified string
+		 */
+        function slugify( $string ){
+            include_once( PLOG_CLASS_PATH."class/config/config.class.php" );
+            include_once( PLOG_CLASS_PATH.'class/net/linkparser.class.php' );
+
+			$config =& Config::getConfig();
+            $separator = $config->getValue( "urlize_word_separator", URLIZE_WORD_SEPARATOR_DEFAULT );
+                // remove characters not allowed by the link parser
+            $lp = new LinkParser("");
+            $regexp = $lp->getValidTag("{postname}");
+            $start_bracket = strpos($regexp, "[");
+            $end_bracket = strrpos($regexp, "]");
+            $validChars = false;
+            if($start_bracket !== false && $end_bracket !== false){
+                $validChars = substr($regexp, $start_bracket+1,
+                                     $end_bracket-$start_bracket-1);
+            }
+                // link format doesn't contain brackets, or is not what we
+                // were expecting using default
+            if($validChars === false){
+                $validChars = "_0-9a-zA-Z.-";
+            }
+                // remove "bad" characters
+            $string = preg_replace("/[^".$validChars."]/", $separator, Textfilter::htmlDecode($string));
+                // remove doubled separators
+            $string = preg_replace("/[".$separator."]+/", $separator, $string);
+                // remove starting and trailing separator chars
+            $string = trim($string, $separator);
+
+            return $string;
+		}
+
+
+        
+		/**
 		 * @private
 		 */
 		function checkboxToBoolean( $value )



More information about the pLog-svn mailing list