[pLog-svn] r6087 - plog/branches/lifetype-1.2/class/security

Wed Nov 28 15:30:56 EST 2007

Author: mark
Date: 2007-11-28 15:30:55 -0500 (Wed, 28 Nov 2007)
New Revision: 6087

Modified:
   plog/branches/lifetype-1.2/class/security/bayesianfilter.class.php
   plog/branches/lifetype-1.2/class/security/pipelinerequest.class.php
Log:
Fixed bug 1426 (http://bugs.lifetype.net/view.php?id=1426).

However, I found another bug: the pipeline seems to execute twice, and I have no idea what is causing it.

Modified: plog/branches/lifetype-1.2/class/security/bayesianfilter.class.php
===================================================================

--- plog/branches/lifetype-1.2/class/security/bayesianfilter.class.php	2007-11-28 19:35:13 UTC (rev 6086)
+++ plog/branches/lifetype-1.2/class/security/bayesianfilter.class.php	2007-11-28 20:30:55 UTC (rev 6087)
@@ -17,7 +17,7 @@
 
     /**
      * \ingroup Security
-     * 
+     *
      * Filters the text posted in a comment by a user, to prevent spam-bots. This
      * filter only works if the incoming request has the "op" parameter as
      * "AddComment", because then it means that we're posting a comment. If it's not
@@ -27,65 +27,82 @@
      * in order to filter spam comments out. The filter needs some training but after that it should
      * be fairly reliable.
      */
-    class BayesianFilter extends PipelineFilter 
+    class BayesianFilter extends PipelineFilter
     {
-    
+
         function BayesianFilter( $pipelineRequest )
         {
             $this->PipelineFilter( $pipelineRequest );
         }
-        
+
         /**
         * Processes incoming requests
         *
         * @return A positive PipelineResult object is the comment is not spam or a negative
         * one if it is.
-        */         
+        */
         function filter()
         {
             $config =& Config::getConfig();
-            
+
             if (!$config->getValue("bayesian_filter_enabled")) {
                 return new PipelineResult(true);
             }
-        
-            // get some info            
+
+            // get some info
             $blogInfo = $this->_pipelineRequest->getBlogInfo();
             $request  = $this->_pipelineRequest->getHttpRequest();
             $previouslyRejected = $this->_pipelineRequest->getRejectedState();
-            
-            // we only have to filter the contents if the user is posting a comment 
+
+            // we only have to filter the contents if the user is posting a comment
             // or we're receiving a trackback
             // so there's no point in doing anything else if that's not the case
             if( $request->getValue( "op" ) != "AddComment" && $request->getValue( "op" ) != "AddTrackback" ) {
                 $result = new PipelineResult();
                 return $result;
             }
-			
+
             lt_include( PLOG_CLASS_PATH."class/dao/articlecomments.class.php" );
             lt_include( PLOG_CLASS_PATH."class/dao/articles.class.php" );
-            
+
+            lt_include( PLOG_CLASS_PATH."class/data/filter/htmlfilter.class.php" );
+            lt_include( PLOG_CLASS_PATH."class/data/filter/urlconverter.class.php" );
+            lt_include( PLOG_CLASS_PATH."class/data/filter/allowedhtmlfilter.class.php" );
+            lt_include( PLOG_CLASS_PATH."class/data/filter/xhtmlizefilter.class.php" );
+
             // if it's a trackback, the data is in another place...
             $parentId = "";
             $isTrackback = ($request->getValue( "op" ) == "AddTrackback");
             if( $isTrackback ) {
-                $commentText = $request->getValue( "excerpt" );
-                $commentTopic = $request->getValue( "title" );
-                $articleId = $request->getValue( "id" );
-                // that's all we can get from a trackback...
-                $userName = $request->getValue( "blog_name" );
-                $userUrl = $request->getValue( "url" );
-                $userEmail = $request->getValue( "" );
+                $f = new HtmlFilter();
+                $userName = $request->getFilteredValue( "blog_name", $f );
+                $userEmail = $request->getFilteredValue( "", $f );
+                $commentTopic = $request->getFilteredValue( "title", $f );
+                $commentText = $request->getFilteredValue( "excerpt", $f );
+
+    			$f = new HtmlFilter();
+    			$f->addFilter( new UrlConverter());
+    			$userUrl = $request->getFilteredValue( "url", $f );
+
+                $articleId = (int) $request->getValue( "id" );
             }
             else {
                 // or else let's assume that we're dealing with a comment
-                $commentText = $request->getValue( "commentText" );
-                $commentTopic = $request->getValue( "commentTopic" );
-                $userName = $request->getValue( "userName" );
-                $userEmail = $request->getValue( "userEmail" );
-                $userUrl = $request->getValue( "userUrl" );
-                $articleId = $request->getValue( "articleId" );
-                $parentId  = $request->getValue( "parentId" );          
+                $f = new HtmlFilter();
+                $userName = $request->getFilteredValue( "userName", $f );
+                $userEmail = $request->getFilteredValue( "userEmail", $f );
+                $commentTopic = $request->getFilteredValue( "commentTopic", $f );
+
+    			$f = new HtmlFilter();
+    			$f->addFilter( new UrlConverter());
+    			$userUrl = $request->getFilteredValue( "userUrl", $f );
+
+    			$f = new AllowedHtmlFilter();
+    			$f->addFilter( new XhtmlizeFilter());
+    			$commentText = $request->getFilteredValue( "commentText", $f );
+
+                $articleId = (int) $request->getValue( "articleId" );
+                $parentId  = (int) $request->getValue( "parentId" );
             }
 
 			// the two checks below are duplicating some of the code in AddCommentAction
@@ -102,30 +119,30 @@
 				// is no need to process the whole comments even if it's spam, the request will not be
 				// processed by AddCommentAction for this very same reason
                 $result = new PipelineResult();
-                return $result;	
+                return $result;
 			}
-			
+
 			// and if it does, are comments enabled for it anyway?
 			$blogSettings = $blogInfo->getSettings();
             if( $article->getCommentsEnabled() == false || $blogSettings->getValue ( "comments_enabled" ) == false ) {
 				// we let this request pass through although it may be spam, since it will be blocked
-				// later on by AddCommentAction because comments aren't enabled	
+				// later on by AddCommentAction because comments aren't enabled
                 $result = new PipelineResult();
-                return $result;	
+                return $result;
 			}
-            
+
             if( $parentId == "" )
                 $parentId = 0;
 
             $spamicity = $this->getSpamProbability($blogInfo->getId(), $commentTopic, $commentText, $userName, $userEmail, $userUrl);
-            
+
             if ($spamicity >= $config->getValue("bayesian_filter_spam_probability_treshold"))
             {
                 // need this to get the locale
                 $plr = $this->getPipelineRequest();
                 $bi = $plr->getBlogInfo();
                 $locale = $bi->getLocale();
-                
+
                 // now we need to check what we have to do with this comment... either throw it away
                 // or keep it in the database
 
@@ -141,11 +158,11 @@
                                                    null, $userName, $userEmail, $userUrl, $clientIp,
                                                    0, COMMENT_STATUS_SPAM );
                     // mark it as a trackback instead of a user comment...
-                    
+
                     if( $isTrackback ) {
                         $comment->setType( COMMENT_TYPE_TRACKBACK );
                     }
-                        
+
                     // add the comment to the db
                     $comments->addComment( $comment );
                 }
@@ -160,25 +177,25 @@
                 $result = new PipelineResult(true);
                 $spam = false;
             }
-            
+
             if ( !$previouslyRejected )
             {
                 // train the filter with the message, be it spam or not...
-				lt_include( PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php" );				
+				lt_include( PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php" );
                 BayesianFilterCore::train( $blogInfo->getId(), $commentTopic, $commentText, $userName, $userEmail,
                                            $userUrl, $spam );
             }
             else
             {
-            	// This is a rejected message. If we think that this is non-spam, 
+            	// This is a rejected message. If we think that this is non-spam,
             	// we want to untrain it and then retrain it as spam
-				lt_include( PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php" );				
+				lt_include( PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php" );
             	if ( !$spam )
             	{
             		// Un-train this non-spam
 					BayesianFilterCore::untrain( $blogInfo->getId(), $commentTopic, $commentText, $userName, $userEmail,
 											   $userUrl, $spam );
-											   
+
 					// train this as spam
 					BayesianFilterCore::train( $blogInfo->getId(), $commentTopic, $commentText, $userName, $userEmail,
 											   $userUrl, true );
@@ -188,10 +205,10 @@
             //print "<h1>" . number_format($spamicity * 100, 0) . "% of spamicity</h1>";
             return $result;
         }
-        
+
         /**
         * @private
-        */        
+        */
         function getSpamProbability($blogId, $topic, $text, $userName, $userEmail, $userUrl)
         {
             lt_include( PLOG_CLASS_PATH."class/bayesian/bayesiantokenizer.class.php" );
@@ -207,40 +224,40 @@
 
             $tokens = array_merge($tokensTopic, $tokensText, $tokensUserName, $tokensUserEmail, $tokensUserUrl);
             $significantTokens = BayesianFilter::_getMostSignificantTokens($blogId, $tokens);
-            
+
             return BayesianFilter::_getBayesProbability($significantTokens);
         }
-        
+
         /**
         * @private
         */
         function _getMostSignificantTokens($blogId, $tokens)
-        {       
+        {
             lt_include( PLOG_CLASS_PATH."class/dao/bayesiantokens.class.php" );
             lt_include( PLOG_CLASS_PATH."class/dao/bayesianfilterinfos.class.php" );
 
-            $config =& Config::getConfig(); 
-            
+            $config =& Config::getConfig();
+
             $bayesianFilterInfos = new BayesianFilterInfos();
             $bayesianFilterInfo  = $bayesianFilterInfos->getBlogBayesianFilterInfo($blogId);
-            
+
             $totalSpam = $bayesianFilterInfo->getTotalSpam();
             $totalNonSpam = $bayesianFilterInfo->getTotalNonSpam();
-            
+
             $bayesianTokens = new BayesianTokens();
-            
+
             foreach ($tokens as $token)
             {
                 $bayesianTokens->updateOccurrences($blogId, $token, 0, 0, $totalSpam, $totalNonSpam, false);
             }
-            
-            $tokens = $bayesianTokens->getBayesianTokensFromArray($blogId, $tokens);                                                
+
+            $tokens = $bayesianTokens->getBayesianTokensFromArray($blogId, $tokens);
             $tempArray = array();
-                        
+
             foreach ($tokens as $token)
             {
                 if ($token->isSignificant() && $token->isValid())
-                {                   
+                {
                     array_push($tempArray, abs($token->getProb() - 0.5));
                 }
             }
@@ -248,35 +265,35 @@
             arsort($tempArray);
             $significantTokens = array();
             $count = 0;
-            
+
             foreach ($tempArray as $key => $value)
             {
                 array_push($significantTokens, $tokens[$key]);
                 $count++;
-                
+
                 if ($count == $config->getValue("bayesian_filter_number_significant_tokens"))
-                {                
+                {
                     break;
                 }
             }
-                          
+
             return $significantTokens;
         }
-        
+
         /**
         * @private
         */
         function _getBayesProbability($significantTokens)
         {
             $productProb   = 1;
-            $productNoProb = 1;            
-            
+            $productNoProb = 1;
+
             foreach ($significantTokens as $token)
             {
-                $productProb   *= $token->getProb();                
-                $productNoProb *= (1 - $token->getProb());                
+                $productProb   *= $token->getProb();
+                $productNoProb *= (1 - $token->getProb());
             }
-                                
+
             return $productProb / ($productProb + $productNoProb);
         }
     }

Modified: plog/branches/lifetype-1.2/class/security/pipelinerequest.class.php
===================================================================
--- plog/branches/lifetype-1.2/class/security/pipelinerequest.class.php	2007-11-28 19:35:13 UTC (rev 6086)
+++ plog/branches/lifetype-1.2/class/security/pipelinerequest.class.php	2007-11-28 20:30:55 UTC (rev 6087)
@@ -8,7 +8,7 @@
      * such as the incoming HTTP request and a reference to the BlogInfo object
      * of the blog that is executing the Pipeline.
      */
-	class PipelineRequest  
+	class PipelineRequest
 	{
 
     	var $_httpRequest;
@@ -24,13 +24,13 @@
          */
         function PipelineRequest( $httpRequest, $blogInfo, $rejected = false )
         {
-        	
 
+
             if( is_array($httpRequest))
-            	$this->_httpRequest = new Properties( $httpRequest );
+            	$this->_httpRequest = new Request( $httpRequest );
             else
         		$this->_httpRequest  = $httpRequest;
-                
+
             $this->_blogInfo         = $blogInfo;
             $this->_requestRejected  = $rejected;
         }
@@ -51,15 +51,15 @@
         {
         	return $this->_httpRequest;
         }
-        
+
         /**
-        * @return Returns a boolean that indicates if this pipeline request has 
+        * @return Returns a boolean that indicates if this pipeline request has
         * already been rejected
         */
         function getRejectedState()
         {
         	return $this->_requestRejected;
         }
-        
+
     }
 ?>