[pLog-svn] r6087 - plog/branches/lifetype-1.2/class/security

Mark Wu markplace at gmail.com
Wed Nov 28 15:32:29 EST 2007


Hi Oscar:

The bug is fixed, but another bug appears...

It seems that addcomment(), or rather the whole pipeline, will execute twice.

I have no idea what's going on. Can you take a look at it? 

Mark 

> -----Original Message-----
> From: plog-svn-bounces at devel.lifetype.net 
> [mailto:plog-svn-bounces at devel.lifetype.net] On Behalf Of 
> mark at devel.lifetype.net
> Sent: Thursday, November 29, 2007 4:31 AM
> To: plog-svn at devel.lifetype.net
> Subject: [pLog-svn] r6087 - plog/branches/lifetype-1.2/class/security
> 
> Author: mark
> Date: 2007-11-28 15:30:55 -0500 (Wed, 28 Nov 2007) New Revision: 6087
> 
> Modified:
>    plog/branches/lifetype-1.2/class/security/bayesianfilter.class.php
>    plog/branches/lifetype-1.2/class/security/pipelinerequest.class.php
> Log:
> fixed bug http://bugs.lifetype.net/view.php?id=1426.
> 
> But, I found another bug, it seems the pipeline will execute 
> twice, no idea what's going on.
> 
> Modified: 
> plog/branches/lifetype-1.2/class/security/bayesianfilter.class.php
> ===================================================================
> --- 
> plog/branches/lifetype-1.2/class/security/bayesianfilter.cl
> ass.php	2007-11-28 19:35:13 UTC (rev 6086)
> +++ 
> plog/branches/lifetype-1.2/class/security/bayesianfilter.cl
> ass.php	2007-11-28 20:30:55 UTC (rev 6087)
> @@ -17,7 +17,7 @@
>  
>      /**
>       * \ingroup Security
> -     * 
> +     *
>       * Filters the text posted in a comment by a user, to 
> prevent spam-bots. This
>       * filter only works if the incoming request has the 
> "op" parameter as
>       * "AddComment", because then it means that we're 
> posting a comment. If it's not @@ -27,65 +27,82 @@
>       * in order to filter spam comments out. The filter 
> needs some training but after that it should
>       * be fairly reliable.
>       */
> -    class BayesianFilter extends PipelineFilter 
> +    class BayesianFilter extends PipelineFilter
>      {
> -    
> +
>          function BayesianFilter( $pipelineRequest )
>          {
>              $this->PipelineFilter( $pipelineRequest );
>          }
> -        
> +
>          /**
>          * Processes incoming requests
>          *
>          * @return A positive PipelineResult object is the 
> comment is not spam or a negative
>          * one if it is.
> -        */         
> +        */
>          function filter()
>          {
>              $config =& Config::getConfig();
> -            
> +
>              if (!$config->getValue("bayesian_filter_enabled")) {
>                  return new PipelineResult(true);
>              }
> -        
> -            // get some info            
> +
> +            // get some info
>              $blogInfo = $this->_pipelineRequest->getBlogInfo();
>              $request  = $this->_pipelineRequest->getHttpRequest();
>              $previouslyRejected = 
> $this->_pipelineRequest->getRejectedState();
> -            
> -            // we only have to filter the contents if the 
> user is posting a comment 
> +
> +            // we only have to filter the contents if the user is 
> + posting a comment
>              // or we're receiving a trackback
>              // so there's no point in doing anything else if 
> that's not the case
>              if( $request->getValue( "op" ) != "AddComment" 
> && $request->getValue( "op" ) != "AddTrackback" ) {
>                  $result = new PipelineResult();
>                  return $result;
>              }
> -			
> +
>              lt_include( 
> PLOG_CLASS_PATH."class/dao/articlecomments.class.php" );
>              lt_include( 
> PLOG_CLASS_PATH."class/dao/articles.class.php" );
> -            
> +
> +            lt_include( 
> PLOG_CLASS_PATH."class/data/filter/htmlfilter.class.php" );
> +            lt_include( 
> PLOG_CLASS_PATH."class/data/filter/urlconverter.class.php" );
> +            lt_include( 
> PLOG_CLASS_PATH."class/data/filter/allowedhtmlfilter.class.php" );
> +            lt_include( 
> + PLOG_CLASS_PATH."class/data/filter/xhtmlizefilter.class.php" );
> +
>              // if it's a trackback, the data is in another place...
>              $parentId = "";
>              $isTrackback = ($request->getValue( "op" ) == 
> "AddTrackback");
>              if( $isTrackback ) {
> -                $commentText = $request->getValue( "excerpt" );
> -                $commentTopic = $request->getValue( "title" );
> -                $articleId = $request->getValue( "id" );
> -                // that's all we can get from a trackback...
> -                $userName = $request->getValue( "blog_name" );
> -                $userUrl = $request->getValue( "url" );
> -                $userEmail = $request->getValue( "" );
> +                $f = new HtmlFilter();
> +                $userName = $request->getFilteredValue( 
> "blog_name", $f );
> +                $userEmail = $request->getFilteredValue( "", $f );
> +                $commentTopic = $request->getFilteredValue( 
> "title", $f );
> +                $commentText = $request->getFilteredValue( 
> "excerpt", 
> + $f );
> +
> +    			$f = new HtmlFilter();
> +    			$f->addFilter( new UrlConverter());
> +    			$userUrl = $request->getFilteredValue( 
> "url", $f );
> +
> +                $articleId = (int) $request->getValue( "id" );
>              }
>              else {
>                  // or else let's assume that we're dealing 
> with a comment
> -                $commentText = $request->getValue( "commentText" );
> -                $commentTopic = $request->getValue( "commentTopic" );
> -                $userName = $request->getValue( "userName" );
> -                $userEmail = $request->getValue( "userEmail" );
> -                $userUrl = $request->getValue( "userUrl" );
> -                $articleId = $request->getValue( "articleId" );
> -                $parentId  = $request->getValue( "parentId" 
> );          
> +                $f = new HtmlFilter();
> +                $userName = $request->getFilteredValue( 
> "userName", $f );
> +                $userEmail = $request->getFilteredValue( 
> "userEmail", $f );
> +                $commentTopic = $request->getFilteredValue( 
> + "commentTopic", $f );
> +
> +    			$f = new HtmlFilter();
> +    			$f->addFilter( new UrlConverter());
> +    			$userUrl = $request->getFilteredValue( 
> "userUrl", $f );
> +
> +    			$f = new AllowedHtmlFilter();
> +    			$f->addFilter( new XhtmlizeFilter());
> +    			$commentText = 
> $request->getFilteredValue( "commentText", $f );
> +
> +                $articleId = (int) $request->getValue( "articleId" );
> +                $parentId  = (int) $request->getValue( "parentId" );
>              }
>  
>  			// the two checks below are duplicating 
> some of the code in AddCommentAction @@ -102,30 +119,30 @@
>  				// is no need to process the 
> whole comments even if it's spam, the request will not be
>  				// processed by 
> AddCommentAction for this very same reason
>                  $result = new PipelineResult();
> -                return $result;	
> +                return $result;
>  			}
> -			
> +
>  			// and if it does, are comments enabled 
> for it anyway?
>  			$blogSettings = $blogInfo->getSettings();
>              if( $article->getCommentsEnabled() == false || 
> $blogSettings->getValue ( "comments_enabled" ) == false ) {
>  				// we let this request pass 
> through although it may be spam, since it will be blocked
> -				// later on by AddCommentAction 
> because comments aren't enabled	
> +				// later on by AddCommentAction 
> because comments aren't enabled
>                  $result = new PipelineResult();
> -                return $result;	
> +                return $result;
>  			}
> -            
> +
>              if( $parentId == "" )
>                  $parentId = 0;
>  
>              $spamicity = 
> $this->getSpamProbability($blogInfo->getId(), $commentTopic, 
> $commentText, $userName, $userEmail, $userUrl);
> -            
> +
>              if ($spamicity >= 
> $config->getValue("bayesian_filter_spam_probability_treshold"))
>              {
>                  // need this to get the locale
>                  $plr = $this->getPipelineRequest();
>                  $bi = $plr->getBlogInfo();
>                  $locale = $bi->getLocale();
> -                
> +
>                  // now we need to check what we have to do 
> with this comment... either throw it away
>                  // or keep it in the database
>  
> @@ -141,11 +158,11 @@
>                                                     null, 
> $userName, $userEmail, $userUrl, $clientIp,
>                                                     0, 
> COMMENT_STATUS_SPAM );
>                      // mark it as a trackback instead of a 
> user comment...
> -                    
> +
>                      if( $isTrackback ) {
>                          $comment->setType( COMMENT_TYPE_TRACKBACK );
>                      }
> -                        
> +
>                      // add the comment to the db
>                      $comments->addComment( $comment );
>                  }
> @@ -160,25 +177,25 @@
>                  $result = new PipelineResult(true);
>                  $spam = false;
>              }
> -            
> +
>              if ( !$previouslyRejected )
>              {
>                  // train the filter with the message, be it 
> spam or not...
> -				lt_include( 
> PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php" 
> );				
> +				lt_include( 
> +PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php" );
>                  BayesianFilterCore::train( 
> $blogInfo->getId(), $commentTopic, $commentText, $userName, 
> $userEmail,
>                                             $userUrl, $spam );
>              }
>              else
>              {
> -            	// This is a rejected message. If we think that 
> this is non-spam, 
> +            	// This is a rejected message. If we think that this is 
> + non-spam,
>              	// we want to untrain it and then retrain it as spam
> -				lt_include( 
> PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php" 
> );				
> +				lt_include( 
> +PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php" );
>              	if ( !$spam )
>              	{
>              		// Un-train this non-spam
>  					
> BayesianFilterCore::untrain( $blogInfo->getId(), 
> $commentTopic, $commentText, $userName, $userEmail,
>  								
> 			   $userUrl, $spam );
> -								
> 			   
> +
>  					// train this as spam
>  					
> BayesianFilterCore::train( $blogInfo->getId(), $commentTopic, 
> $commentText, $userName, $userEmail,
>  								
> 			   $userUrl, true );
> @@ -188,10 +205,10 @@
>              //print "<h1>" . number_format($spamicity * 100, 
> 0) . "% of spamicity</h1>";
>              return $result;
>          }
> -        
> +
>          /**
>          * @private
> -        */        
> +        */
>          function getSpamProbability($blogId, $topic, $text, 
> $userName, $userEmail, $userUrl)
>          {
>              lt_include( 
> PLOG_CLASS_PATH."class/bayesian/bayesiantokenizer.class.php" 
> ); @@ -207,40 +224,40 @@
>  
>              $tokens = array_merge($tokensTopic, $tokensText, 
> $tokensUserName, $tokensUserEmail, $tokensUserUrl);
>              $significantTokens = 
> BayesianFilter::_getMostSignificantTokens($blogId, $tokens);
> -            
> +
>              return 
> BayesianFilter::_getBayesProbability($significantTokens);
>          }
> -        
> +
>          /**
>          * @private
>          */
>          function _getMostSignificantTokens($blogId, $tokens)
> -        {       
> +        {
>              lt_include( 
> PLOG_CLASS_PATH."class/dao/bayesiantokens.class.php" );
>              lt_include( 
> PLOG_CLASS_PATH."class/dao/bayesianfilterinfos.class.php" );
>  
> -            $config =& Config::getConfig(); 
> -            
> +            $config =& Config::getConfig();
> +
>              $bayesianFilterInfos = new BayesianFilterInfos();
>              $bayesianFilterInfo  = 
> $bayesianFilterInfos->getBlogBayesianFilterInfo($blogId);
> -            
> +
>              $totalSpam = $bayesianFilterInfo->getTotalSpam();
>              $totalNonSpam = $bayesianFilterInfo->getTotalNonSpam();
> -            
> +
>              $bayesianTokens = new BayesianTokens();
> -            
> +
>              foreach ($tokens as $token)
>              {
>                  $bayesianTokens->updateOccurrences($blogId, 
> $token, 0, 0, $totalSpam, $totalNonSpam, false);
>              }
> -            
> -            $tokens = 
> $bayesianTokens->getBayesianTokensFromArray($blogId, 
> $tokens);                                                
> +
> +            $tokens = 
> + $bayesianTokens->getBayesianTokensFromArray($blogId, $tokens);
>              $tempArray = array();
> -                        
> +
>              foreach ($tokens as $token)
>              {
>                  if ($token->isSignificant() && $token->isValid())
> -                {                   
> +                {
>                      array_push($tempArray, 
> abs($token->getProb() - 0.5));
>                  }
>              }
> @@ -248,35 +265,35 @@
>              arsort($tempArray);
>              $significantTokens = array();
>              $count = 0;
> -            
> +
>              foreach ($tempArray as $key => $value)
>              {
>                  array_push($significantTokens, $tokens[$key]);
>                  $count++;
> -                
> +
>                  if ($count == 
> $config->getValue("bayesian_filter_number_significant_tokens"))
> -                {                
> +                {
>                      break;
>                  }
>              }
> -                          
> +
>              return $significantTokens;
>          }
> -        
> +
>          /**
>          * @private
>          */
>          function _getBayesProbability($significantTokens)
>          {
>              $productProb   = 1;
> -            $productNoProb = 1;            
> -            
> +            $productNoProb = 1;
> +
>              foreach ($significantTokens as $token)
>              {
> -                $productProb   *= $token->getProb();                
> -                $productNoProb *= (1 - $token->getProb());   
>              
> +                $productProb   *= $token->getProb();
> +                $productNoProb *= (1 - $token->getProb());
>              }
> -                                
> +
>              return $productProb / ($productProb + $productNoProb);
>          }
>      }
> 
> Modified: 
> plog/branches/lifetype-1.2/class/security/pipelinerequest.class.php
> ===================================================================
> --- 
> plog/branches/lifetype-1.2/class/security/pipelinerequest.cl
> ass.php	2007-11-28 19:35:13 UTC (rev 6086)
> +++ 
> plog/branches/lifetype-1.2/class/security/pipelinerequest.cl
> ass.php	2007-11-28 20:30:55 UTC (rev 6087)
> @@ -8,7 +8,7 @@
>       * such as the incoming HTTP request and a reference to 
> the BlogInfo object
>       * of the blog that is executing the Pipeline.
>       */
> -	class PipelineRequest  
> +	class PipelineRequest
>  	{
>  
>      	var $_httpRequest;
> @@ -24,13 +24,13 @@
>           */
>          function PipelineRequest( $httpRequest, $blogInfo, 
> $rejected = false )
>          {
> -        	
>  
> +
>              if( is_array($httpRequest))
> -            	$this->_httpRequest = new Properties( $httpRequest );
> +            	$this->_httpRequest = new Request( $httpRequest );
>              else
>          		$this->_httpRequest  = $httpRequest;
> -                
> +
>              $this->_blogInfo         = $blogInfo;
>              $this->_requestRejected  = $rejected;
>          }
> @@ -51,15 +51,15 @@
>          {
>          	return $this->_httpRequest;
>          }
> -        
> +
>          /**
> -        * @return Returns a boolean that indicates if this 
> pipeline request has 
> +        * @return Returns a boolean that indicates if this pipeline 
> + request has
>          * already been rejected
>          */
>          function getRejectedState()
>          {
>          	return $this->_requestRejected;
>          }
> -        
> +
>      }
>  ?>
> 
> _______________________________________________
> pLog-svn mailing list
> pLog-svn at devel.lifetype.net
> http://limedaley.com/mailman/listinfo/plog-svn



More information about the pLog-svn mailing list