[pLog-svn] r6127 - plog/branches/lifetype-1.2/class/security

Mon Dec 24 15:33:03 EST 2007

Author: jondaley
Date: 2007-12-24 15:33:03 -0500 (Mon, 24 Dec 2007)
New Revision: 6127

Modified:
   plog/branches/lifetype-1.2/class/security/bayesianfilter.class.php
Log:
If a comment was previously rejected, don't bother checking it; just train it as spam.  I didn't copy the untrain code, for two reasons: 1. since I didn't check the spamicity, I don't know whether I need to untrain, and 2. Mark's point about accidentally training the filter badly - this approach will have less of an impact.

Modified: plog/branches/lifetype-1.2/class/security/bayesianfilter.class.php
===================================================================

--- plog/branches/lifetype-1.2/class/security/bayesianfilter.class.php	2007-12-24 19:21:06 UTC (rev 6126)
+++ plog/branches/lifetype-1.2/class/security/bayesianfilter.class.php	2007-12-24 20:33:03 UTC (rev 6127)
@@ -46,7 +46,7 @@
             $config =& Config::getConfig();
 
             if (!$config->getValue("bayesian_filter_enabled")) {
-                return new PipelineResult(true);
+                return new PipelineResult();
             }
 
             // get some info
@@ -58,8 +58,7 @@
             // or we're receiving a trackback
             // so there's no point in doing anything else if that's not the case
             if( $request->getValue( "op" ) != "AddComment" && $request->getValue( "op" ) != "AddTrackback" ) {
-                $result = new PipelineResult();
-                return $result;
+                return new PipelineResult();
             }
 
             lt_include( PLOG_CLASS_PATH."class/dao/articlecomments.class.php" );
@@ -118,8 +117,7 @@
 				// if the article to which the articleId parameter refers to doesn't exist, there really
 				// is no need to process the whole comments even if it's spam, the request will not be
 				// processed by AddCommentAction for this very same reason
-                $result = new PipelineResult();
-                return $result;
+                return new PipelineResult();
 			}
 
 			// and if it does, are comments enabled for it anyway?
@@ -127,83 +125,78 @@
             if( $article->getCommentsEnabled() == false || $blogSettings->getValue ( "comments_enabled" ) == false ) {
 				// we let this request pass through although it may be spam, since it will be blocked
 				// later on by AddCommentAction because comments aren't enabled
-                $result = new PipelineResult();
-                return $result;
+                return new PipelineResult();
 			}
 
             if( $parentId == "" )
                 $parentId = 0;
 
-            $spamicity = $this->getSpamProbability($blogInfo->getId(), $commentTopic, $commentText, $userName, $userEmail, $userUrl);
+            lt_include( PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php" );
+            if($previouslyRejected){
+					// train this as spam
+                BayesianFilterCore::train( $blogInfo->getId(), $commentTopic,
+                                           $commentText, $userName, $userEmail,
+                                           $userUrl, true );
+                    // return true, since we didn't check it
+                return new PipelineResult();
+            }
+            else{
+                    // check whether this is spam or not, and train appropriately
+                $spamicity = $this->getSpamProbability($blogInfo->getId(),
+                                                       $commentTopic,
+                                                       $commentText,
+                                                       $userName, $userEmail,
+                                                       $userUrl);
 
-            if ($spamicity >= $config->getValue("bayesian_filter_spam_probability_treshold"))
-            {
-                // need this to get the locale
-                $plr = $this->getPipelineRequest();
-                $bi = $plr->getBlogInfo();
-                $locale = $bi->getLocale();
-
-                // now we need to check what we have to do with this comment... either throw it away
-                // or keep it in the database
-
-                // this piece of code shouldn't really go here, but it's easier than letting
-                // the AddComment action that there was actually a comment and that it should
-                // still be added but marked as spam and so on... sometimes breaking a few
-                // rules makes things easier :)
-                if( $config->getValue( "bayesian_filter_spam_comments_action" ) == BAYESIAN_FILTER_KEEP_COMMENT_ACTION ) {
-                    $result = new PipelineResult(false, HIGH_SPAM_PROBABILITY, $locale->tr("error_comment_spam_keep" ));
-                    $comments = new ArticleComments();
-                    $clientIp = Client::getIp();
-                    $comment = new UserComment( $articleId, $blogInfo->getId(), $parentId, $commentTopic, $commentText,
-                                                   null, $userName, $userEmail, $userUrl, $clientIp,
-                                                   0, COMMENT_STATUS_SPAM );
-                    // mark it as a trackback instead of a user comment...
-
-                    if( $isTrackback ) {
-                        $comment->setType( COMMENT_TYPE_TRACKBACK );
+                if ($spamicity >= $config->getValue("bayesian_filter_spam_probability_treshold"))
+                {
+                        // need this to get the locale
+                    $plr = $this->getPipelineRequest();
+                    $bi = $plr->getBlogInfo();
+                    $locale = $bi->getLocale();
+                    
+                        // now we need to check what we have to do with this comment... either throw it away
+                        // or keep it in the database
+                    
+                        // this piece of code shouldn't really go here, but it's easier than letting
+                        // the AddComment action that there was actually a comment and that it should
+                        // still be added but marked as spam and so on... sometimes breaking a few
+                        // rules makes things easier :)
+                    if( $config->getValue( "bayesian_filter_spam_comments_action" ) == BAYESIAN_FILTER_KEEP_COMMENT_ACTION )
+                    {
+                        $result = new PipelineResult(false, HIGH_SPAM_PROBABILITY, $locale->tr("error_comment_spam_keep" ));
+                        $comments = new ArticleComments();
+                        $clientIp = Client::getIp();
+                        $comment = new UserComment( $articleId, $blogInfo->getId(), $parentId, $commentTopic, $commentText,
+                                                    null, $userName, $userEmail, $userUrl, $clientIp,
+                                                    0, COMMENT_STATUS_SPAM );
+                            // mark it as a trackback instead of a user comment...
+                        
+                        if( $isTrackback ) {
+                            $comment->setType( COMMENT_TYPE_TRACKBACK );
+                        }
+                        
+                            // add the comment to the db
+                        $comments->addComment( $comment );
                     }
-
-                    // add the comment to the db
-                    $comments->addComment( $comment );
+                    else {
+                            // nothing to do here, simply throw the comment away
+                        $result = new PipelineResult(false, HIGH_SPAM_PROBABILITY,
+                                                     $locale->tr("error_comment_spam_throw_away" ));
+                    }
+                    $spam = true;
                 }
-                else {
-                    // nothing to do here, simply throw the comment away
-                    $result = new PipelineResult(false, HIGH_SPAM_PROBABILITY, $locale->tr("error_comment_spam_throw_away" ));
+                else
+                {
+                    $result = new PipelineResult();
+                    $spam = false;
                 }
-                $spam = true;
-            }
-            else
-            {
-                $result = new PipelineResult(true);
-                $spam = false;
-            }
 
-            if ( !$previouslyRejected )
-            {
-                // train the filter with the message, be it spam or not...
-				lt_include( PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php" );
+                    // train the filter with the message, be it spam or not...
                 BayesianFilterCore::train( $blogInfo->getId(), $commentTopic, $commentText, $userName, $userEmail,
                                            $userUrl, $spam );
+                return $result;
             }
-            else
-            {
-            	// This is a rejected message. If we think that this is non-spam,
-            	// we want to untrain it and then retrain it as spam
-				lt_include( PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php" );
-            	if ( !$spam )
-            	{
-            		// Un-train this non-spam
-					BayesianFilterCore::untrain( $blogInfo->getId(), $commentTopic, $commentText, $userName, $userEmail,
-											   $userUrl, $spam );
-
-					// train this as spam
-					BayesianFilterCore::train( $blogInfo->getId(), $commentTopic, $commentText, $userName, $userEmail,
-											   $userUrl, true );
-            	}
-            }
-
-            //print "<h1>" . number_format($spamicity * 100, 0) . "% of spamicity</h1>";
-            return $result;
         }
 
         /**