[pLog-svn] r6127 - plog/branches/lifetype-1.2/class/security
jondaley at devel.lifetype.net
jondaley at devel.lifetype.net
Mon Dec 24 15:33:03 EST 2007
Author: jondaley
Date: 2007-12-24 15:33:03 -0500 (Mon, 24 Dec 2007)
New Revision: 6127
Modified:
plog/branches/lifetype-1.2/class/security/bayesianfilter.class.php
Log:
If the comment was previously rejected, don't bother checking it; just train it as spam. I didn't copy the untrain code, for two reasons: 1. since I didn't check the spamicity, I don't know whether I need to untrain, and 2. Mark's point about accidentally training the filter badly - this will have less of an impact.
Modified: plog/branches/lifetype-1.2/class/security/bayesianfilter.class.php
===================================================================
--- plog/branches/lifetype-1.2/class/security/bayesianfilter.class.php 2007-12-24 19:21:06 UTC (rev 6126)
+++ plog/branches/lifetype-1.2/class/security/bayesianfilter.class.php 2007-12-24 20:33:03 UTC (rev 6127)
@@ -46,7 +46,7 @@
$config =& Config::getConfig();
if (!$config->getValue("bayesian_filter_enabled")) {
- return new PipelineResult(true);
+ return new PipelineResult();
}
// get some info
@@ -58,8 +58,7 @@
// or we're receiving a trackback
// so there's no point in doing anything else if that's not the case
if( $request->getValue( "op" ) != "AddComment" && $request->getValue( "op" ) != "AddTrackback" ) {
- $result = new PipelineResult();
- return $result;
+ return new PipelineResult();
}
lt_include( PLOG_CLASS_PATH."class/dao/articlecomments.class.php" );
@@ -118,8 +117,7 @@
// if the article to which the articleId parameter refers to doesn't exist, there really
// is no need to process the whole comments even if it's spam, the request will not be
// processed by AddCommentAction for this very same reason
- $result = new PipelineResult();
- return $result;
+ return new PipelineResult();
}
// and if it does, are comments enabled for it anyway?
@@ -127,83 +125,78 @@
if( $article->getCommentsEnabled() == false || $blogSettings->getValue ( "comments_enabled" ) == false ) {
// we let this request pass through although it may be spam, since it will be blocked
// later on by AddCommentAction because comments aren't enabled
- $result = new PipelineResult();
- return $result;
+ return new PipelineResult();
}
if( $parentId == "" )
$parentId = 0;
- $spamicity = $this->getSpamProbability($blogInfo->getId(), $commentTopic, $commentText, $userName, $userEmail, $userUrl);
+ lt_include( PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php" );
+ if($previouslyRejected){
+ // train this as spam
+ BayesianFilterCore::train( $blogInfo->getId(), $commentTopic,
+ $commentText, $userName, $userEmail,
+ $userUrl, true );
+ // return true, since we didn't check it
+ return new PipelineResult();
+ }
+ else{
+ // check whether this is spam or not, and train appropriately
+ $spamicity = $this->getSpamProbability($blogInfo->getId(),
+ $commentTopic,
+ $commentText,
+ $userName, $userEmail,
+ $userUrl);
- if ($spamicity >= $config->getValue("bayesian_filter_spam_probability_treshold"))
- {
- // need this to get the locale
- $plr = $this->getPipelineRequest();
- $bi = $plr->getBlogInfo();
- $locale = $bi->getLocale();
-
- // now we need to check what we have to do with this comment... either throw it away
- // or keep it in the database
-
- // this piece of code shouldn't really go here, but it's easier than letting
- // the AddComment action that there was actually a comment and that it should
- // still be added but marked as spam and so on... sometimes breaking a few
- // rules makes things easier :)
- if( $config->getValue( "bayesian_filter_spam_comments_action" ) == BAYESIAN_FILTER_KEEP_COMMENT_ACTION ) {
- $result = new PipelineResult(false, HIGH_SPAM_PROBABILITY, $locale->tr("error_comment_spam_keep" ));
- $comments = new ArticleComments();
- $clientIp = Client::getIp();
- $comment = new UserComment( $articleId, $blogInfo->getId(), $parentId, $commentTopic, $commentText,
- null, $userName, $userEmail, $userUrl, $clientIp,
- 0, COMMENT_STATUS_SPAM );
- // mark it as a trackback instead of a user comment...
-
- if( $isTrackback ) {
- $comment->setType( COMMENT_TYPE_TRACKBACK );
+ if ($spamicity >= $config->getValue("bayesian_filter_spam_probability_treshold"))
+ {
+ // need this to get the locale
+ $plr = $this->getPipelineRequest();
+ $bi = $plr->getBlogInfo();
+ $locale = $bi->getLocale();
+
+ // now we need to check what we have to do with this comment... either throw it away
+ // or keep it in the database
+
+ // this piece of code shouldn't really go here, but it's easier than letting
+ // the AddComment action that there was actually a comment and that it should
+ // still be added but marked as spam and so on... sometimes breaking a few
+ // rules makes things easier :)
+ if( $config->getValue( "bayesian_filter_spam_comments_action" ) == BAYESIAN_FILTER_KEEP_COMMENT_ACTION )
+ {
+ $result = new PipelineResult(false, HIGH_SPAM_PROBABILITY, $locale->tr("error_comment_spam_keep" ));
+ $comments = new ArticleComments();
+ $clientIp = Client::getIp();
+ $comment = new UserComment( $articleId, $blogInfo->getId(), $parentId, $commentTopic, $commentText,
+ null, $userName, $userEmail, $userUrl, $clientIp,
+ 0, COMMENT_STATUS_SPAM );
+ // mark it as a trackback instead of a user comment...
+
+ if( $isTrackback ) {
+ $comment->setType( COMMENT_TYPE_TRACKBACK );
+ }
+
+ // add the comment to the db
+ $comments->addComment( $comment );
}
-
- // add the comment to the db
- $comments->addComment( $comment );
+ else {
+ // nothing to do here, simply throw the comment away
+ $result = new PipelineResult(false, HIGH_SPAM_PROBABILITY,
+ $locale->tr("error_comment_spam_throw_away" ));
+ }
+ $spam = true;
}
- else {
- // nothing to do here, simply throw the comment away
- $result = new PipelineResult(false, HIGH_SPAM_PROBABILITY, $locale->tr("error_comment_spam_throw_away" ));
+ else
+ {
+ $result = new PipelineResult();
+ $spam = false;
}
- $spam = true;
- }
- else
- {
- $result = new PipelineResult(true);
- $spam = false;
- }
- if ( !$previouslyRejected )
- {
- // train the filter with the message, be it spam or not...
- lt_include( PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php" );
+ // train the filter with the message, be it spam or not...
BayesianFilterCore::train( $blogInfo->getId(), $commentTopic, $commentText, $userName, $userEmail,
$userUrl, $spam );
+ return $result;
}
- else
- {
- // This is a rejected message. If we think that this is non-spam,
- // we want to untrain it and then retrain it as spam
- lt_include( PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php" );
- if ( !$spam )
- {
- // Un-train this non-spam
- BayesianFilterCore::untrain( $blogInfo->getId(), $commentTopic, $commentText, $userName, $userEmail,
- $userUrl, $spam );
-
- // train this as spam
- BayesianFilterCore::train( $blogInfo->getId(), $commentTopic, $commentText, $userName, $userEmail,
- $userUrl, true );
- }
- }
-
- //print "<h1>" . number_format($spamicity * 100, 0) . "% of spamicity</h1>";
- return $result;
}
/**
More information about the pLog-svn
mailing list