[pLog-svn] r6087 - plog/branches/lifetype-1.2/class/security
mark at devel.lifetype.net
mark at devel.lifetype.net
Wed Nov 28 15:30:56 EST 2007
Author: mark
Date: 2007-11-28 15:30:55 -0500 (Wed, 28 Nov 2007)
New Revision: 6087
Modified:
plog/branches/lifetype-1.2/class/security/bayesianfilter.class.php
plog/branches/lifetype-1.2/class/security/pipelinerequest.class.php
Log:
Fixed bug http://bugs.lifetype.net/view.php?id=1426.
However, I found another bug: the pipeline seems to execute twice, and I have no idea why yet.
Modified: plog/branches/lifetype-1.2/class/security/bayesianfilter.class.php
===================================================================
--- plog/branches/lifetype-1.2/class/security/bayesianfilter.class.php 2007-11-28 19:35:13 UTC (rev 6086)
+++ plog/branches/lifetype-1.2/class/security/bayesianfilter.class.php 2007-11-28 20:30:55 UTC (rev 6087)
@@ -17,7 +17,7 @@
/**
* \ingroup Security
- *
+ *
* Filters the text posted in a comment by a user, to prevent spam-bots. This
* filter only works if the incoming request has the "op" parameter as
* "AddComment", because then it means that we're posting a comment. If it's not
@@ -27,65 +27,82 @@
* in order to filter spam comments out. The filter needs some training but after that it should
* be fairly reliable.
*/
- class BayesianFilter extends PipelineFilter
+ class BayesianFilter extends PipelineFilter
{
-
+
function BayesianFilter( $pipelineRequest )
{
$this->PipelineFilter( $pipelineRequest );
}
-
+
/**
* Processes incoming requests
*
* @return A positive PipelineResult object is the comment is not spam or a negative
* one if it is.
- */
+ */
function filter()
{
$config =& Config::getConfig();
-
+
if (!$config->getValue("bayesian_filter_enabled")) {
return new PipelineResult(true);
}
-
- // get some info
+
+ // get some info
$blogInfo = $this->_pipelineRequest->getBlogInfo();
$request = $this->_pipelineRequest->getHttpRequest();
$previouslyRejected = $this->_pipelineRequest->getRejectedState();
-
- // we only have to filter the contents if the user is posting a comment
+
+ // we only have to filter the contents if the user is posting a comment
// or we're receiving a trackback
// so there's no point in doing anything else if that's not the case
if( $request->getValue( "op" ) != "AddComment" && $request->getValue( "op" ) != "AddTrackback" ) {
$result = new PipelineResult();
return $result;
}
-
+
lt_include( PLOG_CLASS_PATH."class/dao/articlecomments.class.php" );
lt_include( PLOG_CLASS_PATH."class/dao/articles.class.php" );
-
+
+ lt_include( PLOG_CLASS_PATH."class/data/filter/htmlfilter.class.php" );
+ lt_include( PLOG_CLASS_PATH."class/data/filter/urlconverter.class.php" );
+ lt_include( PLOG_CLASS_PATH."class/data/filter/allowedhtmlfilter.class.php" );
+ lt_include( PLOG_CLASS_PATH."class/data/filter/xhtmlizefilter.class.php" );
+
// if it's a trackback, the data is in another place...
$parentId = "";
$isTrackback = ($request->getValue( "op" ) == "AddTrackback");
if( $isTrackback ) {
- $commentText = $request->getValue( "excerpt" );
- $commentTopic = $request->getValue( "title" );
- $articleId = $request->getValue( "id" );
- // that's all we can get from a trackback...
- $userName = $request->getValue( "blog_name" );
- $userUrl = $request->getValue( "url" );
- $userEmail = $request->getValue( "" );
+ $f = new HtmlFilter();
+ $userName = $request->getFilteredValue( "blog_name", $f );
+ $userEmail = $request->getFilteredValue( "", $f );
+ $commentTopic = $request->getFilteredValue( "title", $f );
+ $commentText = $request->getFilteredValue( "excerpt", $f );
+
+ $f = new HtmlFilter();
+ $f->addFilter( new UrlConverter());
+ $userUrl = $request->getFilteredValue( "url", $f );
+
+ $articleId = (int) $request->getValue( "id" );
}
else {
// or else let's assume that we're dealing with a comment
- $commentText = $request->getValue( "commentText" );
- $commentTopic = $request->getValue( "commentTopic" );
- $userName = $request->getValue( "userName" );
- $userEmail = $request->getValue( "userEmail" );
- $userUrl = $request->getValue( "userUrl" );
- $articleId = $request->getValue( "articleId" );
- $parentId = $request->getValue( "parentId" );
+ $f = new HtmlFilter();
+ $userName = $request->getFilteredValue( "userName", $f );
+ $userEmail = $request->getFilteredValue( "userEmail", $f );
+ $commentTopic = $request->getFilteredValue( "commentTopic", $f );
+
+ $f = new HtmlFilter();
+ $f->addFilter( new UrlConverter());
+ $userUrl = $request->getFilteredValue( "userUrl", $f );
+
+ $f = new AllowedHtmlFilter();
+ $f->addFilter( new XhtmlizeFilter());
+ $commentText = $request->getFilteredValue( "commentText", $f );
+
+ $articleId = (int) $request->getValue( "articleId" );
+ $parentId = (int) $request->getValue( "parentId" );
}
// the two checks below are duplicating some of the code in AddCommentAction
@@ -102,30 +119,30 @@
// is no need to process the whole comments even if it's spam, the request will not be
// processed by AddCommentAction for this very same reason
$result = new PipelineResult();
- return $result;
+ return $result;
}
-
+
// and if it does, are comments enabled for it anyway?
$blogSettings = $blogInfo->getSettings();
if( $article->getCommentsEnabled() == false || $blogSettings->getValue ( "comments_enabled" ) == false ) {
// we let this request pass through although it may be spam, since it will be blocked
- // later on by AddCommentAction because comments aren't enabled
+ // later on by AddCommentAction because comments aren't enabled
$result = new PipelineResult();
- return $result;
+ return $result;
}
-
+
if( $parentId == "" )
$parentId = 0;
$spamicity = $this->getSpamProbability($blogInfo->getId(), $commentTopic, $commentText, $userName, $userEmail, $userUrl);
-
+
if ($spamicity >= $config->getValue("bayesian_filter_spam_probability_treshold"))
{
// need this to get the locale
$plr = $this->getPipelineRequest();
$bi = $plr->getBlogInfo();
$locale = $bi->getLocale();
-
+
// now we need to check what we have to do with this comment... either throw it away
// or keep it in the database
@@ -141,11 +158,11 @@
null, $userName, $userEmail, $userUrl, $clientIp,
0, COMMENT_STATUS_SPAM );
// mark it as a trackback instead of a user comment...
-
+
if( $isTrackback ) {
$comment->setType( COMMENT_TYPE_TRACKBACK );
}
-
+
// add the comment to the db
$comments->addComment( $comment );
}
@@ -160,25 +177,25 @@
$result = new PipelineResult(true);
$spam = false;
}
-
+
if ( !$previouslyRejected )
{
// train the filter with the message, be it spam or not...
- lt_include( PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php" );
+ lt_include( PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php" );
BayesianFilterCore::train( $blogInfo->getId(), $commentTopic, $commentText, $userName, $userEmail,
$userUrl, $spam );
}
else
{
- // This is a rejected message. If we think that this is non-spam,
+ // This is a rejected message. If we think that this is non-spam,
// we want to untrain it and then retrain it as spam
- lt_include( PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php" );
+ lt_include( PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php" );
if ( !$spam )
{
// Un-train this non-spam
BayesianFilterCore::untrain( $blogInfo->getId(), $commentTopic, $commentText, $userName, $userEmail,
$userUrl, $spam );
-
+
// train this as spam
BayesianFilterCore::train( $blogInfo->getId(), $commentTopic, $commentText, $userName, $userEmail,
$userUrl, true );
@@ -188,10 +205,10 @@
//print "<h1>" . number_format($spamicity * 100, 0) . "% of spamicity</h1>";
return $result;
}
-
+
/**
* @private
- */
+ */
function getSpamProbability($blogId, $topic, $text, $userName, $userEmail, $userUrl)
{
lt_include( PLOG_CLASS_PATH."class/bayesian/bayesiantokenizer.class.php" );
@@ -207,40 +224,40 @@
$tokens = array_merge($tokensTopic, $tokensText, $tokensUserName, $tokensUserEmail, $tokensUserUrl);
$significantTokens = BayesianFilter::_getMostSignificantTokens($blogId, $tokens);
-
+
return BayesianFilter::_getBayesProbability($significantTokens);
}
-
+
/**
* @private
*/
function _getMostSignificantTokens($blogId, $tokens)
- {
+ {
lt_include( PLOG_CLASS_PATH."class/dao/bayesiantokens.class.php" );
lt_include( PLOG_CLASS_PATH."class/dao/bayesianfilterinfos.class.php" );
- $config =& Config::getConfig();
-
+ $config =& Config::getConfig();
+
$bayesianFilterInfos = new BayesianFilterInfos();
$bayesianFilterInfo = $bayesianFilterInfos->getBlogBayesianFilterInfo($blogId);
-
+
$totalSpam = $bayesianFilterInfo->getTotalSpam();
$totalNonSpam = $bayesianFilterInfo->getTotalNonSpam();
-
+
$bayesianTokens = new BayesianTokens();
-
+
foreach ($tokens as $token)
{
$bayesianTokens->updateOccurrences($blogId, $token, 0, 0, $totalSpam, $totalNonSpam, false);
}
-
- $tokens = $bayesianTokens->getBayesianTokensFromArray($blogId, $tokens);
+
+ $tokens = $bayesianTokens->getBayesianTokensFromArray($blogId, $tokens);
$tempArray = array();
-
+
foreach ($tokens as $token)
{
if ($token->isSignificant() && $token->isValid())
- {
+ {
array_push($tempArray, abs($token->getProb() - 0.5));
}
}
@@ -248,35 +265,35 @@
arsort($tempArray);
$significantTokens = array();
$count = 0;
-
+
foreach ($tempArray as $key => $value)
{
array_push($significantTokens, $tokens[$key]);
$count++;
-
+
if ($count == $config->getValue("bayesian_filter_number_significant_tokens"))
- {
+ {
break;
}
}
-
+
return $significantTokens;
}
-
+
/**
* @private
*/
function _getBayesProbability($significantTokens)
{
$productProb = 1;
- $productNoProb = 1;
-
+ $productNoProb = 1;
+
foreach ($significantTokens as $token)
{
- $productProb *= $token->getProb();
- $productNoProb *= (1 - $token->getProb());
+ $productProb *= $token->getProb();
+ $productNoProb *= (1 - $token->getProb());
}
-
+
return $productProb / ($productProb + $productNoProb);
}
}
Modified: plog/branches/lifetype-1.2/class/security/pipelinerequest.class.php
===================================================================
--- plog/branches/lifetype-1.2/class/security/pipelinerequest.class.php 2007-11-28 19:35:13 UTC (rev 6086)
+++ plog/branches/lifetype-1.2/class/security/pipelinerequest.class.php 2007-11-28 20:30:55 UTC (rev 6087)
@@ -8,7 +8,7 @@
* such as the incoming HTTP request and a reference to the BlogInfo object
* of the blog that is executing the Pipeline.
*/
- class PipelineRequest
+ class PipelineRequest
{
var $_httpRequest;
@@ -24,13 +24,13 @@
*/
function PipelineRequest( $httpRequest, $blogInfo, $rejected = false )
{
-
+
if( is_array($httpRequest))
- $this->_httpRequest = new Properties( $httpRequest );
+ $this->_httpRequest = new Request( $httpRequest );
else
$this->_httpRequest = $httpRequest;
-
+
$this->_blogInfo = $blogInfo;
$this->_requestRejected = $rejected;
}
@@ -51,15 +51,15 @@
{
return $this->_httpRequest;
}
-
+
/**
- * @return Returns a boolean that indicates if this pipeline request has
+ * @return Returns a boolean that indicates if this pipeline request has
* already been rejected
*/
function getRejectedState()
{
return $this->_requestRejected;
}
-
+
}
?>
More information about the pLog-svn
mailing list