[pLog-svn] r6087 - plog/branches/lifetype-1.2/class/security
Jon Daley
plogworld at jon.limedaley.com
Wed Nov 28 16:32:35 EST 2007
I think the pipeline has always executed twice. There is one case
where that makes sense to me: when one filter marks something as spam,
the pipeline needs to be run again so that the other filters can act
correctly based on that status (if they need to).
But I think it always executes twice, and we probably need some
sort of flag (maybe there is one somewhere that I don't know about)
so that, the second time through, filters won't repeat all of their
actions if they don't need to.
http://bugs.lifetype.net/view.php?id=616
On Thu, 29 Nov 2007, Mark Wu wrote:
> Hi Oscar:
>
> The bug is fixed, but another bug appears...
>
> It seems that addcomment(), or rather the pipeline, will execute twice.
>
> I have no idea what's going on. Can you take a look at it?
>
> Mark
>
>> -----Original Message-----
>> From: plog-svn-bounces at devel.lifetype.net
>> [mailto:plog-svn-bounces at devel.lifetype.net] On Behalf Of
>> mark at devel.lifetype.net
>> Sent: Thursday, November 29, 2007 4:31 AM
>> To: plog-svn at devel.lifetype.net
>> Subject: [pLog-svn] r6087 - plog/branches/lifetype-1.2/class/security
>>
>> Author: mark
>> Date: 2007-11-28 15:30:55 -0500 (Wed, 28 Nov 2007) New Revision: 6087
>>
>> Modified:
>> plog/branches/lifetype-1.2/class/security/bayesianfilter.class.php
>> plog/branches/lifetype-1.2/class/security/pipelinerequest.class.php
>> Log:
>> fixed bug http://bugs.lifetype.net/view.php?id=1426.
>>
>> But, I found another bug, it seems the pipeline will execute
>> twice, no idea what's going on.
>>
>> Modified:
>> plog/branches/lifetype-1.2/class/security/bayesianfilter.class.php
>> ===================================================================
>> ---
>> plog/branches/lifetype-1.2/class/security/bayesianfilter.cl
>> ass.php 2007-11-28 19:35:13 UTC (rev 6086)
>> +++
>> plog/branches/lifetype-1.2/class/security/bayesianfilter.cl
>> ass.php 2007-11-28 20:30:55 UTC (rev 6087)
>> @@ -17,7 +17,7 @@
>>
>> /**
>> * \ingroup Security
>> - *
>> + *
>> * Filters the text posted in a comment by a user, to
>> prevent spam-bots. This
>> * filter only works if the incoming request has the
>> "op" parameter as
>> * "AddComment", because then it means that we're
>> posting a comment. If it's not @@ -27,65 +27,82 @@
>> * in order to filter spam comments out. The filter
>> needs some training but after that it should
>> * be fairly reliable.
>> */
>> - class BayesianFilter extends PipelineFilter
>> + class BayesianFilter extends PipelineFilter
>> {
>> -
>> +
>> function BayesianFilter( $pipelineRequest )
>> {
>> $this->PipelineFilter( $pipelineRequest );
>> }
>> -
>> +
>> /**
>> * Processes incoming requests
>> *
>> * @return A positive PipelineResult object is the
>> comment is not spam or a negative
>> * one if it is.
>> - */
>> + */
>> function filter()
>> {
>> $config =& Config::getConfig();
>> -
>> +
>> if (!$config->getValue("bayesian_filter_enabled")) {
>> return new PipelineResult(true);
>> }
>> -
>> - // get some info
>> +
>> + // get some info
>> $blogInfo = $this->_pipelineRequest->getBlogInfo();
>> $request = $this->_pipelineRequest->getHttpRequest();
>> $previouslyRejected =
>> $this->_pipelineRequest->getRejectedState();
>> -
>> - // we only have to filter the contents if the
>> user is posting a comment
>> +
>> + // we only have to filter the contents if the user is
>> + posting a comment
>> // or we're receiving a trackback
>> // so there's no point in doing anything else if
>> that's not the case
>> if( $request->getValue( "op" ) != "AddComment"
>> && $request->getValue( "op" ) != "AddTrackback" ) {
>> $result = new PipelineResult();
>> return $result;
>> }
>> -
>> +
>> lt_include(
>> PLOG_CLASS_PATH."class/dao/articlecomments.class.php" );
>> lt_include(
>> PLOG_CLASS_PATH."class/dao/articles.class.php" );
>> -
>> +
>> + lt_include(
>> PLOG_CLASS_PATH."class/data/filter/htmlfilter.class.php" );
>> + lt_include(
>> PLOG_CLASS_PATH."class/data/filter/urlconverter.class.php" );
>> + lt_include(
>> PLOG_CLASS_PATH."class/data/filter/allowedhtmlfilter.class.php" );
>> + lt_include(
>> + PLOG_CLASS_PATH."class/data/filter/xhtmlizefilter.class.php" );
>> +
>> // if it's a trackback, the data is in another place...
>> $parentId = "";
>> $isTrackback = ($request->getValue( "op" ) ==
>> "AddTrackback");
>> if( $isTrackback ) {
>> - $commentText = $request->getValue( "excerpt" );
>> - $commentTopic = $request->getValue( "title" );
>> - $articleId = $request->getValue( "id" );
>> - // that's all we can get from a trackback...
>> - $userName = $request->getValue( "blog_name" );
>> - $userUrl = $request->getValue( "url" );
>> - $userEmail = $request->getValue( "" );
>> + $f = new HtmlFilter();
>> + $userName = $request->getFilteredValue(
>> "blog_name", $f );
>> + $userEmail = $request->getFilteredValue( "", $f );
>> + $commentTopic = $request->getFilteredValue(
>> "title", $f );
>> + $commentText = $request->getFilteredValue(
>> "excerpt",
>> + $f );
>> +
>> + $f = new HtmlFilter();
>> + $f->addFilter( new UrlConverter());
>> + $userUrl = $request->getFilteredValue(
>> "url", $f );
>> +
>> + $articleId = (int) $request->getValue( "id" );
>> }
>> else {
>> // or else let's assume that we're dealing
>> with a comment
>> - $commentText = $request->getValue( "commentText" );
>> - $commentTopic = $request->getValue( "commentTopic" );
>> - $userName = $request->getValue( "userName" );
>> - $userEmail = $request->getValue( "userEmail" );
>> - $userUrl = $request->getValue( "userUrl" );
>> - $articleId = $request->getValue( "articleId" );
>> - $parentId = $request->getValue( "parentId"
>> );
>> + $f = new HtmlFilter();
>> + $userName = $request->getFilteredValue(
>> "userName", $f );
>> + $userEmail = $request->getFilteredValue(
>> "userEmail", $f );
>> + $commentTopic = $request->getFilteredValue(
>> + "commentTopic", $f );
>> +
>> + $f = new HtmlFilter();
>> + $f->addFilter( new UrlConverter());
>> + $userUrl = $request->getFilteredValue(
>> "userUrl", $f );
>> +
>> + $f = new AllowedHtmlFilter();
>> + $f->addFilter( new XhtmlizeFilter());
>> + $commentText =
>> $request->getFilteredValue( "commentText", $f );
>> +
>> + $articleId = (int) $request->getValue( "articleId" );
>> + $parentId = (int) $request->getValue( "parentId" );
>> }
>>
>> // the two checks below are duplicating
>> some of the code in AddCommentAction @@ -102,30 +119,30 @@
>> // is no need to process the
>> whole comments even if it's spam, the request will not be
>> // processed by
>> AddCommentAction for this very same reason
>> $result = new PipelineResult();
>> - return $result;
>> + return $result;
>> }
>> -
>> +
>> // and if it does, are comments enabled
>> for it anyway?
>> $blogSettings = $blogInfo->getSettings();
>> if( $article->getCommentsEnabled() == false ||
>> $blogSettings->getValue ( "comments_enabled" ) == false ) {
>> // we let this request pass
>> through although it may be spam, since it will be blocked
>> - // later on by AddCommentAction
>> because comments aren't enabled
>> + // later on by AddCommentAction
>> because comments aren't enabled
>> $result = new PipelineResult();
>> - return $result;
>> + return $result;
>> }
>> -
>> +
>> if( $parentId == "" )
>> $parentId = 0;
>>
>> $spamicity =
>> $this->getSpamProbability($blogInfo->getId(), $commentTopic,
>> $commentText, $userName, $userEmail, $userUrl);
>> -
>> +
>> if ($spamicity >=
>> $config->getValue("bayesian_filter_spam_probability_treshold"))
>> {
>> // need this to get the locale
>> $plr = $this->getPipelineRequest();
>> $bi = $plr->getBlogInfo();
>> $locale = $bi->getLocale();
>> -
>> +
>> // now we need to check what we have to do
>> with this comment... either throw it away
>> // or keep it in the database
>>
>> @@ -141,11 +158,11 @@
>> null,
>> $userName, $userEmail, $userUrl, $clientIp,
>> 0,
>> COMMENT_STATUS_SPAM );
>> // mark it as a trackback instead of a
>> user comment...
>> -
>> +
>> if( $isTrackback ) {
>> $comment->setType( COMMENT_TYPE_TRACKBACK );
>> }
>> -
>> +
>> // add the comment to the db
>> $comments->addComment( $comment );
>> }
>> @@ -160,25 +177,25 @@
>> $result = new PipelineResult(true);
>> $spam = false;
>> }
>> -
>> +
>> if ( !$previouslyRejected )
>> {
>> // train the filter with the message, be it
>> spam or not...
>> - lt_include(
>> PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php"
>> );
>> + lt_include(
>> +PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php" );
>> BayesianFilterCore::train(
>> $blogInfo->getId(), $commentTopic, $commentText, $userName,
>> $userEmail,
>> $userUrl, $spam );
>> }
>> else
>> {
>> - // This is a rejected message. If we think that
>> this is non-spam,
>> + // This is a rejected message. If we think that this is
>> + non-spam,
>> // we want to untrain it and then retrain it as spam
>> - lt_include(
>> PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php"
>> );
>> + lt_include(
>> +PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php" );
>> if ( !$spam )
>> {
>> // Un-train this non-spam
>>
>> BayesianFilterCore::untrain( $blogInfo->getId(),
>> $commentTopic, $commentText, $userName, $userEmail,
>>
>> $userUrl, $spam );
>> -
>>
>> +
>> // train this as spam
>>
>> BayesianFilterCore::train( $blogInfo->getId(), $commentTopic,
>> $commentText, $userName, $userEmail,
>>
>> $userUrl, true );
>> @@ -188,10 +205,10 @@
>> //print "<h1>" . number_format($spamicity * 100,
>> 0) . "% of spamicity</h1>";
>> return $result;
>> }
>> -
>> +
>> /**
>> * @private
>> - */
>> + */
>> function getSpamProbability($blogId, $topic, $text,
>> $userName, $userEmail, $userUrl)
>> {
>> lt_include(
>> PLOG_CLASS_PATH."class/bayesian/bayesiantokenizer.class.php"
>> ); @@ -207,40 +224,40 @@
>>
>> $tokens = array_merge($tokensTopic, $tokensText,
>> $tokensUserName, $tokensUserEmail, $tokensUserUrl);
>> $significantTokens =
>> BayesianFilter::_getMostSignificantTokens($blogId, $tokens);
>> -
>> +
>> return
>> BayesianFilter::_getBayesProbability($significantTokens);
>> }
>> -
>> +
>> /**
>> * @private
>> */
>> function _getMostSignificantTokens($blogId, $tokens)
>> - {
>> + {
>> lt_include(
>> PLOG_CLASS_PATH."class/dao/bayesiantokens.class.php" );
>> lt_include(
>> PLOG_CLASS_PATH."class/dao/bayesianfilterinfos.class.php" );
>>
>> - $config =& Config::getConfig();
>> -
>> + $config =& Config::getConfig();
>> +
>> $bayesianFilterInfos = new BayesianFilterInfos();
>> $bayesianFilterInfo =
>> $bayesianFilterInfos->getBlogBayesianFilterInfo($blogId);
>> -
>> +
>> $totalSpam = $bayesianFilterInfo->getTotalSpam();
>> $totalNonSpam = $bayesianFilterInfo->getTotalNonSpam();
>> -
>> +
>> $bayesianTokens = new BayesianTokens();
>> -
>> +
>> foreach ($tokens as $token)
>> {
>> $bayesianTokens->updateOccurrences($blogId,
>> $token, 0, 0, $totalSpam, $totalNonSpam, false);
>> }
>> -
>> - $tokens =
>> $bayesianTokens->getBayesianTokensFromArray($blogId,
>> $tokens);
>> +
>> + $tokens =
>> + $bayesianTokens->getBayesianTokensFromArray($blogId, $tokens);
>> $tempArray = array();
>> -
>> +
>> foreach ($tokens as $token)
>> {
>> if ($token->isSignificant() && $token->isValid())
>> - {
>> + {
>> array_push($tempArray,
>> abs($token->getProb() - 0.5));
>> }
>> }
>> @@ -248,35 +265,35 @@
>> arsort($tempArray);
>> $significantTokens = array();
>> $count = 0;
>> -
>> +
>> foreach ($tempArray as $key => $value)
>> {
>> array_push($significantTokens, $tokens[$key]);
>> $count++;
>> -
>> +
>> if ($count ==
>> $config->getValue("bayesian_filter_number_significant_tokens"))
>> - {
>> + {
>> break;
>> }
>> }
>> -
>> +
>> return $significantTokens;
>> }
>> -
>> +
>> /**
>> * @private
>> */
>> function _getBayesProbability($significantTokens)
>> {
>> $productProb = 1;
>> - $productNoProb = 1;
>> -
>> + $productNoProb = 1;
>> +
>> foreach ($significantTokens as $token)
>> {
>> - $productProb *= $token->getProb();
>> - $productNoProb *= (1 - $token->getProb());
>>
>> + $productProb *= $token->getProb();
>> + $productNoProb *= (1 - $token->getProb());
>> }
>> -
>> +
>> return $productProb / ($productProb + $productNoProb);
>> }
>> }
>>
>> Modified:
>> plog/branches/lifetype-1.2/class/security/pipelinerequest.class.php
>> ===================================================================
>> ---
>> plog/branches/lifetype-1.2/class/security/pipelinerequest.cl
>> ass.php 2007-11-28 19:35:13 UTC (rev 6086)
>> +++
>> plog/branches/lifetype-1.2/class/security/pipelinerequest.cl
>> ass.php 2007-11-28 20:30:55 UTC (rev 6087)
>> @@ -8,7 +8,7 @@
>> * such as the incoming HTTP request and a reference to
>> the BlogInfo object
>> * of the blog that is executing the Pipeline.
>> */
>> - class PipelineRequest
>> + class PipelineRequest
>> {
>>
>> var $_httpRequest;
>> @@ -24,13 +24,13 @@
>> */
>> function PipelineRequest( $httpRequest, $blogInfo,
>> $rejected = false )
>> {
>> -
>>
>> +
>> if( is_array($httpRequest))
>> - $this->_httpRequest = new Properties( $httpRequest );
>> + $this->_httpRequest = new Request( $httpRequest );
>> else
>> $this->_httpRequest = $httpRequest;
>> -
>> +
>> $this->_blogInfo = $blogInfo;
>> $this->_requestRejected = $rejected;
>> }
>> @@ -51,15 +51,15 @@
>> {
>> return $this->_httpRequest;
>> }
>> -
>> +
>> /**
>> - * @return Returns a boolean that indicates if this
>> pipeline request has
>> + * @return Returns a boolean that indicates if this pipeline
>> + request has
>> * already been rejected
>> */
>> function getRejectedState()
>> {
>> return $this->_requestRejected;
>> }
>> -
>> +
>> }
>> ?>
>>
>> _______________________________________________
>> pLog-svn mailing list
>> pLog-svn at devel.lifetype.net
>> http://limedaley.com/mailman/listinfo/plog-svn
>
> _______________________________________________
> pLog-svn mailing list
> pLog-svn at devel.lifetype.net
> http://limedaley.com/mailman/listinfo/plog-svn
>
--
Jon Daley
http://jon.limedaley.com/
If one has not given everything, one has given nothing.
-- Georges Guynemer
More information about the pLog-svn
mailing list