[pLog-svn] r6087 - plog/branches/lifetype-1.2/class/security
Mark Wu
markplace at gmail.com
Wed Nov 28 15:32:29 EST 2007
Hi Oscar:
The bug is fixed, but another bug appears...
It seems that addComment(), or rather the whole pipeline, is executed twice.
I have no idea what's going on. Could you take a look at it?
Mark
> -----Original Message-----
> From: plog-svn-bounces at devel.lifetype.net
> [mailto:plog-svn-bounces at devel.lifetype.net] On Behalf Of
> mark at devel.lifetype.net
> Sent: Thursday, November 29, 2007 4:31 AM
> To: plog-svn at devel.lifetype.net
> Subject: [pLog-svn] r6087 - plog/branches/lifetype-1.2/class/security
>
> Author: mark
> Date: 2007-11-28 15:30:55 -0500 (Wed, 28 Nov 2007) New Revision: 6087
>
> Modified:
> plog/branches/lifetype-1.2/class/security/bayesianfilter.class.php
> plog/branches/lifetype-1.2/class/security/pipelinerequest.class.php
> Log:
> fixed bug http://bugs.lifetype.net/view.php?id=1426.
>
> But, I found another bug, it seems the pipeline will execute
> twice, no idea what's going on.
>
> Modified:
> plog/branches/lifetype-1.2/class/security/bayesianfilter.class.php
> ===================================================================
> ---
> plog/branches/lifetype-1.2/class/security/bayesianfilter.cl
> ass.php 2007-11-28 19:35:13 UTC (rev 6086)
> +++
> plog/branches/lifetype-1.2/class/security/bayesianfilter.cl
> ass.php 2007-11-28 20:30:55 UTC (rev 6087)
> @@ -17,7 +17,7 @@
>
> /**
> * \ingroup Security
> - *
> + *
> * Filters the text posted in a comment by a user, to
> prevent spam-bots. This
> * filter only works if the incoming request has the
> "op" parameter as
> * "AddComment", because then it means that we're
> posting a comment. If it's not @@ -27,65 +27,82 @@
> * in order to filter spam comments out. The filter
> needs some training but after that it should
> * be fairly reliable.
> */
> - class BayesianFilter extends PipelineFilter
> + class BayesianFilter extends PipelineFilter
> {
> -
> +
> function BayesianFilter( $pipelineRequest )
> {
> $this->PipelineFilter( $pipelineRequest );
> }
> -
> +
> /**
> * Processes incoming requests
> *
> * @return A positive PipelineResult object is the
> comment is not spam or a negative
> * one if it is.
> - */
> + */
> function filter()
> {
> $config =& Config::getConfig();
> -
> +
> if (!$config->getValue("bayesian_filter_enabled")) {
> return new PipelineResult(true);
> }
> -
> - // get some info
> +
> + // get some info
> $blogInfo = $this->_pipelineRequest->getBlogInfo();
> $request = $this->_pipelineRequest->getHttpRequest();
> $previouslyRejected =
> $this->_pipelineRequest->getRejectedState();
> -
> - // we only have to filter the contents if the
> user is posting a comment
> +
> + // we only have to filter the contents if the user is
> + posting a comment
> // or we're receiving a trackback
> // so there's no point in doing anything else if
> that's not the case
> if( $request->getValue( "op" ) != "AddComment"
> && $request->getValue( "op" ) != "AddTrackback" ) {
> $result = new PipelineResult();
> return $result;
> }
> -
> +
> lt_include(
> PLOG_CLASS_PATH."class/dao/articlecomments.class.php" );
> lt_include(
> PLOG_CLASS_PATH."class/dao/articles.class.php" );
> -
> +
> + lt_include(
> PLOG_CLASS_PATH."class/data/filter/htmlfilter.class.php" );
> + lt_include(
> PLOG_CLASS_PATH."class/data/filter/urlconverter.class.php" );
> + lt_include(
> PLOG_CLASS_PATH."class/data/filter/allowedhtmlfilter.class.php" );
> + lt_include(
> + PLOG_CLASS_PATH."class/data/filter/xhtmlizefilter.class.php" );
> +
> // if it's a trackback, the data is in another place...
> $parentId = "";
> $isTrackback = ($request->getValue( "op" ) ==
> "AddTrackback");
> if( $isTrackback ) {
> - $commentText = $request->getValue( "excerpt" );
> - $commentTopic = $request->getValue( "title" );
> - $articleId = $request->getValue( "id" );
> - // that's all we can get from a trackback...
> - $userName = $request->getValue( "blog_name" );
> - $userUrl = $request->getValue( "url" );
> - $userEmail = $request->getValue( "" );
> + $f = new HtmlFilter();
> + $userName = $request->getFilteredValue(
> "blog_name", $f );
> + $userEmail = $request->getFilteredValue( "", $f );
> + $commentTopic = $request->getFilteredValue(
> "title", $f );
> + $commentText = $request->getFilteredValue(
> "excerpt",
> + $f );
> +
> + $f = new HtmlFilter();
> + $f->addFilter( new UrlConverter());
> + $userUrl = $request->getFilteredValue(
> "url", $f );
> +
> + $articleId = (int) $request->getValue( "id" );
> }
> else {
> // or else let's assume that we're dealing
> with a comment
> - $commentText = $request->getValue( "commentText" );
> - $commentTopic = $request->getValue( "commentTopic" );
> - $userName = $request->getValue( "userName" );
> - $userEmail = $request->getValue( "userEmail" );
> - $userUrl = $request->getValue( "userUrl" );
> - $articleId = $request->getValue( "articleId" );
> - $parentId = $request->getValue( "parentId"
> );
> + $f = new HtmlFilter();
> + $userName = $request->getFilteredValue(
> "userName", $f );
> + $userEmail = $request->getFilteredValue(
> "userEmail", $f );
> + $commentTopic = $request->getFilteredValue(
> + "commentTopic", $f );
> +
> + $f = new HtmlFilter();
> + $f->addFilter( new UrlConverter());
> + $userUrl = $request->getFilteredValue(
> "userUrl", $f );
> +
> + $f = new AllowedHtmlFilter();
> + $f->addFilter( new XhtmlizeFilter());
> + $commentText =
> $request->getFilteredValue( "commentText", $f );
> +
> + $articleId = (int) $request->getValue( "articleId" );
> + $parentId = (int) $request->getValue( "parentId" );
> }
>
> // the two checks below are duplicating
> some of the code in AddCommentAction @@ -102,30 +119,30 @@
> // is no need to process the
> whole comments even if it's spam, the request will not be
> // processed by
> AddCommentAction for this very same reason
> $result = new PipelineResult();
> - return $result;
> + return $result;
> }
> -
> +
> // and if it does, are comments enabled
> for it anyway?
> $blogSettings = $blogInfo->getSettings();
> if( $article->getCommentsEnabled() == false ||
> $blogSettings->getValue ( "comments_enabled" ) == false ) {
> // we let this request pass
> through although it may be spam, since it will be blocked
> - // later on by AddCommentAction
> because comments aren't enabled
> + // later on by AddCommentAction
> because comments aren't enabled
> $result = new PipelineResult();
> - return $result;
> + return $result;
> }
> -
> +
> if( $parentId == "" )
> $parentId = 0;
>
> $spamicity =
> $this->getSpamProbability($blogInfo->getId(), $commentTopic,
> $commentText, $userName, $userEmail, $userUrl);
> -
> +
> if ($spamicity >=
> $config->getValue("bayesian_filter_spam_probability_treshold"))
> {
> // need this to get the locale
> $plr = $this->getPipelineRequest();
> $bi = $plr->getBlogInfo();
> $locale = $bi->getLocale();
> -
> +
> // now we need to check what we have to do
> with this comment... either throw it away
> // or keep it in the database
>
> @@ -141,11 +158,11 @@
> null,
> $userName, $userEmail, $userUrl, $clientIp,
> 0,
> COMMENT_STATUS_SPAM );
> // mark it as a trackback instead of a
> user comment...
> -
> +
> if( $isTrackback ) {
> $comment->setType( COMMENT_TYPE_TRACKBACK );
> }
> -
> +
> // add the comment to the db
> $comments->addComment( $comment );
> }
> @@ -160,25 +177,25 @@
> $result = new PipelineResult(true);
> $spam = false;
> }
> -
> +
> if ( !$previouslyRejected )
> {
> // train the filter with the message, be it
> spam or not...
> - lt_include(
> PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php"
> );
> + lt_include(
> +PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php" );
> BayesianFilterCore::train(
> $blogInfo->getId(), $commentTopic, $commentText, $userName,
> $userEmail,
> $userUrl, $spam );
> }
> else
> {
> - // This is a rejected message. If we think that
> this is non-spam,
> + // This is a rejected message. If we think that this is
> + non-spam,
> // we want to untrain it and then retrain it as spam
> - lt_include(
> PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php"
> );
> + lt_include(
> +PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php" );
> if ( !$spam )
> {
> // Un-train this non-spam
>
> BayesianFilterCore::untrain( $blogInfo->getId(),
> $commentTopic, $commentText, $userName, $userEmail,
>
> $userUrl, $spam );
> -
>
> +
> // train this as spam
>
> BayesianFilterCore::train( $blogInfo->getId(), $commentTopic,
> $commentText, $userName, $userEmail,
>
> $userUrl, true );
> @@ -188,10 +205,10 @@
> //print "<h1>" . number_format($spamicity * 100,
> 0) . "% of spamicity</h1>";
> return $result;
> }
> -
> +
> /**
> * @private
> - */
> + */
> function getSpamProbability($blogId, $topic, $text,
> $userName, $userEmail, $userUrl)
> {
> lt_include(
> PLOG_CLASS_PATH."class/bayesian/bayesiantokenizer.class.php"
> ); @@ -207,40 +224,40 @@
>
> $tokens = array_merge($tokensTopic, $tokensText,
> $tokensUserName, $tokensUserEmail, $tokensUserUrl);
> $significantTokens =
> BayesianFilter::_getMostSignificantTokens($blogId, $tokens);
> -
> +
> return
> BayesianFilter::_getBayesProbability($significantTokens);
> }
> -
> +
> /**
> * @private
> */
> function _getMostSignificantTokens($blogId, $tokens)
> - {
> + {
> lt_include(
> PLOG_CLASS_PATH."class/dao/bayesiantokens.class.php" );
> lt_include(
> PLOG_CLASS_PATH."class/dao/bayesianfilterinfos.class.php" );
>
> - $config =& Config::getConfig();
> -
> + $config =& Config::getConfig();
> +
> $bayesianFilterInfos = new BayesianFilterInfos();
> $bayesianFilterInfo =
> $bayesianFilterInfos->getBlogBayesianFilterInfo($blogId);
> -
> +
> $totalSpam = $bayesianFilterInfo->getTotalSpam();
> $totalNonSpam = $bayesianFilterInfo->getTotalNonSpam();
> -
> +
> $bayesianTokens = new BayesianTokens();
> -
> +
> foreach ($tokens as $token)
> {
> $bayesianTokens->updateOccurrences($blogId,
> $token, 0, 0, $totalSpam, $totalNonSpam, false);
> }
> -
> - $tokens =
> $bayesianTokens->getBayesianTokensFromArray($blogId,
> $tokens);
> +
> + $tokens =
> + $bayesianTokens->getBayesianTokensFromArray($blogId, $tokens);
> $tempArray = array();
> -
> +
> foreach ($tokens as $token)
> {
> if ($token->isSignificant() && $token->isValid())
> - {
> + {
> array_push($tempArray,
> abs($token->getProb() - 0.5));
> }
> }
> @@ -248,35 +265,35 @@
> arsort($tempArray);
> $significantTokens = array();
> $count = 0;
> -
> +
> foreach ($tempArray as $key => $value)
> {
> array_push($significantTokens, $tokens[$key]);
> $count++;
> -
> +
> if ($count ==
> $config->getValue("bayesian_filter_number_significant_tokens"))
> - {
> + {
> break;
> }
> }
> -
> +
> return $significantTokens;
> }
> -
> +
> /**
> * @private
> */
> function _getBayesProbability($significantTokens)
> {
> $productProb = 1;
> - $productNoProb = 1;
> -
> + $productNoProb = 1;
> +
> foreach ($significantTokens as $token)
> {
> - $productProb *= $token->getProb();
> - $productNoProb *= (1 - $token->getProb());
>
> + $productProb *= $token->getProb();
> + $productNoProb *= (1 - $token->getProb());
> }
> -
> +
> return $productProb / ($productProb + $productNoProb);
> }
> }
>
> Modified:
> plog/branches/lifetype-1.2/class/security/pipelinerequest.class.php
> ===================================================================
> ---
> plog/branches/lifetype-1.2/class/security/pipelinerequest.cl
> ass.php 2007-11-28 19:35:13 UTC (rev 6086)
> +++
> plog/branches/lifetype-1.2/class/security/pipelinerequest.cl
> ass.php 2007-11-28 20:30:55 UTC (rev 6087)
> @@ -8,7 +8,7 @@
> * such as the incoming HTTP request and a reference to
> the BlogInfo object
> * of the blog that is executing the Pipeline.
> */
> - class PipelineRequest
> + class PipelineRequest
> {
>
> var $_httpRequest;
> @@ -24,13 +24,13 @@
> */
> function PipelineRequest( $httpRequest, $blogInfo,
> $rejected = false )
> {
> -
>
> +
> if( is_array($httpRequest))
> - $this->_httpRequest = new Properties( $httpRequest );
> + $this->_httpRequest = new Request( $httpRequest );
> else
> $this->_httpRequest = $httpRequest;
> -
> +
> $this->_blogInfo = $blogInfo;
> $this->_requestRejected = $rejected;
> }
> @@ -51,15 +51,15 @@
> {
> return $this->_httpRequest;
> }
> -
> +
> /**
> - * @return Returns a boolean that indicates if this
> pipeline request has
> + * @return Returns a boolean that indicates if this pipeline
> + request has
> * already been rejected
> */
> function getRejectedState()
> {
> return $this->_requestRejected;
> }
> -
> +
> }
> ?>
>
> _______________________________________________
> pLog-svn mailing list
> pLog-svn at devel.lifetype.net
> http://limedaley.com/mailman/listinfo/plog-svn
More information about the pLog-svn
mailing list