[pLog-svn] r6087 - plog/branches/lifetype-1.2/class/security
Oscar Renalias
oscar at renalias.net
Thu Nov 29 02:45:01 EST 2007
I unfortunately haven't got the time to look into this, sorry. Is it a
big problem that the pipeline is running twice? I think it's been
working like this, like, forever :-) But I have to admit that I never
noticed it, and it never affected any code... is it a problem now?
On Nov 28, 2007 10:32 PM, Mark Wu <markplace at gmail.com> wrote:
> Hi Oscar:
>
> The bug is fixed, but another bug appears...
>
> It seems that addcomment() (or rather, the whole pipeline) executes twice.
>
> I have no idea what's going on. Can you take a look at it?
>
> Mark
>
>
> > -----Original Message-----
> > From: plog-svn-bounces at devel.lifetype.net
> > [mailto:plog-svn-bounces at devel.lifetype.net] On Behalf Of
> > mark at devel.lifetype.net
> > Sent: Thursday, November 29, 2007 4:31 AM
> > To: plog-svn at devel.lifetype.net
> > Subject: [pLog-svn] r6087 - plog/branches/lifetype-1.2/class/security
> >
> > Author: mark
> > Date: 2007-11-28 15:30:55 -0500 (Wed, 28 Nov 2007) New Revision: 6087
> >
> > Modified:
> > plog/branches/lifetype-1.2/class/security/bayesianfilter.class.php
> > plog/branches/lifetype-1.2/class/security/pipelinerequest.class.php
> > Log:
> > fixed bug http://bugs.lifetype.net/view.php?id=1426.
> >
> > But, I found another bug, it seems the pipeline will execute
> > twice, no idea what's going on.
> >
> > Modified:
> > plog/branches/lifetype-1.2/class/security/bayesianfilter.class.php
> > ===================================================================
> > ---
> > plog/branches/lifetype-1.2/class/security/bayesianfilter.cl
> > ass.php 2007-11-28 19:35:13 UTC (rev 6086)
> > +++
> > plog/branches/lifetype-1.2/class/security/bayesianfilter.cl
> > ass.php 2007-11-28 20:30:55 UTC (rev 6087)
> > @@ -17,7 +17,7 @@
> >
> > /**
> > * \ingroup Security
> > - *
> > + *
> > * Filters the text posted in a comment by a user, to
> > prevent spam-bots. This
> > * filter only works if the incoming request has the
> > "op" parameter as
> > * "AddComment", because then it means that we're
> > posting a comment. If it's not @@ -27,65 +27,82 @@
> > * in order to filter spam comments out. The filter
> > needs some training but after that it should
> > * be fairly reliable.
> > */
> > - class BayesianFilter extends PipelineFilter
> > + class BayesianFilter extends PipelineFilter
> > {
> > -
> > +
> > function BayesianFilter( $pipelineRequest )
> > {
> > $this->PipelineFilter( $pipelineRequest );
> > }
> > -
> > +
> > /**
> > * Processes incoming requests
> > *
> > * @return A positive PipelineResult object is the
> > comment is not spam or a negative
> > * one if it is.
> > - */
> > + */
> > function filter()
> > {
> > $config =& Config::getConfig();
> > -
> > +
> > if (!$config->getValue("bayesian_filter_enabled")) {
> > return new PipelineResult(true);
> > }
> > -
> > - // get some info
> > +
> > + // get some info
> > $blogInfo = $this->_pipelineRequest->getBlogInfo();
> > $request = $this->_pipelineRequest->getHttpRequest();
> > $previouslyRejected =
> > $this->_pipelineRequest->getRejectedState();
> > -
> > - // we only have to filter the contents if the
> > user is posting a comment
> > +
> > + // we only have to filter the contents if the user is
> > + posting a comment
> > // or we're receiving a trackback
> > // so there's no point in doing anything else if
> > that's not the case
> > if( $request->getValue( "op" ) != "AddComment"
> > && $request->getValue( "op" ) != "AddTrackback" ) {
> > $result = new PipelineResult();
> > return $result;
> > }
> > -
> > +
> > lt_include(
> > PLOG_CLASS_PATH."class/dao/articlecomments.class.php" );
> > lt_include(
> > PLOG_CLASS_PATH."class/dao/articles.class.php" );
> > -
> > +
> > + lt_include(
> > PLOG_CLASS_PATH."class/data/filter/htmlfilter.class.php" );
> > + lt_include(
> > PLOG_CLASS_PATH."class/data/filter/urlconverter.class.php" );
> > + lt_include(
> > PLOG_CLASS_PATH."class/data/filter/allowedhtmlfilter.class.php" );
> > + lt_include(
> > + PLOG_CLASS_PATH."class/data/filter/xhtmlizefilter.class.php" );
> > +
> > // if it's a trackback, the data is in another place...
> > $parentId = "";
> > $isTrackback = ($request->getValue( "op" ) ==
> > "AddTrackback");
> > if( $isTrackback ) {
> > - $commentText = $request->getValue( "excerpt" );
> > - $commentTopic = $request->getValue( "title" );
> > - $articleId = $request->getValue( "id" );
> > - // that's all we can get from a trackback...
> > - $userName = $request->getValue( "blog_name" );
> > - $userUrl = $request->getValue( "url" );
> > - $userEmail = $request->getValue( "" );
> > + $f = new HtmlFilter();
> > + $userName = $request->getFilteredValue(
> > "blog_name", $f );
> > + $userEmail = $request->getFilteredValue( "", $f );
> > + $commentTopic = $request->getFilteredValue(
> > "title", $f );
> > + $commentText = $request->getFilteredValue(
> > "excerpt",
> > + $f );
> > +
> > + $f = new HtmlFilter();
> > + $f->addFilter( new UrlConverter());
> > + $userUrl = $request->getFilteredValue(
> > "url", $f );
> > +
> > + $articleId = (int) $request->getValue( "id" );
> > }
> > else {
> > // or else let's assume that we're dealing
> > with a comment
> > - $commentText = $request->getValue( "commentText" );
> > - $commentTopic = $request->getValue( "commentTopic" );
> > - $userName = $request->getValue( "userName" );
> > - $userEmail = $request->getValue( "userEmail" );
> > - $userUrl = $request->getValue( "userUrl" );
> > - $articleId = $request->getValue( "articleId" );
> > - $parentId = $request->getValue( "parentId"
> > );
> > + $f = new HtmlFilter();
> > + $userName = $request->getFilteredValue(
> > "userName", $f );
> > + $userEmail = $request->getFilteredValue(
> > "userEmail", $f );
> > + $commentTopic = $request->getFilteredValue(
> > + "commentTopic", $f );
> > +
> > + $f = new HtmlFilter();
> > + $f->addFilter( new UrlConverter());
> > + $userUrl = $request->getFilteredValue(
> > "userUrl", $f );
> > +
> > + $f = new AllowedHtmlFilter();
> > + $f->addFilter( new XhtmlizeFilter());
> > + $commentText =
> > $request->getFilteredValue( "commentText", $f );
> > +
> > + $articleId = (int) $request->getValue( "articleId" );
> > + $parentId = (int) $request->getValue( "parentId" );
> > }
> >
> > // the two checks below are duplicating
> > some of the code in AddCommentAction @@ -102,30 +119,30 @@
> > // is no need to process the
> > whole comments even if it's spam, the request will not be
> > // processed by
> > AddCommentAction for this very same reason
> > $result = new PipelineResult();
> > - return $result;
> > + return $result;
> > }
> > -
> > +
> > // and if it does, are comments enabled
> > for it anyway?
> > $blogSettings = $blogInfo->getSettings();
> > if( $article->getCommentsEnabled() == false ||
> > $blogSettings->getValue ( "comments_enabled" ) == false ) {
> > // we let this request pass
> > through although it may be spam, since it will be blocked
> > - // later on by AddCommentAction
> > because comments aren't enabled
> > + // later on by AddCommentAction
> > because comments aren't enabled
> > $result = new PipelineResult();
> > - return $result;
> > + return $result;
> > }
> > -
> > +
> > if( $parentId == "" )
> > $parentId = 0;
> >
> > $spamicity =
> > $this->getSpamProbability($blogInfo->getId(), $commentTopic,
> > $commentText, $userName, $userEmail, $userUrl);
> > -
> > +
> > if ($spamicity >=
> > $config->getValue("bayesian_filter_spam_probability_treshold"))
> > {
> > // need this to get the locale
> > $plr = $this->getPipelineRequest();
> > $bi = $plr->getBlogInfo();
> > $locale = $bi->getLocale();
> > -
> > +
> > // now we need to check what we have to do
> > with this comment... either throw it away
> > // or keep it in the database
> >
> > @@ -141,11 +158,11 @@
> > null,
> > $userName, $userEmail, $userUrl, $clientIp,
> > 0,
> > COMMENT_STATUS_SPAM );
> > // mark it as a trackback instead of a
> > user comment...
> > -
> > +
> > if( $isTrackback ) {
> > $comment->setType( COMMENT_TYPE_TRACKBACK );
> > }
> > -
> > +
> > // add the comment to the db
> > $comments->addComment( $comment );
> > }
> > @@ -160,25 +177,25 @@
> > $result = new PipelineResult(true);
> > $spam = false;
> > }
> > -
> > +
> > if ( !$previouslyRejected )
> > {
> > // train the filter with the message, be it
> > spam or not...
> > - lt_include(
> > PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php"
> > );
> > + lt_include(
> > +PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php" );
> > BayesianFilterCore::train(
> > $blogInfo->getId(), $commentTopic, $commentText, $userName,
> > $userEmail,
> > $userUrl, $spam );
> > }
> > else
> > {
> > - // This is a rejected message. If we think that
> > this is non-spam,
> > + // This is a rejected message. If we think that this is
> > + non-spam,
> > // we want to untrain it and then retrain it as spam
> > - lt_include(
> > PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php"
> > );
> > + lt_include(
> > +PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php" );
> > if ( !$spam )
> > {
> > // Un-train this non-spam
> >
> > BayesianFilterCore::untrain( $blogInfo->getId(),
> > $commentTopic, $commentText, $userName, $userEmail,
> >
> > $userUrl, $spam );
> > -
> >
> > +
> > // train this as spam
> >
> > BayesianFilterCore::train( $blogInfo->getId(), $commentTopic,
> > $commentText, $userName, $userEmail,
> >
> > $userUrl, true );
> > @@ -188,10 +205,10 @@
> > //print "<h1>" . number_format($spamicity * 100,
> > 0) . "% of spamicity</h1>";
> > return $result;
> > }
> > -
> > +
> > /**
> > * @private
> > - */
> > + */
> > function getSpamProbability($blogId, $topic, $text,
> > $userName, $userEmail, $userUrl)
> > {
> > lt_include(
> > PLOG_CLASS_PATH."class/bayesian/bayesiantokenizer.class.php"
> > ); @@ -207,40 +224,40 @@
> >
> > $tokens = array_merge($tokensTopic, $tokensText,
> > $tokensUserName, $tokensUserEmail, $tokensUserUrl);
> > $significantTokens =
> > BayesianFilter::_getMostSignificantTokens($blogId, $tokens);
> > -
> > +
> > return
> > BayesianFilter::_getBayesProbability($significantTokens);
> > }
> > -
> > +
> > /**
> > * @private
> > */
> > function _getMostSignificantTokens($blogId, $tokens)
> > - {
> > + {
> > lt_include(
> > PLOG_CLASS_PATH."class/dao/bayesiantokens.class.php" );
> > lt_include(
> > PLOG_CLASS_PATH."class/dao/bayesianfilterinfos.class.php" );
> >
> > - $config =& Config::getConfig();
> > -
> > + $config =& Config::getConfig();
> > +
> > $bayesianFilterInfos = new BayesianFilterInfos();
> > $bayesianFilterInfo =
> > $bayesianFilterInfos->getBlogBayesianFilterInfo($blogId);
> > -
> > +
> > $totalSpam = $bayesianFilterInfo->getTotalSpam();
> > $totalNonSpam = $bayesianFilterInfo->getTotalNonSpam();
> > -
> > +
> > $bayesianTokens = new BayesianTokens();
> > -
> > +
> > foreach ($tokens as $token)
> > {
> > $bayesianTokens->updateOccurrences($blogId,
> > $token, 0, 0, $totalSpam, $totalNonSpam, false);
> > }
> > -
> > - $tokens =
> > $bayesianTokens->getBayesianTokensFromArray($blogId,
> > $tokens);
> > +
> > + $tokens =
> > + $bayesianTokens->getBayesianTokensFromArray($blogId, $tokens);
> > $tempArray = array();
> > -
> > +
> > foreach ($tokens as $token)
> > {
> > if ($token->isSignificant() && $token->isValid())
> > - {
> > + {
> > array_push($tempArray,
> > abs($token->getProb() - 0.5));
> > }
> > }
> > @@ -248,35 +265,35 @@
> > arsort($tempArray);
> > $significantTokens = array();
> > $count = 0;
> > -
> > +
> > foreach ($tempArray as $key => $value)
> > {
> > array_push($significantTokens, $tokens[$key]);
> > $count++;
> > -
> > +
> > if ($count ==
> > $config->getValue("bayesian_filter_number_significant_tokens"))
> > - {
> > + {
> > break;
> > }
> > }
> > -
> > +
> > return $significantTokens;
> > }
> > -
> > +
> > /**
> > * @private
> > */
> > function _getBayesProbability($significantTokens)
> > {
> > $productProb = 1;
> > - $productNoProb = 1;
> > -
> > + $productNoProb = 1;
> > +
> > foreach ($significantTokens as $token)
> > {
> > - $productProb *= $token->getProb();
> > - $productNoProb *= (1 - $token->getProb());
> >
> > + $productProb *= $token->getProb();
> > + $productNoProb *= (1 - $token->getProb());
> > }
> > -
> > +
> > return $productProb / ($productProb + $productNoProb);
> > }
> > }
> >
> > Modified:
> > plog/branches/lifetype-1.2/class/security/pipelinerequest.class.php
> > ===================================================================
> > ---
> > plog/branches/lifetype-1.2/class/security/pipelinerequest.cl
> > ass.php 2007-11-28 19:35:13 UTC (rev 6086)
> > +++
> > plog/branches/lifetype-1.2/class/security/pipelinerequest.cl
> > ass.php 2007-11-28 20:30:55 UTC (rev 6087)
> > @@ -8,7 +8,7 @@
> > * such as the incoming HTTP request and a reference to
> > the BlogInfo object
> > * of the blog that is executing the Pipeline.
> > */
> > - class PipelineRequest
> > + class PipelineRequest
> > {
> >
> > var $_httpRequest;
> > @@ -24,13 +24,13 @@
> > */
> > function PipelineRequest( $httpRequest, $blogInfo,
> > $rejected = false )
> > {
> > -
> >
> > +
> > if( is_array($httpRequest))
> > - $this->_httpRequest = new Properties( $httpRequest );
> > + $this->_httpRequest = new Request( $httpRequest );
> > else
> > $this->_httpRequest = $httpRequest;
> > -
> > +
> > $this->_blogInfo = $blogInfo;
> > $this->_requestRejected = $rejected;
> > }
> > @@ -51,15 +51,15 @@
> > {
> > return $this->_httpRequest;
> > }
> > -
> > +
> > /**
> > - * @return Returns a boolean that indicates if this
> > pipeline request has
> > + * @return Returns a boolean that indicates if this pipeline
> > + request has
> > * already been rejected
> > */
> > function getRejectedState()
> > {
> > return $this->_requestRejected;
> > }
> > -
> > +
> > }
> > ?>
> >
> > _______________________________________________
> > pLog-svn mailing list
> > pLog-svn at devel.lifetype.net
> > http://limedaley.com/mailman/listinfo/plog-svn
>
> _______________________________________________
> pLog-svn mailing list
> pLog-svn at devel.lifetype.net
> http://limedaley.com/mailman/listinfo/plog-svn
>
More information about the pLog-svn
mailing list