[pLog-svn] r6087 - plog/branches/lifetype-1.2/class/security

Oscar Renalias oscar at renalias.net
Thu Nov 29 02:45:01 EST 2007


Unfortunately I haven't got the time to look into this, sorry. Is it a
big problem that the pipeline is running twice? I think it's been
working like this, like, forever :-) But I have to admit that I never
noticed it, and it never affected any code... is it a problem now?

On Nov 28, 2007 10:32 PM, Mark Wu <markplace at gmail.com> wrote:
> Hi Oscar:
>
> The bug is fixed, but another bug appears...
>
> It seems that addcomment(), or rather the whole pipeline, executes twice.
>
> I have no idea what's going on. Can you take a look at it?
>
> Mark
>
>
> > -----Original Message-----
> > From: plog-svn-bounces at devel.lifetype.net
> > [mailto:plog-svn-bounces at devel.lifetype.net] On Behalf Of
> > mark at devel.lifetype.net
> > Sent: Thursday, November 29, 2007 4:31 AM
> > To: plog-svn at devel.lifetype.net
> > Subject: [pLog-svn] r6087 - plog/branches/lifetype-1.2/class/security
> >
> > Author: mark
> > Date: 2007-11-28 15:30:55 -0500 (Wed, 28 Nov 2007) New Revision: 6087
> >
> > Modified:
> >    plog/branches/lifetype-1.2/class/security/bayesianfilter.class.php
> >    plog/branches/lifetype-1.2/class/security/pipelinerequest.class.php
> > Log:
> > fixed bug http://bugs.lifetype.net/view.php?id=1426.
> >
> > But, I found another bug, it seems the pipeline will execute
> > twice, no idea what's going on.
> >
> > Modified:
> > plog/branches/lifetype-1.2/class/security/bayesianfilter.class.php
> > ===================================================================
> > ---
> > plog/branches/lifetype-1.2/class/security/bayesianfilter.cl
> > ass.php       2007-11-28 19:35:13 UTC (rev 6086)
> > +++
> > plog/branches/lifetype-1.2/class/security/bayesianfilter.cl
> > ass.php       2007-11-28 20:30:55 UTC (rev 6087)
> > @@ -17,7 +17,7 @@
> >
> >      /**
> >       * \ingroup Security
> > -     *
> > +     *
> >       * Filters the text posted in a comment by a user, to
> > prevent spam-bots. This
> >       * filter only works if the incoming request has the
> > "op" parameter as
> >       * "AddComment", because then it means that we're
> > posting a comment. If it's not @@ -27,65 +27,82 @@
> >       * in order to filter spam comments out. The filter
> > needs some training but after that it should
> >       * be fairly reliable.
> >       */
> > -    class BayesianFilter extends PipelineFilter
> > +    class BayesianFilter extends PipelineFilter
> >      {
> > -
> > +
> >          function BayesianFilter( $pipelineRequest )
> >          {
> >              $this->PipelineFilter( $pipelineRequest );
> >          }
> > -
> > +
> >          /**
> >          * Processes incoming requests
> >          *
> >          * @return A positive PipelineResult object is the
> > comment is not spam or a negative
> >          * one if it is.
> > -        */
> > +        */
> >          function filter()
> >          {
> >              $config =& Config::getConfig();
> > -
> > +
> >              if (!$config->getValue("bayesian_filter_enabled")) {
> >                  return new PipelineResult(true);
> >              }
> > -
> > -            // get some info
> > +
> > +            // get some info
> >              $blogInfo = $this->_pipelineRequest->getBlogInfo();
> >              $request  = $this->_pipelineRequest->getHttpRequest();
> >              $previouslyRejected =
> > $this->_pipelineRequest->getRejectedState();
> > -
> > -            // we only have to filter the contents if the
> > user is posting a comment
> > +
> > +            // we only have to filter the contents if the user is
> > + posting a comment
> >              // or we're receiving a trackback
> >              // so there's no point in doing anything else if
> > that's not the case
> >              if( $request->getValue( "op" ) != "AddComment"
> > && $request->getValue( "op" ) != "AddTrackback" ) {
> >                  $result = new PipelineResult();
> >                  return $result;
> >              }
> > -
> > +
> >              lt_include(
> > PLOG_CLASS_PATH."class/dao/articlecomments.class.php" );
> >              lt_include(
> > PLOG_CLASS_PATH."class/dao/articles.class.php" );
> > -
> > +
> > +            lt_include(
> > PLOG_CLASS_PATH."class/data/filter/htmlfilter.class.php" );
> > +            lt_include(
> > PLOG_CLASS_PATH."class/data/filter/urlconverter.class.php" );
> > +            lt_include(
> > PLOG_CLASS_PATH."class/data/filter/allowedhtmlfilter.class.php" );
> > +            lt_include(
> > + PLOG_CLASS_PATH."class/data/filter/xhtmlizefilter.class.php" );
> > +
> >              // if it's a trackback, the data is in another place...
> >              $parentId = "";
> >              $isTrackback = ($request->getValue( "op" ) ==
> > "AddTrackback");
> >              if( $isTrackback ) {
> > -                $commentText = $request->getValue( "excerpt" );
> > -                $commentTopic = $request->getValue( "title" );
> > -                $articleId = $request->getValue( "id" );
> > -                // that's all we can get from a trackback...
> > -                $userName = $request->getValue( "blog_name" );
> > -                $userUrl = $request->getValue( "url" );
> > -                $userEmail = $request->getValue( "" );
> > +                $f = new HtmlFilter();
> > +                $userName = $request->getFilteredValue(
> > "blog_name", $f );
> > +                $userEmail = $request->getFilteredValue( "", $f );
> > +                $commentTopic = $request->getFilteredValue(
> > "title", $f );
> > +                $commentText = $request->getFilteredValue(
> > "excerpt",
> > + $f );
> > +
> > +                     $f = new HtmlFilter();
> > +                     $f->addFilter( new UrlConverter());
> > +                     $userUrl = $request->getFilteredValue(
> > "url", $f );
> > +
> > +                $articleId = (int) $request->getValue( "id" );
> >              }
> >              else {
> >                  // or else let's assume that we're dealing
> > with a comment
> > -                $commentText = $request->getValue( "commentText" );
> > -                $commentTopic = $request->getValue( "commentTopic" );
> > -                $userName = $request->getValue( "userName" );
> > -                $userEmail = $request->getValue( "userEmail" );
> > -                $userUrl = $request->getValue( "userUrl" );
> > -                $articleId = $request->getValue( "articleId" );
> > -                $parentId  = $request->getValue( "parentId"
> > );
> > +                $f = new HtmlFilter();
> > +                $userName = $request->getFilteredValue(
> > "userName", $f );
> > +                $userEmail = $request->getFilteredValue(
> > "userEmail", $f );
> > +                $commentTopic = $request->getFilteredValue(
> > + "commentTopic", $f );
> > +
> > +                     $f = new HtmlFilter();
> > +                     $f->addFilter( new UrlConverter());
> > +                     $userUrl = $request->getFilteredValue(
> > "userUrl", $f );
> > +
> > +                     $f = new AllowedHtmlFilter();
> > +                     $f->addFilter( new XhtmlizeFilter());
> > +                     $commentText =
> > $request->getFilteredValue( "commentText", $f );
> > +
> > +                $articleId = (int) $request->getValue( "articleId" );
> > +                $parentId  = (int) $request->getValue( "parentId" );
> >              }
> >
> >                       // the two checks below are duplicating
> > some of the code in AddCommentAction @@ -102,30 +119,30 @@
> >                               // is no need to process the
> > whole comments even if it's spam, the request will not be
> >                               // processed by
> > AddCommentAction for this very same reason
> >                  $result = new PipelineResult();
> > -                return $result;
> > +                return $result;
> >                       }
> > -
> > +
> >                       // and if it does, are comments enabled
> > for it anyway?
> >                       $blogSettings = $blogInfo->getSettings();
> >              if( $article->getCommentsEnabled() == false ||
> > $blogSettings->getValue ( "comments_enabled" ) == false ) {
> >                               // we let this request pass
> > through although it may be spam, since it will be blocked
> > -                             // later on by AddCommentAction
> > because comments aren't enabled
> > +                             // later on by AddCommentAction
> > because comments aren't enabled
> >                  $result = new PipelineResult();
> > -                return $result;
> > +                return $result;
> >                       }
> > -
> > +
> >              if( $parentId == "" )
> >                  $parentId = 0;
> >
> >              $spamicity =
> > $this->getSpamProbability($blogInfo->getId(), $commentTopic,
> > $commentText, $userName, $userEmail, $userUrl);
> > -
> > +
> >              if ($spamicity >=
> > $config->getValue("bayesian_filter_spam_probability_treshold"))
> >              {
> >                  // need this to get the locale
> >                  $plr = $this->getPipelineRequest();
> >                  $bi = $plr->getBlogInfo();
> >                  $locale = $bi->getLocale();
> > -
> > +
> >                  // now we need to check what we have to do
> > with this comment... either throw it away
> >                  // or keep it in the database
> >
> > @@ -141,11 +158,11 @@
> >                                                     null,
> > $userName, $userEmail, $userUrl, $clientIp,
> >                                                     0,
> > COMMENT_STATUS_SPAM );
> >                      // mark it as a trackback instead of a
> > user comment...
> > -
> > +
> >                      if( $isTrackback ) {
> >                          $comment->setType( COMMENT_TYPE_TRACKBACK );
> >                      }
> > -
> > +
> >                      // add the comment to the db
> >                      $comments->addComment( $comment );
> >                  }
> > @@ -160,25 +177,25 @@
> >                  $result = new PipelineResult(true);
> >                  $spam = false;
> >              }
> > -
> > +
> >              if ( !$previouslyRejected )
> >              {
> >                  // train the filter with the message, be it
> > spam or not...
> > -                             lt_include(
> > PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php"
> > );
> > +                             lt_include(
> > +PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php" );
> >                  BayesianFilterCore::train(
> > $blogInfo->getId(), $commentTopic, $commentText, $userName,
> > $userEmail,
> >                                             $userUrl, $spam );
> >              }
> >              else
> >              {
> > -             // This is a rejected message. If we think that
> > this is non-spam,
> > +             // This is a rejected message. If we think that this is
> > + non-spam,
> >               // we want to untrain it and then retrain it as spam
> > -                             lt_include(
> > PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php"
> > );
> > +                             lt_include(
> > +PLOG_CLASS_PATH."class/bayesian/bayesianfiltercore.class.php" );
> >               if ( !$spam )
> >               {
> >                       // Un-train this non-spam
> >
> > BayesianFilterCore::untrain( $blogInfo->getId(),
> > $commentTopic, $commentText, $userName, $userEmail,
> >
> >                          $userUrl, $spam );
> > -
> >
> > +
> >                                       // train this as spam
> >
> > BayesianFilterCore::train( $blogInfo->getId(), $commentTopic,
> > $commentText, $userName, $userEmail,
> >
> >                          $userUrl, true );
> > @@ -188,10 +205,10 @@
> >              //print "<h1>" . number_format($spamicity * 100,
> > 0) . "% of spamicity</h1>";
> >              return $result;
> >          }
> > -
> > +
> >          /**
> >          * @private
> > -        */
> > +        */
> >          function getSpamProbability($blogId, $topic, $text,
> > $userName, $userEmail, $userUrl)
> >          {
> >              lt_include(
> > PLOG_CLASS_PATH."class/bayesian/bayesiantokenizer.class.php"
> > ); @@ -207,40 +224,40 @@
> >
> >              $tokens = array_merge($tokensTopic, $tokensText,
> > $tokensUserName, $tokensUserEmail, $tokensUserUrl);
> >              $significantTokens =
> > BayesianFilter::_getMostSignificantTokens($blogId, $tokens);
> > -
> > +
> >              return
> > BayesianFilter::_getBayesProbability($significantTokens);
> >          }
> > -
> > +
> >          /**
> >          * @private
> >          */
> >          function _getMostSignificantTokens($blogId, $tokens)
> > -        {
> > +        {
> >              lt_include(
> > PLOG_CLASS_PATH."class/dao/bayesiantokens.class.php" );
> >              lt_include(
> > PLOG_CLASS_PATH."class/dao/bayesianfilterinfos.class.php" );
> >
> > -            $config =& Config::getConfig();
> > -
> > +            $config =& Config::getConfig();
> > +
> >              $bayesianFilterInfos = new BayesianFilterInfos();
> >              $bayesianFilterInfo  =
> > $bayesianFilterInfos->getBlogBayesianFilterInfo($blogId);
> > -
> > +
> >              $totalSpam = $bayesianFilterInfo->getTotalSpam();
> >              $totalNonSpam = $bayesianFilterInfo->getTotalNonSpam();
> > -
> > +
> >              $bayesianTokens = new BayesianTokens();
> > -
> > +
> >              foreach ($tokens as $token)
> >              {
> >                  $bayesianTokens->updateOccurrences($blogId,
> > $token, 0, 0, $totalSpam, $totalNonSpam, false);
> >              }
> > -
> > -            $tokens =
> > $bayesianTokens->getBayesianTokensFromArray($blogId,
> > $tokens);
> > +
> > +            $tokens =
> > + $bayesianTokens->getBayesianTokensFromArray($blogId, $tokens);
> >              $tempArray = array();
> > -
> > +
> >              foreach ($tokens as $token)
> >              {
> >                  if ($token->isSignificant() && $token->isValid())
> > -                {
> > +                {
> >                      array_push($tempArray,
> > abs($token->getProb() - 0.5));
> >                  }
> >              }
> > @@ -248,35 +265,35 @@
> >              arsort($tempArray);
> >              $significantTokens = array();
> >              $count = 0;
> > -
> > +
> >              foreach ($tempArray as $key => $value)
> >              {
> >                  array_push($significantTokens, $tokens[$key]);
> >                  $count++;
> > -
> > +
> >                  if ($count ==
> > $config->getValue("bayesian_filter_number_significant_tokens"))
> > -                {
> > +                {
> >                      break;
> >                  }
> >              }
> > -
> > +
> >              return $significantTokens;
> >          }
> > -
> > +
> >          /**
> >          * @private
> >          */
> >          function _getBayesProbability($significantTokens)
> >          {
> >              $productProb   = 1;
> > -            $productNoProb = 1;
> > -
> > +            $productNoProb = 1;
> > +
> >              foreach ($significantTokens as $token)
> >              {
> > -                $productProb   *= $token->getProb();
> > -                $productNoProb *= (1 - $token->getProb());
> >
> > +                $productProb   *= $token->getProb();
> > +                $productNoProb *= (1 - $token->getProb());
> >              }
> > -
> > +
> >              return $productProb / ($productProb + $productNoProb);
> >          }
> >      }
> >
> > Modified:
> > plog/branches/lifetype-1.2/class/security/pipelinerequest.class.php
> > ===================================================================
> > ---
> > plog/branches/lifetype-1.2/class/security/pipelinerequest.cl
> > ass.php       2007-11-28 19:35:13 UTC (rev 6086)
> > +++
> > plog/branches/lifetype-1.2/class/security/pipelinerequest.cl
> > ass.php       2007-11-28 20:30:55 UTC (rev 6087)
> > @@ -8,7 +8,7 @@
> >       * such as the incoming HTTP request and a reference to
> > the BlogInfo object
> >       * of the blog that is executing the Pipeline.
> >       */
> > -     class PipelineRequest
> > +     class PipelineRequest
> >       {
> >
> >       var $_httpRequest;
> > @@ -24,13 +24,13 @@
> >           */
> >          function PipelineRequest( $httpRequest, $blogInfo,
> > $rejected = false )
> >          {
> > -
> >
> > +
> >              if( is_array($httpRequest))
> > -             $this->_httpRequest = new Properties( $httpRequest );
> > +             $this->_httpRequest = new Request( $httpRequest );
> >              else
> >                       $this->_httpRequest  = $httpRequest;
> > -
> > +
> >              $this->_blogInfo         = $blogInfo;
> >              $this->_requestRejected  = $rejected;
> >          }
> > @@ -51,15 +51,15 @@
> >          {
> >               return $this->_httpRequest;
> >          }
> > -
> > +
> >          /**
> > -        * @return Returns a boolean that indicates if this
> > pipeline request has
> > +        * @return Returns a boolean that indicates if this pipeline
> > + request has
> >          * already been rejected
> >          */
> >          function getRejectedState()
> >          {
> >               return $this->_requestRejected;
> >          }
> > -
> > +
> >      }
> >  ?>
> >
> > _______________________________________________
> > pLog-svn mailing list
> > pLog-svn at devel.lifetype.net
> > http://limedaley.com/mailman/listinfo/plog-svn
>
> _______________________________________________
> pLog-svn mailing list
> pLog-svn at devel.lifetype.net
> http://limedaley.com/mailman/listinfo/plog-svn
>


More information about the pLog-svn mailing list