[pLog-svn] r5479 - in plugins/branches/lifetype-1.2/related: . class/action class/view locale templates

pwestbro at devel.lifetype.net pwestbro at devel.lifetype.net
Sat Jun 2 05:32:35 EDT 2007


Author: pwestbro
Date: 2007-06-02 05:32:34 -0400 (Sat, 02 Jun 2007)
New Revision: 5479

Modified:
   plugins/branches/lifetype-1.2/related/class/action/pluginrelatedupdateconfigaction.class.php
   plugins/branches/lifetype-1.2/related/class/view/pluginrelatedconfigview.class.php
   plugins/branches/lifetype-1.2/related/locale/locale_en_UK.php
   plugins/branches/lifetype-1.2/related/pluginrelated.class.php
   plugins/branches/lifetype-1.2/related/templates/related.template
Log:
Updated the related plugin with the following features:
1) Allow the keywords to be retrieved from the body of the post, and not
just the title
2) Allow a list of keywords to be banned, so that these words will not be used
to find related posts


Modified: plugins/branches/lifetype-1.2/related/class/action/pluginrelatedupdateconfigaction.class.php
===================================================================
--- plugins/branches/lifetype-1.2/related/class/action/pluginrelatedupdateconfigaction.class.php	2007-06-02 04:29:21 UTC (rev 5478)
+++ plugins/branches/lifetype-1.2/related/class/action/pluginrelatedupdateconfigaction.class.php	2007-06-02 09:32:34 UTC (rev 5479)
@@ -30,6 +30,8 @@
 		var $_numRelatedArticles;
 		var $_minWordLength;
 		var $_refreshInterval;
+		var $_parseBody;
+		var $_bannedKeywords;
 
 
     	/**
@@ -51,6 +53,10 @@
             $this->_numRelatedArticles = $this->_request->getValue( "numArticles" );
             $this->_minWordLength = $this->_request->getValue( "minWordLength" );
             $this->_refreshInterval = $this->_request->getValue( "interval" );
+            $this->_parseBody = $this->_request->getValue( "parseBody" );
+            $this->_parseBody = ($this->_parseBody != "" );			
+            $this->_bannedKeywords = $this->_request->getValue( "bannedKeywords" );
+
             
             lt_include( PLOG_CLASS_PATH."class/data/validator/integervalidator.class.php" );
                 
@@ -107,6 +113,8 @@
             $blogSettings->setValue( "plugin_related_num_articles", $this->_numRelatedArticles );
             $blogSettings->setValue( "plugin_related_min_word_length", $this->_minWordLength );
             $blogSettings->setValue( "plugin_related_refresh_interval", $this->_refreshInterval );
+            $blogSettings->setValue( "plugin_related_extract_keywords_from_body", $this->_parseBody );
+            $blogSettings->setValue( "plugin_related_banned_keywords", $this->_bannedKeywords );
 
             $this->_blogInfo->setSettings( $blogSettings ); 
 		

Modified: plugins/branches/lifetype-1.2/related/class/view/pluginrelatedconfigview.class.php
===================================================================
--- plugins/branches/lifetype-1.2/related/class/view/pluginrelatedconfigview.class.php	2007-06-02 04:29:21 UTC (rev 5478)
+++ plugins/branches/lifetype-1.2/related/class/view/pluginrelatedconfigview.class.php	2007-06-02 09:32:34 UTC (rev 5479)
@@ -39,6 +39,10 @@
 			$numArticles = $blogSettings->getValue( "plugin_related_num_articles" );			
 			$minWordLength = $blogSettings->getValue( "plugin_related_min_word_length" );			
 			$refreshInterval = $blogSettings->getValue( "plugin_related_refresh_interval" );			
+			$parseBody = $blogSettings->getValue( "plugin_related_extract_keywords_from_body" );			
+			$bannedKeywords = $blogSettings->getValue( "plugin_related_banned_keywords" );
+			if ($bannedKeywords == "") $bannedKeywords =  implode( ",", array( 'a', 'an', 'the', 'and', 'of', 'i', 'its' , 'to', 'is', 'in', 'with', 'for', 'as', 'that', 'on', 'at', 'this', 'my', 'was', 'our', 'it', 'you', 'we', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '10', 'about', 'after', 'all', 'almost', 'along', 'also', 'amp', 'another', 'any', 'are', 'area', 'around', 'available', 'back', 'be', 'because', 'been', 'being', 'best', 'better', 'big', 'bit', 'both', 'but', 'by', 'c', 'came', 'can', 'capable', 'control', 'could', 'course', 'd', 'dan', 'day', 'decided', 'did', 'didn', 'different', 'div', 'do', 'doesn', 'don', 'down', 'drive', 'e', 'each', 'easily', 'easy', 'edition', 'end', 'enough', 'even', 'every', 'example', 'few', 'find', 'first', 'found', 'from', 'get', 'go', 'going', 'good', 'got', 'gt', 'had', 'hard', 'has', 'have', 'he', 'her', 'here', 'how', 'if', 'into', 'isn', 'just', 'know', 'last', 'left', 'li', 'like', 'little', 'll', 'long', 'look', 'lot', 'lt', 'm', 'made', 'make', 'many', 'mb', 'me', 'menu', 'might', 'mm', 'more', 'most', 'much', 'name', 'nbsp', 'need', 'new', 'no', 'not', 'now', 'number', 'off', 'old', 'one', 'only', 'or', 'original', 'other', 'out', 'over', 'part', 'place', 'point', 'pretty', 'probably', 'problem', 'put', 'quite', 'quot', 'r', 're', 'really', 'results', 'right', 's', 'same', 'saw', 'see', 'set', 'several', 'she', 'sherree', 'should', 'since', 'size', 'small', 'so', 'some', 'something', 'special', 'still', 'stuff', 'such', 'sure', 'system', 't', 'take', 'than', 'their', 'them', 'then', 'there', 'these', 'they', 'thing', 'things', 'think', 'those', 'though', 'through', 'time', 'today', 'together', 'too', 'took', 'two', 'up', 'us', 'use', 'used', 'using', 've', 'very', 'want', 'way', 'well', 'went', 'were', 'what', 'when', 'where', 'which', 'while', 'white', 'who', 'will', 'would', 'your'));
+
 			
 
 			// create a view and export the settings to the template
@@ -46,6 +50,8 @@
 			$this->setValue( "numArticles", $numArticles );
 			$this->setValue( "minWordLength", $minWordLength );
 			$this->setValue( "interval", $refreshInterval );
+			$this->setValue( "parseBody", $parseBody );
+			$this->setValue( "bannedKeywords", $bannedKeywords );
 
 			parent::render();
 		}

Modified: plugins/branches/lifetype-1.2/related/locale/locale_en_UK.php
===================================================================
--- plugins/branches/lifetype-1.2/related/locale/locale_en_UK.php	2007-06-02 04:29:21 UTC (rev 5478)
+++ plugins/branches/lifetype-1.2/related/locale/locale_en_UK.php	2007-06-02 09:32:34 UTC (rev 5479)
@@ -12,10 +12,12 @@
 $messages["related_invalid_num_articles"] = "Number of articles needs to be an integer";
 $messages["related_missing_min_length"] = "Minimum keyword length needs to be specified";
 $messages["related_invalid_min_length"] = "Minumum keyword length needs to be an integer";
+$messages["related_banned_keywords"] = "Keywords that should not be used to find related posts (comma separated).";
 
 $messages["related_articles"] = "Number of related articles to return.";
 $messages["related_word_length"] = "Minimum length of keyword used to generate related article.";
 $messages["related_cache"] = "Lifetime for the related article cache.";
+$messages["parse_body"] = "Parse the body of articles to generate keywords.  (This may cause generating related posts to take longer.)";
 
 $messages["label_configuration"] = "Configuration";
 $messages["label_enable"] = "Enable";
@@ -23,5 +25,7 @@
 $messages["related_max_articles"] = "Number Articles";
 $messages["related_min_word_length"] = "Minimum Keyword Length";
 $messages["related_cache_lifetime"] = "Cache Lifetime";
+$messages["related_parse_body"] = "Parse Body";
+$messages["banned_keywords"] = "Banned Keywords";
 
 ?>
\ No newline at end of file

Modified: plugins/branches/lifetype-1.2/related/pluginrelated.class.php
===================================================================
--- plugins/branches/lifetype-1.2/related/pluginrelated.class.php	2007-06-02 04:29:21 UTC (rev 5478)
+++ plugins/branches/lifetype-1.2/related/pluginrelated.class.php	2007-06-02 09:32:34 UTC (rev 5479)
@@ -27,6 +27,8 @@
 		var $minWordLength;
 		var $refreshInterval;
         var $cacheFolder;
+        var $extractKeywordsFromBody;
+        var $bannedWords;
 		
 		function PluginRelated( $source = "" )
 		{
@@ -61,6 +63,10 @@
 			$this->numRelatedArticles = $blogSettings->getValue( "plugin_related_num_articles" );
 			$this->minWordLength = $blogSettings->getValue( "plugin_related_min_word_length" );
 			$this->refreshInterval = $blogSettings->getValue( "plugin_related_refresh_interval" );
+			$this->extractKeywordsFromBody = $blogSettings->getValue( "plugin_related_extract_keywords_from_body" );
+		    $this->bannedWords = $blogSettings->getValue( "plugin_related_banned_keywords" );
+		    $this->bannedWords = explode(",", strtolower($this->bannedWords));
+
 			
            if(!$this->isEnabled())
                 return;
@@ -107,36 +113,28 @@
                     return $relatedArticles;
                 }
     
-                // Get the title of the article
-                // XXX NOTE: There probably is a better way to get a list of 
-                // keywords from a post.  But, until then, just use the title. 
-                // (Assuming the author used a relevant title)
-    
-                $title = $article->getTopic();
+ 
+                // Get the keywords
+                $keywords = $this->getArticleKeywords($article);
                 
-                // Split out the words
-                $keywords = explode(" ", $title);
                 
-                
                 foreach($keywords as $word) {
-                    if (strlen($word) >= $this->minWordLength) {
-                        // Build the list of articles that have this keyword
-                        lt_include( PLOG_CLASS_PATH."class/dao/searchengine.class.php" );
-                        lt_include( PLOG_CLASS_PATH."class/dao/articlestatus.class.php" );
-                        $searchEngine = new SearchEngine();
-                        $results = $searchEngine->search( $this->blogInfo->getId(), $word );	
-                        
-                        // Now add the article results to the internal list of related articles
-    
-                        foreach( $results as $result ) {
-                            if( $result->getType() == SEARCH_RESULT_ARTICLE ) {
-                                $foundArticle = $result->getArticle();
-                                if ($foundArticle->getId() != $article->getId() ) {
-                                    if (isset($tempList[$foundArticle->getId() ])) 
-                                        $tempList[$foundArticle->getId() ] += 1;
-                                    else 
-                                        $tempList[$foundArticle->getId() ] = 1;
-                                }
+                    // Build the list of articles that have this keyword
+                    lt_include( PLOG_CLASS_PATH."class/dao/searchengine.class.php" );
+                    lt_include( PLOG_CLASS_PATH."class/dao/articlestatus.class.php" );
+                    $searchEngine = new SearchEngine();
+                    $results = $searchEngine->search( $this->blogInfo->getId(), $word );	
+                    
+                    // Now add the article results to the internal list of related articles
+
+                    foreach( $results as $result ) {
+                        if( $result->getType() == SEARCH_RESULT_ARTICLE ) {
+                            $foundArticle = $result->getArticle();
+                            if ($foundArticle->getId() != $article->getId() ) {
+                                if (isset($tempList[$foundArticle->getId() ])) 
+                                    $tempList[$foundArticle->getId() ] += 1;
+                                else 
+                                    $tempList[$foundArticle->getId() ] = 1;
                             }
                         }
                     }
@@ -262,7 +260,45 @@
             
             return true;
 	    }
+	    
+	    // Returns an array with the keywords for an article
+	    function getArticleKeywords( $article )
+	    {
+            // Get the title of the article
+            // XXX NOTE: There probably is a better way to get a list of 
+            // keywords from a post.  But, until then, just use the title. 
+            // (Assuming the author used a relevant title)
 
+            $text = $article->getTopic();
+            
+            if ($this->extractKeywordsFromBody)
+            {
+                // Get the body
+                $body = strip_tags($article->getText());
+                
+                $text = $text . " " . $body;
+            
+            }
+    
+            // Split keywords
+            $words = preg_split('/\s*[\s+\.|\?|,|(|)|\-+|\'|\"|!|=|;|×|\$|\/|:|{|}]\s*/i', strtolower($text));
+            $keywords = array_unique( $words );
+
+            $filteredKeywords = Array();
+            foreach($keywords as $word) {
+                // Make sure that it is not in the banned list
+                $found = in_array($word,$this->bannedWords);
+                if(($found === FALSE) && (strlen($word) >= $this->minWordLength)) {
+                    if (!isset($filteredKeywords[$word])) 
+                        $filteredKeywords[$word] = $word;
+                }
+            }
+
+            return $filteredKeywords;
+	    }
+	    
+	    
+
 	    function getPluginConfigurationKeys()
 		{			
 			return( Array(
@@ -270,6 +306,7 @@
 				Array( "name" => "plugin_related_num_articles", "type" => "integer" ),
 				Array( "name" => "plugin_related_min_word_length", "type" => "integer" ),
 				Array( "name" => "plugin_related_refresh_interval", "type" => "list", "options" => Array( "-1" => "-1", "0" => "0", "1" => "1", "24" => "24", "168" => "168", "720" => "720" )),
+				Array( "name" => "plugin_related_extract_keywords_from_body", "type" => "boolean" ),
 			));
 		}
 		

Modified: plugins/branches/lifetype-1.2/related/templates/related.template
===================================================================
--- plugins/branches/lifetype-1.2/related/templates/related.template	2007-06-02 04:29:21 UTC (rev 5478)
+++ plugins/branches/lifetype-1.2/related/templates/related.template	2007-06-02 09:32:34 UTC (rev 5479)
@@ -50,6 +50,32 @@
  </div>
 
   
+ <div class="field">
+   <label for="width">{$locale->tr("related_parse_body")}</label>
+   <div class="formHelp">
+    <input class="checkbox" type="checkbox" name="parseBody"
+    	   id="parseBody" {if $parseBody} checked="checked" {/if}
+    	   {user_cannot_override
+             key=plugin_related_extract_keywords_from_body}disabled="disabled"
+           {/user_cannot_override} 
+    	   value="1" />
+    	   {$locale->tr("parse_body")}
+  </div>
+  </div>
+  
+  <div class="field">
+   <label for="width">{$locale->tr("banned_keywords")}</label>
+   <span class="required">*</span>
+   <div class="formHelp">{$locale->tr("related_banned_keywords")}</div>
+   <input class="text" type="text" name="bannedKeywords"
+           {user_cannot_override
+               key=plugin_related_banned_keywords}readonly="readonly"
+           {/user_cannot_override}
+          id="bannedKeywords" value="{$bannedKeywords}" width="10" /> 
+  </div>
+
+  
+ 
  </fieldset>  
 
  <div class="buttons">



More information about the pLog-svn mailing list