[pLog-svn] r3414 - plog/trunk/tools

oscar at devel.lifetype.net oscar at devel.lifetype.net
Sun May 14 18:25:02 GMT 2006


Author: oscar
Date: 2006-05-14 18:25:01 +0000 (Sun, 14 May 2006)
New Revision: 3414

Added:
   plog/trunk/tools/play.py
Log:
This script can play back the CSV files generated by genloadtestdata.php and output some hopefully meaningful statistics. Sorry about mixing languages, but I needed threading support and PHP doesn't have it.
In order to use this script:
1) generate a CSV script via genloadtestdata.php
2) call play.py like this: python play.py -n numthreads -f script.csv -w waitmillis

where 'numthreads' is the number of threads that will process the CSV script at the same time, 'script.csv' is the script that we'd like to execute, and 'waitmillis' is the number of milliseconds to wait between requests; the wait applies to every thread.

I have already been able to gather some statistics: for example, normal GET requests work great, but posting comments is awfully slow, I believe because of the Bayesian filter. I need to play around with this a bit more and run a few tests on the very same server (so far I've only done it from home...), but suggestions and ideas are welcome.

== TODO ==
- implement support for running for 'x' seconds, regardless of the number of requests in the script file
- possibility to dump per-thread stats to a csv file
- more testing; I get slightly lower figures when running Apache Bench (ab) with comparable parameters


Added: plog/trunk/tools/play.py
===================================================================
--- plog/trunk/tools/play.py	2006-05-14 14:51:54 UTC (rev 3413)
+++ plog/trunk/tools/play.py	2006-05-14 18:25:01 UTC (rev 3414)
@@ -0,0 +1,208 @@
+#!/usr/bin/python
+
+import os
+import sys
+import re
+import time
+from threading import Thread
+from optparse import OptionParser
+import httplib
+import urlparse
+import urllib
+
+#
+# default settings; redefine these to suit your needs
+#
+MAX_THREADS = 1  # default value of the -n/--thread-number option
+REQUESTS_PER_MINUTE = 0  # default value of -m/--reqs-per-minute; that option is parsed but never read in this script
+NUMBER_OF_REQUESTS  = 10  # not referenced anywhere else in this script
+
+# Thread class that takes care of making the requests: each instance replays
+# every entry of the parsed script, one request at a time, and times each one.
+# All timing attributes hold seconds, measured with time.time().
+class WorkerThread(Thread):
+    def __init__(self, id, data, wait):  # id: thread index; data: list of parsed requests; wait: seconds to sleep after each request
+        Thread.__init__(self)
+        self.id = id  # stored but not read anywhere in this class
+        self.wait = wait  # passed to time.sleep() after every request when > 0
+        self.data = data  # list of dicts with 'type', 'url' and (for POST lines) 'form' keys
+        self.max_request_time = 0  # slowest single request seen so far
+        self.min_request_time = 999999  # starting sentinel, replaced by the first faster request
+        self.avg_time_per_request = 0  # only computed at the end of run()
+        self.total_time = 0  # sum of the individual request times; sleeps are not included
+        self.num_requests = 0  # number of requests completed so far
+        
+    def run(self ):        
+        # replay every request in self.data in order, then print this thread's statistics
+        for line in self.data:
+            # prepare the data: the request target starts as the path component of the URL
+            urldata = urlparse.urlsplit( line['url'] )            
+            request = urldata[2]
+            if urldata[4]:  # NOTE(review): element 4 of urlsplit() is the fragment; the query string is element 3 and is not used
+                request = request + urldata[4];  # NOTE(review): appended without any '?' or '#' separator
+            
+            # store the starting time; creating the connection is part of the measured time
+            req_start_time = time.time()
+        
+            # make the connection to the network-location part of the URL (httplib.HTTPConnection, so plain HTTP)
+            conn = httplib.HTTPConnection( urldata[1] )
+                            
+            if line['type'] == "GET":
+                conn.request( "GET", request )
+            else:  # any type other than "GET" is sent as a form-encoded POST
+                headers = { 'Content-Type': 'application/x-www-form-urlencoded' }
+                params = urllib.urlencode( line['form'] )
+                conn.request( "POST", request, params, headers )
+                            
+            # and get the response; NOTE(review): the response body is never read and conn is never explicitly closed
+            response = conn.getresponse()
+
+            # save the number of requests
+            self.num_requests = self.num_requests + 1            
+            
+            # update the max and min request times if needed
+            req_end_time = time.time()            
+            req_time_diff = req_end_time - req_start_time
+            if req_time_diff < self.min_request_time:
+                self.min_request_time = req_time_diff
+            if req_time_diff > self.max_request_time:
+                self.max_request_time = req_time_diff
+                
+            # save the time it took to perform this request
+            self.total_time = self.total_time + req_time_diff
+                
+            # check if we have to wait; the sleep happens after the timing, so it is not counted
+            if self.wait > 0.0:
+                time.sleep( self.wait )
+            
+        # save the average time; NOTE(review): raises ZeroDivisionError when self.data is empty
+        self.avg_time_per_request = self.total_time / self.num_requests
+            
+        print "requests = " + str( self.num_requests ) + ", total = " + str( self.total_time ) + ", avg = " + str( self.avg_time_per_request ) + ", max = " +  str( self.max_request_time ) + ", min = " + str( self.min_request_time )
+
+# Class that loads a script file and parses it. Each line is read as
+# TYPE,URL or POST,URL,name=value|name=value (see parseLine/parseFormVars);
+# parsed entries are kept in self.data and counted in self.num_lines.
+class ScriptParser:
+    def __init__( self, filename ):
+        self.filename = filename  # path of the script file; opened by parse()
+        self.data = []  # one dict per parsed line, in file order
+        self.num_lines = 0  # number of lines parsed so far
+        
+    def parse(self):
+        # open the file; on IOError print a message and exit the process with status -1
+        try:
+            f = open( self.filename, "r" )
+        except IOError:
+            print "Error opening file!"        
+            sys.exit( -1 )
+        # read each one of its lines, parse it and store it in our
+        # internal structure; every line is parsed, blank ones included
+        for line in f:
+            self.data.append( self.parseLine( line ))
+            self.num_lines = self.num_lines + 1
+        
+        f.close()
+        
+    def parseLine( self, line ):  # returns a dict with 'type', 'url' and, for POST lines, 'form'
+        # split the line into its parts (at most 4: only the first three commas split)
+        parts = line.split( ",", 3 )  # NOTE(review): text after a third comma ends up in parts[3], which is never used
+        result = {}
+        
+        # put everything back together; NOTE(review): a line without a comma raises IndexError on parts[1]
+        result['type'] = parts[0].strip()
+        result['url']  = parts[1].strip()
+
+        # check the form vars; only lines whose type is exactly "POST" get a 'form' entry
+        if result['type'] == "POST":
+            result['form'] = self.parseFormVars( parts[2] )  # NOTE(review): IndexError if the POST line has no third field
+        
+        return result
+
+    def parseFormVars( self, formVars ):  # returns a dict mapping form field name to value
+        # split the line into its parts, one "name=value" pair per "|"-separated piece
+        parts = formVars.split( "|" )
+        # and reprocess each one of the form vars
+        result = {}
+        for part in parts:
+            var = part.split( "=", 2 )  # NOTE(review): with maxsplit 2, a value containing "=" is cut at its first "="
+            result[var[0].strip().replace( "\"", "" )] = var[1].strip()  # double quotes are removed from the name only
+            
+        return result
+# 
+# process command line args and load the script file
+#
+parser = OptionParser()
+parser.add_option( "-n", "--thread-number", type="int", dest="num_threads", default=MAX_THREADS )
+parser.add_option( "-m", "--reqs-per-minute", type="int", dest="reqs_per_minute", default=REQUESTS_PER_MINUTE )  # accepted, but options.reqs_per_minute is never read below
+#parser.add_option( "-r", "--number-of-runs", type="int", dest="num_runs", default=1 )
+parser.add_option( "-w", "--wait", type="int", dest="milliseconds_wait", default=0 )  # whole milliseconds; converted to seconds further down
+parser.add_option( "-f", "--file", type="string", dest="filename" )  # no default given, so optparse leaves it as None when -f is omitted
+(options, args ) = parser.parse_args()
+
+if options.filename == "":  # NOTE(review): only catches an explicitly empty -f value; an omitted -f (None) passes this check
+    print "File name must be provided"
+    sys.exit(-1)
+    
+loader = ScriptParser( options.filename )
+loader.parse()  # reads the whole file into loader.data; exits the process if the file cannot be opened
+print "=== Parameters ==="
+print "  Number of requests in file: " + str( loader.num_lines )
+
+#
+# array for the thread pool; kept so the threads can be joined and queried later
+#        
+pool = []
+
+# show the parameters
+print "  Number of threads: " + str(options.num_threads)
+print "  Milliseconds to wait between request: " +  str(options.milliseconds_wait)
+seconds_wait = options.milliseconds_wait / 1000.0  # time.sleep() in the workers takes seconds
+
+# mark the time when we start
+time_start = time.time()
+
+# initialize and call threads; every thread gets the same full list, so each one replays the whole script
+for t in range( 0, options.num_threads ):
+    thread = WorkerThread( t, loader.data, seconds_wait )
+    pool.append( thread )
+    thread.start()
+    
+# collect statistics information from the threads, once all of them have finished
+print "Waiting for threads to terminate..."
+total_average_time = 0.0
+total_max_time = 0.0
+total_min_time = 99999999.0  # starting sentinel, replaced by the smallest per-thread minimum
+total_requests = 0
+for t in pool:
+    t.join()
+    
+# and the time when we end
+time_end = time.time()
+    
+# process statistics from all threads: sum the averages and request counts, keep the overall min and max
+for t in pool:
+    total_average_time = total_average_time + t.avg_time_per_request
+    total_requests = total_requests + t.num_requests
+    if t.min_request_time < total_min_time:
+        total_min_time =  t.min_request_time
+    if t.max_request_time > total_max_time:
+        total_max_time = t.max_request_time
+        
+# calculate the total average (mean of the per-thread averages); NOTE(review): ZeroDivisionError when run with -n 0
+total_average_time = total_average_time / options.num_threads
+
+# the total time of all threads, but we need to take into account the time
+# we've spent waiting between request per thread
+total_time = (time_end - time_start) - (options.num_threads * seconds_wait * total_requests )  # NOTE(review): total_requests already sums all threads and the threads sleep concurrently, so with more than one thread this looks like it subtracts too much - verify
+
+# number of requests per second; NOTE(review): ZeroDivisionError if total_time comes out as exactly 0
+reqs_per_sec = total_requests / total_time
+
+# output final information
+print "total = " + str( total_time )
+print "req = " + str( total_requests )
+print "reqs/sec = " + str( reqs_per_sec )
+print "avg = " + str( total_average_time )
+print "min = " + str( total_min_time )
+print "max = " + str( total_max_time )
\ No newline at end of file


Property changes on: plog/trunk/tools/play.py
___________________________________________________________________
Name: svn:executable
   + *



More information about the pLog-svn mailing list