1#! /usr/bin/env python
2#
3# Class for profiling python code. rev 1.0  6/2/94
4#
5# Based on prior profile module by Sjoerd Mullender...
6#   which was hacked somewhat by: Guido van Rossum
7
8"""Class for profiling Python code."""
9
10# Copyright 1994, by InfoSeek Corporation, all rights reserved.
11# Written by James Roskind
12#
13# Permission to use, copy, modify, and distribute this Python software
14# and its associated documentation for any purpose (subject to the
15# restriction in the following sentence) without fee is hereby granted,
16# provided that the above copyright notice appears in all copies, and
17# that both that copyright notice and this permission notice appear in
18# supporting documentation, and that the name of InfoSeek not be used in
19# advertising or publicity pertaining to distribution of the software
20# without specific, written prior permission.  This permission is
21# explicitly restricted to the copying and modification of the software
22# to remain in Python, compiled Python, or other languages (such as C)
23# wherein the modified or derived code is exclusively imported into a
24# Python module.
25#
26# INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
27# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
28# FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY
29# SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
30# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
31# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
32# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
33
34
35
36import sys
37import os
38import time
39import marshal
40from optparse import OptionParser
41
42__all__ = ["run", "runctx", "help", "Profile"]
43
44# Sample timer for use with
45#i_count = 0
46#def integer_timer():
47#       global i_count
48#       i_count = i_count + 1
49#       return i_count
50#itimes = integer_timer # replace with C coded timer returning integers
51
52#**************************************************************************
53# The following are the static member functions for the profiler class
54# Note that an instance of Profile() is *not* needed to call them.
55#**************************************************************************
56
57def run(statement, filename=None, sort=-1):
58    """Run statement under profiler optionally saving results in filename
59
60    This function takes a single argument that can be passed to the
61    "exec" statement, and an optional file name.  In all cases this
62    routine attempts to "exec" its first argument and gather profiling
63    statistics from the execution. If no file name is present, then this
64    function automatically prints a simple profiling report, sorted by the
65    standard name string (file/line/function-name) that is presented in
66    each line.
67    """
68    prof = Profile()
69    try:
70        prof = prof.run(statement)
71    except SystemExit:
72        pass
73    if filename is not None:
74        prof.dump_stats(filename)
75    else:
76        return prof.print_stats(sort)
77
78def runctx(statement, globals, locals, filename=None, sort=-1):
79    """Run statement under profiler, supplying your own globals and locals,
80    optionally saving results in filename.
81
82    statement and filename have the same semantics as profile.run
83    """
84    prof = Profile()
85    try:
86        prof = prof.runctx(statement, globals, locals)
87    except SystemExit:
88        pass
89
90    if filename is not None:
91        prof.dump_stats(filename)
92    else:
93        return prof.print_stats(sort)
94
95# Backwards compatibility.
96def help():
97    print "Documentation for the profile module can be found "
98    print "in the Python Library Reference, section 'The Python Profiler'."
99
100if hasattr(os, "times"):
101    def _get_time_times(timer=os.times):
102        t = timer()
103        return t[0] + t[1]
104
105# Using getrusage(3) is better than clock(3) if available:
106# on some systems (e.g. FreeBSD), getrusage has a higher resolution
107# Furthermore, on a POSIX system, returns microseconds, which
108# wrap around after 36min.
109_has_res = 0
110try:
111    import resource
112    resgetrusage = lambda: resource.getrusage(resource.RUSAGE_SELF)
113    def _get_time_resource(timer=resgetrusage):
114        t = timer()
115        return t[0] + t[1]
116    _has_res = 1
117except ImportError:
118    pass
119
120class Profile:
121    """Profiler class.
122
123    self.cur is always a tuple.  Each such tuple corresponds to a stack
124    frame that is currently active (self.cur[-2]).  The following are the
125    definitions of its members.  We use this external "parallel stack" to
126    avoid contaminating the program that we are profiling. (old profiler
127    used to write into the frames local dictionary!!) Derived classes
128    can change the definition of some entries, as long as they leave
129    [-2:] intact (frame and previous tuple).  In case an internal error is
130    detected, the -3 element is used as the function name.
131
132    [ 0] = Time that needs to be charged to the parent frame's function.
133           It is used so that a function call will not have to access the
134           timing data for the parent frame.
135    [ 1] = Total time spent in this frame's function, excluding time in
136           subfunctions (this latter is tallied in cur[2]).
137    [ 2] = Total time spent in subfunctions, excluding time executing the
138           frame's function (this latter is tallied in cur[1]).
139    [-3] = Name of the function that corresponds to this frame.
140    [-2] = Actual frame that we correspond to (used to sync exception handling).
141    [-1] = Our parent 6-tuple (corresponds to frame.f_back).
142
143    Timing data for each function is stored as a 5-tuple in the dictionary
144    self.timings[].  The index is always the name stored in self.cur[-3].
145    The following are the definitions of the members:
146
147    [0] = The number of times this function was called, not counting direct
148          or indirect recursion,
149    [1] = Number of times this function appears on the stack, minus one
150    [2] = Total time spent internal to this function
151    [3] = Cumulative time that this function was present on the stack.  In
152          non-recursive functions, this is the total execution time from start
153          to finish of each invocation of a function, including time spent in
154          all subfunctions.
155    [4] = A dictionary indicating for each function name, the number of times
156          it was called by us.
157    """
158
159    bias = 0  # calibration constant
160
161    def __init__(self, timer=None, bias=None):
162        self.timings = {}
163        self.cur = None
164        self.cmd = ""
165        self.c_func_name = ""
166
167        if bias is None:
168            bias = self.bias
169        self.bias = bias     # Materialize in local dict for lookup speed.
170
171        if not timer:
172            if _has_res:
173                self.timer = resgetrusage
174                self.dispatcher = self.trace_dispatch
175                self.get_time = _get_time_resource
176            elif hasattr(time, 'clock'):
177                self.timer = self.get_time = time.clock
178                self.dispatcher = self.trace_dispatch_i
179            elif hasattr(os, 'times'):
180                self.timer = os.times
181                self.dispatcher = self.trace_dispatch
182                self.get_time = _get_time_times
183            else:
184                self.timer = self.get_time = time.time
185                self.dispatcher = self.trace_dispatch_i
186        else:
187            self.timer = timer
188            t = self.timer() # test out timer function
189            try:
190                length = len(t)
191            except TypeError:
192                self.get_time = timer
193                self.dispatcher = self.trace_dispatch_i
194            else:
195                if length == 2:
196                    self.dispatcher = self.trace_dispatch
197                else:
198                    self.dispatcher = self.trace_dispatch_l
199                # This get_time() implementation needs to be defined
200                # here to capture the passed-in timer in the parameter
201                # list (for performance).  Note that we can't assume
202                # the timer() result contains two values in all
203                # cases.
204                def get_time_timer(timer=timer, sum=sum):
205                    return sum(timer())
206                self.get_time = get_time_timer
207        self.t = self.get_time()
208        self.simulate_call('profiler')
209
210    # Heavily optimized dispatch routine for os.times() timer
211
212    def trace_dispatch(self, frame, event, arg):
213        timer = self.timer
214        t = timer()
215        t = t[0] + t[1] - self.t - self.bias
216
217        if event == "c_call":
218            self.c_func_name = arg.__name__
219
220        if self.dispatch[event](self, frame,t):
221            t = timer()
222            self.t = t[0] + t[1]
223        else:
224            r = timer()
225            self.t = r[0] + r[1] - t # put back unrecorded delta
226
227    # Dispatch routine for best timer program (return = scalar, fastest if
228    # an integer but float works too -- and time.clock() relies on that).
229
230    def trace_dispatch_i(self, frame, event, arg):
231        timer = self.timer
232        t = timer() - self.t - self.bias
233
234        if event == "c_call":
235            self.c_func_name = arg.__name__
236
237        if self.dispatch[event](self, frame, t):
238            self.t = timer()
239        else:
240            self.t = timer() - t  # put back unrecorded delta
241
242    # Dispatch routine for macintosh (timer returns time in ticks of
243    # 1/60th second)
244
245    def trace_dispatch_mac(self, frame, event, arg):
246        timer = self.timer
247        t = timer()/60.0 - self.t - self.bias
248
249        if event == "c_call":
250            self.c_func_name = arg.__name__
251
252        if self.dispatch[event](self, frame, t):
253            self.t = timer()/60.0
254        else:
255            self.t = timer()/60.0 - t  # put back unrecorded delta
256
257    # SLOW generic dispatch routine for timer returning lists of numbers
258
259    def trace_dispatch_l(self, frame, event, arg):
260        get_time = self.get_time
261        t = get_time() - self.t - self.bias
262
263        if event == "c_call":
264            self.c_func_name = arg.__name__
265
266        if self.dispatch[event](self, frame, t):
267            self.t = get_time()
268        else:
269            self.t = get_time() - t # put back unrecorded delta
270
271    # In the event handlers, the first 3 elements of self.cur are unpacked
272    # into vrbls w/ 3-letter names.  The last two characters are meant to be
273    # mnemonic:
274    #     _pt  self.cur[0] "parent time"   time to be charged to parent frame
275    #     _it  self.cur[1] "internal time" time spent directly in the function
276    #     _et  self.cur[2] "external time" time spent in subfunctions
277
278    def trace_dispatch_exception(self, frame, t):
279        rpt, rit, ret, rfn, rframe, rcur = self.cur
280        if (rframe is not frame) and rcur:
281            return self.trace_dispatch_return(rframe, t)
282        self.cur = rpt, rit+t, ret, rfn, rframe, rcur
283        return 1
284
285
286    def trace_dispatch_call(self, frame, t):
287        if self.cur and frame.f_back is not self.cur[-2]:
288            rpt, rit, ret, rfn, rframe, rcur = self.cur
289            if not isinstance(rframe, Profile.fake_frame):
290                assert rframe.f_back is frame.f_back, ("Bad call", rfn,
291                                                       rframe, rframe.f_back,
292                                                       frame, frame.f_back)
293                self.trace_dispatch_return(rframe, 0)
294                assert (self.cur is None or \
295                        frame.f_back is self.cur[-2]), ("Bad call",
296                                                        self.cur[-3])
297        fcode = frame.f_code
298        fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name)
299        self.cur = (t, 0, 0, fn, frame, self.cur)
300        timings = self.timings
301        if fn in timings:
302            cc, ns, tt, ct, callers = timings[fn]
303            timings[fn] = cc, ns + 1, tt, ct, callers
304        else:
305            timings[fn] = 0, 0, 0, 0, {}
306        return 1
307
308    def trace_dispatch_c_call (self, frame, t):
309        fn = ("", 0, self.c_func_name)
310        self.cur = (t, 0, 0, fn, frame, self.cur)
311        timings = self.timings
312        if fn in timings:
313            cc, ns, tt, ct, callers = timings[fn]
314            timings[fn] = cc, ns+1, tt, ct, callers
315        else:
316            timings[fn] = 0, 0, 0, 0, {}
317        return 1
318
319    def trace_dispatch_return(self, frame, t):
320        if frame is not self.cur[-2]:
321            assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3])
322            self.trace_dispatch_return(self.cur[-2], 0)
323
324        # Prefix "r" means part of the Returning or exiting frame.
325        # Prefix "p" means part of the Previous or Parent or older frame.
326
327        rpt, rit, ret, rfn, frame, rcur = self.cur
328        rit = rit + t
329        frame_total = rit + ret
330
331        ppt, pit, pet, pfn, pframe, pcur = rcur
332        self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur
333
334        timings = self.timings
335        cc, ns, tt, ct, callers = timings[rfn]
336        if not ns:
337            # This is the only occurrence of the function on the stack.
338            # Else this is a (directly or indirectly) recursive call, and
339            # its cumulative time will get updated when the topmost call to
340            # it returns.
341            ct = ct + frame_total
342            cc = cc + 1
343
344        if pfn in callers:
345            callers[pfn] = callers[pfn] + 1  # hack: gather more
346            # stats such as the amount of time added to ct courtesy
347            # of this specific call, and the contribution to cc
348            # courtesy of this call.
349        else:
350            callers[pfn] = 1
351
352        timings[rfn] = cc, ns - 1, tt + rit, ct, callers
353
354        return 1
355
356
357    dispatch = {
358        "call": trace_dispatch_call,
359        "exception": trace_dispatch_exception,
360        "return": trace_dispatch_return,
361        "c_call": trace_dispatch_c_call,
362        "c_exception": trace_dispatch_return,  # the C function returned
363        "c_return": trace_dispatch_return,
364        }
365
366
367    # The next few functions play with self.cmd. By carefully preloading
368    # our parallel stack, we can force the profiled result to include
369    # an arbitrary string as the name of the calling function.
370    # We use self.cmd as that string, and the resulting stats look
371    # very nice :-).
372
373    def set_cmd(self, cmd):
374        if self.cur[-1]: return   # already set
375        self.cmd = cmd
376        self.simulate_call(cmd)
377
378    class fake_code:
379        def __init__(self, filename, line, name):
380            self.co_filename = filename
381            self.co_line = line
382            self.co_name = name
383            self.co_firstlineno = 0
384
385        def __repr__(self):
386            return repr((self.co_filename, self.co_line, self.co_name))
387
388    class fake_frame:
389        def __init__(self, code, prior):
390            self.f_code = code
391            self.f_back = prior
392
393    def simulate_call(self, name):
394        code = self.fake_code('profile', 0, name)
395        if self.cur:
396            pframe = self.cur[-2]
397        else:
398            pframe = None
399        frame = self.fake_frame(code, pframe)
400        self.dispatch['call'](self, frame, 0)
401
402    # collect stats from pending stack, including getting final
403    # timings for self.cmd frame.
404
405    def simulate_cmd_complete(self):
406        get_time = self.get_time
407        t = get_time() - self.t
408        while self.cur[-1]:
409            # We *can* cause assertion errors here if
410            # dispatch_trace_return checks for a frame match!
411            self.dispatch['return'](self, self.cur[-2], t)
412            t = 0
413        self.t = get_time() - t
414
415
416    def print_stats(self, sort=-1):
417        import pstats
418        pstats.Stats(self).strip_dirs().sort_stats(sort). \
419                  print_stats()
420
421    def dump_stats(self, file):
422        f = open(file, 'wb')
423        self.create_stats()
424        marshal.dump(self.stats, f)
425        f.close()
426
427    def create_stats(self):
428        self.simulate_cmd_complete()
429        self.snapshot_stats()
430
431    def snapshot_stats(self):
432        self.stats = {}
433        for func, (cc, ns, tt, ct, callers) in self.timings.iteritems():
434            callers = callers.copy()
435            nc = 0
436            for callcnt in callers.itervalues():
437                nc += callcnt
438            self.stats[func] = cc, nc, tt, ct, callers
439
440
441    # The following two methods can be called by clients to use
442    # a profiler to profile a statement, given as a string.
443
444    def run(self, cmd):
445        import __main__
446        dict = __main__.__dict__
447        return self.runctx(cmd, dict, dict)
448
449    def runctx(self, cmd, globals, locals):
450        self.set_cmd(cmd)
451        sys.setprofile(self.dispatcher)
452        try:
453            exec cmd in globals, locals
454        finally:
455            sys.setprofile(None)
456        return self
457
458    # This method is more useful to profile a single function call.
459    def runcall(self, func, *args, **kw):
460        self.set_cmd(repr(func))
461        sys.setprofile(self.dispatcher)
462        try:
463            return func(*args, **kw)
464        finally:
465            sys.setprofile(None)
466
467
468    #******************************************************************
469    # The following calculates the overhead for using a profiler.  The
470    # problem is that it takes a fair amount of time for the profiler
471    # to stop the stopwatch (from the time it receives an event).
472    # Similarly, there is a delay from the time that the profiler
473    # re-starts the stopwatch before the user's code really gets to
474    # continue.  The following code tries to measure the difference on
475    # a per-event basis.
476    #
477    # Note that this difference is only significant if there are a lot of
478    # events, and relatively little user code per event.  For example,
479    # code with small functions will typically benefit from having the
480    # profiler calibrated for the current platform.  This *could* be
481    # done on the fly during init() time, but it is not worth the
482    # effort.  Also note that if too large a value specified, then
483    # execution time on some functions will actually appear as a
484    # negative number.  It is *normal* for some functions (with very
485    # low call counts) to have such negative stats, even if the
486    # calibration figure is "correct."
487    #
488    # One alternative to profile-time calibration adjustments (i.e.,
489    # adding in the magic little delta during each event) is to track
490    # more carefully the number of events (and cumulatively, the number
491    # of events during sub functions) that are seen.  If this were
492    # done, then the arithmetic could be done after the fact (i.e., at
493    # display time).  Currently, we track only call/return events.
494    # These values can be deduced by examining the callees and callers
495    # vectors for each functions.  Hence we *can* almost correct the
496    # internal time figure at print time (note that we currently don't
497    # track exception event processing counts).  Unfortunately, there
498    # is currently no similar information for cumulative sub-function
499    # time.  It would not be hard to "get all this info" at profiler
500    # time.  Specifically, we would have to extend the tuples to keep
501    # counts of this in each frame, and then extend the defs of timing
502    # tuples to include the significant two figures. I'm a bit fearful
503    # that this additional feature will slow the heavily optimized
504    # event/time ratio (i.e., the profiler would run slower, fur a very
505    # low "value added" feature.)
506    #**************************************************************
507
508    def calibrate(self, m, verbose=0):
509        if self.__class__ is not Profile:
510            raise TypeError("Subclasses must override .calibrate().")
511
512        saved_bias = self.bias
513        self.bias = 0
514        try:
515            return self._calibrate_inner(m, verbose)
516        finally:
517            self.bias = saved_bias
518
519    def _calibrate_inner(self, m, verbose):
520        get_time = self.get_time
521
522        # Set up a test case to be run with and without profiling.  Include
523        # lots of calls, because we're trying to quantify stopwatch overhead.
524        # Do not raise any exceptions, though, because we want to know
525        # exactly how many profile events are generated (one call event, +
526        # one return event, per Python-level call).
527
528        def f1(n):
529            for i in range(n):
530                x = 1
531
532        def f(m, f1=f1):
533            for i in range(m):
534                f1(100)
535
536        f(m)    # warm up the cache
537
538        # elapsed_noprofile <- time f(m) takes without profiling.
539        t0 = get_time()
540        f(m)
541        t1 = get_time()
542        elapsed_noprofile = t1 - t0
543        if verbose:
544            print "elapsed time without profiling =", elapsed_noprofile
545
546        # elapsed_profile <- time f(m) takes with profiling.  The difference
547        # is profiling overhead, only some of which the profiler subtracts
548        # out on its own.
549        p = Profile()
550        t0 = get_time()
551        p.runctx('f(m)', globals(), locals())
552        t1 = get_time()
553        elapsed_profile = t1 - t0
554        if verbose:
555            print "elapsed time with profiling =", elapsed_profile
556
557        # reported_time <- "CPU seconds" the profiler charged to f and f1.
558        total_calls = 0.0
559        reported_time = 0.0
560        for (filename, line, funcname), (cc, ns, tt, ct, callers) in \
561                p.timings.items():
562            if funcname in ("f", "f1"):
563                total_calls += cc
564                reported_time += tt
565
566        if verbose:
567            print "'CPU seconds' profiler reported =", reported_time
568            print "total # calls =", total_calls
569        if total_calls != m + 1:
570            raise ValueError("internal error: total calls = %d" % total_calls)
571
572        # reported_time - elapsed_noprofile = overhead the profiler wasn't
573        # able to measure.  Divide by twice the number of calls (since there
574        # are two profiler events per call in this test) to get the hidden
575        # overhead per event.
576        mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls
577        if verbose:
578            print "mean stopwatch overhead per profile event =", mean
579        return mean
580
581#****************************************************************************
582def Stats(*args):
583    print 'Report generating functions are in the "pstats" module\a'
584
585def main():
586    usage = "profile.py [-o output_file_path] [-s sort] scriptfile [arg] ..."
587    parser = OptionParser(usage=usage)
588    parser.allow_interspersed_args = False
589    parser.add_option('-o', '--outfile', dest="outfile",
590        help="Save stats to <outfile>", default=None)
591    parser.add_option('-s', '--sort', dest="sort",
592        help="Sort order when printing to stdout, based on pstats.Stats class",
593        default=-1)
594
595    if not sys.argv[1:]:
596        parser.print_usage()
597        sys.exit(2)
598
599    (options, args) = parser.parse_args()
600    sys.argv[:] = args
601
602    if len(args) > 0:
603        progname = args[0]
604        sys.path.insert(0, os.path.dirname(progname))
605        with open(progname, 'rb') as fp:
606            code = compile(fp.read(), progname, 'exec')
607        globs = {
608            '__file__': progname,
609            '__name__': '__main__',
610            '__package__': None,
611        }
612        runctx(code, globs, None, options.outfile, options.sort)
613    else:
614        parser.print_usage()
615    return parser
616
617# When invoked as main program, invoke the profiler on a script
618if __name__ == '__main__':
619    main()
620