1# Copyright 2006 Google, Inc. All Rights Reserved. 2# Licensed to PSF under a Contributor Agreement. 3 4"""Refactoring framework. 5 6Used as a main program, this can refactor any number of files and/or 7recursively descend down directories. Imported as a module, this 8provides infrastructure to write your own refactoring tool. 9""" 10 11from __future__ import with_statement 12 13__author__ = "Guido van Rossum <[email protected]>" 14 15 16# Python imports 17import os 18import sys 19import logging 20import operator 21import collections 22import StringIO 23from itertools import chain 24 25# Local imports 26from .pgen2 import driver, tokenize, token 27from .fixer_util import find_root 28from . import pytree, pygram 29from . import btm_utils as bu 30from . import btm_matcher as bm 31 32 33def get_all_fix_names(fixer_pkg, remove_prefix=True): 34 """Return a sorted list of all available fix names in the given package.""" 35 pkg = __import__(fixer_pkg, [], [], ["*"]) 36 fixer_dir = os.path.dirname(pkg.__file__) 37 fix_names = [] 38 for name in sorted(os.listdir(fixer_dir)): 39 if name.startswith("fix_") and name.endswith(".py"): 40 if remove_prefix: 41 name = name[4:] 42 fix_names.append(name[:-3]) 43 return fix_names 44 45 46class _EveryNode(Exception): 47 pass 48 49 50def _get_head_types(pat): 51 """ Accepts a pytree Pattern Node and returns a set 52 of the pattern types which will match first. """ 53 54 if isinstance(pat, (pytree.NodePattern, pytree.LeafPattern)): 55 # NodePatters must either have no type and no content 56 # or a type and content -- so they don't get any farther 57 # Always return leafs 58 if pat.type is None: 59 raise _EveryNode 60 return set([pat.type]) 61 62 if isinstance(pat, pytree.NegatedPattern): 63 if pat.content: 64 return _get_head_types(pat.content) 65 raise _EveryNode # Negated Patterns don't have a type 66 67 if isinstance(pat, pytree.WildcardPattern): 68 # Recurse on each node in content 69 r = set() 70 for p in pat.content: 71 for x in p: 72 r.update(_get_head_types(x)) 73 return r 74 75 raise Exception("Oh no! I don't understand pattern %s" %(pat)) 76 77 78def _get_headnode_dict(fixer_list): 79 """ Accepts a list of fixers and returns a dictionary 80 of head node type --> fixer list. """ 81 head_nodes = collections.defaultdict(list) 82 every = [] 83 for fixer in fixer_list: 84 if fixer.pattern: 85 try: 86 heads = _get_head_types(fixer.pattern) 87 except _EveryNode: 88 every.append(fixer) 89 else: 90 for node_type in heads: 91 head_nodes[node_type].append(fixer) 92 else: 93 if fixer._accept_type is not None: 94 head_nodes[fixer._accept_type].append(fixer) 95 else: 96 every.append(fixer) 97 for node_type in chain(pygram.python_grammar.symbol2number.itervalues(), 98 pygram.python_grammar.tokens): 99 head_nodes[node_type].extend(every) 100 return dict(head_nodes) 101 102 103def get_fixers_from_package(pkg_name): 104 """ 105 Return the fully qualified names for fixers in the package pkg_name. 106 """ 107 return [pkg_name + "." + fix_name 108 for fix_name in get_all_fix_names(pkg_name, False)] 109 110def _identity(obj): 111 return obj 112 113if sys.version_info < (3, 0): 114 import codecs 115 _open_with_encoding = codecs.open 116 # codecs.open doesn't translate newlines sadly. 117 def _from_system_newlines(input): 118 return input.replace(u"\r\n", u"\n") 119 def _to_system_newlines(input): 120 if os.linesep != "\n": 121 return input.replace(u"\n", os.linesep) 122 else: 123 return input 124else: 125 _open_with_encoding = open 126 _from_system_newlines = _identity 127 _to_system_newlines = _identity 128 129 130def _detect_future_features(source): 131 have_docstring = False 132 gen = tokenize.generate_tokens(StringIO.StringIO(source).readline) 133 def advance(): 134 tok = gen.next() 135 return tok[0], tok[1] 136 ignore = frozenset((token.NEWLINE, tokenize.NL, token.COMMENT)) 137 features = set() 138 try: 139 while True: 140 tp, value = advance() 141 if tp in ignore: 142 continue 143 elif tp == token.STRING: 144 if have_docstring: 145 break 146 have_docstring = True 147 elif tp == token.NAME and value == u"from": 148 tp, value = advance() 149 if tp != token.NAME or value != u"__future__": 150 break 151 tp, value = advance() 152 if tp != token.NAME or value != u"import": 153 break 154 tp, value = advance() 155 if tp == token.OP and value == u"(": 156 tp, value = advance() 157 while tp == token.NAME: 158 features.add(value) 159 tp, value = advance() 160 if tp != token.OP or value != u",": 161 break 162 tp, value = advance() 163 else: 164 break 165 except StopIteration: 166 pass 167 return frozenset(features) 168 169 170class FixerError(Exception): 171 """A fixer could not be loaded.""" 172 173 174class RefactoringTool(object): 175 176 _default_options = {"print_function" : False} 177 178 CLASS_PREFIX = "Fix" # The prefix for fixer classes 179 FILE_PREFIX = "fix_" # The prefix for modules with a fixer within 180 181 def __init__(self, fixer_names, options=None, explicit=None): 182 """Initializer. 183 184 Args: 185 fixer_names: a list of fixers to import 186 options: an dict with configuration. 187 explicit: a list of fixers to run even if they are explicit. 188 """ 189 self.fixers = fixer_names 190 self.explicit = explicit or [] 191 self.options = self._default_options.copy() 192 if options is not None: 193 self.options.update(options) 194 if self.options["print_function"]: 195 self.grammar = pygram.python_grammar_no_print_statement 196 else: 197 self.grammar = pygram.python_grammar 198 self.errors = [] 199 self.logger = logging.getLogger("RefactoringTool") 200 self.fixer_log = [] 201 self.wrote = False 202 self.driver = driver.Driver(self.grammar, 203 convert=pytree.convert, 204 logger=self.logger) 205 self.pre_order, self.post_order = self.get_fixers() 206 207 208 self.files = [] # List of files that were or should be modified 209 210 self.BM = bm.BottomMatcher() 211 self.bmi_pre_order = [] # Bottom Matcher incompatible fixers 212 self.bmi_post_order = [] 213 214 for fixer in chain(self.post_order, self.pre_order): 215 if fixer.BM_compatible: 216 self.BM.add_fixer(fixer) 217 # remove fixers that will be handled by the bottom-up 218 # matcher 219 elif fixer in self.pre_order: 220 self.bmi_pre_order.append(fixer) 221 elif fixer in self.post_order: 222 self.bmi_post_order.append(fixer) 223 224 self.bmi_pre_order_heads = _get_headnode_dict(self.bmi_pre_order) 225 self.bmi_post_order_heads = _get_headnode_dict(self.bmi_post_order) 226 227 228 229 def get_fixers(self): 230 """Inspects the options to load the requested patterns and handlers. 231 232 Returns: 233 (pre_order, post_order), where pre_order is the list of fixers that 234 want a pre-order AST traversal, and post_order is the list that want 235 post-order traversal. 236 """ 237 pre_order_fixers = [] 238 post_order_fixers = [] 239 for fix_mod_path in self.fixers: 240 mod = __import__(fix_mod_path, {}, {}, ["*"]) 241 fix_name = fix_mod_path.rsplit(".", 1)[-1] 242 if fix_name.startswith(self.FILE_PREFIX): 243 fix_name = fix_name[len(self.FILE_PREFIX):] 244 parts = fix_name.split("_") 245 class_name = self.CLASS_PREFIX + "".join([p.title() for p in parts]) 246 try: 247 fix_class = getattr(mod, class_name) 248 except AttributeError: 249 raise FixerError("Can't find %s.%s" % (fix_name, class_name)) 250 fixer = fix_class(self.options, self.fixer_log) 251 if fixer.explicit and self.explicit is not True and \ 252 fix_mod_path not in self.explicit: 253 self.log_message("Skipping implicit fixer: %s", fix_name) 254 continue 255 256 self.log_debug("Adding transformation: %s", fix_name) 257 if fixer.order == "pre": 258 pre_order_fixers.append(fixer) 259 elif fixer.order == "post": 260 post_order_fixers.append(fixer) 261 else: 262 raise FixerError("Illegal fixer order: %r" % fixer.order) 263 264 key_func = operator.attrgetter("run_order") 265 pre_order_fixers.sort(key=key_func) 266 post_order_fixers.sort(key=key_func) 267 return (pre_order_fixers, post_order_fixers) 268 269 def log_error(self, msg, *args, **kwds): 270 """Called when an error occurs.""" 271 raise 272 273 def log_message(self, msg, *args): 274 """Hook to log a message.""" 275 if args: 276 msg = msg % args 277 self.logger.info(msg) 278 279 def log_debug(self, msg, *args): 280 if args: 281 msg = msg % args 282 self.logger.debug(msg) 283 284 def print_output(self, old_text, new_text, filename, equal): 285 """Called with the old version, new version, and filename of a 286 refactored file.""" 287 pass 288 289 def refactor(self, items, write=False, doctests_only=False): 290 """Refactor a list of files and directories.""" 291 292 for dir_or_file in items: 293 if os.path.isdir(dir_or_file): 294 self.refactor_dir(dir_or_file, write, doctests_only) 295 else: 296 self.refactor_file(dir_or_file, write, doctests_only) 297 298 def refactor_dir(self, dir_name, write=False, doctests_only=False): 299 """Descends down a directory and refactor every Python file found. 300 301 Python files are assumed to have a .py extension. 302 303 Files and subdirectories starting with '.' are skipped. 304 """ 305 py_ext = os.extsep + "py" 306 for dirpath, dirnames, filenames in os.walk(dir_name): 307 self.log_debug("Descending into %s", dirpath) 308 dirnames.sort() 309 filenames.sort() 310 for name in filenames: 311 if (not name.startswith(".") and 312 os.path.splitext(name)[1] == py_ext): 313 fullname = os.path.join(dirpath, name) 314 self.refactor_file(fullname, write, doctests_only) 315 # Modify dirnames in-place to remove subdirs with leading dots 316 dirnames[:] = [dn for dn in dirnames if not dn.startswith(".")] 317 318 def _read_python_source(self, filename): 319 """ 320 Do our best to decode a Python source file correctly. 321 """ 322 try: 323 f = open(filename, "rb") 324 except IOError as err: 325 self.log_error("Can't open %s: %s", filename, err) 326 return None, None 327 try: 328 encoding = tokenize.detect_encoding(f.readline)[0] 329 finally: 330 f.close() 331 with _open_with_encoding(filename, "r", encoding=encoding) as f: 332 return _from_system_newlines(f.read()), encoding 333 334 def refactor_file(self, filename, write=False, doctests_only=False): 335 """Refactors a file.""" 336 input, encoding = self._read_python_source(filename) 337 if input is None: 338 # Reading the file failed. 339 return 340 input += u"\n" # Silence certain parse errors 341 if doctests_only: 342 self.log_debug("Refactoring doctests in %s", filename) 343 output = self.refactor_docstring(input, filename) 344 if output != input: 345 self.processed_file(output, filename, input, write, encoding) 346 else: 347 self.log_debug("No doctest changes in %s", filename) 348 else: 349 tree = self.refactor_string(input, filename) 350 if tree and tree.was_changed: 351 # The [:-1] is to take off the \n we added earlier 352 self.processed_file(unicode(tree)[:-1], filename, 353 write=write, encoding=encoding) 354 else: 355 self.log_debug("No changes in %s", filename) 356 357 def refactor_string(self, data, name): 358 """Refactor a given input string. 359 360 Args: 361 data: a string holding the code to be refactored. 362 name: a human-readable name for use in error/log messages. 363 364 Returns: 365 An AST corresponding to the refactored input stream; None if 366 there were errors during the parse. 367 """ 368 features = _detect_future_features(data) 369 if "print_function" in features: 370 self.driver.grammar = pygram.python_grammar_no_print_statement 371 try: 372 tree = self.driver.parse_string(data) 373 except Exception as err: 374 self.log_error("Can't parse %s: %s: %s", 375 name, err.__class__.__name__, err) 376 return 377 finally: 378 self.driver.grammar = self.grammar 379 tree.future_features = features 380 self.log_debug("Refactoring %s", name) 381 self.refactor_tree(tree, name) 382 return tree 383 384 def refactor_stdin(self, doctests_only=False): 385 input = sys.stdin.read() 386 if doctests_only: 387 self.log_debug("Refactoring doctests in stdin") 388 output = self.refactor_docstring(input, "<stdin>") 389 if output != input: 390 self.processed_file(output, "<stdin>", input) 391 else: 392 self.log_debug("No doctest changes in stdin") 393 else: 394 tree = self.refactor_string(input, "<stdin>") 395 if tree and tree.was_changed: 396 self.processed_file(unicode(tree), "<stdin>", input) 397 else: 398 self.log_debug("No changes in stdin") 399 400 def refactor_tree(self, tree, name): 401 """Refactors a parse tree (modifying the tree in place). 402 403 For compatible patterns the bottom matcher module is 404 used. Otherwise the tree is traversed node-to-node for 405 matches. 406 407 Args: 408 tree: a pytree.Node instance representing the root of the tree 409 to be refactored. 410 name: a human-readable name for this tree. 411 412 Returns: 413 True if the tree was modified, False otherwise. 414 """ 415 416 for fixer in chain(self.pre_order, self.post_order): 417 fixer.start_tree(tree, name) 418 419 #use traditional matching for the incompatible fixers 420 self.traverse_by(self.bmi_pre_order_heads, tree.pre_order()) 421 self.traverse_by(self.bmi_post_order_heads, tree.post_order()) 422 423 # obtain a set of candidate nodes 424 match_set = self.BM.run(tree.leaves()) 425 426 while any(match_set.values()): 427 for fixer in self.BM.fixers: 428 if fixer in match_set and match_set[fixer]: 429 #sort by depth; apply fixers from bottom(of the AST) to top 430 match_set[fixer].sort(key=pytree.Base.depth, reverse=True) 431 432 if fixer.keep_line_order: 433 #some fixers(eg fix_imports) must be applied 434 #with the original file's line order 435 match_set[fixer].sort(key=pytree.Base.get_lineno) 436 437 for node in list(match_set[fixer]): 438 if node in match_set[fixer]: 439 match_set[fixer].remove(node) 440 441 try: 442 find_root(node) 443 except AssertionError: 444 # this node has been cut off from a 445 # previous transformation ; skip 446 continue 447 448 if node.fixers_applied and fixer in node.fixers_applied: 449 # do not apply the same fixer again 450 continue 451 452 results = fixer.match(node) 453 454 if results: 455 new = fixer.transform(node, results) 456 if new is not None: 457 node.replace(new) 458 #new.fixers_applied.append(fixer) 459 for node in new.post_order(): 460 # do not apply the fixer again to 461 # this or any subnode 462 if not node.fixers_applied: 463 node.fixers_applied = [] 464 node.fixers_applied.append(fixer) 465 466 # update the original match set for 467 # the added code 468 new_matches = self.BM.run(new.leaves()) 469 for fxr in new_matches: 470 if not fxr in match_set: 471 match_set[fxr]=[] 472 473 match_set[fxr].extend(new_matches[fxr]) 474 475 for fixer in chain(self.pre_order, self.post_order): 476 fixer.finish_tree(tree, name) 477 return tree.was_changed 478 479 def traverse_by(self, fixers, traversal): 480 """Traverse an AST, applying a set of fixers to each node. 481 482 This is a helper method for refactor_tree(). 483 484 Args: 485 fixers: a list of fixer instances. 486 traversal: a generator that yields AST nodes. 487 488 Returns: 489 None 490 """ 491 if not fixers: 492 return 493 for node in traversal: 494 for fixer in fixers[node.type]: 495 results = fixer.match(node) 496 if results: 497 new = fixer.transform(node, results) 498 if new is not None: 499 node.replace(new) 500 node = new 501 502 def processed_file(self, new_text, filename, old_text=None, write=False, 503 encoding=None): 504 """ 505 Called when a file has been refactored, and there are changes. 506 """ 507 self.files.append(filename) 508 if old_text is None: 509 old_text = self._read_python_source(filename)[0] 510 if old_text is None: 511 return 512 equal = old_text == new_text 513 self.print_output(old_text, new_text, filename, equal) 514 if equal: 515 self.log_debug("No changes to %s", filename) 516 return 517 if write: 518 self.write_file(new_text, filename, old_text, encoding) 519 else: 520 self.log_debug("Not writing changes to %s", filename) 521 522 def write_file(self, new_text, filename, old_text, encoding=None): 523 """Writes a string to a file. 524 525 It first shows a unified diff between the old text and the new text, and 526 then rewrites the file; the latter is only done if the write option is 527 set. 528 """ 529 try: 530 f = _open_with_encoding(filename, "w", encoding=encoding) 531 except os.error as err: 532 self.log_error("Can't create %s: %s", filename, err) 533 return 534 try: 535 f.write(_to_system_newlines(new_text)) 536 except os.error as err: 537 self.log_error("Can't write %s: %s", filename, err) 538 finally: 539 f.close() 540 self.log_debug("Wrote changes to %s", filename) 541 self.wrote = True 542 543 PS1 = ">>> " 544 PS2 = "... " 545 546 def refactor_docstring(self, input, filename): 547 """Refactors a docstring, looking for doctests. 548 549 This returns a modified version of the input string. It looks 550 for doctests, which start with a ">>>" prompt, and may be 551 continued with "..." prompts, as long as the "..." is indented 552 the same as the ">>>". 553 554 (Unfortunately we can't use the doctest module's parser, 555 since, like most parsers, it is not geared towards preserving 556 the original source.) 557 """ 558 result = [] 559 block = None 560 block_lineno = None 561 indent = None 562 lineno = 0 563 for line in input.splitlines(True): 564 lineno += 1 565 if line.lstrip().startswith(self.PS1): 566 if block is not None: 567 result.extend(self.refactor_doctest(block, block_lineno, 568 indent, filename)) 569 block_lineno = lineno 570 block = [line] 571 i = line.find(self.PS1) 572 indent = line[:i] 573 elif (indent is not None and 574 (line.startswith(indent + self.PS2) or 575 line == indent + self.PS2.rstrip() + u"\n")): 576 block.append(line) 577 else: 578 if block is not None: 579 result.extend(self.refactor_doctest(block, block_lineno, 580 indent, filename)) 581 block = None 582 indent = None 583 result.append(line) 584 if block is not None: 585 result.extend(self.refactor_doctest(block, block_lineno, 586 indent, filename)) 587 return u"".join(result) 588 589 def refactor_doctest(self, block, lineno, indent, filename): 590 """Refactors one doctest. 591 592 A doctest is given as a block of lines, the first of which starts 593 with ">>>" (possibly indented), while the remaining lines start 594 with "..." (identically indented). 595 596 """ 597 try: 598 tree = self.parse_block(block, lineno, indent) 599 except Exception as err: 600 if self.logger.isEnabledFor(logging.DEBUG): 601 for line in block: 602 self.log_debug("Source: %s", line.rstrip(u"\n")) 603 self.log_error("Can't parse docstring in %s line %s: %s: %s", 604 filename, lineno, err.__class__.__name__, err) 605 return block 606 if self.refactor_tree(tree, filename): 607 new = unicode(tree).splitlines(True) 608 # Undo the adjustment of the line numbers in wrap_toks() below. 609 clipped, new = new[:lineno-1], new[lineno-1:] 610 assert clipped == [u"\n"] * (lineno-1), clipped 611 if not new[-1].endswith(u"\n"): 612 new[-1] += u"\n" 613 block = [indent + self.PS1 + new.pop(0)] 614 if new: 615 block += [indent + self.PS2 + line for line in new] 616 return block 617 618 def summarize(self): 619 if self.wrote: 620 were = "were" 621 else: 622 were = "need to be" 623 if not self.files: 624 self.log_message("No files %s modified.", were) 625 else: 626 self.log_message("Files that %s modified:", were) 627 for file in self.files: 628 self.log_message(file) 629 if self.fixer_log: 630 self.log_message("Warnings/messages while refactoring:") 631 for message in self.fixer_log: 632 self.log_message(message) 633 if self.errors: 634 if len(self.errors) == 1: 635 self.log_message("There was 1 error:") 636 else: 637 self.log_message("There were %d errors:", len(self.errors)) 638 for msg, args, kwds in self.errors: 639 self.log_message(msg, *args, **kwds) 640 641 def parse_block(self, block, lineno, indent): 642 """Parses a block into a tree. 643 644 This is necessary to get correct line number / offset information 645 in the parser diagnostics and embedded into the parse tree. 646 """ 647 tree = self.driver.parse_tokens(self.wrap_toks(block, lineno, indent)) 648 tree.future_features = frozenset() 649 return tree 650 651 def wrap_toks(self, block, lineno, indent): 652 """Wraps a tokenize stream to systematically modify start/end.""" 653 tokens = tokenize.generate_tokens(self.gen_lines(block, indent).next) 654 for type, value, (line0, col0), (line1, col1), line_text in tokens: 655 line0 += lineno - 1 656 line1 += lineno - 1 657 # Don't bother updating the columns; this is too complicated 658 # since line_text would also have to be updated and it would 659 # still break for tokens spanning lines. Let the user guess 660 # that the column numbers for doctests are relative to the 661 # end of the prompt string (PS1 or PS2). 662 yield type, value, (line0, col0), (line1, col1), line_text 663 664 665 def gen_lines(self, block, indent): 666 """Generates lines as expected by tokenize from a list of lines. 667 668 This strips the first len(indent + self.PS1) characters off each line. 669 """ 670 prefix1 = indent + self.PS1 671 prefix2 = indent + self.PS2 672 prefix = prefix1 673 for line in block: 674 if line.startswith(prefix): 675 yield line[len(prefix):] 676 elif line == prefix.rstrip() + u"\n": 677 yield u"\n" 678 else: 679 raise AssertionError("line=%r, prefix=%r" % (line, prefix)) 680 prefix = prefix2 681 while True: 682 yield "" 683 684 685class MultiprocessingUnsupported(Exception): 686 pass 687 688 689class MultiprocessRefactoringTool(RefactoringTool): 690 691 def __init__(self, *args, **kwargs): 692 super(MultiprocessRefactoringTool, self).__init__(*args, **kwargs) 693 self.queue = None 694 self.output_lock = None 695 696 def refactor(self, items, write=False, doctests_only=False, 697 num_processes=1): 698 if num_processes == 1: 699 return super(MultiprocessRefactoringTool, self).refactor( 700 items, write, doctests_only) 701 try: 702 import multiprocessing 703 except ImportError: 704 raise MultiprocessingUnsupported 705 if self.queue is not None: 706 raise RuntimeError("already doing multiple processes") 707 self.queue = multiprocessing.JoinableQueue() 708 self.output_lock = multiprocessing.Lock() 709 processes = [multiprocessing.Process(target=self._child) 710 for i in xrange(num_processes)] 711 try: 712 for p in processes: 713 p.start() 714 super(MultiprocessRefactoringTool, self).refactor(items, write, 715 doctests_only) 716 finally: 717 self.queue.join() 718 for i in xrange(num_processes): 719 self.queue.put(None) 720 for p in processes: 721 if p.is_alive(): 722 p.join() 723 self.queue = None 724 725 def _child(self): 726 task = self.queue.get() 727 while task is not None: 728 args, kwargs = task 729 try: 730 super(MultiprocessRefactoringTool, self).refactor_file( 731 *args, **kwargs) 732 finally: 733 self.queue.task_done() 734 task = self.queue.get() 735 736 def refactor_file(self, *args, **kwargs): 737 if self.queue is not None: 738 self.queue.put((args, kwargs)) 739 else: 740 return super(MultiprocessRefactoringTool, self).refactor_file( 741 *args, **kwargs) 742