# Copyright 2003 Dave Abrahams # Copyright 2001, 2002 Vladimir Prus # Copyright 2012 Jurko Gospodnetic # Distributed under the Boost Software License, Version 1.0. # (See accompanying file LICENSE_1_0.txt or copy at # http://www.boost.org/LICENSE_1_0.txt) ############################################################################### # # Based in part on an old Subversion tree.py source file (tools for comparing # directory trees). See http://subversion.tigris.org for more information. # # Copyright (c) 2001 Sam Tobin-Hochstadt. All rights reserved. # # This software is licensed as described in the file COPYING, which you should # have received as part of this distribution. The terms are also available at # http://subversion.tigris.org/license-1.html. If newer versions of this # license are posted there, you may use a newer version instead, at your # option. # ############################################################################### import os import os.path import stat import sys class TreeNode: """ Fundamental data type used to build file system tree structures. If CHILDREN is None, then the node represents a file. Otherwise, CHILDREN is a list of the nodes representing that directory's children. NAME is simply the name of the file or directory. CONTENTS is a string holding the file's contents (if a file). """ def __init__(self, name, children=None, contents=None): assert children is None or contents is None self.name = name self.mtime = 0 self.children = children self.contents = contents self.path = name def add_child(self, newchild): assert not self.is_file() for a in self.children: if a.name == newchild.name: if newchild.is_file(): a.contents = newchild.contents a.path = os.path.join(self.path, newchild.name) else: for i in newchild.children: a.add_child(i) break else: self.children.append(newchild) newchild.path = os.path.join(self.path, newchild.name) def get_child(self, name): """ If the given TreeNode directory NODE contains a child named NAME, return the child; else, return None. """ for n in self.children: if n.name == name: return n def is_file(self): return self.children is None def pprint(self): print(" * Node name: %s" % self.name) print(" Path: %s" % self.path) print(" Contents: %s" % self.contents) if self.is_file(): print(" Children: is a file.") else: print(" Children: %d" % len(self.children)) class TreeDifference: def __init__(self): self.added_files = [] self.removed_files = [] self.modified_files = [] self.touched_files = [] def append(self, other): self.added_files.extend(other.added_files) self.removed_files.extend(other.removed_files) self.modified_files.extend(other.modified_files) self.touched_files.extend(other.touched_files) def ignore_directories(self): """Removes directories from our lists of found differences.""" not_dir = lambda x : x[-1] != "/" self.added_files = filter(not_dir, self.added_files) self.removed_files = filter(not_dir, self.removed_files) self.modified_files = filter(not_dir, self.modified_files) self.touched_files = filter(not_dir, self.touched_files) def pprint(self, file=sys.stdout): file.write("Added files : %s\n" % self.added_files) file.write("Removed files : %s\n" % self.removed_files) file.write("Modified files: %s\n" % self.modified_files) file.write("Touched files : %s\n" % self.touched_files) def empty(self): return not (self.added_files or self.removed_files or self.modified_files or self.touched_files) def build_tree(path): """ Takes PATH as the folder path, walks the file system below that path, and creates a tree structure based on any files and folders found there. Returns the prepared tree structure plus the maximum file modification timestamp under the given folder. """ return _handle_dir(os.path.normpath(path)) def tree_difference(a, b): """Compare TreeNodes A and B, and create a TreeDifference instance.""" return _do_tree_difference(a, b, "", True) def _do_tree_difference(a, b, parent_path, root=False): """Internal recursive worker function for tree_difference().""" # We do not want to list root node names. if root: assert not parent_path assert not a.is_file() assert not b.is_file() full_path = "" else: assert a.name == b.name full_path = parent_path + a.name result = TreeDifference() # A and B are both files. if a.is_file() and b.is_file(): if a.contents != b.contents: result.modified_files.append(full_path) elif a.mtime != b.mtime: result.touched_files.append(full_path) return result # Directory converted to file. if not a.is_file() and b.is_file(): result.removed_files.extend(_traverse_tree(a, parent_path)) result.added_files.append(full_path) # File converted to directory. elif a.is_file() and not b.is_file(): result.removed_files.append(full_path) result.added_files.extend(_traverse_tree(b, parent_path)) # A and B are both directories. else: if full_path: full_path += "/" accounted_for = [] # Children present in both trees. for a_child in a.children: b_child = b.get_child(a_child.name) if b_child: accounted_for.append(b_child) result.append(_do_tree_difference(a_child, b_child, full_path)) else: result.removed_files.append(full_path + a_child.name) for b_child in b.children: if b_child not in accounted_for: result.added_files.extend(_traverse_tree(b_child, full_path)) return result def _traverse_tree(t, parent_path): """Returns a list of all names in a tree.""" assert not parent_path or parent_path[-1] == "/" full_node_name = parent_path + t.name if t.is_file(): result = [full_node_name] else: name_prefix = full_node_name + "/" result = [name_prefix] for i in t.children: result.extend(_traverse_tree(i, name_prefix)) return result def _get_text(path): """Return a string with the textual contents of a file at PATH.""" fp = open(path, 'r') try: return fp.read() finally: fp.close() def _handle_dir(path): """ Main recursive worker function for build_tree(). Returns a newly created tree node representing the given normalized folder path as well as the maximum file/folder modification time detected under the same path. """ files = [] dirs = [] node = TreeNode(os.path.basename(path), children=[]) max_mtime = node.mtime = os.stat(path).st_mtime # List files & folders. for f in os.listdir(path): f = os.path.join(path, f) if os.path.isdir(f): dirs.append(f) elif os.path.isfile(f): files.append(f) # Add a child node for each file. for f in files: fcontents = _get_text(f) new_file_node = TreeNode(os.path.basename(f), contents=fcontents) new_file_node.mtime = os.stat(f).st_mtime max_mtime = max(max_mtime, new_file_node.mtime) node.add_child(new_file_node) # For each subdir, create a node, walk its tree, add it as a child. for d in dirs: new_dir_node, new_max_mtime = _handle_dir(d) max_mtime = max(max_mtime, new_max_mtime) node.add_child(new_dir_node) return node, max_mtime