#!/usr/bin/env python3
"""List the non-empty files below a directory, biggest directories first.

The files found by ``os.walk`` are arranged in a tree of ``Directory`` and
``File`` nodes.  ``Directory.list_by_size`` then repeatedly descends into
the sub-directory that still holds the most unlisted bytes and emits one
file from it, so the output interleaves directories according to how much
data each has left.
"""
from functools import partial
from pathlib import Path
import os
import sys


def ensure_path(p):
    """Return *p* unchanged if it is a ``Path``, otherwise ``Path(p)``."""
    return p if isinstance(p, Path) else Path(p)


def include_all(*args):
    """Predicate that accepts every argument; the default size filter."""
    return True


class File:
    """A file on disk plus the ``os.stat`` result captured at construction."""

    def __init__(self, path):
        """Stat *path* immediately; raises ``OSError`` if that fails."""
        self.path = ensure_path(path)
        self.stat = os.stat(self.path)

    @property
    def name(self):
        """Final component of the file's path."""
        return self.path.name

    def size(self, *, filter=None):
        """Return the size in bytes recorded by ``os.stat``.

        ``filter`` mirrors ``Directory.size``: when given and it rejects this
        file, the file contributes 0.  Without it the stat size is returned.
        """
        if filter is not None and not filter(self):
            return 0
        return self.stat.st_size


def get(values, *, existing=None):
    """Yield the nodes of *values* that are not in *existing* and not empty.

    ``existing`` is any container supporting ``in``; ``None`` means nothing
    is excluded.  A node is "empty" when its ``size()`` is 0.
    """
    # A None default avoids sharing one mutable set between calls.
    existing = () if existing is None else existing
    for val in values:
        if val in existing or val.size() == 0:
            continue
        yield val


def by_size(x, *, filter=None):
    """Sort key: the size of node *x*, restricted by the optional *filter*."""
    return x.size(filter=filter)


def by_name(x):
    """Sort key: the ``name`` of node *x*."""
    return x.name


class Directory:
    """A directory node holding child files and sub-directories by name."""

    def __init__(self, base_dir):
        self.base = ensure_path(base_dir)
        self.files = {}  # file name -> File
        self.dirs = {}   # path component -> Directory

    @property
    def name(self):
        """The path this node was created with (a ``Path``, not a string)."""
        return self.base

    def list_by_size(self, *, files=None):
        """Yield each non-empty file of this subtree exactly once.

        At every step the sub-directory with the most bytes that have not
        been yielded yet is asked for its next file.  Once no sub-directory
        has anything left, this directory's own files follow in name order.
        Zero-byte files are never yielded.

        ``files`` is the set of files already yielded.  It is shared with the
        nested calls and updated in place; by default a fresh set is used.
        """
        files = set() if files is None else files

        def not_yet_listed(node):
            return node not in files

        remaining_size = partial(by_size, filter=not_yet_listed)

        while True:
            # Find the sub-directory with the most unlisted bytes.  The strict
            # comparison keeps the first one on ties and, crucially, ignores
            # directories with nothing left: descending into an exhausted
            # directory would call next() on a finished generator, which
            # surfaces as RuntimeError inside a generator (PEP 479).
            largest_dir, largest_left = None, 0
            for candidate in self.dirs.values():
                if candidate in files:
                    continue
                left = remaining_size(candidate)
                if left > largest_left:
                    largest_dir, largest_left = candidate, left

            if largest_dir is not None:
                # A positive remaining size guarantees the nested generator
                # has a file to give; it records the file in `files` itself.
                yield next(largest_dir.list_by_size(files=files))
                continue

            pending = get(self.files.values(), existing=files)
            current_file = min(pending, key=by_name, default=None)
            if current_file is None:
                return
            files.add(current_file)
            yield current_file

    def size(self, *, filter=None):
        """Return the total size in bytes of the files in this subtree.

        ``filter`` is a predicate applied to each ``File``; files it rejects
        are not counted.  ``None`` counts every file.
        """
        filter = include_all if filter is None else filter
        own = sum(node.size() for node in self.files.values() if filter(node))
        nested = sum(node.size(filter=filter) for node in self.dirs.values())
        return own + nested

    def directory(self, *parts):
        """Return the descendant reached via *parts*, creating missing nodes.

        With no parts the directory itself is returned.
        """
        node = self
        for part in parts:
            child = node.dirs.get(part)
            if child is None:
                child = Directory(part)
                node.dirs[part] = child
            node = child
        return node

    def add_file(self, path):
        """Register the file at *path* and return its ``File`` node.

        The components of ``path.parent`` select (and create) the directory
        node that receives the file.  Raises ``OSError`` when the file cannot
        be stat'ed.
        """
        path = ensure_path(path)
        node = self.directory(*path.parent.parts)
        entry = File(path)
        node.files[path.name] = entry
        return entry


def main(base_dir):
    """Print the non-empty files below *base_dir*, one ``base_dir/...`` per line."""
    base = ensure_path(base_dir)
    tree = Directory(base)
    for root, _dirs, names in os.walk(base_dir):
        for name in names:
            path = Path(root) / name
            try:
                tree.add_file(path)
            except OSError as err:
                # Dangling symlinks, vanished or unreadable files: report and
                # carry on instead of aborting the whole listing.
                print("skipping {}: {}".format(path, err), file=sys.stderr)

    for f in tree.list_by_size():
        # Paths produced by os.walk already start with base_dir; strip it so
        # the prefix is not printed twice.
        print("{}/{}".format(base_dir, f.path.relative_to(base)))


if __name__ == '__main__':
    from argparse import ArgumentParser

    parser = ArgumentParser()
    # NOTE: --output is accepted but not used anywhere in this script.
    parser.add_argument('-o', '--output', default='.')
    parser.add_argument('-d', '--directory', default=Path('.'), type=Path)
    args = parser.parse_args()
    main(args.directory)