You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

120 lines
3.1 KiB
Plaintext

#!/usr/bin/env python3
from functools import partial
from pathlib import Path
import os
import sys
def ensure_path(p):
    """Return *p* as a ``Path``.

    A value that already is a ``Path`` is handed back unchanged (the very
    same object); anything else is passed to the ``Path`` constructor.
    """
    if isinstance(p, Path):
        return p
    return Path(p)
def include_all(*args):
    """Accept-everything predicate: ignore the arguments and return True."""
    return True
class File:
    """A file path bundled with the ``os.stat`` result taken at construction."""

    def __init__(self, path):
        # Normalise once so the stat call and the stored attribute share
        # the same Path object.
        location = ensure_path(path)
        self.path = location
        self.stat = os.stat(location)

    @property
    def name(self):
        """Final component of the stored path."""
        return self.path.name

    def size(self):
        """Return ``st_size`` from the stat result captured in ``__init__``."""
        return self.stat.st_size
def get(values, *, existing=frozenset()):
    """Yield the entries of *values* that are still worth listing.

    An entry is skipped when it is already a member of *existing* or when
    its ``size()`` is zero.

    Args:
        values: Iterable of objects exposing a ``size()`` method.
        existing: Container of entries to leave out; membership is tested
            with ``in``. The default is an immutable empty set so that no
            mutable object is shared between calls.

    Yields:
        The remaining entries, in their original order.
    """
    for val in values:
        # Membership is checked first so size() is never called on an entry
        # that is going to be dropped anyway.
        if val in existing or val.size() == 0:
            continue
        yield val
def by_size(x, *, filter=None):
    """Sort key: the result of ``x.size`` with *filter* forwarded to it."""
    measured = x.size(filter=filter)
    return measured
def by_name(x):
    """Sort key: the ``name`` attribute of *x*."""
    label = x.name
    return label
class Directory:
    """In-memory directory node.

    ``files`` maps a file name to its ``File`` object and ``dirs`` maps a
    path component to the nested ``Directory`` node.
    """

    def __init__(self, base_dir):
        self.base = ensure_path(base_dir)
        self.files = {}
        self.dirs = {}

    @property
    def name(self):
        """The path this node was created with (a ``Path``, not a string)."""
        return self.base

    def list_by_size(self, *, files=None):
        """Yield every non-empty file of this subtree, one at a time.

        Before each yield the sub-directories are ranked by the total size
        of their files that have not been yielded yet, and the next file is
        taken from the largest one. Once no sub-directory has anything left,
        this directory's own files follow in name order. Files whose size
        is zero are never yielded.

        Args:
            files: Set of files that count as already yielded. It is
                updated in place; a fresh set is used when omitted.

        Yields:
            File objects that were not present in *files*.
        """
        files = set() if files is None else files

        def not_listed(node):
            return node not in files

        remaining = partial(by_size, filter=not_listed)

        while True:
            # Pick the sub-directory with the most unlisted bytes. A
            # directory whose files were all yielded still has a non-zero
            # unfiltered size, so it must be excluded here: calling next()
            # on its exhausted listing would raise StopIteration inside this
            # generator (a RuntimeError on Python 3.7+) and this directory's
            # own files would never be reached.
            largest_dir = None
            largest_left = 0
            for candidate in get(self.dirs.values(), existing=files):
                left = remaining(candidate)
                # Strict comparison: zero never qualifies, and the first of
                # several equally large directories wins.
                if left > largest_left:
                    largest_dir = candidate
                    largest_left = left

            if largest_dir is not None:
                current_file = next(largest_dir.list_by_size(files=files))
                files.add(current_file)
                yield current_file
                continue

            # No sub-directory has anything left: emit own files by name.
            current_file = min(
                get(self.files.values(), existing=files),
                key=by_name,
                default=None,
            )
            if current_file is None:
                return
            files.add(current_file)
            yield current_file

    def size(self, *, filter=None):
        """Return the summed size of the files in this subtree.

        Args:
            filter: Predicate called with each file; only files for which it
                returns true are counted. All files count when omitted.
        """
        filter = include_all if filter is None else filter
        own = sum(node.size() for node in self.files.values() if filter(node))
        nested = sum(node.size(filter=filter) for node in self.dirs.values())
        return own + nested

    def directory(self, *parts):
        """Return the node reached by following *parts*, creating nodes as needed.

        With no parts the node itself is returned.
        """
        node = self
        for part in parts:
            child = node.dirs.get(part)
            if child is None:
                child = Directory(part)
                node.dirs[part] = child
            node = child
        return node

    def add_file(self, path):
        """Register the file at *path* under its parent directory node.

        Args:
            path: Location of the file; a ``Path`` or anything ``Path``
                accepts.

        Returns:
            The newly created ``File`` object.
        """
        path = ensure_path(path)
        node = self.directory(*path.parent.parts)
        entry = File(path)
        node.files[path.name] = entry
        return entry
def main(base_dir):
    """Index every file below *base_dir* and print one path per line.

    The files are printed in the order produced by
    ``Directory.list_by_size``.

    Args:
        base_dir: Directory to walk; a ``Path`` or a string.
    """
    tree = Directory(base_dir)
    for root, _dirs, files in os.walk(base_dir):
        for f in files:
            tree.add_file(Path(root) / f)

    base = Path(base_dir)
    for f in tree.list_by_size():
        # The stored path already starts with base_dir because os.walk
        # roots include it. Strip that prefix before adding it back, or any
        # base other than "." would appear twice in the output.
        relative = f.path.relative_to(base)
        print("{}/{}".format(base_dir, relative))
if __name__ == '__main__':
    from argparse import ArgumentParser

    # Command-line entry point: parse the options, then list the directory.
    cli = ArgumentParser()
    # NOTE(review): --output is accepted but not read anywhere in this block.
    cli.add_argument('-o', '--output', default='.')
    cli.add_argument('-d', '--directory', type=Path, default=Path('.'))
    options = cli.parse_args()
    main(options.directory)