#!/usr/bin/python3

import os, sys
import hashlib
 
def findDup(parentFolder):
    """Group the files found under *parentFolder* by their content hash.

    The tree is traversed with ``os.walk`` and a progress line is printed
    for every directory visited. Symbolic links are skipped; every other
    file is hashed with ``hashfile``.

    Returns a dict of the form ``{hash: [paths]}``, where each list holds
    the paths whose content produced that hash.
    """
    by_hash = {}
    for current_dir, _subdirs, names in os.walk(parentFolder):
        print('Scanning %s...' % current_dir)
        for full_path in (os.path.join(current_dir, name) for name in names):
            # Links are ignored rather than hashed.
            if os.path.islink(full_path):
                continue
            # Start a new list for an unseen hash, then record the path.
            by_hash.setdefault(hashfile(full_path), []).append(full_path)
    return by_hash
 
 
# Joins two dictionaries
def joinDicts(dict1, dict2):
    """Merge *dict2* into *dict1* in place.

    For a key present in both, the value stored in *dict1* becomes the
    concatenation ``dict1[key] + dict2[key]``. For a key only in *dict2*,
    *dict1* receives the very same value object. *dict2* is not modified
    and nothing is returned.
    """
    for key, incoming in dict2.items():
        if key not in dict1:
            dict1[key] = incoming
            continue
        dict1[key] = dict1[key] + incoming
 
 
def hashfile(path, blocksize = 65536):
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is opened in binary mode and read *blocksize* bytes at a time,
    so the whole content is never held in memory at once.

    Raises whatever ``open``/``read`` raise (e.g. ``OSError``) when the file
    cannot be opened or read.
    """
    hasher = hashlib.md5()
    # The context manager guarantees the handle is closed even when a read
    # fails part-way through the file.
    with open(path, 'rb') as afile:
        # iter() with a sentinel keeps calling read() until it returns b''.
        for buf in iter(lambda: afile.read(blocksize), b''):
            hasher.update(buf)
    return hasher.hexdigest()
 
 
def printResults(dict1):
    """Print every group of duplicated files contained in *dict1*.

    *dict1* maps a hash to a list of paths; only lists with more than one
    entry are reported. For each reported path the sibling size directories
    (``<dir>/../<size>/<name>``) are probed: when none of them holds a
    symbolic link with the same file name, the path is tagged ``(main)``.

    Prints 'No duplicate files found.' when there is nothing to report.
    Returns None.
    """
    results = [paths for paths in dict1.values() if len(paths) > 1]
    if not results:
        print('No duplicate files found.')
        return

    print('Duplicates Found:')
    print('The following files are identical. The name could differ, but the content is identical')
    print('___________________')
    for result in results:
        for subresult in result:
            # Split off the last path component only; a plain str.replace on
            # the whole path would also rewrite directory names that happen
            # to contain the file name.
            folder, filename = os.path.split(subresult)
            links = any(
                os.path.islink(os.path.join(folder, '..', str(size), filename))
                for size in (16, 22, 24, 32, 48, 64, 96, 256)
            )
            if links:
                print('\t\t%s' % subresult)
            else:
                print('\t\t%s (main)' % subresult)

        print('___________________')

    print ("Duplicated sets: %s" % len(results))
 
 
if __name__ == '__main__':
    # Folders to scan; the paths are relative to the current working directory.
    folders = ["usr/share/icons/Mint-Y/apps"]
    dups = {}
    for folder in folders:
        if not os.path.exists(folder):
            print('%s is not a valid path, please verify' % folder)
            # Exit with a non-zero status so a calling script can detect
            # that the scan did not run.
            sys.exit(1)
        # Find the duplicated files in this folder and merge them into dups.
        joinDicts(dups, findDup(folder))
    printResults(dups)
