Hello, please help.
I have a folder that contains multiple PDF files, and I am trying to write code that sanitizes their filenames using a character whitelist. Can anyone help? This is the code I have so far:
import string
import unicodedata
import os
# Characters allowed to survive sanitizing: a few punctuation marks, the
# space, and every ASCII letter and digit.
valid_filename_chars = "-_.() " + string.ascii_letters + string.digits
# Longest filename (in characters) that clean_filename will return.
char_limit = 255
# Make 'dir' the working directory so later relative paths resolve inside it.
# NOTE(review): 'dir' looks like a placeholder for the real folder path
# (presumably the folder holding the PDFs) -- confirm before running.
os.chdir('dir')
def clean_filename(filename, whitelist=valid_filename_chars, replace=' '):
    """Return a sanitized copy of *filename* containing only whitelisted characters.

    Args:
        filename: The original file name (name only, not a full path).
        whitelist: Collection of characters that may appear in the result.
        replace: Characters that are each converted to an underscore before
            whitelisting is applied.

    Returns:
        The cleaned name, at most ``char_limit`` characters long. When the
        cleaned name is too long, the stem is shortened and the extension
        (e.g. ``.pdf``) is kept, so the file type is not lost.
    """
    # Turn every character listed in `replace` into an underscore.
    for r in replace:
        filename = filename.replace(r, '_')

    # NFKD decomposes accented letters into a base letter plus combining
    # marks; encoding to ASCII with 'ignore' then drops the marks and any
    # other non-ASCII character.
    cleaned_filename = unicodedata.normalize('NFKD', filename).encode('ASCII', 'ignore').decode()

    # Keep only the whitelisted characters.
    cleaned_filename = ''.join(c for c in cleaned_filename if c in whitelist)

    if len(cleaned_filename) <= char_limit:
        return cleaned_filename

    print("Warning, filename truncated because it was over {}. Filenames may no longer be unique".format(char_limit))

    # Shorten the stem rather than chopping the end of the name, so the
    # extension survives truncation.
    stem, ext = os.path.splitext(cleaned_filename)
    if ext and len(ext) < char_limit:
        return stem[:char_limit - len(ext)] + ext
    # No usable extension (or it alone exceeds the limit): plain truncation.
    return cleaned_filename[:char_limit]
# Sanitize the name of every PDF in the current working directory.
# Guarded so that merely importing this file never renames anything.
if __name__ == "__main__":
    for filename in os.listdir('.'):
        # Only touch regular files with a .pdf extension.
        if not (os.path.isfile(filename) and filename.lower().endswith('.pdf')):
            continue

        new_name = clean_filename(filename)
        if new_name == filename:
            continue  # already clean

        if not new_name:
            print("Skipping {!r}: no valid characters left after cleaning".format(filename))
            continue

        # Two different originals can clean to the same name; never overwrite.
        if os.path.exists(new_name):
            print("Skipping {!r}: {!r} already exists".format(filename, new_name))
            continue

        os.rename(filename, new_name)