1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
def _partition_duplicates(duplicates):
    """Split a duplicate map into kept names and names to move aside.

    Args:
        duplicates: Mapping of file name -> list of file names reported as
            its duplicates (the shape ``process_file`` receives from
            ``find_duplicates``).

    Returns:
        A ``(only_img, like_img)`` tuple of lists. ``only_img`` holds the
        first-seen name of each group; ``like_img`` holds every name listed
        as a duplicate of a kept name, each exactly once, in discovery order.
    """
    only_img = []  # names that are kept
    like_img = []  # names to move aside, without repeats
    seen_like = set()  # O(1) membership test for like_img

    for img, img_list in duplicates.items():
        # PNG entries are never chosen as the kept image of a group.
        if img.endswith(".png"):
            continue
        # Already reported as a duplicate of an earlier kept image.
        if img in seen_like:
            continue
        only_img.append(img)
        for dup in img_list:
            # The same name can be listed under several kept images when
            # similarity is not transitive; record it only once so it is
            # not moved twice.
            if dup not in seen_like:
                seen_like.add(dup)
                like_img.append(dup)

    return only_img, like_img


def process_file(img_path):
    """De-duplicate the images in a directory.

    Every image under ``img_path`` is hashed with ``PHash``, and the
    duplicate map is walked once: the first image seen of each group is
    kept, and the images reported as its duplicates are moved (not deleted)
    into a ``like_img`` directory relative to the current working directory.

    Args:
        img_path: Directory containing the images to de-duplicate.

    Returns:
        None. The number of kept and moved-aside images is printed. Errors
        are printed rather than raised.
    """
    try:
        phasher = PHash()
        # Hash every image found in the directory.
        encodings = phasher.encode_images(image_dir=img_path)
        # Look up the duplicates of every hashed image.
        duplicates = phasher.find_duplicates(encoding_map=encodings)

        only_img, like_img = _partition_duplicates(duplicates)

        print("唯一图片:", len(only_img))
        print("相似图片:", len(like_img))

        os.makedirs("like_img", exist_ok=True)
        for item_src in like_img:
            src = os.path.join(img_path, item_src)
            dst = os.path.join("like_img", item_src)
            try:
                os.rename(src, dst)
            except OSError as e:
                # One file that cannot be moved must not stop the rest.
                print(e)

    except Exception as e:
        # Best-effort boundary: report the failure and return normally.
        print(e)


def test03():
    """Run image de-duplication on the ``downloads`` directory."""
    process_file("downloads")

    # Alternative (disabled): process every sub-directory of "downloads"
    # and report how many directories were handled.
    # num = 0
    # for root, dirs, files in os.walk("downloads"):
    #     for dir_ in dirs:
    #         file_dir_path = os.path.join(root, dir_)
    #         process_file(file_dir_path)
    #         num += 1
    # print("处理文件夹个数:{}".format(num))