diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..da8d1e2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ + +.vscode/settings.json diff --git a/README b/README deleted file mode 100644 index fbc9f98..0000000 --- a/README +++ /dev/null @@ -1,5 +0,0 @@ - -Image Duplicate Finder is exactly what it sounds like. If you have a large number of images some of which are duplciated or nearly duplicated (resizings, different shadings, minor cropping etc.), this will identify many of them. It has a fairly high false negative rate (i.e. it will miss some duplicates); but at the default level of sensitivity has not had an observed false positive (i.e. it has not identified two images are duplicates when they are not). If this happens to you, let me know! - - --- Caelyn McAulay, caelyn@ceruleanrodent.com, January 2012 diff --git a/README.md b/README.md new file mode 100644 index 0000000..fc248e8 --- /dev/null +++ b/README.md @@ -0,0 +1,30 @@ +# Image Duplicate Finder +A Python program that sorts duplicated images into folders and moves unduplicated images into one folder. + +# Screenshot + +![After Running Python Program](https://github.com/ronknight/Image-Duplicate-Finder/blob/master/images/screenshots.png) + +# Installation + +1. Install Python 3.7.3 or newer. +2. Run "pip install -r requirements.txt". +3. Copy all images to be sorted into your installation folder. +4. Run "python imgdupfind.py". +5. Images that are duplicated are placed in their own newly created folders. +6. Unduplicated images are placed in a folder named "unduplicated". + +# Error +1. If you encounter the error below, just delete the existing "unduplicated" folder and run the program again. + +![Folder Exist Error](https://github.com/ronknight/Image-Duplicate-Finder/blob/master/images/error.png) + +# Acknowledgement + +-- Caelyn McAulay, caelyn@ceruleanrodent.com, January 2012 + +# Description + +Image Duplicate Finder is exactly what it sounds like. 
If you have a large number of images some of which are duplicated or nearly duplicated (resizings, different shadings, minor cropping etc.), this will identify many of them. It has a fairly high false negative rate (i.e. it will miss some duplicates); but at the default level of sensitivity has not had an observed false positive (i.e. it has not identified two images as duplicates when they are not). If this happens to you, let me know! + + diff --git a/images/error.png b/images/error.png new file mode 100644 index 0000000..149f2fe Binary files /dev/null and b/images/error.png differ diff --git a/images/screenshots.png b/images/screenshots.png new file mode 100644 index 0000000..755e438 Binary files /dev/null and b/images/screenshots.png differ diff --git a/imgdupfind.py b/imgdupfind.py index 9fc244f..c88144c 100755 --- a/imgdupfind.py +++ b/imgdupfind.py @@ -15,7 +15,8 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -import Image +from PIL import Image +from pytesseract import image_to_string import os from shutil import copy2, move from optparse import OptionParser @@ -31,7 +32,7 @@ def __init__(self, log_level): def log_level_value(self, log_level): if log_level == "error": - return self.ERROR; + return self.ERROR elif log_level == "warning": return self.WARNING elif log_level == "info": @@ -58,7 +59,7 @@ def log(self, message_log_level, message): class ScreenLogger(Logger): def log(self, message_log_level, message): if self.log_level >= message_log_level: - print message + print (message) class FileLogger(Logger): def __init__(self, log_level, file_name): @@ -200,7 +201,7 @@ def image_hash(im): logger.log(logger.INFO,"The following are groups of identical images:\n") - for dup_set in dup_dict.viewvalues(): + for dup_set in dup_dict.values(): for img_name in dup_set: logger.log(logger.INFO, img_name) logger.log(logger.INFO, "\n") diff --git a/requirements.txt b/requirements.txt new file mode 100644 
index 0000000..9fca0d6 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +tesseract >= 0.1.3 +pytesseract >= 0.2.6 \ No newline at end of file