Recently, I helped my parents recover some deleted photographs from their hard drive using a Linux program called "photorec." Though probably only a few hundred photos had been deleted, photorec found more than 100,000 JPEGs totalling about 1GB in size. Many of these were tiny thumbnails that had been temporarily cached by the web browser. To help me sort through the results, I wrote a script using the Python Imaging Library (PIL) that examines the EXIF metadata.
# -*- coding: utf-8 -*-
"""
Created on Sat Jul 27 01:02:56 2013
@author: dan
"""
from PIL import Image
from PIL.ExifTags import TAGS
import os, shutil
# sorts the photos into separate folders named after their camera models
def main(directory, dest):
    modellist = review_models(directory, dest)
    handle_files(directory, dest, modellist)
# reads a photo's EXIF data and returns it as a dict of tag name -> value
def get_exif_data(fname):
    ret = {}
    img = Image.open(fname)
    if hasattr(img, '_getexif'):
        exifinfo = img._getexif()
        if exifinfo is not None:
            for tag, value in exifinfo.items():
                decoded = TAGS.get(tag, tag)
                ret[decoded] = value
    return ret
# selects files taken by a specific camera model and moves them to a specified folder
def review_files(directory, dest, model):
    for path, dirs, images in os.walk(directory):
        for i in images:
            g = os.path.join(path, i)
            try:
                ret = get_exif_data(g)
                if ret.get('Model') == model:
                    shutil.move(g, dest)
            except Exception:
                # not a readable image, or the move failed; skip it
                pass
# sorts photos into per-model folders, using the Models.txt list from review_models
def handle_files(directory, dest, modellist):
    with open(modellist, 'r') as models:
        ml = models.read()
    mm = ml.split(",")
    # create one destination folder per model
    for m in mm:
        try:
            newpath = os.path.join(dest, m)
            if not os.path.exists(newpath):
                os.makedirs(newpath)
        except OSError:
            pass
    for path, dirs, images in os.walk(directory):
        for i in images:
            g = os.path.join(path, i)
            try:
                ret = get_exif_data(g)
                if 'Model' in ret:
                    outpath = os.path.join(dest, ret['Model'])
                    shutil.move(g, outpath)
            except Exception:
                pass
# creates a txt file listing all camera models found, separated by commas
def review_models(directory, dest):
    txt = os.path.join(directory, "errors.txt")
    txt2 = os.path.join(dest, "Models.txt")
    open(txt2, 'w').close()   # start with an empty model list
    open(txt, 'w').close()    # and an empty error log
    for path, dirs, images in os.walk(directory):
        for i in images:
            try:
                g = os.path.join(path, i)
                ret = get_exif_data(g)
                if 'Model' in ret:
                    with open(txt2, 'a') as models:
                        models.write(ret['Model'] + ",")
            except Exception:
                # files that can't be read as images get logged instead
                with open(txt, 'a') as error:
                    error.write(i + " \n")
    return txt2
# finds photos with identical creation datetimes and moves the smaller
# fragments to a different location
def rmDuplicates(directory, dest):
    for path, dirs, images in os.walk(directory):
        for i in images:
            try:
                g1 = os.path.join(path, i)
                date1 = findDate(path, i)
                if date1 is None:
                    continue  # no EXIF datetime, so nothing to compare against
                # compare against every other photo under the directory
                for path2, dirs2, images2 in os.walk(directory):
                    for i2 in images2:
                        try:
                            g2 = os.path.join(path2, i2)
                            if g2 == g1:
                                continue
                            s1 = os.stat(g1).st_size
                            s2 = os.stat(g2).st_size
                            date2 = findDate(path2, i2)
                            if date2 == date1:
                                # keep the larger file; move the smaller fragment away
                                if s1 > s2:
                                    shutil.move(g2, dest)
                                else:
                                    shutil.move(g1, dest)
                        except Exception:
                            pass
            except Exception:
                pass
# returns the date and time an image was created, or None if there is no EXIF date
def findDate(path, filename):
    g = os.path.join(path, filename)
    ret = get_exif_data(g)
    return ret.get('DateTimeDigitized')
# prints the EXIF creation datetime for a photo
def print_exif(path, filename):
    g = os.path.join(path, filename)
    ret = get_exif_data(g)
    print("EXIF data for photo: " + g + ".")
    if 'DateTimeDigitized' in ret:
        print(ret['DateTimeDigitized'])
# moves all of the photographs from their recup* folders into a single directory
def group_files(larger_directory, outputfile):
    txt = os.path.join(outputfile, "errors.txt")
    open(txt, 'w').close()   # start with an empty error log
    for files in os.listdir(larger_directory):
        path = os.path.join(larger_directory, files)
        for root, dirs, images in os.walk(path):
            for image in images:
                ipath = os.path.join(root, image)
                try:
                    shutil.move(ipath, outputfile)
                except Exception:
                    with open(txt, 'a') as error:
                        error.write(ipath + " \n")
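
For reference, here's a minimal sketch of how the pieces fit together: pool everything from photorec's recup_dir.* folders into one place with group_files, sort that pile by camera model with main, and then park the smaller duplicate fragments somewhere else with rmDuplicates. All of the paths below are just placeholders; swap in wherever your recovered files live and wherever you want the pooled, sorted, and duplicate photos to end up.

# Example invocation -- all paths are placeholders, adjust them to your own setup.
if __name__ == "__main__":
    recovered = "/home/dan/recovery"          # parent folder holding photorec's recup_dir.* folders
    pool = "/home/dan/photos/all"             # single folder collecting every recovered photo
    sorted_dir = "/home/dan/photos/sorted"    # destination for the per-model folders
    dupes = "/home/dan/photos/duplicates"     # where smaller duplicate fragments get parked
    for d in (pool, sorted_dir, dupes):
        if not os.path.exists(d):
            os.makedirs(d)
    group_files(recovered, pool)       # flatten the recup_dir.* folders into one directory
    main(pool, sorted_dir)             # write Models.txt, then sort photos into per-model folders
    rmDuplicates(sorted_dir, dupes)    # move the smaller of any same-datetime pair aside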