Update: As of 2020, updated documentation for this project is available on my website at hiz.al/photodata.

After Quan and I met and got to know each other, we discovered we wanted to make portraits of each other based on our preferences as photographers. We wanted to compare styles, subjects, etc. We came up with a bunch of ideas and settled on using metadata to create data-driven visualizations of our preferences. We also originally wanted to use Google’s Vision app to interpret what the subject of each photo was, but that turned out to be too slow and complicated. Instead, we focused on core aspects of our photographic styles. For example, we wanted to know how we compared based solely on aperture and focal length preferences, as Quan has special lenses and I prefer using a single 50mm prime lens. We also wanted to see what kinds of ambient light we photographed in, and how that compared to what we thought of ourselves. I was surprised: I usually think of myself as an indoor photographer, but a lot of my photos turned out to have daylight-level ambience.

Quan has about 90k images and I have about 25k images. The process for Quan took a bit longer than it did for me since he stores his images as RAW files; to make it easier on us, we decided to convert his images to small JPEGs with the metadata intact and read them into the Python script below. As for my images, the count is lower because I clear out unneeded images and can generally reduce a photoshoot of about 700-900 photos down to the 50 photos I keep. All my photos are already JPEGs, so I didn’t need to convert anything (a process that took over a week for Quan). After the conversion, the Python script grabbed the necessary metadata (time, focal length, shutter speed, aperture, etc.) and computed the data for the four visualizations below. For ambience, we made a rating system that combines aperture, shutter speed and ISO into a single number representing the overall ambient light that the location must have had (rather than the actual brightness of the photo).

After the data is created, it is saved to JS files, read into various HTML pages, and loaded into Chart.js charts to create the visualizations below. Quan, since he didn’t write as much code, also made a second project, which is visible on his page. In the ambience charts, each dot represents an image, and each dot has a certain amount of transparency. The brighter the white, the more images were taken on that day in that lighting.

Ambience (Hizal)

Ambience (Soonho)



(the following code was adapted for the various analyses above)
Python Script: Analyze metadata from 120k images for ambience

from collections import OrderedDict 
from os.path import exists, join
from datetime import datetime
from os import makedirs, walk
import logging, traceback
import exifread
import json

# When True, the functions below print per-file details and write() targets
# debug.txt instead of data.js.
debug = False
# Folder scanned by load() when it is called without a folder.
default_folder = "imgs"
# Lower-case filename suffixes that load() accepts as images. The name is
# misspelled, but it is referenced under this spelling elsewhere in the file.
extentions = ('.jpg','.jpeg','.png','.tif','.tiff','.gif')
# Paths of every matching image found by load().
files = []
# Maps image path -> dict with keys 'f', 'ss', 'iso', 'fl', 'ts', as stored
# by load().
metadata = {}
# Maps a date string to the ratings recorded for that date by analyze().
days = {}
# Iterated by write() when it emits the chart file.
data = []

def load(folder = None):
  global files
  if not folder:
    folder = default_folder

  for r, dir, f in walk(folder):
    for file in f:
      if join(r,file).lower().endswith(extentions):
        files.append(join(r, file))

  perc = 0
  count = 0
  for file in files:
    if debug:
      print file

    image = None
    while not image:
        image = open(file, 'rb')
        print "ERROR: File not found: " + file
        raw_input("Press enter to continue when reconnected ");
    tags = exifread.process_file(image, details=False)  
      # timestamp
      ts = datetime.strptime(str(tags['EXIF DateTimeOriginal']), '%Y:%m:%d %H:%M:%S')

      # aperture
      fstop = str(tags['EXIF FNumber']).split('/')
      if len(fstop) > 1:
        f = float(fstop[0])/float(fstop[1])
        f = float(fstop[0])

      # shutter speed
      speed = str(tags['EXIF ExposureTime']).split('/')
      if len(speed) > 1:
        ss = float(speed[0])/float(speed[1])
        ss = float(speed[0])
      # iso
      iso = int(str(tags['EXIF ISOSpeedRatings']))

      # focal length
      mm = str(tags['EXIF FocalLength']).split('/')
      if len(mm) > 1:
        fl = float(mm[0])/float(mm[1])
        fl = float(mm[0])

      if debug:
        print "\tTimestamp: " + str(ts)
        print "\tAperture: f" + str(f)
        print "\tShutter: " + str(tags['EXIF ExposureTime']) + " (" + str(ss) + ")"
        print "\tISO: " + str(iso)
        print "\tFocal length: " + str(fl) + "mm"

      metadata[file] = {'f':f, 'ss':ss, 'iso':iso, 'fl':fl, 'ts':ts}

    except Exception as e:
      if debug:
        print file

    # print progress
    if count == 0:
      print " 0% ",
    count += 1
    new_perc = int(round(((count * 1.0) / len(files)) * 100))
    if new_perc > perc and new_perc%10==0:
      print "\n" + str(new_perc) + "% ",
    elif new_perc > perc and new_perc%1==0:
      print ".",
    perc = new_perc

  print ""
  print str(len(files)) + " files found.\n"

def write():
  filename = "data.js"
  if debug:
    filename = "debug.txt"

  print "Writing " + filename + "... ",
  with open(filename, 'w') as f:
    f.write("window.chartdata = [\n")
    for day in data:
      for i in xrange(len(day)):
        if i != len(day)-1:

  print "\t\tdone."

def map(value, srcMin, srcMax, tgtMin, tgtMax):
  """Linearly rescale ``value`` from the source range to the target range.

  ``srcMin`` lands on ``tgtMin`` and ``srcMax`` on ``tgtMax``; values outside
  the source range extrapolate rather than clamp. The source range may be
  given in descending order. Raises ZeroDivisionError when ``srcMin`` equals
  ``srcMax``.

  Within this module the name shadows the builtin ``map``.
  """
  # Position of value inside the source range (0.0 at srcMin, 1.0 at srcMax).
  fraction = (float(value) - srcMin) / (srcMax - srcMin)
  span = tgtMax - tgtMin
  return tgtMin + span * fraction

def constrain(value, min, max):
  """Clamp ``value`` to the inclusive range [``min``, ``max``].

  Returns ``min`` when value is below it, ``max`` when value is above it,
  and ``value`` unchanged otherwise.
  """
  if value < min:
    return min
  if value > max:
    return max
  return value

def getRating(meta):
  iso = constrain(map(meta['iso'], 100, 6400, 0, 100), 0, 100)
  f = constrain(map(meta['f'], 22, 1.4, 0, 100), 0, 100)
  ss = constrain(map(meta['ss'], float(1.0/8000), 1, 0, 100), 0, 100)

  if debug:
    print "\tISO: " + str(meta['iso']) + "/" + str(iso)
    print "\tF: " + str(meta['f']) + "/" + str(f)
    print "\tSS: " + str(meta['ss']) + "/" + str(ss)

  return int(iso + f + ss)

def analyze(index = None):
  global metadata, data, days

  count = 0
  perc = 0
  for img in metadata:
    meta = metadata[img]
    rating = getRating(meta)
    if debug:
      print ""
      print img
      print rating
    if rating >= 250:
      print img

    if str(meta['ts'].date()) in days:
      days[str(meta['ts'].date())] = [rating]

    # print progress
    count += 1
    new_perc = int(round(((count * 1.0) / len(metadata)) * 100))
    if new_perc > perc and new_perc%10==0:
      print str(new_perc) + "% "
    perc = new_perc

  # save as ordered days
  ordered = OrderedDict(sorted(days.items(), key=lambda t: t[0]))
  for day in ordered:

  if debug:
    print days
    print ordered
    print data

  print str(len(metadata)) + " files processed."

def test():
  """Developer hook; currently does nothing.

  NOTE(review): no body is present for this function in this file. It is
  kept as an explicit no-op so the definition is valid; restore the original
  self-test here if it can be recovered.
  """
  pass

while True:
  print "0: Exit (without saving)"
  print "1: Auto"
  print "2: Load"
  print "3: Analyze"
  print "4: Save data"
  choice = (int)(raw_input("> "))

  if choice == 0:

  if choice == 1:
  elif choice == 2:
    folder = raw_input("Folder section: ")
  elif choice == 3:
  elif choice == 4:
  elif choice == 626:
    print ""

  print ""