File Comparison With Some DB Bells and Whistles

Python    November Post-A-Day    2009-11-04

Right. So yesterday's post was mind-numbingly simple. But that's the just the file comparison. The script that I actually use also hits the db and generates a slew of additional updates for me, based on the results of that diff.


#!/usr/bin/python

# usage: ./compare.py pwdata102409.txt pwdata103109.txt used_codes_wk6

import sys, datetime, MySQLdb as Database

todaysdate = datetime.datetime.now().strftime("%Y%m%d")
file1 = sys.argv[1]
file2 = sys.argv[2]

# for archiving purposes, we keep each week's changes in a separate table
#  - it's a little weird, I know, but we have our reasons - don't ask
thisweekstable = sys.argv[3]

# a list to hold all the updated codes
diffcodes = []

def writeChanges():
    f1 = open(file1, "r")
    f2 = open(file2, "r")

    fileOne = f1.readlines()
    fileTwo = f2.readlines()

    f1.close()
    f2.close()

    outFile1 = open(todaysdate + "_results.txt", "w")
    outFile2 = open(todaysdate + "_updates.txt", "w")

    x = 0
    for i in fileOne:
        if i != fileTwo[x]:
            outFile1.write(i.rstrip() + " <> " + fileTwo[x].rstrip() + "\n")

            code = fileTwo[x].split("|")[0]
            codes_used = fileTwo[x].split("|")[3]
            codes_limit = fileTwo[x].split("|")[4].rstrip()
            outFile2.write("UPDATE used_codes SET codes_used='" + codes_used + "', code_limit='" + code_limit + "' WHERE passcode = '" + code + "';" + "\n")

            # take the code that's been split out from the record in fileTwo and add it to the list
            diffcodes.append(code)
        x += 1

    outFile1.close()
    outFile2.close()

    print "number of records changed: " + str(x)
    return diffcodes

def writeUpdates():
    outFile3 = open(todaysdate + "_doubleused.txt", "w")
    outFile4 = open(todaysdate + "_singleused.txt", "w")

    db = Database.connect("server", "username", "password", "dbname")
    cursor = db.cursor()

    # convert diffcodes to a simple comma-delimited sequence before passing them to the select
    codes = "', '".join(map(str, diffcodes))

    # for codes that come back with a count greater than one, 
    # update the main coupons table to set the used value to 8
    sqlOne = """SELECT passcode FROM %s WHERE passcode IN ('%s') 
	GROUP BY passcode HAVING COUNT(passcode) > '1'""" %(thisweekstable, codes)
    cursor.execute(sqlOne)
    double = cursor.fetchall()
    doubles = []
    for code in double:
        doubles.append(code[0])
    doubleused = "', '".join(map(str, doubles))

    # again, yeah, I'm writing the updates to a file before running them against production
    #  - it's just a quirk, I like to eyeball everything first, then run updates from the command line
    outFile3.write("UPDATE coupons SET used = '8' WHERE code IN (" + doubleused + ");")

    # for the remaining codes, those with a count equal to 1
    sqlTwo = """SELECT passcode FROM %s WHERE passcode IN ('%s') 
	GROUP BY passcode HAVING COUNT(passcode) = '1'""" %(thisweekstable, codes)
    cursor.execute(sqlTwo)
    single = cursor.fetchall()
    singles = []
    for code in single:
        singles.append(code[0])
    singlecodes = "', '".join(map(str, singles))

    # do another select to identify a subset that meets one of two conditions
    # my 'end_date' column is a date type, so I'm passing in a YYYY-MM-DD date to do the comparison, 
    # where I might otherwise just use UNIX_TIMESTAMP() 
    currentdate = datetime.date.today()
    sqlThree = """SELECT passcode FROM %s WHERE ((code_limit = '1' AND end_date < '%s') 
	OR (codes_used = '4' AND code_limit != '1')) 
	AND passcode IN ('%s')""" %(thisweekstable, currentdate, singlecodes)
    cursor.execute(sqlThree)
    sused = cursor.fetchall()
    ssused = []
    for code in sused:
        ssused.append(code[0])
    singleused = "', '".join(map(str, ssused))
    outFile4.write("UPDATE coupons SET used = '4' WHERE code IN ('" + singleused + "');")

    cursor.close()
    db.close()

    outFile3.close()
    outFile4.close()

writeChanges()
writeUpdates()