import csv
import random
from array import array
import glob
# Positions of each field inside a data record, which is laid out as
# [center, table, machine, yes, no].
CENTER, TABLE, MACHINE, YES, NO = range(5)
def read_csv_comprehensive(filename):
    """Read the original spreadsheet (converted to CSV) into a flat list.

    Only five columns of each row are used, by 0-based position:
    3 = center, 4 = table, 5 = machine, 6 = no votes, 8 = yes votes.

    Args:
        filename: path of the CSV file to read.

    Returns:
        A list of ``[center, table, machine, yes, no]`` records (all ints),
        in file order.

    Raises:
        ValueError: if one of the used columns is not an integer.
        IndexError: if a non-empty row has fewer than nine columns.
    """
    data = []
    # open() replaces the Python-2-only file() builtin, and the with-block
    # closes the handle even when a row fails to parse.
    with open(filename) as handle:
        for row in csv.reader(handle):
            if not row:
                # Blank line (e.g. a trailing newline): nothing to parse.
                continue
            center = int(row[3])
            table = int(row[4])
            machine = int(row[5])
            no = int(row[6])
            yes = int(row[8])
            data.append([center, table, machine, yes, no])
    return data
def read_csv_simple(filename):
    """Read a data file in the simple format, grouped by voting center.

    Each row holds five integer columns in the order
    center, table, machine, yes, no.

    Args:
        filename: path of the CSV file to read.

    Returns:
        A dict mapping each center id to the list of its
        ``[center, table, machine, yes, no]`` records, in file order.

    Raises:
        ValueError: if one of the five columns is not an integer.
        IndexError: if a non-empty row has fewer than five columns.
    """
    data = {}
    # open() replaces the Python-2-only file() builtin, and the with-block
    # closes the handle even when a row fails to parse.
    with open(filename) as handle:
        for row in csv.reader(handle):
            if not row:
                # Blank line (e.g. a trailing newline): nothing to parse.
                continue
            center = int(row[0])
            table = int(row[1])
            machine = int(row[2])
            yes = int(row[3])
            no = int(row[4])
            # setdefault() replaces the has_key()/else branch; has_key() no
            # longer exists on Python 3 dicts.
            data.setdefault(center, []).append(
                [center, table, machine, yes, no])
    return data
def write_csv_simple(filename, data):
    """Write *data* to *filename* in the simple CSV format.

    Args:
        filename: path of the file to create or overwrite.
        data: a flat iterable of records (one sequence of fields per row),
            not the per-center dict returned by read_csv_simple().
    """
    # open() replaces the Python-2-only file() builtin; the with-block makes
    # sure the rows are flushed and the handle is closed before returning.
    with open(filename, "w") as handle:
        csv.writer(handle).writerows(data)
def create_center_set(data):
    """Return a list of the voting center ids.

    Args:
        data: mapping keyed by center id, as built by read_csv_simple().

    Returns:
        A new list holding every key of *data*.
    """
    # list(...) keeps the promised list type on Python 3 as well, where
    # dict.keys() alone is only a view onto the dict.
    return list(data.keys())
def records_in_center(data, center):
    """Return the data records associated with *center*.

    Args:
        data: mapping from center id to that center's records.
        center: the center id to look up.

    Returns:
        The object stored under ``data[center]`` (not a copy).

    Raises:
        KeyError: if *center* is not a key of *data*.
    """
    records = data[center]
    return records
def yes_votes(data):
    """Return the number of yes votes in *data*.

    Args:
        data: iterable of records; the YES field of each one is added up.

    Returns:
        The total of the YES fields, or 0 when *data* is empty.
    """
    # Uses the builtin sum() rather than a local named "sum" shadowing it.
    return sum(rec[YES] for rec in data)
def no_votes(data):
    """Return the number of no votes in *data*.

    Args:
        data: iterable of records; the NO field of each one is added up.

    Returns:
        The total of the NO fields, or 0 when *data* is empty.
    """
    # Uses the builtin sum() rather than a local named "sum" shadowing it.
    return sum(rec[NO] for rec in data)
def machine_totals(data):
    """Return a list of the total votes on each machine in *data*.

    Args:
        data: iterable of records, one per machine.

    Returns:
        A list with one entry per record, equal to that record's yes count
        plus its no count, in the same order as *data*.
    """
    return [rec[YES] + rec[NO] for rec in data]
def make_deck(data):
    """Return an array of 'Y's and 'N's matching the vote counts in *data*.

    Args:
        data: iterable of records whose yes and no votes are counted.

    Returns:
        A character array holding one 'Y' per yes vote followed by one 'N'
        per no vote.
    """
    # NOTE: the 'c' typecode is only available in Python 2's array module.
    deck = array('c', 'Y') * yes_votes(data)
    deck.extend(array('c', 'N') * no_votes(data))
    return deck
def deal(deck, machines):
    """Return a list of the votes assigned to each voting machine.

    Args:
        deck: sliceable sequence of votes.
        machines: iterable giving how many votes each machine receives.

    Returns:
        A list with one slice of *deck* per entry of *machines*; the slices
        are consecutive and taken from the front of the deck.
    """
    hands = []
    start = 0
    for size in machines:
        stop = start + size
        hands.append(deck[start:stop])
        # The next machine picks up where this one stopped.
        start = stop
    return hands
def simulate_center(data, center):
    """Simulate one voting center.

    The center's yes and no votes are pooled into a single deck, shuffled,
    and dealt back out so that every machine keeps its original total.

    Args:
        data: mapping from center id to that center's records.
        center: id of the center to simulate.

    Returns:
        A list of ``[center, table, machine, yes, no]`` records, one per
        original record, carrying the re-dealt yes and no counts.
    """
    records = records_in_center(data, center)
    deck = make_deck(records)
    random.shuffle(deck)
    hands = deal(deck, machine_totals(records))
    simulated = []
    # deal() returns one hand per machine total, i.e. one hand per record.
    for rec, hand in zip(records, hands):
        yes = hand.count('Y')
        no = hand.count('N')
        simulated.append([rec[CENTER], rec[TABLE], rec[MACHINE], yes, no])
    return simulated
def simulate_election(data, centers):
    """Simulate an entire election.

    Args:
        data: mapping from center id to that center's records.
        centers: iterable of the center ids to simulate, in output order.

    Returns:
        A flat list with the simulated records of every center.
    """
    results = []
    for center in centers:
        # extend() appends in place; "l = l + ..." rebuilt the whole list
        # on every iteration.
        results.extend(simulate_center(data, center))
    return results
def run_simulations(n):
    """Run *n* full simulations of the election.

    Reads "votes-simple.csv" once, then for each run index i in 0..n-1
    prints i and writes that run's records to "votes-random-<i>.csv".

    Args:
        n: number of simulations to run.
    """
    data = read_csv_simple("votes-simple.csv")
    centers = create_center_set(data)
    for i in range(n):
        # Function-call form gives the same output on Python 2 and also
        # parses on Python 3.
        print(i)
        output = simulate_election(data, centers)
        write_csv_simple("votes-random-" + str(i) +".csv", output)
def dups(l):
    """Return True if *l* contains duplicate elements.

    Args:
        l: sized collection of hashable elements.

    Returns:
        True when at least one value occurs more than once, else False.
    """
    # The original filled a dict through map(d.__setitem__, l, []), which
    # only works with Python 2's eager, None-padding map(); on Python 3 the
    # lazy map never runs. Comparing against set(l) is equivalent and
    # portable.
    return len(set(l)) != len(l)
def file_stats(filename):
    """Find the statistic we're interested in for one file.

    As written, the statistic is the number of voting centers in which the
    highest yes count appears on more than one record.

    Args:
        filename: path of a data file in the simple CSV format.

    Returns:
        The number of centers meeting that condition.
    """
    data = read_csv_simple(filename)
    count = 0
    for center in create_center_set(data):
        yes = [rec[YES] for rec in records_in_center(data, center)]
        # Replace depending on statistic (no counts are rec[NO]); this
        # computes cap-consistent precincts.
        if yes.count(max(yes)) > 1:
            count += 1
    return count
def gather_stats():
    """Print and return stats on all CSV files in the current directory.

    Returns:
        A list with one statistic per file matched by "*.csv", in the order
        glob returns them.
    """
    stats = []
    for path in glob.glob("*.csv"):
        # NOTE(review): this used to call count_collide(), which is not
        # defined anywhere in this file; file_stats() is the per-file
        # statistic here and is presumably what was meant -- confirm.
        stat = file_stats(path)
        # Function-call form gives the same output on Python 2 and also
        # parses on Python 3.
        print(stat)
        stats.append(stat)
    return stats
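# "regreso a documentos" ("back to documents") -- stray non-code text, apparently left over from wherever this file was copied from.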