-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathleaper.py
1284 lines (1172 loc) · 75.2 KB
/
leaper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# pip install sodapy
import numpy as np
import pandas as pd
# from sodapy import Socrata
import time as time
from pandas.api.types import is_string_dtype
import pickle
from collections import OrderedDict
from manual_corrections import pac_correction
from manual_corrections import ie_correction
from synonyms import do_ie_synonyms
from synonyms import add_synonyms
from synonyms import do_pac_synonyms2
from donors_of_interest import make_donors_interest
from difflib import get_close_matches
import yaml
# from fuzzywuzzy import process
import json
# use cases:
# 1. search by contributor & summarize by party
# 2. summarize by candidate votes on a bill - IEs and donations to losing candidates will not be counted in bill votes
class Donor():
    """One contributor in the donation graph and the money flowing through it.

    Attributes:
        name: donor name; also the key used to find this donor in
            ``fulldata.all_donors`` / ``fulldata.party_pac_dict``.
        aliases: list of synonyms stored when donors are combined.
        type: kind of donor (ind, pac, party, caucus).
        filer_id: identifier shown in ``__repr__``.
        total_in, total_out: sums of absolute amounts, refreshed by
            ``sum_donations``.
        money_in: DataFrame (donor, donor_id, amount); only for
            PAC/caucus/party, empty for individual donors.
        money_out: DataFrame (receiver, amount, party, type).
        money_out_resolved: ``money_out`` traced through intermediate PACs,
            with a signed ``proportion`` column.
        has_resolved: True once ``money_out_resolved`` has been computed.
        pending_resolve: True while ``resolve_donations`` is running for this
            donor; used to detect recursion loops.
    """

    def __init__(self):
        self.name = " "
        self.aliases = []  # stores list of synonyms when we combine_donors
        self.type = " "  # ind, pac, party, caucus
        self.filer_id = " "
        self.total_in = 0.0
        self.total_out = 0.0
        # only for PAC/caucus/party, empty for Individual donors
        self.money_in = pd.DataFrame(columns=['donor', 'donor_id', 'amount'])
        # amount, receiver, receiver type (Cand vs PAC)
        self.money_out = pd.DataFrame(columns=["receiver", "amount", "party", "type"])
        # money_out resolved to end candidates
        self.money_out_resolved = pd.DataFrame(
            columns=["receiver", "amount", "party", "type", "proportion"])
        # indicates whether money_out_resolved has been successfully computed yet
        self.has_resolved = False
        # this is used to detect recursion loops in resolve_donations
        self.pending_resolve = False

    def __repr__(self):
        """Summarise totals and, once resolved, signed percentages per party."""
        if self.money_out_resolved.empty:
            return "<Name:%s ID:%s Money in/out:%s / %s>" % (
                self.name, self.filer_id, self.money_in['amount'].sum(),
                self.money_out['amount'].abs().sum())
        resolved = self.money_out_resolved

        def share(party, positive):
            # note sum() will be 0 if the selection is empty
            if positive:
                sign_mask = resolved['proportion'] > 0
            else:
                sign_mask = resolved['proportion'] < 0
            return resolved.loc[(resolved['party'] == party) & sign_mask, 'proportion'].sum()

        this_d_plus = share('DEMOCRAT', True)
        this_d_minus = share('DEMOCRAT', False)
        this_r_plus = share('REPUBLICAN', True)
        this_r_minus = share('REPUBLICAN', False)
        return "<Name:%s ID:%s Money in/out:%s / %s Pct: %s D+ / %s R+ / %s D- / %s R->" % (
            self.name, self.filer_id,
            self.money_in['amount'].sum(), self.money_out['amount'].sum(),
            int(100 * this_d_plus), int(100 * this_r_plus),
            int(100 * this_d_minus), int(100 * this_r_minus))

    def sum_donations(self):
        """Collapse duplicate rows and refresh ``total_in`` / ``total_out``.

        ``money_out`` is summed per (receiver, party, type) and ``money_in``
        per (donor, donor_id). Party and type are part of the key so those
        columns survive the groupby; ``reset_index`` turns the group keys back
        into ordinary columns. Frames with fewer than two rows are left alone.
        """
        if len(self.money_out) > 1:
            self.money_out = pd.DataFrame(
                self.money_out.groupby(['receiver', 'party', 'type']).sum()).reset_index()
        if len(self.money_in) > 1:
            self.money_in = pd.DataFrame(
                self.money_in.groupby(['donor', 'donor_id']).sum()).reset_index()
        self.total_in = self.money_in['amount'].abs().sum()
        self.total_out = self.money_out['amount'].abs().sum()
        return

    def resolve_donations(self, fulldata):
        """Trace ``money_out`` through intermediate PACs into ``money_out_resolved``.

        Rows whose type is 'Candidate' are copied as-is. Any other row is
        looked up in ``fulldata.all_donors``; that receiver is resolved first
        (recursively) and this donor's amount is split according to the
        receiver's resolved proportions. Loops (A -> A, A -> B -> A, and
        longer cycles detected through ``pending_resolve``) are broken by
        netting or dropping the offending donation, which also edits the
        ``money_in`` / ``money_out`` frames of the donors involved.

        Args:
            fulldata: object exposing ``all_donors`` (name -> Donor) and
                ``party_pac_dict`` (PAC name -> party name).

        Raises:
            ValueError: a non-candidate row has a negative amount; a mutual
                donation cannot be matched to exactly one row or compared;
                the amount exceeds the receiver's recorded ``money_in``; or
                both ``money_in`` and the resolved result end up empty.

        Side effects: prints progress/diagnostics, sets ``has_resolved`` and
        clears ``pending_resolve`` on completion.
        """
        # TODO: check for second-level recursion problems, i.e. if PAC A -> PAC B -> PAC C -> PAC A
        # recursion is detected if PAC A -> PAC A or if PAC A -> PAC B -> PAC A
        resolved_columns = ["receiver", "amount", "party", "type", "proportion"]
        print(self.name)
        if self.has_resolved:
            return
        self.pending_resolve = True
        # reset the money_out_resolved dataframe
        self.money_out_resolved = pd.DataFrame(columns=resolved_columns)
        # check if this is a party pac
        my_party_name = 'No Party'
        if self.name in fulldata.party_pac_dict:
            my_party_name = fulldata.party_pac_dict[self.name]
        # get number of donations to resolve
        n_donations = len(self.money_out)
        # check if money_out is empty
        if n_donations < 1:
            self.money_out_resolved = pd.DataFrame(
                [[self.name, self.money_in.amount.abs().sum(), my_party_name, 'Terminal PAC', 1.0]],
                columns=resolved_columns)
            # mark self as resolved
            self.pending_resolve = False
            self.has_resolved = True
            return
        # for each line in money_out, check if it is to a candidate or pac, and update money_out_resolved
        # need to store list of zero receivers to process after this for loop
        zero_receivers = []
        for i in range(n_donations):
            # if candidate, then the amount is 100% to that candidate, so nothing to do
            # if pac, then get candidates/proportions from that pac, and allocate amounts accordingly
            row = self.money_out.iloc[i]
            this_receiver = row['receiver']
            this_amount = row['amount']
            this_party = row['party']
            this_type = row['type']
            if this_type == 'Candidate':
                this_add = pd.DataFrame(
                    [[this_receiver, this_amount, this_party, this_type, 0]],
                    columns=resolved_columns)
                # pd.concat replaces DataFrame.append, which pandas 2.0 removed
                self.money_out_resolved = pd.concat(
                    [self.money_out_resolved, this_add], ignore_index=True)
            else:
                # this_amount can only be negative for a candidate (for IE spending against)
                if this_amount < 0:
                    raise ValueError('in resolve_donations: this_amount should not be negative for a PAC')
                # look up the pac that this money went to
                if this_receiver in fulldata.all_donors:
                    counterpart = fulldata.all_donors[this_receiver]
                    # check if this donation is to self
                    if this_receiver == self.name:
                        # we are in a recursion loop because this PAC donated to itself
                        # raise warning and ignore this donation
                        print('resolve_donations: Recursion loop, ignoring donation to self by ' + self.name)
                        # remove from money_in and money_out
                        self.money_in = self.money_in.loc[self.money_in.donor != this_receiver, :]
                        # this will remove from money_out after for loop
                        zero_receivers = zero_receivers + [this_receiver]
                        # if this is the only donation then this is a terminal PAC,
                        # so set money_out_resolved and return
                        if self.money_out_resolved.empty and i == (n_donations - 1):
                            self.money_out_resolved = pd.DataFrame(
                                [[self.name, self.money_in.amount.abs().sum(), my_party_name,
                                  'Terminal PAC', 1.0]],
                                columns=resolved_columns)
                            # remove from money_out since we are not continuing for loop (should make it empty)
                            self.money_out = self.money_out.loc[
                                self.money_out.receiver != this_receiver, :]
                            # mark self as resolved
                            self.pending_resolve = False
                            self.has_resolved = True
                            return
                        else:
                            continue  # don't add to money_out_resolved since this was donation to self
                    # check for recursion loop PAC A -> PAC B -> PAC A
                    # check if receiver is also a donor to self, if so then fix to avoid recursion loop
                    if this_receiver in self.money_in.donor.values:
                        print('resolve_donations: Recursion loop from ' + self.name + ' to ' + this_receiver)
                        # get both amounts
                        amount_to_me = counterpart.money_out.loc[
                            counterpart.money_out.receiver == self.name, 'amount']
                        if len(amount_to_me) == 1:
                            amount_to_me = amount_to_me.iloc[0]
                        else:
                            raise ValueError('resolve_donations: amount_to_me not length 1')
                        amount_from_me = this_amount
                        print('From: ' + str(amount_from_me) + ' To: ' + str(amount_to_me))
                        if amount_to_me > amount_from_me:
                            # subtract amount_from_me from donation to me
                            counterpart.money_out.loc[
                                counterpart.money_out.receiver == self.name, 'amount'] = \
                                counterpart.money_out.loc[
                                    counterpart.money_out.receiver == self.name, 'amount'] - amount_from_me
                            self.money_in.loc[self.money_in.donor == this_receiver, 'amount'] = \
                                self.money_in.loc[self.money_in.donor == this_receiver, 'amount'] - amount_from_me
                            # remove line from self.money_out and this_receiver's money_in
                            counterpart.money_in = counterpart.money_in.loc[
                                counterpart.money_in.donor != self.name, :]
                            # can't edit self.money_out while we are iterating over it,
                            # have to remove it after for loop
                            zero_receivers = zero_receivers + [this_receiver]
                            continue  # this donation from me to receiver is zero, so do not add to money_out_resolved
                        elif amount_to_me < amount_from_me:
                            # subtract amount_to_me from donation from me
                            self.money_out.loc[self.money_out.receiver == this_receiver, 'amount'] = \
                                self.money_out.loc[self.money_out.receiver == this_receiver, 'amount'] - amount_to_me
                            counterpart.money_in.loc[counterpart.money_in.donor == self.name, 'amount'] = \
                                counterpart.money_in.loc[
                                    counterpart.money_in.donor == self.name, 'amount'] - amount_to_me
                            # remove line from self.money_in and this_receiver's money_out
                            counterpart.money_out = counterpart.money_out.loc[
                                counterpart.money_out.receiver != self.name, :]
                            self.money_in = self.money_in.loc[self.money_in.donor != this_receiver, :]
                            # adjust this_amount
                            this_amount = this_amount - amount_to_me
                        elif amount_to_me == amount_from_me:
                            # remove line from self.money_in and this_receiver's money_out and this_receiver's money_in
                            counterpart.money_out = counterpart.money_out.loc[
                                counterpart.money_out.receiver != self.name, :]
                            counterpart.money_in = counterpart.money_in.loc[
                                counterpart.money_in.donor != self.name, :]
                            self.money_in = self.money_in.loc[self.money_in.donor != this_receiver, :]
                            # can't edit self.money_out while we are iterating over it,
                            # have to remove it after for loop
                            zero_receivers = zero_receivers + [this_receiver]
                            continue  # this donation from me to receiver is zero, so do not add to money_out_resolved
                        else:
                            # this shouldn't happen (e.g. NaN amounts), raise an error
                            print(this_receiver)
                            print(self.name)
                            print(amount_to_me)
                            print(amount_from_me)
                            raise ValueError('in resolve_donations: cant compare amount_to_me and amount_from_me ')
                    # now check for higher level recursion, e.g. A -> B -> C -> A
                    if counterpart.pending_resolve:
                        # we found a recursion loop between self and this_receiver (and possibly others)
                        # since this_receiver is already pending.
                        # print info and skip this donation to break the recursion
                        print('In donor.resolve_donations: Recursion loop - skipped contribution')
                        print('Self: ' + self.name + ' Receiver: ' + this_receiver + ' Amount: ' + str(this_amount))
                        # remove line from self.money_out and this_receiver's money_in
                        counterpart.money_in = counterpart.money_in.loc[
                            counterpart.money_in.donor != self.name, :]
                        # can't edit self.money_out while we are iterating over it,
                        # have to remove it after for loop
                        zero_receivers = zero_receivers + [this_receiver]
                        continue  # skip this donation and go on to the next
                    # check if the receiver is resolved yet, if not, tell them to resolve
                    if not counterpart.has_resolved:
                        counterpart.resolve_donations(fulldata)
                    # get money_out_resolved
                    pac_resolved = counterpart.money_out_resolved
                    # allocate this_amount by proportion to candidates in pac_resolved
                    #
                    # Suppose PAC 1 gives $1000 to PAC 2. PAC 2 give $10 to Candidate A, and no other donations.
                    # Then PAC 2 resolved shows $10, 100% to Candidate A.
                    # Then PAC 1 resolved should not show $1000 * 100% = $1000 to Candidate A
                    # Must limit to $10.
                    #
                    # Resolve donations based on proportions of outgoing donations:
                    amount_resolved = this_amount * pac_resolved.loc[:, 'proportion']
                    ##### Old code section, kept as a safety net
                    ##### Don't need prop_in and all these checks since we explicitly compute unspent balance
                    # Adjust based on proportion of money_in, but not if the PAC must have had a starting balance
                    if counterpart.total_in >= counterpart.total_out:
                        prop_in = this_amount / counterpart.money_in.amount.sum()
                    else:
                        prop_in = 1.0
                    if prop_in > 1.0:
                        raise ValueError('in resolve_donations: this_amount greater than sum of money_in')
                    # Limit amount_resolved to the amount that was spent (elementwise):
                    if any((prop_in * abs(pac_resolved.loc[:, 'amount'])) < (0.999 * abs(amount_resolved))):
                        temp_filter = ((amount_resolved >= 0) * 2) - 1  # save the signs of each element
                        # expanding this to debug
                        # TODO: remove this debugging code. This is fixed, and this section should not be reached.
                        temp1 = prop_in * pac_resolved.loc[:, 'amount']
                        temp2 = temp1.abs()
                        print('From ' + self.name + ' to ' + this_receiver)
                        print(prop_in)
                        print(this_amount)
                        print(i)
                        print(amount_resolved)
                        print(temp1)
                        temp3 = amount_resolved.abs()
                        temp4 = pd.concat([temp3, temp2], axis=1).min(axis=1)
                        temp5 = temp_filter * temp4
                        amount_resolved = temp5
                        print(temp5)
                    ############# end of old code section
                    #
                    # construct DataFrame from pac_resolved and amount_resolved (proportion will be computed later)
                    this_add = pac_resolved.copy()
                    # relying on the order of amount to be the same as it was when we got the proportions
                    this_add['amount'] = amount_resolved
                    this_add['proportion'] = 0  # set to zero for now, it will be computed later
                    self.money_out_resolved = pd.concat(
                        [self.money_out_resolved, this_add], ignore_index=True)
                else:
                    # we didn't find this pac in our data, so we don't know where the money went!
                    # this should never happen, because if we saw a donation, we would have created an entry
                    # TODO: raise an error here, this shouldn't happen
                    this_add = pd.DataFrame(
                        [[this_receiver, this_amount, this_party, 'Unresolved PAC', 0]],
                        columns=resolved_columns)
                    self.money_out_resolved = pd.concat(
                        [self.money_out_resolved, this_add], ignore_index=True)
        # done with for loop, so finish removing the zero receivers
        for dropped in zero_receivers:
            # remove from self.money_out
            self.money_out = self.money_out.loc[self.money_out.receiver != dropped, :]
        # check if money_out_resolved is empty (might have skipped all donations due to recursion loops)
        if self.money_out_resolved.empty:
            # mark unspent if there is money_in. first check if there is money_in
            if self.money_in.empty:  # can't be recursion loop if no money in
                raise ValueError('resolve_donations: money_in and money_out_resolved cant both be empty')
            this_add = pd.DataFrame(
                [[self.name, self.money_in.amount.abs().sum(), my_party_name, 'PAC Unspent Balance', 0]],
                columns=resolved_columns)
            self.money_out_resolved = pd.concat(
                [self.money_out_resolved, this_add], ignore_index=True)
        else:
            # note resolution may have many intermediate pacs donating to the same candidates, so have to sum again
            # TODO: if negative and positive donations to same candidate, these will cancel out in sum
            self.money_out_resolved = pd.DataFrame(
                self.money_out_resolved.groupby(
                    ["receiver", "party", "type", "proportion"]).sum()).reset_index()
            # if this pac is reporting more money_in than money_out, then record it as unspent
            # note this can differ from money_in minus money_out (unresolved) totals
            amount_balance = self.money_in.amount.abs().sum() - self.money_out_resolved.amount.abs().sum()
            if amount_balance > 0.01:
                this_add = pd.DataFrame(
                    [[self.name, amount_balance, my_party_name, 'PAC Unspent Balance', 0]],
                    columns=resolved_columns)
                self.money_out_resolved = pd.concat(
                    [self.money_out_resolved, this_add], ignore_index=True)
        # when all donations have been resolved
        # get total donation amount and divide to obtain proportion for each candidate
        # use abs() in denominator since amounts can be negative (e.g. from an IE)
        # no abs() in numerator since we want to preserve sign in the proportion
        # (plain column assignment: the placeholder column may be integer-typed)
        self.money_out_resolved['proportion'] = (
            self.money_out_resolved['amount'] / self.money_out_resolved['amount'].abs().sum())
        # mark self as resolved
        self.pending_resolve = False
        self.has_resolved = True
        return
class Candidate():
    """A candidate and the donations recorded as received.

    ``money_in`` holds one row per donation (donor, donor_id, amount);
    ``aliases`` stores the list of synonyms when candidates are combined.
    """

    def __init__(self):
        # identity fields start as blank placeholders
        self.name = " "
        self.filer_id = " "
        self.party = " "
        self.type = ' '
        # stores list of synonyms when we combine_candidates
        self.aliases = []
        self.total_in = 0.0
        self.money_in = pd.DataFrame(columns=["donor", "donor_id", "amount"])

    def __repr__(self):
        received = self.money_in['amount'].sum()
        return "<Name:%s ID:%s money_in: %s>" % (self.name, self.filer_id, received)

    def sum_donations(self):
        """Total donations per donor and refresh ``total_in``.

        With two or more rows, ``money_in`` is regrouped so each
        (donor, donor_id) pair appears once with its summed amount.
        ``total_in`` is the sum of absolute amounts.
        """
        has_multiple_rows = len(self.money_in) > 1
        if has_multiple_rows:
            per_donor = self.money_in.groupby(['donor', 'donor_id']).sum()
            self.money_in = pd.DataFrame(per_donor).reset_index()
        self.total_in = self.money_in['amount'].abs().sum()
class Data():
    """Container for the whole dataset: donors, candidates and party PACs.

    Attributes:
        all_donors: dict of donor name -> Donor (individuals and PACs).
        all_candidates: dict of candidate name -> Candidate.
        party_pac_dict: map of PAC names that are party organizations.
    """

    def __init__(self):
        self.all_donors = {}  # all donors (ind and pac), keyed by name
        self.all_candidates = {}  # all candidates, keyed by name
        self.party_pac_dict = {}  # map of pac names that are party organizations

    def sum_donations(self):
        """Call ``sum_donations()`` on every donor, then on every candidate."""
        for donor in self.all_donors.values():
            donor.sum_donations()
        for candidate in self.all_candidates.values():
            candidate.sum_donations()

    def combine_candidates(self, cand_list, rename=False):
        """Combine several entries of ``all_candidates`` into a single Candidate.

        Args:
            cand_list: a candidate name or list of names, all keys of
                ``all_candidates``. A single name also works; it can be used
                to rename a candidate and propagate the change to donors.
            rename: False keeps the first item of ``cand_list`` as the name
                of the combined candidate; a string is used as the new name.

        If any name is not a valid key, an error is printed and nothing is
        changed. Otherwise the listed candidates are removed, their
        ``money_in`` is concatenated and re-summed, and every donor listed in
        the merged ``money_in`` has matching ``money_out`` receivers renamed.

        Raises:
            ValueError: ``cand_list`` is neither a string nor a list, or is
                an empty list.
        """
        # make sure cand_list is a list (basestring does not exist on Python 3)
        if not isinstance(cand_list, list):
            if isinstance(cand_list, str):
                cand_list = [cand_list]
            else:
                raise ValueError('combine_candidates: cand_list should be string or list of strings')
        # remove duplicates, keeping first-seen order
        cand_list = list(OrderedDict.fromkeys(cand_list))
        if not cand_list:
            raise ValueError('combine_candidates: cand_list is empty')
        # check if all strings in cand_list are valid keys in self.all_candidates
        unknown = set(cand_list) - set(self.all_candidates.keys())
        if unknown:
            print('combine_candidates error: cand_list contains invalid key(s)')
            print(unknown)
            return
        # first item in cand_list will be the new key
        first_cand = self.all_candidates.pop(cand_list[0])
        new_cand = Candidate()
        new_cand.name = cand_list[0]
        if rename:
            new_cand.name = rename
        new_cand.aliases = cand_list
        new_cand.party = first_cand.party
        new_cand.type = first_cand.type  # could check that this matches the type of others in cand_list
        new_cand.filer_id = first_cand.filer_id
        new_cand.money_in = first_cand.money_in
        # if cand_list is length 1, this for loop will just be skipped, which is fine
        for alias in cand_list[1:]:
            absorbed = self.all_candidates.pop(alias)
            # pd.concat replaces DataFrame.append, which pandas 2.0 removed
            new_cand.money_in = pd.concat(
                [new_cand.money_in, absorbed.money_in], ignore_index=True)
        # sum_donations again
        new_cand.sum_donations()  # this sum will erase if a donor made donations both for and against
        self.all_candidates[new_cand.name] = new_cand
        # Now we have to follow through to everyone in money_in and update them to the new candidate name
        for giver in list(new_cand.money_in['donor']):
            giver_out = self.all_donors[giver].money_out
            # replace all occurrences of any name in cand_list with the new name
            giver_out.loc[giver_out['receiver'].isin(cand_list), 'receiver'] = new_cand.name
            self.all_donors[giver].sum_donations()
        return

    def combine_donors(self, donor_list, rename=False):
        """Combine several entries of ``all_donors`` into a single Donor.

        Args:
            donor_list: a donor name or list of names. Names that are not
                keys of ``all_donors`` are reported and ignored.
            rename: False keeps the first valid item of ``donor_list`` as the
                name of the combined donor; a string is used as the new name.

        The listed donors are removed, their ``money_in`` / ``money_out`` are
        concatenated (dropping donations inside the synonym set) and
        re-summed, and the old names are replaced by the new name in the
        ``money_in`` of every receiver and the ``money_out`` of every giver.

        Raises:
            ValueError: ``donor_list`` is neither a string nor a list, or no
                name in it is a valid donor key.
        """
        # make sure donor_list is a list BEFORE de-duplicating: de-duplicating
        # a bare string would split it into single characters
        if not isinstance(donor_list, list):
            if isinstance(donor_list, str):
                donor_list = [donor_list]
            else:
                raise ValueError('combine_donors: donor_list should be string or list of strings')
        # remove duplicates, keeping first-seen order
        donor_list = list(OrderedDict.fromkeys(donor_list))
        # check if all strings in donor_list are valid keys in self.all_donors
        unknown = set(donor_list) - set(self.all_donors.keys())
        if unknown:
            print('combine_donors warning: ignoring invalid key(s) in donor_list')
            print(unknown)
            # filter in place of a set intersection so the first valid name
            # stays first and the combined donor's name is deterministic
            donor_list = [name for name in donor_list if name in self.all_donors]
        if not donor_list:
            raise ValueError('combine_donors: donor_list contains no valid donor keys')
        # first item in donor_list will be the new key
        first_donor = self.all_donors.pop(donor_list[0])
        new_donor = Donor()
        new_donor.name = donor_list[0]
        if rename:
            new_donor.name = rename
        new_donor.aliases = donor_list
        new_donor.type = first_donor.type
        new_donor.filer_id = first_donor.filer_id
        new_donor.money_in = first_donor.money_in
        new_donor.money_out = first_donor.money_out
        new_donor.has_resolved = False
        for alias in donor_list[1:]:
            absorbed = self.all_donors.pop(alias)
            new_donor.money_in = pd.concat(
                [new_donor.money_in, absorbed.money_in], ignore_index=True)
            new_donor.money_out = pd.concat(
                [new_donor.money_out, absorbed.money_out], ignore_index=True)
            # check if donor has established a type yet
            # when we get to a donor created by that donor's report in the original data (instead of created by
            # the receiver's report), then it will have a non-blank type
            if new_donor.type == ' ':
                new_donor.type = absorbed.type
        # Remove donors/receivers from money_in/money_out which are in donor_list, then sum_donations again
        new_donor.money_in = new_donor.money_in.loc[~new_donor.money_in.donor.isin(donor_list), :]
        new_donor.money_out = new_donor.money_out.loc[~new_donor.money_out.receiver.isin(donor_list), :]
        new_donor.sum_donations()
        self.all_donors[new_donor.name] = new_donor
        # Now we have to follow through to everyone in money_in and money_out and update them to the new donor name
        # first screen out any donations within the synonym set itself
        all_receivers = [x for x in new_donor.money_out['receiver'] if x not in donor_list]
        for receiver in all_receivers:
            receiver_type = new_donor.money_out.loc[
                new_donor.money_out['receiver'] == receiver, 'type'].tolist()[0]
            if receiver_type == 'Candidate':
                receiver_in = self.all_candidates[receiver].money_in
            else:
                receiver_in = self.all_donors[receiver].money_in
            receiver_in.loc[receiver_in['donor'].isin(donor_list), 'donor'] = new_donor.name
        # now update this name for all donors to this entity
        # first screen out any donations within the synonym set itself
        all_givers = [x for x in new_donor.money_in['donor'] if x not in donor_list]
        for giver in all_givers:
            # no candidates will be donors to this pac, so don't have to check type
            giver_out = self.all_donors[giver].money_out
            giver_out.loc[giver_out['receiver'].isin(donor_list), 'receiver'] = new_donor.name
def merge_ie_candidates(data_pac, data_ie):
    """Placeholder for re-keying IE candidates to match the PAC dataset.

    Not implemented yet: the function does nothing and returns None.
    The intent is to change the keys of ``data_ie.all_candidates`` to match
    the keys of ``data_pac.all_candidates`` so IE donors are easier to merge.
    """
    # Planned approach, per key in the IE data:
    #   - split the key at the comma
    #   - if there is a number, add it to the appropriate ballot measure data
    #   - otherwise look for the first and last name appearing in a pac key
    #   - if there is 1 and only 1 match, change the ie key to the pac key
    #   - otherwise write to a log file
    return None
def merge_ie_pac(donor_ie, data2, donor_key):
    """Merge an IE donor's outgoing spending into a donor of the full dataset.

    Args:
        donor_ie: an IE donor (a Donor object); only its ``money_out`` is read,
            since IE data has no money_in.
        data2: the full dataset (a Data object) that ``donor_ie`` is merged into.
        donor_key: key of the target donor, ``data2.all_donors[donor_key]``.

    The rows of ``donor_ie.money_out`` are appended to the target donor's
    ``money_out``. Updating the receiving candidates and ballot committees is
    not implemented yet (see the TODO loops below). Returns None.
    """
    target = data2.all_donors[donor_key]
    # pd.concat replaces DataFrame.append, which pandas 2.0 removed
    target.money_out = pd.concat([target.money_out, donor_ie.money_out], ignore_index=True)
    # select receivers row by row with a boolean mask; testing the whole
    # 'type' column for truth inside a comprehension raises ValueError
    ie_out = donor_ie.money_out
    cand_receivers = ie_out.loc[ie_out['type'] == 'Candidate', 'receiver'].tolist()
    ballot_receivers = ie_out.loc[ie_out['type'] == 'Ballot', 'receiver'].tolist()
    # todo: make sure for statements skip quietly if iteration set is empty
    for i in cand_receivers:
        # check if candidate exists in data2.all_candidates - if so append donation
        # if not check synonym list
        # if in synonym list, find the right one and append it
        # if still can't find it, create it and add donation and print message or write to log file
        # data2.all_candidates.keys()
        # todo: code here
        pass
    for i in ballot_receivers:
        # check if pac exists in data2.all_donors - if so append donation
        # if not check synonym list
        # if in synonym list, find the right one and append it
        # if still can't find it, create it and add donation and write to log file
        # should scrutinize name closely - any ballot committee should already exist in the larger pac dataset,
        # might need to merge pacs if the name is not found
        # data2.all_donors.keys()
        # todo: code here
        pass
def wrapper_get_close_matches(word,possibilities,n=1,cutoff=0.7):
    """Return the best difflib close match for ``word``, or 'No Match'.

    Thin wrapper around ``difflib.get_close_matches`` that always yields a
    single string: the first (best) match when there is one, otherwise the
    literal 'No Match'.
    """
    matches = get_close_matches(word=word, possibilities=possibilities, n=n, cutoff=cutoff)
    return matches[0] if matches else 'No Match'
def resolved_to_dataframe(data,donor_list):
    """Stack ``money_out_resolved`` of several donors into one DataFrame.

    Args:
        data: object whose ``all_donors`` maps donor name -> donor.
        donor_list: a donor name or a list of donor names.

    Returns:
        A DataFrame holding every listed donor's ``money_out_resolved`` rows
        plus a ``donor`` column with the donor's name, or None (after printing
        a message) when ``donor_list`` contains a name that is not a key of
        ``data.all_donors``. An empty ``money_out_resolved`` only triggers a
        printed warning.

    Raises:
        ValueError: ``donor_list`` is neither a string nor a list.
    """
    # check if donor_list is a list (basestring does not exist on Python 3)
    if not isinstance(donor_list, list):
        if isinstance(donor_list, str):
            donor_list = [donor_list]
        else:
            raise ValueError('resolved_to_dataframe: donor_list should be string or list of strings')
    # check if all donor_list are valid keys
    if not set(donor_list).issubset(data.all_donors.keys()):
        print('resolved_to_dataframe: invalid donor values on donor_list')
        return
    # check if money_out_resolved exists for all donor_list
    if any(data.all_donors[i].money_out_resolved.empty for i in donor_list):
        print('resolved_to_dataframe: money_out_resolved is empty for values of donor_list')
    frames = []
    for i in donor_list:
        # work on a copy so the donor's own frame does not gain a 'donor' column
        this_add = data.all_donors[i].money_out_resolved.copy()
        this_add['donor'] = i
        frames.append(this_add)
    if not frames:
        return pd.DataFrame()
    # pd.concat replaces DataFrame.append, which pandas 2.0 removed
    return pd.concat(frames, ignore_index=True)
################################
# JSON save and load functions
def save_to_json(data, filename = 'data.json'):
    """Serialize ``data`` as JSON into ``filename``, overwriting the file."""
    with open(filename, "w") as json_sink:
        json.dump(data, json_sink)
def load_from_json(filename = 'data.json'):
    """Parse the JSON document stored in ``filename`` and return it."""
    with open(filename, "r") as json_source:
        return json.load(json_source)
################################
# Pickle Save and Load functions for intermediate data
# Usage:
# save_to_pkl(data2, 'data2.pkl')
# data2 = load_from_pkl('data2.pkl')
#
def save_to_pkl(data, filename='data.pkl'):
    """Pickle ``data`` into ``filename`` and print a confirmation.

    The file is opened in binary write mode (overwriting any existing file)
    and is closed even if pickling fails part-way.
    """
    # todo: check for valid filename
    with open(filename, 'wb') as pickle_out:
        pickle.dump(data, pickle_out)
    print('Saved to file: ' + filename)
def load_from_pkl(filename='data.pkl'):
    """Load and return the object pickled in ``filename``.

    Prints a progress message once the file has been opened. The file handle
    is closed before returning.
    """
    # NOTE: unpickling can execute arbitrary code; only load files from a
    # trusted source.
    with open(filename, "rb") as pickle_in:
        print('Loading from file: ' + filename)
        return pickle.load(pickle_in)
def load_ie_data(filename):
    """Load independent-expenditure (IE) rows from a CSV file into a ``Data`` object.

    Only rows whose ``origin`` is 'C6.3 - Identified Entities' are used. Each
    row is recorded twice: as a ``money_in`` row (donor, donor_id, amount) on
    the receiver stored in ``all_candidates`` and as a ``money_out`` row
    (receiver, amount, party, type) on the sponsor stored in ``all_donors``.
    Rows marked "Against" are stored with a negated amount. A row without a
    candidate name is treated as a ballot measure.

    Args:
        filename: path of the CSV file; it must contain the columns listed in
            ``my_cols`` below.

    Returns:
        The populated ``Data`` object; ``all_candidates`` is keyed by receiver
        name and ``all_donors`` by sponsor name.

    Raises:
        ValueError: if a row has no ``sponsor_id``, or has an unexpected
            receiver type.
    """
    iedata = pd.read_csv(filename)
    # todo: screen out 2017 election year data (if needed, or just filter at source)
    # we can't screen by election year for ie data; they only report calendar year
    # we need to get all donations from 2017 which were used for 2018 election year
    # so we have to screen out spending which was done in 2017
    my_cols = ['origin', 'sponsor_id', 'sponsor_name', 'candidate_last_name', 'candidate_first_name', 'candidate_party',
               'ballot_name', 'ballot_number', 'portion_of_amount', 'for_or_against']
    # drop all columns except my_cols
    iedata = iedata[my_cols]
    # drop all rows except 'Identified Entities' rows; .copy() gives an independent
    # frame so the column assignments below do not write into a slice of the original
    iedata = iedata.loc[iedata['origin'] == 'C6.3 - Identified Entities'].copy()
    print(iedata.shape)
    # convert the donation amount from str to float
    if is_string_dtype(iedata['portion_of_amount']):
        iedata['portion_of_amount'] = iedata['portion_of_amount'].str.replace(',', '', regex=False).astype('float')
    # ballot number as text with a trailing '.0' removed (present when the column was read as float).
    # regex=True is passed explicitly because the default of str.replace differs between pandas versions.
    ballot_number_text = iedata['ballot_number'].astype('str').str.replace(r'\.0$', '', regex=True)
    # construct ballot name from number and name (NaN when the ballot name is missing)
    iedata['ballot_name_full'] = ballot_number_text + ', ' + iedata['ballot_name']
    # construct full candidate name
    iedata['candidate_name_full'] = iedata['candidate_last_name'] + ', ' + iedata['candidate_first_name']
    # construct receiver type so we can tell candidates from other stuff
    iedata['receiver_type'] = 'Candidate'
    # if no candidate name, then assume it is ballot measure
    iedata.loc[iedata['candidate_name_full'].isnull(), 'receiver_type'] = 'Ballot'
    # if ballot name is missing but there is a ballot number, then use ballot number as the name
    # (this has been observed to happen)
    number_only = iedata['ballot_name_full'].isnull() & iedata['ballot_number'].notnull()
    iedata.loc[number_only, 'ballot_name_full'] = ballot_number_text[number_only] + ','

    data1 = Data()
    for _, this_row in iedata.iterrows():
        this_giver_name = this_row['sponsor_name']
        this_giver_id = this_row['sponsor_id']
        if pd.isna(this_giver_id):
            raise ValueError('IE sponsor_id should not be missing')
        this_receiver_type = this_row['receiver_type']
        if this_receiver_type == 'Candidate':
            this_receiver_name = this_row['candidate_name_full']
            party_if_missing = 'Missing'
        elif this_receiver_type == 'Ballot':
            this_receiver_name = this_row['ballot_name_full']
            party_if_missing = 'Ballot'
        else:
            raise ValueError('receiver_type must be candidate or ballot')
        this_receiver_party = this_row['candidate_party']  # this will be NaN for ballot measures
        if pd.isna(this_receiver_party):
            this_receiver_party = party_if_missing
        # spending against a receiver is stored as a negative amount
        this_amount = this_row['portion_of_amount']
        if this_row['for_or_against'] == 'Against':
            this_amount = -this_amount

        # record the money received: reuse the candidate if already seen, else create it
        money_in_row = pd.DataFrame([[this_giver_name, this_giver_id, this_amount]],
                                    columns=['donor', 'donor_id', 'amount'])
        if this_receiver_name in data1.all_candidates:
            this_cand = data1.all_candidates[this_receiver_name]
            # todo: check that this_cand.party = this_receiver_party
            # what to do if they are different?
        else:
            this_cand = Candidate()
            this_cand.name = this_receiver_name
            this_cand.type = this_receiver_type
            this_cand.party = this_receiver_party
            data1.all_candidates[this_receiver_name] = this_cand
        # pd.concat is the replacement for DataFrame.append (removed in pandas 2.0)
        this_cand.money_in = pd.concat([this_cand.money_in, money_in_row], ignore_index=True)

        # record the money given: reuse the donor if already seen, else create it
        money_out_row = pd.DataFrame([[this_receiver_name, this_amount, this_receiver_party, this_receiver_type]],
                                     columns=['receiver', 'amount', 'party', 'type'])
        if this_giver_name in data1.all_donors:
            this_donor = data1.all_donors[this_giver_name]
        else:
            this_donor = Donor()
            this_donor.name = this_giver_name
            this_donor.filer_id = this_giver_id
            data1.all_donors[this_giver_name] = this_donor
        this_donor.money_out = pd.concat([this_donor.money_out, money_out_row], ignore_index=True)
    return data1
def load_pac_data(filename, nrows=0, debug=True, name_dict=None):
    """Load candidate/PAC contribution rows from a CSV file into a ``Data`` object.

    Rows with ``amount`` <= 0 and rows whose ``code`` is 'Individual' are
    dropped. Every remaining row (up to ``nrows``) is recorded twice: as a
    ``money_in`` row on the receiver and as a ``money_out`` row on the
    contributor. Receivers of type 'Candidate' are stored in
    ``all_candidates``; every other receiver is stored in ``all_donors``,
    where contributors are stored as well.

    Args:
        filename: path of the CSV file; it must contain the columns listed in
            ``my_cols`` below.
        nrows: number of (filtered) rows to process; 0 means all rows, and a
            value larger than the data is reduced to the full data set.
        debug: if true, print shapes, timing and counts while loading.
        name_dict: optional dict of synonym -> main name, applied to both the
            contributor names and the filer names before processing.

    Returns:
        The populated ``Data`` object.
    """
    # candidate and PAC donations
    campdata = pd.read_csv(filename)
    # note ignore mixed type warnings on columns 11, 23 since we do not use them
    my_cols = ['filer_id', 'type', 'filer_name', 'party', 'ballot_number', 'for_or_against', 'amount', 'code',
               'contributor_name', 'contributor_address', 'contributor_city', 'contributor_zip']
    campdata = campdata[my_cols]
    if debug:
        print(campdata.shape)
    # note filer_id values in this file do not match the filer_id values in the IE data file
    # the for_or_against value is merely descriptive of ballot committees here,
    # it does not indicate spending against a candidate
    # convert amount str to float
    if is_string_dtype(campdata['amount']):
        campdata['amount'] = campdata['amount'].str.replace(',', '', regex=False).astype('float')
    # exclude rows with amount <= 0
    campdata = campdata.loc[campdata['amount'] > 0, :]
    if debug:
        print(campdata.shape)
    # drop rows from Individual donors (for now, since interest is mostly in PACs)
    # TODO: include individuals with notably large donations, e.g. over $20,000
    campdata = campdata.loc[campdata['code'] != 'Individual', :]
    if debug:
        print(campdata.shape)
    # TODO: Correct party affiliations to align candidates with their caucus: Tim Sheldon R, Rodney Tom R, etc.
    # This is being done manually for now

    # compute number of rows to load
    nrows = int(nrows)
    total_rows = campdata.shape[0]
    if nrows == 0:
        nrows = total_rows
    if nrows > total_rows:
        print('Loading full data set')
        nrows = total_rows
    # keep only the rows that will be processed; .copy() makes the frame independent
    # of the filtered slices above so the name replacement below is a plain assignment
    # (a negative nrows selects no rows, as range(nrows) did)
    campdata = campdata.iloc[:max(nrows, 0)].copy()

    # convert names using synonym dictionary; names without an entry are kept as they are
    if name_dict:
        for column in ('contributor_name', 'filer_name'):
            campdata[column] = campdata[column].map(lambda name: name_dict.get(name, name))

    start = time.time()
    if debug:
        print(start)
        print(nrows)
    data2 = Data()
    if debug:
        print('Meow')
    for _, this_row in campdata.iterrows():
        this_giver_name = this_row['contributor_name']
        this_receiver_id = this_row['filer_id']
        this_receiver_name = this_row['filer_name']
        this_receiver_type = this_row['type']
        if pd.isna(this_receiver_type):
            this_receiver_type = 'None'
        this_receiver_party = this_row['party']
        # this_receiver_party will be NaN for ballot measures and PACs, and the nans can break stuff
        if pd.isna(this_receiver_party):
            this_receiver_party = 'None'
        this_amount = float(this_row['amount'])
        this_giver_id = 'None'  # donor id is not provided in this data

        # record the money received by the filer
        money_in_row = pd.DataFrame([[this_giver_name, this_giver_id, this_amount]],
                                    columns=['donor', 'donor_id', 'amount'])
        if this_receiver_type == 'Candidate':
            if this_receiver_name in data2.all_candidates:
                this_cand = data2.all_candidates[this_receiver_name]
                # todo: check that this_cand.party = this_receiver_party
            else:
                this_cand = Candidate()
                this_cand.name = this_receiver_name
                this_cand.type = this_receiver_type
                this_cand.party = this_receiver_party
                data2.all_candidates[this_receiver_name] = this_cand
            # pd.concat is the replacement for DataFrame.append (removed in pandas 2.0)
            this_cand.money_in = pd.concat([this_cand.money_in, money_in_row], ignore_index=True)
        else:  # receiver type is not Candidate, should only be Political Committee
            # check if we have seen this pac before (maybe as a donor) and update data
            if this_receiver_name in data2.all_donors:
                this_pac = data2.all_donors[this_receiver_name]
                this_pac.money_in = pd.concat([this_pac.money_in, money_in_row], ignore_index=True)
                # if we have only seen this pac before as a donor, then we don't have a value
                # for type and filer_id
                if this_pac.type != this_receiver_type:
                    this_pac.type = this_receiver_type
                    this_pac.filer_id = this_receiver_id
            else:
                # create new pac and add to data
                this_pac = Donor()
                this_pac.money_in = pd.concat([this_pac.money_in, money_in_row], ignore_index=True)
                this_pac.name = this_receiver_name
                this_pac.type = this_receiver_type
                this_pac.filer_id = this_receiver_id
                data2.all_donors[this_receiver_name] = this_pac

        # record the money given by the contributor; done after the receiver so a
        # committee created just above is found here if it contributes to itself
        money_out_row = pd.DataFrame([[this_receiver_name, this_amount, this_receiver_party, this_receiver_type]],
                                     columns=['receiver', 'amount', 'party', 'type'])
        if this_giver_name in data2.all_donors:
            this_donor = data2.all_donors[this_giver_name]
        else:
            this_donor = Donor()
            this_donor.name = this_giver_name
            # no donor id from C3 data, so filer_id is left untouched
            data2.all_donors[this_giver_name] = this_donor
        this_donor.money_out = pd.concat([this_donor.money_out, money_out_row], ignore_index=True)
    end = time.time()
    if debug:
        print(end - start)
        print(len(data2.all_donors.keys()))
    return data2
def main_load(file_ie, file_pac):
donor_synonym_dict = {} # dict of synonym:main_name for donors
donor_synonym_dict = do_pac_synonyms2(donor_synonym_dict)
data_ie = load_ie_data(file_ie)
data_pac = load_pac_data(file_pac,name_dict=donor_synonym_dict)
data_ie.sum_donations()
data_pac.sum_donations()
print("Make manual corrections")
pac_correction(data_pac)
ie_correction(data_ie)
do_ie_synonyms(data_ie)
data_ie.sum_donations()
data_pac.sum_donations()
print('Converting data_ie.all_candidates.keys() to match data_pac.all_candidates.keys()')
oldname = [i for i in data_ie.all_candidates.keys() if not any(char.isdigit() for char in i)]
newname = [wrapper_get_close_matches(i, data_pac.all_candidates.keys()) for i in oldname]
cand_change = pd.DataFrame.from_items([('oldnames', oldname), ('newnames', newname)])
# Write out a log file of all the matches. Cases with 'No Match' will be dropped.
# All cases must be manually verified for accuracy and manually fixed (e.g. William Colyer != William Cooper)
f = open("log-candidate name matching IE to PAC.txt", "w")
for i in range(len(cand_change.oldnames)):
f.write("%s\n" % [cand_change.iloc[i, :][0], cand_change.iloc[i, :][1]])
f.close()
# manual fix for Colyer/Cooper
cand_change.newnames.loc[cand_change.newnames == 'COOPER WILLIAM T'] = 'No Match'
# drop cases with no match - these are usually local candidates using mini-reporting with only small IE donations
cand_change = cand_change.loc[cand_change.newnames != 'No Match', :]
# update the key names using combine_candidates
for i in range(len(cand_change.oldnames)):
data_ie.combine_candidates([cand_change.iloc[i, :].oldnames], rename=cand_change.iloc[i, :].newnames)
# now the candidate names/keys in data_ie match the ones in data_pac
print('Finding matches between data_ie.all_donors.keys() and data_pac.all_donors.keys()')
oldname = data_ie.all_donors.keys()
newname = [wrapper_get_close_matches(i, data_pac.all_donors.keys()) for i in oldname]
# donor_change is a map from oldnames (in data_ie) to newnames (in data_pac)
donor_change = pd.DataFrame.from_items([('oldnames', oldname), ('newnames', newname)])
# manually fix some of the mistakes made by the automated method
temp = [i for i in data_ie.all_donors.keys() if i.find('ENTERPRISE WA') >= 0]
for i in temp:
donor_change.newnames.loc[donor_change.oldnames == i] = 'ENTERPRISE WASHINGTON'
donor_change.newnames.loc[donor_change.oldnames == 'MARATHON PETROLEM CORP SUBSIDIARY ANDEAVOR LLC'] = 'ANDEAVOR'
donor_change.newnames.loc[
donor_change.oldnames == 'NRA POLITICAL VICTORY FUND'] = 'NATIONAL RIFLE ASSOCIATION OF AMERICA'
donor_change.newnames.loc[donor_change.oldnames == 'WA EDUCATION ASSN PAC'] = 'WASHINGTON EDUCATION ASSOCIATION'
donor_change.newnames.loc[
donor_change.oldnames == 'WASHINGTON REALTORS POLITICAL ACTION COMMITTEE'] = 'WA REALTORS PAC'
donor_change.newnames.loc[donor_change.oldnames == 'FRIENDS OF ELAINE PHELPS'] = 'No Match'
donor_change.newnames.loc[donor_change.oldnames == 'WA FORWARD'] = 'WA FORWARD (THE LEADERSHIP COUNCIL)'
donor_change.newnames.loc[donor_change.oldnames == 'BUILDING INDUSTRY ASSOCIATION OF CLARK COUNTY BUILDING INDUSTRY GROUP'] = 'BUILDING INDUSTRY ASSOCIATION'
# write out the mapping for further manual inspection
f = open("log-donor name matching IE to PAC.txt", "w")
for i in range(len(donor_change.oldnames)):
f.write("%s\n" % [donor_change.iloc[i, :][0], donor_change.iloc[i, :][1]])
f.close()
# now data_ie.all_candidates.keys() are consistent with data_pac.all_candidates except ballot measures
# and we have a map of data_ie.all_donors.keys() to keys in data_pac.all_donors.keys()
#####
# Merge the IE candidates data into the PAC candidates data
# for each candidate in data_ie,
# we append its money_in to the corresponding candidate in data_pac, correcting the donor names as we go
# and update each donor in data_pac with the donation
# TODO: maybe more efficient to change the approach here to use a dictionary instead of dataframe (see donor_old_to_new below)
for i in cand_change.newnames:
print(i)
this_donors = data_ie.all_candidates[i].money_in.donor.copy()
this_name = data_ie.all_candidates[i].name
this_party = data_ie.all_candidates[i].party
this_type = data_ie.all_candidates[i].type
# if there is only one donor, it will return as a string, so convert to a list to use with the for statement
if isinstance(this_donors, basestring):
this_donors2 = [this_donors]
else:
this_donors2 = this_donors # doing this to avoid the setting with copy warning on this_donors
# for each donor in this candidate's money_in:
for j in this_donors2:
# get the amount from donor j to candidate i
this_amount = data_ie.all_candidates[i].money_in.amount.loc[this_donors == j].values[0]
# get the new pac donor name
new_donor = donor_change.newnames.loc[donor_change.oldnames == j].values[0]
# change the old ie donor name to the new pac donor name
# TODO: Next line gives "set on copy of slice" warning. Tested and it works. How to avoid warning?
data_ie.all_candidates[i].money_in.donor.loc[this_donors2 == j] = new_donor
# if the new name is 'No Match' then this donor is not in data_pac;
# so don't update data_pac, give a log message showing amount, and after for loop, drop the no match rows
if new_donor == 'No Match':
print('No Match - skipped:')
print(j)
print(this_amount)
else:
# update the donor's money_out with this candidate
this_add = pd.DataFrame([[this_name, this_amount, this_party, this_type]],
columns=["receiver", "amount", "party", "type"])
data_pac.all_donors[new_donor].money_out = data_pac.all_donors[
new_donor].money_out.append(this_add, ignore_index=True)
# drop the no match rows
data_ie.all_candidates[i].money_in = data_ie.all_candidates[i].money_in.loc[
data_ie.all_candidates[i].money_in.donor != 'No Match', :]
# append ie candidate's updated money_in to the pac candidate's money_in
data_pac.all_candidates[i].money_in = data_pac.all_candidates[i].money_in.append(
data_ie.all_candidates[i].money_in, ignore_index=True)
# Merge the IE initiative groups in to the PAC initiative groups
# Combine initiative groups into single group for or against each initiative
data_pac.combine_donors(donor_list=['DE-ESCALATE WA I-940'], rename='FOR 940')
data_pac.combine_donors(donor_list=['COPS AGAINST I-940 WA COUNCIL OF POLICE & SHERIFFS', 'COALITION FOR A SAFER WA'],
rename='AGAINST 940')
data_pac.combine_donors(donor_list=['CLEAN AIR CLEAN ENERGY WA'], rename='FOR 1631')
data_pac.combine_donors(donor_list=['NO ON 1631',
'NO ON 1631 (SPONSORED BY WESTERN STATES PETROLEUM ASSN)',
'I-1631 SPONSORED BY ASSOC OF WA BUSINESS'], rename='AGAINST 1631')
data_pac.combine_donors(donor_list=['YES! TO AFFORDABLE GROCERIES (SEE EMAIL FOR REST OF NAME)'], rename='FOR 1634')
data_pac.combine_donors(donor_list=['HEALTHY KIDS COALITION'], rename='AGAINST 1634')
data_pac.combine_donors(donor_list=['SAFE SCHOOLS SAFE COMMUNITIES'], rename='FOR 1639')
data_pac.combine_donors(donor_list=['STOP 1639 - SPONSOR SHALL NOT BE INFRINGED', 'SHALL NOT BE INFRINGED',
'SAVE OUR SECURITY NO ON I-1639',
'WASHINGTONIANS AND THE NATIONAL RIFLE ASSN FOR FREEDOM'],rename='AGAINST 1639')
donor_old_to_new = dict(zip(donor_change.oldnames, donor_change.newnames))
donor_new_to_old = dict(zip(donor_change.newnames, donor_change.oldnames))
this_ie_label_list = ['940', '1631', '1634', '1639']
this_pac_label_dict = {'940': ['FOR 940', 'AGAINST 940'],
'1631': ['FOR 1631', 'AGAINST 1631'],
'1634': ['FOR 1634', 'AGAINST 1634'],
'1639': ['FOR 1639', 'AGAINST 1639']}
for this_ie_label in this_ie_label_list:
this_pac_label = this_pac_label_dict[this_ie_label]
# get positive (for) rows and negative (against) rows and put them in the appropriate places, converting donor names too
for i in range(len(data_ie.all_candidates[this_ie_label].money_in)):
this_row = data_ie.all_candidates[this_ie_label].money_in.iloc[i, :]
this_donor = this_row.donor
this_donor_id = this_row.donor_id
this_amount = this_row.amount
# convert old (IE) name to new (PAC) name
this_donor = donor_old_to_new[this_donor]
# skip any donors that have no match in PAC data
if this_donor != 'No Match':
if this_amount >= 0:
this_add = pd.DataFrame([[this_donor, this_donor_id, this_amount]],
columns=['donor', 'donor_id', 'amount'])
data_pac.all_donors[this_pac_label[0]].money_in = data_pac.all_donors[
this_pac_label[0]].money_in.append(this_add, ignore_index=True)
else:
# convert negative spend to a positive contribution to the Against side
this_amount = this_amount * (-1)
this_add = pd.DataFrame([[this_donor, this_donor_id, this_amount]],
columns=['donor', 'donor_id', 'amount'])
data_pac.all_donors[this_pac_label[1]].money_in = data_pac.all_donors[
this_pac_label[1]].money_in.append(this_add, ignore_index=True)
# mark all party organizations with their party so donor percentages reflect their party
data_pac.party_pac_dict = {'WA STATE DEMOCRATIC PARTY':'DEMOCRAT',
'WA STATE REPUBLICAN PARTY': 'REPUBLICAN',
'LOCAL DEMOCRATIC ORGS':'DEMOCRAT',
'LOCAL REPUBLICAN ORGS': 'REPUBLICAN',
'SENATE REPUBLICAN CAMPAIGN COMMITTEE': 'REPUBLICAN',
'HOUSE REPUBLICAN ORGANIZATION COMMITTEE': 'REPUBLICAN',
'HOUSE DEMOCRATIC CAUCUS CAMPAIGN COMMITTEE':'DEMOCRAT',
'WASHINGTON SENATE DEMOCRATIC CAMPAIGN':'DEMOCRAT',
'HARRY TRUMAN FUND':'DEMOCRAT',
'REAGAN FUND': 'REPUBLICAN',
'KENNEDY FUND':'DEMOCRAT',
'THE LEADERSHIP COUNCIL': 'REPUBLICAN',
'MAINSTREAM REPUB OF WA ST PAC': 'REPUBLICAN'}