-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathglobal_utils.py
134 lines (117 loc) · 5.01 KB
/
global_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
###
##
# Copyright (C) 2021 James A. Bowery
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see: <http://www.gnu.org/licenses/>.
##
###
import logging
import pandas as pd
import re
import json
import base64
import os
from pathlib import Path
from dotenv import load_dotenv
load_dotenv(override=True)
# Deployment settings taken from environment variables; either may be None
# when the corresponding variable is not set.
VOTING_DISTRICT = os.getenv("VOTING_DISTRICT")
STATE_OR_PROVINCE = os.getenv("STATE_OR_PROVINCE")

# Relative locations of the data files used by this module's callers.
dd_path = Path('dynamic_data')
vu_path = Path('voter_downloads')
vu_filename = Path('all.csv')
vu_filepath = vu_path/vu_filename

# Voters file name: '<STATE_OR_PROVINCE>[_district<VOTING_DISTRICT>]_voters.csv'.
# The district part must be an f-string; as a plain literal it put the text
# '{VOTING_DISTRICT}' into the file name instead of the district value.
v_filepath = dd_path/(f'{STATE_OR_PROVINCE}'+(f'_district{VOTING_DISTRICT}' if VOTING_DISTRICT else '')+'_voters.csv')

first_tentative_vid = int(1e10) # well beyond anything the Iowa SoS has allocated in its records
def bp():
    """Create the 'nowdebugging' marker file, then enter the debugger.

    Per the original author's note: the marker file blocks the server from
    spawning additional threads, and it must be deleted by hand before the
    server will resume spawning (e.g. see 'def respond').
    """
    with open('nowdebugging', 'w') as marker:
        # Same content print(' ', file=...) would produce: a space and a newline.
        marker.write(' \n')
    breakpoint()
def show_val(valname,val):
    """Emit two DEBUG log lines for ``val``: its class, then its value.

    valname: label placed in front of each line.
    val:     the object being inspected.
    """
    class_line = f'{valname}.__class__: {val.__class__}'
    logging.debug(class_line)
    value_line = f'{valname}: {val}'
    logging.debug(value_line)
def phonemes_idx_sigma_match(phdists,sigma=1):
    """Return the index label of the smallest distance, or False.

    phdists: object offering idxmin/min/mean/std (e.g. a pandas Series).
    sigma:   only compared against 0; passing 0 forces the label to be returned.

    The label is returned when the standard deviation divided by the mean
    exceeds 1/4, or when sigma is 0. Otherwise the result is False.
    """
    best_label = phdists.idxmin()
    smallest = phdists.min()
    average = phdists.mean()
    spread = phdists.std()
    logging.debug(f'min {smallest} mean {average} std {str(spread)}')
    relative_spread = spread/average
    if relative_spread > (1/4) or sigma==0:
        return best_label
    return False
def not_None(isnoneq):
    """Return True for any argument other than None (falsy values included)."""
    return isnoneq is not None
def series_to_query(ser):
    """Build an 'and'-joined equality expression from key/value pairs.

    ser: anything with ``.items()`` (a pandas Series or a dict).

    Every pair becomes ``key=="value"``, with the value always rendered as a
    double-quoted string. The finished expression is logged at DEBUG level
    and returned.
    """
    clauses = []
    for column, wanted in ser.items():
        clauses.append(f'{column!s}=="{wanted!s}"')
    query = ' and '.join(clauses)
    logging.debug(query)
    return query
def select(df,selector):
    """Return the rows of ``df`` matching every key/value pair in ``selector``.

    df:       DataFrame to filter.
    selector: a pandas Series or a plain dict; a dict is first converted to a
              Series.

    The selector is turned into a query string by series_to_query and
    evaluated with ``df.query``.
    """
    criteria = pd.Series(selector) if selector.__class__ == dict else selector
    return df.query(series_to_query(criteria))
def select_indirect(full_df, indseries, selector):
    """Select rows of ``full_df`` through a (multi)indexed lookup series.

    This is useful when data is placed in a multiindex for fast lookup while
    the full table stays accessible with ordinary DataFrame syntax such as
    'full_df[full_df.FIRST_NAME==name]'.

    full_df:   the DataFrame rows are selected from.
    indseries: a series (or dict, converted to a Series) whose index is
               queryable and whose values are labels of full_df.
    selector:  criteria handed to select() to narrow indseries.
    """
    lookup = pd.Series(indseries) if indseries.__class__ == dict else indseries
    matches = select(pd.DataFrame(lookup), selector)
    # The intermediate frame has a single data column: the labels for full_df.
    labels = matches.iloc[:,0]
    return full_df.loc[labels]
def encode_client_state(client_obj):
    """Serialize ``client_obj`` to JSON and return it as URL-safe base64 text."""
    as_json = json.dumps(client_obj)
    encoded = base64.urlsafe_b64encode(as_json.encode())
    return encoded.decode()
def decode_client_state(client_data):
    """Decode URL-safe base64 text holding JSON and return the parsed object."""
    raw = base64.urlsafe_b64decode(client_data.encode())
    as_json = raw.decode()
    return json.loads(as_json)
def just_numbers(not_just_numbers):
    """Return the input string with every character other than 0-9 removed."""
    non_digit = re.compile(r'[^0-9]')
    return non_digit.sub('', not_just_numbers)
import collections
class PhoneNumber(collections.UserString):
    """A US phone number held as a string in 'AAA-EEE-SSSS' form.

    Per the original author's note, the main utility of this class is in a
    touch tone UI where entry of, say, 2352 can be imputed to have an area
    code and exchange identical to that of the caller, making it easier to
    identify the person intended as, say, a delegate or, in the ultimate
    system where a local currency is involved, the recipient of payment.

    Input that does not reduce to ten digits (after dropping an optional
    leading '1') yields an empty string, and none of the component
    attributes are set. No exception is raised.
    """

    def default(self, obj):
        """Return this number's E.123 form as a JSON string; ``obj`` is unused."""
        encoded = json.dumps(self.e123)
        return encoded

    def normalize_phonenum(phonenum):
        """Return the argument unchanged (this function takes no ``self``)."""
        return phonenum

    def __init__(self, USpn):
        """Parse ``USpn`` and populate the component attributes when valid."""
        super().__init__(USpn)
        digits = re.sub(r'[^0-9]','',self.data)
        # An 11-digit number starting with '1' carries the country code; drop it.
        if len(digits)==11 and digits[:1]=='1':
            digits = digits[1:]
        if len(digits)==10:
            self.area, self.exchange, self.station = digits[:3], digits[3:6], digits[6:]
            self.country = '1'
            self.e123 = f'+{self.country}{self.area}{self.exchange}{self.station}' # E.123 standard format
            self.ten_digit = self.e123[2:]
            self.ten_digit_hyphenated = f'{self.area}-{self.exchange}-{self.station}'
            self.data = self.ten_digit_hyphenated
        else:
            # Not a complete US number: collapse to the empty string.
            self.data = ''

    def _asdict(self):
        """Return the instance's attribute dictionary (the live one, not a copy)."""
        return vars(self)