# shang_Tokyo_HMFD.py  QBS [email protected]
import math
import pandas as pd
import xlrd
import os
import numpy as np
from scipy.stats import pearsonr,spearmanr,kendalltau
import xlwt
import matplotlib.pyplot as plt
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from scipy.stats import t, ttest_1samp
from scipy.stats import norm
import warnings
warnings.filterwarnings("ignore")
def ttest(x, m=0, alpha=0.05, tail='both'):
    """One-sample t-test of mean(x) against m; returns (h, p), where h is True when H0 is rejected at level alpha."""
# Sample size
n = len(x)
# Sample mean and standard deviation
xmean = np.mean(x)
sd = np.std(x, ddof=1) # ddof=1 for unbiased estimate of population variance
# t-statistic
tval = (xmean - m) / (sd / np.sqrt(n))
# Degrees of freedom
df = n - 1
# Compute p-value
if tail == 'both':
p = 2 * t.cdf(-abs(tval), df)
ci = (xmean - t.ppf(1 - alpha / 2, df) * sd / np.sqrt(n),
xmean + t.ppf(1 - alpha / 2, df) * sd / np.sqrt(n))
elif tail == 'right':
p = t.cdf(-tval, df)
ci = (xmean - t.ppf(1 - alpha, df) * sd / np.sqrt(n), np.inf)
elif tail == 'left':
p = t.cdf(tval, df)
ci = (-np.inf, xmean + t.ppf(1 - alpha, df) * sd / np.sqrt(n))
    # Reject the null hypothesis when the p-value does not exceed the significance level
    h = p <= alpha
    # Summary statistics (computed for reference, not returned)
    stats = {'tstat': tval, 'df': df, 'sd': sd}
    return h, p
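# A minimal usage sketch for ttest (hypothetical numbers, not part of the analysis below):
#   h, p = ttest([1.2, 0.8, 1.1, 0.9, 1.3], m=1.0, alpha=0.05, tail='both')
#   h is True only if the sample mean differs from 1.0 at the 5% level.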
def ztest(x, m, sigma, alpha=0.05, alternative='two-sided'):
# Error checking
if not np.isscalar(m):
raise ValueError("M must be a scalar")
if not np.isscalar(sigma) or sigma < 0:
raise ValueError("Sigma must be a non-negative scalar")
    # Calculate necessary values
    xmean = np.mean(x)
    samplesize = 1  # note: the sample size is fixed at 1 here, i.e. a z-test on a single observation
    ser = sigma / np.sqrt(samplesize)
zval = (xmean - m) / ser
# Compute p-value
if alternative == 'two-sided':
p = 2 * norm.cdf(-np.abs(zval))
elif alternative == 'greater':
p = norm.cdf(-zval)
elif alternative == 'less':
p = norm.cdf(zval)
else:
raise ValueError("Invalid alternative hypothesis")
# Compute confidence interval if necessary
if alternative == 'two-sided':
crit = norm.ppf(1 - alpha / 2) * ser
ci = (xmean - crit, xmean + crit)
elif alternative == 'greater':
crit = norm.ppf(1 - alpha) * ser
ci = (xmean - crit, np.inf)
elif alternative == 'less':
crit = norm.ppf(1 - alpha) * ser
ci = (-np.inf, xmean + crit)
# Determine if the null hypothesis can be rejected
h = 1 if p <= alpha else 0
return h,p
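# A minimal usage sketch for ztest (hypothetical numbers; sigma is the known population standard deviation):
#   h, p = ztest([1.4], m=1.0, sigma=0.5, alpha=0.05, alternative='two-sided')
#   h is 1 only if the observation differs from 1.0 at the 5% level.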
# Parameters that need tuning per dataset
'''
HFMD:
time_window=6
file_start_week=6
(10, 20)
flu 25
'''
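# As used below: time_window is the sliding-window length in weeks, and file_start_week is the
# row offset of the first usable week in the Excel file.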
for gap_year in range(8,9):
time_window = 6
file_start_week = gap_year
book1 = xlwt.Workbook()
book2 = xlwt.Workbook()
book3 = xlwt.Workbook()
book4 = xlwt.Workbook()
    # Set the Chinese font
    plt.rcParams['font.sans-serif'] = ['SimHei']  # default font for CJK text
    # Set the Latin font to Times New Roman
    plt.rcParams['font.family'] = 'Times New Roman'
    plt.rcParams['axes.unicode_minus'] = False  # keep the minus sign from rendering as a box in saved figures
    # Global font size
    plt.rcParams.update({'font.size': 14})
    np.set_printoptions(threshold=10000, linewidth=10000, suppress=True)  # make numpy output easier to read
    # Read the Excel file of weekly case counts; alternatives: "data/东京都流感人数.xls", "data/东京都肠胃炎人数.xls"
    excel_file = "data/东京都手足口病人数.xls"
df = pd.read_excel(excel_file)
    # Number of subplot rows needed for plotting; two plots per row
    df_len = len(df) - file_start_week  # skip the first file_start_week rows of the file
year = df_len // 52
if year % 2 == 1:
rows = (year // 2) + 1
else:
rows = year // 2
    point_num = df.shape[1] - 2  # assume the first two columns of every file are time and total
    # First year used in the plots
    start_year = round(df.iloc[1][0])
    # Create the figures
fig = plt.figure(figsize=(30, 7 * rows))
fig1 = plt.figure(figsize=(18, 4 * rows))
    gap = []  # how many weeks in advance is the outbreak signalled?
    It_5 = []  # record the last 5 It values
    for f in range(8, 20):  # because of how the file is laid out, indices 1-21 correspond to the 2000-2024 seasons
        '''
        # year: {
        #     ward: {It score, per week}
        #     warning point
        #     outbreak point
        # }
        '''
        weight_str = []  # edge-weight records, written to an Excel file below
        # Infection counts, used for plotting
        infection_number = df.iloc[file_start_week + 52 * f: 52 + file_start_week + 52 * f,
                                   1].tolist()  # iloc slices by row number, starting at 0
        # start row: file_start_week + 52*f; end row: 52 + file_start_week + 52*f; column 1 is the total case count
        max_num = max(infection_number)
        index = infection_number.index(max_num) + 1  # position of the outbreak peak, used for plotting
        startweek = file_start_week - time_window + 1 + 52 * f  # 18=22-4
        endweek = 52 + file_start_week + 52 * f  # 74=52+22
        week = endweek - startweek + 1
        print("week:", week)  # 58
        book = xlrd.open_workbook(
            'data/东京都手足口病定点报告数.xls')  # sentinel-site report counts; alternatives: 'data/东京都流感定点报告数.xls', 'data/东京都肠胃炎定点报告数.xls'
        sheet = book.sheet_by_index(0)
        data = np.zeros((week, point_num))  # point_num = 23: Tokyo has 23 wards
for col in range(startweek, endweek + 1):
for row in range(0, point_num):
data[col - startweek][row] = sheet.cell(col, row + 1).value # read data
# print("data.shape",data.shape)
        # Take the last 52 rows (drop the leading time_window rows)
        subset_data = data[6:, :]
        # Transpose so that each row becomes a column
        subset_data_transposed = subset_data.T
        subsetdf = pd.DataFrame(subset_data_transposed)
        # Output path, formatted with an f-string
        file_path = rf'C:\Users\qiqi\Desktop\dynamical network entropy (DNE)\raw_data_and_code\It_ward_excel_HMDL\HMDL_point_num\Year {start_year + f}-{start_year + f + 1}.xlsx'
        # Write the DataFrame to an Excel file
        subsetdf.to_excel(file_path, index=False)
        # Adjacency list for the 23 Tokyo wards: each row is [ward, neighbouring wards...]
network = [[0, 2, 5, 11, 17, 7],
[1, 9, 18, 17, 12],
[2, 0, 5, 6, 8, 15],
[3, 19, 16, 11, 17, 12],
[4, 13, 16, 10, 20, 22],
[5, 0, 11, 10, 6, 2],
[6, 2, 5, 10, 20, 8],
[7, 0, 17, 18, 9],
[8, 2, 6, 20, 21, 14, 15],
[9, 1, 18, 7],
[10, 5, 11, 16, 4, 20, 6],
[12, 19, 3, 17, 1],
[13, 19, 16, 4],
[11, 0, 17, 3, 16, 10, 5],
[14, 15, 8, 21],
[15, 2, 8, 14],
[16, 19, 3, 11, 10, 4, 13],
[17, 1, 12, 3, 11, 0, 7, 18],
[18, 1, 9, 7, 17],
[19, 12, 3, 16, 13],
[20, 22, 4, 21, 8, 6, 10],
[21, 22, 20, 8, 14],
[22, 4, 20, 21]]
        # Extract undirected edges from the network for plotting, skipping duplicates
        edge = []
        for item in network:
            for i in range(1, len(item)):
                if [item[0], item[i]] in edge or [item[i], item[0]] in edge:
                    continue
                else:
                    edge.append([item[0], item[i]])
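        # For example, the row [0, 2, 5, 11, 17, 7] contributes the undirected edges
        # (0,2), (0,5), (0,11), (0,17), (0,7), each stored only once.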
        sheet1 = book1.add_sheet(str(startweek) + '-' + str(endweek))
        delta_PCC = np.zeros((len(network), week - time_window))  # (23, 52)
        numTemp = 0
        for i in range(0, week - time_window):  # week - time_window = 52 sliding-window positions
            # time loop
            b = 0
            for node in network:  # 23 wards
                # ward loop
                # rank correlation between the centre node and each of its neighbours
                pcc_temp3 = 0
                pcc_ij = []
                pcct_ij = []
                for j in range(1, len(node)):  # iterate over the neighbours
                    # neighbour loop
                    # node[0] is the current centre node
                    pcc_temp1 = abs(kendalltau(data[i: i + time_window, node[0]],
                                               data[i: i + time_window, node[j]])[0])  # Kendall correlation with the neighbour in the window ending at t-1
                    pcc_temp2 = abs(kendalltau(data[i + 1: i + time_window + 1, node[0]],
                                               data[i + 1: i + time_window + 1, node[j]])[0])  # Kendall correlation in the window ending at t
                    # NaN handling is left commented out; NaN weights are replaced with 0 when written out below
                    # pcc_temp1 = np.nan_to_num(pcc_temp1)
                    # pcc_temp2 = np.nan_to_num(pcc_temp2)
                    weight = abs(pcc_temp2 - pcc_temp1)  # delta_PCC_t: change in correlation between the two windows
                    weight_zi = []
                    weight_zi.append(i)
                    weight_zi.append(node[0])
                    weight_zi.append(node[j])
                    weight_zi.append(weight)
                    weight_str.append(weight_zi)
                    # weight between the centre node and this first-order neighbour
                    pcc_ij.append(weight)
                    # print('node', node[0], 'and node', node[j], 'weight', weight)
                    # running total
                    pcc_temp3 += weight  # used below to normalise the weights into probabilities
                    # print('node', node[0], pcc_temp3)
                if pcc_temp3 == 0:  # if the total weight is 0, every probability is 0
                    delta_PCC[b][i] = 0
                    b += 1
                    continue  # move on to the next centre node
                for k in range(0, len(pcc_ij)):
                    pcct_ij.append(pcc_ij[k] / pcc_temp3)  # normalise the weights into probabilities
                # count of zero probabilities (unused)
                # zero_count = sum(1 for prob in pcct_ij if prob == 0)
                entropy = 0
                for h in range(0, len(pcct_ij)):
                    if pcct_ij[h] > 0:  # avoid log(0)
                        entropy -= (pcct_ij[h]) * math.log(pcct_ij[h], 2)
                # entropy = (1 / (math.log(len(node), 2))) * entropy
                # entropy = (1 / 100) * entropy
                # standard deviation of the centre node in the two windows
                sd_temp1 = np.std(data[i: i + time_window, node[0]], ddof=1)
                sd_temp2 = np.std(data[i + 1: i + time_window + 1, node[0]], ddof=1)
                delta_PCC[b][i] = math.sqrt(abs(sd_temp2 - sd_temp1)) * entropy  # It indicator for a single node
                b += 1
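                # Per-node score computed above: It(node, t) = sqrt(|SD_t - SD_{t-1}|) * H, where
                # H = -sum_j p_j * log2(p_j) and p_j is the change in correlation to neighbour j,
                # normalised so that the p_j sum to 1 over the node's neighbours.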
        # Convert weight_str into a DataFrame
        df_weight = pd.DataFrame(weight_str, columns=['week', 'in', 'out', 'value'])
        # Replace NaN weights with 0
        df_weight['value'] = df_weight['value'].fillna(0)
        folder_path = r'C:\Users\qiqi\Desktop\dynamical network entropy (DNE)\raw_data_and_code\weight_HMDL'
        file_path = os.path.join(folder_path, f'Year {start_year + f}-{start_year + f + 1}weight.xlsx')
        # Write the DataFrame to an Excel file
        df_weight.to_excel(file_path, index=False)
        for i in range(0, week - time_window):
            for j in range(0, len(network)):  # 23 wards
                sheet1.write(j + 1, i + 1, delta_PCC[j][i])
                if i == 0:
                    # row labels: ward index (1-based)
                    sheet1.write(j + 1, 0, str(network[j][0] + 1))
sheet1.write(0, i + 1, startweek + time_window + i)
# print("delta_PCC:",delta_PCC.shape)#(23, 52)
# 求和
It = [] # 所有节点的It值
for i in range(0, week - time_window):
# delta_PCC (23 , 52)
It.append(np.sum(delta_PCC[:, i]) / network.__len__()) # 计算了所选列中所有数值的总和均值
sheet1.write(network.__len__() + 1, i + 1, It[i])
#len(It)=52,The DNE score represents the 52 weeks of each year, and you can make critical judgments based on it.