Porting code into BasicTS #188
Comments
Hello, we have tried Corrformer before and did not seem to run into this problem. Could you paste more of your code so that we can debug it?
arch code:
import numpy
import pandas as pd
import torch
import torch.nn as nn
from baselines.Corrformer_ori.arch.Corr_layers.Embed import DataEmbedding
from baselines.Corrformer_ori.arch.Corr_layers.Causal_Conv import CausalConv
from baselines.Corrformer_ori.arch.Corr_layers.Multi_Correlation import AutoCorrelation, AutoCorrelationLayer, CrossCorrelation, CrossCorrelationLayer, \
MultiCorrelation
from baselines.Corrformer_ori.arch.Corr_layers.Corrformer_EncDec import Encoder, Decoder, EncoderLayer, DecoderLayer, \
my_Layernorm, series_decomp
from datetime import datetime, timedelta
from baselines.Corrformer.arch.utils.timefeatures import time_features
torch.autograd.set_detect_anomaly(True)
class Corrformer_ori(nn.Module):
def __init__(self, **model_args):
super(Corrformer_ori, self).__init__()
self.seq_len = model_args["input_len"]
self.label_len = model_args["label_len"]
self.pred_len = model_args["output_len"]
        self.num_nodes = model_args["num_nodes"]  # number of nodes in the dataset
        self.node_num = model_args["num_node"]    # number of spatial nodes Corrformer groups over
self.node_list = model_args["node_list"] # node_num = node_list[0]*node_list[1]*node_list[2]...
self.node_list = [int(x) for x in self.node_list.split(',')]
self.device = model_args["device"]
self.freq = model_args["freq"]
self.output_attention = model_args["output_attention"]
# Decomp
kernel_size = model_args["moving_avg"]
self.decomp = series_decomp(kernel_size)
# Encoding
self.enc_embedding = DataEmbedding(model_args["enc_in"], model_args["d_model"], model_args["root_path"],
model_args["num_nodes"], model_args["embed"], model_args["freq"],
model_args["dropout"])
self.dec_embedding = DataEmbedding(model_args["dec_in"], model_args["d_model"], model_args["root_path"],
model_args["num_nodes"], model_args["embed"], model_args["freq"],
model_args["dropout"])
# Encoder
self.encoder = Encoder(
[
EncoderLayer(
MultiCorrelation(
AutoCorrelationLayer(
AutoCorrelation(False, model_args["factor_temporal"], attention_dropout=model_args["dropout"],
output_attention=model_args["output_attention"]),
model_args["d_model"], model_args["n_heads"]),
CrossCorrelationLayer(
CrossCorrelation(
CausalConv(
num_inputs=model_args["d_model"] // model_args["n_heads"] * self.seq_len,
num_channels=[model_args["d_model"] // model_args["n_heads"] * self.seq_len] \
* model_args["dec_tcn_layers"],
kernel_size=3),
False, model_args["factor_spatial"], attention_dropout=model_args["dropout"],
output_attention=self.output_attention),
model_args["d_model"], model_args["n_heads"]),
self.node_num,
self.node_list,
dropout=model_args["dropout"],
),
model_args["d_model"],
model_args["d_ff"],
moving_avg=model_args["moving_avg"],
dropout=model_args["dropout"],
activation=model_args["activation"]
) for l in range(model_args["e_layers"])
],
norm_layer=my_Layernorm(model_args["d_model"])
)
# Decoder
self.decoder = Decoder(
[
DecoderLayer(
MultiCorrelation(
AutoCorrelationLayer(
AutoCorrelation(True, model_args["factor_temporal"], attention_dropout=model_args["dropout"],
output_attention=False),
model_args["d_model"], model_args["n_heads"]),
CrossCorrelationLayer(
CrossCorrelation(
CausalConv(
num_inputs=model_args["d_model"] // model_args["n_heads"] * (self.label_len + self.pred_len),
num_channels=[model_args["d_model"] // model_args["n_heads"] * (self.label_len + self.pred_len)] \
* model_args["dec_tcn_layers"],
kernel_size=3),
False, model_args["factor_spatial"], attention_dropout=model_args["dropout"],
output_attention=self.output_attention),
model_args["d_model"], model_args["n_heads"]),
self.node_num,
self.node_list,
dropout=model_args["dropout"],
),
MultiCorrelation(
AutoCorrelationLayer(
AutoCorrelation(False, model_args["factor_temporal"], attention_dropout=model_args["dropout"],
output_attention=False),
model_args["d_model"], model_args["n_heads"]),
CrossCorrelationLayer(
CrossCorrelation(
CausalConv(
num_inputs=model_args["d_model"] // model_args["n_heads"] * (self.label_len + self.pred_len),
num_channels=[model_args["d_model"] // model_args["n_heads"] * (self.label_len + self.pred_len)] \
* model_args["dec_tcn_layers"],
kernel_size=3),
False, model_args["factor_spatial"], attention_dropout=model_args["dropout"],
output_attention=self.output_attention),
model_args["d_model"], model_args["n_heads"]),
self.node_num,
self.node_list,
dropout=model_args["dropout"],
),
model_args["d_model"],
model_args["c_out"],
model_args["d_ff"],
moving_avg=model_args["moving_avg"],
dropout=model_args["dropout"],
activation=model_args["activation"],
)
for l in range(model_args["d_layers"])
],
norm_layer=my_Layernorm(model_args["d_model"]),
projection=nn.Linear(model_args["d_model"], model_args["c_out"], bias=True)
)
self.affine_weight = nn.Parameter(torch.ones(1, 1, model_args["enc_in"]))
self.affine_bias = nn.Parameter(torch.zeros(1, 1, model_args["enc_in"]))
self.tran_oriDim_to_tarDim = nn.Conv1d(in_channels=self.num_nodes, out_channels=self.node_num, kernel_size=1)
self.tran_tarDim_to_oriDim = nn.Conv1d(in_channels=self.node_num, out_channels=self.num_nodes, kernel_size=1)
def forward(self, history_data,
enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None,
future_data: torch.Tensor = None, batch_seen: int = None, **kwargs) -> torch.Tensor:
# init & normalization
b,l,n,_ = history_data.shape
        x_enc = history_data[:, :, :, 0]  # channel 0: raw traffic flow
# decoder input
x_dec = torch.zeros_like(future_data[:, :, :, 0]).float()
x_dec = torch.cat([x_enc[:, -self.label_len: , :], x_dec], dim=1).float().to(self.device)
#encoder mark
        x_mark_enc = history_data[:, :, :, 3]  # channel 3: raw timestamp in seconds since 1970-01-01 (added by the data script)
x_mark_enc = float_array_to_date_array(x_mark_enc)
x_mark_enc = pd.to_datetime(x_mark_enc)
        x_mark_enc = time_features(x_mark_enc, freq=self.freq)  # already a DatetimeIndex, no need to convert again
x_mark_enc = x_mark_enc.transpose(1,0)
x_mark_enc = torch.tensor(x_mark_enc.reshape(b,l,x_mark_enc.shape[-1]), dtype=torch.float).to(self.device)
#decoder mark
x_mark_dec = future_data[:, :, :, 3]
x_mark_dec = float_array_to_date_array(x_mark_dec)
x_mark_dec = pd.to_datetime(x_mark_dec)
        x_mark_dec = time_features(x_mark_dec, freq=self.freq)  # already a DatetimeIndex, no need to convert again
x_mark_dec = x_mark_dec.transpose(1, 0)
x_mark_dec = torch.tensor(x_mark_dec.reshape(b, l, x_mark_dec.shape[-1]), dtype=torch.float).to(self.device)
x_mark_dec = torch.cat((x_mark_enc[:, -self.label_len:, :], x_mark_dec), dim=1)
# init & normalization
means = x_enc.mean(1, keepdim=True).detach()
x_enc = x_enc - means
stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
x_enc /= stdev
x_enc = x_enc * self.affine_weight.repeat(1, 1, self.node_num) + self.affine_bias.repeat(1, 1, self.node_num)
# decomp
mean = torch.mean(x_enc, dim=1).unsqueeze(1).repeat(1, self.pred_len, 1)
        zeros = torch.zeros([x_dec.shape[0], self.pred_len, x_dec.shape[2]]).to(self.device)  # keep everything on the same device
seasonal_init, trend_init = self.decomp(x_enc)
# decoder input init
trend_init = torch.cat([trend_init[:, -self.label_len:, :], mean], dim=1)
seasonal_init = torch.cat([seasonal_init[:, -self.label_len:, :], zeros], dim=1)
# enc
B, L, D = x_enc.shape
_, _, C = x_mark_enc.shape
x_enc = x_enc.view(B, L, self.node_num, -1).permute(0, 2, 1, 3).contiguous() \
.view(B * self.node_num, L, D // self.node_num)
x_mark_enc = x_mark_enc.unsqueeze(1).repeat(1, self.node_num, 1, 1).view(B * self.node_num, L, C)
enc_out = self.enc_embedding(x_enc, x_mark_enc)
enc_out = self.encoder(enc_out, attn_mask=enc_self_mask)
# dec
B, L, D = seasonal_init.shape
_, _, C = x_mark_dec.shape
seasonal_init = seasonal_init.view(B, L, self.node_num, -1).permute(0, 2, 1, 3).contiguous() \
.view(B * self.node_num, L, D // self.node_num)
trend_init = trend_init.view(B, L, self.node_num, -1).permute(0, 2, 1, 3).contiguous() \
.view(B * self.node_num, L, D // self.node_num)
x_mark_dec = x_mark_dec.unsqueeze(1).repeat(1, self.node_num, 1, 1).view(B * self.node_num, L, C)
dec_out = self.dec_embedding(seasonal_init, x_mark_dec)
seasonal_part, trend_part = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask,
trend=trend_init)
# final
dec_out = trend_part + seasonal_part
dec_out = dec_out[:, -self.pred_len:, :] \
.view(B, self.node_num, self.pred_len, D // self.node_num).permute(0, 2, 1, 3).contiguous() \
.view(B, self.pred_len, D) # B L D
# scale back
dec_out = dec_out - self.affine_bias.repeat(1, 1, self.node_num)
dec_out = dec_out / (self.affine_weight.repeat(1, 1, self.node_num) + 1e-10)
dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1))
dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1))
dec_out = dec_out.unsqueeze(-1)
        return dec_out  # [B, L, N, 1]
def float_array_to_date_array(float_array, start_date=datetime(1970, 1, 1)):
    '''Convert a [B, L, N] tensor of seconds-since-epoch into date strings; all nodes share a timestamp, so only node 0 is read.'''
    res = []
    b, l, n = float_array.shape
    for i in range(b):
        for j in range(l):
            res.append((start_date + timedelta(seconds=float(float_array[i, j, 0]))).strftime('%Y-%m-%d %H:%M:%S'))
    return numpy.array(res)
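Since channel 3 is just seconds since the Unix epoch, the per-element Python loop above can also be replaced by a single vectorized pandas call. A minimal sketch, assuming the tensor may live on the GPU (the helper name seconds_to_datetime_index is made up here, not part of the original code):

import pandas as pd
import torch

def seconds_to_datetime_index(x_mark: torch.Tensor) -> pd.DatetimeIndex:
    # x_mark: [B, L, N] raw timestamps in seconds since 1970-01-01; all nodes share the
    # same timestamp, so only node 0 is used (same convention as float_array_to_date_array)
    seconds = x_mark[:, :, 0].reshape(-1).detach().cpu().numpy()
    return pd.to_datetime(seconds, unit='s')  # vectorized, no Python loop

The result can be passed straight to time_features(...), exactly like the string-based version.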
generate data code:
import os
import json
import shutil
import time
import numpy as np
from generate_adj_mx import generate_adj_pems04 as generate_adj
from datetime import datetime, timedelta
# Hyperparameters
dataset_name = 'PEMS04'
data_file_path = f'datasets/raw_data/{dataset_name}/{dataset_name}.npz'
graph_file_path = f'datasets/raw_data/{dataset_name}/adj_{dataset_name}.pkl'
output_dir = f'datasets/{dataset_name}'
target_channel = [0] # Target traffic flow channel
add_time_of_day = True # Add time of day as a feature
add_day_of_week = True # Add day of the week as a feature
steps_per_day = 288 # Number of time steps per day
frequency = 1440 // steps_per_day
domain = 'traffic flow'
feature_description = [domain, 'time of day', 'day of week']
regular_settings = {
'INPUT_LEN': 12,
'OUTPUT_LEN': 12,
'TRAIN_VAL_TEST_RATIO': [0.6, 0.2, 0.2],
'NORM_EACH_CHANNEL': False,
'RESCALE': True,
'METRICS': ['MAE', 'RMSE', 'MAPE'],
'NULL_VAL': 0.0
}
def load_and_preprocess_data():
'''Load and preprocess raw data, selecting the specified channel(s).'''
data = np.load(data_file_path)['data']
data = data[..., target_channel]
print(f'Raw time series shape: {data.shape}')
return data
def seconds_to_time_series(num_steps):
    '''Build an L x N x 1 channel of raw timestamps (seconds since the epoch), one every 5 minutes from 2018-01-01.'''
    time_lst = []
    base_time = datetime(2018, 1, 1)
    for i in range(num_steps):
        time_str = (base_time + timedelta(seconds=i*5*60)).strftime('%Y-%m-%d %H:%M:%S')
        datetime_obj = datetime.strptime(time_str, '%Y-%m-%d %H:%M:%S')
        timestamp = time.mktime(datetime_obj.timetuple())
        time_lst.append([timestamp])
    time_lst = np.array(time_lst).repeat(307, axis=1)  # 307 = number of PEMS04 sensors (hard-coded)
    time_lst = np.reshape(time_lst, (time_lst.shape[0], time_lst.shape[1], 1))
    return time_lst
def add_temporal_features(data):
'''Add time of day and day of week as features to the data.'''
l, n, _ = data.shape
feature_list = [data]
if add_time_of_day:
time_of_day = np.array([i % steps_per_day / steps_per_day for i in range(l)])
time_of_day_tiled = np.tile(time_of_day, [1, n, 1]).transpose((2, 1, 0))
feature_list.append(time_of_day_tiled)
if add_day_of_week:
day_of_week = np.array([(i // steps_per_day) % 7 / 7 for i in range(l)])
day_of_week_tiled = np.tile(day_of_week, [1, n, 1]).transpose((2, 1, 0))
feature_list.append(day_of_week_tiled)
    time_list = seconds_to_time_series(l)  # l == 16992 for PEMS04; avoids hard-coding the length
feature_list.append(time_list)
data_with_features = np.concatenate(feature_list, axis=-1) # L x N x C
return data_with_features
def save_data(data):
'''Save the preprocessed data to a binary file.'''
if not os.path.exists(output_dir):
os.makedirs(output_dir)
file_path = os.path.join(output_dir, 'data.dat')
fp = np.memmap(file_path, dtype='float32', mode='w+', shape=data.shape)
fp[:] = data[:]
fp.flush()
del fp
print(f'Data saved to {file_path}')
def save_graph():
'''Save the adjacency matrix to the output directory, generating it if necessary.'''
output_graph_path = os.path.join(output_dir, 'adj_mx.pkl')
if os.path.exists(graph_file_path):
shutil.copyfile(graph_file_path, output_graph_path)
else:
generate_adj()
shutil.copyfile(graph_file_path, output_graph_path)
print(f'Adjacency matrix saved to {output_graph_path}')
def save_description(data):
'''Save a description of the dataset to a JSON file.'''
description = {
'name': dataset_name,
'domain': domain,
'shape': data.shape,
'num_time_steps': data.shape[0],
'num_nodes': data.shape[1],
'num_features': data.shape[2],
'feature_description': feature_description,
'has_graph': graph_file_path is not None,
'frequency (minutes)': frequency,
'regular_settings': regular_settings
}
description_path = os.path.join(output_dir, 'desc.json')
with open(description_path, 'w') as f:
json.dump(description, f, indent=4)
print(f'Description saved to {description_path}')
print(description)
def main():
# Load and preprocess data
data = load_and_preprocess_data()
# Add temporal features
data_with_features = add_temporal_features(data)
# Save processed data
save_data(data_with_features)
# Copy or generate and save adjacency matrix
save_graph()
# Save dataset description
save_description(data_with_features)
if __name__ == '__main__':
    main()
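The 307-node count inside seconds_to_time_series is still PEMS04-specific. A sketch of a vectorized variant that derives both dimensions from the loaded array (the name make_timestamp_channel is made up; the 2018-01-01 start and 5-minute step are taken from the script above):

import numpy as np
from datetime import datetime

def make_timestamp_channel(num_steps, num_nodes,
                           base_time=datetime(2018, 1, 1),
                           step_minutes=5):
    '''Return an L x N x 1 array of raw timestamps (seconds since the epoch).'''
    seconds = base_time.timestamp() + np.arange(num_steps) * step_minutes * 60   # [L]
    return np.tile(seconds[:, None, None], (1, num_nodes, 1))                    # [L, N, 1]

Inside add_temporal_features this would be called as make_timestamp_channel(l, n) instead of seconds_to_time_series(l).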
Parameter settings (config):
import os
import sys
import torch
from easydict import EasyDict
sys.path.append(os.path.abspath(__file__ + '/../../..'))
from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
from basicts.scaler import ZScoreScaler
from basicts.utils import get_regular_settings, load_adj
from .arch import Corrformer_ori
############################## Hot Parameters ##############################
# Dataset & Metrics configuration
DATA_NAME = 'PEMS04' # Dataset name
regular_settings = get_regular_settings(DATA_NAME)
INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence
OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence
TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios
NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data
RESCALE = regular_settings['RESCALE'] # Whether to rescale the data
NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data
# Model architecture and parameters
MODEL_ARCH = Corrformer_ori
MODEL_PARAM = {
"input_len": INPUT_LEN,
"label_len": INPUT_LEN // 2,
"output_len": OUTPUT_LEN,
"num_nodes": 307,
"num_node": 307,
"node_list": '307',
"output_attention": False,
"moving_avg": 25,
"enc_in": 1,
"dec_in": 1,
"c_out": 1,
"d_model": 1,
"root_path": "datasets/" + DATA_NAME,
"embed": "timeF",
# freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly],
# you can also use more detailed freq like 15min or 3h
"freq": "h",
"dropout": 0.05,
"factor_temporal": 1,
"n_heads": 1,
"enc_tcn_layers": 1,
"dec_tcn_layers": 1,
"factor_spatial": 1,
"d_ff": 1,
"activation": 'gelu',
"e_layers": 2,
"d_layers": 1,
"device": 0,
}
NUM_EPOCHS = 150
############################## General Configuration ##############################
CFG = EasyDict()
# General settings
CFG.DESCRIPTION = 'An Example Config'
CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode)
# Runner
CFG.RUNNER = SimpleTimeSeriesForecastingRunner
############################## Dataset Configuration ##############################
CFG.DATASET = EasyDict()
# Dataset settings
CFG.DATASET.NAME = DATA_NAME
CFG.DATASET.TYPE = TimeSeriesForecastingDataset
CFG.DATASET.PARAM = EasyDict({
'dataset_name': DATA_NAME,
'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
'input_len': INPUT_LEN,
'output_len': OUTPUT_LEN,
# 'mode' is automatically set by the runner
})
############################## Scaler Configuration ##############################
CFG.SCALER = EasyDict()
# Scaler settings
CFG.SCALER.TYPE = ZScoreScaler # Scaler class
CFG.SCALER.PARAM = EasyDict({
'dataset_name': DATA_NAME,
'train_ratio': TRAIN_VAL_TEST_RATIO[0],
'norm_each_channel': NORM_EACH_CHANNEL,
'rescale': RESCALE,
})
############################## Model Configuration ##############################
CFG.MODEL = EasyDict()
# Model settings
CFG.MODEL.NAME = MODEL_ARCH.__name__
CFG.MODEL.ARCH = MODEL_ARCH
CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2, 3]
CFG.MODEL.TARGET_FEATURES = [0]
############################## Metrics Configuration ##############################
CFG.METRICS = EasyDict()
# Metrics settings
CFG.METRICS.FUNCS = EasyDict({
'MAE': masked_mae,
'MAPE': masked_mape,
'RMSE': masked_rmse,
})
CFG.METRICS.TARGET = 'MAE'
CFG.METRICS.NULL_VAL = NULL_VAL
############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
'checkpoints',
MODEL_ARCH.__name__,
'_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
)
CFG.TRAIN.LOSS = masked_mae
# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
"lr": 0.001,
"weight_decay": 0.0001,
}
# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
"milestones": [1, 50, 80],
"gamma": 0.5
}
CFG.TRAIN.CLIP_GRAD_PARAM = {
'max_norm': 5.0
}
# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
CFG.TRAIN.DATA.BATCH_SIZE = 64
CFG.TRAIN.DATA.SHUFFLE = True
############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
CFG.VAL.DATA = EasyDict()
CFG.VAL.DATA.BATCH_SIZE = 64
############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 16
CFG.TEST.DATA = EasyDict()
CFG.TEST.DATA.BATCH_SIZE = 64
############################## Evaluation Configuration ##############################
CFG.EVAL = EasyDict()
# Evaluation parameters
CFG.EVAL.HORIZONS = [3, 6, 12] # Prediction horizons for evaluation. Default: []
CFG.EVAL.USE_GPU = True # Whether to use GPU for evaluation. Default: True
I removed the positional-encoding part of the original code.
Sorry, I can't reproduce your error; it runs fine on my side. Which version of PyTorch are you using?
torch 1.10.0+cu113
What environment are you using? I'll switch to your environment and give it a try.
After switching environments I still ran into the same problem.
Strange. Please send your code to [email protected]; I'll need it to debug this.
Hello, after porting the Corrformer code into BasicTS I got the error Function 'WeightNormCudaInterfaceBackward0' returned nan value during training. Have you run into anything similar before?
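In case it helps narrow this down, one generic way to localize such an error (in addition to torch.autograd.set_detect_anomaly(True), which the arch code already enables) is to inspect every parameter's gradient right after backward(); the snippet below is only a debugging sketch, not part of BasicTS or Corrformer:

import torch

def report_bad_grads(model: torch.nn.Module) -> None:
    # call immediately after loss.backward(); prints any parameter whose gradient
    # contains NaN or Inf, which helps pin down layers such as weight-normalized convs
    for name, param in model.named_parameters():
        if param.grad is not None and not torch.isfinite(param.grad).all():
            print(f'non-finite gradient in: {name}')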