Skip to content

Commit

Permalink
Merge pull request #34 from Enraged-Dun-Cookie-Development-Team/auto-…
Browse files Browse the repository at this point in the history
…scheduler-v0.1

内存监控打到频道
  • Loading branch information
YoungHector authored Oct 26, 2024
2 parents 3bc4cd1 + 23930ee commit a2bc4de
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 2 deletions.
9 changes: 8 additions & 1 deletion src/_data_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
sys.path.append(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))

from src.instance_utils import get_new_instance_name
from src._log_lib import logger
from src._log_lib import logger, get_memory_usage
from src.db import HandleMysql, HandleRedis
from src.strategy import *
from src._conf_lib import CONFIG, AUTO_SCHE_CONFIG
Expand Down Expand Up @@ -339,6 +339,9 @@ def limit_cpu(interval):
time.sleep(0.05)
batch = X_list[i:i + batch_size]
batch_predictions = self.model.predict(batch)
if i % 100 == 0:
messager.send_to_bot_shortcut('预测中,批次{} 内存:{}'.format(i, get_memory_usage()))

predictions.extend(batch_predictions)
if i == 0:
messager.send_to_bot_shortcut('预测结果第一批样例形状:')
Expand Down Expand Up @@ -458,14 +461,18 @@ def _set_model_predicted_result_pool(self, X_list, predicted_result):
"""
把预测结果和原始输入,整合成方便查找蹲饼时间和对应数据源的形式。
"""
messager.send_to_bot_shortcut('开始后处理,将预测结果与输入拼接 内存:{}'.format(get_memory_usage()))

X_list['predicted_y'] = np.array(predicted_result) > 0.99999
messager.send_to_bot_shortcut('将预测结果与特征完成拼接,完整形状为:')
messager.send_to_bot_shortcut(X_list.shape)
messager.send_to_bot_shortcut('预测结果与输入完成拼接 内存:{}'.format(get_memory_usage()))

X_list.columns = ['datasource', '1', '2', '3', '4', 'year', 'month', 'day', 'hour', 'minute', 'second', '11', 'predicted_y']

X_list['datetime'] = pd.to_datetime(X_list[['year', 'month', 'day', 'hour', 'minute', 'second']])
messager.send_to_bot_shortcut('完成时间戳转换')
messager.send_to_bot_shortcut('完成时间戳转换 内存:{}'.format(get_memory_usage()))

# 使用.dt.strftime()将日期时间对象格式化为字符串
X_list['datetime_str'] = X_list['datetime'].dt.strftime('%Y-%m-%d %H:%M:%S')
Expand Down
9 changes: 9 additions & 0 deletions src/_log_lib.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import logging
import logging.config
import psutil

logger = None

def set_logger(log_conf_file=''):

"""初始化log,如果不指定conf_file的话,默认输出到stderr"""
if len(log_conf_file):
try:
Expand All @@ -29,4 +31,11 @@ def set_default_logger():
logger.addHandler(ch)


def get_memory_usage():
    """Return the current process's memory usage as a string like ``"12.34 MB"``.

    The value is the resident set size (RSS) reported by psutil in bytes,
    converted to megabytes and formatted with two decimal places.
    """
    rss_bytes = psutil.Process().memory_info().rss
    # Two successive divisions by 1024: bytes -> KB -> MB.
    memory_in_mb = rss_bytes / 1024 / 1024
    return f"{memory_in_mb:.2f} MB"

# Initialise the logger using the given log configuration file.
# NOTE(review): the path is relative; presumably it is resolved against the
# process's working directory — confirm against set_logger's implementation.
set_logger(log_conf_file='../conf.conf.log')
9 changes: 8 additions & 1 deletion src/auto_sche/model_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

from src.auto_sche.model_loader import MODEL_DICT
from src._log_lib import get_memory_usage
from src._conf_lib import AUTO_SCHE_CONFIG
import numpy as np
import pandas as pd
from tqdm import tqdm

import psutil
from src._grpc_lib import messager


Expand All @@ -28,11 +30,14 @@ def feature_combine(self):
X_list = []
feature_num = 12
datasource_num = AUTO_SCHE_CONFIG['DATASOURCE_POSSIBLE_NUMS'] # 域内实际出现过的蹲饼器编号数量.
messager.send_to_bot_shortcut('开始特征工程时的内存:{}'.format(get_memory_usage()))
messager.send_to_bot_shortcut('适配的的数据源数量:{}'.format(datasource_num))
messager.send_to_bot_shortcut('开始梳理时间相关的特征')
messager.send_to_bot_shortcut('开始梳理时间相关的特征 内存:{}'.format(get_memory_usage()))

time_points = self.feature_of_time()
messager.send_to_bot_shortcut('梳理时间相关的特征完成')
messager.send_to_bot_shortcut('梳理时间相关的特征完成 内存:{}'.format(get_memory_usage()))

time_points_nums = len(time_points)

Expand All @@ -41,6 +46,7 @@ def feature_combine(self):
# 打印10次中间过程。
if t_idx % (time_points_nums // 10) == 0:
messager.send_to_bot_shortcut('合成最终特征中,进度{}/{}'.format(t_idx, time_points_nums))
messager.send_to_bot_shortcut('内存:{}'.format(get_memory_usage()))

cur_feature = np.zeros([datasource_num, feature_num], dtype=int)

Expand All @@ -55,7 +61,8 @@ def feature_combine(self):

# 组织成dataframe用于模型输入.
X_list = pd.DataFrame(np.concatenate(X_list))

messager.send_to_bot_shortcut('全部特征输入模型前 内存:{}'.format(get_memory_usage()))

X_list.columns = ['datasource_encoded',
'is_top',
'is_retweeted',
Expand Down

0 comments on commit a2bc4de

Please sign in to comment.