# train.py (12 KB)
import os
from datetime import datetime
from pathlib import Path
from pprint import pprint

import pandas as pd

from ...model.DHU.DHU_AB import DHU_AB
from ...model.DHU.SDHU_AB import SDHU_AB
from ...model.Room.room import RoomDewPredictor
from .config_reader import ConfigReader
from ...tools.data_loader import DataLoader
  11. NOW = datetime.now().replace(second=0,microsecond=0)
  12. PATH = os.path.dirname(os.path.realpath(__file__)).replace('\\','/')
  13. MODEL_FUNC_PATH = f'{PATH}/model_func.py'
  14. MODEL_FILE_PATH = f'./model.pkl'
  15. def train(*inputs,config=None):
  16. config = {} if config is None else config
  17. if '__LOCAL' in config.keys():
  18. config_reader_path = config['__LOCAL']
  19. data_URL = config['__URL']
  20. else:
  21. config_reader_path = '/mnt/workflow_data'
  22. data_URL = 'http://basedataportal-svc:8080/data/getpointsdata'
  23. config_reader = ConfigReader(path=f'{config_reader_path}/DHU配置.xlsx')
  24. ALL_RESULT = {
  25. 'EXCEPTION':{
  26. 'Data': {},
  27. 'Fit' : {},
  28. 'Save': {},
  29. 'Plot': {}
  30. }
  31. }
  32. for each_eaup_name in config_reader.all_equp_names:
  33. equp_type = config_reader.get_equp_info(each_eaup_name,key='设备类型',info_type='str')
  34. print(f'{each_eaup_name}开始训练,设备类型为{equp_type}')
  35. # 获取数据
  36. try:
  37. equp_data = load_data(
  38. each_eaup_name=each_eaup_name,each_equp_type=equp_type,config_reader=config_reader,
  39. config_reader_path=config_reader_path,data_URL=data_URL
  40. )
  41. except Exception as E:
  42. ALL_RESULT['EXCEPTION']['Data'][each_eaup_name] = E
  43. raise E
  44. continue
  45. # 训练模型
  46. try:
  47. equp_model,equp_data_clean = train_equp_model(
  48. each_eaup_name=each_eaup_name,each_equp_type=equp_type,equp_data=equp_data,
  49. config_reader=config_reader,config_reader_path=config_reader_path)
  50. train_room_model(
  51. each_eaup_name=each_eaup_name,each_equp_type=equp_type,equp_data=equp_data,
  52. config_reader=config_reader,config_reader_path=config_reader_path
  53. )
  54. except Exception as E:
  55. ALL_RESULT['EXCEPTION']['Fit'][each_eaup_name] = E
  56. print(f'{each_eaup_name}模型训练异常 {E}')
  57. continue
  58. # 保存可视化结果
  59. if config_reader.get_app_info(each_eaup_name,'模型训练','保存可视化结果','bool') and equp_model is not None:
  60. try:
  61. save_train_info(
  62. equp_model=equp_model,equp_data=equp_data_clean,
  63. config_reader_path=config_reader_path,each_eaup_name=each_eaup_name)
  64. except Exception as E:
  65. ALL_RESULT['EXCEPTION']['Plot'][each_eaup_name] = E
  66. pass
  67. # 模型迭代
  68. if not config_reader.get_app_info(each_eaup_name,'模型训练','迭代模型','bool') and equp_model is not None:
  69. continue
  70. try:
  71. monitor_point = config_reader.point.loc[lambda dt:dt.类型=='B']
  72. model_update_info = {}
  73. for i in range(len(monitor_point)):
  74. name = monitor_point.loc[:,'编号'].iat[i]
  75. name_cn = monitor_point.loc[:,'名称'].iat[i]
  76. MAE = monitor_point.loc[:,'指标MAE'].iat[i]
  77. model_update_info[name] = {
  78. 'point_id' : name,
  79. 'point_name' : name_cn,
  80. 'point_class': name,
  81. 'thre_mae' : MAE,
  82. 'thre_mape' : 1,
  83. 'thre_days' : 7
  84. }
  85. equp_model.save_to_platform(
  86. version_id = datetime.now().strftime('%Y%m'),
  87. model_id = config_reader.get_equp_info(each_eaup_name,'模型编号','str'),
  88. update_method = 'update',
  89. model_info = model_update_info,
  90. MODEL_FILE_PATH = MODEL_FILE_PATH,
  91. MODEL_FUNC_PATH = MODEL_FUNC_PATH,
  92. )
  93. except Exception as E:
  94. ALL_RESULT['EXCEPTION']['Save'][each_eaup_name] = E
  95. continue
  96. pprint(ALL_RESULT)
  97. def save_data(dir,file:str,data:pd.DataFrame):
  98. Path(dir).mkdir(parents=True,exist_ok=True)
  99. if file.endswith('.csv'):
  100. data.to_csv(os.path.join(dir,file),index=True)
  101. elif file.endswith('.pkl'):
  102. data.to_pickle(os.path.join(dir,file))
  103. else:
  104. raise Exception('file type error')
  105. def load_data(each_eaup_name,each_equp_type,config_reader,config_reader_path,data_URL):
  106. # 部分情况下设备不需要部分点位表中的点位
  107. rm_point_name = []
  108. if not config_reader.get_equp_info(each_eaup_name,'存在回风口','bool'):
  109. rm_point_name += ['mixed_1_TinM','mixed_1_DinM']
  110. if not config_reader.get_equp_info(each_eaup_name,'存在补风口','bool'):
  111. rm_point_name += ['mixed_2_TinM','mixed_2_DinM']
  112. # 获取历史数据
  113. data_loader = DataLoader(
  114. path = f'{config_reader_path}/data/train/data_his/',
  115. start_time = config_reader.get_app_info(each_eaup_name,app_type='模型训练',key='开始时间',info_type='datetime'),
  116. end_time = config_reader.get_app_info(each_eaup_name,app_type='模型训练',key='结束时间',info_type='datetime'),
  117. print_process = config_reader.get_app_info(each_eaup_name,app_type='模型训练',key='打印取数日志',info_type='bool'),
  118. )
  119. data_loader.download_equp_data(
  120. equp_name = each_eaup_name,
  121. point = config_reader.get_equp_point(each_eaup_name,equp_class=['A','B','C']),
  122. url = data_URL,
  123. clean_cache = False,
  124. rm_point_name = rm_point_name
  125. )
  126. equp_data = data_loader.get_equp_data(each_eaup_name)
  127. save_data(f'{config_reader_path}/data/train/data_his_raw',f'{each_eaup_name}.pkl',equp_data)
  128. return equp_data
  129. def train_equp_model(each_eaup_name,each_equp_type,equp_data,config_reader,config_reader_path):
  130. if each_equp_type in ['DHU_A','DHU_B']:
  131. equp_model = DHU_AB(
  132. DHU_type = each_equp_type,
  133. exist_Fa_H = config_reader.get_equp_info(each_eaup_name,'存在回风口','bool'),
  134. exist_Fa_B = config_reader.get_equp_info(each_eaup_name,'存在补风口','bool'),
  135. )
  136. elif each_equp_type in ['SDHU_A','SDHU_B']:
  137. equp_model = SDHU_AB(
  138. DHU_type = each_equp_type,
  139. exist_Fa_H = config_reader.get_equp_info(each_eaup_name,'存在回风口','bool'),
  140. exist_Fa_H0= config_reader.get_equp_info(each_eaup_name,'存在回风口(前表冷后)','bool'),
  141. )
  142. else:
  143. raise NotImplementedError
  144. # 清洗数据
  145. Path(f'{config_reader_path}/data/train/clean_log/').mkdir(parents=True, exist_ok=True)
  146. equp_data_clean = equp_model.clean_data(
  147. data = equp_data,
  148. data_type = ['input','observed'],
  149. print_process = True,
  150. fill_zero = False,
  151. save_log = f'{config_reader_path}/data/train/clean_log/{each_eaup_name}.txt',
  152. )
  153. equp_data_clean = equp_data_clean.resample('15min').mean().dropna()
  154. save_data(f'{config_reader_path}/data/train/data_his_clean',f'{each_eaup_name}.pkl',equp_data_clean)
  155. if not config_reader.get_app_info(each_eaup_name,'模型训练','训练设备模型','bool'):
  156. return None,None
  157. if each_equp_type in ['DHU_A','DHU_B']:
  158. equp_model.fit(
  159. input_data = equp_data_clean,
  160. observed_data = equp_data_clean,
  161. plot_TVP = False,
  162. rw_FA_val = config_reader.get_app_info(each_eaup_name,'模型训练','新风阀门开度参数','bool')
  163. )
  164. elif each_equp_type in ['SDHU_A','SDHU_B']:
  165. equp_model:SDHU_AB
  166. equp_model.fit(
  167. input_data = equp_data_clean,
  168. observed_data = equp_data_clean,
  169. plot_TVP = False
  170. )
  171. else:
  172. raise NotImplementedError
  173. Path(f'{config_reader_path}/model').mkdir(parents=True, exist_ok=True)
  174. equp_model.save(f'{config_reader_path}/model/{each_eaup_name}.pkl')
  175. save_data(f'{config_reader_path}/data/train/data_TVP',f'{each_eaup_name}.csv',equp_model.TVP_data)
  176. save_data(f'{config_reader_path}/data/train/data_metric',f'{each_eaup_name}.csv',equp_model.TVP_metric.round(2))
  177. return equp_model,equp_data_clean
  178. def train_room_model(each_eaup_name,each_equp_type,equp_data,config_reader:ConfigReader,config_reader_path):
  179. if not config_reader.get_app_info(each_eaup_name,'模型训练','训练房间模型','bool'):
  180. return None
  181. N_fit = 24 * 60 * 3
  182. try:
  183. equp_model_path = f'{config_reader_path}/model/{each_eaup_name}.pkl'
  184. if each_equp_type in ['DHU_A','DHU_B']:
  185. equp_model = DHU_AB.load(equp_model_path)
  186. Dout = equp_model.predict(equp_data.iloc[-N_fit:,:])['coil_3']['DoutA']
  187. elif each_equp_type in ['SDHU_A','SDHU_B']:
  188. equp_model = SDHU_AB.load(equp_model_path)
  189. Dout = equp_model.predict(equp_data.iloc[-N_fit:,:])['wheel_1']['DoutP']
  190. else:
  191. raise NotImplementedError
  192. except Exception as E:
  193. Dout = None
  194. print(f'{each_eaup_name}设备模型加载失败,只训练基于实际送风露点的房间露点模型')
  195. # 实际送风露点
  196. if each_equp_type in ['DHU_A','DHU_B']:
  197. Dout_real = equp_data.iloc[-N_fit:,:].loc[:,'wheel_2_DoutP'].values
  198. elif each_equp_type in ['SDHU_A','SDHU_B']:
  199. Dout_real = equp_data.iloc[-N_fit:,:].loc[:,'coil_2_DoutA'].values
  200. else:
  201. raise NotImplementedError
  202. N_room = config_reader.get_equp_info(each_eaup_name,'房间数量','int')
  203. path_diffdata = f'{config_reader_path}/plot/plot_room_diffdata/'
  204. path_lagcorr = f'{config_reader_path}/plot/plot_room_lagcorr/'
  205. path_diffdata_bk = f'{config_reader_path}/plot/plot_room_diffdata_bk/'
  206. path_lagcorr_bk = f'{config_reader_path}/plot/plot_room_lagcorr_bk/'
  207. Path(path_diffdata).mkdir(parents=True, exist_ok=True)
  208. Path(path_lagcorr).mkdir(parents=True, exist_ok=True)
  209. Path(path_diffdata_bk).mkdir(parents=True, exist_ok=True)
  210. Path(path_lagcorr_bk).mkdir(parents=True, exist_ok=True)
  211. for i in range(1,N_room+1):
  212. Droom = equp_data.iloc[-N_fit:,:].loc[:,f'room_{i}_Dpv'].values
  213. if Dout is not None:
  214. room_model = RoomDewPredictor().fit_Droom(Dout=Dout,Droom=Droom)
  215. room_model.save(f'{config_reader_path}/model/{each_eaup_name}_room_{i}_Dpv.pkl')
  216. room_model.plot_diffdata(Dout,Droom).save(filename=f'{path_diffdata}/{each_eaup_name}_room_{i}_Dpv.png')
  217. room_model.plot_diffdata_lagcorr(Dout,Droom).save(filename=f'{path_lagcorr}/{each_eaup_name}_room_{i}_Dpv.png')
  218. room_model_bk = RoomDewPredictor().fit_Droom(Dout=Dout_real,Droom=Droom)
  219. room_model_bk.save(f'{config_reader_path}/model/{each_eaup_name}_room_{i}_Dpv_bk.pkl')
  220. room_model_bk.plot_diffdata(Dout_real,Droom).save(filename=f'{path_diffdata_bk}/{each_eaup_name}_room_{i}_Dpv.png')
  221. room_model_bk.plot_diffdata_lagcorr(Dout_real,Droom).save(filename=f'{path_lagcorr_bk}/{each_eaup_name}_room_{i}_Dpv.png')
  222. def save_train_info(equp_model,equp_data,config_reader_path,each_eaup_name):
  223. for plot_name,plot in equp_model.plot_check(equp_data).items():
  224. path = f'{config_reader_path}/plot/{plot_name}/'
  225. Path(path).mkdir(parents=True, exist_ok=True)
  226. plot.save(filename=f'{path}/{each_eaup_name}.png')
  227. path = f'{config_reader_path}/plot/TVP'
  228. Path(path).mkdir(parents=True, exist_ok=True)
  229. equp_model.plot_TVP(equp_model.TVP_data,save_path=f'{path}/{each_eaup_name}.png')