train.py 2.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
  1. import os
  2. from datetime import datetime
  3. from pathlib import Path
  4. from pprint import pprint
  5. import pandas as pd
  6. from .config_reader import ConfigReader
  7. from ..DHU.config_reader import ConfigReader as ConfigReader_DHU
  8. from ..._data.main import get_data
  9. NOW = datetime.now().replace(second=0,microsecond=0)
  10. PATH = os.path.dirname(os.path.realpath(__file__)).replace('\\','/')
  11. MODEL_FUNC_PATH = f'{PATH}/model_func.py'
  12. MODEL_FILE_PATH = f'./model.pkl'
  13. def train(*inputs,config=None):
  14. config = {} if config is None else config
  15. if '__LOCAL' in config.keys():
  16. config_reader_path = config['__LOCAL']
  17. data_URL = config['__URL']
  18. plot_metric = True
  19. else:
  20. config_reader_path = '/mnt/workflow_data'
  21. data_URL = 'http://basedataportal-svc:8080/data/getpointsdata'
  22. plot_metric = False
  23. config_reader = ConfigReader(
  24. path = f'{config_reader_path}/Room.xlsx',
  25. DHU_AB_config = ConfigReader_DHU(path=f'{config_reader_path}/DHU_AB.xlsx')
  26. )
  27. # Historical data
  28. data_raw_folder = os.path.join(config_reader_path,f'data/train/data_his_raw/')
  29. if not os.path.exists(data_raw_folder):
  30. os.makedirs(data_raw_folder)
  31. for each_room_dew in config_reader.all_room_dew:
  32. all_data = []
  33. data_his_folder = os.path.join(config_reader_path,f'data/train/data_his/{each_room_dew}')
  34. if not os.path.exists(data_his_folder):
  35. os.makedirs(data_his_folder)
  36. points = config_reader.get_point(each_room_dew)
  37. for each_point_name,each_point_id in points.items():
  38. file_path = os.path.join(data_his_folder,f'{each_point_name}.pkl')
  39. get_data(
  40. points_id = [each_point_id],
  41. time_start = config_reader.get_app_info(each_room_dew,'模型训练','开始时间','datetime'),
  42. time_end = config_reader.get_app_info(each_room_dew,'模型训练','结束时间','datetime'),
  43. int_time = 'M',
  44. url = data_URL,
  45. from_cache = True,
  46. PATH = Path(file_path),
  47. )
  48. all_data.append(pd.read_pickle(file_path).set_axis([each_point_name],axis=1))
  49. all_data = pd.concat(all_data,axis=1).to_pickle(f'{data_raw_folder}/{each_room_dew}.pkl')