model.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. import time
  2. import numpy as np
  3. import pandas as pd
  4. from .._utils.data_summary import summary_dataframe
  5. try:
  6. from executor import exec_workflow
  7. from workflowlib.utils import queryset
  8. except:
  9. pass
  10. def main(config=None):
  11. model_name = config['model_name']
  12. model_code = config['model_code']
  13. input_code = config['input_code']
  14. output_code = config['output_code']
  15. get_mod_io = config.get('get_mod_io',True)
  16. model = SystemModel(model_name=model_name,model_code=model_code,input_code=input_code,output_code=output_code,get_mod_io=get_mod_io)
  17. return model
  18. class SystemModel:
  19. def __init__(
  20. self,
  21. model_name : str = None,
  22. model_code : str = None,
  23. input_code : str = None,
  24. output_code : str = None,
  25. get_mod_io : bool = None
  26. ) -> None:
  27. self.MODEL_NAME = model_name
  28. self.MODEL_CODE = model_code
  29. self.INPUT_CODE = input_code
  30. self.OUTPUT_CODE = output_code
  31. self.GET_MOD_IO = get_mod_io
  32. self.PAST_TIME = []
  33. # 获取画布下的系统模型组件的输入输出桩名称
  34. if self.GET_MOD_IO == True:
  35. # 系统模型的输入和输出的point_id
  36. node = queryset.nodes().filter(code=self.MODEL_CODE).first()
  37. try:
  38. self.MODEL_INPUT_ID = [i.get('point_id') for i in node.ports_in]
  39. self.MODEL_OUTPUT_ID = [i.get('point_id') for i in node.ports_out]
  40. except:
  41. raise Exception('未获取到系统模型的输入输出ID, 需检查优化算法组件的配置是否正确')
  42. else:
  43. self.MODEL_INPUT_ID = None
  44. self.MODEL_OUTPUT_ID = None
  45. def predict(self, data:pd.DataFrame) -> pd.DataFrame:
  46. all_data = []
  47. for i in range(data.shape[1]):
  48. try:
  49. all_data.append(data.iloc[:,[i]].astype('float64'))
  50. except Exception as E:
  51. data_i = data.iloc[:,[i]]
  52. print(f'{data_i}数据有误,不可转换为float')
  53. summary_dataframe(data,df_name='系统模型输入排查')
  54. raise Exception(E)
  55. time_start = time.time()
  56. try:
  57. output_data = exec_workflow(self.MODEL_NAME, self.INPUT_CODE, self.OUTPUT_CODE, *all_data)
  58. except Exception as E:
  59. summary_dataframe(data,df_name='系统模型输入排查')
  60. raise Exception(E)
  61. time_end = time.time()
  62. past_time = round(time_end-time_start,2)
  63. self.PAST_TIME.append(past_time)
  64. print(f'第{len(self.PAST_TIME)}次调用系统模型,本次调用时长为:{past_time}秒')
  65. output_data = output_data if isinstance(output_data,list) else [output_data]
  66. output_data = pd.concat(output_data,axis=1)
  67. return output_data
  68. def summary_past_time(self):
  69. try:
  70. print(
  71. f'SUM :{np.sum(self.PAST_TIME).round(2)} \n'
  72. f'Mean:{np.mean(self.PAST_TIME).round(2)} \n'
  73. f'SD :{np.std(self.PAST_TIME).round(2)} \n'
  74. f'MIN :{np.min(self.PAST_TIME)} \n'
  75. f'MAX :{np.max(self.PAST_TIME)} \n'
  76. f'Q25 :{np.quantile(self.PAST_TIME,0.25)} \n'
  77. f'Q75 :{np.quantile(self.PAST_TIME,0.75)} \n'
  78. )
  79. except:
  80. pass
  81. def predict_derivate_1(self,data:pd.DataFrame,vars:list,eps=1e-4) -> pd.DataFrame:
  82. if pd.Index(data=vars).has_duplicates is True:
  83. raise Exception(f'{vars}中存在重复值')
  84. vars = list(vars)
  85. n_var = len(vars)
  86. n_row = data.shape[0]
  87. sample_index = pd.Index(np.tile(np.arange(n_row),n_var+1),dtype='str')
  88. var_index = pd.Index(np.repeat(['raw']+vars,repeats=n_row),dtype='str')
  89. index = '__'+var_index+'__'+sample_index
  90. index.name = '__VAR__SAMPLE'
  91. data = pd.DataFrame(
  92. data = np.tile(data.values,[n_var+1,1]),
  93. columns = data.columns,
  94. index = index
  95. )
  96. for var in vars:
  97. if var not in data.columns:
  98. raise Exception(f'数据中不包含给定的列{var}')
  99. data.loc[lambda dt:dt.index.str.contains(f'__{var}__'),var] += eps
  100. pred = self.predict(data=data)
  101. pred.index = data.index
  102. all_derivate_1 = []
  103. for var in vars:
  104. f_dx = pred.loc[lambda dt:dt.index.str.contains(f'__{var}__'),:]
  105. f_x = pred.loc[lambda dt:dt.index.str.contains(f'__raw__'),:]
  106. derivate_1 = pd.DataFrame((f_dx.values-f_x.values)/eps,index=f_dx.index,columns=pred.columns)
  107. all_derivate_1.append(derivate_1)
  108. all_derivate_1 = pd.concat(all_derivate_1,axis=0)
  109. return all_derivate_1
  110. def predict_derivate_2(self,data:pd.DataFrame,vars:str,eps=1e-4) -> pd.DataFrame:
  111. if pd.Index(data=vars).has_duplicates is True:
  112. raise Exception(f'{vars}中存在重复值')
  113. f_x = self.predict_derivate_1(data=data,vars=vars,eps=eps)
  114. f_dx = self.predict_derivate_1(data=data+eps,vars=vars,eps=eps)
  115. all_derivate_2 = (f_dx-f_x)/eps
  116. return all_derivate_2