# opt_obj.py (12 KB)
  1. import numpy as np
  2. import pandas as pd
  3. import geatpy as ea
  4. from .model.model import SystemModel
  5. from .constrains.parse import parse_constrains
  6. from .constrains.execute import execute_constrains
  class Opt(ea.Problem):
      """Single-objective geatpy problem evaluated through a system model.

      Candidate decision variables produced by the optimiser are expanded
      (``insert_sync_var``), merged with the fixed system/other variables,
      mapped back to real values (``restore_data``) and fed to
      ``system_model.predict``.  The column named ``opt_target`` of the
      prediction is the objective; every parsed constraint contributes one
      column of the constraint-violation matrix ``pop.CV``.
      """

      def __init__(
          self,
          lb: list,
          ub: list,
          varTypes: list,
          var_precis: list,
          maxormins: int,
          collection_map: dict,
          var_map: dict,
          boundary_insert: dict,
          opt_var: pd.DataFrame,
          sys_var: pd.DataFrame,
          system_model: SystemModel,
          oth_var: pd.DataFrame,
          cst_var: pd.DataFrame,
          opt_target: str,
          constrains: list = None,
      ):
          """Build the problem and snapshot the current system state.

          Args:
              lb: Lower bounds of the decision variables (before expansion).
              ub: Upper bounds of the decision variables (before expansion).
              varTypes: Per-variable type flag, 0 = continuous, 1 = discrete.
              var_precis: Stored as ``self.var_precis``; not used in this class.
              maxormins: 1 to minimise the objective, -1 to maximise it.
              collection_map: Index -> value mapping for set/mapping-type
                  bounds, e.g. ``{'u1': {0: 1.5, 1: 2.3}, 'u2': {0: 1.1, 1: 3.2}}``.
              var_map: For mapping-type bounds, the relation between an
                  optimisation variable and system variables, e.g.
                  ``{'u1': {'x1': {1: 0.1, 2: 0.4}, 'x2': {2: 0.1, 3: 0.4}}}``.
              boundary_insert: ``{position: count}`` describing how many
                  duplicated entries ``insert_sync_var`` inserts at a position.
              opt_var: One-row frame with the current optimisation variables.
              sys_var: One-row frame with the current system variables.
              system_model: Model exposing ``predict`` and ``MODEL_INPUT_ID``.
              oth_var: Frame with the "other variables" (may be ``None``).
              cst_var: Frame with constraint-only variables (may be ``None``).
              opt_target: Name of the model output column to optimise.
              constrains: Raw constraint definitions handed to
                  ``parse_constrains``; ``None`` means no constraints.

          Raises:
              Exception: Model inputs do not match, or the input is not one row.
              ValueError: ``opt_target`` is missing or duplicated in the output.
          """
          # Use a fresh list per instance instead of a shared mutable default.
          if constrains is None:
              constrains = []

          ea.Problem.__init__(
              self,
              name='Opt',
              M=1,  # single objective
              maxormins=[maxormins],  # 1: minimise the target, -1: maximise it
              Dim=len(lb),
              varTypes=varTypes,  # 0: continuous variable, 1: discrete variable
              lb=lb,
              ub=ub,
          )
          self.lb = lb
          # ``up`` is the historical (misspelled) attribute; it is kept so that
          # existing readers keep working.  ``ub`` is what this class reads, so
          # make sure it exists even if the base class did not set it.
          self.up = ub
          if not hasattr(self, 'ub'):
              self.ub = ub

          self.opt_var = opt_var  # DataFrame
          self.sys_var = sys_var  # DataFrame
          self.oth_var = oth_var  # DataFrame
          self.cst_var = cst_var  # DataFrame
          self.name_opt_vars = opt_var.columns.to_list()
          self.system_model = system_model
          self.opt_target = opt_target  # optimisation target
          self.constrains = constrains  # raw constraints, parsed further below
          self.collection_map = collection_map
          self.var_map = var_map
          self.var_precis = var_precis
          self.boundary_insert = boundary_insert
          # Names of the optimisation variables that are continuous.
          self.name_opt_vars_c = self.get_name_opt_vars_c(varTypes)
          # Used to assign the index of the optimiser component's output.
          self.index = opt_var.index

          self.current_system_x = self.get_current_system_x()
          # Prediction based on the current values: it does not reflect the
          # real values and is affected by model error.
          self.current_system_y = self.get_current_system_y()
          self.current_system_x_y = pd.concat(
              [self.current_system_y, self.current_system_x], axis=1
          )

          # x and y of the system model may share names; that is only allowed
          # for x columns coming from the "other variables".  Those columns are
          # dropped from the constraint input to avoid duplicated labels, so
          # constraints must not reference "other variables".
          oth_var_col = self.oth_var.columns if isinstance(oth_var, pd.DataFrame) else []
          self.current_system_x_y_cst = pd.concat(
              [self.current_system_y, self.current_system_x.drop(oth_var_col, axis=1)],
              axis=1,
          )
          self.constrains_var = pd.concat(
              [self.current_system_x_y_cst, self.cst_var], axis=1
          )
          self.constrains = parse_constrains(self.constrains, self.constrains_var)

          # Check whether the current values already violate the bounds or
          # constraints; if they do, the result may look like a "negative
          # optimisation".  For reference only: an analog value outside its
          # range may simply mean the equipment is off.  Such an outcome is
          # caused by unreasonable bounds/feasibility constraints, not by a
          # failure of the algorithm.
          self.cur_sys_constrains_cfl = self.get_cur_sys_constrains_cfl()

      def get_name_opt_vars_c(self, varTypes):
          """Return the names of the continuous optimisation variables.

          ``varTypes`` is first expanded with ``insert_sync_var`` so that it
          lines up with ``self.name_opt_vars``; entries equal to 0 are kept.
          """
          complete_var_types = np.array(self.insert_sync_var(varTypes, flatten=True))
          complete_var_names = np.array(self.name_opt_vars)
          is_var_continus = complete_var_types == 0
          return complete_var_names[is_var_continus]

      def check_data_and_system_input(self, data_input: list) -> None:
          """Validate the assembled input columns against the model's inputs.

          Nothing is checked when ``system_model.MODEL_INPUT_ID`` is ``None``.

          Raises:
              Exception: The two name collections differ, or differ in order.
          """
          system_model_input = self.system_model.MODEL_INPUT_ID
          if system_model_input is None:
              return None
          # TODO: when the input names are not validated, the length still
          # needs to be validated.
          # Missing or extra names.
          input_diff = set(data_input).symmetric_difference(system_model_input)
          if len(input_diff) != 0:
              raise Exception(f'系统模型的输入与优化组件的输入不匹配,差异的输入为{input_diff},其中系统模型的输入为{system_model_input},优化组件的输入为{data_input}')
          # Same names but a different order.  Compare as lists so a tuple or
          # array of ids is not reported as a mismatch (or ambiguous) by type.
          if list(data_input) != list(system_model_input):
              raise Exception(f'系统模型和优化组件输入的顺序有差异,其中系统模型的输入为{system_model_input},优化组件的输入为{data_input}')

      def get_current_system_x(self):
          """Concatenate opt/sys/other variables into the current model input.

          Raises:
              Exception: The column check fails or the frame is not one row.
          """
          data_x_current = pd.concat([self.opt_var, self.sys_var, self.oth_var], axis=1)
          self.check_data_and_system_input(data_x_current.columns.to_list())
          # The initial input must be exactly one row.
          if data_x_current.shape[0] != 1:
              print('系统模型的初始输入数据')
              print(data_x_current)
              raise Exception('系统模型的初始输入数据的行维度不等于1,检查输入数据')
          return data_x_current

      def get_current_system_y(self):
          """Predict the model output for the current input and validate it.

          Raises:
              ValueError: ``opt_target`` is absent from the output columns, or
                  appears more than once.
          """
          data_y_current = self.system_model.predict(self.current_system_x)
          print(f'系统模型的输出变量:{data_y_current.columns.to_list()}')
          # The target column must exist ...
          if self.opt_target not in data_y_current.columns:
              raise ValueError(f'未在系统模型的输出中找到优化目标{self.opt_target},需检查系统模型优化的输出{data_y_current.columns.to_list()}')
          # ... and be unique: a duplicated label makes .loc return a DataFrame.
          if isinstance(data_y_current.loc[:, self.opt_target], pd.DataFrame):
              raise ValueError(f'在系统模型的输出中找到多个相同名称的优化目标{self.opt_target},需检查系统模型的输出')
          return data_y_current

      def get_cur_sys_constrains_cfl(self) -> dict:
          """Collect the bounds and constraints violated by the current values.

          Returns:
              A dict with ``'boundary_int'`` (interval bounds, name -> message),
              ``'boundary_col'`` (set/mapping bounds, name -> message) and
              ``'constrains'`` (list of violated parsed constraints).
          """
          conflicts = {
              'boundary_int': {},  # interval bounds
              'boundary_col': {},  # set and mapping bounds
              'constrains': [],
          }
          # Expanded bounds are loop-invariant; build them once, on first use.
          expanded_ub = None
          expanded_lb = None

          # Boundary check.
          for idx, name in enumerate(self.name_opt_vars):
              cur_value = self.current_system_x.loc[:, [name]].iat[0, 0]
              if name in self.collection_map:
                  # Optimisation variable bounded by a set / mapping.
                  collection = self.collection_map[name].values()
                  opt_ub = max(collection)
                  opt_lb = min(collection)
                  if cur_value not in collection:
                      conflicts['boundary_col'][name] = '优化变量不在集合内,但未超出边界范围'
                  # The out-of-range message replaces the one set just above.
                  if (cur_value > opt_ub) or (cur_value < opt_lb):
                      message = f'优化变量不在集合内,且超出边界范围,当前值={cur_value},上边界={opt_ub},下边界={opt_lb}'
                      conflicts['boundary_col'][name] = message
              else:
                  # Optimisation variable bounded by an interval.
                  if expanded_ub is None:
                      expanded_ub = self.insert_sync_var(self.ub, flatten=True)
                      expanded_lb = self.insert_sync_var(self.lb, flatten=True)
                  opt_ub = expanded_ub[idx]
                  opt_lb = expanded_lb[idx]
                  if (cur_value > opt_ub) or (cur_value < opt_lb):
                      message = f'超出边界范围,当前值={round(cur_value,2)},上边界={opt_ub},下边界={opt_lb}'
                      conflicts['boundary_int'][name] = message

          # Feasibility check: a positive value marks a violated constraint.
          for cst in self.constrains:
              cv = execute_constrains(self.current_system_x_y_cst, cst).flatten()[0]
              if cv > 0:
                  conflicts['constrains'].append(cst)
          return conflicts

      def restore_data(self, data_x, step=('var_map', 'col_map')):
          """Map encoded optimisation variables back to real values, in place.

          Args:
              data_x: Frame of model inputs; it is modified and returned.
              step: Which restorations to run: ``'var_map'`` derives system
                  variables from an optimisation variable, ``'col_map'`` maps
                  the optimisation variable itself back to its value.

          Returns:
              The same ``data_x`` object.
          """
          # System variables with mapping-type bounds.  This relies on the
          # ordinal of the discrete variable, so it must run before the
          # discrete variable itself is restored below.
          if (len(self.var_map) > 0) and ('var_map' in step):
              for var_opt_name, var_map_dict in self.var_map.items():
                  for var_sys_name, var_sys_map in var_map_dict.items():
                      data_x[var_sys_name] = data_x[var_opt_name].astype(int).map(var_sys_map, na_action='ignore')
          # Set/mapping-type bounds: restore the optimisation variables.
          if (len(self.collection_map) > 0) and ('col_map' in step):
              for var_name, map_dict in self.collection_map.items():
                  data_x[var_name] = data_x[var_name].astype('int').map(map_dict, na_action='ignore')
          return data_x

      def insert_sync_var(self, data: np.ndarray, flatten=False) -> np.ndarray:
          """Duplicate entries of ``data`` as described by ``boundary_insert``.

          For every ``(pos, num)`` with ``num > 0`` the entry at ``pos``
          (shifted by the number of entries inserted so far) is repeated
          ``num`` times and inserted in front of itself.

          Args:
              data: 2-D array whose columns are expanded, or, with
                  ``flatten=True``, a 1-D sequence whose items are expanded.
              flatten: Select the 1-D behaviour.

          Returns:
              The expanded array; ``data`` itself when nothing is inserted.
          """
          # NOTE(review): the running offset assumes boundary_insert iterates
          # in ascending position order - confirm against the caller.
          cum_num = 0
          for pos, num in self.boundary_insert.items():
              if num > 0:
                  cur_pos = cum_num + pos
                  if flatten:
                      insert_array = np.repeat(data[cur_pos], repeats=num)
                      data = np.insert(data, cur_pos, insert_array)
                  else:
                      insert_array = np.repeat(data[:, [cur_pos]], repeats=num, axis=1)
                      data = np.insert(data, [cur_pos], insert_array, axis=1)
                  cum_num += num
          return data

      def aimFunc(self, pop):
          """Evaluate a population: set ``pop.ObjV`` and, if any, ``pop.CV``.

          The model prediction of the last call is kept in ``self.data_y``.
          """
          Vars = self.insert_sync_var(pop.Phen)
          N = Vars.shape[0]
          # Assemble the system-model input: one row per individual.
          data_opt = pd.DataFrame(data=Vars, columns=self.name_opt_vars)
          data_sys = data_copy_row(self.sys_var, N_row=N)
          data_oth = data_copy_row(self.oth_var, N_row=N)
          data_x = pd.concat([data_opt, data_sys, data_oth], axis=1)
          # Open issue: the index given to the system model may carry time
          # information used in its computation, so it should be supplied
          # (e.g. self.index.repeat(N)); but a repeated time index can make
          # the system model fail, for instance on a reindex operation.

          # Restore real values according to the boundary definitions.
          data_x = self.restore_data(data_x)
          self.data_y = self.system_model.predict(data_x)
          pop.ObjV = self.data_y.loc[:, [self.opt_target]].to_numpy().reshape(-1, 1)
          data = pd.concat([data_x, self.data_y], axis=1)

          # Constraints: nothing to do when none are configured.
          if self.constrains is None or len(self.constrains) == 0:
              return
          cv = [execute_constrains(data, cst) for cst in self.constrains]
          pop.CV = np.hstack(cv)
          report_cv_info(cv=pop.CV, constrains=self.constrains)
  def data_copy_row(data: pd.DataFrame, N_row: int):
      """Broadcast the values of ``data`` to ``N_row`` rows.

      The values are multiplied by an ``(N_row, n_columns)`` array of ones,
      so the result holds floating-point copies under the original column
      labels and a fresh default index.

      Args:
          data: Frame to replicate, or ``None``.
          N_row: Number of rows in the result.

      Returns:
          The replicated frame, or ``None`` when ``data`` is ``None``.
      """
      if data is None:
          return None
      column_labels = data.columns
      ones = np.ones([N_row, len(column_labels)])
      # Broadcasting against the ones matrix repeats the source row(s).
      return pd.DataFrame(data=ones * data.to_numpy(), columns=column_labels)
  def report_cv_info(cv: np.ndarray, constrains: list):
      """Print feasibility statistics for one generation.

      An entry of ``cv`` counts as satisfied when it is ``<= 0``.  The number
      and share of individuals satisfying every constraint is printed first;
      then every constraint satisfied by less than 70% of the individuals is
      listed with its index, satisfaction rate and definition.

      Args:
          cv: Violation matrix, one row per individual, one column per
              constraint.
          constrains: Constraint definitions, indexed like the columns of
              ``cv``.
      """
      satisfied = cv <= 0
      feasible_rows = satisfied.all(axis=1)
      rate_per_cst = satisfied.mean(axis=0)
      weak_mask = rate_per_cst < 0.7
      weak_idx = np.flatnonzero(weak_mask)
      weak_rate = rate_per_cst[weak_mask].round(2)

      print(f'本次迭代可行种群数为{feasible_rows.sum()},占比{(feasible_rows.mean()*100).round(2)}%')
      if len(weak_idx) == 0:
          return
      print(f'满足以下约束条件的种群数量小于70%, 共{weak_mask.sum()}条约束, 可能会影响优化性能')
      for idx, pct in zip(weak_idx, weak_rate):
          cst = constrains[idx]
          print(f'序号:{idx} \t 约束满足比例:{round(pct*100,2)}% \t 约束:{cst}')