room.py 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197
  1. from typing import Union
  2. import numpy as np
  3. import pandas as pd
  4. from sklearn.linear_model import LinearRegression
  5. try:
  6. import plotnine as gg
  7. except:
  8. pass
  9. from .._base._base import BaseModel
  10. class RoomDewPredictor(BaseModel):
  11. def __init__(self,coef_is_pos:bool):
  12. super().__init__()
  13. self.coef_is_pos = coef_is_pos
  14. self.record_load_info(coef_is_pos = self.coef_is_pos)
  15. def fit_Droom(self,Dout:np.ndarray, Droom:np.ndarray):
  16. _,_,best_lag,best_coef,boundary = self.get_lag_coef(Dout=Dout,Droom=Droom)
  17. self.record_model(
  18. model_name = 'Droom',
  19. model = {'coef':best_coef,'lag':best_lag,'boundary':boundary},
  20. train_data = {'Dout':Dout,'Droom':Droom},
  21. train_metric = {'R2':1,'MAE':1,'MAPE':1},
  22. )
  23. return self
  24. @property
  25. def lag(self):
  26. return self.model_info['model_Droom']['lag']
  27. def predict_Droom(self,Dout:np.ndarray,Droom_cur:float,sm_frac=0) -> np.ndarray:
  28. if isinstance(Droom_cur,np.ndarray):
  29. Droom_cur = Droom_cur[-1]
  30. model = self.model_info['model_Droom']
  31. lag = model['lag']
  32. coef = model['coef']
  33. boundary = model['boundary']
  34. Dout = Dout[-lag-1:]
  35. Dout_diff = np.diff(Dout)
  36. Droom_diff = np.clip(Dout_diff * coef,boundary[0],boundary[1])
  37. Droom = smooth(Droom_diff.cumsum() + Droom_cur,sm_frac)
  38. return Droom
  39. def plot_predict_Droom(self,Dout:np.ndarray,Droom_real:np.ndarray,sm_frac=0) -> np.ndarray:
  40. all_pred = {}
  41. for i in range(self.lag+1,len(Dout)-self.lag):
  42. Droom_cur = Droom_real[i]
  43. pred = self.predict_Droom(Dout=Dout[:i],Droom_cur=Droom_cur,sm_frac=sm_frac)
  44. all_pred[f'step_{i}'] = pd.Series(pred,index=np.arange(i+1,i+self.lag+1))
  45. all_pred = pd.concat(all_pred,axis=1)
  46. plot = (
  47. all_pred
  48. .assign(D=Droom_real[self.lag+2:])
  49. .reset_index()
  50. .melt(id_vars=['index','D'])
  51. .pipe(gg.ggplot)
  52. + gg.aes(x='index',y='value',color='variable')
  53. + gg.geom_line()
  54. + gg.geom_line(gg.aes(x='index',y='D'),color='black')
  55. + gg.theme(legend_position='none')
  56. )
  57. return plot
  58. def to_diffdata(
  59. self,
  60. Dout : np.ndarray,
  61. Droom: Union[np.ndarray,None]
  62. ) -> pd.DataFrame:
  63. data = {'Dout':Dout}
  64. if Droom is not None:
  65. data['Droom'] = Droom
  66. data = (
  67. pd.DataFrame(data)
  68. .assign(ts=np.arange(len(Dout))).set_index('ts')
  69. .rolling(15,center=False).mean()
  70. .diff()
  71. .rolling(15,center=False).mean()
  72. .dropna()
  73. )
  74. for lag in range(1,30):
  75. data[f'Dout_lag{lag:02d}'] = data['Dout'].shift(lag)
  76. data = data.dropna()
  77. return data
  78. def plot_diffdata(
  79. self,
  80. Dout : np.ndarray,
  81. Droom: np.ndarray
  82. ):
  83. plot_raw = (
  84. pd.DataFrame({'Dout':Dout,'Droom':Droom})
  85. .assign(ts=np.arange(len(Dout)))
  86. .pipe(gg.ggplot)
  87. + gg.aes(x='ts')
  88. + gg.geom_line(gg.aes(y='Droom'),color='red')
  89. + gg.geom_line(gg.aes(y='Dout'),color='blue')
  90. )
  91. plot_diff = (
  92. self.to_diffdata(Dout=Dout,Droom=Droom)
  93. .reset_index()
  94. .pipe(gg.ggplot)
  95. + gg.aes(x='ts')
  96. + gg.geom_line(gg.aes(y='Droom'),color='red')
  97. + gg.geom_line(gg.aes(y='Dout'),color='blue')
  98. )
  99. plot = (plot_raw / plot_diff) + gg.theme(figure_size=[8,6])
  100. return plot
  101. def plot_diffdata_lagcorr(self,Dout:np.ndarray, Droom:np.ndarray):
  102. all_coef,all_r2,best_lag,_,_ = self.get_lag_coef(Dout=Dout,Droom=Droom)
  103. data = pd.DataFrame({'coef':all_coef,'r2':all_r2,'lag':np.arange(1,30)})
  104. p1 = (
  105. data
  106. .pipe(gg.ggplot)
  107. + gg.aes(x='lag',y='coef')
  108. + gg.geom_point()
  109. + gg.geom_line()
  110. + gg.geom_vline(xintercept=best_lag,color='red',linetype='--')
  111. + gg.geom_hline(yintercept=0,color='blue',linetype='--')
  112. + gg.scale_x_continuous(breaks=np.arange(1,30))
  113. )
  114. p2 = (
  115. data
  116. .pipe(gg.ggplot)
  117. + gg.aes(x='lag',y='r2')
  118. + gg.geom_point()
  119. + gg.geom_line()
  120. + gg.geom_vline(xintercept=best_lag,color='red',linetype='--')
  121. + gg.geom_hline(yintercept=0.5,color='blue',linetype='--')
  122. + gg.scale_x_continuous(breaks=np.arange(1,30))
  123. + gg.scale_y_continuous(breaks=np.arange(0,1,0.2))
  124. )
  125. plot_line = (p1 / p2)
  126. plot_scatter = (
  127. self.to_diffdata(Dout=Dout,Droom=Droom)
  128. .reset_index()
  129. .melt(id_vars=['ts','Droom'],value_name='Dout_lag')
  130. .pipe(gg.ggplot)
  131. + gg.aes(x='Dout_lag',y='Droom')
  132. + gg.geom_point()
  133. + gg.facet_wrap('~variable',ncol=8)
  134. + gg.geom_smooth(method='lm',color='red',se=False)
  135. + gg.geom_vline(xintercept=0,color='grey',linetype='--')
  136. + gg.geom_hline(yintercept=0,color='grey',linetype='--')
  137. + gg.geom_abline(color='grey',linetype='--')
  138. )
  139. plot = (plot_scatter | plot_line)+ gg.theme(figure_size=[25,8])
  140. return plot
  141. def get_lag_coef(self,Dout:np.ndarray, Droom:np.ndarray):
  142. diffdata = self.to_diffdata(Dout=Dout,Droom=Droom)
  143. boundary = diffdata.loc[:,'Droom'].quantile(q=[0.01,0.99])
  144. boundary = (boundary.iat[0],boundary.iat[1])
  145. all_lag = np.arange(1,30)
  146. all_coef = []
  147. all_r2 = []
  148. for i in all_lag:
  149. lag = f'Dout_lag{i:02d}'
  150. x = diffdata.loc[:,[lag]].values
  151. y = diffdata.Droom.values
  152. lm = LinearRegression(fit_intercept=False).fit(x,y)
  153. coef = lm.coef_
  154. r2 = lm.score(x,y)
  155. all_coef.append(coef[0])
  156. all_r2.append(r2)
  157. all_coef = np.array(all_coef)
  158. all_r2 = np.array(all_r2)
  159. is_right_coef = all_coef > 0 if self.coef_is_pos else all_coef < 0
  160. right_lag = all_lag[is_right_coef]
  161. right_coef = all_coef[is_right_coef]
  162. if np.sum(is_right_coef) == 0:
  163. best_lag = 0
  164. best_coef = 0
  165. else:
  166. best_lag = right_lag[all_r2[is_right_coef].argmax()]
  167. best_coef = right_coef[all_r2[is_right_coef].argmax()]
  168. return (
  169. all_coef,
  170. all_r2,
  171. best_lag,
  172. best_coef,
  173. boundary
  174. )
  175. def smooth(y:pd.Series,frac=0.1):
  176. import statsmodels.api as sm
  177. x = np.arange(len(y))
  178. lowess_result = sm.nonparametric.lowess(y, x, frac=frac) # frac 控制平滑程度
  179. return lowess_result[:, 1]