room.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156
  1. from typing import Union
  2. import numpy as np
  3. import pandas as pd
  4. from sklearn.linear_model import LinearRegression
  5. try:
  6. import plotnine as gg
  7. except:
  8. pass
  9. from .._base._base import BaseModel
  10. class RoomDewPredictor(BaseModel):
  11. def __init__(self):
  12. super().__init__()
  13. def fit_Droom(self,Dout:np.ndarray, Droom:np.ndarray):
  14. _,_,best_lag,best_coef = self.get_lag_coef(Dout=Dout,Droom=Droom)
  15. self.record_model(
  16. model_name = 'Droom',
  17. model = {'coef':best_coef,'lag':best_lag},
  18. train_data = {'Dout':Dout,'Droom':Droom},
  19. train_metric = {'R2':1,'MAE':1,'MAPE':1},
  20. )
  21. return self
  22. @property
  23. def lag(self):
  24. return self.model_info['model_Droom']['lag']
  25. def predict_Droom(self,Dout:np.ndarray,Droom_cur:float,sm_frac=0) -> np.ndarray:
  26. if isinstance(Droom_cur,np.ndarray):
  27. Droom_cur = Droom_cur[-1]
  28. model = self.model_info['model_Droom']
  29. lag = model['lag']
  30. coef = model['coef']
  31. Dout = Dout[-lag-1:]
  32. Dout_diff = np.diff(Dout)
  33. Droom_diff = Dout_diff * coef
  34. Droom = smooth(Droom_diff.cumsum() + Droom_cur,sm_frac)
  35. return Droom
  36. @classmethod
  37. def to_diffdata(
  38. self,
  39. Dout : np.ndarray,
  40. Droom: Union[np.ndarray,None]
  41. ) -> pd.DataFrame:
  42. data = {'Dout':Dout}
  43. if Droom is not None:
  44. data['Droom'] = Droom
  45. data = (
  46. pd.DataFrame(data)
  47. .assign(ts=np.arange(len(Dout))).set_index('ts')
  48. .rolling(15,center=False).mean()
  49. .diff()
  50. .rolling(15,center=False).mean()
  51. .dropna()
  52. )
  53. for lag in range(1,30):
  54. data[f'Dout_lag{lag:02d}'] = data['Dout'].shift(lag)
  55. data = data.dropna()
  56. return data
  57. def plot_diffdata(
  58. self,
  59. Dout : np.ndarray,
  60. Droom: np.ndarray
  61. ):
  62. plot_raw = (
  63. pd.DataFrame({'Dout':Dout,'Droom':Droom})
  64. .assign(ts=np.arange(len(Dout)))
  65. .pipe(gg.ggplot)
  66. + gg.aes(x='ts')
  67. + gg.geom_line(gg.aes(y='Droom'),color='red')
  68. + gg.geom_line(gg.aes(y='Dout'),color='blue')
  69. )
  70. plot_diff = (
  71. self.to_diffdata(Dout=Dout,Droom=Droom)
  72. .reset_index()
  73. .pipe(gg.ggplot)
  74. + gg.aes(x='ts')
  75. + gg.geom_line(gg.aes(y='Droom'),color='red')
  76. + gg.geom_line(gg.aes(y='Dout'),color='blue')
  77. )
  78. plot = (plot_raw / plot_diff) + gg.theme(figure_size=[8,6])
  79. return plot
  80. def plot_diffdata_lagcorr(self,Dout:np.ndarray, Droom:np.ndarray):
  81. all_coef,all_r2,best_lag,best_coef = self.get_lag_coef(Dout=Dout,Droom=Droom)
  82. data = pd.DataFrame({'coef':all_coef,'r2':all_r2,'lag':np.arange(1,30)})
  83. p1 = (
  84. data
  85. .pipe(gg.ggplot)
  86. + gg.aes(x='lag',y='coef')
  87. + gg.geom_point()
  88. + gg.geom_line()
  89. + gg.geom_vline(xintercept=best_lag,color='red',linetype='--')
  90. + gg.scale_x_continuous(breaks=np.arange(1,30))
  91. )
  92. p2 = (
  93. data
  94. .pipe(gg.ggplot)
  95. + gg.aes(x='lag',y='r2')
  96. + gg.geom_point()
  97. + gg.geom_line()
  98. + gg.geom_vline(xintercept=best_lag,color='red',linetype='--')
  99. + gg.scale_x_continuous(breaks=np.arange(1,30))
  100. + gg.scale_y_continuous(breaks=np.arange(0,1,0.2))
  101. )
  102. plot_line = (p1 / p2)
  103. plot_scatter = (
  104. self.to_diffdata(Dout=Dout,Droom=Droom)
  105. .reset_index()
  106. .melt(id_vars=['ts','Droom'],value_name='Dout_lag')
  107. .pipe(gg.ggplot)
  108. + gg.aes(x='Dout_lag',y='Droom')
  109. + gg.geom_point()
  110. + gg.facet_wrap('~variable',ncol=8)
  111. + gg.geom_smooth(method='lm',color='red',se=False)
  112. + gg.geom_vline(xintercept=0,color='grey',linetype='--')
  113. + gg.geom_hline(yintercept=0,color='grey',linetype='--')
  114. + gg.geom_abline(color='grey',linetype='--')
  115. )
  116. plot = (plot_scatter | plot_line)+ gg.theme(figure_size=[25,8])
  117. return plot
  118. def get_lag_coef(self,Dout:np.ndarray, Droom:np.ndarray):
  119. diffdata = self.to_diffdata(Dout=Dout,Droom=Droom)
  120. all_lag = np.arange(1,30)
  121. all_coef = []
  122. all_r2 = []
  123. for i in all_lag:
  124. lag = f'Dout_lag{i:02d}'
  125. x = diffdata.loc[:,[lag]].values
  126. y = diffdata.Droom.values
  127. lm = LinearRegression(fit_intercept=False).fit(x,y)
  128. coef = lm.coef_
  129. r2 = lm.score(x,y)
  130. all_coef.append(coef[0])
  131. all_r2.append(r2)
  132. all_coef = np.array(all_coef)
  133. all_r2 = np.array(all_r2)
  134. best_lag = all_lag[all_r2.argmax()]
  135. best_coef = all_coef[all_r2.argmax()]
  136. return all_coef,all_r2,best_lag,best_coef
  137. def smooth(y:pd.Series,frac=0.1):
  138. import statsmodels.api as sm
  139. x = np.arange(len(y))
  140. lowess_result = sm.nonparametric.lowess(y, x, frac=frac) # frac 控制平滑程度
  141. return lowess_result[:, 1]