# data_loader.py
  import os
  import re
  from pathlib import Path
  import shutil
  from typing import Union

  import numpy as np
  import pandas as pd
  import psychrolib

  # Select SI units for every psychrolib call made through this module.
  psychrolib.SetUnitSystem(psychrolib.SI)

  # Element-wise (array-capable) wrappers around the scalar psychrolib functions.
  # ``otypes`` pins the output dtype, so empty inputs are accepted and the wrapped
  # function is not called an extra time just to probe its return type.
  # get_Dew(dry_bulb, relative_humidity) -> dew point
  get_Dew = np.vectorize(psychrolib.GetTDewPointFromRelHum, otypes=[float])
  # get_Hr(dew_point, pressure) -> humidity ratio
  get_Hr = np.vectorize(psychrolib.GetHumRatioFromTDewPoint, otypes=[float])

  # Kept after the psychrolib setup, in the original statement order.
  from .._data.main import get_data
  class DataLoader:
      """Download equipment point data into a pickle cache and read it back.

      Every equipment gets its own directory ``<path>/<equp_name>`` that holds
      one ``<point_name>.pkl`` file per point.

      Attributes:
          path: Root directory of the cache.
          start_time: Start of the requested period.
          end_time: End of the requested period.
          int_time: pandas frequency alias of the local time grid (``'min'``).
          date_range: ``pd.date_range`` from ``start_time`` to ``end_time`` at
              ``int_time``; used as the index of constant and derived points.
      """

      # A plain signed decimal literal such as ``12``, ``-3.5`` or ``.25``.
      _NUMBER_PATTERN = re.compile(r'^[-+]?(\d+(\.\d*)?|\.\d+)$')
      # ``Dew(<dry bulb>,<relative humidity>)``; group 1 is the argument list.
      _DEW_PATTERN = re.compile(r'^Dew\((.*?)\)$')
      # ``[<point id>]``: a point that does not belong to this equipment.
      _EXTERNAL_PATTERN = re.compile(r'^\[.*\]$')
      # Pressure handed to ``get_Hr`` (one standard atmosphere when read as Pa).
      _PRESSURE = 101325

      def __init__(self, path, start_time, end_time):
          """Store the cache root and the requested period.

          Args:
              path: Root directory under which the equipment folders live.
              start_time: Start of the period; forwarded to ``pd.date_range``
                  and to ``get_data``.
              end_time: End of the period; used the same way.
          """
          self.path = path
          self.start_time = start_time
          self.end_time = end_time
          self.int_time = 'min'
          self.date_range = pd.date_range(
              start=self.start_time,
              end=self.end_time,
              freq=self.int_time,
          )

      @classmethod
      def _parse_dew_formula(cls, point_class):
          """Return ``(dry_bulb, relative_humidity)`` for a ``Dew(a,b)`` string.

          Returns ``None`` when ``point_class`` is not a ``Dew(...)`` formula.

          Raises:
              ValueError: If the formula does not have exactly two arguments.
          """
          matched = cls._DEW_PATTERN.match(point_class)
          if matched is None:
              return None
          # Take the text between the parentheses literally.  ``str.strip('Dew(')``
          # removes a *set of characters*, so it also ate leading 'D', 'e' and 'w'
          # of the first argument (``Dew(Dry_T,RH)`` became ``ry_T``).
          arguments = [part.strip() for part in matched.group(1).split(',')]
          if len(arguments) != 2:
              raise ValueError(
                  f'Dew() expects exactly two arguments, got {point_class!r}'
              )
          return arguments[0], arguments[1]

      def _fetch_point(self, point_id, target_path, url):
          """Call ``get_data`` for one point id, with ``target_path`` as ``PATH``."""
          get_data(
              points_id=[point_id],
              time_start=self.start_time,
              time_end=self.end_time,
              int_time='M',
              url=url,
              from_cache=True,
              PATH=Path(target_path),
          )

      def dowload_equp_data(
          self,
          equp_name: str,
          point: dict,
          url: str,
          clean_cache: bool
      ):
          """Populate ``<path>/<equp_name>`` with one pickle per configured point.

          Args:
              equp_name: Equipment name; names the cache directory and prefixes
                  the ids of the equipment's own points.
              point: Mapping of point name to point definition.  The definition
                  is converted with ``str()`` and interpreted as follows:

                  * ``'/'``: the point is skipped.
                  * a numeric literal: a constant column over ``date_range`` is
                    pickled.
                  * ``'Dew(a,b)'``: ``<equp_name>_a`` and ``<equp_name>_b`` are
                    downloaded into the files named after the point with
                    ``_D`` replaced by ``_T`` and ``_R`` respectively.
                  * ``'[id]'``: ``id`` is downloaded as is (a point that does
                    not belong to this equipment).
                  * anything else: ``<equp_name>_<definition>`` is downloaded.
              url: Forwarded to ``get_data``.
              clean_cache: When true, an existing equipment directory is deleted
                  before anything is written.

          Returns:
              ``self``, so calls can be chained.
          """
          equp_path = os.path.join(self.path, equp_name)
          if clean_cache and os.path.exists(equp_path):
              shutil.rmtree(equp_path)
          os.makedirs(equp_path, exist_ok=True)

          for point_name, point_class in point.items():
              point_path = os.path.join(equp_path, f'{point_name}.pkl')
              point_class = str(point_class)

              if point_class == '/':
                  continue

              # Plain number: store it as a constant series.
              if self._NUMBER_PATTERN.match(point_class):
                  constant = pd.DataFrame(
                      {point_name: float(point_class)},
                      index=self.date_range,
                  )
                  pd.to_pickle(constant, point_path)
                  continue

              # Formula: dew point from dry-bulb temperature and relative humidity.
              dew_inputs = self._parse_dew_formula(point_class)
              if dew_inputs is not None:
                  for source, suffix in zip(dew_inputs, ('_T', '_R')):
                      # Rename inside the point name only; replacing on the whole
                      # path also rewrote any '_D' in the directory part.
                      # NOTE(review): a point name without '_D' maps both inputs
                      # to the same file, exactly as before - confirm the naming
                      # convention with the configuration owner.
                      source_name = point_name.replace('_D', suffix)
                      self._fetch_point(
                          f'{equp_name}_{source}',
                          os.path.join(equp_path, f'{source_name}.pkl'),
                          url,
                      )
                  continue

              if self._EXTERNAL_PATTERN.match(point_class):
                  # Point that does not belong to this equipment: use the id as is.
                  point_id = point_class.replace('[', '').replace(']', '')
              else:
                  # Regular point of this equipment.
                  point_id = f'{equp_name}_{point_class}'
              self._fetch_point(point_id, point_path, url)

          # Fill in the data that was not specified explicitly.
          self._derive_humidity_points(equp_path)
          return self

      def _derive_humidity_points(self, equp_path):
          """Write derived dew-point (``_D``) and humidity-ratio (``_H``) files.

          Runs in two passes so that a dew-point file produced in the first pass
          is picked up by the second one within the same call.
          """
          # Pass 1: dew point from every dry-bulb (_T) / relative-humidity (_R) pair.
          cached = {name for name in os.listdir(equp_path) if '.pkl' in name}
          for file in sorted(cached):
              if '_T' not in file:
                  continue
              rh_file = file.replace('_T', '_R')
              if rh_file not in cached:
                  continue
              dew_file = file.replace('_T', '_D')
              dry_bulb = pd.read_pickle(os.path.join(equp_path, file)).iloc[:, 0].values
              rel_hum = pd.read_pickle(os.path.join(equp_path, rh_file)).iloc[:, 0].values
              # Relative humidity is clipped to 0..100 and scaled to a 0..1 fraction.
              dew_frame = pd.DataFrame(
                  {dew_file: get_Dew(dry_bulb, np.clip(rel_hum, 0, 100) / 100)},
                  index=self.date_range,
              )
              pd.to_pickle(dew_frame, os.path.join(equp_path, dew_file))

          # Pass 2: humidity ratio from every dew-point file.  The directory is
          # listed again because pass 1 may just have created some of them.
          for file in sorted(os.listdir(equp_path)):
              if '.pkl' not in file or '_D' not in file:
                  continue
              hr_file = file.replace('_D', '_H')
              dew_point = pd.read_pickle(os.path.join(equp_path, file)).iloc[:, 0].values
              hr_frame = pd.DataFrame(
                  {hr_file: get_Hr(dew_point, self._PRESSURE)},
                  index=self.date_range,
              )
              pd.to_pickle(hr_frame, os.path.join(equp_path, hr_file))

      def get_equp_data(self, equp_name: str) -> pd.DataFrame:
          """Load every cached point of an equipment into one frame.

          Args:
              equp_name: Equipment whose directory ``<path>/<equp_name>`` is read.

          Returns:
              One column per pickle file, named after the file without ``.pkl``,
              sliced to ``start_time``..``end_time`` and ordered by file name.

          Raises:
              FileNotFoundError: If the directory does not exist or holds no
                  pickle file.
          """
          equp_path = os.path.join(self.path, equp_name)
          # Sorted so the column order does not depend on the file system.
          all_file_path = sorted(os.listdir(equp_path))
          frames = [
              pd.read_pickle(os.path.join(equp_path, file))
              .set_axis([file.replace('.pkl', '')], axis=1)
              .loc[self.start_time:self.end_time, :]
              for file in all_file_path
              if '.pkl' in file
          ]
          if not frames:
              raise FileNotFoundError(f'没有找到指定数据{all_file_path}')
          return pd.concat(frames, axis=1)