Improve RAOB parsing

This commit is contained in:
XANTRONIX 2025-03-28 19:03:56 -04:00
parent e119657309
commit db22113c55

View file

@ -121,7 +121,7 @@ class RAOBObs():
) )
def parse_temp_dewpoint(self, token: str): def parse_temp_dewpoint(self, token: str):
if token[2] == '/': if token[2] == '/' or token[0:4] == 'NNNN':
return { return {
'temp': None, 'temp': None,
'dewpoint': None 'dewpoint': None
@ -130,6 +130,7 @@ class RAOBObs():
if token[0:2] == '//': if token[0:2] == '//':
temp = None temp = None
else: else:
print(f"Got token {token}")
tenths = int(token[2]) tenths = int(token[2])
sign = 1 if tenths % 2 == 0 else -1 sign = 1 if tenths % 2 == 0 else -1
@ -137,8 +138,12 @@ class RAOBObs():
if token[3:5] == '//': if token[3:5] == '//':
dewpoint = None dewpoint = None
else:
if token[4] == '/':
dda = int(token[3])
else: else:
dda = int(token[3:5]) dda = int(token[3:5])
dd = dda * 0.1 if dda <= 50 else dda - 50 dd = dda * 0.1 if dda <= 50 else dda - 50
dewpoint = temp - dd dewpoint = temp - dd
@ -151,12 +156,6 @@ class RAOBObs():
base_speed = 0 base_speed = 0
base_dir = 0 base_dir = 0
if token == '=' or token == '//END':
return {
'dir': None,
'speed': None
}
if token[2] != '/': if token[2] != '/':
value = int(token[2]) value = int(token[2])
@ -267,16 +266,23 @@ class RAOBObs():
} }
def parse_pressure(self, token: str): def parse_pressure(self, token: str):
code, pressure = token[0:2], float(token[2:5]) ret = {
'pressure': None,
if pressure < 100:
pressure += 1000.0
return {
'pressure': pressure if code in self.PRESSURE_CODES else None,
'height': None 'height': None
} }
code, pressure = token[0:2], token[2:5]
if pressure != '///' and code in self.PRESSURE_CODES:
value = float(pressure)
if value < 100:
value += 1000.0
ret['pressure'] = value
return ret
def parse_ttaa_sample(self, tokens: list[str]) -> dict: def parse_ttaa_sample(self, tokens: list[str]) -> dict:
sample = SoundingSample() sample = SoundingSample()
@ -320,7 +326,7 @@ class RAOBObs():
samples = list() samples = list()
for i in range(2, len(self.tokens), 3): for i in range(2, len(self.tokens), 3):
if len(self.tokens) < i+3 or self.tokens[i][-1] == '=': if len(self.tokens) < i+3:
break break
# #
@ -329,10 +335,10 @@ class RAOBObs():
if self.tokens[i][0:2] == '88': if self.tokens[i][0:2] == '88':
break break
data = self.parse_ttaa_sample(self.tokens[i:i+3]) sample = self.parse_ttaa_sample(self.tokens[i:i+3])
if data is not None: if sample is not None:
samples.append(data) samples.append(sample)
return { return {
'station': station, 'station': station,
@ -351,12 +357,12 @@ class RAOBObs():
} }
def parse_ttbb(self) -> dict: def parse_ttbb(self) -> dict:
station = self.tokens[1] station = self.tokens[0]
timestamp = self.tokens[2] timestamp = self.tokens[1]
samples = list() samples = list()
for i in range(3, len(self.tokens), 2): for i in range(2, len(self.tokens), 2):
samples.append(self.parse_sample_tokens(self.tokens[i:i+2])) samples.append(self.parse_ttbb_sample(self.tokens[i:i+2]))
return { return {
'station': station, 'station': station,
@ -368,10 +374,10 @@ class RAOBChunk():
def __init__(self, def __init__(self,
wfo: str, wfo: str,
product: str, product: str,
tokens: list[str]): lines: list[str]):
self.wfo = wfo self.wfo = wfo
self.product = product self.product = product
self.tokens = tokens self.lines = lines
def is_obs_start(self, token: str) -> bool: def is_obs_start(self, token: str) -> bool:
return token == 'TTAA' or token == 'TTBB' \ return token == 'TTAA' or token == 'TTBB' \
@ -382,22 +388,38 @@ class RAOBChunk():
def each_obs(self): def each_obs(self):
obs = None obs = None
for token in self.tokens: for line in self.lines:
if self.is_obs_start(token): tokens = re.split(r'\s+', line.rstrip())
if obs is not None:
yield obs
for token in tokens:
if obs is None:
if self.is_obs_start(token):
obs = RAOBObs(token) obs = RAOBObs(token)
elif obs is not None: else:
if token == '//END':
yield obs
obs = None
else:
i = token.find('=')
if i < 0:
obs.read(token) obs.read(token)
else:
obs.read(token[0:i])
yield obs
obs = None
if obs is not None: if obs is not None:
yield obs yield obs
def each_data(self): def each_data(self):
for obs in self.each_obs(): for obs in self.each_obs():
data = None
if obs.kind == 'TTAA': if obs.kind == 'TTAA':
data = obs.parse_ttaa() data = obs.parse_ttaa()
elif obs.kind == 'TTBB':
data = obs.parse_ttbb()
if data is None or len(data['samples']) == 0: if data is None or len(data['samples']) == 0:
continue continue
@ -480,18 +502,7 @@ class RAOBReader():
meta['product'] = match['product'] meta['product'] = match['product']
line_index += 1 line_index += 1
# return RAOBChunk(meta['wfo'], meta['product'], lines)
# Split each whitespace-delimited column of each line into one big
# list of lines for the remainder of the current text chunk.
#
tokens = list()
for line in lines[line_index:]:
tokens.extend(re.split(r'\s+', line))
return RAOBChunk(meta['wfo'],
meta['product'],
tokens)
def each_chunk(self): def each_chunk(self):
for text in each_chunk(self.fh, CHUNK_SEP, CHUNK_STRIP_CHARS): for text in each_chunk(self.fh, CHUNK_SEP, CHUNK_STRIP_CHARS):
@ -509,15 +520,15 @@ class RAOBReader():
for data in samples: for data in samples:
pressure = data['pressure'] pressure = data['pressure']
sounding.record_height(pressure, data['height']) sounding.record_height(pressure, data.get('height'))
sounding.record_temp_dewpoint(pressure, sounding.record_temp_dewpoint(pressure,
data['temp'], data.get('temp'),
data['dewpoint']) data.get('dewpoint'))
sounding.record_wind_speed_dir(pressure, sounding.record_wind_speed_dir(pressure,
data['wind_speed'], data.get('wind_speed'),
data['wind_dir']) data.get('wind_dir'))
for key in self.soundings: for key in self.soundings:
yield self.soundings[key].finish() yield self.soundings[key].finish()