fixed deutsche boerse
All checks were successful
Deployment / deploy-docker (push) Successful in 17s
All checks were successful
Deployment / deploy-docker (push) Successful in 17s
This commit is contained in:
@@ -28,20 +28,44 @@ class DeutscheBoerseBase(BaseExchange):
|
||||
|
||||
def _get_file_list(self) -> List[str]:
|
||||
"""Parst die Verzeichnisseite und extrahiert alle Dateinamen"""
|
||||
import re
|
||||
try:
|
||||
response = requests.get(self.base_url, headers=HEADERS, timeout=30)
|
||||
response.raise_for_status()
|
||||
|
||||
soup = BeautifulSoup(response.text, 'html.parser')
|
||||
files = []
|
||||
|
||||
# Deutsche Börse listet Dateien als Links auf
|
||||
for link in soup.find_all('a'):
|
||||
href = link.get('href', '')
|
||||
# Nur posttrade JSON.gz Dateien
|
||||
if 'posttrade' in href and href.endswith('.json.gz'):
|
||||
files.append(href)
|
||||
# Primär: Regex-basierte Extraktion (zuverlässiger)
|
||||
# Pattern: PREFIX-posttrade-YYYY-MM-DDTHH_MM.json.gz
|
||||
# Das Prefix wird aus der base_url extrahiert (z.B. DETR, DFRA, DGAT)
|
||||
prefix_match = re.search(r'/([A-Z]{4})-posttrade', self.base_url)
|
||||
if prefix_match:
|
||||
prefix = prefix_match.group(1)
|
||||
# Suche nach Dateinamen mit diesem Prefix
|
||||
pattern = f'{prefix}-posttrade-\\d{{4}}-\\d{{2}}-\\d{{2}}T\\d{{2}}_\\d{{2}}\\.json\\.gz'
|
||||
else:
|
||||
# Generisches Pattern
|
||||
pattern = r'[A-Z]{4}-posttrade-\d{4}-\d{2}-\d{2}T\d{2}_\d{2}\.json\.gz'
|
||||
|
||||
matches = re.findall(pattern, response.text)
|
||||
files = list(set(matches))
|
||||
|
||||
# Sekundär: BeautifulSoup für Links (falls Regex nichts findet)
|
||||
if not files:
|
||||
soup = BeautifulSoup(response.text, 'html.parser')
|
||||
for link in soup.find_all('a'):
|
||||
href = link.get('href', '')
|
||||
text = link.get_text(strip=True)
|
||||
|
||||
# Prüfe href und Text für posttrade Dateien
|
||||
if href and 'posttrade' in href.lower() and '.json.gz' in href.lower():
|
||||
# Extrahiere nur den Dateinamen
|
||||
filename = href.split('/')[-1] if '/' in href else href
|
||||
files.append(filename)
|
||||
elif text and 'posttrade' in text.lower() and '.json.gz' in text.lower():
|
||||
files.append(text)
|
||||
|
||||
print(f"[{self.name}] Found {len(files)} files via regex/soup")
|
||||
return files
|
||||
except Exception as e:
|
||||
print(f"Error fetching file list from {self.base_url}: {e}")
|
||||
@@ -50,11 +74,12 @@ class DeutscheBoerseBase(BaseExchange):
|
||||
def _filter_files_for_date(self, files: List[str], target_date: datetime.date) -> List[str]:
|
||||
"""
|
||||
Filtert Dateien für ein bestimmtes Datum.
|
||||
Dateiformat: *posttrade-YYYY-MM-DDTHH:MM:SS*.json.gz
|
||||
Dateiformat: DETR-posttrade-YYYY-MM-DDTHH_MM.json.gz (mit Unterstrich!)
|
||||
|
||||
Da Handel bis 22:00 MEZ geht (21:00/20:00 UTC), müssen wir auch
|
||||
Dateien nach Mitternacht UTC berücksichtigen.
|
||||
"""
|
||||
import re
|
||||
filtered = []
|
||||
|
||||
# Für den Vortag: Dateien vom target_date UND vom Folgetag (bis ~02:00 UTC)
|
||||
@@ -64,18 +89,17 @@ class DeutscheBoerseBase(BaseExchange):
|
||||
|
||||
for file in files:
|
||||
# Extrahiere Datum aus Dateiname
|
||||
# Format: posttrade-2026-01-26T21:30:00.json.gz
|
||||
# Format: DETR-posttrade-2026-01-26T21_30.json.gz
|
||||
if target_str in file:
|
||||
filtered.append(file)
|
||||
elif next_day_str in file:
|
||||
# Prüfe ob es eine frühe Datei vom nächsten Tag ist (< 03:00 UTC)
|
||||
try:
|
||||
# Finde Timestamp im Dateinamen
|
||||
parts = file.split('posttrade-')
|
||||
if len(parts) > 1:
|
||||
ts_part = parts[1].split('.json.gz')[0]
|
||||
file_dt = datetime.fromisoformat(ts_part)
|
||||
if file_dt.hour < 3: # Frühe Morgenstunden gehören noch zum Vortag
|
||||
# Finde Timestamp im Dateinamen mit Unterstrich für Minuten
|
||||
match = re.search(r'posttrade-(\d{4}-\d{2}-\d{2})T(\d{2})_(\d{2})', file)
|
||||
if match:
|
||||
hour = int(match.group(2))
|
||||
if hour < 3: # Frühe Morgenstunden gehören noch zum Vortag
|
||||
filtered.append(file)
|
||||
except Exception:
|
||||
pass
|
||||
@@ -88,13 +112,22 @@ class DeutscheBoerseBase(BaseExchange):
|
||||
|
||||
try:
|
||||
# Vollständige URL erstellen
|
||||
# Format: https://mfs.deutsche-boerse.com/DETR-posttrade/DETR-posttrade-2026-01-27T08_53.json.gz
|
||||
if not file_url.startswith('http'):
|
||||
full_url = f"{self.base_url.rstrip('/')}/{file_url.lstrip('/')}"
|
||||
# Entferne führenden Slash falls vorhanden
|
||||
filename = file_url.lstrip('/')
|
||||
full_url = f"{self.base_url}/{filename}"
|
||||
else:
|
||||
full_url = file_url
|
||||
|
||||
response = requests.get(full_url, headers=HEADERS, timeout=60)
|
||||
|
||||
if response.status_code == 404:
|
||||
print(f"[{self.name}] File not found: {full_url}")
|
||||
return []
|
||||
|
||||
response.raise_for_status()
|
||||
print(f"[{self.name}] Downloaded: {full_url} ({len(response.content)} bytes)")
|
||||
|
||||
# Gzip entpacken
|
||||
with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f:
|
||||
|
||||
Reference in New Issue
Block a user