# trading-daemon/src/exchanges/deutsche_boerse.py

import requests
import gzip
import json
import io
import re
from datetime import date, datetime, timedelta, timezone
from typing import List, Optional
from .base import BaseExchange, Trade
from bs4 import BeautifulSoup

# Browser User-Agent so the server accepts our requests
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
}


class DeutscheBoerseBase(BaseExchange):
    """Base class for the Deutsche Börse exchanges (Xetra, Frankfurt, Quotrix)."""

    @property
    def base_url(self) -> str:
        """Override in subclasses."""
        raise NotImplementedError

    @property
    def name(self) -> str:
        raise NotImplementedError

    def _get_file_list(self) -> List[str]:
        """Parses the directory page and extracts all file names."""
        try:
            response = requests.get(self.base_url, headers=HEADERS, timeout=30)
            response.raise_for_status()
            files = []
            html_text = response.text
            # Debug: response length
            print(f"[{self.name}] Response length: {len(html_text)} chars")

            # Primary: regex-based extraction (more reliable)
            # Pattern: PREFIX-posttrade-YYYY-MM-DDTHH_MM.json.gz
            # The prefix is taken from the base_url (e.g. DETR, DFRA, DGAT)
            prefix_match = re.search(r'/([A-Z]{4})-posttrade', self.base_url)
            if prefix_match:
                prefix = prefix_match.group(1)
                # Look for file names carrying this prefix
                pattern = f'{prefix}-posttrade-\\d{{4}}-\\d{{2}}-\\d{{2}}T\\d{{2}}_\\d{{2}}\\.json\\.gz'
            else:
                # Generic pattern
                pattern = r'[A-Z]{4}-posttrade-\d{4}-\d{2}-\d{2}T\d{2}_\d{2}\.json\.gz'

            matches = re.findall(pattern, html_text)
            files = list(set(matches))

            # Secondary: BeautifulSoup link scan (in case the regex finds nothing)
            if not files:
                soup = BeautifulSoup(html_text, 'html.parser')
                all_links = soup.find_all('a')
                print(f"[{self.name}] Found {len(all_links)} total links on page")
                for link in all_links:
                    href = link.get('href', '')
                    text = link.get_text(strip=True)
                    # Check both href and link text for posttrade files
                    if href and 'posttrade' in href.lower() and '.json.gz' in href.lower():
                        # Keep only the file name portion
                        filename = href.split('/')[-1] if '/' in href else href
                        files.append(filename)
                    elif text and 'posttrade' in text.lower() and '.json.gz' in text.lower():
                        files.append(text)

            # Tertiary: scan the raw HTML for any "posttrade" file name
            if not files:
                # Looser pattern for arbitrary file names containing "posttrade"
                general_pattern = r'[\w-]*posttrade[\w-]*\d{4}[-_]\d{2}[-_]\d{2}[T_]\d{2}[_:]\d{2}\.json\.gz'
                matches = re.findall(general_pattern, html_text, re.IGNORECASE)
                files = list(set(matches))
                if files:
                    print(f"[{self.name}] Found {len(files)} files via general pattern")

            print(f"[{self.name}] Found {len(files)} files via regex/soup")
            return files
        except Exception as e:
            print(f"Error fetching file list from {self.base_url}: {e}")
            return []

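    # _get_file_list, illustrated (file name is an assumed example): for
    # base_url "https://mfs.deutsche-boerse.com/DETR-posttrade" the prefix
    # regex yields "DETR", and the primary pattern then matches directory
    # entries such as "DETR-posttrade-2026-01-26T21_30.json.gz".
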
    def _filter_files_for_date(self, files: List[str], target_date: date) -> List[str]:
        """
        Filters files for a specific date.
        File name format: DETR-posttrade-YYYY-MM-DDTHH_MM.json.gz (note the underscore!)
        Because trading runs until 22:00 CET/CEST (21:00/20:00 UTC), files from
        shortly after midnight UTC must be considered as well.
        """
        filtered = []
        # For the previous day: files from target_date AND from the following day (up to ~02:00 UTC)
        target_str = target_date.strftime('%Y-%m-%d')
        next_day = target_date + timedelta(days=1)
        next_day_str = next_day.strftime('%Y-%m-%d')
        for file in files:
            # Extract the date from the file name
            # Format: DETR-posttrade-2026-01-26T21_30.json.gz
            if target_str in file:
                filtered.append(file)
            elif next_day_str in file:
                # Check whether it is an early file from the next day (< 03:00 UTC)
                try:
                    # Find the timestamp in the file name (underscore before the minutes)
                    match = re.search(r'posttrade-(\d{4}-\d{2}-\d{2})T(\d{2})_(\d{2})', file)
                    if match:
                        hour = int(match.group(2))
                        if hour < 3:  # Early-morning files still belong to the previous day
                            filtered.append(file)
                except Exception:
                    pass
        return filtered

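    # _filter_files_for_date, sketched for target_date 2026-01-26
    # (file names illustrative):
    #   DETR-posttrade-2026-01-26T21_30.json.gz -> kept (target date)
    #   DETR-posttrade-2026-01-27T01_15.json.gz -> kept (next day, hour < 3)
    #   DETR-posttrade-2026-01-27T08_53.json.gz -> dropped (next day, hour >= 3)
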
    def _download_and_parse_file(self, file_url: str) -> List[Trade]:
        """Downloads one JSON.gz file and parses the trades it contains."""
        trades = []
        try:
            # Build the full URL
            # Format: https://mfs.deutsche-boerse.com/DETR-posttrade/DETR-posttrade-2026-01-27T08_53.json.gz
            if not file_url.startswith('http'):
                # Strip a leading slash if present
                filename = file_url.lstrip('/')
                full_url = f"{self.base_url}/{filename}"
            else:
                full_url = file_url
            response = requests.get(full_url, headers=HEADERS, timeout=60)
            if response.status_code == 404:
                print(f"[{self.name}] File not found: {full_url}")
                return []
            response.raise_for_status()
            print(f"[{self.name}] Downloaded: {full_url} ({len(response.content)} bytes)")
            # Decompress the gzip payload
            with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f:
                json_data = json.load(f)
            # Parse the trades
            # Deutsche Börse JSON format (RTS1/RTS2):
            # typical fields: TrdDt, TrdTm, ISIN, Pric, Qty, TrdCcy, etc.
            for record in json_data:
                try:
                    trade = self._parse_trade_record(record)
                    if trade:
                        trades.append(trade)
                except Exception as e:
                    print(f"Error parsing trade record: {e}")
                    continue
        except Exception as e:
            print(f"Error downloading/parsing {file_url}: {e}")
        return trades

    def _parse_trade_record(self, record: dict) -> Optional[Trade]:
        """
        Parses a single trade record from the JSON.
        Deutsche Börse uses the RTS1/RTS2 format.
        Important fields:
        - TrdDt: trading date (YYYY-MM-DD)
        - TrdTm: trading time (HH:MM:SS.ffffff)
        - ISIN: instrument identifier
        - FinInstrmId.Id: alternative ISIN field
        - Pric.Pric.MntryVal.Amt: price
        - Qty.Unit: quantity
        """
        try:
            # Extract the ISIN
            isin = record.get('ISIN') or record.get('FinInstrmId', {}).get('Id', '')
            if not isin:
                return None
            # Extract the price (several possible paths)
            price = None
            if 'Pric' in record:
                pric = record['Pric']
                if isinstance(pric, dict):
                    if 'Pric' in pric:
                        inner = pric['Pric']
                        if 'MntryVal' in inner:
                            price = float(inner['MntryVal'].get('Amt', 0))
                        elif 'Amt' in inner:
                            price = float(inner['Amt'])
                    elif 'MntryVal' in pric:
                        price = float(pric['MntryVal'].get('Amt', 0))
                elif isinstance(pric, (int, float)):
                    price = float(pric)
            if price is None or price <= 0:
                return None
            # Extract the quantity
            quantity = None
            if 'Qty' in record:
                qty = record['Qty']
                if isinstance(qty, dict):
                    quantity = float(qty.get('Unit', qty.get('Qty', 0)))
                elif isinstance(qty, (int, float)):
                    quantity = float(qty)
            if quantity is None or quantity <= 0:
                return None
            # Extract the timestamp
            trd_dt = record.get('TrdDt', '')
            trd_tm = record.get('TrdTm', '00:00:00')
            if not trd_dt:
                return None
            # Combine date and time
            ts_str = f"{trd_dt}T{trd_tm}"
            # Truncate the fractional part if it is too long for fromisoformat
            if '.' in ts_str:
                parts = ts_str.split('.')
                if len(parts[1]) > 6:
                    ts_str = parts[0] + '.' + parts[1][:6]
            # Parse as UTC (Deutsche Börse delivers UTC)
            timestamp = datetime.fromisoformat(ts_str)
            if timestamp.tzinfo is None:
                timestamp = timestamp.replace(tzinfo=timezone.utc)
            return Trade(
                exchange=self.name,
                symbol=isin,  # symbol = ISIN
                isin=isin,
                price=price,
                quantity=quantity,
                timestamp=timestamp
            )
        except Exception as e:
            print(f"Error parsing record: {e}")
            return None

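    # Illustrative record shape this parser accepts (the nesting follows the
    # field paths documented in the docstring above; concrete values are made up):
    # {
    #     "TrdDt": "2026-01-26",
    #     "TrdTm": "15:42:07.123456",
    #     "ISIN": "DE0007164600",
    #     "Pric": {"Pric": {"MntryVal": {"Amt": 231.85}}},
    #     "Qty": {"Unit": 150}
    # }
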
    def _generate_expected_files(self, target_date: date) -> List[str]:
        """
        Generates expected file names from the known format.
        Format: PREFIX-posttrade-YYYY-MM-DDTHH_MM.json.gz
        """
        files = []
        # Extract the prefix from the base_url (e.g. DETR, DFRA, DGAT)
        prefix_match = re.search(r'/([A-Z]{4})-posttrade', self.base_url)
        prefix = prefix_match.group(1) if prefix_match else 'DETR'
        date_str = target_date.strftime('%Y-%m-%d')
        # Generate every minute of the trading day (07:00 - 22:59 UTC)
        for hour in range(7, 23):
            for minute in range(0, 60):
                files.append(f"{prefix}-posttrade-{date_str}T{hour:02d}_{minute:02d}.json.gz")
        # Also early files from the following day (after midnight UTC)
        next_date = target_date + timedelta(days=1)
        next_date_str = next_date.strftime('%Y-%m-%d')
        for hour in range(0, 3):
            for minute in range(0, 60):
                files.append(f"{prefix}-posttrade-{next_date_str}T{hour:02d}_{minute:02d}.json.gz")
        return files

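    # Note: this brute-force fallback yields 16*60 + 3*60 = 1140 candidate
    # names per day; candidates that do not exist on the server are skipped
    # via the 404 check in _download_and_parse_file.
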
    def fetch_latest_trades(self, include_yesterday: bool = True, since_date: Optional[datetime] = None) -> List[Trade]:
        """
        Fetches all trades from the previous day (or since since_date).
        """
        all_trades = []
        # Determine the target date
        if since_date:
            target_date = since_date.date() if hasattr(since_date, 'date') else since_date
        else:
            # Default: previous day
            target_date = (datetime.now(timezone.utc) - timedelta(days=1)).date()
        print(f"[{self.name}] Fetching trades for date: {target_date}")

        # First try to fetch the file list from the directory page
        files = self._get_file_list()
        print(f"[{self.name}] Found {len(files)} total files")
        # Filter the files for the target date
        target_files = self._filter_files_for_date(files, target_date)
        print(f"[{self.name}] {len(target_files)} files match target date")

        # If the page yielded no files, fall back to generated file names
        if not target_files:
            print(f"[{self.name}] No files from page, trying generated filenames...")
            target_files = self._generate_expected_files(target_date)
            print(f"[{self.name}] Trying {len(target_files)} potential files")

        # Download and parse every matching file
        successful = 0
        for file in target_files:
            trades = self._download_and_parse_file(file)
            if trades:
                all_trades.extend(trades)
                successful += 1
                if successful <= 5:
                    print(f"[{self.name}] Parsed {len(trades)} trades from {file}")
        if successful > 5:
            print(f"[{self.name}] ... and {successful - 5} more files")

        print(f"[{self.name}] Total trades fetched: {len(all_trades)}")
        return all_trades


class XetraExchange(DeutscheBoerseBase):
    """Xetra (Deutsche Börse) - DETR"""

    @property
    def base_url(self) -> str:
        return "https://mfs.deutsche-boerse.com/DETR-posttrade"

    @property
    def name(self) -> str:
        return "XETRA"


class FrankfurtExchange(DeutscheBoerseBase):
    """Börse Frankfurt - DFRA"""

    @property
    def base_url(self) -> str:
        return "https://mfs.deutsche-boerse.com/DFRA-posttrade"

    @property
    def name(self) -> str:
        return "FRA"


class QuotrixExchange(DeutscheBoerseBase):
    """Quotrix (Düsseldorf/Tradegate) - DGAT"""

    @property
    def base_url(self) -> str:
        return "https://mfs.deutsche-boerse.com/DGAT-posttrade"

    @property
    def name(self) -> str:
        return "QUOTRIX"