diff --git a/src/exchanges/deutsche_boerse.py b/src/exchanges/deutsche_boerse.py
index 7a85211..db30015 100644
--- a/src/exchanges/deutsche_boerse.py
+++ b/src/exchanges/deutsche_boerse.py
@@ -7,10 +7,19 @@ from typing import List, Optional
 from .base import BaseExchange, Trade
 from bs4 import BeautifulSoup
 
+# API URLs for the Deutsche Börse venues
+API_URLS = {
+    'XETRA': 'https://mfs.deutsche-boerse.com/api/DETR-posttrade',
+    'FRA': 'https://mfs.deutsche-boerse.com/api/DFRA-posttrade',
+    'QUOTRIX': 'https://mfs.deutsche-boerse.com/api/DGAT-posttrade',
+}
+DOWNLOAD_BASE_URL = "https://mfs.deutsche-boerse.com/api/download"
+
 # Browser User-Agent for requests
 HEADERS = {
-    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
-    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+    'Accept': 'application/json, application/gzip, */*',
+    'Referer': 'https://mfs.deutsche-boerse.com/',
 }
 
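The endpoints above serve a JSON file listing instead of the HTML directory page that was scraped previously. A minimal sketch of querying one of them directly, assuming src.exchanges is importable from the repo root and that file names are returned under a CurrentFiles key (the same assumption _get_file_list makes below):

import requests

from src.exchanges.deutsche_boerse import API_URLS, HEADERS

# List the currently available XETRA post-trade files.
response = requests.get(API_URLS['XETRA'], headers=HEADERS, timeout=30)
response.raise_for_status()
filenames = response.json().get('CurrentFiles', [])
print(f"XETRA: {len(filenames)} files available")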
@@ -26,59 +35,25 @@ class DeutscheBoerseBase(BaseExchange):
     def name(self) -> str:
         raise NotImplementedError
 
+    @property
+    def api_url(self) -> str:
+        """API URL for the file list"""
+        return API_URLS.get(self.name, self.base_url)
+
     def _get_file_list(self) -> List[str]:
-        """Parses the directory page and extracts all file names"""
-        import re
+        """Fetches the file list from the JSON API"""
         try:
-            response = requests.get(self.base_url, headers=HEADERS, timeout=30)
+            response = requests.get(self.api_url, headers=HEADERS, timeout=30)
             response.raise_for_status()
-            files = []
-            html_text = response.text
+            data = response.json()
+            files = data.get('CurrentFiles', [])
 
-            # Debug: response length
-            print(f"[{self.name}] Response length: {len(html_text)} chars")
-
-            # Extract the prefix from base_url (e.g. DETR, DFRA, DGAT)
-            prefix_match = re.search(r'/([A-Z]{4})-posttrade', self.base_url)
-            prefix = prefix_match.group(1) if prefix_match else '[A-Z]{4}'
-
-            # Pattern: PREFIX-posttrade-YYYY-MM-DDTHH_MM.json.gz
-            # Important: file names appear as link text, not only in href
-            pattern = f'{prefix}-posttrade-\\d{{4}}-\\d{{2}}-\\d{{2}}T\\d{{2}}_\\d{{2}}\\.json\\.gz'
-
-            matches = re.findall(pattern, html_text)
-            files = list(set(matches))
-
-            if files:
-                print(f"[{self.name}] Found {len(files)} files via regex")
-
-            # Fallback: BeautifulSoup over links and text
-            if not files:
-                soup = BeautifulSoup(html_text, 'html.parser')
-                all_links = soup.find_all('a')
-                print(f"[{self.name}] Found {len(all_links)} total links on page")
-
-                for link in all_links:
-                    href = link.get('href', '')
-                    text = link.get_text(strip=True)
-
-                    # Check the link text (file names are often shown as link text)
-                    if text and 'posttrade' in text.lower() and '.json.gz' in text.lower():
-                        files.append(text)
-                    # Check the href
-                    elif href and 'posttrade' in href.lower() and '.json.gz' in href.lower():
-                        filename = href.split('/')[-1] if '/' in href else href
-                        files.append(filename)
-
-                files = list(set(files))
-                if files:
-                    print(f"[{self.name}] Found {len(files)} files via BeautifulSoup")
-
-            print(f"[{self.name}] Total files found: {len(files)}")
+            print(f"[{self.name}] API returned {len(files)} files")
             return files
+
         except Exception as e:
-            print(f"Error fetching file list from {self.base_url}: {e}")
+            print(f"[{self.name}] Error fetching file list from API: {e}")
             return []
 
     def _filter_files_for_date(self, files: List[str], target_date: datetime.date) -> List[str]:
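_filter_files_for_date (unchanged by this diff) still selects files by target date. For reference, the names embed a UTC timestamp in the form PREFIX-posttrade-YYYY-MM-DDTHH_MM.json.gz, so the date can be recovered from the name alone. A sketch of that extraction; date_from_filename is an illustrative helper, not part of the module:

import datetime
import re

def date_from_filename(filename: str):
    """Illustrative helper: recover the date embedded in a post-trade file name."""
    m = re.search(r'-posttrade-(\d{4})-(\d{2})-(\d{2})T\d{2}_\d{2}\.json\.gz$', filename)
    return datetime.date(*map(int, m.groups())) if m else None

assert date_from_filename('DETR-posttrade-2026-01-27T08_53.json.gz') == datetime.date(2026, 1, 27)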
@@ -116,47 +91,52 @@ class DeutscheBoerseBase(BaseExchange):
 
         return filtered
 
-    def _download_and_parse_file(self, file_url: str) -> List[Trade]:
-        """Downloads a JSON.gz file and parses the trades"""
+    def _download_and_parse_file(self, filename: str) -> List[Trade]:
+        """Downloads a JSON.gz file from the API and parses the trades"""
         trades = []
         try:
-            # Build the full URL
-            # Format: https://mfs.deutsche-boerse.com/DETR-posttrade/DETR-posttrade-2026-01-27T08_53.json.gz
-            if not file_url.startswith('http'):
-                # Strip a leading slash if present
-                filename = file_url.lstrip('/')
-                full_url = f"{self.base_url}/{filename}"
-            else:
-                full_url = file_url
+            # Download URL: https://mfs.deutsche-boerse.com/api/download/{filename}
+            full_url = f"{DOWNLOAD_BASE_URL}/{filename}"
 
             response = requests.get(full_url, headers=HEADERS, timeout=60)
 
             if response.status_code == 404:
-                print(f"[{self.name}] File not found: {full_url}")
+                # File not found; normal for older files
                 return []
 
             response.raise_for_status()
-            print(f"[{self.name}] Downloaded: {full_url} ({len(response.content)} bytes)")
 
             # Decompress the gzip payload
             with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f:
-                json_data = json.load(f)
+                content = f.read().decode('utf-8')
 
-            # Parse trades
-            # Deutsche Börse JSON format (RTS1/RTS2):
-            # Typical fields: TrdDt, TrdTm, ISIN, Pric, Qty, TrdCcy, etc.
-            for record in json_data:
+            if not content.strip():
+                # Empty file
+                return []
+
+            # NDJSON format: one JSON line per trade
+            for line in content.strip().split('\n'):
+                if not line.strip():
+                    continue
                 try:
+                    record = json.loads(line)
                     trade = self._parse_trade_record(record)
                     if trade:
                         trades.append(trade)
-                except Exception as e:
-                    print(f"Error parsing trade record: {e}")
+                except json.JSONDecodeError:
                     continue
+                except Exception:
+                    continue  # skip records with an unexpected structure
+
+            if trades:
+                print(f"[{self.name}] Parsed {len(trades)} trades from {filename}")
 
+        except requests.exceptions.HTTPError as e:
+            if e.response.status_code != 404:
+                print(f"[{self.name}] HTTP error downloading {filename}: {e}")
         except Exception as e:
-            print(f"Error downloading/parsing {file_url}: {e}")
+            print(f"[{self.name}] Error downloading/parsing {filename}: {e}")
 
         return trades
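The switch from json.load to per-line json.loads reflects the payload format: the files are gzipped NDJSON (one JSON object per line), not a single JSON array. A standalone sketch of the download-and-decode step, under the same import assumption as above; the file name is the example from the old comment and is purely illustrative:

import gzip
import io
import json
import requests

from src.exchanges.deutsche_boerse import DOWNLOAD_BASE_URL, HEADERS

filename = 'DETR-posttrade-2026-01-27T08_53.json.gz'  # illustrative name only
response = requests.get(f"{DOWNLOAD_BASE_URL}/{filename}", headers=HEADERS, timeout=60)
response.raise_for_status()

# Decompress, then decode one raw trade record per non-empty line.
with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f:
    text = f.read().decode('utf-8')
records = [json.loads(line) for line in text.splitlines() if line.strip()]
print(f"{len(records)} raw records")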
@@ -243,34 +223,6 @@ class DeutscheBoerseBase(BaseExchange):
             print(f"Error parsing record: {e}")
             return None
 
-    def _generate_expected_files(self, target_date: datetime.date) -> List[str]:
-        """
-        Generates expected file names based on the known format.
-        Format: PREFIX-posttrade-YYYY-MM-DDTHH_MM.json.gz
-        """
-        import re
-        files = []
-
-        # Extract the prefix from base_url (e.g. DETR, DFRA, DGAT)
-        prefix_match = re.search(r'/([A-Z]{4})-posttrade', self.base_url)
-        prefix = prefix_match.group(1) if prefix_match else 'DETR'
-
-        date_str = target_date.strftime('%Y-%m-%d')
-
-        # Generate for every hour of the trading day (07:00 - 22:00 UTC, every minute)
-        for hour in range(7, 23):
-            for minute in range(0, 60):
-                files.append(f"{prefix}-posttrade-{date_str}T{hour:02d}_{minute:02d}.json.gz")
-
-        # Also early files from the following day (after midnight UTC)
-        next_date = target_date + timedelta(days=1)
-        next_date_str = next_date.strftime('%Y-%m-%d')
-        for hour in range(0, 3):
-            for minute in range(0, 60):
-                files.append(f"{prefix}-posttrade-{next_date_str}T{hour:02d}_{minute:02d}.json.gz")
-
-        return files
-
     def _get_last_trading_day(self, from_date: datetime.date) -> datetime.date:
         """
         Finds the last trading day (skips weekends).
@@ -307,19 +259,20 @@ class DeutscheBoerseBase(BaseExchange):
 
         print(f"[{self.name}] Fetching trades for date: {target_date}")
 
-        # First try to fetch the file list from the page
+        # Fetch the file list from the API
         files = self._get_file_list()
-        print(f"[{self.name}] Found {len(files)} total files")
+
+        if not files:
+            print(f"[{self.name}] No files available from API")
+            return []
 
         # Filter files for the target date
         target_files = self._filter_files_for_date(files, target_date)
-        print(f"[{self.name}] {len(target_files)} files match target date")
+        print(f"[{self.name}] {len(target_files)} files match target date (of {len(files)} total)")
 
-        # If no files were found on the page, generate expected file names
        if not target_files:
-            print(f"[{self.name}] No files from page, trying generated filenames...")
-            target_files = self._generate_expected_files(target_date)
-            print(f"[{self.name}] Trying {len(target_files)} potential files")
+            print(f"[{self.name}] No files found for target date")
+            return []
 
         # Download and parse all matching files
         successful = 0
@@ -328,13 +281,8 @@ class DeutscheBoerseBase(BaseExchange):
             if trades:
                 all_trades.extend(trades)
                 successful += 1
-                if successful <= 5:
-                    print(f"[{self.name}] Parsed {len(trades)} trades from {file}")
-        if successful > 5:
-            print(f"[{self.name}] ... and {successful - 5} more files")
-
-        print(f"[{self.name}] Total trades fetched: {len(all_trades)}")
+        print(f"[{self.name}] Downloaded {successful} files, {len(all_trades)} trades total")
 
         return all_trades
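With _generate_expected_files removed, the flow is now strictly: list file names from the JSON API, filter them by the date embedded in the name, download each file, and parse the NDJSON lines. A minimal end-to-end smoke test using only the module-level constants, under the same CurrentFiles and NDJSON assumptions as above:

import gzip
import io
import requests

from src.exchanges.deutsche_boerse import API_URLS, DOWNLOAD_BASE_URL, HEADERS

files = requests.get(API_URLS['XETRA'], headers=HEADERS, timeout=30).json().get('CurrentFiles', [])
if files:
    newest = max(files)  # zero-padded timestamps make lexicographic order chronological
    response = requests.get(f"{DOWNLOAD_BASE_URL}/{newest}", headers=HEADERS, timeout=60)
    response.raise_for_status()
    with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f:
        lines = [l for l in f.read().decode('utf-8').splitlines() if l.strip()]
    print(f"{newest}: {len(lines)} trade records")
else:
    print("No files currently listed")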