diff --git a/src/exchanges/__pycache__/deutsche_boerse.cpython-313.pyc b/src/exchanges/__pycache__/deutsche_boerse.cpython-313.pyc index 034c26f..867d3a0 100644 Binary files a/src/exchanges/__pycache__/deutsche_boerse.cpython-313.pyc and b/src/exchanges/__pycache__/deutsche_boerse.cpython-313.pyc differ diff --git a/src/exchanges/__pycache__/gettex.cpython-313.pyc b/src/exchanges/__pycache__/gettex.cpython-313.pyc index 651d991..92748ed 100644 Binary files a/src/exchanges/__pycache__/gettex.cpython-313.pyc and b/src/exchanges/__pycache__/gettex.cpython-313.pyc differ diff --git a/src/exchanges/deutsche_boerse.py b/src/exchanges/deutsche_boerse.py index db30015..73affb1 100644 --- a/src/exchanges/deutsche_boerse.py +++ b/src/exchanges/deutsche_boerse.py @@ -2,11 +2,17 @@ import requests import gzip import json import io +import time from datetime import datetime, timedelta, timezone from typing import List, Optional from .base import BaseExchange, Trade from bs4 import BeautifulSoup +# Rate-Limiting Konfiguration +RATE_LIMIT_DELAY = 0.5 # Sekunden zwischen Requests +RATE_LIMIT_RETRY_DELAY = 5 # Sekunden Wartezeit bei 429 +MAX_RETRIES = 3 # Maximale Wiederholungen bei 429 + # API URLs für Deutsche Börse API_URLS = { 'XETRA': 'https://mfs.deutsche-boerse.com/api/DETR-posttrade', @@ -94,49 +100,62 @@ class DeutscheBoerseBase(BaseExchange): def _download_and_parse_file(self, filename: str) -> List[Trade]: """Lädt eine JSON.gz Datei von der API herunter und parst die Trades""" trades = [] + full_url = f"{DOWNLOAD_BASE_URL}/{filename}" - try: - # Download-URL: https://mfs.deutsche-boerse.com/api/download/{filename} - full_url = f"{DOWNLOAD_BASE_URL}/{filename}" - - response = requests.get(full_url, headers=HEADERS, timeout=60) - - if response.status_code == 404: - # Datei nicht gefunden - normal für alte Dateien - return [] - - response.raise_for_status() - - # Gzip entpacken - with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f: - content = 
f.read().decode('utf-8') - - if not content.strip(): - # Leere Datei - return [] - - # NDJSON Format: Eine JSON-Zeile pro Trade - for line in content.strip().split('\n'): - if not line.strip(): + for retry in range(MAX_RETRIES): + try: + response = requests.get(full_url, headers=HEADERS, timeout=60) + + if response.status_code == 404: + # Datei nicht gefunden - normal für alte Dateien + return [] + + if response.status_code == 429: + # Rate-Limit erreicht - warten und erneut versuchen + wait_time = RATE_LIMIT_RETRY_DELAY * (retry + 1) + print(f"[{self.name}] Rate limited, waiting {wait_time}s...") + time.sleep(wait_time) continue - try: - record = json.loads(line) - trade = self._parse_trade_record(record) - if trade: - trades.append(trade) - except json.JSONDecodeError: + + response.raise_for_status() + + # Gzip entpacken + with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f: + content = f.read().decode('utf-8') + + if not content.strip(): + # Leere Datei + return [] + + # NDJSON Format: Eine JSON-Zeile pro Trade + for line in content.strip().split('\n'): + if not line.strip(): + continue + try: + record = json.loads(line) + trade = self._parse_trade_record(record) + if trade: + trades.append(trade) + except json.JSONDecodeError: + continue + except Exception: + continue + + # Erfolg - keine weitere Retry nötig + break + + except requests.exceptions.HTTPError as e: + if e.response.status_code == 429: + wait_time = RATE_LIMIT_RETRY_DELAY * (retry + 1) + print(f"[{self.name}] Rate limited, waiting {wait_time}s...") + time.sleep(wait_time) continue - except Exception as e: - continue - - if trades: - print(f"[{self.name}] Parsed {len(trades)} trades from {filename}") - - except requests.exceptions.HTTPError as e: - if e.response.status_code != 404: - print(f"[{self.name}] HTTP error downloading {filename}: {e}") - except Exception as e: - print(f"[{self.name}] Error downloading/parsing {filename}: {e}") + elif e.response.status_code != 404: + 
print(f"[{self.name}] HTTP error downloading {filename}: {e}") + break + except Exception as e: + print(f"[{self.name}] Error downloading/parsing {filename}: {e}") + break return trades @@ -274,13 +293,23 @@ class DeutscheBoerseBase(BaseExchange): print(f"[{self.name}] No files for target date found") return [] - # Alle passenden Dateien herunterladen und parsen + # Alle passenden Dateien herunterladen und parsen (mit Rate-Limiting) successful = 0 - for file in target_files: + total_files = len(target_files) + + for i, file in enumerate(target_files): trades = self._download_and_parse_file(file) if trades: all_trades.extend(trades) successful += 1 + + # Rate-Limiting: Pause zwischen Downloads + if i < total_files - 1: + time.sleep(RATE_LIMIT_DELAY) + + # Fortschritt alle 100 Dateien + if (i + 1) % 100 == 0: + print(f"[{self.name}] Progress: {i + 1}/{total_files} files, {len(all_trades)} trades so far") print(f"[{self.name}] Downloaded {successful} files, total {len(all_trades)} trades") return all_trades diff --git a/src/exchanges/gettex.py b/src/exchanges/gettex.py index c83c9b1..c0f352d 100644 --- a/src/exchanges/gettex.py +++ b/src/exchanges/gettex.py @@ -2,11 +2,15 @@ import requests import gzip import csv import io +import time from datetime import datetime, timedelta, timezone from typing import List, Optional from .base import BaseExchange, Trade from bs4 import BeautifulSoup +# Rate-Limiting +RATE_LIMIT_DELAY = 0.3 # Sekunden zwischen Requests + # Browser User-Agent für Zugriff (gettex prüft User-Agent!) 
HEADERS = { 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', @@ -411,11 +415,14 @@ class GettexExchange(BaseExchange): print(f"[{self.name}] Found {len(target_files)} files for target date from page") - # Lade Dateien von der Webseite - for f in target_files: + # Lade Dateien von der Webseite (mit Rate-Limiting) + for i, f in enumerate(target_files): trades = self._download_file_by_url(f['url'], f['filename']) if trades: all_trades.extend(trades) + # Rate-Limiting + if i < len(target_files) - 1: + time.sleep(RATE_LIMIT_DELAY) # Fallback: Versuche erwartete Dateinamen if not all_trades: