updated dashboard
All checks were successful
Deployment / deploy-docker (push) Successful in 17s

Melchior Reimers
2026-01-27 11:09:52 +01:00
parent b25bab2288
commit 228212cbab
4 changed files with 80 additions and 44 deletions

View File

@@ -2,11 +2,17 @@ import requests
 import gzip
 import json
 import io
+import time
 from datetime import datetime, timedelta, timezone
 from typing import List, Optional
 from .base import BaseExchange, Trade
 from bs4 import BeautifulSoup
 
+# Rate-limiting configuration
+RATE_LIMIT_DELAY = 0.5  # seconds between requests
+RATE_LIMIT_RETRY_DELAY = 5  # seconds to wait on HTTP 429
+MAX_RETRIES = 3  # maximum retries on HTTP 429
+
 # API URLs for Deutsche Börse
 API_URLS = {
     'XETRA': 'https://mfs.deutsche-boerse.com/api/DETR-posttrade',
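
Note: the backoff introduced in this commit is linear, not exponential. As a quick, standalone illustration (not part of the diff) of the worst-case wait schedule the new constants produce:

    RATE_LIMIT_RETRY_DELAY = 5
    MAX_RETRIES = 3

    waits = [RATE_LIMIT_RETRY_DELAY * (retry + 1) for retry in range(MAX_RETRIES)]
    print(waits)       # [5, 10, 15]
    print(sum(waits))  # 30 -> at most ~30s of backoff per file before giving up
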
@@ -94,49 +100,62 @@ class DeutscheBoerseBase(BaseExchange):
     def _download_and_parse_file(self, filename: str) -> List[Trade]:
         """Downloads a JSON.gz file from the API and parses the trades"""
         trades = []
+        full_url = f"{DOWNLOAD_BASE_URL}/{filename}"
 
-        try:
-            # Download URL: https://mfs.deutsche-boerse.com/api/download/{filename}
-            full_url = f"{DOWNLOAD_BASE_URL}/{filename}"
-            response = requests.get(full_url, headers=HEADERS, timeout=60)
+        for retry in range(MAX_RETRIES):
+            try:
+                response = requests.get(full_url, headers=HEADERS, timeout=60)
 
-            if response.status_code == 404:
-                # File not found - normal for old files
-                return []
+                if response.status_code == 404:
+                    # File not found - normal for old files
+                    return []
 
-            response.raise_for_status()
+                if response.status_code == 429:
+                    # Rate limit hit - wait and retry
+                    wait_time = RATE_LIMIT_RETRY_DELAY * (retry + 1)
+                    print(f"[{self.name}] Rate limited, waiting {wait_time}s...")
+                    time.sleep(wait_time)
+                    continue
 
-            # Decompress gzip
-            with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f:
-                content = f.read().decode('utf-8')
+                response.raise_for_status()
 
-            if not content.strip():
-                # Empty file
-                return []
+                # Decompress gzip
+                with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f:
+                    content = f.read().decode('utf-8')
 
-            # NDJSON format: one JSON line per trade
-            for line in content.strip().split('\n'):
-                if not line.strip():
-                    continue
-                try:
-                    record = json.loads(line)
-                    trade = self._parse_trade_record(record)
-                    if trade:
-                        trades.append(trade)
-                except json.JSONDecodeError:
-                    continue
-                except Exception as e:
-                    continue
+                if not content.strip():
+                    # Empty file
+                    return []
 
-            if trades:
-                print(f"[{self.name}] Parsed {len(trades)} trades from {filename}")
+                # NDJSON format: one JSON line per trade
+                for line in content.strip().split('\n'):
+                    if not line.strip():
+                        continue
+                    try:
+                        record = json.loads(line)
+                        trade = self._parse_trade_record(record)
+                        if trade:
+                            trades.append(trade)
+                    except json.JSONDecodeError:
+                        continue
+                    except Exception:
+                        continue
 
-        except requests.exceptions.HTTPError as e:
-            if e.response.status_code != 404:
-                print(f"[{self.name}] HTTP error downloading {filename}: {e}")
-        except Exception as e:
-            print(f"[{self.name}] Error downloading/parsing {filename}: {e}")
+                # Success - no further retries needed
+                break
+
+            except requests.exceptions.HTTPError as e:
+                if e.response.status_code == 429:
+                    wait_time = RATE_LIMIT_RETRY_DELAY * (retry + 1)
+                    print(f"[{self.name}] Rate limited, waiting {wait_time}s...")
+                    time.sleep(wait_time)
+                    continue
+                elif e.response.status_code != 404:
+                    print(f"[{self.name}] HTTP error downloading {filename}: {e}")
+                break
+            except Exception as e:
+                print(f"[{self.name}] Error downloading/parsing {filename}: {e}")
+                break
 
         return trades
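
For context: the retry logic above amounts to a GET with linear backoff on HTTP 429. A minimal self-contained sketch of the same pattern (get_with_backoff is an illustrative name, not part of this commit):

    import time
    import requests

    def get_with_backoff(url, headers=None, max_retries=3, retry_delay=5, timeout=60):
        """Illustrative sketch: GET with linear backoff on HTTP 429."""
        for retry in range(max_retries):
            response = requests.get(url, headers=headers, timeout=timeout)
            if response.status_code == 429:
                # Same schedule as the commit: 5s, 10s, 15s, ...
                time.sleep(retry_delay * (retry + 1))
                continue
            response.raise_for_status()
            return response
        return None  # still rate-limited after exhausting all retries
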
@@ -274,13 +293,23 @@ class DeutscheBoerseBase(BaseExchange):
print(f"[{self.name}] No files for target date found") print(f"[{self.name}] No files for target date found")
return [] return []
# Alle passenden Dateien herunterladen und parsen # Alle passenden Dateien herunterladen und parsen (mit Rate-Limiting)
successful = 0 successful = 0
for file in target_files: total_files = len(target_files)
for i, file in enumerate(target_files):
trades = self._download_and_parse_file(file) trades = self._download_and_parse_file(file)
if trades: if trades:
all_trades.extend(trades) all_trades.extend(trades)
successful += 1 successful += 1
# Rate-Limiting: Pause zwischen Downloads
if i < total_files - 1:
time.sleep(RATE_LIMIT_DELAY)
# Fortschritt alle 100 Dateien
if (i + 1) % 100 == 0:
print(f"[{self.name}] Progress: {i + 1}/{total_files} files, {len(all_trades)} trades so far")
print(f"[{self.name}] Downloaded {successful} files, total {len(all_trades)} trades") print(f"[{self.name}] Downloaded {successful} files, total {len(all_trades)} trades")
return all_trades return all_trades
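
A practical note on the fixed inter-request delay: for large file lists the sleep time alone sets a lower bound on runtime. A rough estimate, assuming the 0.5s delay from this commit and a hypothetical 1000-file day (download time excluded):

    RATE_LIMIT_DELAY = 0.5
    n_files = 1000  # hypothetical count
    print(f"{RATE_LIMIT_DELAY * (n_files - 1) / 60:.1f} min")  # ~8.3 min of pure sleep
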

View File

@@ -2,11 +2,15 @@ import requests
 import gzip
 import csv
 import io
+import time
 from datetime import datetime, timedelta, timezone
 from typing import List, Optional
 from .base import BaseExchange, Trade
 from bs4 import BeautifulSoup
 
+# Rate limiting
+RATE_LIMIT_DELAY = 0.3  # seconds between requests
+
 # Browser User-Agent for access (gettex checks the User-Agent!)
 HEADERS = {
     'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
@@ -411,11 +415,14 @@ class GettexExchange(BaseExchange):
print(f"[{self.name}] Found {len(target_files)} files for target date from page") print(f"[{self.name}] Found {len(target_files)} files for target date from page")
# Lade Dateien von der Webseite # Lade Dateien von der Webseite (mit Rate-Limiting)
for f in target_files: for i, f in enumerate(target_files):
trades = self._download_file_by_url(f['url'], f['filename']) trades = self._download_file_by_url(f['url'], f['filename'])
if trades: if trades:
all_trades.extend(trades) all_trades.extend(trades)
# Rate-Limiting
if i < len(target_files) - 1:
time.sleep(RATE_LIMIT_DELAY)
# Fallback: Versuche erwartete Dateinamen # Fallback: Versuche erwartete Dateinamen
if not all_trades: if not all_trades: