Binary file not shown.
Binary file not shown.
@@ -2,11 +2,17 @@ import requests
import gzip
import json
import io
import time
from datetime import datetime, timedelta, timezone
from typing import List, Optional
from .base import BaseExchange, Trade
from bs4 import BeautifulSoup

# Rate-limiting configuration
RATE_LIMIT_DELAY = 0.5  # seconds between requests
RATE_LIMIT_RETRY_DELAY = 5  # seconds to wait on a 429
MAX_RETRIES = 3  # maximum retries on 429

# API URLs for Deutsche Börse
API_URLS = {
'XETRA': 'https://mfs.deutsche-boerse.com/api/DETR-posttrade',
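As a note on how these constants combine: the hunk below retries a download up to MAX_RETRIES times and backs off linearly on HTTP 429. A minimal standalone sketch of that pattern, with a hypothetical helper name get_with_backoff that is not part of this commit:

import time
from typing import Optional
import requests

RATE_LIMIT_RETRY_DELAY = 5  # seconds to wait on a 429 (as in the diff)
MAX_RETRIES = 3             # maximum retries on 429 (as in the diff)

def get_with_backoff(url: str, headers: Optional[dict] = None, timeout: int = 60) -> Optional[requests.Response]:
    """Hypothetical helper: GET a URL, backing off linearly when rate limited."""
    for retry in range(MAX_RETRIES):
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code == 429:
            # Wait 5s, 10s, 15s on successive 429 responses, then try again
            time.sleep(RATE_LIMIT_RETRY_DELAY * (retry + 1))
            continue
        return response
    return None  # still rate limited after MAX_RETRIES attempts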
@@ -94,49 +100,62 @@ class DeutscheBoerseBase(BaseExchange):
def _download_and_parse_file(self, filename: str) -> List[Trade]:
"""Downloads a JSON.gz file from the API and parses the trades"""
trades = []
full_url = f"{DOWNLOAD_BASE_URL}/{filename}"

try:
# Download URL: https://mfs.deutsche-boerse.com/api/download/{filename}
full_url = f"{DOWNLOAD_BASE_URL}/{filename}"
for retry in range(MAX_RETRIES):
try:
response = requests.get(full_url, headers=HEADERS, timeout=60)

response = requests.get(full_url, headers=HEADERS, timeout=60)
if response.status_code == 404:
# File not found - normal for old files
return []

if response.status_code == 404:
# File not found - normal for old files
return []

response.raise_for_status()

# Decompress gzip
with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f:
content = f.read().decode('utf-8')

if not content.strip():
# Empty file
return []

# NDJSON format: one JSON line per trade
for line in content.strip().split('\n'):
if not line.strip():
continue
try:
record = json.loads(line)
trade = self._parse_trade_record(record)
if trade:
trades.append(trade)
except json.JSONDecodeError:
continue
except Exception as e:
if response.status_code == 429:
# Rate limit reached - wait and retry
wait_time = RATE_LIMIT_RETRY_DELAY * (retry + 1)
print(f"[{self.name}] Rate limited, waiting {wait_time}s...")
time.sleep(wait_time)
continue

if trades:
print(f"[{self.name}] Parsed {len(trades)} trades from {filename}")
response.raise_for_status()

except requests.exceptions.HTTPError as e:
if e.response.status_code != 404:
print(f"[{self.name}] HTTP error downloading {filename}: {e}")
except Exception as e:
print(f"[{self.name}] Error downloading/parsing {filename}: {e}")
# Decompress gzip
with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f:
content = f.read().decode('utf-8')

if not content.strip():
# Empty file
return []

# NDJSON format: one JSON line per trade
for line in content.strip().split('\n'):
if not line.strip():
continue
try:
record = json.loads(line)
trade = self._parse_trade_record(record)
if trade:
trades.append(trade)
except json.JSONDecodeError:
continue
except Exception:
continue

# Success - no further retry needed
break

except requests.exceptions.HTTPError as e:
if e.response.status_code == 429:
wait_time = RATE_LIMIT_RETRY_DELAY * (retry + 1)
print(f"[{self.name}] Rate limited, waiting {wait_time}s...")
time.sleep(wait_time)
continue
elif e.response.status_code != 404:
print(f"[{self.name}] HTTP error downloading {filename}: {e}")
break
except Exception as e:
print(f"[{self.name}] Error downloading/parsing {filename}: {e}")
break

return trades
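For reference, the decompress-and-parse step in this hunk follows a common gzip + NDJSON pattern. A minimal self-contained sketch, where the helper name and the sample records are illustrative and not taken from the commit:

import gzip
import io
import json
from typing import Iterator

def iter_ndjson_gz(payload: bytes) -> Iterator[dict]:
    """Yield one dict per non-empty JSON line in a gzip-compressed NDJSON payload."""
    with gzip.GzipFile(fileobj=io.BytesIO(payload)) as f:
        content = f.read().decode('utf-8')
    for line in content.strip().split('\n'):
        if not line.strip():
            continue
        try:
            yield json.loads(line)
        except json.JSONDecodeError:
            continue  # skip malformed lines, as the method above does

# Example with made-up data:
sample = gzip.compress(b'{"isin": "DE0007164600", "price": 123.45}\n{"isin": "DE0005557508", "price": 23.1}\n')
for record in iter_ndjson_gz(sample):
    print(record["isin"], record["price"])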
@@ -274,14 +293,24 @@ class DeutscheBoerseBase(BaseExchange):
print(f"[{self.name}] No files for target date found")
return []

# Download and parse all matching files
# Download and parse all matching files (with rate limiting)
successful = 0
for file in target_files:
total_files = len(target_files)

for i, file in enumerate(target_files):
trades = self._download_and_parse_file(file)
if trades:
all_trades.extend(trades)
successful += 1

# Rate limiting: pause between downloads
if i < total_files - 1:
time.sleep(RATE_LIMIT_DELAY)

# Progress report every 100 files
if (i + 1) % 100 == 0:
print(f"[{self.name}] Progress: {i + 1}/{total_files} files, {len(all_trades)} trades so far")

print(f"[{self.name}] Downloaded {successful} files, total {len(all_trades)} trades")
return all_trades

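The pacing in the hunk above sleeps between downloads but not after the last one. The same idea can also be expressed as a small generator; this is a sketch under that assumption, with a hypothetical helper name paced, not code from the commit:

import time
from typing import Iterable, Iterator, TypeVar

T = TypeVar("T")

def paced(items: Iterable[T], delay: float) -> Iterator[T]:
    """Yield items one by one, sleeping `delay` seconds between consecutive items."""
    first = True
    for item in items:
        if not first:
            time.sleep(delay)
        first = False
        yield item

# Usage sketch (names mirror the diff, but this loop is illustrative):
# for filename in paced(target_files, RATE_LIMIT_DELAY):
#     all_trades.extend(self._download_and_parse_file(filename))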
@@ -2,11 +2,15 @@ import requests
import gzip
import csv
import io
import time
from datetime import datetime, timedelta, timezone
from typing import List, Optional
from .base import BaseExchange, Trade
from bs4 import BeautifulSoup

# Rate limiting
RATE_LIMIT_DELAY = 0.3  # seconds between requests

# Browser User-Agent for access (gettex checks the User-Agent!)
HEADERS = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
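Since gettex reportedly rejects requests without a browser-like User-Agent (per the comment above), one way to avoid repeating the header on every call would be a requests.Session. This is a sketch of that idea, not something the commit itself does; the URL is a placeholder:

import requests

HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
}

# A session applies the headers to every request made through it.
session = requests.Session()
session.headers.update(HEADERS)

# Hypothetical usage; the URL below is a placeholder, not a real gettex endpoint.
# response = session.get("https://example.invalid/some-trade-file.csv.gz", timeout=60)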
@@ -411,11 +415,14 @@ class GettexExchange(BaseExchange):

print(f"[{self.name}] Found {len(target_files)} files for target date from page")

# Load files from the web page
for f in target_files:
# Load files from the web page (with rate limiting)
for i, f in enumerate(target_files):
trades = self._download_file_by_url(f['url'], f['filename'])
if trades:
all_trades.extend(trades)
# Rate limiting
if i < len(target_files) - 1:
time.sleep(RATE_LIMIT_DELAY)

# Fallback: try expected file names
if not all_trades: