Binary file not shown.
Binary file not shown.
@@ -2,11 +2,17 @@ import requests
 import gzip
 import json
 import io
+import time
 from datetime import datetime, timedelta, timezone
 from typing import List, Optional
 from .base import BaseExchange, Trade
 from bs4 import BeautifulSoup
 
+# Rate-limiting configuration
+RATE_LIMIT_DELAY = 0.5  # seconds between requests
+RATE_LIMIT_RETRY_DELAY = 5  # seconds to wait on HTTP 429
+MAX_RETRIES = 3  # maximum retries on HTTP 429
+
 # API URLs for Deutsche Börse
 API_URLS = {
     'XETRA': 'https://mfs.deutsche-boerse.com/api/DETR-posttrade',
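With these defaults the wait before a retry grows linearly with the attempt number, following the RATE_LIMIT_RETRY_DELAY * (retry + 1) expression used further down: 5 s after the first 429, 10 s after the second, 15 s after the third. A minimal sketch of that schedule, using only the constants above:

    RATE_LIMIT_RETRY_DELAY = 5
    MAX_RETRIES = 3

    # Linear backoff schedule applied on HTTP 429: one wait per retry attempt.
    waits = [RATE_LIMIT_RETRY_DELAY * (retry + 1) for retry in range(MAX_RETRIES)]
    print(waits)  # [5, 10, 15]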
@@ -94,49 +100,62 @@ class DeutscheBoerseBase(BaseExchange):
     def _download_and_parse_file(self, filename: str) -> List[Trade]:
         """Downloads a JSON.gz file from the API and parses the trades"""
         trades = []
-
-        try:
-            # Download URL: https://mfs.deutsche-boerse.com/api/download/{filename}
-            full_url = f"{DOWNLOAD_BASE_URL}/{filename}"
-
-            response = requests.get(full_url, headers=HEADERS, timeout=60)
-
-            if response.status_code == 404:
-                # File not found - normal for old files
-                return []
-
-            response.raise_for_status()
-
-            # Decompress gzip
-            with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f:
-                content = f.read().decode('utf-8')
-
-            if not content.strip():
-                # Empty file
-                return []
-
-            # NDJSON format: one JSON line per trade
-            for line in content.strip().split('\n'):
-                if not line.strip():
-                    continue
-                try:
-                    record = json.loads(line)
-                    trade = self._parse_trade_record(record)
-                    if trade:
-                        trades.append(trade)
-                except json.JSONDecodeError:
-                    continue
-                except Exception as e:
-                    continue
-
-            if trades:
-                print(f"[{self.name}] Parsed {len(trades)} trades from {filename}")
-
-        except requests.exceptions.HTTPError as e:
-            if e.response.status_code != 404:
-                print(f"[{self.name}] HTTP error downloading {filename}: {e}")
-        except Exception as e:
-            print(f"[{self.name}] Error downloading/parsing {filename}: {e}")
+        full_url = f"{DOWNLOAD_BASE_URL}/{filename}"
+
+        for retry in range(MAX_RETRIES):
+            try:
+                response = requests.get(full_url, headers=HEADERS, timeout=60)
+
+                if response.status_code == 404:
+                    # File not found - normal for old files
+                    return []
+
+                if response.status_code == 429:
+                    # Rate limit hit - wait and try again
+                    wait_time = RATE_LIMIT_RETRY_DELAY * (retry + 1)
+                    print(f"[{self.name}] Rate limited, waiting {wait_time}s...")
+                    time.sleep(wait_time)
+                    continue
+
+                response.raise_for_status()
+
+                # Decompress gzip
+                with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f:
+                    content = f.read().decode('utf-8')
+
+                if not content.strip():
+                    # Empty file
+                    return []
+
+                # NDJSON format: one JSON line per trade
+                for line in content.strip().split('\n'):
+                    if not line.strip():
+                        continue
+                    try:
+                        record = json.loads(line)
+                        trade = self._parse_trade_record(record)
+                        if trade:
+                            trades.append(trade)
+                    except json.JSONDecodeError:
+                        continue
+                    except Exception:
+                        continue
+
+                # Success - no further retry needed
+                break
+
+            except requests.exceptions.HTTPError as e:
+                if e.response.status_code == 429:
+                    wait_time = RATE_LIMIT_RETRY_DELAY * (retry + 1)
+                    print(f"[{self.name}] Rate limited, waiting {wait_time}s...")
+                    time.sleep(wait_time)
+                    continue
+                elif e.response.status_code != 404:
+                    print(f"[{self.name}] HTTP error downloading {filename}: {e}")
+                break
+            except Exception as e:
+                print(f"[{self.name}] Error downloading/parsing {filename}: {e}")
+                break
 
         return trades
 
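The payload this method handles is gzip-compressed NDJSON, one JSON object per line. Below is a minimal standalone sketch of just that decode step, outside the exchange classes; parse_ndjson_gz and the sample payload are illustrative and not part of the commit:

    import gzip
    import io
    import json
    from typing import List

    def parse_ndjson_gz(payload: bytes) -> List[dict]:
        # Decompress the gzip payload and parse each non-empty line as JSON,
        # skipping lines that fail to decode, mirroring the tolerant loop
        # in _download_and_parse_file.
        records = []
        with gzip.GzipFile(fileobj=io.BytesIO(payload)) as f:
            content = f.read().decode("utf-8")
        for line in content.strip().split("\n"):
            if not line.strip():
                continue
            try:
                records.append(json.loads(line))
            except json.JSONDecodeError:
                continue
        return records

    # Example: two records encoded as NDJSON, then gzip-compressed.
    sample = gzip.compress(b'{"isin": "DE0007100000", "price": 71.5}\n{"isin": "DE0005140008", "price": 15.2}\n')
    print(len(parse_ndjson_gz(sample)))  # 2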
@@ -274,13 +293,23 @@ class DeutscheBoerseBase(BaseExchange):
             print(f"[{self.name}] No files for target date found")
             return []
 
-        # Download and parse all matching files
+        # Download and parse all matching files (with rate limiting)
         successful = 0
-        for file in target_files:
+        total_files = len(target_files)
+
+        for i, file in enumerate(target_files):
             trades = self._download_and_parse_file(file)
             if trades:
                 all_trades.extend(trades)
                 successful += 1
+
+            # Rate limiting: pause between downloads
+            if i < total_files - 1:
+                time.sleep(RATE_LIMIT_DELAY)
+
+            # Progress every 100 files
+            if (i + 1) % 100 == 0:
+                print(f"[{self.name}] Progress: {i + 1}/{total_files} files, {len(all_trades)} trades so far")
 
         print(f"[{self.name}] Downloaded {successful} files, total {len(all_trades)} trades")
         return all_trades
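With RATE_LIMIT_DELAY = 0.5, the pause adds roughly 0.5 * (N - 1) seconds of idle time on top of the downloads themselves; for 1,000 files that is about 500 s, a little over 8 minutes, which is presumably why the progress line every 100 files is added in the same hunk.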
@@ -2,11 +2,15 @@ import requests
 import gzip
 import csv
 import io
+import time
 from datetime import datetime, timedelta, timezone
 from typing import List, Optional
 from .base import BaseExchange, Trade
 from bs4 import BeautifulSoup
 
+# Rate limiting
+RATE_LIMIT_DELAY = 0.3  # seconds between requests
+
 # Browser User-Agent for access (gettex checks the User-Agent!)
 HEADERS = {
     'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
@@ -411,11 +415,14 @@ class GettexExchange(BaseExchange):
 
         print(f"[{self.name}] Found {len(target_files)} files for target date from page")
 
-        # Download files from the web page
-        for f in target_files:
+        # Download files from the web page (with rate limiting)
+        for i, f in enumerate(target_files):
             trades = self._download_file_by_url(f['url'], f['filename'])
             if trades:
                 all_trades.extend(trades)
+            # Rate limiting
+            if i < len(target_files) - 1:
+                time.sleep(RATE_LIMIT_DELAY)
 
         # Fallback: try expected file names
         if not all_trades:
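Both download loops, here and in the Deutsche Börse file above, now follow the same pattern: process one item, then sleep unless it was the last. A hypothetical helper like the sketch below could factor that out; rate_limited_map is illustrative only and not part of this commit:

    import time
    from typing import Callable, List, TypeVar

    T = TypeVar("T")
    R = TypeVar("R")

    def rate_limited_map(items: List[T], fn: Callable[[T], R], delay: float) -> List[R]:
        # Apply fn to each item, sleeping `delay` seconds between calls
        # but not after the last one, as both loops above do.
        results: List[R] = []
        for i, item in enumerate(items):
            results.append(fn(item))
            if i < len(items) - 1:
                time.sleep(delay)
        return results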