@@ -7,10 +7,19 @@ from typing import List, Optional
 from .base import BaseExchange, Trade
 from bs4 import BeautifulSoup
 
+# API URLs for Deutsche Börse
+API_URLS = {
+    'XETRA': 'https://mfs.deutsche-boerse.com/api/DETR-posttrade',
+    'FRA': 'https://mfs.deutsche-boerse.com/api/DFRA-posttrade',
+    'QUOTRIX': 'https://mfs.deutsche-boerse.com/api/DGAT-posttrade',
+}
+DOWNLOAD_BASE_URL = "https://mfs.deutsche-boerse.com/api/download"
+
 # Browser User-Agent for access
 HEADERS = {
-    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
-    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+    'Accept': 'application/json, application/gzip, */*',
+    'Referer': 'https://mfs.deutsche-boerse.com/',
 }
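Note: the old scraper parsed an HTML directory listing; the new endpoints return JSON. A minimal smoke test of the file-list API, assuming the 'CurrentFiles' key that the code further down relies on (it is inferred from this patch, not from official API documentation):

import requests

# Values copied from the constants above.
url = 'https://mfs.deutsche-boerse.com/api/DETR-posttrade'
headers = {'User-Agent': 'Mozilla/5.0', 'Accept': 'application/json, application/gzip, */*'}

resp = requests.get(url, headers=headers, timeout=30)
resp.raise_for_status()
print(resp.json().get('CurrentFiles', [])[:5])  # first few available filenames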
@@ -26,59 +35,25 @@ class DeutscheBoerseBase(BaseExchange):
     def name(self) -> str:
         raise NotImplementedError
 
+    @property
+    def api_url(self) -> str:
+        """API URL for the file list"""
+        return API_URLS.get(self.name, self.base_url)
+
     def _get_file_list(self) -> List[str]:
-        """Parses the directory page and extracts all filenames"""
-        import re
+        """Fetches the file list from the JSON API"""
         try:
-            response = requests.get(self.base_url, headers=HEADERS, timeout=30)
+            response = requests.get(self.api_url, headers=HEADERS, timeout=30)
             response.raise_for_status()
 
-            files = []
-            html_text = response.text
-
-            # Debug: response length
-            print(f"[{self.name}] Response length: {len(html_text)} chars")
-
-            # Extract the prefix from base_url (e.g. DETR, DFRA, DGAT)
-            prefix_match = re.search(r'/([A-Z]{4})-posttrade', self.base_url)
-            prefix = prefix_match.group(1) if prefix_match else '[A-Z]{4}'
-
-            # Pattern: PREFIX-posttrade-YYYY-MM-DDTHH_MM.json.gz
-            # Important: filenames appear as text/name, not only in href
-            pattern = f'{prefix}-posttrade-\\d{{4}}-\\d{{2}}-\\d{{2}}T\\d{{2}}_\\d{{2}}\\.json\\.gz'
-
-            matches = re.findall(pattern, html_text)
-            files = list(set(matches))
-
-            if files:
-                print(f"[{self.name}] Found {len(files)} files via regex")
-
-            # Fallback: BeautifulSoup for links and text
-            if not files:
-                soup = BeautifulSoup(html_text, 'html.parser')
-                all_links = soup.find_all('a')
-                print(f"[{self.name}] Found {len(all_links)} total links on page")
-
-                for link in all_links:
-                    href = link.get('href', '')
-                    text = link.get_text(strip=True)
-
-                    # Check the link text (filenames are often shown as link text)
-                    if text and 'posttrade' in text.lower() and '.json.gz' in text.lower():
-                        files.append(text)
-                    # Check the href
-                    elif href and 'posttrade' in href.lower() and '.json.gz' in href.lower():
-                        filename = href.split('/')[-1] if '/' in href else href
-                        files.append(filename)
-
-                files = list(set(files))
-                if files:
-                    print(f"[{self.name}] Found {len(files)} files via BeautifulSoup")
-
-            print(f"[{self.name}] Total files found: {len(files)}")
+            data = response.json()
+            files = data.get('CurrentFiles', [])
+
+            print(f"[{self.name}] API returned {len(files)} files")
             return files
 
         except Exception as e:
-            print(f"Error fetching file list from {self.base_url}: {e}")
+            print(f"[{self.name}] Error fetching file list from API: {e}")
             return []
 
     def _filter_files_for_date(self, files: List[str], target_date: datetime.date) -> List[str]:
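Note: the body of _filter_files_for_date sits outside this hunk, but the filename convention PREFIX-posttrade-YYYY-MM-DDTHH_MM.json.gz (visible in the removed regex above) implies date filtering along these lines; file_date here is a hypothetical helper for illustration, not part of the patch:

import datetime
import re

def file_date(filename: str):
    # Pull the YYYY-MM-DD part out of e.g. 'DETR-posttrade-2026-01-27T08_53.json.gz'.
    m = re.search(r'-posttrade-(\d{4})-(\d{2})-(\d{2})T', filename)
    return datetime.date(*map(int, m.groups())) if m else None

print(file_date('DETR-posttrade-2026-01-27T08_53.json.gz'))  # 2026-01-27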
@@ -116,47 +91,52 @@ class DeutscheBoerseBase(BaseExchange):
 
         return filtered
 
-    def _download_and_parse_file(self, file_url: str) -> List[Trade]:
-        """Downloads a JSON.gz file and parses the trades"""
+    def _download_and_parse_file(self, filename: str) -> List[Trade]:
+        """Downloads a JSON.gz file from the API and parses the trades"""
         trades = []
 
         try:
-            # Build the full URL
-            # Format: https://mfs.deutsche-boerse.com/DETR-posttrade/DETR-posttrade-2026-01-27T08_53.json.gz
-            if not file_url.startswith('http'):
-                # Strip a leading slash if present
-                filename = file_url.lstrip('/')
-                full_url = f"{self.base_url}/{filename}"
-            else:
-                full_url = file_url
+            # Download URL: https://mfs.deutsche-boerse.com/api/download/{filename}
+            full_url = f"{DOWNLOAD_BASE_URL}/{filename}"
 
             response = requests.get(full_url, headers=HEADERS, timeout=60)
 
             if response.status_code == 404:
-                print(f"[{self.name}] File not found: {full_url}")
+                # File not found - normal for old files
                 return []
 
             response.raise_for_status()
-            print(f"[{self.name}] Downloaded: {full_url} ({len(response.content)} bytes)")
 
             # Decompress the gzip payload
             with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f:
-                json_data = json.load(f)
+                content = f.read().decode('utf-8')
 
-            # Parse trades
-            # Deutsche Börse JSON format (RTS1/RTS2):
-            # Typical fields: TrdDt, TrdTm, ISIN, Pric, Qty, TrdCcy, etc.
-            for record in json_data:
+            if not content.strip():
+                # Empty file
+                return []
+
+            # NDJSON format: one JSON line per trade
+            for line in content.strip().split('\n'):
+                if not line.strip():
+                    continue
                 try:
+                    record = json.loads(line)
                     trade = self._parse_trade_record(record)
                     if trade:
                         trades.append(trade)
-                except Exception as e:
-                    print(f"Error parsing trade record: {e}")
+                except json.JSONDecodeError:
+                    continue
+                except Exception:
+                    continue
+
+            if trades:
+                print(f"[{self.name}] Parsed {len(trades)} trades from {filename}")
 
+        except requests.exceptions.HTTPError as e:
+            if e.response.status_code != 404:
+                print(f"[{self.name}] HTTP error downloading {filename}: {e}")
         except Exception as e:
-            print(f"Error downloading/parsing {file_url}: {e}")
+            print(f"[{self.name}] Error downloading/parsing {filename}: {e}")
 
         return trades
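Note: the switch from json.load to per-line json.loads reflects that the download is NDJSON (one JSON object per line), not a single JSON array. A self-contained sketch of the same decode path, with an invented record shape (the real field names are handled by _parse_trade_record):

import gzip
import io
import json

# Simulate a downloaded .json.gz body; the fields here are made up.
raw = gzip.compress(b'{"isin": "DE0007100000", "price": 58.2}\n\n{"isin": "DE0008404005", "price": 31.1}\n')

with gzip.GzipFile(fileobj=io.BytesIO(raw)) as f:
    content = f.read().decode('utf-8')

records = [json.loads(line) for line in content.strip().split('\n') if line.strip()]
print(len(records))  # 2 - blank lines are skipped, mirroring the loop above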
@@ -243,34 +223,6 @@ class DeutscheBoerseBase(BaseExchange):
             print(f"Error parsing record: {e}")
         return None
 
-    def _generate_expected_files(self, target_date: datetime.date) -> List[str]:
-        """
-        Generates expected filenames based on the known format.
-        Format: PREFIX-posttrade-YYYY-MM-DDTHH_MM.json.gz
-        """
-        import re
-        files = []
-
-        # Extract the prefix from base_url (e.g. DETR, DFRA, DGAT)
-        prefix_match = re.search(r'/([A-Z]{4})-posttrade', self.base_url)
-        prefix = prefix_match.group(1) if prefix_match else 'DETR'
-
-        date_str = target_date.strftime('%Y-%m-%d')
-
-        # Generate for every minute of the trading day (07:00 - 22:00 UTC)
-        for hour in range(7, 23):
-            for minute in range(0, 60):
-                files.append(f"{prefix}-posttrade-{date_str}T{hour:02d}_{minute:02d}.json.gz")
-
-        # Also early files from the following day (after midnight UTC)
-        next_date = target_date + timedelta(days=1)
-        next_date_str = next_date.strftime('%Y-%m-%d')
-        for hour in range(0, 3):
-            for minute in range(0, 60):
-                files.append(f"{prefix}-posttrade-{next_date_str}T{hour:02d}_{minute:02d}.json.gz")
-
-        return files
-
     def _get_last_trading_day(self, from_date: datetime.date) -> datetime.date:
         """
         Finds the last trading day (skips weekends).
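Note: the body of _get_last_trading_day is not part of this hunk; based on its docstring, the weekend skip presumably looks something like this sketch (exchange holidays are not considered here):

import datetime

def last_trading_day(d: datetime.date) -> datetime.date:
    # Monday=0 ... Sunday=6; step back over Saturday and Sunday.
    while d.weekday() >= 5:
        d -= datetime.timedelta(days=1)
    return d

print(last_trading_day(datetime.date(2026, 2, 1)))  # a Sunday -> 2026-01-30 (Friday)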
@@ -307,19 +259,20 @@ class DeutscheBoerseBase(BaseExchange):
 
         print(f"[{self.name}] Fetching trades for date: {target_date}")
 
-        # First try to fetch the file list from the page
+        # Fetch the file list from the API
         files = self._get_file_list()
-        print(f"[{self.name}] Found {len(files)} total files")
 
         if not files:
+            print(f"[{self.name}] No files available from API")
             return []
 
         # Filter files for the target date
         target_files = self._filter_files_for_date(files, target_date)
-        print(f"[{self.name}] {len(target_files)} files match target date")
+        print(f"[{self.name}] {len(target_files)} files match target date (of {len(files)} total)")
 
-        # If no files were found on the page, generate expected filenames
         if not target_files:
-            print(f"[{self.name}] No files from page, trying generated filenames...")
-            target_files = self._generate_expected_files(target_date)
-            print(f"[{self.name}] Trying {len(target_files)} potential files")
+            print(f"[{self.name}] No files for target date found")
+            return []
 
         # Download and parse all matching files
         successful = 0
@@ -328,13 +281,8 @@ class DeutscheBoerseBase(BaseExchange):
             if trades:
                 all_trades.extend(trades)
                 successful += 1
-                if successful <= 5:
-                    print(f"[{self.name}] Parsed {len(trades)} trades from {file}")
-
-        if successful > 5:
-            print(f"[{self.name}] ... and {successful - 5} more files")
 
-        print(f"[{self.name}] Total trades fetched: {len(all_trades)}")
+        print(f"[{self.name}] Downloaded {successful} files, total {len(all_trades)} trades")
         return all_trades
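Note: put together, the flow after this patch is: file list from the API, filter by date, download each file via DOWNLOAD_BASE_URL, parse NDJSON. A standalone check of the new download path; the URL format comes from the comment in the diff, and the example timestamp (taken from the removed comment) may well 404, which the code above treats as normal:

import gzip
import io
import json

import requests

url = 'https://mfs.deutsche-boerse.com/api/download/DETR-posttrade-2026-01-27T08_53.json.gz'
headers = {'User-Agent': 'Mozilla/5.0', 'Referer': 'https://mfs.deutsche-boerse.com/'}

resp = requests.get(url, headers=headers, timeout=60)
if resp.status_code == 404:
    print('file not present (expected for old timestamps)')
else:
    resp.raise_for_status()
    with gzip.GzipFile(fileobj=io.BytesIO(resp.content)) as f:
        lines = [line for line in f.read().decode('utf-8').split('\n') if line.strip()]
    print(f'{len(lines)} trade records')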