@@ -7,10 +7,19 @@ from typing import List, Optional
 from .base import BaseExchange, Trade
 from bs4 import BeautifulSoup
 
+# API URLs for Deutsche Börse
+API_URLS = {
+    'XETRA': 'https://mfs.deutsche-boerse.com/api/DETR-posttrade',
+    'FRA': 'https://mfs.deutsche-boerse.com/api/DFRA-posttrade',
+    'QUOTRIX': 'https://mfs.deutsche-boerse.com/api/DGAT-posttrade',
+}
+DOWNLOAD_BASE_URL = "https://mfs.deutsche-boerse.com/api/download"
+
 # Browser User-Agent for access
 HEADERS = {
-    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
-    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+    'Accept': 'application/json, application/gzip, */*',
+    'Referer': 'https://mfs.deutsche-boerse.com/',
 }
 
 
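The constants above replace per-venue directory pages with one list endpoint per venue plus a shared download endpoint. A filename returned by the list API maps directly onto a download URL; the example filename is taken from a comment removed later in this diff:

    # Illustration only: resolving a listed filename to its download URL.
    name = "DETR-posttrade-2026-01-27T08_53.json.gz"
    url = f"{DOWNLOAD_BASE_URL}/{name}"
    # -> https://mfs.deutsche-boerse.com/api/download/DETR-posttrade-2026-01-27T08_53.json.gz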
@@ -26,59 +35,25 @@ class DeutscheBoerseBase(BaseExchange):
     def name(self) -> str:
         raise NotImplementedError
 
+    @property
+    def api_url(self) -> str:
+        """API URL for the file list"""
+        return API_URLS.get(self.name, self.base_url)
+
     def _get_file_list(self) -> List[str]:
-        """Parses the directory page and extracts all file names"""
-        import re
+        """Fetches the file list from the JSON API"""
         try:
-            response = requests.get(self.base_url, headers=HEADERS, timeout=30)
+            response = requests.get(self.api_url, headers=HEADERS, timeout=30)
             response.raise_for_status()
 
-            files = []
-            html_text = response.text
-
-            # Debug: response length
-            print(f"[{self.name}] Response length: {len(html_text)} chars")
-
-            # Extract the prefix from base_url (e.g. DETR, DFRA, DGAT)
-            prefix_match = re.search(r'/([A-Z]{4})-posttrade', self.base_url)
-            prefix = prefix_match.group(1) if prefix_match else '[A-Z]{4}'
-
-            # Pattern: PREFIX-posttrade-YYYY-MM-DDTHH_MM.json.gz
-            # Important: file names appear as text/name, not only in href
-            pattern = f'{prefix}-posttrade-\\d{{4}}-\\d{{2}}-\\d{{2}}T\\d{{2}}_\\d{{2}}\\.json\\.gz'
-            matches = re.findall(pattern, html_text)
-            files = list(set(matches))
-
-            if files:
-                print(f"[{self.name}] Found {len(files)} files via regex")
-
-            # Fallback: BeautifulSoup for links and text
-            if not files:
-                soup = BeautifulSoup(html_text, 'html.parser')
-                all_links = soup.find_all('a')
-                print(f"[{self.name}] Found {len(all_links)} total links on page")
-
-                for link in all_links:
-                    href = link.get('href', '')
-                    text = link.get_text(strip=True)
-
-                    # Check the link text (file names are often shown as link text)
-                    if text and 'posttrade' in text.lower() and '.json.gz' in text.lower():
-                        files.append(text)
-                    # Check the href
-                    elif href and 'posttrade' in href.lower() and '.json.gz' in href.lower():
-                        filename = href.split('/')[-1] if '/' in href else href
-                        files.append(filename)
-
-                files = list(set(files))
-                if files:
-                    print(f"[{self.name}] Found {len(files)} files via BeautifulSoup")
-
-            print(f"[{self.name}] Total files found: {len(files)}")
+            data = response.json()
+            files = data.get('CurrentFiles', [])
+
+            print(f"[{self.name}] API returned {len(files)} files")
             return files
 
         except Exception as e:
-            print(f"Error fetching file list from {self.base_url}: {e}")
+            print(f"[{self.name}] Error fetching file list from API: {e}")
             return []
 
     def _filter_files_for_date(self, files: List[str], target_date: datetime.date) -> List[str]:
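The rewritten _get_file_list drops HTML scraping (regex plus a BeautifulSoup fallback) for a single JSON call. A minimal standalone sketch of that call, assuming only the response shape {'CurrentFiles': [...]} that the method reads:

    import requests

    # API_URLS and HEADERS as defined at the top of this file
    resp = requests.get(API_URLS['XETRA'], headers=HEADERS, timeout=30)
    resp.raise_for_status()
    files = resp.json().get('CurrentFiles', [])
    # files is a list of names like 'DETR-posttrade-YYYY-MM-DDTHH_MM.json.gz'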
@@ -116,47 +91,52 @@ class DeutscheBoerseBase(BaseExchange):
 
         return filtered
 
-    def _download_and_parse_file(self, file_url: str) -> List[Trade]:
-        """Downloads a JSON.gz file and parses the trades"""
+    def _download_and_parse_file(self, filename: str) -> List[Trade]:
+        """Downloads a JSON.gz file from the API and parses the trades"""
         trades = []
 
         try:
-            # Build the full URL
-            # Format: https://mfs.deutsche-boerse.com/DETR-posttrade/DETR-posttrade-2026-01-27T08_53.json.gz
-            if not file_url.startswith('http'):
-                # Remove a leading slash if present
-                filename = file_url.lstrip('/')
-                full_url = f"{self.base_url}/{filename}"
-            else:
-                full_url = file_url
+            # Download URL: https://mfs.deutsche-boerse.com/api/download/{filename}
+            full_url = f"{DOWNLOAD_BASE_URL}/{filename}"
 
             response = requests.get(full_url, headers=HEADERS, timeout=60)
 
             if response.status_code == 404:
-                print(f"[{self.name}] File not found: {full_url}")
+                # File not found - normal for old files
                 return []
 
             response.raise_for_status()
-            print(f"[{self.name}] Downloaded: {full_url} ({len(response.content)} bytes)")
 
             # Unpack gzip
             with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f:
-                json_data = json.load(f)
-
-            # Parse trades
-            # Deutsche Börse JSON format (RTS1/RTS2):
-            # Typical fields: TrdDt, TrdTm, ISIN, Pric, Qty, TrdCcy, etc.
-            for record in json_data:
+                content = f.read().decode('utf-8')
+
+            if not content.strip():
+                # Empty file
+                return []
+
+            # NDJSON format: one JSON line per trade
+            for line in content.strip().split('\n'):
+                if not line.strip():
+                    continue
                 try:
+                    record = json.loads(line)
                     trade = self._parse_trade_record(record)
                     if trade:
                         trades.append(trade)
-                except Exception as e:
-                    print(f"Error parsing trade record: {e}")
+                except json.JSONDecodeError:
                     continue
+                except Exception as e:
+                    continue
 
+            if trades:
+                print(f"[{self.name}] Parsed {len(trades)} trades from {filename}")
+
+        except requests.exceptions.HTTPError as e:
+            if e.response.status_code != 404:
+                print(f"[{self.name}] HTTP error downloading {filename}: {e}")
         except Exception as e:
-            print(f"Error downloading/parsing {file_url}: {e}")
+            print(f"[{self.name}] Error downloading/parsing {filename}: {e}")
 
         return trades
 
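Two things change in the download path: files are fetched from the shared download endpoint by bare filename, and the payload is decoded as gzip-compressed NDJSON (one JSON object per line) instead of a single JSON document. A self-contained sketch of that decode step, with the _parse_trade_record call left out:

    import gzip
    import io
    import json

    import requests

    def fetch_ndjson_gz(url: str, headers: dict) -> list:
        """Download a .json.gz file and return its NDJSON records."""
        resp = requests.get(url, headers=headers, timeout=60)
        if resp.status_code == 404:
            return []  # missing files are expected for older timestamps
        resp.raise_for_status()
        with gzip.GzipFile(fileobj=io.BytesIO(resp.content)) as f:
            content = f.read().decode('utf-8')
        records = []
        for line in content.strip().split('\n'):
            if not line.strip():
                continue
            try:
                records.append(json.loads(line))
            except json.JSONDecodeError:
                continue  # skip malformed lines, as the method above does
        return records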
@@ -243,34 +223,6 @@ class DeutscheBoerseBase(BaseExchange):
             print(f"Error parsing record: {e}")
             return None
 
-    def _generate_expected_files(self, target_date: datetime.date) -> List[str]:
-        """
-        Generates expected file names based on the known format.
-        Format: PREFIX-posttrade-YYYY-MM-DDTHH_MM.json.gz
-        """
-        import re
-        files = []
-
-        # Extract the prefix from base_url (e.g. DETR, DFRA, DGAT)
-        prefix_match = re.search(r'/([A-Z]{4})-posttrade', self.base_url)
-        prefix = prefix_match.group(1) if prefix_match else 'DETR'
-
-        date_str = target_date.strftime('%Y-%m-%d')
-
-        # Generate for every hour of the trading day (07:00 - 22:00 UTC, every minute)
-        for hour in range(7, 23):
-            for minute in range(0, 60):
-                files.append(f"{prefix}-posttrade-{date_str}T{hour:02d}_{minute:02d}.json.gz")
-
-        # Also early files from the following day (after midnight UTC)
-        next_date = target_date + timedelta(days=1)
-        next_date_str = next_date.strftime('%Y-%m-%d')
-        for hour in range(0, 3):
-            for minute in range(0, 60):
-                files.append(f"{prefix}-posttrade-{next_date_str}T{hour:02d}_{minute:02d}.json.gz")
-
-        return files
-
     def _get_last_trading_day(self, from_date: datetime.date) -> datetime.date:
         """
         Finds the last trading day (skips weekends).
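With _generate_expected_files removed, the fetch no longer probes up to 1,140 candidate URLs per day (16 hours x 60 minutes, plus 3 early hours of the next day); date selection now rests entirely on _filter_files_for_date, whose body is unchanged and therefore not shown in this diff. A hypothetical equivalent, matching the documented name format PREFIX-posttrade-YYYY-MM-DDTHH_MM.json.gz:

    import datetime
    import re

    # Hypothetical stand-in for _filter_files_for_date; the real method
    # is untouched by this commit.
    def filter_files_for_date(files: list, target_date: datetime.date) -> list:
        date_str = target_date.strftime('%Y-%m-%d')
        pattern = re.compile(
            rf'[A-Z]{{4}}-posttrade-{re.escape(date_str)}T\d{{2}}_\d{{2}}\.json\.gz')
        return [f for f in files if pattern.fullmatch(f)]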
@@ -307,19 +259,20 @@ class DeutscheBoerseBase(BaseExchange):
 
         print(f"[{self.name}] Fetching trades for date: {target_date}")
 
-        # First, try to fetch the file list from the page
+        # Fetch the file list from the API
         files = self._get_file_list()
-        print(f"[{self.name}] Found {len(files)} total files")
+
+        if not files:
+            print(f"[{self.name}] No files available from API")
+            return []
 
         # Filter files for the target date
         target_files = self._filter_files_for_date(files, target_date)
-        print(f"[{self.name}] {len(target_files)} files match target date")
+        print(f"[{self.name}] {len(target_files)} files match target date (of {len(files)} total)")
 
-        # If no files were found on the page, generate expected file names
         if not target_files:
-            print(f"[{self.name}] No files from page, trying generated filenames...")
-            target_files = self._generate_expected_files(target_date)
-            print(f"[{self.name}] Trying {len(target_files)} potential files")
+            print(f"[{self.name}] No files for target date found")
+            return []
 
         # Download and parse all matching files
         successful = 0
@@ -328,13 +281,8 @@ class DeutscheBoerseBase(BaseExchange):
             if trades:
                 all_trades.extend(trades)
                 successful += 1
-                if successful <= 5:
-                    print(f"[{self.name}] Parsed {len(trades)} trades from {file}")
 
-        if successful > 5:
-            print(f"[{self.name}] ... and {successful - 5} more files")
-
-        print(f"[{self.name}] Total trades fetched: {len(all_trades)}")
+        print(f"[{self.name}] Downloaded {successful} files, total {len(all_trades)} trades")
         return all_trades
 
 
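Taken together, the fetch flow is now: list files from the JSON API, filter them by date, download each file by name, and parse the NDJSON records. A hypothetical end-to-end walk-through, assuming a concrete subclass such as Xetra(DeutscheBoerseBase) defined elsewhere in the package:

    import datetime

    exchange = Xetra()                 # hypothetical concrete subclass
    files = exchange._get_file_list()  # JSON API, 'CurrentFiles'
    target = exchange._get_last_trading_day(datetime.date.today())
    for name in exchange._filter_files_for_date(files, target):
        trades = exchange._download_and_parse_file(name)
        print(f"{name}: {len(trades)} trades")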