@@ -34,6 +34,10 @@ class DeutscheBoerseBase(BaseExchange):
             response.raise_for_status()
 
             files = []
+            html_text = response.text
+
+            # Debug: response length
+            print(f"[{self.name}] Response length: {len(html_text)} chars")
 
             # Primary: regex-based extraction (more reliable)
             # Pattern: PREFIX-posttrade-YYYY-MM-DDTHH_MM.json.gz
@@ -47,13 +51,16 @@ class DeutscheBoerseBase(BaseExchange):
 
             # Generic pattern
             pattern = r'[A-Z]{4}-posttrade-\d{4}-\d{2}-\d{2}T\d{2}_\d{2}\.json\.gz'
-            matches = re.findall(pattern, response.text)
+            matches = re.findall(pattern, html_text)
             files = list(set(matches))
 
             # Secondary: BeautifulSoup for links (in case the regex finds nothing)
             if not files:
-                soup = BeautifulSoup(response.text, 'html.parser')
-                for link in soup.find_all('a'):
+                soup = BeautifulSoup(html_text, 'html.parser')
+                all_links = soup.find_all('a')
+                print(f"[{self.name}] Found {len(all_links)} total links on page")
+
+                for link in all_links:
                     href = link.get('href', '')
                     text = link.get_text(strip=True)
 
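The strict pattern is anchored to the known four-letter venue prefixes, and `list(set(...))` deduplicates matches that appear both in an `href` attribute and in the link text. A minimal standalone sketch of that behavior (the HTML snippet is invented for illustration):

import re

pattern = r'[A-Z]{4}-posttrade-\d{4}-\d{2}-\d{2}T\d{2}_\d{2}\.json\.gz'

# Hypothetical directory listing: the filename appears in both href and link text
html_text = ('<a href="DETR-posttrade-2024-05-13T07_00.json.gz">'
             'DETR-posttrade-2024-05-13T07_00.json.gz</a>')

matches = re.findall(pattern, html_text)
print(matches)             # two raw hits, one per occurrence
print(list(set(matches)))  # deduplicated, as the diff does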
@@ -65,6 +72,15 @@ class DeutscheBoerseBase(BaseExchange):
                     elif text and 'posttrade' in text.lower() and '.json.gz' in text.lower():
                         files.append(text)
 
+            # Tertiary: search the HTML for any "posttrade" and extract filenames
+            if not files:
+                # More general pattern for arbitrary filenames containing "posttrade"
+                general_pattern = r'[\w-]*posttrade[\w-]*\d{4}[-_]\d{2}[-_]\d{2}[T_]\d{2}[_:]\d{2}\.json\.gz'
+                matches = re.findall(general_pattern, html_text, re.IGNORECASE)
+                files = list(set(matches))
+                if files:
+                    print(f"[{self.name}] Found {len(files)} files via general pattern")
+
             print(f"[{self.name}] Found {len(files)} files via regex/soup")
             return files
         except Exception as e:
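The tertiary pattern deliberately loosens the separators ([-_], [T_], [_:]) and matches case-insensitively, so it still finds names that deviate from the canonical format. A quick check against made-up candidates:

import re

general_pattern = r'[\w-]*posttrade[\w-]*\d{4}[-_]\d{2}[-_]\d{2}[T_]\d{2}[_:]\d{2}\.json\.gz'

candidates = [
    'DETR-posttrade-2024-05-13T07_00.json.gz',  # canonical form
    'dfra-POSTTRADE-2024_05_13_07:00.json.gz',  # odd separators, mixed case
    'unrelated-file-2024-05-13.json.gz',        # no "posttrade": no match
]
for name in candidates:
    print(name, '->', bool(re.search(general_pattern, name, re.IGNORECASE)))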
@@ -233,6 +249,34 @@ class DeutscheBoerseBase(BaseExchange):
             print(f"Error parsing record: {e}")
             return None
 
+    def _generate_expected_files(self, target_date: datetime.date) -> List[str]:
+        """
+        Generates expected filenames based on the known format.
+        Format: PREFIX-posttrade-YYYY-MM-DDTHH_MM.json.gz
+        """
+        import re
+        files = []
+
+        # Extract the prefix from base_url (e.g. DETR, DFRA, DGAT)
+        prefix_match = re.search(r'/([A-Z]{4})-posttrade', self.base_url)
+        prefix = prefix_match.group(1) if prefix_match else 'DETR'
+
+        date_str = target_date.strftime('%Y-%m-%d')
+
+        # Generate for every hour of the trading day (07:00 - 22:00 UTC, every minute)
+        for hour in range(7, 23):
+            for minute in range(0, 60):
+                files.append(f"{prefix}-posttrade-{date_str}T{hour:02d}_{minute:02d}.json.gz")
+
+        # Also early files from the following day (after midnight UTC)
+        next_date = target_date + timedelta(days=1)
+        next_date_str = next_date.strftime('%Y-%m-%d')
+        for hour in range(0, 3):
+            for minute in range(0, 60):
+                files.append(f"{prefix}-posttrade-{next_date_str}T{hour:02d}_{minute:02d}.json.gz")
+
+        return files
+
     def fetch_latest_trades(self, include_yesterday: bool = True, since_date: datetime = None) -> List[Trade]:
         """
         Fetches all trades from the previous day (or since since_date).
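Brute-forcing one filename per minute yields 16 x 60 + 3 x 60 = 1140 candidates per venue and day, most of which will not exist on the server. A standalone sketch of the same generation logic (the date and prefix are placeholders):

from datetime import date, timedelta

target_date = date(2024, 5, 13)  # hypothetical trading day
prefix = 'DETR'                  # the method's fallback prefix

files = [f"{prefix}-posttrade-{target_date:%Y-%m-%d}T{h:02d}_{m:02d}.json.gz"
         for h in range(7, 23) for m in range(60)]
next_day = target_date + timedelta(days=1)
files += [f"{prefix}-posttrade-{next_day:%Y-%m-%d}T{h:02d}_{m:02d}.json.gz"
          for h in range(3) for m in range(60)]

print(len(files))  # 1140
print(files[0])    # DETR-posttrade-2024-05-13T07_00.json.gz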
@@ -248,7 +292,7 @@ class DeutscheBoerseBase(BaseExchange):
 
         print(f"[{self.name}] Fetching trades for date: {target_date}")
 
-        # Fetch the file list
+        # First, try to fetch the file list from the page
         files = self._get_file_list()
         print(f"[{self.name}] Found {len(files)} total files")
 
@@ -256,11 +300,24 @@ class DeutscheBoerseBase(BaseExchange):
         target_files = self._filter_files_for_date(files, target_date)
         print(f"[{self.name}] {len(target_files)} files match target date")
 
+        # If no files were found on the page, generate the expected filenames
+        if not target_files:
+            print(f"[{self.name}] No files from page, trying generated filenames...")
+            target_files = self._generate_expected_files(target_date)
+            print(f"[{self.name}] Trying {len(target_files)} potential files")
+
         # Download and parse all matching files
+        successful = 0
         for file in target_files:
             trades = self._download_and_parse_file(file)
-            all_trades.extend(trades)
-            print(f"[{self.name}] Parsed {len(trades)} trades from {file}")
+            if trades:
+                all_trades.extend(trades)
+                successful += 1
+                if successful <= 5:
+                    print(f"[{self.name}] Parsed {len(trades)} trades from {file}")
+
+        if successful > 5:
+            print(f"[{self.name}] ... and {successful - 5} more files")
 
         print(f"[{self.name}] Total trades fetched: {len(all_trades)}")
         return all_trades
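Since the generated candidate list can exceed a thousand names and most downloads will come back empty, the new `successful` counter caps per-file logging at five lines and summarizes the rest. A toy reproduction of that pattern (file names and parse results are stand-ins):

LOG_LIMIT = 5  # same cap as in the diff

# Dummy results: 12 "files" that each parsed one trade
results = {f'file_{i}.json.gz': ['trade'] for i in range(12)}

successful = 0
for name, trades in results.items():
    if trades:
        successful += 1
        if successful <= LOG_LIMIT:
            print(f'Parsed {len(trades)} trades from {name}')

if successful > LOG_LIMIT:
    print(f'... and {successful - LOG_LIMIT} more files')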