@@ -39,22 +39,21 @@ class DeutscheBoerseBase(BaseExchange):
# Debug: response length
print(f"[{self.name}] Response length: {len(html_text)} chars")

# Primary: regex-based extraction (more reliable)
# Pattern: PREFIX-posttrade-YYYY-MM-DDTHH_MM.json.gz
# The prefix is extracted from the base_url (e.g. DETR, DFRA, DGAT)
# Extract the prefix from the base_url (e.g. DETR, DFRA, DGAT)
prefix_match = re.search(r'/([A-Z]{4})-posttrade', self.base_url)
if prefix_match:
    prefix = prefix_match.group(1)
    # Look for file names with this prefix
    pattern = f'{prefix}-posttrade-\\d{{4}}-\\d{{2}}-\\d{{2}}T\\d{{2}}_\\d{{2}}\\.json\\.gz'
else:
    # Generic pattern
    pattern = r'[A-Z]{4}-posttrade-\d{4}-\d{2}-\d{2}T\d{2}_\d{2}\.json\.gz'
prefix = prefix_match.group(1) if prefix_match else '[A-Z]{4}'

# Pattern: PREFIX-posttrade-YYYY-MM-DDTHH_MM.json.gz
# Important: file names appear as text/name, not only in href
pattern = f'{prefix}-posttrade-\\d{{4}}-\\d{{2}}-\\d{{2}}T\\d{{2}}_\\d{{2}}\\.json\\.gz'

matches = re.findall(pattern, html_text)
files = list(set(matches))

# Secondary: BeautifulSoup for links (if the regex finds nothing)
if files:
    print(f"[{self.name}] Found {len(files)} files via regex")

# Fallback: BeautifulSoup for links and text
if not files:
    soup = BeautifulSoup(html_text, 'html.parser')
    all_links = soup.find_all('a')
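
A rough, self-contained check of the prefix extraction above; the base_url and listing text are invented for illustration, and the real listing format may differ:

import re

base_url = "https://example.com/DETR-posttrade/"              # hypothetical value
html_text = "...DETR-posttrade-2024-06-07T15_30.json.gz..."   # hypothetical listing snippet

prefix_match = re.search(r'/([A-Z]{4})-posttrade', base_url)
prefix = prefix_match.group(1) if prefix_match else '[A-Z]{4}'
pattern = f'{prefix}-posttrade-\\d{{4}}-\\d{{2}}-\\d{{2}}T\\d{{2}}_\\d{{2}}\\.json\\.gz'
print(re.findall(pattern, html_text))  # ['DETR-posttrade-2024-06-07T15_30.json.gz']
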
@@ -64,24 +63,19 @@ class DeutscheBoerseBase(BaseExchange):
href = link.get('href', '')
text = link.get_text(strip=True)

# Check href and text for posttrade files
if href and 'posttrade' in href.lower() and '.json.gz' in href.lower():
    # Extract only the file name
# Check the link text (file names are often shown as the link text)
if text and 'posttrade' in text.lower() and '.json.gz' in text.lower():
    files.append(text)
# Check the href
elif href and 'posttrade' in href.lower() and '.json.gz' in href.lower():
    filename = href.split('/')[-1] if '/' in href else href
    files.append(filename)
elif text and 'posttrade' in text.lower() and '.json.gz' in text.lower():
    files.append(text)

# Tertiary: search the HTML for any "posttrade" occurrence and extract file names
if not files:
    # More general pattern for arbitrary file names containing "posttrade"
    general_pattern = r'[\w-]*posttrade[\w-]*\d{4}[-_]\d{2}[-_]\d{2}[T_]\d{2}[_:]\d{2}\.json\.gz'
    matches = re.findall(general_pattern, html_text, re.IGNORECASE)
    files = list(set(matches))

files = list(set(files))
if files:
    print(f"[{self.name}] Found {len(files)} files via general pattern")
    print(f"[{self.name}] Found {len(files)} files via BeautifulSoup")

print(f"[{self.name}] Found {len(files)} files via regex/soup")
print(f"[{self.name}] Total files found: {len(files)}")
return files
except Exception as e:
    print(f"Error fetching file list from {self.base_url}: {e}")
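
A minimal sketch of the link-based fallback above, run against an invented listing snippet (the markup is hypothetical, not the real index page):

from bs4 import BeautifulSoup

html_text = ('<a href="/data/DETR-posttrade-2024-06-07T15_30.json.gz">'
             'DETR-posttrade-2024-06-07T15_30.json.gz</a>')  # hypothetical markup

files = []
for link in BeautifulSoup(html_text, 'html.parser').find_all('a'):
    href = link.get('href', '')
    text = link.get_text(strip=True)
    # Prefer the visible link text, fall back to the file name taken from the href
    if text and 'posttrade' in text.lower() and '.json.gz' in text.lower():
        files.append(text)
    elif href and 'posttrade' in href.lower() and '.json.gz' in href.lower():
        files.append(href.split('/')[-1] if '/' in href else href)

print(sorted(set(files)))  # ['DETR-posttrade-2024-06-07T15_30.json.gz']
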
@@ -277,9 +271,23 @@ class DeutscheBoerseBase(BaseExchange):

return files

def _get_last_trading_day(self, from_date: datetime.date) -> datetime.date:
    """
    Finds the last trading day (skips weekends).
    Monday=0, Sunday=6
    """
    date = from_date
    # If Saturday (5), go back to Friday
    if date.weekday() == 5:
        date = date - timedelta(days=1)
    # If Sunday (6), go back to Friday
    elif date.weekday() == 6:
        date = date - timedelta(days=2)
    return date

def fetch_latest_trades(self, include_yesterday: bool = True, since_date: datetime = None) -> List[Trade]:
    """
    Fetches all trades from the previous day (or since since_date).
    Fetches all trades from the last trading day (skips weekends).
    """
    all_trades = []
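
The weekday arithmetic in _get_last_trading_day can be checked in isolation; the standalone name and the sample dates below are only for illustration:

from datetime import date, timedelta

def last_trading_day(d: date) -> date:
    # Saturday (5) -> Friday, Sunday (6) -> Friday, weekdays unchanged
    if d.weekday() == 5:
        return d - timedelta(days=1)
    if d.weekday() == 6:
        return d - timedelta(days=2)
    return d

print(last_trading_day(date(2024, 6, 8)))   # Saturday -> 2024-06-07 (Friday)
print(last_trading_day(date(2024, 6, 9)))   # Sunday   -> 2024-06-07 (Friday)
print(last_trading_day(date(2024, 6, 10)))  # Monday   -> 2024-06-10 (unchanged)

Note that only weekends are skipped; exchange holidays are not handled by this helper.
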
@@ -290,6 +298,13 @@ class DeutscheBoerseBase(BaseExchange):
# Default: previous day
target_date = (datetime.now(timezone.utc) - timedelta(days=1)).date()

# Skip weekends
original_date = target_date
target_date = self._get_last_trading_day(target_date)

if target_date != original_date:
    print(f"[{self.name}] Skipping weekend: {original_date} -> {target_date}")

print(f"[{self.name}] Fetching trades for date: {target_date}")

# First, try to fetch the file list from the page
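
Concretely, on a Monday run the default target date ("yesterday") is a Sunday, which the weekend check above pulls back to the preceding Friday; a quick sketch with an arbitrary example date:

from datetime import date, timedelta

monday_run = date(2024, 6, 10)             # a Monday, chosen only for illustration
target = monday_run - timedelta(days=1)    # Sunday 2024-06-09
if target.weekday() == 6:                  # Sunday -> two days back to Friday
    target -= timedelta(days=2)
print(target)  # 2024-06-07, the preceding Friday
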
@@ -285,9 +285,23 @@ class GettexExchange(BaseExchange):
# Only log the first few errors
return None

def _get_last_trading_day(self, from_date) -> datetime.date:
    """
    Finds the last trading day (skips weekends).
    Monday=0, Sunday=6
    """
    date = from_date
    # If Saturday (5), go back to Friday
    if date.weekday() == 5:
        date = date - timedelta(days=1)
    # If Sunday (6), go back to Friday
    elif date.weekday() == 6:
        date = date - timedelta(days=2)
    return date

def fetch_latest_trades(self, include_yesterday: bool = True, since_date: datetime = None) -> List[Trade]:
    """
    Fetches all trades from the previous day.
    Fetches all trades from the last trading day (skips weekends).
    """
    all_trades = []
@@ -297,6 +311,13 @@ class GettexExchange(BaseExchange):
else:
    target_date = (datetime.now(timezone.utc) - timedelta(days=1)).date()

# Skip weekends
original_date = target_date
target_date = self._get_last_trading_day(target_date)

if target_date != original_date:
    print(f"[{self.name}] Skipping weekend: {original_date} -> {target_date}")

print(f"[{self.name}] Fetching trades for date: {target_date}")

# First, try to load the files from the website
@@ -334,9 +334,23 @@ class StuttgartExchange(BaseExchange):
print(f"[STU] Error parsing CSV row: {e}")
return None

def _get_last_trading_day(self, from_date) -> datetime.date:
    """
    Finds the last trading day (skips weekends).
    Monday=0, Sunday=6
    """
    date = from_date
    # If Saturday (5), go back to Friday
    if date.weekday() == 5:
        date = date - timedelta(days=1)
    # If Sunday (6), go back to Friday
    elif date.weekday() == 6:
        date = date - timedelta(days=2)
    return date

def fetch_latest_trades(self, include_yesterday: bool = True, since_date: datetime = None) -> List[Trade]:
    """
    Fetches all trades from the previous day.
    Fetches all trades from the last trading day (skips weekends).
    """
    all_trades = []
@@ -346,6 +360,13 @@ class StuttgartExchange(BaseExchange):
else:
    target_date = (datetime.now(timezone.utc) - timedelta(days=1)).date()

# Skip weekends
original_date = target_date
target_date = self._get_last_trading_day(target_date)

if target_date != original_date:
    print(f"[{self.name}] Skipping weekend: {original_date} -> {target_date}")

print(f"[{self.name}] Fetching trades for date: {target_date}")

# Fetch the download links