fixed deutsche boerse
All checks were successful
Deployment / deploy-docker (push) Successful in 17s
@@ -17,7 +17,9 @@ HEADERS = {
 
 # gettex download base URLs
 GETTEX_PAGE_URL = "https://www.gettex.de/handel/delayed-data/posttrade-data/"
-GETTEX_DOWNLOAD_BASE = "https://erdk.bayerische-boerse.de:8000/delayed-data/MUNC-MUND/posttrade/"
+# The download URL is available as a direct link on the gettex website
+# Base URL for fileadmin downloads (found by analyzing the page)
+GETTEX_DOWNLOAD_BASE = "https://www.gettex.de/fileadmin/posttrade-data/"
 
 
 class GettexExchange(BaseExchange):
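For reference, the new fileadmin base presumably combines with gettex's posttrade filename pattern (posttrade.YYYYMMDD.HH.MM.{munc|mund}.csv.gz, per the comment further down in this commit) roughly as in the following sketch. build_posttrade_url is a hypothetical helper for illustration, not part of the commit:

from datetime import datetime

GETTEX_DOWNLOAD_BASE = "https://www.gettex.de/fileadmin/posttrade-data/"

def build_posttrade_url(ts: datetime, venue: str = "munc") -> str:
    # Filename pattern assumed from the comment in this commit:
    # posttrade.YYYYMMDD.HH.MM.{munc|mund}.csv.gz
    return f"{GETTEX_DOWNLOAD_BASE}posttrade.{ts:%Y%m%d}.{ts:%H}.{ts:%M}.{venue}.csv.gz"

# build_posttrade_url(datetime(2024, 1, 15, 8, 15))
# -> "https://www.gettex.de/fileadmin/posttrade-data/posttrade.20240115.08.15.munc.csv.gz"
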
@@ -32,9 +34,10 @@ class GettexExchange(BaseExchange):
     def name(self) -> str:
         return "GETTEX"
 
-    def _get_file_list_from_page(self) -> List[str]:
+    def _get_file_list_from_page(self) -> List[dict]:
         """
         Parses the gettex page and extracts download links.
+        Returns a list of dicts with 'filename' and 'url'.
         """
         files = []
 
@@ -47,16 +50,32 @@ class GettexExchange(BaseExchange):
             # Look for links to CSV.gz files
             for link in soup.find_all('a'):
                 href = link.get('href', '')
-                if href and 'posttrade' in href.lower() and href.endswith('.csv.gz'):
-                    files.append(href)
+                text = link.get_text(strip=True)
+
+                # Check the link text or href for posttrade CSV.gz files
+                if href and 'posttrade' in href.lower() and '.csv.gz' in href.lower():
+                    # Build the full URL
+                    if not href.startswith('http'):
+                        url = f"https://www.gettex.de{href}" if href.startswith('/') else f"https://www.gettex.de/{href}"
+                    else:
+                        url = href
+                    filename = href.split('/')[-1]
+                    files.append({'filename': filename, 'url': url})
+
+                elif text and 'posttrade' in text.lower() and '.csv.gz' in text.lower():
+                    # The link text is the filename; href may be the URL
+                    filename = text
+                    if href:
+                        if not href.startswith('http'):
+                            url = f"https://www.gettex.de{href}" if href.startswith('/') else f"https://www.gettex.de/{href}"
+                        else:
+                            url = href
+                    else:
+                        # Fallback: try a known URL pattern
+                        url = f"https://www.gettex.de/fileadmin/posttrade-data/{filename}"
+                    files.append({'filename': filename, 'url': url})
 
             # If no links were found, try an alternative structure
             if not files:
                 # Sometimes links are hidden in data attributes
                 for elem in soup.find_all(attrs={'data-href': True}):
                     href = elem.get('data-href', '')
                     if 'posttrade' in href.lower() and href.endswith('.csv.gz'):
-                        files.append(href)
+                        # Keep the dict shape the callers now expect
+                        files.append({'filename': href.split('/')[-1], 'url': href})
 
             print(f"[GETTEX] Found {len(files)} files on page")
 
         except Exception as e:
             print(f"[GETTEX] Error fetching page: {e}")
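The relative-to-absolute URL normalization appears twice in this hunk; urllib.parse.urljoin covers the same cases and could replace both inline branches. A sketch of that refactor, not part of the commit:

from urllib.parse import urljoin

def absolutize(href: str, base: str = "https://www.gettex.de/") -> str:
    # urljoin handles absolute URLs, leading-slash paths, and bare
    # relative paths, matching the inline logic above.
    return urljoin(base, href)

# absolutize("/fileadmin/posttrade-data/x.csv.gz") -> "https://www.gettex.de/fileadmin/posttrade-data/x.csv.gz"
# absolutize("fileadmin/posttrade-data/x.csv.gz")  -> "https://www.gettex.de/fileadmin/posttrade-data/x.csv.gz"
# absolutize("https://example.com/y.csv.gz")       -> "https://example.com/y.csv.gz"
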
@@ -211,19 +230,95 @@ class GettexExchange(BaseExchange):
 
         print(f"[{self.name}] Fetching trades for date: {target_date}")
 
-        # Generate the expected filenames
-        expected_files = self._generate_expected_files(target_date)
-        print(f"[{self.name}] Trying {len(expected_files)} potential files")
+        # First, try to load the file list from the website
+        page_files = self._get_file_list_from_page()
 
-        # Try to download the files
-        successful_files = 0
-        for filename in expected_files:
-            trades = self._download_and_parse_file(filename)
-            if trades:
-                all_trades.extend(trades)
-                successful_files += 1
+        if page_files:
+            # Filter the files for the target date
+            target_str = target_date.strftime('%Y%m%d')
+            next_day = target_date + timedelta(days=1)
+            next_day_str = next_day.strftime('%Y%m%d')
+
+            target_files = []
+            for f in page_files:
+                filename = f['filename']
+                # Files from the target date or the early morning hours of the following day
+                if target_str in filename:
+                    target_files.append(f)
+                elif next_day_str in filename:
+                    # Early morning hours (00:00 - 02:45) belong to the previous day
+                    try:
+                        # Format: posttrade.YYYYMMDD.HH.MM.{munc|mund}.csv.gz
+                        parts = filename.split('.')
+                        if len(parts) >= 4:
+                            hour = int(parts[2])
+                            if hour < 3:
+                                target_files.append(f)
+                    except (ValueError, IndexError):
+                        pass
+
+            print(f"[{self.name}] Found {len(target_files)} files for target date from page")
+
+            # Download the files listed on the page
+            for f in target_files:
+                trades = self._download_file_by_url(f['url'], f['filename'])
+                if trades:
+                    all_trades.extend(trades)
+
+        # Fallback: try the generated filenames
+        if not all_trades:
+            print(f"[{self.name}] No files from page, trying generated filenames...")
+            expected_files = self._generate_expected_files(target_date)
+            print(f"[{self.name}] Trying {len(expected_files)} potential files")
+
+            successful_files = 0
+            for filename in expected_files:
+                trades = self._download_and_parse_file(filename)
+                if trades:
+                    all_trades.extend(trades)
+                    successful_files += 1
+
+            print(f"[{self.name}] Successfully downloaded {successful_files} files")
 
-        print(f"[{self.name}] Successfully downloaded {successful_files} files")
         print(f"[{self.name}] Total trades fetched: {len(all_trades)}")
 
         return all_trades
+
+    def _download_file_by_url(self, url: str, filename: str) -> List[Trade]:
+        """Downloads a file directly from a URL"""
+        trades = []
+
+        try:
+            print(f"[{self.name}] Downloading: {url}")
+            response = requests.get(url, headers=HEADERS, timeout=60)
+
+            if response.status_code == 404:
+                return []
+
+            response.raise_for_status()
+
+            # Decompress the gzip payload
+            with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f:
+                csv_text = f.read().decode('utf-8')
+
+            # Parse the CSV
+            reader = csv.DictReader(io.StringIO(csv_text), delimiter=';')
+
+            for row in reader:
+                try:
+                    trade = self._parse_csv_row(row)
+                    if trade:
+                        trades.append(trade)
+                except Exception as e:
+                    print(f"[{self.name}] Error parsing row: {e}")
+                    continue
+
+            print(f"[{self.name}] Parsed {len(trades)} trades from {filename}")
+
+        except requests.exceptions.HTTPError as e:
+            if e.response.status_code != 404:
+                print(f"[{self.name}] HTTP error downloading {url}: {e}")
+        except Exception as e:
+            print(f"[{self.name}] Error downloading {url}: {e}")
+
+        return trades
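The date filter in fetch_trades reduces to a standalone predicate, which makes the 03:00 cutoff easier to test in isolation. A minimal sketch assuming the posttrade.YYYYMMDD.HH.MM.{munc|mund}.csv.gz filename format from the comment above; belongs_to_target_date is hypothetical, not part of the commit:

from datetime import date, timedelta

def belongs_to_target_date(filename: str, target: date) -> bool:
    # Files stamped with the target date match directly; files stamped
    # before 03:00 on the following day also count toward the target date.
    target_str = target.strftime('%Y%m%d')
    next_day_str = (target + timedelta(days=1)).strftime('%Y%m%d')
    if target_str in filename:
        return True
    if next_day_str in filename:
        parts = filename.split('.')
        try:
            return len(parts) >= 4 and int(parts[2]) < 3
        except ValueError:
            return False
    return False

# belongs_to_target_date("posttrade.20240116.01.15.munc.csv.gz", date(2024, 1, 15)) -> True
# belongs_to_target_date("posttrade.20240116.08.00.munc.csv.gz", date(2024, 1, 15)) -> False
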