fix(gemini): retry-with-backoff on 429 + switch default model to flash-lite

This commit is contained in:
sylyx 2026-05-07 15:03:56 +02:00
parent ce5f970dde
commit 97ccecb82b
2 changed files with 34 additions and 10 deletions

View File

@ -19,7 +19,7 @@ ai:
timeout_seconds: 30 timeout_seconds: 30
voter_a: voter_a:
provider: gemini # gemini | claude | groq | deepseek | xai | openrouter | ollama provider: gemini # gemini | claude | groq | deepseek | xai | openrouter | ollama
model: gemini-2.5-flash model: gemini-2.5-flash-lite
temperature: 0.1 temperature: 0.1
voter_b: voter_b:
provider: groq provider: groq

View File

@ -2,8 +2,11 @@
from __future__ import annotations from __future__ import annotations
import json import json
import re
import time
import google.generativeai as genai import google.generativeai as genai
from google.api_core.exceptions import ResourceExhausted
from ..config import Settings from ..config import Settings
from ..logging_setup import get_logger from ..logging_setup import get_logger
@ -13,6 +16,11 @@ from .schema import JSON_SCHEMA, TradeDecision
log = get_logger(__name__) log = get_logger(__name__)
def _extract_retry_seconds(err: Exception, fallback: float) -> float:
m = re.search(r"retry in (\d+(?:\.\d+)?)", str(err))
return float(m.group(1)) if m else fallback
class GeminiClient: class GeminiClient:
provider = "gemini" provider = "gemini"
@ -31,15 +39,31 @@ class GeminiClient:
self.timeout = settings.ai.timeout_seconds self.timeout = settings.ai.timeout_seconds
def decide(self, user_prompt: str) -> TradeDecision: def decide(self, user_prompt: str) -> TradeDecision:
resp = self._model.generate_content( attempts = 0
user_prompt, max_attempts = 3
generation_config={ while True:
"response_mime_type": "application/json", try:
"response_schema": JSON_SCHEMA, resp = self._model.generate_content(
"temperature": self.temperature, user_prompt,
}, generation_config={
request_options={"timeout": self.timeout}, "response_mime_type": "application/json",
) "response_schema": JSON_SCHEMA,
"temperature": self.temperature,
},
request_options={"timeout": self.timeout},
)
break
except ResourceExhausted as e:
attempts += 1
if attempts >= max_attempts:
log.warning("gemini.rate_limit_giveup", attempts=attempts)
return TradeDecision(
action="HOLD", confidence=0.0, suggested_size_pct=0.0,
reasoning="rate_limit_exhausted",
)
wait = min(_extract_retry_seconds(e, 30.0) + 2, 60.0)
log.warning("gemini.rate_limit", attempt=attempts, wait_s=wait)
time.sleep(wait)
text = resp.text or "{}" text = resp.text or "{}"
try: try:
data = json.loads(text) data = json.loads(text)