diff --git a/config.yaml b/config.yaml index 18f9d1d..84d2048 100644 --- a/config.yaml +++ b/config.yaml @@ -19,7 +19,7 @@ ai: timeout_seconds: 30 voter_a: provider: gemini # gemini | claude | groq | deepseek | xai | openrouter | ollama - model: gemini-2.5-flash + model: gemini-2.5-flash-lite temperature: 0.1 voter_b: provider: groq diff --git a/src/aitrader/ai/gemini.py b/src/aitrader/ai/gemini.py index f3aa871..5bed728 100644 --- a/src/aitrader/ai/gemini.py +++ b/src/aitrader/ai/gemini.py @@ -2,8 +2,11 @@ from __future__ import annotations import json +import re +import time import google.generativeai as genai +from google.api_core.exceptions import ResourceExhausted from ..config import Settings from ..logging_setup import get_logger @@ -13,6 +16,11 @@ from .schema import JSON_SCHEMA, TradeDecision log = get_logger(__name__) +def _extract_retry_seconds(err: Exception, fallback: float) -> float: + m = re.search(r"retry in (\d+(?:\.\d+)?)", str(err)) + return float(m.group(1)) if m else fallback + + class GeminiClient: provider = "gemini" @@ -31,15 +39,31 @@ class GeminiClient: self.timeout = settings.ai.timeout_seconds def decide(self, user_prompt: str) -> TradeDecision: - resp = self._model.generate_content( - user_prompt, - generation_config={ - "response_mime_type": "application/json", - "response_schema": JSON_SCHEMA, - "temperature": self.temperature, - }, - request_options={"timeout": self.timeout}, - ) + attempts = 0 + max_attempts = 3 + while True: + try: + resp = self._model.generate_content( + user_prompt, + generation_config={ + "response_mime_type": "application/json", + "response_schema": JSON_SCHEMA, + "temperature": self.temperature, + }, + request_options={"timeout": self.timeout}, + ) + break + except ResourceExhausted as e: + attempts += 1 + if attempts >= max_attempts: + log.warning("gemini.rate_limit_giveup", attempts=attempts) + return TradeDecision( + action="HOLD", confidence=0.0, suggested_size_pct=0.0, + reasoning="rate_limit_exhausted", + ) + wait = min(_extract_retry_seconds(e, 30.0) + 2, 60.0) + log.warning("gemini.rate_limit", attempt=attempts, wait_s=wait) + time.sleep(wait) text = resp.text or "{}" try: data = json.loads(text)