fix(gemini): retry-with-backoff on 429 + switch default model to flash-lite
This commit is contained in:
parent
ce5f970dde
commit
97ccecb82b
@ -19,7 +19,7 @@ ai:
|
||||
timeout_seconds: 30
|
||||
voter_a:
|
||||
provider: gemini # gemini | claude | groq | deepseek | xai | openrouter | ollama
|
||||
model: gemini-2.5-flash
|
||||
model: gemini-2.5-flash-lite
|
||||
temperature: 0.1
|
||||
voter_b:
|
||||
provider: groq
|
||||
|
||||
@ -2,8 +2,11 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
|
||||
import google.generativeai as genai
|
||||
from google.api_core.exceptions import ResourceExhausted
|
||||
|
||||
from ..config import Settings
|
||||
from ..logging_setup import get_logger
|
||||
@ -13,6 +16,11 @@ from .schema import JSON_SCHEMA, TradeDecision
|
||||
log = get_logger(__name__)
|
||||
|
||||
|
||||
def _extract_retry_seconds(err: Exception, fallback: float) -> float:
|
||||
m = re.search(r"retry in (\d+(?:\.\d+)?)", str(err))
|
||||
return float(m.group(1)) if m else fallback
|
||||
|
||||
|
||||
class GeminiClient:
|
||||
provider = "gemini"
|
||||
|
||||
@ -31,15 +39,31 @@ class GeminiClient:
|
||||
self.timeout = settings.ai.timeout_seconds
|
||||
|
||||
def decide(self, user_prompt: str) -> TradeDecision:
|
||||
resp = self._model.generate_content(
|
||||
user_prompt,
|
||||
generation_config={
|
||||
"response_mime_type": "application/json",
|
||||
"response_schema": JSON_SCHEMA,
|
||||
"temperature": self.temperature,
|
||||
},
|
||||
request_options={"timeout": self.timeout},
|
||||
)
|
||||
attempts = 0
|
||||
max_attempts = 3
|
||||
while True:
|
||||
try:
|
||||
resp = self._model.generate_content(
|
||||
user_prompt,
|
||||
generation_config={
|
||||
"response_mime_type": "application/json",
|
||||
"response_schema": JSON_SCHEMA,
|
||||
"temperature": self.temperature,
|
||||
},
|
||||
request_options={"timeout": self.timeout},
|
||||
)
|
||||
break
|
||||
except ResourceExhausted as e:
|
||||
attempts += 1
|
||||
if attempts >= max_attempts:
|
||||
log.warning("gemini.rate_limit_giveup", attempts=attempts)
|
||||
return TradeDecision(
|
||||
action="HOLD", confidence=0.0, suggested_size_pct=0.0,
|
||||
reasoning="rate_limit_exhausted",
|
||||
)
|
||||
wait = min(_extract_retry_seconds(e, 30.0) + 2, 60.0)
|
||||
log.warning("gemini.rate_limit", attempt=attempts, wait_s=wait)
|
||||
time.sleep(wait)
|
||||
text = resp.text or "{}"
|
||||
try:
|
||||
data = json.loads(text)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user