pytrade-backend / news.py
Oviya
Track binaries via Git LFS (analysedata.xlsx, TA_Lib wheel)
66dc1bf
# news_sentiment.py
# pip install gnews nltk rapidfuzz
from __future__ import annotations
from datetime import datetime, timezone
import time
from typing import List, Dict, Any
from gnews import GNews
from rapidfuzz import fuzz
from nltk.sentiment import SentimentIntensityAnalyzer
import nltk
# Ensure VADER is available (safe to call multiple times)
try:
nltk.data.find("sentiment/vader_lexicon.zip")
except LookupError:
nltk.download("vader_lexicon")
# Keep one analyzer instance
_SIA = SentimentIntensityAnalyzer()
def _sentiment_label(compound: float) -> str:
if compound > 0.05:
return "Positive"
elif compound < -0.05:
return "Negative"
return "Neutral"
def _is_similar(title: str, seen_titles: List[str], threshold: int = 60) -> bool:
for t in seen_titles:
if fuzz.ratio(title, t) > threshold:
return True
return False
def get_latest_news_with_sentiment(
query: str,
*,
period: str = "1d",
max_results: int = 20,
language: str = "en",
country: str = "US",
retries: int = 3,
backoff_seconds: int = 3
) -> Dict[str, Any]:
seen_titles: List[str] = []
results = []
for attempt in range(retries):
try:
g = GNews(language=language, country=country, period=period, max_results=max_results)
results = g.get_news(query) or []
if results:
break
except Exception as e:
print(f"[Attempt {attempt+1}] GNews error: {e}")
time.sleep(backoff_seconds * (attempt + 1))
if not results:
return {"overall_news_score": 0.0, "count": 0, "items": []}
items: List[Dict[str, Any]] = []
total_compound = 0.0
for art in results:
title = (art.get("title") or "").strip()
if not title:
continue
if _is_similar(title, seen_titles, threshold=60):
continue
seen_titles.append(title)
url = (art.get("url")
or art.get("link")
or art.get("source", {}).get("url")
or "")
published_raw = (art.get("published date")
or art.get("publishedDate")
or art.get("datetime")
or "")
if isinstance(published_raw, datetime):
if published_raw.tzinfo is None:
published_raw = published_raw.replace(tzinfo=timezone.utc)
published = published_raw.strftime("%Y-%m-%d %H:%M")
else:
published = str(published_raw)
compound = _SIA.polarity_scores(title)["compound"]
items.append({
"title": title,
"url": url,
"published": published,
"sentiment": _sentiment_label(compound),
"compound": round(compound, 3),
})
total_compound += compound
n = len(items)
overall = round(((total_compound / n) + 1) * 2.5, 2) if n else 0.0
return {"overall_news_score": overall, "count": n, "items": items}