Spaces:

pykara
/

pytrade-backend

Running

pytrade-backend / news.py

Oviya

Track binaries via Git LFS (analysedata.xlsx, TA_Lib wheel)

66dc1bf 3 months ago

3.06 kB

	# news.py - fetch Google News headlines and score them with NLTK VADER sentiment
	# pip install gnews nltk rapidfuzz

	from __future__ import annotations
	from datetime import datetime, timezone
	import time
	from typing import List, Dict, Any

	from gnews import GNews
	from rapidfuzz import fuzz
	from nltk.sentiment import SentimentIntensityAnalyzer
	import nltk

	def _ensure_vader_lexicon() -> None:
	    """Download the VADER lexicon unless NLTK can already locate it.

	    Safe to call repeatedly: the download only happens when the lookup
	    raises ``LookupError``.
	    """
	    try:
	        nltk.data.find("sentiment/vader_lexicon.zip")
	    except LookupError:
	        nltk.download("vader_lexicon")


	# The lexicon has to be available before the analyzer below is constructed.
	_ensure_vader_lexicon()

	# Single analyzer instance, built once at import time and reused by every call.
	_SIA = SentimentIntensityAnalyzer()


	def _sentiment_label(compound: float) -> str:
	if compound > 0.05:
	return "Positive"
	elif compound < -0.05:
	return "Negative"
	return "Neutral"


	def _is_similar(title: str, seen_titles: List[str], threshold: int = 60) -> bool:
	for t in seen_titles:
	if fuzz.ratio(title, t) > threshold:
	return True
	return False


	def _fetch_articles(
	    query: str,
	    *,
	    period: str,
	    max_results: int,
	    language: str,
	    country: str,
	    retries: int,
	    backoff_seconds: int,
	) -> List[Dict[str, Any]]:
	    """Ask GNews for ``query`` up to ``retries`` times; return the first non-empty batch or ``[]``."""
	    for attempt in range(retries):
	        try:
	            client = GNews(
	                language=language,
	                country=country,
	                period=period,
	                max_results=max_results,
	            )
	            articles = client.get_news(query) or []
	        except Exception as e:
	            # Best-effort fetch: report the failure and fall through to a retry.
	            print(f"[Attempt {attempt+1}] GNews error: {e}")
	            articles = []

	        if articles:
	            return articles

	        # Linear backoff, but only when another attempt will actually follow;
	        # sleeping after the last attempt would just delay the empty result.
	        if attempt + 1 < retries:
	            time.sleep(backoff_seconds * (attempt + 1))

	    return []


	def _article_url(article: Dict[str, Any]) -> Any:
	    """Return the first non-empty link field of ``article``, or ``""`` if there is none."""
	    source = article.get("source")
	    # "source" can be absent, None or a non-mapping value; only a dict can
	    # carry a nested "url", so guard the lookup instead of raising.
	    source_url = source.get("url") if isinstance(source, dict) else None
	    return article.get("url") or article.get("link") or source_url or ""


	def _format_published(raw: Any) -> str:
	    """Render a publication timestamp as text.

	    ``datetime`` values are formatted as ``YYYY-MM-DD HH:MM`` in UTC: naive
	    values are taken to already be UTC, aware values are converted. Anything
	    else is passed through ``str()`` unchanged.
	    """
	    if isinstance(raw, datetime):
	        if raw.utcoffset() is None:
	            raw = raw.replace(tzinfo=timezone.utc)
	        else:
	            raw = raw.astimezone(timezone.utc)
	        return raw.strftime("%Y-%m-%d %H:%M")
	    return str(raw)


	def get_latest_news_with_sentiment(
	    query: str,
	    *,
	    period: str = "1d",
	    max_results: int = 20,
	    language: str = "en",
	    country: str = "US",
	    retries: int = 3,
	    backoff_seconds: int = 3
	) -> Dict[str, Any]:
	    """Fetch recent headlines for ``query`` and score their sentiment.

	    Headlines come from GNews, near-duplicate titles are dropped with
	    ``_is_similar`` (threshold 60), and each remaining title is scored with
	    the module-level VADER analyzer.

	    Args:
	        query: Search text passed to ``GNews.get_news``.
	        period: Look-back window passed to ``GNews``.
	        max_results: Maximum number of articles requested from ``GNews``.
	        language: Language code passed to ``GNews``.
	        country: Country code passed to ``GNews``.
	        retries: Maximum number of fetch attempts. An attempt that raises
	            or returns nothing is retried.
	        backoff_seconds: Base delay; the wait before retry ``k`` is
	            ``backoff_seconds * k`` seconds. No wait follows the final
	            attempt.

	    Returns:
	        A dict with three keys:

	        * ``"overall_news_score"``: the mean compound score rescaled with
	          ``(mean + 1) * 2.5`` and rounded to 2 places (0-5 when compound
	          scores lie in [-1, 1]); ``0.0`` when no headline was scored.
	        * ``"count"``: number of headlines kept.
	        * ``"items"``: one dict per kept headline with ``"title"``,
	          ``"url"``, ``"published"``, ``"sentiment"`` and ``"compound"``
	          (rounded to 3 places).

	    Fetch errors are printed and swallowed; they never propagate to the
	    caller.
	    """
	    results = _fetch_articles(
	        query,
	        period=period,
	        max_results=max_results,
	        language=language,
	        country=country,
	        retries=retries,
	        backoff_seconds=backoff_seconds,
	    )
	    if not results:
	        return {"overall_news_score": 0.0, "count": 0, "items": []}

	    seen_titles: List[str] = []
	    items: List[Dict[str, Any]] = []
	    total_compound = 0.0

	    for art in results:
	        title = (art.get("title") or "").strip()
	        # Skip untitled entries and headlines that repeat one already kept.
	        if not title or _is_similar(title, seen_titles, threshold=60):
	            continue
	        seen_titles.append(title)

	        published_raw = (
	            art.get("published date")
	            or art.get("publishedDate")
	            or art.get("datetime")
	            or ""
	        )

	        compound = _SIA.polarity_scores(title)["compound"]
	        items.append({
	            "title": title,
	            "url": _article_url(art),
	            "published": _format_published(published_raw),
	            "sentiment": _sentiment_label(compound),
	            "compound": round(compound, 3),
	        })
	        # Accumulate the unrounded score so rounding error does not add up.
	        total_compound += compound

	    n = len(items)
	    overall = round(((total_compound / n) + 1) * 2.5, 2) if n else 0.0

	    return {"overall_news_score": overall, "count": n, "items": items}