Commit 8dfbce4: Oviya committed "update prediction"
Parent(s): 62afd3f

Files changed:
- analysestock.py +47 -77
- chatbot.py +0 -232
- companies.py +0 -50
- highlow_forecast.py +695 -0
- predictedchart.py +0 -126
- prediction.py +0 -257
analysestock.py
CHANGED

@@ -22,45 +22,10 @@ from srstrategies import get_support_resistance_signal
 from bbstrategies import get_bollinger_trade_signal
 from fundamental import get_fundamental_details
 from news import get_latest_news_with_sentiment
-
-from prediction import (
-    load_or_train_highlow_model,
-    build_current_features_row_23k,
-    predict_high_low_for_current_row,
-)
+from highlow_forecast import forecast_next_15_high_low
 import os, numpy as np, pandas as pd
 
-BASE_DIR = Path(__file__).resolve().parent
-
-# TRAIN_XLSX_PATH = r"D:\PY-Trade\backend alone\analysedata.xlsx"
-# MODEL_BUNDLE_PATH = r"C:\VIJI\pytrade-app\backend\models\gps_highlow_extratrees.pkl"
-
-# Excel path (priority: env var → file in repo → your Windows path)
-TRAIN_XLSX_PATH = (
-    os.getenv("TRAIN_XLSX_PATH")
-    or (str(BASE_DIR / "analysedata.xlsx") if (BASE_DIR / "analysedata.xlsx").exists() else None)
-    or (r"C:\VIJI\huggingface-deployment\deployment\pytrade-backend\analysedata.xlsx" if os.name == "nt" else None)
-)
-
-if not TRAIN_XLSX_PATH or not Path(TRAIN_XLSX_PATH).exists():
-    raise FileNotFoundError(
-        "Training Excel not found. Set TRAIN_XLSX_PATH or place 'analysedata.xlsx' next to analysestock.py."
-    )
-
-# Model path (priority: env var → model file in repo → your Windows path → /tmp for training)
-MODEL_BUNDLE_PATH = (
-    os.getenv("MODEL_BUNDLE_PATH")
-    or (str(BASE_DIR / "gps_highlow_extratrees.pkl") if (BASE_DIR / "gps_highlow_extratrees.pkl").exists() else None)
-    or (r"C:\VIJI\huggingface-deployment\deployment\pytrade-backend\gps_highlow_extratrees.pkl" if os.name == "nt" else None)
-    or "/tmp/pytrade-models/gps_highlow_extratrees.pkl"
-)
-
-Path(MODEL_BUNDLE_PATH).parent.mkdir(parents=True, exist_ok=True)  # ensure writable dir when training
-# --- end snippet ---
-
-
-
-from predictedchart import run_stock_prediction
+BASE_DIR = Path(__file__).resolve().parent
 
 # ===================== TA scoring =====================
 def calculate_technical_analysis_score(indicator_scores):

@@ -271,37 +236,42 @@ def analysestock(ticker):
 
 
     #prediction
-
-    predictions_float = [float(pred) for pred in predictions['Predicted Close']]
-    prediction_dates = pd.to_datetime(predictions['Date']).dt.strftime('%d-%m-%Y').tolist()
-    model_error = None
-    pred_high, pred_low = np.nan, np.nan
+    forecast_15 = None
     try:
-
-
-        current_feat_row = build_current_features_row_23k(
+        forecast_15 = forecast_next_15_high_low(
             ticker=ticker,
-            stock_data=stock_data
-            rsi_trade_signal=rsi_trade_signal,
-            macd_trade_signal=macd_trade_signal,
-            ema_trade_signal=ema_trade_signal,
-            atr_trade_signal=atr_trade_signal,
-            adx_trade_signal=adx_trade_signal,
-            bb_trade_signal=bb_trade_signal,
-            sr_trade_signal=sr_trade_signal,
-            priceaction_trade_signal=priceaction_trade_signal,
-            fibo_trade_signal=fibo_trade_signal,
-            overall_ta_score=overall_ta_score,
-        )
-
-        pred_high, pred_low = predict_high_low_for_current_row(
-            bundle=bundle,
-            current_row_df=current_feat_row,
-            live_close=stock_data['close'].iloc[-1]
+            stock_data=stock_data
         )
     except Exception as ex:
-
-
+        forecast_15 = {"error": f"{type(ex).__name__}: {ex}"}
+
+
+    # Summaries for 15-day forecast (max high, min low) + range series for charts
+    max_high_15 = None
+    max_high_15_date = None
+    min_low_15 = None
+    min_low_15_date = None
+    highlow_range_15 = None
+
+    if isinstance(forecast_15, dict) and all(k in forecast_15 for k in ("pred_high", "pred_low", "dates")):
+        highs = np.asarray(forecast_15["pred_high"], dtype=float)
+        lows = np.asarray(forecast_15["pred_low"], dtype=float)
+        dates = forecast_15["dates"]
+
+        if highs.size and lows.size and highs.size == lows.size == len(dates):
+            hi_idx = int(np.nanargmax(highs))
+            lo_idx = int(np.nanargmin(lows))
+
+            max_high_15 = round(float(highs[hi_idx]), 2)
+            max_high_15_date = dates[hi_idx]
+            min_low_15 = round(float(lows[lo_idx]), 2)
+            min_low_15_date = dates[lo_idx]
+
+            # Precomputed rangeBar data: [{x: date, y: [low, high]}]
+            highlow_range_15 = [
+                {"x": d, "y": [round(float(l), 2), round(float(h), 2)]}
+                for d, h, l in zip(dates, highs.tolist(), lows.tolist())
+            ]
 
 
 

@@ -352,22 +322,22 @@ def analysestock(ticker):
         "EMA 50": ema_trade_signal['EMA_50'],
         "ADX_Indicator": adx_trade_signal['ADX_Indicator'],
         "PLUS_DI": adx_trade_signal['PLUS_DI'],
-        "MINUS_DI": adx_trade_signal['MINUS_DI']
-        "prediction_prices": predictions_float,
-        "prediction_dates": prediction_dates,
+        "MINUS_DI": adx_trade_signal['MINUS_DI']
     }
-
+    response.update({
+        "ai_predicted_daily_high_15": (forecast_15.get("pred_high") if isinstance(forecast_15, dict) and "pred_high" in forecast_15 else None),
+        "ai_predicted_daily_low_15": (forecast_15.get("pred_low") if isinstance(forecast_15, dict) and "pred_low" in forecast_15 else None),
+        "ai_predicted_dates_15": (forecast_15.get("dates") if isinstance(forecast_15, dict) and "dates" in forecast_15 else None),
+        "ai_model_meta_15d": (forecast_15.get("bundle_meta") if isinstance(forecast_15, dict) and "bundle_meta" in forecast_15 else None),
+        "ai_model_error_15d": (forecast_15.get("error") if isinstance(forecast_15, dict) and "error" in forecast_15 else None),
+    })
 
     response.update({
-        "
-        "
-        "
-
-
-        "trained_rows": (bundle.get("trained_rows") if 'bundle' in locals() else None),
-        "sklearn_version": (bundle.get("sklearn_version") if 'bundle' in locals() else None)
-        },
-        "ai_model_error": model_error
+        "ai_predicted_max_high_15": max_high_15,
+        "ai_predicted_max_high_15_date": max_high_15_date,
+        "ai_predicted_min_low_15": min_low_15,
+        "ai_predicted_min_low_15_date": min_low_15_date,
+        "ai_predicted_highlow_range_15": highlow_range_15
     })
 
     return response
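A minimal sketch of how a caller might consume the new 15-day fields (the ticker value and the direct call to analysestock() are illustrative assumptions; the key names match the response keys added above):

    from analysestock import analysestock

    resp = analysestock("TCS.NS")  # hypothetical ticker

    if resp.get("ai_model_error_15d"):
        print("15-day forecast unavailable:", resp["ai_model_error_15d"])
    else:
        for d, h, l in zip(resp["ai_predicted_dates_15"],
                           resp["ai_predicted_daily_high_15"],
                           resp["ai_predicted_daily_low_15"]):
            print(f"{d}: expected range {l:.2f} to {h:.2f}")
        print("Max high", resp["ai_predicted_max_high_15"],
              "on", resp["ai_predicted_max_high_15_date"])
        print("Min low", resp["ai_predicted_min_low_15"],
              "on", resp["ai_predicted_min_low_15_date"])

The precomputed ai_predicted_highlow_range_15 series ({"x": date, "y": [low, high]}) is shaped so a rangeBar chart can consume it as-is, which is why it is built server-side.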
chatbot.py
DELETED

@@ -1,232 +0,0 @@
-# app.py
-import os
-import re
-import json
-import time
-from datetime import datetime
-from typing import List, Dict
-
-from flask import Flask, request, jsonify
-from dotenv import load_dotenv
-import requests
-
-# ----------------------------
-# Optional providers (OpenAI v1 / Cohere)
-# ----------------------------
-OPENAI_CLIENT = None
-try:
-    from openai import OpenAI
-    OPENAI_CLIENT = "available"
-except Exception:
-    OPENAI_CLIENT = None
-
-try:
-    import cohere
-except Exception:
-    cohere = None
-
-load_dotenv()
-app = Flask(__name__)
-
-# ----------------------------
-# Config
-# ----------------------------
-LLM_PROVIDER = os.getenv("LLM_PROVIDER", "openai").lower().strip()
-OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
-COHERE_API_KEY = os.getenv("COHERE_API_KEY")
-SERPAPI_API_KEY = os.getenv("SERPAPI_API_KEY")
-SEARCH_TOPK = int(os.getenv("SEARCH_TOPK", "5"))
-TIMEZONE = "Asia/Kolkata"
-
-if LLM_PROVIDER == "openai" and not OPENAI_API_KEY:
-    print("[WARN] OPENAI_API_KEY not set; general answers will fail.")
-if LLM_PROVIDER == "cohere" and not COHERE_API_KEY:
-    print("[WARN] COHERE_API_KEY not set; general answers will fail.")
-if not SERPAPI_API_KEY:
-    print("[WARN] SERPAPI_API_KEY not set; 'latest' queries will not work.")
-
-# Initialize OpenAI client (v1+)
-openai_client = None
-if LLM_PROVIDER == "openai" and OPENAI_CLIENT and OPENAI_API_KEY:
-    openai_client = OpenAI(api_key=OPENAI_API_KEY)
-
-# ----------------------------
-# Utilities
-# ----------------------------
-
-# Common “latest/live” triggers
-LATEST_TRIGGERS = [
-    r"\btoday\b", r"\bnow\b", r"\blatest\b", r"\bupdate\b", r"\brecent\b",
-    r"\bbreaking\b", r"\blive\b", r"\bthis\s+hour\b", r"\bthis\s+minute\b",
-    r"\bcurrent\b", r"\bas of\b", r"\btoday'?s\b", r"\bprice\s+today\b"
-]
-LATEST_PATTERN = re.compile("|".join(LATEST_TRIGGERS), re.IGNORECASE)
-
-# Simple aliases for finance names/tickers (extend as needed)
-ALIASES = {
-    "tcs": "Tata Consultancy Services",
-    "ril": "Reliance Industries",
-    "infy": "Infosys",
-    "hdfc bank": "HDFC Bank",
-    "icici": "ICICI Bank",
-}
-
-def normalize_entities(text: str) -> str:
-    t = text
-    for k, v in ALIASES.items():
-        t = re.sub(rf"\b{k}\b", v, t, flags=re.IGNORECASE)
-    return t
-
-def needs_live_context(query: str) -> bool:
-    """Heuristic to detect time-sensitive queries."""
-    if not query:
-        return False
-    q = query.lower()
-
-    if LATEST_PATTERN.search(q):
-        return True
-
-    # Domain shortcuts
-    domain_triggers = [
-        "who won", "match result", "score now", "stock price", "share price",
-        "usd inr rate", "exchange rate", "weather", "today's weather",
-        "news on", "headline", "earnings today", "ipo today",
-        "live price", "current price", "price right now"
-    ]
-    if any(t in q for t in domain_triggers):
-        return True
-
-    # Finance shortcut: “price of <entity>”
-    if re.search(r"\bprice of\b", q) and not re.search(r"\byesterday|last close|history\b", q):
-        return True
-
-    return False
-
-def pick_is_news(query: str) -> bool:
-    """Treat as news if clear news terms appear."""
-    q = query.lower()
-    news_terms = ["news", "headline", "breaking", "election", "budget", "earthquake", "merger", "acquisition", "ceo resigns"]
-    return any(t in q for t in news_terms)
-
-def serpapi_search(query: str, is_news: bool = False, num: int = SEARCH_TOPK) -> List[Dict[str, str]]:
-    """Fetch top search or news results from SerpAPI."""
-    if not SERPAPI_API_KEY:
-        return []
-
-    params = {
-        "api_key": SERPAPI_API_KEY,
-        "q": query,
-    }
-
-    if is_news:
-        url = "https://serpapi.com/search.json"
-        params.update({"engine": "google_news", "num": min(num, 10), "hl": "en", "gl": "in"})
-    else:
-        url = "https://serpapi.com/search.json"
-        params.update({"engine": "google", "num": min(num, 10), "hl": "en", "gl": "in"})
-
-    r = requests.get(url, params=params, timeout=20)
-    r.raise_for_status()
-    data = r.json()
-
-    results: List[Dict[str, str]] = []
-    if is_news:
-        for item in (data.get("news_results") or [])[:num]:
-            results.append({
-                "title": item.get("title") or "",
-                "snippet": item.get("snippet") or item.get("description") or "",
-                "link": item.get("link") or "",
-                "source": (item.get("source") or {}).get("name") or item.get("source") or ""
-            })
-    else:
-        for item in (data.get("organic_results") or [])[:num]:
-            results.append({
-                "title": item.get("title") or "",
-                "snippet": item.get("snippet") or "",
-                "link": item.get("link") or "",
-                "source": item.get("source") or ""
-            })
-    return results
-
-def build_citation_block(hits: List[Dict[str, str]]) -> str:
-    """Compact citations for the LLM and the response."""
-    lines = []
-    for i, h in enumerate(hits, start=1):
-        title = (h.get("title") or "").strip()
-        link = (h.get("link") or "").strip()
-        source = (h.get("source") or "").strip()
-        snippet = (h.get("snippet") or "").strip()
-        lines.append(f"[{i}] {title} — {source}\n{snippet}\n{link}")
-    return "\n\n".join(lines)
-
-# ----------------------------
-# LLM Calls
-# ----------------------------
-
-BASE_SYSTEM_PROMPT = (
-    "You are a helpful and precise assistant. Use simple, neutral English. "
-    "When sources are provided, synthesize them, highlight clear facts, and include a short 'Sources' list as [1], [2], etc. "
-    "If information is uncertain or evolving, state that clearly."
-)
-
-def call_openai(system_prompt: str, user_prompt: str) -> str:
-    """OpenAI Python SDK ≥ 1.0.0."""
-    if not openai_client:
-        raise RuntimeError("OpenAI is not configured.")
-    resp = openai_client.chat.completions.create(
-        model="gpt-4o-mini",
-        messages=[
-            {"role": "system", "content": system_prompt},
-            {"role": "user", "content": user_prompt}
-        ],
-        temperature=0.2,
-        max_tokens=900,
-    )
-    return (resp.choices[0].message.content or "").strip()
-
-def call_cohere(system_prompt: str, user_prompt: str) -> str:
-    """Cohere chat (adjust model if needed)."""
-    if not cohere or not COHERE_API_KEY:
-        raise RuntimeError("Cohere is not configured.")
-    client = cohere.Client(api_key=COHERE_API_KEY)
-    resp = client.chat(
-        model="command-r-plus",
-        messages=[
-            {"role": "system", "content": system_prompt},
-            {"role": "user", "content": user_prompt}
-        ],
-        temperature=0.2,
-        max_tokens=900,
-    )
-    text = getattr(resp, "text", None) or (getattr(resp, "output_text", None))
-    if not text and hasattr(resp, "message") and hasattr(resp.message, "content"):
-        parts = resp.message.content
-        text = "".join(getattr(p, "text", "") for p in parts)
-    return (text or "").strip()
-
-def call_llm(system_prompt: str, user_prompt: str) -> str:
-    if LLM_PROVIDER == "openai":
-        return call_openai(system_prompt, user_prompt)
-    elif LLM_PROVIDER == "cohere":
-        return call_cohere(system_prompt, user_prompt)
-    else:
-        raise RuntimeError("Unsupported LLM_PROVIDER")
-
-def compose_live_user_prompt(query: str, hits: List[Dict[str, str]]) -> str:
-    citation_block = build_citation_block(hits)
-    today = datetime.now().strftime("%B %d, %Y")
-    return (
-        f"User question (time-sensitive): {query}\n"
-        f"Date today: {today}\n\n"
-        f"You have these top search results. Answer using only what these sources support. "
-        f"Be concise and include a 'Sources' section with numbered citations pointing to the links.\n\n"
-        f"{citation_block}\n\n"
-        f"Now write the answer:"
-    )
-
-def compose_general_user_prompt(query: str) -> str:
-    today = datetime.now().strftime("%B %d, %Y")
-    return (
-        f"User question: {query}\n"
-        f"(Answer in simple, neutral English. If facts might have changed after {today}, mention that briefly.)"
-    )
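The removed file defines routing helpers but, in the 232 deleted lines shown, no Flask endpoint that ties them together; a sketch of how they compose (the wiring itself is an assumption, while every helper named below exists in the module):

    def answer(query: str) -> str:
        # Hypothetical glue over the deleted helpers: route time-sensitive
        # queries through SerpAPI, otherwise answer directly with the LLM.
        q = normalize_entities(query)
        if needs_live_context(q):
            hits = serpapi_search(q, is_news=pick_is_news(q))
            if hits:
                return call_llm(BASE_SYSTEM_PROMPT, compose_live_user_prompt(q, hits))
        return call_llm(BASE_SYSTEM_PROMPT, compose_general_user_prompt(q))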
companies.py
DELETED

@@ -1,50 +0,0 @@
-# utils.py
-import csv
-import io
-import requests
-from typing import List, Dict
-from requests.exceptions import RequestException
-import time
-
-# List of URLs for NIFTY50 and NIFTY100
-NIFTY_URLS = {
-    "NIFTY50": "https://www.niftyindices.com/IndexConstituent/ind_nifty50list.csv",
-    "NIFTY100": "https://www.niftyindices.com/IndexConstituent/ind_nifty100list.csv"
-}
-
-def fetch_nifty_companies(index_code: str, retries: int = 3, delay: int = 5) -> List[Dict[str, str]]:
-    # Get the URL for the given index_code
-    url = NIFTY_URLS.get(index_code)
-
-    if not url:
-        raise ValueError(f"Unknown index code: {index_code}")
-
-    # Retry logic
-    for attempt in range(retries):
-        try:
-            # Fetch the CSV data
-            response = requests.get(url)
-            # Ensure the request was successful
-            response.raise_for_status()
-            # Read CSV data from the response text
-            return parse_nifty_csv(response.text)
-
-        except RequestException as e:
-            print(f"Attempt {attempt + 1} failed: {e}")
-            if attempt < retries - 1:
-                time.sleep(delay)  # Wait before retrying
-            else:
-                raise Exception(f"Failed to fetch data after {retries} attempts.") from e
-
-# Function to fetch companies for both NIFTY50 and NIFTY100
-def get_companies_from_indices() -> Dict[str, List[Dict[str, str]]]:
-    nifty50_companies = fetch_nifty_companies("NIFTY50")
-    nifty100_companies = fetch_nifty_companies("NIFTY100")
-
-    # Combine both lists and return
-    all_companies = {
-        "NIFTY50": nifty50_companies,
-        "NIFTY100": nifty100_companies
-    }
-
-    return all_companies
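fetch_nifty_companies() returns parse_nifty_csv(response.text), but that helper is not among the 50 removed lines. A plausible sketch, assuming the niftyindices.com constituent CSV uses headers like "Company Name" and "Symbol" (both the body and the column names are assumptions):

    def parse_nifty_csv(csv_text: str) -> List[Dict[str, str]]:
        # Hypothetical reconstruction of the missing helper; csv and io are
        # already imported at the top of the module.
        reader = csv.DictReader(io.StringIO(csv_text))
        return [
            {"name": (row.get("Company Name") or "").strip(),
             "symbol": (row.get("Symbol") or "").strip()}
            for row in reader
        ]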
highlow_forecast.py
ADDED
|
@@ -0,0 +1,695 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import talib
|
| 4 |
+
|
| 5 |
+
# Optional ML imports (graceful fallback if scikit-learn is not installed)
|
| 6 |
+
try:
|
| 7 |
+
from sklearn.ensemble import ExtraTreesRegressor
|
| 8 |
+
from sklearn.model_selection import TimeSeriesSplit
|
| 9 |
+
from sklearn.metrics import mean_absolute_error
|
| 10 |
+
_SKLEARN_AVAILABLE = True
|
| 11 |
+
except Exception:
|
| 12 |
+
ExtraTreesRegressor = None
|
| 13 |
+
TimeSeriesSplit = None
|
| 14 |
+
mean_absolute_error = None
|
| 15 |
+
_SKLEARN_AVAILABLE = False
|
| 16 |
+
|
| 17 |
+
# Optional: HistGradientBoostingRegressor for quantile regression
|
| 18 |
+
try:
|
| 19 |
+
from sklearn.ensemble import HistGradientBoostingRegressor
|
| 20 |
+
_HGBR_AVAILABLE = True
|
| 21 |
+
except Exception:
|
| 22 |
+
HistGradientBoostingRegressor = None
|
| 23 |
+
_HGBR_AVAILABLE = False
|
| 24 |
+
|
| 25 |
+
# --------------------- Configuration ---------------------
|
| 26 |
+
|
| 27 |
+
# Prefer quantile gradient boosting for extreme values (better for High/Low)
|
| 28 |
+
_USE_HGBR_QUANTILE = True # auto-fallback to ExtraTrees when unavailable
|
| 29 |
+
|
| 30 |
+
# Quantiles for high/low tails (in log-ratio space)
|
| 31 |
+
_Q_HIGH = 0.80 # upper-tail for High
|
| 32 |
+
_Q_LOW = 0.20 # lower-tail for Low
|
| 33 |
+
|
| 34 |
+
# Blend ML predictions with TA fallback (in log-return space)
|
| 35 |
+
# Set to 0.0 to disable blending
|
| 36 |
+
_BLEND_TA_WEIGHT = 0.20
|
| 37 |
+
|
| 38 |
+
# Log-ratio target winsorization to reduce outlier impact: [q_low, q_high] (ExtraTrees path)
|
| 39 |
+
_WINSOR_Q_LOW = 0.005
|
| 40 |
+
_WINSOR_Q_HIGH = 0.995
|
| 41 |
+
|
| 42 |
+
# Exponential recency weighting: larger = faster decay (0.0 to disable)
|
| 43 |
+
_RECENCY_DECAY = 0.003 # per-sample step
|
| 44 |
+
|
| 45 |
+
# ExtraTrees hyperparameters tuned for generalization
|
| 46 |
+
_ETR_PARAMS_CV = dict(
|
| 47 |
+
n_estimators=800,
|
| 48 |
+
max_depth=None,
|
| 49 |
+
min_samples_split=2,
|
| 50 |
+
min_samples_leaf=3,
|
| 51 |
+
max_features=0.6,
|
| 52 |
+
bootstrap=False,
|
| 53 |
+
n_jobs=-1,
|
| 54 |
+
random_state=42,
|
| 55 |
+
)
|
| 56 |
+
_ETR_PARAMS_FINAL = dict(
|
| 57 |
+
n_estimators=1200,
|
| 58 |
+
max_depth=None,
|
| 59 |
+
min_samples_split=2,
|
| 60 |
+
min_samples_leaf=3,
|
| 61 |
+
max_features=0.6,
|
| 62 |
+
bootstrap=False,
|
| 63 |
+
n_jobs=-1,
|
| 64 |
+
random_state=42,
|
| 65 |
+
)
|
| 66 |
+
|
| 67 |
+
# HistGradientBoosting hyperparameters for quantile regression
|
| 68 |
+
_HGBR_PARAMS = dict(
|
| 69 |
+
loss="quantile",
|
| 70 |
+
learning_rate=0.05,
|
| 71 |
+
max_iter=600,
|
| 72 |
+
max_depth=3,
|
| 73 |
+
max_leaf_nodes=31,
|
| 74 |
+
max_bins=255,
|
| 75 |
+
l2_regularization=0.0,
|
| 76 |
+
early_stopping=False, # avoid random holdout leaking time
|
| 77 |
+
random_state=42,
|
| 78 |
+
)
|
| 79 |
+
|
| 80 |
+
# In-memory per-ticker model cache (no disk I/O)
|
| 81 |
+
_MEM_CACHE = {} # key: ticker.upper(), value: bundle dict
|
| 82 |
+
|
| 83 |
+
# --------------------- OHLC Utilities ---------------------
|
| 84 |
+
|
| 85 |
+
def _ensure_ohlc_columns(df: pd.DataFrame) -> pd.DataFrame:
|
| 86 |
+
cols = {c.lower(): c for c in df.columns}
|
| 87 |
+
need = ["open", "high", "low", "close", "volume"]
|
| 88 |
+
mapping = {}
|
| 89 |
+
for n in need:
|
| 90 |
+
if n in cols:
|
| 91 |
+
mapping[cols[n]] = n
|
| 92 |
+
else:
|
| 93 |
+
# try MultiIndex column cases from yfinance
|
| 94 |
+
for c in df.columns:
|
| 95 |
+
name = c[0].lower() if isinstance(c, tuple) and len(c) > 0 else str(c).lower()
|
| 96 |
+
if name == n:
|
| 97 |
+
mapping[c] = n
|
| 98 |
+
break
|
| 99 |
+
out = df.rename(columns=mapping).copy()
|
| 100 |
+
missing = [c for c in need if c not in out.columns]
|
| 101 |
+
if missing:
|
| 102 |
+
raise ValueError(f"Missing OHLCV columns after normalization: {missing}")
|
| 103 |
+
return out[["open", "high", "low", "close", "volume"]]
|
| 104 |
+
|
| 105 |
+
# --------------------- Business day helper ---------------------
|
| 106 |
+
|
| 107 |
+
def _next_business_days(last_date: pd.Timestamp, periods: int, exchange: str = "XNYS") -> pd.DatetimeIndex:
|
| 108 |
+
"""
|
| 109 |
+
Return next 'periods' business sessions after last_date.
|
| 110 |
+
Tries exchange calendar via pandas_market_calendars (holidays-aware), fallback to weekdays-only.
|
| 111 |
+
exchange examples: 'XNYS' (NYSE), 'XBOM' (BSE), 'XNAS' (NASDAQ), 'XNSE' (NSE).
|
| 112 |
+
"""
|
| 113 |
+
last_date = pd.Timestamp(last_date).tz_localize(None)
|
| 114 |
+
try:
|
| 115 |
+
import pandas_market_calendars as mcal
|
| 116 |
+
cal = mcal.get_calendar(exchange)
|
| 117 |
+
# buffer long enough to cover holidays
|
| 118 |
+
schedule = cal.schedule(start_date=last_date + pd.Timedelta(days=1),
|
| 119 |
+
end_date=last_date + pd.Timedelta(days=180))
|
| 120 |
+
sessions = schedule.index.tz_localize(None)
|
| 121 |
+
if len(sessions) >= periods:
|
| 122 |
+
return sessions[:periods]
|
| 123 |
+
# If for some reason not enough sessions, extend with weekday fallback
|
| 124 |
+
needed = periods - len(sessions)
|
| 125 |
+
tail = pd.bdate_range(sessions[-1] + pd.offsets.BDay(1) if len(sessions) else last_date + pd.offsets.BDay(1),
|
| 126 |
+
periods=needed)
|
| 127 |
+
return sessions.append(tail)
|
| 128 |
+
except Exception:
|
| 129 |
+
# Weekdays-only fallback
|
| 130 |
+
return pd.bdate_range(last_date + pd.offsets.BDay(1), periods=periods)
|
| 131 |
+
|
| 132 |
+
# --------------------- TA Heuristic (Fallback, No ML) ---------------------
|
| 133 |
+
|
| 134 |
+
def _last_finite(values: np.ndarray, default: float = np.nan) -> float:
|
| 135 |
+
for x in values[::-1]:
|
| 136 |
+
if np.isfinite(x):
|
| 137 |
+
return float(x)
|
| 138 |
+
return float(default)
|
| 139 |
+
|
| 140 |
+
def _ta_fallback_forecast(ohlc: pd.DataFrame, horizons: int = 15):
|
| 141 |
+
h = ohlc["high"].astype(float).values
|
| 142 |
+
l = ohlc["low"].astype(float).values
|
| 143 |
+
c = ohlc["close"].astype(float).values
|
| 144 |
+
|
| 145 |
+
if len(c) < 60:
|
| 146 |
+
raise ValueError("Not enough history for TA fallback (need >=60 rows).")
|
| 147 |
+
|
| 148 |
+
base_close = _last_finite(ohlc["close"].replace(0.0, np.nan).values)
|
| 149 |
+
if not np.isfinite(base_close) or base_close <= 0:
|
| 150 |
+
raise ValueError("Invalid last close after cleaning.")
|
| 151 |
+
|
| 152 |
+
atr14 = talib.ATR(h, l, c, timeperiod=14)
|
| 153 |
+
atr_last = _last_finite(atr14, default=np.nan)
|
| 154 |
+
atr_pct = (atr_last / base_close) if np.isfinite(atr_last) and base_close > 0 else np.nan
|
| 155 |
+
|
| 156 |
+
ema20 = talib.EMA(c, timeperiod=20)
|
| 157 |
+
ema50 = talib.EMA(c, timeperiod=50)
|
| 158 |
+
ema20_last = _last_finite(ema20, default=np.nan)
|
| 159 |
+
ema50_last = _last_finite(ema50, default=np.nan)
|
| 160 |
+
|
| 161 |
+
trend_strength = 0.0
|
| 162 |
+
if np.isfinite(ema20_last) and np.isfinite(ema50_last) and ema50_last > 0:
|
| 163 |
+
trend_strength = np.clip(ema20_last / ema50_last - 1.0, -0.05, 0.05)
|
| 164 |
+
ema20_slope = 0.0
|
| 165 |
+
if len(ema20) >= 2 and np.isfinite(ema20[-1]) and np.isfinite(ema20[-2]) and ema20[-2] > 0:
|
| 166 |
+
ema20_slope = np.clip((ema20[-1] / ema20[-2]) - 1.0, -0.05, 0.05)
|
| 167 |
+
|
| 168 |
+
adx14 = talib.ADX(h, l, c, timeperiod=14)
|
| 169 |
+
adx = _last_finite(adx14, default=20.0) / 100.0
|
| 170 |
+
adx = float(np.clip(adx, 0.0, 1.0))
|
| 171 |
+
|
| 172 |
+
rsi14 = talib.RSI(c, timeperiod=14)
|
| 173 |
+
rsi = _last_finite(rsi14, default=50.0)
|
| 174 |
+
tilt = float(np.clip((rsi - 50.0) / 50.0, -1.0, 1.0))
|
| 175 |
+
|
| 176 |
+
logret = np.diff(np.log(np.maximum(c, 1e-12)))
|
| 177 |
+
if len(logret) >= 20 and np.isfinite(logret[-20:]).sum() >= 10:
|
| 178 |
+
sigma20 = float(pd.Series(logret).rolling(20).std().iloc[-1])
|
| 179 |
+
else:
|
| 180 |
+
sigma20 = float(np.nan)
|
| 181 |
+
|
| 182 |
+
components = []
|
| 183 |
+
if np.isfinite(sigma20):
|
| 184 |
+
components.append(sigma20)
|
| 185 |
+
if np.isfinite(atr_pct):
|
| 186 |
+
components.append(atr_pct)
|
| 187 |
+
daily_vol = 0.0
|
| 188 |
+
if components:
|
| 189 |
+
daily_vol = 0.6 * components[0] + (0.4 * components[1] if len(components) > 1 else 0.0)
|
| 190 |
+
daily_vol = float(np.clip(daily_vol if np.isfinite(daily_vol) else 0.02, 0.004, 0.08))
|
| 191 |
+
|
| 192 |
+
drift_per_day = float(np.clip(0.5 * trend_strength + 0.5 * ema20_slope, -0.02, 0.02))
|
| 193 |
+
|
| 194 |
+
up_weight = 1.0 - 0.3 * tilt
|
| 195 |
+
dn_weight = 1.0 + 0.3 * tilt
|
| 196 |
+
up_weight = float(np.clip(up_weight, 0.5, 1.5))
|
| 197 |
+
dn_weight = float(np.clip(dn_weight, 0.5, 1.5))
|
| 198 |
+
trend_amp = 0.75 + 0.5 * adx
|
| 199 |
+
|
| 200 |
+
pred_high, pred_low = [], []
|
| 201 |
+
for k in range(1, horizons + 1):
|
| 202 |
+
amp = daily_vol * np.sqrt(k) * trend_amp
|
| 203 |
+
drift = drift_per_day * k
|
| 204 |
+
up_move = amp * up_weight
|
| 205 |
+
dn_move = amp * dn_weight
|
| 206 |
+
hi = base_close * (1.0 + drift + up_move)
|
| 207 |
+
lo = base_close * (1.0 + drift - dn_move)
|
| 208 |
+
hi = max(0.0, hi)
|
| 209 |
+
lo = max(0.0, lo)
|
| 210 |
+
if lo > hi:
|
| 211 |
+
lo, hi = hi, lo
|
| 212 |
+
pred_high.append(hi)
|
| 213 |
+
pred_low.append(lo)
|
| 214 |
+
|
| 215 |
+
return base_close, np.array(pred_high), np.array(pred_low)
|
| 216 |
+
|
| 217 |
+
# --------------------- Feature Engineering for ML ---------------------
|
| 218 |
+
|
| 219 |
+
def _compute_ta_features(df: pd.DataFrame) -> pd.DataFrame:
|
| 220 |
+
df = _ensure_ohlc_columns(df).copy()
|
| 221 |
+
o, h, l, c, v = [df[k].astype(float).values for k in ("open", "high", "low", "close", "volume")]
|
| 222 |
+
|
| 223 |
+
close = df["close"].astype(float)
|
| 224 |
+
open_ = df["open"].astype(float)
|
| 225 |
+
high = df["high"].astype(float)
|
| 226 |
+
low = df["low"].astype(float)
|
| 227 |
+
vol = df["volume"].astype(float)
|
| 228 |
+
|
| 229 |
+
df_feat = pd.DataFrame(index=df.index)
|
| 230 |
+
|
| 231 |
+
# Basic price features
|
| 232 |
+
df_feat["ret_1"] = close.pct_change(1)
|
| 233 |
+
df_feat["logret_1"] = np.log(close.replace(0.0, np.nan)).diff(1)
|
| 234 |
+
df_feat["ret_5"] = close.pct_change(5)
|
| 235 |
+
df_feat["ret_10"] = close.pct_change(10)
|
| 236 |
+
df_feat["roll_mean_5"] = close.rolling(5).mean() / close - 1.0
|
| 237 |
+
df_feat["roll_mean_20"] = close.rolling(20).mean() / close - 1.0
|
| 238 |
+
df_feat["roll_std_10"] = close.pct_change().rolling(10).std()
|
| 239 |
+
df_feat["range_pct"] = (high - low) / close.replace(0.0, np.nan)
|
| 240 |
+
|
| 241 |
+
# Candle features (normalized)
|
| 242 |
+
with np.errstate(divide="ignore", invalid="ignore"):
|
| 243 |
+
body = (close - open_) / close
|
| 244 |
+
upper_shadow = (high - np.maximum(close, open_)) / close
|
| 245 |
+
lower_shadow = (np.minimum(close, open_) - low) / close
|
| 246 |
+
df_feat["candle_body"] = body
|
| 247 |
+
df_feat["candle_upper"] = upper_shadow
|
| 248 |
+
df_feat["candle_lower"] = lower_shadow
|
| 249 |
+
df_feat["gap_open"] = open_.shift(0) / close.shift(1) - 1.0
|
| 250 |
+
|
| 251 |
+
# EMAs and distances
|
| 252 |
+
ema5 = talib.EMA(close.values, timeperiod=5)
|
| 253 |
+
ema20 = talib.EMA(close.values, timeperiod=20)
|
| 254 |
+
ema50 = talib.EMA(close.values, timeperiod=50)
|
| 255 |
+
with np.errstate(divide="ignore", invalid="ignore"):
|
| 256 |
+
df_feat["ema5_dist"] = (ema5 / close.values) - 1.0
|
| 257 |
+
df_feat["ema20_dist"] = (ema20 / close.values) - 1.0
|
| 258 |
+
df_feat["ema50_dist"] = (ema50 / close.values) - 1.0
|
| 259 |
+
# EMA slopes (1-day change)
|
| 260 |
+
df_feat["ema20_slope"] = (pd.Series(ema20, index=df.index).pct_change(1))
|
| 261 |
+
|
| 262 |
+
# RSI family
|
| 263 |
+
df_feat["rsi14"] = talib.RSI(close.values, timeperiod=14) / 100.0
|
| 264 |
+
df_feat["rsi5"] = talib.RSI(close.values, timeperiod=5) / 100.0
|
| 265 |
+
|
| 266 |
+
# MACD
|
| 267 |
+
macd, macdsig, macdhist = talib.MACD(close.values, fastperiod=12, slowperiod=26, signalperiod=9)
|
| 268 |
+
df_feat["macd"] = macd
|
| 269 |
+
df_feat["macdsig"] = macdsig
|
| 270 |
+
df_feat["macdhist"] = macdhist
|
| 271 |
+
|
| 272 |
+
# Bollinger Bands width
|
| 273 |
+
upper, middle, lower = talib.BBANDS(close.values, timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)
|
| 274 |
+
with np.errstate(divide="ignore", invalid="ignore"):
|
| 275 |
+
df_feat["bb_width"] = (upper - lower) / middle
|
| 276 |
+
|
| 277 |
+
# Volatility/Trend
|
| 278 |
+
atr = talib.ATR(h, l, c, timeperiod=14)
|
| 279 |
+
with np.errstate(divide="ignore", invalid="ignore"):
|
| 280 |
+
df_feat["atr14"] = atr / close.values
|
| 281 |
+
df_feat["adx14"] = talib.ADX(h, l, c, timeperiod=14) / 100.0
|
| 282 |
+
|
| 283 |
+
# Additional momentum/oscillators
|
| 284 |
+
df_feat["roc10"] = talib.ROC(close.values, timeperiod=10) / 100.0
|
| 285 |
+
df_feat["cci14"] = talib.CCI(h, l, c, timeperiod=14) / 100.0
|
| 286 |
+
df_feat["mfi14"] = talib.MFI(h, l, c, v, timeperiod=14) / 100.0
|
| 287 |
+
df_feat["willr14"] = talib.WILLR(h, l, c, timeperiod=14) / 100.0 # [-1, 0]
|
| 288 |
+
|
| 289 |
+
# Stochastic
|
| 290 |
+
slowk, slowd = talib.STOCH(h, l, c, fastk_period=14, slowk_period=3, slowk_matype=0, slowd_period=3, slowd_matype=0)
|
| 291 |
+
df_feat["stoch_k"] = slowk / 100.0
|
| 292 |
+
df_feat["stoch_d"] = slowd / 100.0
|
| 293 |
+
|
| 294 |
+
# OBV normalized (robust to missing/flat volume)
|
| 295 |
+
finite_vol = np.isfinite(vol.values)
|
| 296 |
+
if finite_vol.sum() >= max(30, int(0.5 * len(vol))):
|
| 297 |
+
obv = talib.OBV(close.values, vol.values)
|
| 298 |
+
df_feat["obv_z"] = pd.Series(obv, index=df.index).pct_change(5)
|
| 299 |
+
else:
|
| 300 |
+
df_feat["obv_z"] = 0.0
|
| 301 |
+
|
| 302 |
+
# Volume z-score and turnover proxies
|
| 303 |
+
vol_roll_mean = vol.rolling(20).mean()
|
| 304 |
+
vol_roll_std = vol.rolling(20).std()
|
| 305 |
+
with np.errstate(divide="ignore", invalid="ignore"):
|
| 306 |
+
df_feat["vol_z20"] = (vol - vol_roll_mean) / vol_roll_std
|
| 307 |
+
df_feat["turnover_z20"] = ((vol * close) - (vol * close).rolling(20).mean()) / (vol * close).rolling(20).std()
|
| 308 |
+
|
| 309 |
+
# Distance to rolling extremes
|
| 310 |
+
roll_max_20 = close.rolling(20).max()
|
| 311 |
+
roll_min_20 = close.rolling(20).min()
|
| 312 |
+
roll_max_55 = close.rolling(55).max()
|
| 313 |
+
roll_min_55 = close.rolling(55).min()
|
| 314 |
+
with np.errstate(divide="ignore", invalid="ignore"):
|
| 315 |
+
df_feat["dist_max20"] = roll_max_20 / close - 1.0
|
| 316 |
+
df_feat["dist_min20"] = close / roll_min_20 - 1.0
|
| 317 |
+
df_feat["dist_max55"] = roll_max_55 / close - 1.0
|
| 318 |
+
df_feat["dist_min55"] = close / roll_min_55 - 1.0
|
| 319 |
+
|
| 320 |
+
# Realized volatility features
|
| 321 |
+
logret = np.log(close.replace(0.0, np.nan)).diff(1)
|
| 322 |
+
df_feat["rv5"] = logret.rolling(5).std()
|
| 323 |
+
df_feat["rv20"] = logret.rolling(20).std()
|
| 324 |
+
df_feat["avg_range5"] = ((high - low) / close.replace(0.0, np.nan)).rolling(5).mean()
|
| 325 |
+
|
| 326 |
+
# Calendar (cyclical day-of-week, month-of-year)
|
| 327 |
+
dow = pd.Series(df.index).map(lambda d: d.weekday() if hasattr(d, "weekday") else pd.Timestamp(d).weekday())
|
| 328 |
+
df_feat["dow_sin"] = np.sin(2 * np.pi * dow / 7.0)
|
| 329 |
+
df_feat["dow_cos"] = np.cos(2 * np.pi * dow / 7.0)
|
| 330 |
+
moy = pd.Series(df.index).map(lambda d: (d.month if hasattr(d, "month") else pd.Timestamp(d).month))
|
| 331 |
+
df_feat["moy_sin"] = np.sin(2 * np.pi * (moy.astype(float) - 1.0) / 12.0)
|
| 332 |
+
df_feat["moy_cos"] = np.cos(2 * np.pi * (moy.astype(float) - 1.0) / 12.0)
|
| 333 |
+
|
| 334 |
+
# Lags of basic signals
|
| 335 |
+
df_feat["ret_1_lag1"] = df_feat["ret_1"].shift(1)
|
| 336 |
+
df_feat["ret_1_lag2"] = df_feat["ret_1"].shift(2)
|
| 337 |
+
df_feat["range_pct_lag1"] = df_feat["range_pct"].shift(1)
|
| 338 |
+
|
| 339 |
+
df_feat = df_feat.replace([np.inf, -np.inf], np.nan)
|
| 340 |
+
df_feat = df_feat.loc[:, df_feat.notna().any(axis=0)]
|
| 341 |
+
return df_feat
|
| 342 |
+
|
| 343 |
+
def _clean_features_for_training(feats: pd.DataFrame, warmup: int = 60) -> pd.DataFrame:
|
| 344 |
+
if feats.empty:
|
| 345 |
+
return feats
|
| 346 |
+
clean = feats.copy()
|
| 347 |
+
clean = clean.fillna(method="ffill").fillna(method="bfill")
|
| 348 |
+
if len(clean) > warmup:
|
| 349 |
+
clean = clean.iloc[warmup:]
|
| 350 |
+
clean = clean.dropna()
|
| 351 |
+
return clean
|
| 352 |
+
|
| 353 |
+
def _winsorize_targets(Y: np.ndarray, horizons: int, q_low: float, q_high: float) -> tuple[np.ndarray, dict]:
|
| 354 |
+
"""
|
| 355 |
+
Winsorize concatenated targets Y = [highs(0:h), lows(h:2h)] row-wise using global quantiles.
|
| 356 |
+
Returns clipped Y and thresholds used.
|
| 357 |
+
"""
|
| 358 |
+
h = horizons
|
| 359 |
+
Yh = Y[:, :h].ravel()
|
| 360 |
+
Yl = Y[:, h:].ravel()
|
| 361 |
+
|
| 362 |
+
lo_h, hi_h = np.quantile(Yh, [q_low, q_high]) if Yh.size else (-np.inf, np.inf)
|
| 363 |
+
lo_l, hi_l = np.quantile(Yl, [q_low, q_high]) if Yl.size else (-np.inf, np.inf)
|
| 364 |
+
|
| 365 |
+
Y_clip = Y.copy()
|
| 366 |
+
Y_clip[:, :h] = np.clip(Y_clip[:, :h], lo_h, hi_h)
|
| 367 |
+
Y_clip[:, h:] = np.clip(Y_clip[:, h:], lo_l, hi_l)
|
| 368 |
+
|
| 369 |
+
return Y_clip, {"high": (float(lo_h), float(hi_h)), "low": (float(lo_l), float(hi_l))}
|
| 370 |
+
|
| 371 |
+
def _sample_weights(n: int, decay: float) -> np.ndarray:
|
| 372 |
+
"""
|
| 373 |
+
Exponential recency weights. Newer samples get higher weight.
|
| 374 |
+
w_i = exp(-decay * (n-1-i)), i in [0..n-1]
|
| 375 |
+
"""
|
| 376 |
+
if decay <= 0 or n <= 0:
|
| 377 |
+
return np.ones(n, dtype=float)
|
| 378 |
+
idx = np.arange(n, dtype=float)
|
| 379 |
+
w = np.exp(-decay * (n - 1 - idx))
|
| 380 |
+
w /= np.average(w) # normalize to mean 1.0
|
| 381 |
+
return w
|
| 382 |
+
|
| 383 |
+
def _make_supervised(df: pd.DataFrame, horizons: int = 15):
|
| 384 |
+
"""
|
| 385 |
+
Build X, Y for multi-horizon high/low forecast.
|
| 386 |
+
Targets (log-ratio): y_high_h = log(High[t+h]/Close[t]), y_low_h = log(Low[t+h]/Close[t])
|
| 387 |
+
Log transform stabilizes variance and reduces skew.
|
| 388 |
+
"""
|
| 389 |
+
ohlc = _ensure_ohlc_columns(df)
|
| 390 |
+
feats = _compute_ta_features(df)
|
| 391 |
+
feat_df = _clean_features_for_training(feats, warmup=60)
|
| 392 |
+
|
| 393 |
+
# Align to cleaned feature index
|
| 394 |
+
ohlc = ohlc.loc[feat_df.index]
|
| 395 |
+
|
| 396 |
+
highs = ohlc["high"].astype(float).values
|
| 397 |
+
lows = ohlc["low"].astype(float).values
|
| 398 |
+
closes = ohlc["close"].astype(float).values
|
| 399 |
+
X_all = feat_df.values
|
| 400 |
+
|
| 401 |
+
n = len(feat_df)
|
| 402 |
+
if n < horizons + 30:
|
| 403 |
+
raise ValueError(f"Not enough rows after feature warm-up for {horizons}-day training. Have: {n}")
|
| 404 |
+
|
| 405 |
+
X_list, Y_list = [], []
|
| 406 |
+
for i in range(n - horizons):
|
| 407 |
+
base_c = closes[i]
|
| 408 |
+
if not np.isfinite(base_c) or base_c <= 0:
|
| 409 |
+
continue
|
| 410 |
+
|
| 411 |
+
future_highs = highs[i + 1:i + horizons + 1]
|
| 412 |
+
future_lows = lows[i + 1:i + horizons + 1]
|
| 413 |
+
|
| 414 |
+
with np.errstate(divide="ignore", invalid="ignore"):
|
| 415 |
+
yh = np.log(np.maximum(future_highs, 1e-12) / base_c)
|
| 416 |
+
yl = np.log(np.maximum(future_lows, 1e-12) / base_c)
|
| 417 |
+
|
| 418 |
+
if np.any(~np.isfinite(yh)) or np.any(~np.isfinite(yl)):
|
| 419 |
+
continue
|
| 420 |
+
|
| 421 |
+
X_list.append(X_all[i, :])
|
| 422 |
+
Y_list.append(np.concatenate([yh, yl], axis=0))
|
| 423 |
+
|
| 424 |
+
X = np.asarray(X_list)
|
| 425 |
+
Y = np.asarray(Y_list)
|
| 426 |
+
if X.size == 0 or Y.size == 0:
|
| 427 |
+
raise ValueError("No valid supervised samples after cleaning. Check data quality (NaNs/zeros).")
|
| 428 |
+
feature_names = feat_df.columns.tolist()
|
| 429 |
+
return X, Y, feature_names, feat_df.index[:len(X)]
|
| 430 |
+
|
| 431 |
+
def _get_sklearn_version():
|
| 432 |
+
try:
|
| 433 |
+
import sklearn
|
| 434 |
+
return sklearn.__version__
|
| 435 |
+
except Exception:
|
| 436 |
+
return None
|
| 437 |
+
|
| 438 |
+
# --------------------- Model Train/Load (In-Memory Only) ---------------------
|
| 439 |
+
|
| 440 |
+
def train_or_load_highlow_15d(df: pd.DataFrame, ticker: str, horizons: int = 15):
|
| 441 |
+
key = ticker.upper()
|
| 442 |
+
if key in _MEM_CACHE:
|
| 443 |
+
return _MEM_CACHE[key]
|
| 444 |
+
|
| 445 |
+
# If sklearn is not available at all, keep TA fallback metadata
|
| 446 |
+
if not _SKLEARN_AVAILABLE:
|
| 447 |
+
bundle = {
|
| 448 |
+
"model": None,
|
| 449 |
+
"feature_names": None,
|
| 450 |
+
"horizons": horizons,
|
| 451 |
+
"trained_rows": int(len(df)),
|
| 452 |
+
"metrics": None,
|
| 453 |
+
"sklearn_version": None,
|
| 454 |
+
"ticker": key,
|
| 455 |
+
"model_path": None,
|
| 456 |
+
"winsor": None,
|
| 457 |
+
"blend_weight": _BLEND_TA_WEIGHT,
|
| 458 |
+
"transform": "logratio",
|
| 459 |
+
"feature_importances": None,
|
| 460 |
+
"algo": "NONE",
|
| 461 |
+
}
|
| 462 |
+
_MEM_CACHE[key] = bundle
|
| 463 |
+
return bundle
|
| 464 |
+
|
| 465 |
+
# Build supervised set
|
| 466 |
+
X, Y_raw, feature_names, _ = _make_supervised(df, horizons=horizons)
|
| 467 |
+
sw = _sample_weights(X.shape[0], _RECENCY_DECAY)
|
| 468 |
+
|
| 469 |
+
# Prefer quantile gradient boosting if available
|
| 470 |
+
if _USE_HGBR_QUANTILE and _HGBR_AVAILABLE and HistGradientBoostingRegressor is not None:
|
| 471 |
+
q_models_high, q_models_low = [], []
|
| 472 |
+
for k in range(horizons):
|
| 473 |
+
# High models (upper quantile)
|
| 474 |
+
mh = HistGradientBoostingRegressor(**_HGBR_PARAMS, quantile=_Q_HIGH)
|
| 475 |
+
mh.fit(X, Y_raw[:, k], sample_weight=sw)
|
| 476 |
+
q_models_high.append(mh)
|
| 477 |
+
|
| 478 |
+
# Low models (lower quantile)
|
| 479 |
+
ml = HistGradientBoostingRegressor(**_HGBR_PARAMS, quantile=_Q_LOW)
|
| 480 |
+
ml.fit(X, Y_raw[:, horizons + k], sample_weight=sw)
|
| 481 |
+
q_models_low.append(ml)
|
| 482 |
+
|
| 483 |
+
bundle = {
|
| 484 |
+
"model": None, # not used in quantile path
|
| 485 |
+
"q_models_high": q_models_high,
|
| 486 |
+
"q_models_low": q_models_low,
|
| 487 |
+
"feature_names": feature_names,
|
| 488 |
+
"horizons": horizons,
|
| 489 |
+
"trained_rows": int(X.shape[0]),
|
| 490 |
+
"metrics": None, # optional: add custom CV if desired
|
| 491 |
+
"sklearn_version": _get_sklearn_version(),
|
| 492 |
+
"ticker": key,
|
| 493 |
+
"model_path": None,
|
| 494 |
+
"winsor": None,
|
| 495 |
+
"blend_weight": _BLEND_TA_WEIGHT,
|
| 496 |
+
"transform": "logratio",
|
| 497 |
+
"feature_importances": None,
|
| 498 |
+
"algo": f"HGBR_QUANTILE(high={_Q_HIGH}, low={_Q_LOW})",
|
| 499 |
+
}
|
| 500 |
+
_MEM_CACHE[key] = bundle
|
| 501 |
+
return bundle
|
| 502 |
+
|
| 503 |
+
# Else fall back to ExtraTrees mean-regression (existing path)
|
| 504 |
+
Y_clip, winsor_info = _winsorize_targets(Y_raw, horizons, _WINSOR_Q_LOW, _WINSOR_Q_HIGH)
|
| 505 |
+
|
| 506 |
+
fold_metrics = []
|
| 507 |
+
feature_importances = None
|
| 508 |
+
|
| 509 |
+
if TimeSeriesSplit is not None:
|
| 510 |
+
tscv = TimeSeriesSplit(n_splits=5)
|
| 511 |
+
for train_idx, val_idx in tscv.split(X):
|
| 512 |
+
Xtr, Xvl = X[train_idx], X[val_idx]
|
| 513 |
+
Ytr_clipped = Y_clip[train_idx]
|
| 514 |
+
Yvl_true = Y_raw[val_idx] # evaluate on true (unclipped) targets
|
| 515 |
+
w_tr = sw[train_idx] if sw is not None else None
|
| 516 |
+
|
| 517 |
+
model_cv = ExtraTreesRegressor(**_ETR_PARAMS_CV)
|
| 518 |
+
model_cv.fit(Xtr, Ytr_clipped, sample_weight=w_tr)
|
| 519 |
+
Yhat = model_cv.predict(Xvl)
|
| 520 |
+
|
| 521 |
+
# Convert log-ratio back to percentage move for reporting
|
| 522 |
+
h = horizons
|
| 523 |
+
if mean_absolute_error is not None:
|
| 524 |
+
yh_pct = (np.exp(Yvl_true[:, :h]) - 1.0) * 100.0
|
| 525 |
+
yl_pct = (np.exp(Yvl_true[:, h:]) - 1.0) * 100.0
|
| 526 |
+
yhat_h_pct = (np.exp(Yhat[:, :h]) - 1.0) * 100.0
|
| 527 |
+
yhat_l_pct = (np.exp(Yhat[:, h:]) - 1.0) * 100.0
|
| 528 |
+
high_mae = mean_absolute_error(yh_pct, yhat_h_pct)
|
| 529 |
+
low_mae = mean_absolute_error(yl_pct, yhat_l_pct)
|
| 530 |
+
fold_metrics.append({"high_mae_pct": round(float(high_mae), 4),
|
| 531 |
+
"low_mae_pct": round(float(low_mae), 4)})
|
| 532 |
+
|
| 533 |
+
final_model = ExtraTreesRegressor(**_ETR_PARAMS_FINAL)
|
| 534 |
+
final_model.fit(X, Y_clip, sample_weight=sw)
|
| 535 |
+
|
| 536 |
+
try:
|
| 537 |
+
fi = final_model.feature_importances_
|
| 538 |
+
feature_importances = sorted(
|
| 539 |
+
zip(feature_names, fi),
|
| 540 |
+
key=lambda t: t[1],
|
| 541 |
+
reverse=True
|
| 542 |
+
)[:30]
|
| 543 |
+
feature_importances = [(str(n), float(v)) for n, v in feature_importances]
|
| 544 |
+
except Exception:
|
| 545 |
+
feature_importances = None
|
| 546 |
+
|
| 547 |
+
bundle = {
|
| 548 |
+
"model": final_model,
|
| 549 |
+
"feature_names": feature_names,
|
| 550 |
+
"horizons": horizons,
|
| 551 |
+
"trained_rows": int(X.shape[0]),
|
| 552 |
+
"metrics": fold_metrics or None,
|
| 553 |
+
"sklearn_version": _get_sklearn_version(),
|
| 554 |
+
"ticker": key,
|
| 555 |
+
"model_path": None,
|
| 556 |
+
"winsor": winsor_info,
|
| 557 |
+
"blend_weight": _BLEND_TA_WEIGHT,
|
| 558 |
+
"transform": "logratio",
|
| 559 |
+
"feature_importances": feature_importances,
|
| 560 |
+
"algo": "EXTRATREES_MEAN",
|
| 561 |
+
}
|
| 562 |
+
|
| 563 |
+
_MEM_CACHE[key] = bundle
|
| 564 |
+
return bundle
|
| 565 |
+
|
| 566 |
+
# --------------------- Forecast ---------------------
|
| 567 |
+
|
| 568 |
+
def forecast_next_15_high_low(ticker: str, stock_data: pd.DataFrame):
|
| 569 |
+
"""
|
| 570 |
+
Train/load from memory and forecast next 15 business days' High/Low.
|
| 571 |
+
If no ML available or insufficient data, uses TA fallback.
|
| 572 |
+
Returns dict: dates, pred_high, pred_low, base_close, bundle_meta
|
| 573 |
+
"""
|
| 574 |
+
if not isinstance(stock_data.index, pd.DatetimeIndex):
|
| 575 |
+
stock_data = stock_data.copy()
|
| 576 |
+
stock_data.index = pd.to_datetime(stock_data.index)
|
| 577 |
+
|
| 578 |
+
ohlc = _ensure_ohlc_columns(stock_data)
|
| 579 |
+
|
| 580 |
+
try:
|
| 581 |
+
bundle = train_or_load_highlow_15d(stock_data, ticker, horizons=15)
|
| 582 |
+
model = bundle.get("model", None)
|
| 583 |
+
horizons = bundle.get("horizons", 15)
|
| 584 |
+
|
| 585 |
+
# Build latest feature row
|
| 586 |
+
feats_full = _compute_ta_features(stock_data)
|
| 587 |
+
feats_full = feats_full.replace([np.inf, -np.inf], np.nan)
|
| 588 |
+
feats_full = feats_full.loc[:, feats_full.notna().any(axis=0)]
|
| 589 |
+
feats_full = feats_full.fillna(method="ffill").fillna(method="bfill")
|
| 590 |
+
if len(feats_full) > 60:
|
| 591 |
+
feats_full = feats_full.iloc[60:]
|
| 592 |
+
if feats_full.empty:
|
| 593 |
+
raise ValueError("No features available for inference after cleaning.")
|
| 594 |
+
|
| 595 |
+
feature_names = bundle["feature_names"]
|
| 596 |
+
for col in feature_names:
|
| 597 |
+
if col not in feats_full.columns:
|
| 598 |
+
feats_full[col] = 0.0
|
| 599 |
+
feats_full = feats_full[feature_names]
|
| 600 |
+
X_t = feats_full.iloc[[-1]].values
|
| 601 |
+
|
| 602 |
+
base_close = float(ohlc.iloc[-1]["close"])
|
| 603 |
+
if not np.isfinite(base_close) or base_close <= 0:
|
| 604 |
+
base_close = float(ohlc["close"].replace(0.0, np.nan).dropna().iloc[-1])
|
| 605 |
+
|
| 606 |
+
y_pred_log = None
|
| 607 |
+
|
| 608 |
+
# Path 1: ExtraTrees multi-output mean-regression
|
| 609 |
+
if model is not None:
|
| 610 |
+
y_pred_log = model.predict(X_t).reshape(-1)
|
| 611 |
+
|
| 612 |
+
# Path 2: Quantile gradient boosting per-horizon
|
| 613 |
+
elif "q_models_high" in bundle and "q_models_low" in bundle:
|
| 614 |
+
qh = bundle["q_models_high"]
|
| 615 |
+
ql = bundle["q_models_low"]
|
| 616 |
+
yh = np.array([qh[k].predict(X_t)[0] for k in range(horizons)], dtype=float)
|
| 617 |
+
yl = np.array([ql[k].predict(X_t)[0] for k in range(horizons)], dtype=float)
|
| 618 |
+
y_pred_log = np.concatenate([yh, yl], axis=0)
|
| 619 |
+
|
| 620 |
+
if y_pred_log is not None:
|
| 621 |
+
# Optional hybrid blend with TA fallback in log space for stability
|
| 622 |
+
blend_w = float(bundle.get("blend_weight", _BLEND_TA_WEIGHT) or 0.0)
|
| 623 |
+
if blend_w > 0.0:
|
| 624 |
+
try:
|
| 625 |
+
_, hi_ta, lo_ta = _ta_fallback_forecast(ohlc, horizons=horizons)
|
| 626 |
+
with np.errstate(divide="ignore", invalid="ignore"):
|
| 627 |
+
yh_ta_log = np.log(np.maximum(hi_ta, 1e-12) / base_close)
|
| 628 |
+
yl_ta_log = np.log(np.maximum(lo_ta, 1e-12) / base_close)
|
| 629 |
+
yh_ml_log = y_pred_log[:horizons]
|
| 630 |
+
yl_ml_log = y_pred_log[horizons:]
|
| 631 |
+
yh_blend_log = (1.0 - blend_w) * yh_ml_log + blend_w * yh_ta_log
|
| 632 |
+
yl_blend_log = (1.0 - blend_w) * yl_ml_log + blend_w * yl_ta_log
|
| 633 |
+
y_pred_log = np.concatenate([yh_blend_log, yl_blend_log], axis=0)
|
| 634 |
+
except Exception:
|
| 635 |
+
pass
|
| 636 |
+
|
| 637 |
+
# Convert back from log-ratio to price
|
| 638 |
+
yh = y_pred_log[:horizons]
|
| 639 |
+
yl = y_pred_log[horizons:]
|
| 640 |
+
pred_high = np.exp(yh) * base_close
|
| 641 |
+
pred_low = np.exp(yl) * base_close
|
| 642 |
+
|
| 643 |
+
pred_high = np.maximum(pred_high, 0.0)
|
| 644 |
+
pred_low = np.maximum(pred_low, 0.0)
|
| 645 |
+
swp = pred_low > pred_high
|
| 646 |
+
if np.any(swp):
|
| 647 |
+
tmp = pred_high.copy()
|
| 648 |
+
pred_high[swp] = pred_low[swp]
|
| 649 |
+
pred_low[swp] = tmp[swp]
|
| 650 |
+
|
| 651 |
+
last_date = feats_full.index[-1]
|
| 652 |
+
future_dates = _next_business_days(last_date, horizons)
|
| 653 |
+
date_str = [pd.Timestamp(d).strftime("%Y-%m-%d") for d in future_dates]
|
| 654 |
+
|
| 655 |
+
            return {
                "dates": date_str,
                "pred_high": [round(float(x), 2) for x in pred_high],
                "pred_low": [round(float(x), 2) for x in pred_low],
                "base_close": round(float(base_close), 4),
                "bundle_meta": {
                    "model": bundle.get("algo", "UNKNOWN"),
                    "trained_rows": bundle.get("trained_rows"),
                    "sklearn_version": bundle.get("sklearn_version"),
                    "metrics": bundle.get("metrics"),
                    "bundle_path": None,
                    "ticker": bundle.get("ticker"),
                    "winsor": bundle.get("winsor"),
                    "blend_weight": bundle.get("blend_weight"),
                    "transform": bundle.get("transform"),
                    "feature_importances_top30": bundle.get("feature_importances"),
                    "quantiles": {"high": _Q_HIGH, "low": _Q_LOW} if "q_models_high" in bundle else None,
                },
            }
    except Exception:
        pass
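    # No usable ML bundle (or the ML path above raised): fall back to the
    # pure TA heuristic over a fixed 15-business-day horizon.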
    base_close, pred_high, pred_low = _ta_fallback_forecast(ohlc, horizons=15)
    last_date = ohlc.index[-1]
    future_dates = _next_business_days(last_date, 15)
    date_str = [pd.Timestamp(d).strftime("%Y-%m-%d") for d in future_dates]

    return {
        "dates": date_str,
        "pred_high": [round(float(x), 2) for x in pred_high],
        "pred_low": [round(float(x), 2) for x in pred_low],
        "base_close": round(float(base_close), 4),
        "bundle_meta": {
            "model": "TA heuristic fallback (ATR/EMA/RSI/ADX), no ML",
            "trained_rows": int(len(ohlc)),
            "sklearn_version": _get_sklearn_version(),
            "metrics": None,
            "bundle_path": None,
            "ticker": ticker.upper(),
        },
    }
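Both branches of the hunk above return the same payload shape, so callers can consume it uniformly. A minimal sketch of that consumption (the entry-point name forecast_high_low is an assumption for illustration; only the dict keys come from the code above):

import pandas as pd
from highlow_forecast import forecast_high_low  # hypothetical entry point

payload = forecast_high_low("RELIANCE.NS")      # ticker is illustrative
band = pd.DataFrame({
    "date": pd.to_datetime(payload["dates"]),
    "high": payload["pred_high"],               # per-day predicted highs
    "low": payload["pred_low"],                 # per-day predicted lows
})
print(payload["bundle_meta"]["model"], payload["base_close"])
print(band.head())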
predictedchart.py
DELETED
@@ -1,126 +0,0 @@
import yfinance as yf
import pandas as pd
import numpy as np
import talib
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# Step 1: Download data with TA indicators
def fetch_stock_data_with_indicators(ticker, start="2020-01-01", end="2025-09-10"):
    df = yf.download(ticker, start=start, end=end)
    actualdata = yf.download(ticker, start=start, end="2025-09-11")
    df = df[["Open", "High", "Low", "Close", "Volume"]]
    close_prices = df['Close'].to_numpy().flatten()
    low_prices = df['Low'].to_numpy().flatten()
    high_prices = df['High'].to_numpy().flatten()
    # Add indicators
    # df["RSI"] = talib.RSI(close_prices, timeperiod=14)
    # df["MACD"], df["MACD_signal"], _ = talib.MACD(close_prices)
    df["EMA_20"] = talib.EMA(close_prices, timeperiod=20)
    df["ATR"] = talib.ATR(high_prices, low_prices, close_prices, timeperiod=14)

    df.dropna(inplace=True)
    return df

def fetch_originaldata(ticker, start="2020-01-01", end="2025-01-03"):
    actualdata = yf.download(ticker, start=start, end="2025-01-24")
    return actualdata

# Step 2: Custom Dataset
class StockDataset(Dataset):
    def __init__(self, series, window_size):
        self.data = []
        for i in range(len(series) - window_size):
            self.data.append((series[i:i+window_size], series[i+window_size][3]))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        x, y = self.data[idx]
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

# Step 3: Transformer model
class TransformerPredictor(nn.Module):
    def __init__(self, input_size, d_model=64, nhead=4, num_layers=2, dropout=0.1):
        super(TransformerPredictor, self).__init__()
        self.linear_in = nn.Linear(input_size, d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dropout=dropout)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.linear_out = nn.Linear(d_model, 1)

    def forward(self, src):
        x = self.linear_in(src)        # [seq, batch, d_model]
        x = self.transformer(x)        # [seq, batch, d_model]
        out = self.linear_out(x[-1])   # [batch, 1]
        return out.squeeze()

# Step 4: Training function
def train_model(model, dataloader, epochs, lr=0.001):
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.MSELoss()
    for epoch in range(epochs):
        for x, y in dataloader:
            x = x.permute(1, 0, 2)  # [batch, seq, features] -> [seq, batch, features]
            pred = model(x)
            loss = loss_fn(pred, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print("Epoch {}/{} - Loss: {:.4f}".format(epoch+1, epochs, loss.item()))
# Step 5: Run pipeline
def run_stock_prediction(ticker):
    df = fetch_stock_data_with_indicators(ticker)
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(df.values)

    window_size = 20
    dataset = StockDataset(scaled_data, window_size)
    dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

    input_size = scaled_data.shape[1]
    model = TransformerPredictor(input_size=input_size)
    train_model(model, dataloader, epochs=2)

    # Predict the next 30 business days recursively
    predictions = []
    input_seq = scaled_data[-window_size:].copy()  # shape: [20, features]

    for i in range(30):
        seq_tensor = torch.tensor(input_seq, dtype=torch.float32).unsqueeze(1)  # [seq_len, 1, features]

        with torch.no_grad():
            predicted_scaled = model(seq_tensor).item()

        # Create new row based on last row, replace only Close price (index 3)
        new_row = input_seq[-1].copy()
        new_row[3] = predicted_scaled

        # Inverse scale to get actual Close price
        predicted_row = scaler.inverse_transform([new_row])[0]
        predicted_close = predicted_row[3]
        predictions.append(predicted_close)

        # Slide window: remove first row, append new row
        input_seq = np.vstack([input_seq[1:], [new_row]])

    # Get the last date from the dataset
    last_date = df.index[-1]
    predicted_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=30, freq='B')  # Business days

    prediction_results = pd.DataFrame({
        'Date': predicted_dates,
        'Predicted Close': predictions,
    })

    return prediction_results
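For context, the deleted module exposed a single entry point; a minimal usage sketch based only on the code above (the ticker is illustrative):

from predictedchart import run_stock_prediction

preds = run_stock_prediction("AAPL")  # trains briefly, then forecasts 30 business days
print(preds.head())                   # DataFrame with 'Date' and 'Predicted Close' columns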
prediction.py
DELETED
@@ -1,257 +0,0 @@
import os, re, joblib, numpy as np, pandas as pd, sklearn
from sklearn.ensemble import ExtraTreesRegressor

PRICE_COLS = ["Close Price", "Highest Price", "Lowest Price"]

def _drop_unnamed(df: pd.DataFrame) -> pd.DataFrame:
    to_drop = [c for c in df.columns if str(c).startswith("Unnamed")]
    return df.drop(columns=to_drop) if to_drop else df

def _read_excel_loose_header(xlsx_path: str) -> pd.DataFrame:
    raw = pd.read_excel(xlsx_path, engine='openpyxl', header=None)
    first_row = [str(x) for x in raw.iloc[0].tolist()]
    header_row = 0 if any("Close Price" in s for s in first_row) else 1
    return pd.read_excel(xlsx_path, engine='openpyxl', header=header_row)

def _map_training_indicators(df: pd.DataFrame) -> pd.DataFrame:
    def map_series(s: pd.Series):
        if s.dtype == 'O':
            cleaned = s.astype(str).str.strip()
            cleaned = cleaned.replace({'nan': np.nan, 'NaN': np.nan, 'None': np.nan, '': np.nan})
            return cleaned.map({'Red': 0, 'Yellow': 1, 'Green': 2})
        return s
    out = df.copy()
    for col in out.columns:
        if col not in PRICE_COLS:
            out[col] = map_series(out[col])
    return out

def _map_testing_indicators(df: pd.DataFrame) -> pd.DataFrame:
    def map_series(s: pd.Series):
        if s.dtype == 'O':
            cleaned = s.astype(str).str.strip()
            cleaned = cleaned.replace({'nan': np.nan, 'NaN': np.nan, 'None': np.nan, '': np.nan})
            return cleaned.map({'Red': 0, 'Yellow': 1, 'Green': 2})
        return s.replace({10: 2, 5: 1, 0: 0})
    out = df.copy()
    for col in out.columns:
        if col not in PRICE_COLS:
            out[col] = map_series(out[col])
    return out

def _find_target_cols(df: pd.DataFrame):
    if "Highest Price" not in df.columns or "Lowest Price" not in df.columns:
        raise ValueError("Excel must contain 'Highest Price' and 'Lowest Price' columns.")
    return "Highest Price", "Lowest Price"
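The two mapping helpers encode the sheet's Red/Yellow/Green labels ordinally (0/1/2); the testing-side variant additionally folds legacy numeric scores 10/5/0 down to 2/1/0. A quick illustration of the training-side mapping (toy frame; the column name is illustrative):

import pandas as pd
demo = pd.DataFrame({"RSI Divergence": ["Red", "Yellow", "Green", ""]})
print(_map_training_indicators(demo)["RSI Divergence"].tolist())  # [0.0, 1.0, 2.0, nan]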
def load_or_train_highlow_model(xlsx_path: str, model_path: str):
    def _is_cache_fresh():
        return os.path.exists(model_path) and os.path.getmtime(model_path) >= os.path.getmtime(xlsx_path)

    if os.path.exists(model_path) and _is_cache_fresh():
        obj = joblib.load(model_path)
        if isinstance(obj, dict) and {'model', 'features', 'medians'} <= set(obj.keys()):
            return obj

    if not os.path.exists(xlsx_path):
        raise FileNotFoundError(f"Training Excel not found at: {xlsx_path}")

    df = _read_excel_loose_header(xlsx_path)
    df = _drop_unnamed(df)

    y_high, y_low = _find_target_cols(df)
    df_mapped = _map_training_indicators(df)

    X = df_mapped.drop(columns=[y_high, y_low]).apply(pd.to_numeric, errors='coerce')
    y = df_mapped[[y_high, y_low]].apply(pd.to_numeric, errors='coerce')

    med = X.median(numeric_only=True)
    X = X.fillna(med)
    y = y.fillna(y.median(numeric_only=True))

    model = ExtraTreesRegressor(
        n_estimators=300,
        random_state=42,
        n_jobs=-1,
        max_depth=None,
        min_samples_leaf=2,
    )
    model.fit(X.values, y.values)

    bundle = {
        'model': model,
        'features': X.columns.tolist(),
        'medians': med.to_dict(),
        'sklearn_version': sklearn.__version__,
        'trained_rows': int(X.shape[0]),
    }
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    joblib.dump(bundle, model_path)
    return bundle
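Callers reached the model only through this cache-aware loader, which retrains whenever the Excel sheet is newer than the pickle; a sketch (both paths are placeholders):

bundle = load_or_train_highlow_model(
    xlsx_path="analysedata.xlsx",                    # training sheet (placeholder path)
    model_path="models/gps_highlow_extratrees.pkl",  # cached bundle (placeholder path)
)
print(bundle["trained_rows"], bundle["sklearn_version"])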
def _to_num(v):
    import pandas as pd
    if isinstance(v, (list, tuple, pd.Series, np.ndarray)):
        if len(v) == 0:
            return 0.0
        return _to_num(v[-1])
    if isinstance(v, dict):
        numeric_vals = [vv for vv in v.values() if isinstance(vv, (int, float, np.number))]
        if numeric_vals:
            best = max(numeric_vals)
            return 1.0 if float(best) > 0 else 0.0
        return 1.0 if any(bool(vv) for vv in v.values()) else 0.0
    if isinstance(v, (bool, int, float, np.number)):
        try:
            return float(v)
        except Exception:
            return 0.0
    if isinstance(v, str):
        s = v.strip().lower()
        if s in {"buy", "bullish", "long", "breakout", "yes", "true", "dbuy"}:
            return 1.0
        if s in {"sell", "bearish", "short", "no", "false"}:
            return 0.0
        try:
            return float(v)
        except Exception:
            return 0.0
    try:
        return float(v)
    except Exception:
        return 0.0
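_to_num flattens heterogeneous signal values (strings, dicts, sequences, scalars) into floats; a few illustrative calls:

print(_to_num("Buy"))              # 1.0  -- bullish keyword
print(_to_num("Sell"))             # 0.0  -- bearish keyword
print(_to_num({"a": 3, "b": -1}))  # 1.0  -- max numeric value is positive
print(_to_num([1, 2, 3]))          # 3.0  -- recurses into the last element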
def build_current_features_row_23k(
    ticker: str,
    stock_data: pd.DataFrame,
    rsi_trade_signal: dict,
    macd_trade_signal: dict,
    ema_trade_signal: dict,
    atr_trade_signal: dict,
    adx_trade_signal: dict,
    bb_trade_signal: dict,
    sr_trade_signal: dict,
    priceaction_trade_signal: dict,
    fibo_trade_signal: dict,
    overall_ta_score: float,
) -> pd.DataFrame:
    last_close = _to_num(stock_data['close'].iloc[-1])

    rsi_sig = rsi_trade_signal.get('rsi_signals', {}) or {}
    macd_sig = macd_trade_signal.get('macd_signals', {}) or {}
    atr_sig = atr_trade_signal.get('atr_signals', {}) or {}
    ema_sig = ema_trade_signal.get('ema_signals', {}) or {}
    adx_sig = adx_trade_signal.get('adx_signals', {}) or {}
    bb_sig = bb_trade_signal.get('bollinger_signals', {}) or {}
    sr_sig = sr_trade_signal.get('support_resistance_signals', {}) or {}
    pa_sig = priceaction_trade_signal.get('priceaction_signals', {}) or {}
    fib_sig = priceaction_trade_signal.get('fib_signals') or fibo_trade_signal.get('fib_signals', {})

    def sig_num(d, key): return _to_num(d.get(key, 0))

    row = {
        "TA Score": _to_num(overall_ta_score),
        "Close Price": last_close,

        # RSI
        "RSI": _to_num(rsi_trade_signal.get('rsi_score', 0)),
        "Overbought/Oversold": sig_num(rsi_sig, "Overbought/Oversold"),
        "RSI Swing Rejection": sig_num(rsi_sig, "RSI Swing Rejection"),
        "RSI Divergence": sig_num(rsi_sig, "RSI Divergence"),
        "RSI_Bollinger Band": sig_num(rsi_sig, "RSI_Bollinger Band"),
        "RSI 5/14 Crossover": sig_num(rsi_sig, "RSI 5/14 Crossover"),
        "RSI Trend 50 Confirmation": sig_num(rsi_sig, "RSI Trend 50 Confirmation"),
        "RSI_MA": _to_num(rsi_sig.get("RSI_MA", rsi_trade_signal.get("ma", 0))),
        "Mean Reversion": sig_num(rsi_sig, "Mean Reversion"),

        # MACD
        "MACD": _to_num(macd_trade_signal.get('macd_score', 0)),
        "MACD Line Crossover": sig_num(macd_sig, "MACD Line Crossover"),
        "MACD Zero-Line Crossover": sig_num(macd_sig, "MACD Zero-Line Crossover"),
        "MACD Divergence": sig_num(macd_sig, "MACD Divergence"),
        "Hidden Divergence": sig_num(macd_sig, "Hidden Divergence"),
        "MACD Volume": sig_num(macd_sig, "MACD Volume"),
        "MACD Momentum": sig_num(macd_sig, "MACD Momentum"),

        # ATR
        "ATR": _to_num(atr_trade_signal.get('atr_score', 0)),
        "ATR Breakout": sig_num(atr_sig, "ATR Breakout"),
        "ATR Expansion": sig_num(atr_sig, "ATR Expansion"),
        "ATR Squeeze": sig_num(atr_sig, "ATR Squeeze"),
        "ATR Trend Reversal": sig_num(atr_sig, "ATR Trend Reversal"),

        # EMA
        "EMA": _to_num(ema_trade_signal.get('ema_score', 0)),
        "EMA Crossover": sig_num(ema_sig, "EMA Crossover"),
        "EMA Price Crossover": sig_num(ema_sig, "EMA Price Crossover"),
        "EMA Slope": sig_num(ema_sig, "EMA Slope"),
        "Triple EMA": sig_num(ema_sig, "Triple EMA"),

        # ADX
        "ADX": _to_num(adx_trade_signal.get('adx_score', 0)),
        "ADX + DI Crossover": sig_num(adx_sig, "ADX + DI Crossover"),
        "ADX Breakout": sig_num(adx_sig, "ADX Breakout"),
        "ADX Slope": sig_num(adx_sig, "ADX Slope"),
        "ADX Divergence": sig_num(adx_sig, "ADX Divergence"),

        # Fibonacci
        "Fibo": _to_num(fibo_trade_signal.get('fib_score', 0)),
        "Fibonacci Retracement Bounce": sig_num(fib_sig, "Fibonacci Retracement Bounce"),
        "Fibonacci Breakout": sig_num(fib_sig, "Fibonacci Breakout"),
        "Golden Pocket Reversal": sig_num(fib_sig, "Golden Pocket Reversal"),
        "Fibonacci Confluence": sig_num(fib_sig, "Fibonacci Confluence"),

        # Bollinger
        "BB": _to_num(bb_trade_signal.get('bollinger_score', 0)),
        "BB Squeeze": sig_num(bb_sig, "BB Squeeze"),
        "BB Breakout": sig_num(bb_sig, "BB Breakout"),
        "BB Breakout Reversal": sig_num(bb_sig, "BB Breakout Reversal"),
        "Middle Band Pullback": sig_num(bb_sig, "Middle Band Pullback"),

        "SR": _to_num(sr_trade_signal.get('sr_score', 0)),
        "Breakout": sig_num(sr_sig, "Breakout"),
        "Reversal": sig_num(sr_sig, "Reversal"),
        "Flip": sig_num(sr_sig, "Flip"),
        "SR_Retest": sig_num(sr_sig, "SR_Retest"),

        "PA_MS": _to_num(priceaction_trade_signal.get('priceaction_score', 0)),
        "Candlestick Pattern": sig_num(pa_sig, "Candlestick Pattern"),
        "HH_HL_LL_LH": sig_num(pa_sig, "HH_HL_LL_LH"),
        "Triangle Breakout": sig_num(pa_sig, "Triangle Breakout"),
        "Fair Value Gap": sig_num(pa_sig, "Fair Value Gap"),
        "BOS": sig_num(pa_sig, "BOS"),
        "CHoCH": sig_num(pa_sig, "CHoCH"),
        "Order_Block": sig_num(pa_sig, "Order_Block"),
    }

    return pd.DataFrame([row]).replace([np.inf, -np.inf], np.nan)
def _prepare_test_currentrow(current_row_df: pd.DataFrame, feature_cols, train_medians: dict):
    df = _map_testing_indicators(current_row_df.copy())
    X = df.reindex(columns=feature_cols).apply(pd.to_numeric, errors='coerce')
    X = X.fillna(pd.Series(train_medians))
    return X

def predict_high_low_for_current_row(bundle: dict, current_row_df: pd.DataFrame, live_close: float):
    feature_cols = bundle['features']
    medians = bundle['medians']
    model: ExtraTreesRegressor = bundle['model']

    X = _prepare_test_currentrow(current_row_df, feature_cols, medians)
    preds = model.predict(X.values)
    high_pred, low_pred = float(preds[0, 0]), float(preds[0, 1])

    if not np.isnan(live_close):
        high_pred = max(high_pred, float(live_close))
        low_pred = min(low_pred, float(live_close))

    return round(high_pred, 2), round(low_pred, 2)
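Taken together, the deleted inference path was: load or refresh the cached bundle, build a one-row feature frame from the live indicator signals, then predict and clamp the band around the live close. A condensed sketch under those signatures (paths and signal variables are placeholders):

bundle = load_or_train_highlow_model("analysedata.xlsx", "models/gps_highlow_extratrees.pkl")
row = build_current_features_row_23k(ticker, stock_data, rsi_sig, macd_sig, ema_sig,
                                     atr_sig, adx_sig, bb_sig, sr_sig, pa_sig, fib_sig,
                                     overall_ta_score)
hi, lo = predict_high_low_for_current_row(bundle, row, live_close=float(stock_data['close'].iloc[-1]))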