Commit 8dfbce4: Oviya committed "update prediction"
Parent(s): 62afd3f

Files changed:
- analysestock.py +47 -77
- chatbot.py +0 -232
- companies.py +0 -50
- highlow_forecast.py +695 -0
- predictedchart.py +0 -126
- prediction.py +0 -257
analysestock.py
CHANGED

@@ -22,45 +22,10 @@ from srstrategies import get_support_resistance_signal
 from bbstrategies import get_bollinger_trade_signal
 from fundamental import get_fundamental_details
 from news import get_latest_news_with_sentiment
-
-from prediction import (
-    load_or_train_highlow_model,
-    build_current_features_row_23k,
-    predict_high_low_for_current_row,
-)
+from highlow_forecast import forecast_next_15_high_low
 import os, numpy as np, pandas as pd
 
-BASE_DIR = Path(__file__).resolve().parent
-
-# TRAIN_XLSX_PATH = r"D:\PY-Trade\backend alone\analysedata.xlsx"
-# MODEL_BUNDLE_PATH = r"C:\VIJI\pytrade-app\backend\models\gps_highlow_extratrees.pkl"
-
-# Excel path (priority: env var → file in repo → your Windows path)
-TRAIN_XLSX_PATH = (
-    os.getenv("TRAIN_XLSX_PATH")
-    or (str(BASE_DIR / "analysedata.xlsx") if (BASE_DIR / "analysedata.xlsx").exists() else None)
-    or (r"C:\VIJI\huggingface-deployment\deployment\pytrade-backend\analysedata.xlsx" if os.name == "nt" else None)
-)
-
-if not TRAIN_XLSX_PATH or not Path(TRAIN_XLSX_PATH).exists():
-    raise FileNotFoundError(
-        "Training Excel not found. Set TRAIN_XLSX_PATH or place 'analysedata.xlsx' next to analysestock.py."
-    )
-
-# Model path (priority: env var → model file in repo → your Windows path → /tmp for training)
-MODEL_BUNDLE_PATH = (
-    os.getenv("MODEL_BUNDLE_PATH")
-    or (str(BASE_DIR / "gps_highlow_extratrees.pkl") if (BASE_DIR / "gps_highlow_extratrees.pkl").exists() else None)
-    or (r"C:\VIJI\huggingface-deployment\deployment\pytrade-backend\gps_highlow_extratrees.pkl" if os.name == "nt" else None)
-    or "/tmp/pytrade-models/gps_highlow_extratrees.pkl"
-)
-
-Path(MODEL_BUNDLE_PATH).parent.mkdir(parents=True, exist_ok=True)  # ensure writable dir when training
-# --- end snippet ---
-
-
-
-from predictedchart import run_stock_prediction
+BASE_DIR = Path(__file__).resolve().parent
 
 # ===================== TA scoring =====================
 def calculate_technical_analysis_score(indicator_scores):

@@ -271,37 +236,42 @@ def analysestock(ticker):
 
 
     #prediction
-
-    predictions_float = [float(pred) for pred in predictions['Predicted Close']]
-    prediction_dates = pd.to_datetime(predictions['Date']).dt.strftime('%d-%m-%Y').tolist()
-    model_error = None
-    pred_high, pred_low = np.nan, np.nan
+    forecast_15 = None
     try:
-
-
-        current_feat_row = build_current_features_row_23k(
+        forecast_15 = forecast_next_15_high_low(
             ticker=ticker,
-            stock_data=stock_data
-            rsi_trade_signal=rsi_trade_signal,
-            macd_trade_signal=macd_trade_signal,
-            ema_trade_signal=ema_trade_signal,
-            atr_trade_signal=atr_trade_signal,
-            adx_trade_signal=adx_trade_signal,
-            bb_trade_signal=bb_trade_signal,
-            sr_trade_signal=sr_trade_signal,
-            priceaction_trade_signal=priceaction_trade_signal,
-            fibo_trade_signal=fibo_trade_signal,
-            overall_ta_score=overall_ta_score,
-        )
-
-        pred_high, pred_low = predict_high_low_for_current_row(
-            bundle=bundle,
-            current_row_df=current_feat_row,
-            live_close=stock_data['close'].iloc[-1]
+            stock_data=stock_data
         )
     except Exception as ex:
-
-
+        forecast_15 = {"error": f"{type(ex).__name__}: {ex}"}
+
+
+    # Summaries for 15-day forecast (max high, min low) + range series for charts
+    max_high_15 = None
+    max_high_15_date = None
+    min_low_15 = None
+    min_low_15_date = None
+    highlow_range_15 = None
+
+    if isinstance(forecast_15, dict) and all(k in forecast_15 for k in ("pred_high", "pred_low", "dates")):
+        highs = np.asarray(forecast_15["pred_high"], dtype=float)
+        lows = np.asarray(forecast_15["pred_low"], dtype=float)
+        dates = forecast_15["dates"]
+
+        if highs.size and lows.size and highs.size == lows.size == len(dates):
+            hi_idx = int(np.nanargmax(highs))
+            lo_idx = int(np.nanargmin(lows))
+
+            max_high_15 = round(float(highs[hi_idx]), 2)
+            max_high_15_date = dates[hi_idx]
+            min_low_15 = round(float(lows[lo_idx]), 2)
+            min_low_15_date = dates[lo_idx]
+
+            # Precomputed rangeBar data: [{x: date, y: [low, high]}]
+            highlow_range_15 = [
+                {"x": d, "y": [round(float(l), 2), round(float(h), 2)]}
+                for d, h, l in zip(dates, highs.tolist(), lows.tolist())
+            ]
 
 
 

@@ -352,22 +322,22 @@ def analysestock(ticker):
         "EMA 50": ema_trade_signal['EMA_50'],
         "ADX_Indicator": adx_trade_signal['ADX_Indicator'],
         "PLUS_DI": adx_trade_signal['PLUS_DI'],
-        "MINUS_DI": adx_trade_signal['MINUS_DI']
-        "prediction_prices": predictions_float,
-        "prediction_dates": prediction_dates,
+        "MINUS_DI": adx_trade_signal['MINUS_DI']
     }
-
+    response.update({
+        "ai_predicted_daily_high_15": (forecast_15.get("pred_high") if isinstance(forecast_15, dict) and "pred_high" in forecast_15 else None),
+        "ai_predicted_daily_low_15": (forecast_15.get("pred_low") if isinstance(forecast_15, dict) and "pred_low" in forecast_15 else None),
+        "ai_predicted_dates_15": (forecast_15.get("dates") if isinstance(forecast_15, dict) and "dates" in forecast_15 else None),
+        "ai_model_meta_15d": (forecast_15.get("bundle_meta") if isinstance(forecast_15, dict) and "bundle_meta" in forecast_15 else None),
+        "ai_model_error_15d": (forecast_15.get("error") if isinstance(forecast_15, dict) and "error" in forecast_15 else None),
+    })
 
     response.update({
-        "
-        "
-        "
-
-
-        "trained_rows": (bundle.get("trained_rows") if 'bundle' in locals() else None),
-        "sklearn_version": (bundle.get("sklearn_version") if 'bundle' in locals() else None)
-        },
-        "ai_model_error": model_error
+        "ai_predicted_max_high_15": max_high_15,
+        "ai_predicted_max_high_15_date": max_high_15_date,
+        "ai_predicted_min_low_15": min_low_15,
+        "ai_predicted_min_low_15_date": min_low_15_date,
+        "ai_predicted_highlow_range_15": highlow_range_15
     })
 
     return response
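A minimal sketch of how a caller might consume the new 15-day fields (the ticker value and the direct call to analysestock() are illustrative assumptions; the key names match the response keys added above):

    from analysestock import analysestock

    resp = analysestock("TCS.NS")  # hypothetical ticker

    if resp.get("ai_model_error_15d"):
        print("15-day forecast unavailable:", resp["ai_model_error_15d"])
    else:
        for d, h, l in zip(resp["ai_predicted_dates_15"],
                           resp["ai_predicted_daily_high_15"],
                           resp["ai_predicted_daily_low_15"]):
            print(f"{d}: expected range {l:.2f} to {h:.2f}")
        print("Max high", resp["ai_predicted_max_high_15"],
              "on", resp["ai_predicted_max_high_15_date"])
        print("Min low", resp["ai_predicted_min_low_15"],
              "on", resp["ai_predicted_min_low_15_date"])

The precomputed ai_predicted_highlow_range_15 series ({"x": date, "y": [low, high]}) is shaped so a rangeBar chart can consume it as-is, which is why it is built server-side.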
chatbot.py
DELETED

@@ -1,232 +0,0 @@
-# app.py
-import os
-import re
-import json
-import time
-from datetime import datetime
-from typing import List, Dict
-
-from flask import Flask, request, jsonify
-from dotenv import load_dotenv
-import requests
-
-# ----------------------------
-# Optional providers (OpenAI v1 / Cohere)
-# ----------------------------
-OPENAI_CLIENT = None
-try:
-    from openai import OpenAI
-    OPENAI_CLIENT = "available"
-except Exception:
-    OPENAI_CLIENT = None
-
-try:
-    import cohere
-except Exception:
-    cohere = None
-
-load_dotenv()
-app = Flask(__name__)
-
-# ----------------------------
-# Config
-# ----------------------------
-LLM_PROVIDER = os.getenv("LLM_PROVIDER", "openai").lower().strip()
-OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
-COHERE_API_KEY = os.getenv("COHERE_API_KEY")
-SERPAPI_API_KEY = os.getenv("SERPAPI_API_KEY")
-SEARCH_TOPK = int(os.getenv("SEARCH_TOPK", "5"))
-TIMEZONE = "Asia/Kolkata"
-
-if LLM_PROVIDER == "openai" and not OPENAI_API_KEY:
-    print("[WARN] OPENAI_API_KEY not set; general answers will fail.")
-if LLM_PROVIDER == "cohere" and not COHERE_API_KEY:
-    print("[WARN] COHERE_API_KEY not set; general answers will fail.")
-if not SERPAPI_API_KEY:
-    print("[WARN] SERPAPI_API_KEY not set; 'latest' queries will not work.")
-
-# Initialize OpenAI client (v1+)
-openai_client = None
-if LLM_PROVIDER == "openai" and OPENAI_CLIENT and OPENAI_API_KEY:
-    openai_client = OpenAI(api_key=OPENAI_API_KEY)
-
-# ----------------------------
-# Utilities
-# ----------------------------
-
-# Common “latest/live” triggers
-LATEST_TRIGGERS = [
-    r"\btoday\b", r"\bnow\b", r"\blatest\b", r"\bupdate\b", r"\brecent\b",
-    r"\bbreaking\b", r"\blive\b", r"\bthis\s+hour\b", r"\bthis\s+minute\b",
-    r"\bcurrent\b", r"\bas of\b", r"\btoday'?s\b", r"\bprice\s+today\b"
-]
-LATEST_PATTERN = re.compile("|".join(LATEST_TRIGGERS), re.IGNORECASE)
-
-# Simple aliases for finance names/tickers (extend as needed)
-ALIASES = {
-    "tcs": "Tata Consultancy Services",
-    "ril": "Reliance Industries",
-    "infy": "Infosys",
-    "hdfc bank": "HDFC Bank",
-    "icici": "ICICI Bank",
-}
-
-def normalize_entities(text: str) -> str:
-    t = text
-    for k, v in ALIASES.items():
-        t = re.sub(rf"\b{k}\b", v, t, flags=re.IGNORECASE)
-    return t
-
-def needs_live_context(query: str) -> bool:
-    """Heuristic to detect time-sensitive queries."""
-    if not query:
-        return False
-    q = query.lower()
-
-    if LATEST_PATTERN.search(q):
-        return True
-
-    # Domain shortcuts
-    domain_triggers = [
-        "who won", "match result", "score now", "stock price", "share price",
-        "usd inr rate", "exchange rate", "weather", "today's weather",
-        "news on", "headline", "earnings today", "ipo today",
-        "live price", "current price", "price right now"
-    ]
-    if any(t in q for t in domain_triggers):
-        return True
-
-    # Finance shortcut: “price of <entity>”
-    if re.search(r"\bprice of\b", q) and not re.search(r"\byesterday|last close|history\b", q):
-        return True
-
-    return False
-
-def pick_is_news(query: str) -> bool:
-    """Treat as news if clear news terms appear."""
-    q = query.lower()
-    news_terms = ["news", "headline", "breaking", "election", "budget", "earthquake", "merger", "acquisition", "ceo resigns"]
-    return any(t in q for t in news_terms)
-
-def serpapi_search(query: str, is_news: bool = False, num: int = SEARCH_TOPK) -> List[Dict[str, str]]:
-    """Fetch top search or news results from SerpAPI."""
-    if not SERPAPI_API_KEY:
-        return []
-
-    params = {
-        "api_key": SERPAPI_API_KEY,
-        "q": query,
-    }
-
-    if is_news:
-        url = "https://serpapi.com/search.json"
-        params.update({"engine": "google_news", "num": min(num, 10), "hl": "en", "gl": "in"})
-    else:
-        url = "https://serpapi.com/search.json"
-        params.update({"engine": "google", "num": min(num, 10), "hl": "en", "gl": "in"})
-
-    r = requests.get(url, params=params, timeout=20)
-    r.raise_for_status()
-    data = r.json()
-
-    results: List[Dict[str, str]] = []
-    if is_news:
-        for item in (data.get("news_results") or [])[:num]:
-            results.append({
-                "title": item.get("title") or "",
-                "snippet": item.get("snippet") or item.get("description") or "",
-                "link": item.get("link") or "",
-                "source": (item.get("source") or {}).get("name") or item.get("source") or ""
-            })
-    else:
-        for item in (data.get("organic_results") or [])[:num]:
-            results.append({
-                "title": item.get("title") or "",
-                "snippet": item.get("snippet") or "",
-                "link": item.get("link") or "",
-                "source": item.get("source") or ""
-            })
-    return results
-
-def build_citation_block(hits: List[Dict[str, str]]) -> str:
-    """Compact citations for the LLM and the response."""
-    lines = []
-    for i, h in enumerate(hits, start=1):
-        title = (h.get("title") or "").strip()
-        link = (h.get("link") or "").strip()
-        source = (h.get("source") or "").strip()
-        snippet = (h.get("snippet") or "").strip()
-        lines.append(f"[{i}] {title} — {source}\n{snippet}\n{link}")
-    return "\n\n".join(lines)
-
-# ----------------------------
-# LLM Calls
-# ----------------------------
-
-BASE_SYSTEM_PROMPT = (
-    "You are a helpful and precise assistant. Use simple, neutral English. "
-    "When sources are provided, synthesize them, highlight clear facts, and include a short 'Sources' list as [1], [2], etc. "
-    "If information is uncertain or evolving, state that clearly."
-)
-
-def call_openai(system_prompt: str, user_prompt: str) -> str:
-    """OpenAI Python SDK ≥ 1.0.0."""
-    if not openai_client:
-        raise RuntimeError("OpenAI is not configured.")
-    resp = openai_client.chat.completions.create(
-        model="gpt-4o-mini",
-        messages=[
-            {"role": "system", "content": system_prompt},
-            {"role": "user", "content": user_prompt}
-        ],
-        temperature=0.2,
-        max_tokens=900,
-    )
-    return (resp.choices[0].message.content or "").strip()
-
-def call_cohere(system_prompt: str, user_prompt: str) -> str:
-    """Cohere chat (adjust model if needed)."""
-    if not cohere or not COHERE_API_KEY:
-        raise RuntimeError("Cohere is not configured.")
-    client = cohere.Client(api_key=COHERE_API_KEY)
-    resp = client.chat(
-        model="command-r-plus",
-        messages=[
-            {"role": "system", "content": system_prompt},
-            {"role": "user", "content": user_prompt}
-        ],
-        temperature=0.2,
-        max_tokens=900,
-    )
-    text = getattr(resp, "text", None) or (getattr(resp, "output_text", None))
-    if not text and hasattr(resp, "message") and hasattr(resp.message, "content"):
-        parts = resp.message.content
-        text = "".join(getattr(p, "text", "") for p in parts)
-    return (text or "").strip()
-
-def call_llm(system_prompt: str, user_prompt: str) -> str:
-    if LLM_PROVIDER == "openai":
-        return call_openai(system_prompt, user_prompt)
-    elif LLM_PROVIDER == "cohere":
-        return call_cohere(system_prompt, user_prompt)
-    else:
-        raise RuntimeError("Unsupported LLM_PROVIDER")
-
-def compose_live_user_prompt(query: str, hits: List[Dict[str, str]]) -> str:
-    citation_block = build_citation_block(hits)
-    today = datetime.now().strftime("%B %d, %Y")
-    return (
-        f"User question (time-sensitive): {query}\n"
-        f"Date today: {today}\n\n"
-        f"You have these top search results. Answer using only what these sources support. "
-        f"Be concise and include a 'Sources' section with numbered citations pointing to the links.\n\n"
-        f"{citation_block}\n\n"
-        f"Now write the answer:"
-    )
-
-def compose_general_user_prompt(query: str) -> str:
-    today = datetime.now().strftime("%B %d, %Y")
-    return (
-        f"User question: {query}\n"
-        f"(Answer in simple, neutral English. If facts might have changed after {today}, mention that briefly.)"
-    )
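The removed file defines routing helpers but, in the 232 deleted lines shown, no Flask endpoint that ties them together; a sketch of how they compose (the wiring itself is an assumption, while every helper named below exists in the module):

    def answer(query: str) -> str:
        # Hypothetical glue over the deleted helpers: route time-sensitive
        # queries through SerpAPI, otherwise answer directly with the LLM.
        q = normalize_entities(query)
        if needs_live_context(q):
            hits = serpapi_search(q, is_news=pick_is_news(q))
            if hits:
                return call_llm(BASE_SYSTEM_PROMPT, compose_live_user_prompt(q, hits))
        return call_llm(BASE_SYSTEM_PROMPT, compose_general_user_prompt(q))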
companies.py
DELETED

@@ -1,50 +0,0 @@
-# utils.py
-import csv
-import io
-import requests
-from typing import List, Dict
-from requests.exceptions import RequestException
-import time
-
-# List of URLs for NIFTY50 and NIFTY100
-NIFTY_URLS = {
-    "NIFTY50": "https://www.niftyindices.com/IndexConstituent/ind_nifty50list.csv",
-    "NIFTY100": "https://www.niftyindices.com/IndexConstituent/ind_nifty100list.csv"
-}
-
-def fetch_nifty_companies(index_code: str, retries: int = 3, delay: int = 5) -> List[Dict[str, str]]:
-    # Get the URL for the given index_code
-    url = NIFTY_URLS.get(index_code)
-
-    if not url:
-        raise ValueError(f"Unknown index code: {index_code}")
-
-    # Retry logic
-    for attempt in range(retries):
-        try:
-            # Fetch the CSV data
-            response = requests.get(url)
-            # Ensure the request was successful
-            response.raise_for_status()
-            # Read CSV data from the response text
-            return parse_nifty_csv(response.text)
-
-        except RequestException as e:
-            print(f"Attempt {attempt + 1} failed: {e}")
-            if attempt < retries - 1:
-                time.sleep(delay)  # Wait before retrying
-            else:
-                raise Exception(f"Failed to fetch data after {retries} attempts.") from e
-
-# Function to fetch companies for both NIFTY50 and NIFTY100
-def get_companies_from_indices() -> Dict[str, List[Dict[str, str]]]:
-    nifty50_companies = fetch_nifty_companies("NIFTY50")
-    nifty100_companies = fetch_nifty_companies("NIFTY100")
-
-    # Combine both lists and return
-    all_companies = {
-        "NIFTY50": nifty50_companies,
-        "NIFTY100": nifty100_companies
-    }
-
-    return all_companies
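fetch_nifty_companies() returns parse_nifty_csv(response.text), but that helper is not among the 50 removed lines. A plausible sketch, assuming the niftyindices.com constituent CSV uses headers like "Company Name" and "Symbol" (both the body and the column names are assumptions):

    def parse_nifty_csv(csv_text: str) -> List[Dict[str, str]]:
        # Hypothetical reconstruction of the missing helper; csv and io are
        # already imported at the top of the module.
        reader = csv.DictReader(io.StringIO(csv_text))
        return [
            {"name": (row.get("Company Name") or "").strip(),
             "symbol": (row.get("Symbol") or "").strip()}
            for row in reader
        ]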
highlow_forecast.py
ADDED
|
@@ -0,0 +1,695 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import talib
|
| 4 |
+
|
| 5 |
+
# Optional ML imports (graceful fallback if scikit-learn is not installed)
|
| 6 |
+
try:
|
| 7 |
+
from sklearn.ensemble import ExtraTreesRegressor
|
| 8 |
+
from sklearn.model_selection import TimeSeriesSplit
|
| 9 |
+
from sklearn.metrics import mean_absolute_error
|
| 10 |
+
_SKLEARN_AVAILABLE = True
|
| 11 |
+
except Exception:
|
| 12 |
+
ExtraTreesRegressor = None
|
| 13 |
+
TimeSeriesSplit = None
|
| 14 |
+
mean_absolute_error = None
|
| 15 |
+
_SKLEARN_AVAILABLE = False
|
| 16 |
+
|
| 17 |
+
# Optional: HistGradientBoostingRegressor for quantile regression
|
| 18 |
+
try:
|
| 19 |
+
from sklearn.ensemble import HistGradientBoostingRegressor
|
| 20 |
+
_HGBR_AVAILABLE = True
|
| 21 |
+
except Exception:
|
| 22 |
+
HistGradientBoostingRegressor = None
|
| 23 |
+
_HGBR_AVAILABLE = False
|
| 24 |
+
|
| 25 |
+
# --------------------- Configuration ---------------------
|
| 26 |
+
|
| 27 |
+
# Prefer quantile gradient boosting for extreme values (better for High/Low)
|
| 28 |
+
_USE_HGBR_QUANTILE = True # auto-fallback to ExtraTrees when unavailable
|
| 29 |
+
|
| 30 |
+
# Quantiles for high/low tails (in log-ratio space)
|
| 31 |
+
_Q_HIGH = 0.80 # upper-tail for High
|
| 32 |
+
_Q_LOW = 0.20 # lower-tail for Low
|
| 33 |
+
|
| 34 |
+
# Blend ML predictions with TA fallback (in log-return space)
|
| 35 |
+
# Set to 0.0 to disable blending
|
| 36 |
+
_BLEND_TA_WEIGHT = 0.20
|
| 37 |
+
|
| 38 |
+
# Log-ratio target winsorization to reduce outlier impact: [q_low, q_high] (ExtraTrees path)
|
| 39 |
+
_WINSOR_Q_LOW = 0.005
|
| 40 |
+
_WINSOR_Q_HIGH = 0.995
|
| 41 |
+
|
| 42 |
+
# Exponential recency weighting: larger = faster decay (0.0 to disable)
|
| 43 |
+
_RECENCY_DECAY = 0.003 # per-sample step
|
| 44 |
+
|
| 45 |
+
# ExtraTrees hyperparameters tuned for generalization
|
| 46 |
+
_ETR_PARAMS_CV = dict(
|
| 47 |
+
n_estimators=800,
|
| 48 |
+
max_depth=None,
|
| 49 |
+
min_samples_split=2,
|
| 50 |
+
min_samples_leaf=3,
|
| 51 |
+
max_features=0.6,
|
| 52 |
+
bootstrap=False,
|
| 53 |
+
n_jobs=-1,
|
| 54 |
+
random_state=42,
|
| 55 |
+
)
|
| 56 |
+
_ETR_PARAMS_FINAL = dict(
|
| 57 |
+
n_estimators=1200,
|
| 58 |
+
max_depth=None,
|
| 59 |
+
min_samples_split=2,
|
| 60 |
+
min_samples_leaf=3,
|
| 61 |
+
max_features=0.6,
|
| 62 |
+
bootstrap=False,
|
| 63 |
+
n_jobs=-1,
|
| 64 |
+
random_state=42,
|
| 65 |
+
)
|
| 66 |
+
|
| 67 |
+
# HistGradientBoosting hyperparameters for quantile regression
|
| 68 |
+
_HGBR_PARAMS = dict(
|
| 69 |
+
loss="quantile",
|
| 70 |
+
learning_rate=0.05,
|
| 71 |
+
max_iter=600,
|
| 72 |
+
max_depth=3,
|
| 73 |
+
max_leaf_nodes=31,
|
| 74 |
+
max_bins=255,
|
| 75 |
+
l2_regularization=0.0,
|
| 76 |
+
early_stopping=False, # avoid random holdout leaking time
|
| 77 |
+
random_state=42,
|
| 78 |
+
)
|
| 79 |
+
|
| 80 |
+
# In-memory per-ticker model cache (no disk I/O)
|
| 81 |
+
_MEM_CACHE = {} # key: ticker.upper(), value: bundle dict
|
| 82 |
+
|
| 83 |
+
# --------------------- OHLC Utilities ---------------------
|
| 84 |
+
|
| 85 |
+
def _ensure_ohlc_columns(df: pd.DataFrame) -> pd.DataFrame:
|
| 86 |
+
cols = {c.lower(): c for c in df.columns}
|
| 87 |
+
need = ["open", "high", "low", "close", "volume"]
|
| 88 |
+
mapping = {}
|
| 89 |
+
for n in need:
|
| 90 |
+
if n in cols:
|
| 91 |
+
mapping[cols[n]] = n
|
| 92 |
+
else:
|
| 93 |
+
# try MultiIndex column cases from yfinance
|
| 94 |
+
for c in df.columns:
|
| 95 |
+
name = c[0].lower() if isinstance(c, tuple) and len(c) > 0 else str(c).lower()
|
| 96 |
+
if name == n:
|
| 97 |
+
mapping[c] = n
|
| 98 |
+
break
|
| 99 |
+
out = df.rename(columns=mapping).copy()
|
| 100 |
+
missing = [c for c in need if c not in out.columns]
|
| 101 |
+
if missing:
|
| 102 |
+
raise ValueError(f"Missing OHLCV columns after normalization: {missing}")
|
| 103 |
+
return out[["open", "high", "low", "close", "volume"]]
|
| 104 |
+
|
| 105 |
+
# --------------------- Business day helper ---------------------
|
| 106 |
+
|
| 107 |
+
def _next_business_days(last_date: pd.Timestamp, periods: int, exchange: str = "XNYS") -> pd.DatetimeIndex:
|
| 108 |
+
"""
|
| 109 |
+
Return next 'periods' business sessions after last_date.
|
| 110 |
+
Tries exchange calendar via pandas_market_calendars (holidays-aware), fallback to weekdays-only.
|
| 111 |
+
exchange examples: 'XNYS' (NYSE), 'XBOM' (BSE), 'XNAS' (NASDAQ), 'XNSE' (NSE).
|
| 112 |
+
"""
|
| 113 |
+
last_date = pd.Timestamp(last_date).tz_localize(None)
|
| 114 |
+
try:
|
| 115 |
+
import pandas_market_calendars as mcal
|
| 116 |
+
cal = mcal.get_calendar(exchange)
|
| 117 |
+
# buffer long enough to cover holidays
|
| 118 |
+
schedule = cal.schedule(start_date=last_date + pd.Timedelta(days=1),
|
| 119 |
+
end_date=last_date + pd.Timedelta(days=180))
|
| 120 |
+
sessions = schedule.index.tz_localize(None)
|
| 121 |
+
if len(sessions) >= periods:
|
| 122 |
+
return sessions[:periods]
|
| 123 |
+
# If for some reason not enough sessions, extend with weekday fallback
|
| 124 |
+
needed = periods - len(sessions)
|
| 125 |
+
tail = pd.bdate_range(sessions[-1] + pd.offsets.BDay(1) if len(sessions) else last_date + pd.offsets.BDay(1),
|
| 126 |
+
periods=needed)
|
| 127 |
+
return sessions.append(tail)
|
| 128 |
+
except Exception:
|
| 129 |
+
# Weekdays-only fallback
|
| 130 |
+
return pd.bdate_range(last_date + pd.offsets.BDay(1), periods=periods)
|
| 131 |
+
|
| 132 |
+
# --------------------- TA Heuristic (Fallback, No ML) ---------------------
|
| 133 |
+
|
| 134 |
+
def _last_finite(values: np.ndarray, default: float = np.nan) -> float:
|
| 135 |
+
for x in values[::-1]:
|
| 136 |
+
if np.isfinite(x):
|
| 137 |
+
return float(x)
|
| 138 |
+
return float(default)
|
| 139 |
+
|
| 140 |
+
def _ta_fallback_forecast(ohlc: pd.DataFrame, horizons: int = 15):
|
| 141 |
+
h = ohlc["high"].astype(float).values
|
| 142 |
+
l = ohlc["low"].astype(float).values
|
| 143 |
+
c = ohlc["close"].astype(float).values
|
| 144 |
+
|
| 145 |
+
if len(c) < 60:
|
| 146 |
+
raise ValueError("Not enough history for TA fallback (need >=60 rows).")
|
| 147 |
+
|
| 148 |
+
base_close = _last_finite(ohlc["close"].replace(0.0, np.nan).values)
|
| 149 |
+
if not np.isfinite(base_close) or base_close <= 0:
|
| 150 |
+
raise ValueError("Invalid last close after cleaning.")
|
| 151 |
+
|
| 152 |
+
atr14 = talib.ATR(h, l, c, timeperiod=14)
|
| 153 |
+
atr_last = _last_finite(atr14, default=np.nan)
|
| 154 |
+
atr_pct = (atr_last / base_close) if np.isfinite(atr_last) and base_close > 0 else np.nan
|
| 155 |
+
|
| 156 |
+
ema20 = talib.EMA(c, timeperiod=20)
|
| 157 |
+
ema50 = talib.EMA(c, timeperiod=50)
|
| 158 |
+
ema20_last = _last_finite(ema20, default=np.nan)
|
| 159 |
+
ema50_last = _last_finite(ema50, default=np.nan)
|
| 160 |
+
|
| 161 |
+
trend_strength = 0.0
|
| 162 |
+
if np.isfinite(ema20_last) and np.isfinite(ema50_last) and ema50_last > 0:
|
| 163 |
+
trend_strength = np.clip(ema20_last / ema50_last - 1.0, -0.05, 0.05)
|
| 164 |
+
ema20_slope = 0.0
|
| 165 |
+
if len(ema20) >= 2 and np.isfinite(ema20[-1]) and np.isfinite(ema20[-2]) and ema20[-2] > 0:
|
| 166 |
+
ema20_slope = np.clip((ema20[-1] / ema20[-2]) - 1.0, -0.05, 0.05)
|
| 167 |
+
|
| 168 |
+
adx14 = talib.ADX(h, l, c, timeperiod=14)
|
| 169 |
+
adx = _last_finite(adx14, default=20.0) / 100.0
|
| 170 |
+
adx = float(np.clip(adx, 0.0, 1.0))
|
| 171 |
+
|
| 172 |
+
rsi14 = talib.RSI(c, timeperiod=14)
|
| 173 |
+
rsi = _last_finite(rsi14, default=50.0)
|
| 174 |
+
tilt = float(np.clip((rsi - 50.0) / 50.0, -1.0, 1.0))
|
| 175 |
+
|
| 176 |
+
logret = np.diff(np.log(np.maximum(c, 1e-12)))
|
| 177 |
+
if len(logret) >= 20 and np.isfinite(logret[-20:]).sum() >= 10:
|
| 178 |
+
sigma20 = float(pd.Series(logret).rolling(20).std().iloc[-1])
|
| 179 |
+
else:
|
| 180 |
+
sigma20 = float(np.nan)
|
| 181 |
+
|
| 182 |
+
components = []
|
| 183 |
+
if np.isfinite(sigma20):
|
| 184 |
+
components.append(sigma20)
|
| 185 |
+
if np.isfinite(atr_pct):
|
| 186 |
+
components.append(atr_pct)
|
| 187 |
+
daily_vol = 0.0
|
| 188 |
+
if components:
|
| 189 |
+
daily_vol = 0.6 * components[0] + (0.4 * components[1] if len(components) > 1 else 0.0)
|
| 190 |
+
daily_vol = float(np.clip(daily_vol if np.isfinite(daily_vol) else 0.02, 0.004, 0.08))
|
| 191 |
+
|
| 192 |
+
drift_per_day = float(np.clip(0.5 * trend_strength + 0.5 * ema20_slope, -0.02, 0.02))
|
| 193 |
+
|
| 194 |
+
up_weight = 1.0 - 0.3 * tilt
|
| 195 |
+
dn_weight = 1.0 + 0.3 * tilt
|
| 196 |
+
up_weight = float(np.clip(up_weight, 0.5, 1.5))
|
| 197 |
+
dn_weight = float(np.clip(dn_weight, 0.5, 1.5))
|
| 198 |
+
trend_amp = 0.75 + 0.5 * adx
|
| 199 |
+
|
| 200 |
+
pred_high, pred_low = [], []
|
| 201 |
+
for k in range(1, horizons + 1):
|
| 202 |
+
amp = daily_vol * np.sqrt(k) * trend_amp
|
| 203 |
+
drift = drift_per_day * k
|
| 204 |
+
up_move = amp * up_weight
|
| 205 |
+
dn_move = amp * dn_weight
|
| 206 |
+
hi = base_close * (1.0 + drift + up_move)
|
| 207 |
+
lo = base_close * (1.0 + drift - dn_move)
|
| 208 |
+
hi = max(0.0, hi)
|
| 209 |
+
lo = max(0.0, lo)
|
| 210 |
+
if lo > hi:
|
| 211 |
+
lo, hi = hi, lo
|
| 212 |
+
pred_high.append(hi)
|
| 213 |
+
pred_low.append(lo)
|
| 214 |
+
|
| 215 |
+
return base_close, np.array(pred_high), np.array(pred_low)
|
| 216 |
+
|
| 217 |
+
# --------------------- Feature Engineering for ML ---------------------
|
| 218 |
+
|
| 219 |
+
def _compute_ta_features(df: pd.DataFrame) -> pd.DataFrame:
|
| 220 |
+
df = _ensure_ohlc_columns(df).copy()
|
| 221 |
+
o, h, l, c, v = [df[k].astype(float).values for k in ("open", "high", "low", "close", "volume")]
|
| 222 |
+
|
| 223 |
+
close = df["close"].astype(float)
|
| 224 |
+
open_ = df["open"].astype(float)
|
| 225 |
+
high = df["high"].astype(float)
|
| 226 |
+
low = df["low"].astype(float)
|
| 227 |
+
vol = df["volume"].astype(float)
|
| 228 |
+
|
| 229 |
+
df_feat = pd.DataFrame(index=df.index)
|
| 230 |
+
|
| 231 |
+
# Basic price features
|
| 232 |
+
df_feat["ret_1"] = close.pct_change(1)
|
| 233 |
+
df_feat["logret_1"] = np.log(close.replace(0.0, np.nan)).diff(1)
|
| 234 |
+
df_feat["ret_5"] = close.pct_change(5)
|
| 235 |
+
df_feat["ret_10"] = close.pct_change(10)
|
| 236 |
+
df_feat["roll_mean_5"] = close.rolling(5).mean() / close - 1.0
|
| 237 |
+
df_feat["roll_mean_20"] = close.rolling(20).mean() / close - 1.0
|
| 238 |
+
df_feat["roll_std_10"] = close.pct_change().rolling(10).std()
|
| 239 |
+
df_feat["range_pct"] = (high - low) / close.replace(0.0, np.nan)
|
| 240 |
+
|
| 241 |
+
# Candle features (normalized)
|
| 242 |
+
with np.errstate(divide="ignore", invalid="ignore"):
|
| 243 |
+
body = (close - open_) / close
|
| 244 |
+
upper_shadow = (high - np.maximum(close, open_)) / close
|
| 245 |
+
lower_shadow = (np.minimum(close, open_) - low) / close
|
| 246 |
+
df_feat["candle_body"] = body
|
| 247 |
+
df_feat["candle_upper"] = upper_shadow
|
| 248 |
+
df_feat["candle_lower"] = lower_shadow
|
| 249 |
+
df_feat["gap_open"] = open_.shift(0) / close.shift(1) - 1.0
|
| 250 |
+
|
| 251 |
+
# EMAs and distances
|
| 252 |
+
ema5 = talib.EMA(close.values, timeperiod=5)
|
| 253 |
+
ema20 = talib.EMA(close.values, timeperiod=20)
|
| 254 |
+
ema50 = talib.EMA(close.values, timeperiod=50)
|
| 255 |
+
with np.errstate(divide="ignore", invalid="ignore"):
|
| 256 |
+
df_feat["ema5_dist"] = (ema5 / close.values) - 1.0
|
| 257 |
+
df_feat["ema20_dist"] = (ema20 / close.values) - 1.0
|
| 258 |
+
df_feat["ema50_dist"] = (ema50 / close.values) - 1.0
|
| 259 |
+
# EMA slopes (1-day change)
|
| 260 |
+
df_feat["ema20_slope"] = (pd.Series(ema20, index=df.index).pct_change(1))
|
| 261 |
+
|
| 262 |
+
# RSI family
|
| 263 |
+
df_feat["rsi14"] = talib.RSI(close.values, timeperiod=14) / 100.0
|
| 264 |
+
df_feat["rsi5"] = talib.RSI(close.values, timeperiod=5) / 100.0
|
| 265 |
+
|
| 266 |
+
# MACD
|
| 267 |
+
macd, macdsig, macdhist = talib.MACD(close.values, fastperiod=12, slowperiod=26, signalperiod=9)
|
| 268 |
+
df_feat["macd"] = macd
|
| 269 |
+
df_feat["macdsig"] = macdsig
|
| 270 |
+
df_feat["macdhist"] = macdhist
|
| 271 |
+
|
| 272 |
+
# Bollinger Bands width
|
| 273 |
+
upper, middle, lower = talib.BBANDS(close.values, timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)
|
| 274 |
+
with np.errstate(divide="ignore", invalid="ignore"):
|
| 275 |
+
df_feat["bb_width"] = (upper - lower) / middle
|
| 276 |
+
|
| 277 |
+
# Volatility/Trend
|
| 278 |
+
atr = talib.ATR(h, l, c, timeperiod=14)
|
| 279 |
+
with np.errstate(divide="ignore", invalid="ignore"):
|
| 280 |
+
df_feat["atr14"] = atr / close.values
|
| 281 |
+
df_feat["adx14"] = talib.ADX(h, l, c, timeperiod=14) / 100.0
|
| 282 |
+
|
| 283 |
+
# Additional momentum/oscillators
|
| 284 |
+
df_feat["roc10"] = talib.ROC(close.values, timeperiod=10) / 100.0
|
| 285 |
+
df_feat["cci14"] = talib.CCI(h, l, c, timeperiod=14) / 100.0
|
| 286 |
+
df_feat["mfi14"] = talib.MFI(h, l, c, v, timeperiod=14) / 100.0
|
| 287 |
+
df_feat["willr14"] = talib.WILLR(h, l, c, timeperiod=14) / 100.0 # [-1, 0]
|
| 288 |
+
|
| 289 |
+
# Stochastic
|
| 290 |
+
slowk, slowd = talib.STOCH(h, l, c, fastk_period=14, slowk_period=3, slowk_matype=0, slowd_period=3, slowd_matype=0)
|
| 291 |
+
df_feat["stoch_k"] = slowk / 100.0
|
| 292 |
+
df_feat["stoch_d"] = slowd / 100.0
|
| 293 |
+
|
| 294 |
+
# OBV normalized (robust to missing/flat volume)
|
| 295 |
+
finite_vol = np.isfinite(vol.values)
|
| 296 |
+
if finite_vol.sum() >= max(30, int(0.5 * len(vol))):
|
| 297 |
+
obv = talib.OBV(close.values, vol.values)
|
| 298 |
+
df_feat["obv_z"] = pd.Series(obv, index=df.index).pct_change(5)
|
| 299 |
+
else:
|
| 300 |
+
df_feat["obv_z"] = 0.0
|
| 301 |
+
|
| 302 |
+
# Volume z-score and turnover proxies
|
| 303 |
+
vol_roll_mean = vol.rolling(20).mean()
|
| 304 |
+
vol_roll_std = vol.rolling(20).std()
|
| 305 |
+
with np.errstate(divide="ignore", invalid="ignore"):
|
| 306 |
+
df_feat["vol_z20"] = (vol - vol_roll_mean) / vol_roll_std
|
| 307 |
+
df_feat["turnover_z20"] = ((vol * close) - (vol * close).rolling(20).mean()) / (vol * close).rolling(20).std()
|
| 308 |
+
|
| 309 |
+
# Distance to rolling extremes
|
| 310 |
+
roll_max_20 = close.rolling(20).max()
|
| 311 |
+
roll_min_20 = close.rolling(20).min()
|
| 312 |
+
roll_max_55 = close.rolling(55).max()
|
| 313 |
+
roll_min_55 = close.rolling(55).min()
|
| 314 |
+
with np.errstate(divide="ignore", invalid="ignore"):
|
| 315 |
+
df_feat["dist_max20"] = roll_max_20 / close - 1.0
|
| 316 |
+
df_feat["dist_min20"] = close / roll_min_20 - 1.0
|
| 317 |
+
df_feat["dist_max55"] = roll_max_55 / close - 1.0
|
| 318 |
+
df_feat["dist_min55"] = close / roll_min_55 - 1.0
|
| 319 |
+
|
| 320 |
+
# Realized volatility features
|
| 321 |
+
logret = np.log(close.replace(0.0, np.nan)).diff(1)
|
| 322 |
+
df_feat["rv5"] = logret.rolling(5).std()
|
| 323 |
+
df_feat["rv20"] = logret.rolling(20).std()
|
| 324 |
+
df_feat["avg_range5"] = ((high - low) / close.replace(0.0, np.nan)).rolling(5).mean()
|
| 325 |
+
|
| 326 |
+
# Calendar (cyclical day-of-week, month-of-year)
|
| 327 |
+
dow = pd.Series(df.index).map(lambda d: d.weekday() if hasattr(d, "weekday") else pd.Timestamp(d).weekday())
|
| 328 |
+
df_feat["dow_sin"] = np.sin(2 * np.pi * dow / 7.0)
|
| 329 |
+
df_feat["dow_cos"] = np.cos(2 * np.pi * dow / 7.0)
|
| 330 |
+
moy = pd.Series(df.index).map(lambda d: (d.month if hasattr(d, "month") else pd.Timestamp(d).month))
|
| 331 |
+
df_feat["moy_sin"] = np.sin(2 * np.pi * (moy.astype(float) - 1.0) / 12.0)
|
| 332 |
+
df_feat["moy_cos"] = np.cos(2 * np.pi * (moy.astype(float) - 1.0) / 12.0)
|
| 333 |
+
|
| 334 |
+
# Lags of basic signals
|
| 335 |
+
df_feat["ret_1_lag1"] = df_feat["ret_1"].shift(1)
|
| 336 |
+
df_feat["ret_1_lag2"] = df_feat["ret_1"].shift(2)
|
| 337 |
+
df_feat["range_pct_lag1"] = df_feat["range_pct"].shift(1)
|
| 338 |
+
|
| 339 |
+
df_feat = df_feat.replace([np.inf, -np.inf], np.nan)
|
| 340 |
+
df_feat = df_feat.loc[:, df_feat.notna().any(axis=0)]
|
| 341 |
+
return df_feat
|
| 342 |
+
|
| 343 |
+
def _clean_features_for_training(feats: pd.DataFrame, warmup: int = 60) -> pd.DataFrame:
|
| 344 |
+
if feats.empty:
|
| 345 |
+
return feats
|
| 346 |
+
clean = feats.copy()
|
| 347 |
+
clean = clean.fillna(method="ffill").fillna(method="bfill")
|
| 348 |
+
if len(clean) > warmup:
|
| 349 |
+
clean = clean.iloc[warmup:]
|
| 350 |
+
clean = clean.dropna()
|
| 351 |
+
return clean
|
| 352 |
+
|
| 353 |
+
def _winsorize_targets(Y: np.ndarray, horizons: int, q_low: float, q_high: float) -> tuple[np.ndarray, dict]:
|
| 354 |
+
"""
|
| 355 |
+
Winsorize concatenated targets Y = [highs(0:h), lows(h:2h)] row-wise using global quantiles.
|
| 356 |
+
Returns clipped Y and thresholds used.
|
| 357 |
+
"""
|
| 358 |
+
h = horizons
|
| 359 |
+
Yh = Y[:, :h].ravel()
|
| 360 |
+
Yl = Y[:, h:].ravel()
|
| 361 |
+
|
| 362 |
+
lo_h, hi_h = np.quantile(Yh, [q_low, q_high]) if Yh.size else (-np.inf, np.inf)
|
| 363 |
+
lo_l, hi_l = np.quantile(Yl, [q_low, q_high]) if Yl.size else (-np.inf, np.inf)
|
| 364 |
+
|
| 365 |
+
Y_clip = Y.copy()
|
| 366 |
+
Y_clip[:, :h] = np.clip(Y_clip[:, :h], lo_h, hi_h)
|
| 367 |
+
Y_clip[:, h:] = np.clip(Y_clip[:, h:], lo_l, hi_l)
|
| 368 |
+
|
| 369 |
+
return Y_clip, {"high": (float(lo_h), float(hi_h)), "low": (float(lo_l), float(hi_l))}
|
| 370 |
+
|
| 371 |
+
def _sample_weights(n: int, decay: float) -> np.ndarray:
|
| 372 |
+
"""
|
| 373 |
+
Exponential recency weights. Newer samples get higher weight.
|
| 374 |
+
w_i = exp(-decay * (n-1-i)), i in [0..n-1]
|
| 375 |
+
"""
|
| 376 |
+
if decay <= 0 or n <= 0:
|
| 377 |
+
return np.ones(n, dtype=float)
|
| 378 |
+
idx = np.arange(n, dtype=float)
|
| 379 |
+
w = np.exp(-decay * (n - 1 - idx))
|
| 380 |
+
w /= np.average(w) # normalize to mean 1.0
|
| 381 |
+
return w
|
| 382 |
+
|
| 383 |
+
def _make_supervised(df: pd.DataFrame, horizons: int = 15):
|
| 384 |
+
"""
|
| 385 |
+
Build X, Y for multi-horizon high/low forecast.
|
| 386 |
+
Targets (log-ratio): y_high_h = log(High[t+h]/Close[t]), y_low_h = log(Low[t+h]/Close[t])
|
| 387 |
+
Log transform stabilizes variance and reduces skew.
|
| 388 |
+
"""
|
| 389 |
+
ohlc = _ensure_ohlc_columns(df)
|
| 390 |
+
feats = _compute_ta_features(df)
|
| 391 |
+
feat_df = _clean_features_for_training(feats, warmup=60)
|
| 392 |
+
|
| 393 |
+
# Align to cleaned feature index
|
| 394 |
+
ohlc = ohlc.loc[feat_df.index]
|
| 395 |
+
|
| 396 |
+
highs = ohlc["high"].astype(float).values
|
| 397 |
+
lows = ohlc["low"].astype(float).values
|
| 398 |
+
closes = ohlc["close"].astype(float).values
|
| 399 |
+
X_all = feat_df.values
|
| 400 |
+
|
| 401 |
+
n = len(feat_df)
|
| 402 |
+
if n < horizons + 30:
|
| 403 |
+
raise ValueError(f"Not enough rows after feature warm-up for {horizons}-day training. Have: {n}")
|
| 404 |
+
|
| 405 |
+
X_list, Y_list = [], []
|
| 406 |
+
for i in range(n - horizons):
|
| 407 |
+
base_c = closes[i]
|
| 408 |
+
if not np.isfinite(base_c) or base_c <= 0:
|
| 409 |
+
continue
|
| 410 |
+
|
| 411 |
+
future_highs = highs[i + 1:i + horizons + 1]
|
| 412 |
+
future_lows = lows[i + 1:i + horizons + 1]
|
| 413 |
+
|
| 414 |
+
with np.errstate(divide="ignore", invalid="ignore"):
|
| 415 |
+
yh = np.log(np.maximum(future_highs, 1e-12) / base_c)
|
| 416 |
+
yl = np.log(np.maximum(future_lows, 1e-12) / base_c)
|
| 417 |
+
|
| 418 |
+
if np.any(~np.isfinite(yh)) or np.any(~np.isfinite(yl)):
|
| 419 |
+
continue
|
| 420 |
+
|
| 421 |
+
X_list.append(X_all[i, :])
|
| 422 |
+
Y_list.append(np.concatenate([yh, yl], axis=0))
|
| 423 |
+
|
| 424 |
+
X = np.asarray(X_list)
|
| 425 |
+
Y = np.asarray(Y_list)
|
| 426 |
+
if X.size == 0 or Y.size == 0:
|
| 427 |
+
raise ValueError("No valid supervised samples after cleaning. Check data quality (NaNs/zeros).")
|
| 428 |
+
feature_names = feat_df.columns.tolist()
|
| 429 |
+
return X, Y, feature_names, feat_df.index[:len(X)]
|
| 430 |
+
|
| 431 |
+
def _get_sklearn_version():
|
| 432 |
+
try:
|
| 433 |
+
import sklearn
|
| 434 |
+
return sklearn.__version__
|
| 435 |
+
except Exception:
|
| 436 |
+
return None
|
| 437 |
+
|
| 438 |
+
# --------------------- Model Train/Load (In-Memory Only) ---------------------
|
| 439 |
+
|
| 440 |
+
def train_or_load_highlow_15d(df: pd.DataFrame, ticker: str, horizons: int = 15):
|
| 441 |
+
key = ticker.upper()
|
| 442 |
+
if key in _MEM_CACHE:
|
| 443 |
+
return _MEM_CACHE[key]
|
| 444 |
+
|
| 445 |
+
# If sklearn is not available at all, keep TA fallback metadata
|
| 446 |
+
if not _SKLEARN_AVAILABLE:
|
| 447 |
+
bundle = {
|
| 448 |
+
"model": None,
|
| 449 |
+
"feature_names": None,
|
| 450 |
+
"horizons": horizons,
|
| 451 |
+
"trained_rows": int(len(df)),
|
| 452 |
+
"metrics": None,
|
| 453 |
+
"sklearn_version": None,
|
| 454 |
+
"ticker": key,
|
| 455 |
+
"model_path": None,
|
| 456 |
+
"winsor": None,
|
| 457 |
+
"blend_weight": _BLEND_TA_WEIGHT,
|
| 458 |
+
"transform": "logratio",
|
| 459 |
+
"feature_importances": None,
|
| 460 |
+
"algo": "NONE",
|
| 461 |
+
}
|
| 462 |
+
_MEM_CACHE[key] = bundle
|
| 463 |
+
return bundle
|
| 464 |
+
|
| 465 |
+
# Build supervised set
|
| 466 |
+
X, Y_raw, feature_names, _ = _make_supervised(df, horizons=horizons)
|
| 467 |
+
sw = _sample_weights(X.shape[0], _RECENCY_DECAY)
|
| 468 |
+
|
| 469 |
+
# Prefer quantile gradient boosting if available
|
| 470 |
+
if _USE_HGBR_QUANTILE and _HGBR_AVAILABLE and HistGradientBoostingRegressor is not None:
|
| 471 |
+
q_models_high, q_models_low = [], []
|
| 472 |
+
for k in range(horizons):
|
| 473 |
+
# High models (upper quantile)
|
| 474 |
+
mh = HistGradientBoostingRegressor(**_HGBR_PARAMS, quantile=_Q_HIGH)
|
| 475 |
+
mh.fit(X, Y_raw[:, k], sample_weight=sw)
|
| 476 |
+
q_models_high.append(mh)
|
| 477 |
+
|
| 478 |
+
# Low models (lower quantile)
|
| 479 |
+
ml = HistGradientBoostingRegressor(**_HGBR_PARAMS, quantile=_Q_LOW)
|
| 480 |
+
ml.fit(X, Y_raw[:, horizons + k], sample_weight=sw)
|
| 481 |
+
q_models_low.append(ml)
|
| 482 |
+
|
| 483 |
+
bundle = {
|
| 484 |
+
"model": None, # not used in quantile path
|
| 485 |
+
"q_models_high": q_models_high,
|
| 486 |
+
"q_models_low": q_models_low,
|
| 487 |
+
"feature_names": feature_names,
|
| 488 |
+
"horizons": horizons,
|
| 489 |
+
"trained_rows": int(X.shape[0]),
|
| 490 |
+
"metrics": None, # optional: add custom CV if desired
|
| 491 |
+
"sklearn_version": _get_sklearn_version(),
|
| 492 |
+
"ticker": key,
|
| 493 |
+
"model_path": None,
|
| 494 |
+
"winsor": None,
|
| 495 |
+
"blend_weight": _BLEND_TA_WEIGHT,
|
| 496 |
+
"transform": "logratio",
|
| 497 |
+
"feature_importances": None,
|
| 498 |
+
"algo": f"HGBR_QUANTILE(high={_Q_HIGH}, low={_Q_LOW})",
|
| 499 |
+
}
|
| 500 |
+
_MEM_CACHE[key] = bundle
|
| 501 |
+
return bundle
|
| 502 |
+
|
| 503 |
+
# Else fall back to ExtraTrees mean-regression (existing path)
|
| 504 |
+
Y_clip, winsor_info = _winsorize_targets(Y_raw, horizons, _WINSOR_Q_LOW, _WINSOR_Q_HIGH)
|
| 505 |
+
|
| 506 |
+
fold_metrics = []
|
| 507 |
+
feature_importances = None
|
| 508 |
+
|
| 509 |
+
if TimeSeriesSplit is not None:
|
| 510 |
+
tscv = TimeSeriesSplit(n_splits=5)
|
| 511 |
+
for train_idx, val_idx in tscv.split(X):
|
| 512 |
+
Xtr, Xvl = X[train_idx], X[val_idx]
|
| 513 |
+
Ytr_clipped = Y_clip[train_idx]
|
| 514 |
+
Yvl_true = Y_raw[val_idx] # evaluate on true (unclipped) targets
|
| 515 |
+
w_tr = sw[train_idx] if sw is not None else None
|
| 516 |
+
|
| 517 |
+
model_cv = ExtraTreesRegressor(**_ETR_PARAMS_CV)
|
| 518 |
+
model_cv.fit(Xtr, Ytr_clipped, sample_weight=w_tr)
|
| 519 |
+
Yhat = model_cv.predict(Xvl)
|
| 520 |
+
|
| 521 |
+
# Convert log-ratio back to percentage move for reporting
|
| 522 |
+
h = horizons
|
| 523 |
+
if mean_absolute_error is not None:
|
| 524 |
+
yh_pct = (np.exp(Yvl_true[:, :h]) - 1.0) * 100.0
|
| 525 |
+
yl_pct = (np.exp(Yvl_true[:, h:]) - 1.0) * 100.0
|
| 526 |
+
yhat_h_pct = (np.exp(Yhat[:, :h]) - 1.0) * 100.0
|
| 527 |
+
yhat_l_pct = (np.exp(Yhat[:, h:]) - 1.0) * 100.0
|
| 528 |
+
high_mae = mean_absolute_error(yh_pct, yhat_h_pct)
|
| 529 |
+
low_mae = mean_absolute_error(yl_pct, yhat_l_pct)
|
| 530 |
+
fold_metrics.append({"high_mae_pct": round(float(high_mae), 4),
|
| 531 |
+
"low_mae_pct": round(float(low_mae), 4)})
|
| 532 |
+
|
| 533 |
+
final_model = ExtraTreesRegressor(**_ETR_PARAMS_FINAL)
|
| 534 |
+
final_model.fit(X, Y_clip, sample_weight=sw)
|
| 535 |
+
|
| 536 |
+
try:
|
| 537 |
+
fi = final_model.feature_importances_
|
| 538 |
+
feature_importances = sorted(
|
| 539 |
+
zip(feature_names, fi),
|
| 540 |
+
key=lambda t: t[1],
|
| 541 |
+
reverse=True
|
| 542 |
+
)[:30]
|
| 543 |
+
feature_importances = [(str(n), float(v)) for n, v in feature_importances]
|
| 544 |
+
except Exception:
|
| 545 |
+
feature_importances = None
|
| 546 |
+
|
| 547 |
+
bundle = {
|
| 548 |
+
"model": final_model,
|
| 549 |
+
"feature_names": feature_names,
|
| 550 |
+
"horizons": horizons,
|
| 551 |
+
"trained_rows": int(X.shape[0]),
|
| 552 |
+
"metrics": fold_metrics or None,
|
| 553 |
+
"sklearn_version": _get_sklearn_version(),
|
| 554 |
+
"ticker": key,
|
| 555 |
+
"model_path": None,
|
| 556 |
+
"winsor": winsor_info,
|
| 557 |
+
"blend_weight": _BLEND_TA_WEIGHT,
|
| 558 |
+
"transform": "logratio",
|
| 559 |
+
"feature_importances": feature_importances,
|
| 560 |
+
"algo": "EXTRATREES_MEAN",
|
| 561 |
+
}
|
| 562 |
+
|
| 563 |
+
_MEM_CACHE[key] = bundle
|
| 564 |
+
return bundle
|
| 565 |
+
|
| 566 |
+
# --------------------- Forecast ---------------------
|
| 567 |
+
|
| 568 |
+
def forecast_next_15_high_low(ticker: str, stock_data: pd.DataFrame):
|
| 569 |
+
"""
|
| 570 |
+
Train/load from memory and forecast next 15 business days' High/Low.
|
| 571 |
+
If no ML available or insufficient data, uses TA fallback.
|
| 572 |
+
Returns dict: dates, pred_high, pred_low, base_close, bundle_meta
|
| 573 |
+
"""
|
| 574 |
+
if not isinstance(stock_data.index, pd.DatetimeIndex):
|
| 575 |
+
stock_data = stock_data.copy()
|
| 576 |
+
stock_data.index = pd.to_datetime(stock_data.index)
|
| 577 |
+
|
| 578 |
+
ohlc = _ensure_ohlc_columns(stock_data)
|
| 579 |
+
|
| 580 |
+
try:
|
| 581 |
+
bundle = train_or_load_highlow_15d(stock_data, ticker, horizons=15)
|
| 582 |
+
model = bundle.get("model", None)
|
| 583 |
+
horizons = bundle.get("horizons", 15)
|
| 584 |
+
|
| 585 |
+
# Build latest feature row
|
| 586 |
+
feats_full = _compute_ta_features(stock_data)
|
| 587 |
+
feats_full = feats_full.replace([np.inf, -np.inf], np.nan)
|
| 588 |
+
feats_full = feats_full.loc[:, feats_full.notna().any(axis=0)]
|
| 589 |
+
feats_full = feats_full.fillna(method="ffill").fillna(method="bfill")
|
| 590 |
+
if len(feats_full) > 60:
|
| 591 |
+
feats_full = feats_full.iloc[60:]
|
| 592 |
+
if feats_full.empty:
|
| 593 |
+
raise ValueError("No features available for inference after cleaning.")
|
| 594 |
+
|
| 595 |
+
feature_names = bundle["feature_names"]
|
| 596 |
+
for col in feature_names:
|
| 597 |
+
if col not in feats_full.columns:
|
| 598 |
+
feats_full[col] = 0.0
|
| 599 |
+
feats_full = feats_full[feature_names]
|
| 600 |
+
X_t = feats_full.iloc[[-1]].values
|
| 601 |
+
|
| 602 |
+
base_close = float(ohlc.iloc[-1]["close"])
|
| 603 |
+
if not np.isfinite(base_close) or base_close <= 0:
|
| 604 |
+
base_close = float(ohlc["close"].replace(0.0, np.nan).dropna().iloc[-1])
|
| 605 |
+
|
| 606 |
+
y_pred_log = None
|
| 607 |
+
|
| 608 |
+
# Path 1: ExtraTrees multi-output mean-regression
|
| 609 |
+
if model is not None:
|
| 610 |
+
y_pred_log = model.predict(X_t).reshape(-1)
|
| 611 |
+
|
| 612 |
+
# Path 2: Quantile gradient boosting per-horizon
|
| 613 |
+
elif "q_models_high" in bundle and "q_models_low" in bundle:
|
| 614 |
+
qh = bundle["q_models_high"]
|
| 615 |
+
ql = bundle["q_models_low"]
|
| 616 |
+
yh = np.array([qh[k].predict(X_t)[0] for k in range(horizons)], dtype=float)
|
| 617 |
+
yl = np.array([ql[k].predict(X_t)[0] for k in range(horizons)], dtype=float)
|
| 618 |
+
y_pred_log = np.concatenate([yh, yl], axis=0)
|
| 619 |
+
|
| 620 |
+
if y_pred_log is not None:
|
| 621 |
+
# Optional hybrid blend with TA fallback in log space for stability
|
| 622 |
+
blend_w = float(bundle.get("blend_weight", _BLEND_TA_WEIGHT) or 0.0)
|
| 623 |
+
if blend_w > 0.0:
|
| 624 |
+
try:
|
| 625 |
+
_, hi_ta, lo_ta = _ta_fallback_forecast(ohlc, horizons=horizons)
|
| 626 |
+
with np.errstate(divide="ignore", invalid="ignore"):
|
| 627 |
+
yh_ta_log = np.log(np.maximum(hi_ta, 1e-12) / base_close)
|
| 628 |
+
yl_ta_log = np.log(np.maximum(lo_ta, 1e-12) / base_close)
|
| 629 |
+
yh_ml_log = y_pred_log[:horizons]
|
| 630 |
+
yl_ml_log = y_pred_log[horizons:]
|
| 631 |
+
yh_blend_log = (1.0 - blend_w) * yh_ml_log + blend_w * yh_ta_log
|
| 632 |
+
yl_blend_log = (1.0 - blend_w) * yl_ml_log + blend_w * yl_ta_log
|
| 633 |
+
y_pred_log = np.concatenate([yh_blend_log, yl_blend_log], axis=0)
|
| 634 |
+
except Exception:
|
| 635 |
+
pass
|
| 636 |
+
|
| 637 |
+
# Convert back from log-ratio to price
|
| 638 |
+
yh = y_pred_log[:horizons]
|
| 639 |
+
yl = y_pred_log[horizons:]
|
| 640 |
+
pred_high = np.exp(yh) * base_close
|
| 641 |
+
pred_low = np.exp(yl) * base_close
|
| 642 |
+
|
| 643 |
+
pred_high = np.maximum(pred_high, 0.0)
|
| 644 |
+
pred_low = np.maximum(pred_low, 0.0)
|
| 645 |
+
swp = pred_low > pred_high
|
| 646 |
+
if np.any(swp):
|
| 647 |
+
tmp = pred_high.copy()
|
| 648 |
+
pred_high[swp] = pred_low[swp]
|
| 649 |
+
pred_low[swp] = tmp[swp]
|
| 650 |
+
|
| 651 |
+
last_date = feats_full.index[-1]
|
| 652 |
+
future_dates = _next_business_days(last_date, horizons)
|
| 653 |
+
date_str = [pd.Timestamp(d).strftime("%Y-%m-%d") for d in future_dates]
|
| 654 |
+
|
| 655 |
+
            return {
                "dates": date_str,
                "pred_high": [round(float(x), 2) for x in pred_high],
                "pred_low": [round(float(x), 2) for x in pred_low],
                "base_close": round(float(base_close), 4),
                "bundle_meta": {
                    "model": bundle.get("algo", "UNKNOWN"),
                    "trained_rows": bundle.get("trained_rows"),
                    "sklearn_version": bundle.get("sklearn_version"),
                    "metrics": bundle.get("metrics"),
                    "bundle_path": None,
                    "ticker": bundle.get("ticker"),
                    "winsor": bundle.get("winsor"),
                    "blend_weight": bundle.get("blend_weight"),
                    "transform": bundle.get("transform"),
                    "feature_importances_top30": bundle.get("feature_importances"),
                    "quantiles": {"high": _Q_HIGH, "low": _Q_LOW} if "q_models_high" in bundle else None,
                },
            }
    except Exception:
        pass
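    # No usable ML bundle (or the ML path above raised): fall back to the
    # pure TA heuristic over a fixed 15-business-day horizon.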
    base_close, pred_high, pred_low = _ta_fallback_forecast(ohlc, horizons=15)
    last_date = ohlc.index[-1]
    future_dates = _next_business_days(last_date, 15)
    date_str = [pd.Timestamp(d).strftime("%Y-%m-%d") for d in future_dates]

    return {
        "dates": date_str,
        "pred_high": [round(float(x), 2) for x in pred_high],
        "pred_low": [round(float(x), 2) for x in pred_low],
        "base_close": round(float(base_close), 4),
        "bundle_meta": {
            "model": "TA heuristic fallback (ATR/EMA/RSI/ADX), no ML",
            "trained_rows": int(len(ohlc)),
            "sklearn_version": _get_sklearn_version(),
            "metrics": None,
            "bundle_path": None,
            "ticker": ticker.upper(),
        },
    }
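Both branches of the hunk above return the same payload shape, so callers can consume it uniformly. A minimal sketch of that consumption (the entry-point name forecast_high_low is an assumption for illustration; only the dict keys come from the code above):

import pandas as pd
from highlow_forecast import forecast_high_low  # hypothetical entry point

payload = forecast_high_low("RELIANCE.NS")      # ticker is illustrative
band = pd.DataFrame({
    "date": pd.to_datetime(payload["dates"]),
    "high": payload["pred_high"],               # per-day predicted highs
    "low": payload["pred_low"],                 # per-day predicted lows
})
print(payload["bundle_meta"]["model"], payload["base_close"])
print(band.head())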
predictedchart.py
DELETED
@@ -1,126 +0,0 @@
import yfinance as yf
import pandas as pd
import numpy as np
import talib
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# Step 1: Download data with TA indicators
def fetch_stock_data_with_indicators(ticker, start="2020-01-01", end="2025-09-10"):
    df = yf.download(ticker, start=start, end=end)
    actualdata = yf.download(ticker, start=start, end="2025-09-11")
    df = df[["Open", "High", "Low", "Close", "Volume"]]
    close_prices = df['Close'].to_numpy().flatten()
    low_prices = df['Low'].to_numpy().flatten()
    high_prices = df['High'].to_numpy().flatten()
    # Add indicators
    # df["RSI"] = talib.RSI(close_prices, timeperiod=14)
    # df["MACD"], df["MACD_signal"], _ = talib.MACD(close_prices)
    df["EMA_20"] = talib.EMA(close_prices, timeperiod=20)
    df["ATR"] = talib.ATR(high_prices, low_prices, close_prices, timeperiod=14)

    df.dropna(inplace=True)
    return df

def fetch_originaldata(ticker, start="2020-01-01", end="2025-01-03"):
    actualdata = yf.download(ticker, start=start, end="2025-01-24")
    return actualdata

# Step 2: Custom Dataset
class StockDataset(Dataset):
    def __init__(self, series, window_size):
        self.data = []
        for i in range(len(series) - window_size):
            self.data.append((series[i:i+window_size], series[i+window_size][3]))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        x, y = self.data[idx]
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

# Step 3: Transformer model
class TransformerPredictor(nn.Module):
    def __init__(self, input_size, d_model=64, nhead=4, num_layers=2, dropout=0.1):
        super(TransformerPredictor, self).__init__()
        self.linear_in = nn.Linear(input_size, d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dropout=dropout)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.linear_out = nn.Linear(d_model, 1)

    def forward(self, src):
        x = self.linear_in(src)        # [seq, batch, d_model]
        x = self.transformer(x)        # [seq, batch, d_model]
        out = self.linear_out(x[-1])   # [batch, 1]
        return out.squeeze()

# Step 4: Training function
def train_model(model, dataloader, epochs, lr=0.001):
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.MSELoss()
    for epoch in range(epochs):
        for x, y in dataloader:
            x = x.permute(1, 0, 2)  # [batch, seq, features] -> [seq, batch, features]
            pred = model(x)
            loss = loss_fn(pred, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print("Epoch {}/{} - Loss: {:.4f}".format(epoch+1, epochs, loss.item()))
# Step 5: Run pipeline
def run_stock_prediction(ticker):
    df = fetch_stock_data_with_indicators(ticker)
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(df.values)

    window_size = 20
    dataset = StockDataset(scaled_data, window_size)
    dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

    input_size = scaled_data.shape[1]
    model = TransformerPredictor(input_size=input_size)
    train_model(model, dataloader, epochs=2)

    # Predict the next 30 business days recursively
    predictions = []
    input_seq = scaled_data[-window_size:].copy()  # shape: [20, features]

    for i in range(30):
        seq_tensor = torch.tensor(input_seq, dtype=torch.float32).unsqueeze(1)  # [seq_len, 1, features]

        with torch.no_grad():
            predicted_scaled = model(seq_tensor).item()

        # Create new row based on last row, replace only Close price (index 3)
        new_row = input_seq[-1].copy()
        new_row[3] = predicted_scaled

        # Inverse scale to get actual Close price
        predicted_row = scaler.inverse_transform([new_row])[0]
        predicted_close = predicted_row[3]
        predictions.append(predicted_close)

        # Slide window: remove first row, append new row
        input_seq = np.vstack([input_seq[1:], [new_row]])

    # Get the last date from the dataset
    last_date = df.index[-1]
    predicted_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=30, freq='B')  # Business days

    prediction_results = pd.DataFrame({
        'Date': predicted_dates,
        'Predicted Close': predictions,
    })

    return prediction_results
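For context, the deleted module exposed a single entry point; a minimal usage sketch based only on the code above (the ticker is illustrative):

from predictedchart import run_stock_prediction

preds = run_stock_prediction("AAPL")  # trains briefly, then forecasts 30 business days
print(preds.head())                   # DataFrame with 'Date' and 'Predicted Close' columns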
prediction.py
DELETED
@@ -1,257 +0,0 @@
import os, re, joblib, numpy as np, pandas as pd, sklearn
from sklearn.ensemble import ExtraTreesRegressor

PRICE_COLS = ["Close Price", "Highest Price", "Lowest Price"]

def _drop_unnamed(df: pd.DataFrame) -> pd.DataFrame:
    to_drop = [c for c in df.columns if str(c).startswith("Unnamed")]
    return df.drop(columns=to_drop) if to_drop else df

def _read_excel_loose_header(xlsx_path: str) -> pd.DataFrame:
    raw = pd.read_excel(xlsx_path, engine='openpyxl', header=None)
    first_row = [str(x) for x in raw.iloc[0].tolist()]
    header_row = 0 if any("Close Price" in s for s in first_row) else 1
    return pd.read_excel(xlsx_path, engine='openpyxl', header=header_row)

def _map_training_indicators(df: pd.DataFrame) -> pd.DataFrame:
    def map_series(s: pd.Series):
        if s.dtype == 'O':
            cleaned = s.astype(str).str.strip()
            cleaned = cleaned.replace({'nan': np.nan, 'NaN': np.nan, 'None': np.nan, '': np.nan})
            return cleaned.map({'Red': 0, 'Yellow': 1, 'Green': 2})
        return s
    out = df.copy()
    for col in out.columns:
        if col not in PRICE_COLS:
            out[col] = map_series(out[col])
    return out

def _map_testing_indicators(df: pd.DataFrame) -> pd.DataFrame:
    def map_series(s: pd.Series):
        if s.dtype == 'O':
            cleaned = s.astype(str).str.strip()
            cleaned = cleaned.replace({'nan': np.nan, 'NaN': np.nan, 'None': np.nan, '': np.nan})
            return cleaned.map({'Red': 0, 'Yellow': 1, 'Green': 2})
        return s.replace({10: 2, 5: 1, 0: 0})
    out = df.copy()
    for col in out.columns:
        if col not in PRICE_COLS:
            out[col] = map_series(out[col])
    return out

def _find_target_cols(df: pd.DataFrame):
    if "Highest Price" not in df.columns or "Lowest Price" not in df.columns:
        raise ValueError("Excel must contain 'Highest Price' and 'Lowest Price' columns.")
    return "Highest Price", "Lowest Price"
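The two mapping helpers encode the sheet's Red/Yellow/Green labels ordinally (0/1/2); the testing-side variant additionally folds legacy numeric scores 10/5/0 down to 2/1/0. A quick illustration of the training-side mapping (toy frame; the column name is illustrative):

import pandas as pd
demo = pd.DataFrame({"RSI Divergence": ["Red", "Yellow", "Green", ""]})
print(_map_training_indicators(demo)["RSI Divergence"].tolist())  # [0.0, 1.0, 2.0, nan]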
def load_or_train_highlow_model(xlsx_path: str, model_path: str):
    def _is_cache_fresh():
        return os.path.exists(model_path) and os.path.getmtime(model_path) >= os.path.getmtime(xlsx_path)

    if os.path.exists(model_path) and _is_cache_fresh():
        obj = joblib.load(model_path)
        if isinstance(obj, dict) and {'model', 'features', 'medians'} <= set(obj.keys()):
            return obj

    if not os.path.exists(xlsx_path):
        raise FileNotFoundError(f"Training Excel not found at: {xlsx_path}")

    df = _read_excel_loose_header(xlsx_path)
    df = _drop_unnamed(df)

    y_high, y_low = _find_target_cols(df)
    df_mapped = _map_training_indicators(df)

    X = df_mapped.drop(columns=[y_high, y_low]).apply(pd.to_numeric, errors='coerce')
    y = df_mapped[[y_high, y_low]].apply(pd.to_numeric, errors='coerce')

    med = X.median(numeric_only=True)
    X = X.fillna(med)
    y = y.fillna(y.median(numeric_only=True))

    model = ExtraTreesRegressor(
        n_estimators=300,
        random_state=42,
        n_jobs=-1,
        max_depth=None,
        min_samples_leaf=2,
    )
    model.fit(X.values, y.values)

    bundle = {
        'model': model,
        'features': X.columns.tolist(),
        'medians': med.to_dict(),
        'sklearn_version': sklearn.__version__,
        'trained_rows': int(X.shape[0]),
    }
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    joblib.dump(bundle, model_path)
    return bundle
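Callers reached the model only through this cache-aware loader, which retrains whenever the Excel sheet is newer than the pickle; a sketch (both paths are placeholders):

bundle = load_or_train_highlow_model(
    xlsx_path="analysedata.xlsx",                    # training sheet (placeholder path)
    model_path="models/gps_highlow_extratrees.pkl",  # cached bundle (placeholder path)
)
print(bundle["trained_rows"], bundle["sklearn_version"])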
def _to_num(v):
    import pandas as pd
    if isinstance(v, (list, tuple, pd.Series, np.ndarray)):
        if len(v) == 0:
            return 0.0
        return _to_num(v[-1])
    if isinstance(v, dict):
        numeric_vals = [vv for vv in v.values() if isinstance(vv, (int, float, np.number))]
        if numeric_vals:
            best = max(numeric_vals)
            return 1.0 if float(best) > 0 else 0.0
        return 1.0 if any(bool(vv) for vv in v.values()) else 0.0
    if isinstance(v, (bool, int, float, np.number)):
        try:
            return float(v)
        except Exception:
            return 0.0
    if isinstance(v, str):
        s = v.strip().lower()
        if s in {"buy", "bullish", "long", "breakout", "yes", "true", "dbuy"}:
            return 1.0
        if s in {"sell", "bearish", "short", "no", "false"}:
            return 0.0
        try:
            return float(v)
        except Exception:
            return 0.0
    try:
        return float(v)
    except Exception:
        return 0.0
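_to_num flattens heterogeneous signal values (strings, dicts, sequences, scalars) into floats; a few illustrative calls:

print(_to_num("Buy"))              # 1.0  -- bullish keyword
print(_to_num("Sell"))             # 0.0  -- bearish keyword
print(_to_num({"a": 3, "b": -1}))  # 1.0  -- max numeric value is positive
print(_to_num([1, 2, 3]))          # 3.0  -- recurses into the last element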
def build_current_features_row_23k(
    ticker: str,
    stock_data: pd.DataFrame,
    rsi_trade_signal: dict,
    macd_trade_signal: dict,
    ema_trade_signal: dict,
    atr_trade_signal: dict,
    adx_trade_signal: dict,
    bb_trade_signal: dict,
    sr_trade_signal: dict,
    priceaction_trade_signal: dict,
    fibo_trade_signal: dict,
    overall_ta_score: float,
) -> pd.DataFrame:
    last_close = _to_num(stock_data['close'].iloc[-1])

    rsi_sig = rsi_trade_signal.get('rsi_signals', {}) or {}
    macd_sig = macd_trade_signal.get('macd_signals', {}) or {}
    atr_sig = atr_trade_signal.get('atr_signals', {}) or {}
    ema_sig = ema_trade_signal.get('ema_signals', {}) or {}
    adx_sig = adx_trade_signal.get('adx_signals', {}) or {}
    bb_sig = bb_trade_signal.get('bollinger_signals', {}) or {}
    sr_sig = sr_trade_signal.get('support_resistance_signals', {}) or {}
    pa_sig = priceaction_trade_signal.get('priceaction_signals', {}) or {}
    fib_sig = priceaction_trade_signal.get('fib_signals') or fibo_trade_signal.get('fib_signals', {})

    def sig_num(d, key): return _to_num(d.get(key, 0))

    row = {
        "TA Score": _to_num(overall_ta_score),
        "Close Price": last_close,

        # RSI
        "RSI": _to_num(rsi_trade_signal.get('rsi_score', 0)),
        "Overbought/Oversold": sig_num(rsi_sig, "Overbought/Oversold"),
        "RSI Swing Rejection": sig_num(rsi_sig, "RSI Swing Rejection"),
        "RSI Divergence": sig_num(rsi_sig, "RSI Divergence"),
        "RSI_Bollinger Band": sig_num(rsi_sig, "RSI_Bollinger Band"),
        "RSI 5/14 Crossover": sig_num(rsi_sig, "RSI 5/14 Crossover"),
        "RSI Trend 50 Confirmation": sig_num(rsi_sig, "RSI Trend 50 Confirmation"),
        "RSI_MA": _to_num(rsi_sig.get("RSI_MA", rsi_trade_signal.get("ma", 0))),
        "Mean Reversion": sig_num(rsi_sig, "Mean Reversion"),

        # MACD
        "MACD": _to_num(macd_trade_signal.get('macd_score', 0)),
        "MACD Line Crossover": sig_num(macd_sig, "MACD Line Crossover"),
        "MACD Zero-Line Crossover": sig_num(macd_sig, "MACD Zero-Line Crossover"),
        "MACD Divergence": sig_num(macd_sig, "MACD Divergence"),
        "Hidden Divergence": sig_num(macd_sig, "Hidden Divergence"),
        "MACD Volume": sig_num(macd_sig, "MACD Volume"),
        "MACD Momentum": sig_num(macd_sig, "MACD Momentum"),

        # ATR
        "ATR": _to_num(atr_trade_signal.get('atr_score', 0)),
        "ATR Breakout": sig_num(atr_sig, "ATR Breakout"),
        "ATR Expansion": sig_num(atr_sig, "ATR Expansion"),
        "ATR Squeeze": sig_num(atr_sig, "ATR Squeeze"),
        "ATR Trend Reversal": sig_num(atr_sig, "ATR Trend Reversal"),

        # EMA
        "EMA": _to_num(ema_trade_signal.get('ema_score', 0)),
        "EMA Crossover": sig_num(ema_sig, "EMA Crossover"),
        "EMA Price Crossover": sig_num(ema_sig, "EMA Price Crossover"),
        "EMA Slope": sig_num(ema_sig, "EMA Slope"),
        "Triple EMA": sig_num(ema_sig, "Triple EMA"),

        # ADX
        "ADX": _to_num(adx_trade_signal.get('adx_score', 0)),
        "ADX + DI Crossover": sig_num(adx_sig, "ADX + DI Crossover"),
        "ADX Breakout": sig_num(adx_sig, "ADX Breakout"),
        "ADX Slope": sig_num(adx_sig, "ADX Slope"),
        "ADX Divergence": sig_num(adx_sig, "ADX Divergence"),

        # Fibonacci
        "Fibo": _to_num(fibo_trade_signal.get('fib_score', 0)),
        "Fibonacci Retracement Bounce": sig_num(fib_sig, "Fibonacci Retracement Bounce"),
        "Fibonacci Breakout": sig_num(fib_sig, "Fibonacci Breakout"),
        "Golden Pocket Reversal": sig_num(fib_sig, "Golden Pocket Reversal"),
        "Fibonacci Confluence": sig_num(fib_sig, "Fibonacci Confluence"),

        # Bollinger
        "BB": _to_num(bb_trade_signal.get('bollinger_score', 0)),
        "BB Squeeze": sig_num(bb_sig, "BB Squeeze"),
        "BB Breakout": sig_num(bb_sig, "BB Breakout"),
        "BB Breakout Reversal": sig_num(bb_sig, "BB Breakout Reversal"),
        "Middle Band Pullback": sig_num(bb_sig, "Middle Band Pullback"),

        "SR": _to_num(sr_trade_signal.get('sr_score', 0)),
        "Breakout": sig_num(sr_sig, "Breakout"),
        "Reversal": sig_num(sr_sig, "Reversal"),
        "Flip": sig_num(sr_sig, "Flip"),
        "SR_Retest": sig_num(sr_sig, "SR_Retest"),

        "PA_MS": _to_num(priceaction_trade_signal.get('priceaction_score', 0)),
        "Candlestick Pattern": sig_num(pa_sig, "Candlestick Pattern"),
        "HH_HL_LL_LH": sig_num(pa_sig, "HH_HL_LL_LH"),
        "Triangle Breakout": sig_num(pa_sig, "Triangle Breakout"),
        "Fair Value Gap": sig_num(pa_sig, "Fair Value Gap"),
        "BOS": sig_num(pa_sig, "BOS"),
        "CHoCH": sig_num(pa_sig, "CHoCH"),
        "Order_Block": sig_num(pa_sig, "Order_Block"),
    }

    return pd.DataFrame([row]).replace([np.inf, -np.inf], np.nan)
def _prepare_test_currentrow(current_row_df: pd.DataFrame, feature_cols, train_medians: dict):
    df = _map_testing_indicators(current_row_df.copy())
    X = df.reindex(columns=feature_cols).apply(pd.to_numeric, errors='coerce')
    X = X.fillna(pd.Series(train_medians))
    return X

def predict_high_low_for_current_row(bundle: dict, current_row_df: pd.DataFrame, live_close: float):
    feature_cols = bundle['features']
    medians = bundle['medians']
    model: ExtraTreesRegressor = bundle['model']

    X = _prepare_test_currentrow(current_row_df, feature_cols, medians)
    preds = model.predict(X.values)
    high_pred, low_pred = float(preds[0, 0]), float(preds[0, 1])

    if not np.isnan(live_close):
        high_pred = max(high_pred, float(live_close))
        low_pred = min(low_pred, float(live_close))

    return round(high_pred, 2), round(low_pred, 2)
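Taken together, the deleted inference path was: load or refresh the cached bundle, build a one-row feature frame from the live indicator signals, then predict and clamp the band around the live close. A condensed sketch under those signatures (paths and signal variables are placeholders):

bundle = load_or_train_highlow_model("analysedata.xlsx", "models/gps_highlow_extratrees.pkl")
row = build_current_features_row_23k(ticker, stock_data, rsi_sig, macd_sig, ema_sig,
                                     atr_sig, adx_sig, bb_sig, sr_sig, pa_sig, fib_sig,
                                     overall_ta_score)
hi, lo = predict_high_low_for_current_row(bundle, row, live_close=float(stock_data['close'].iloc[-1]))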