Oviya committed
Commit 8dfbce4 · 1 Parent(s): 62afd3f

update prediction

Files changed (6)
  1. analysestock.py +47 -77
  2. chatbot.py +0 -232
  3. companies.py +0 -50
  4. highlow_forecast.py +695 -0
  5. predictedchart.py +0 -126
  6. prediction.py +0 -257
analysestock.py CHANGED
@@ -22,45 +22,10 @@ from srstrategies import get_support_resistance_signal
22
  from bbstrategies import get_bollinger_trade_signal
23
  from fundamental import get_fundamental_details
24
  from news import get_latest_news_with_sentiment
25
-
26
- from prediction import (
27
- load_or_train_highlow_model,
28
- build_current_features_row_23k,
29
- predict_high_low_for_current_row,
30
- )
31
  import os, numpy as np, pandas as pd
32
 
33
- BASE_DIR = Path(__file__).resolve().parent # folder where analysestock.py lives
34
-
35
- # TRAIN_XLSX_PATH = r"D:\PY-Trade\backend alone\analysedata.xlsx"
36
- # MODEL_BUNDLE_PATH = r"C:\VIJI\pytrade-app\backend\models\gps_highlow_extratrees.pkl"
37
-
38
- # Excel path (priority: env var → file in repo → your Windows path)
39
- TRAIN_XLSX_PATH = (
40
- os.getenv("TRAIN_XLSX_PATH")
41
- or (str(BASE_DIR / "analysedata.xlsx") if (BASE_DIR / "analysedata.xlsx").exists() else None)
42
- or (r"C:\VIJI\huggingface-deployment\deployment\pytrade-backend\analysedata.xlsx" if os.name == "nt" else None)
43
- )
44
-
45
- if not TRAIN_XLSX_PATH or not Path(TRAIN_XLSX_PATH).exists():
46
- raise FileNotFoundError(
47
- "Training Excel not found. Set TRAIN_XLSX_PATH or place 'analysedata.xlsx' next to analysestock.py."
48
- )
49
-
50
- # Model path (priority: env var → model file in repo → your Windows path → /tmp for training)
51
- MODEL_BUNDLE_PATH = (
52
- os.getenv("MODEL_BUNDLE_PATH")
53
- or (str(BASE_DIR / "gps_highlow_extratrees.pkl") if (BASE_DIR / "gps_highlow_extratrees.pkl").exists() else None)
54
- or (r"C:\VIJI\huggingface-deployment\deployment\pytrade-backend\gps_highlow_extratrees.pkl" if os.name == "nt" else None)
55
- or "/tmp/pytrade-models/gps_highlow_extratrees.pkl"
56
- )
57
-
58
- Path(MODEL_BUNDLE_PATH).parent.mkdir(parents=True, exist_ok=True) # ensure writable dir when training
59
- # --- end snippet ---
60
-
61
-
62
-
63
- from predictedchart import run_stock_prediction
64
 
65
  # ===================== TA scoring =====================
66
  def calculate_technical_analysis_score(indicator_scores):
@@ -271,37 +236,42 @@ def analysestock(ticker):
271
 
272
 
273
  # prediction
274
- predictions = run_stock_prediction(ticker)
275
- predictions_float = [float(pred) for pred in predictions['Predicted Close']]
276
- prediction_dates = pd.to_datetime(predictions['Date']).dt.strftime('%d-%m-%Y').tolist()
277
- model_error = None
278
- pred_high, pred_low = np.nan, np.nan
279
  try:
280
- bundle = load_or_train_highlow_model(TRAIN_XLSX_PATH, MODEL_BUNDLE_PATH)
281
-
282
- current_feat_row = build_current_features_row_23k(
283
  ticker=ticker,
284
- stock_data=stock_data,
285
- rsi_trade_signal=rsi_trade_signal,
286
- macd_trade_signal=macd_trade_signal,
287
- ema_trade_signal=ema_trade_signal,
288
- atr_trade_signal=atr_trade_signal,
289
- adx_trade_signal=adx_trade_signal,
290
- bb_trade_signal=bb_trade_signal,
291
- sr_trade_signal=sr_trade_signal,
292
- priceaction_trade_signal=priceaction_trade_signal,
293
- fibo_trade_signal=fibo_trade_signal,
294
- overall_ta_score=overall_ta_score,
295
- )
296
-
297
- pred_high, pred_low = predict_high_low_for_current_row(
298
- bundle=bundle,
299
- current_row_df=current_feat_row,
300
- live_close=stock_data['close'].iloc[-1]
301
  )
302
  except Exception as ex:
303
- model_error = f"{type(ex).__name__}: {ex}"
304
- print(f"[WARN] High/Low prediction failed: {model_error}")
305
 
306
 
307
 
@@ -352,22 +322,22 @@ def analysestock(ticker):
352
  "EMA 50": ema_trade_signal['EMA_50'],
353
  "ADX_Indicator": adx_trade_signal['ADX_Indicator'],
354
  "PLUS_DI": adx_trade_signal['PLUS_DI'],
355
- "MINUS_DI": adx_trade_signal['MINUS_DI'],
356
- "prediction_prices": predictions_float,
357
- "prediction_dates": prediction_dates,
358
  }
359
-
360
 
361
  response.update({
362
- "ai_predicted_highest_price": pred_high,
363
- "ai_predicted_lowest_price": pred_low,
364
- "ai_model_meta": {
365
- "model": "ExtraTreesRegressor (multi-output capable, native)",
366
- "bundle_path": MODEL_BUNDLE_PATH,
367
- "trained_rows": (bundle.get("trained_rows") if 'bundle' in locals() else None),
368
- "sklearn_version": (bundle.get("sklearn_version") if 'bundle' in locals() else None)
369
- },
370
- "ai_model_error": model_error
371
  })
372
 
373
  return response
 
22
  from bbstrategies import get_bollinger_trade_signal
23
  from fundamental import get_fundamental_details
24
  from news import get_latest_news_with_sentiment
25
+ from highlow_forecast import forecast_next_15_high_low
26
  import os, numpy as np, pandas as pd
27
 
28
+ BASE_DIR = Path(__file__).resolve().parent
29
 
30
  # ===================== TA scoring =====================
31
  def calculate_technical_analysis_score(indicator_scores):
 
236
 
237
 
238
  # prediction
239
+ forecast_15 = None
240
  try:
241
+ forecast_15 = forecast_next_15_high_low(
242
  ticker=ticker,
243
+ stock_data=stock_data
244
  )
245
  except Exception as ex:
246
+ forecast_15 = {"error": f"{type(ex).__name__}: {ex}"}
247
+
248
+
249
+ # Summaries for 15-day forecast (max high, min low) + range series for charts
250
+ max_high_15 = None
251
+ max_high_15_date = None
252
+ min_low_15 = None
253
+ min_low_15_date = None
254
+ highlow_range_15 = None
255
+
256
+ if isinstance(forecast_15, dict) and all(k in forecast_15 for k in ("pred_high", "pred_low", "dates")):
257
+ highs = np.asarray(forecast_15["pred_high"], dtype=float)
258
+ lows = np.asarray(forecast_15["pred_low"], dtype=float)
259
+ dates = forecast_15["dates"]
260
+
261
+ if highs.size and lows.size and highs.size == lows.size == len(dates):
262
+ hi_idx = int(np.nanargmax(highs))
263
+ lo_idx = int(np.nanargmin(lows))
264
+
265
+ max_high_15 = round(float(highs[hi_idx]), 2)
266
+ max_high_15_date = dates[hi_idx]
267
+ min_low_15 = round(float(lows[lo_idx]), 2)
268
+ min_low_15_date = dates[lo_idx]
269
+
270
+ # Precomputed rangeBar data: [{x: date, y: [low, high]}]
271
+ highlow_range_15 = [
272
+ {"x": d, "y": [round(float(l), 2), round(float(h), 2)]}
273
+ for d, h, l in zip(dates, highs.tolist(), lows.tolist())
274
+ ]
275
 
276
 
277
 
322
  "EMA 50": ema_trade_signal['EMA_50'],
323
  "ADX_Indicator": adx_trade_signal['ADX_Indicator'],
324
  "PLUS_DI": adx_trade_signal['PLUS_DI'],
325
+ "MINUS_DI": adx_trade_signal['MINUS_DI']
326
  }
327
+ response.update({
328
+ "ai_predicted_daily_high_15": (forecast_15.get("pred_high") if isinstance(forecast_15, dict) and "pred_high" in forecast_15 else None),
329
+ "ai_predicted_daily_low_15": (forecast_15.get("pred_low") if isinstance(forecast_15, dict) and "pred_low" in forecast_15 else None),
330
+ "ai_predicted_dates_15": (forecast_15.get("dates") if isinstance(forecast_15, dict) and "dates" in forecast_15 else None),
331
+ "ai_model_meta_15d": (forecast_15.get("bundle_meta") if isinstance(forecast_15, dict) and "bundle_meta" in forecast_15 else None),
332
+ "ai_model_error_15d": (forecast_15.get("error") if isinstance(forecast_15, dict) and "error" in forecast_15 else None),
333
+ })
334
 
335
  response.update({
336
+ "ai_predicted_max_high_15": max_high_15,
337
+ "ai_predicted_max_high_15_date": max_high_15_date,
338
+ "ai_predicted_min_low_15": min_low_15,
339
+ "ai_predicted_min_low_15_date": min_low_15_date,
340
+ "ai_predicted_highlow_range_15": highlow_range_15
341
  })
342
 
343
  return response
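For reference, the new 15-day fields in the response can be consumed as-is; a minimal sketch (the ticker and the printing are illustrative, not part of this commit):

    # Minimal consumer sketch for the new 15-day forecast fields (illustrative only).
    resp = analysestock("RELIANCE.NS")  # example ticker

    if resp.get("ai_model_error_15d"):
        print("15-day forecast unavailable:", resp["ai_model_error_15d"])
    else:
        print("max high:", resp["ai_predicted_max_high_15"], "on", resp["ai_predicted_max_high_15_date"])
        print("min low:", resp["ai_predicted_min_low_15"], "on", resp["ai_predicted_min_low_15_date"])
        # Each point is {"x": date, "y": [low, high]}, ready-made for a rangeBar chart series.
        for point in resp["ai_predicted_highlow_range_15"] or []:
            print(point["x"], point["y"])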
chatbot.py DELETED
@@ -1,232 +0,0 @@
1
- # app.py
2
- import os
3
- import re
4
- import json
5
- import time
6
- from datetime import datetime
7
- from typing import List, Dict
8
-
9
- from flask import Flask, request, jsonify
10
- from dotenv import load_dotenv
11
- import requests
12
-
13
- # ----------------------------
14
- # Optional providers (OpenAI v1 / Cohere)
15
- # ----------------------------
16
- OPENAI_CLIENT = None
17
- try:
18
- from openai import OpenAI
19
- OPENAI_CLIENT = "available"
20
- except Exception:
21
- OPENAI_CLIENT = None
22
-
23
- try:
24
- import cohere
25
- except Exception:
26
- cohere = None
27
-
28
- load_dotenv()
29
- app = Flask(__name__)
30
-
31
- # ----------------------------
32
- # Config
33
- # ----------------------------
34
- LLM_PROVIDER = os.getenv("LLM_PROVIDER", "openai").lower().strip()
35
- OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
36
- COHERE_API_KEY = os.getenv("COHERE_API_KEY")
37
- SERPAPI_API_KEY = os.getenv("SERPAPI_API_KEY")
38
- SEARCH_TOPK = int(os.getenv("SEARCH_TOPK", "5"))
39
- TIMEZONE = "Asia/Kolkata"
40
-
41
- if LLM_PROVIDER == "openai" and not OPENAI_API_KEY:
42
- print("[WARN] OPENAI_API_KEY not set; general answers will fail.")
43
- if LLM_PROVIDER == "cohere" and not COHERE_API_KEY:
44
- print("[WARN] COHERE_API_KEY not set; general answers will fail.")
45
- if not SERPAPI_API_KEY:
46
- print("[WARN] SERPAPI_API_KEY not set; 'latest' queries will not work.")
47
-
48
- # Initialize OpenAI client (v1+)
49
- openai_client = None
50
- if LLM_PROVIDER == "openai" and OPENAI_CLIENT and OPENAI_API_KEY:
51
- openai_client = OpenAI(api_key=OPENAI_API_KEY)
52
-
53
- # ----------------------------
54
- # Utilities
55
- # ----------------------------
56
-
57
- # Common “latest/live” triggers
58
- LATEST_TRIGGERS = [
59
- r"\btoday\b", r"\bnow\b", r"\blatest\b", r"\bupdate\b", r"\brecent\b",
60
- r"\bbreaking\b", r"\blive\b", r"\bthis\s+hour\b", r"\bthis\s+minute\b",
61
- r"\bcurrent\b", r"\bas of\b", r"\btoday'?s\b", r"\bprice\s+today\b"
62
- ]
63
- LATEST_PATTERN = re.compile("|".join(LATEST_TRIGGERS), re.IGNORECASE)
64
-
65
- # Simple aliases for finance names/tickers (extend as needed)
66
- ALIASES = {
67
- "tcs": "Tata Consultancy Services",
68
- "ril": "Reliance Industries",
69
- "infy": "Infosys",
70
- "hdfc bank": "HDFC Bank",
71
- "icici": "ICICI Bank",
72
- }
73
-
74
- def normalize_entities(text: str) -> str:
75
- t = text
76
- for k, v in ALIASES.items():
77
- t = re.sub(rf"\b{k}\b", v, t, flags=re.IGNORECASE)
78
- return t
79
-
80
- def needs_live_context(query: str) -> bool:
81
- """Heuristic to detect time-sensitive queries."""
82
- if not query:
83
- return False
84
- q = query.lower()
85
-
86
- if LATEST_PATTERN.search(q):
87
- return True
88
-
89
- # Domain shortcuts
90
- domain_triggers = [
91
- "who won", "match result", "score now", "stock price", "share price",
92
- "usd inr rate", "exchange rate", "weather", "today's weather",
93
- "news on", "headline", "earnings today", "ipo today",
94
- "live price", "current price", "price right now"
95
- ]
96
- if any(t in q for t in domain_triggers):
97
- return True
98
-
99
- # Finance shortcut: “price of <entity>”
100
- if re.search(r"\bprice of\b", q) and not re.search(r"\byesterday|last close|history\b", q):
101
- return True
102
-
103
- return False
104
-
105
- def pick_is_news(query: str) -> bool:
106
- """Treat as news if clear news terms appear."""
107
- q = query.lower()
108
- news_terms = ["news", "headline", "breaking", "election", "budget", "earthquake", "merger", "acquisition", "ceo resigns"]
109
- return any(t in q for t in news_terms)
110
-
111
- def serpapi_search(query: str, is_news: bool = False, num: int = SEARCH_TOPK) -> List[Dict[str, str]]:
112
- """Fetch top search or news results from SerpAPI."""
113
- if not SERPAPI_API_KEY:
114
- return []
115
-
116
- params = {
117
- "api_key": SERPAPI_API_KEY,
118
- "q": query,
119
- }
120
-
121
- if is_news:
122
- url = "https://serpapi.com/search.json"
123
- params.update({"engine": "google_news", "num": min(num, 10), "hl": "en", "gl": "in"})
124
- else:
125
- url = "https://serpapi.com/search.json"
126
- params.update({"engine": "google", "num": min(num, 10), "hl": "en", "gl": "in"})
127
-
128
- r = requests.get(url, params=params, timeout=20)
129
- r.raise_for_status()
130
- data = r.json()
131
-
132
- results: List[Dict[str, str]] = []
133
- if is_news:
134
- for item in (data.get("news_results") or [])[:num]:
135
- results.append({
136
- "title": item.get("title") or "",
137
- "snippet": item.get("snippet") or item.get("description") or "",
138
- "link": item.get("link") or "",
139
- "source": (item.get("source") or {}).get("name") or item.get("source") or ""
140
- })
141
- else:
142
- for item in (data.get("organic_results") or [])[:num]:
143
- results.append({
144
- "title": item.get("title") or "",
145
- "snippet": item.get("snippet") or "",
146
- "link": item.get("link") or "",
147
- "source": item.get("source") or ""
148
- })
149
- return results
150
-
151
- def build_citation_block(hits: List[Dict[str, str]]) -> str:
152
- """Compact citations for the LLM and the response."""
153
- lines = []
154
- for i, h in enumerate(hits, start=1):
155
- title = (h.get("title") or "").strip()
156
- link = (h.get("link") or "").strip()
157
- source = (h.get("source") or "").strip()
158
- snippet = (h.get("snippet") or "").strip()
159
- lines.append(f"[{i}] {title} — {source}\n{snippet}\n{link}")
160
- return "\n\n".join(lines)
161
-
162
- # ----------------------------
163
- # LLM Calls
164
- # ----------------------------
165
-
166
- BASE_SYSTEM_PROMPT = (
167
- "You are a helpful and precise assistant. Use simple, neutral English. "
168
- "When sources are provided, synthesize them, highlight clear facts, and include a short 'Sources' list as [1], [2], etc. "
169
- "If information is uncertain or evolving, state that clearly."
170
- )
171
-
172
- def call_openai(system_prompt: str, user_prompt: str) -> str:
173
- """OpenAI Python SDK ≥ 1.0.0."""
174
- if not openai_client:
175
- raise RuntimeError("OpenAI is not configured.")
176
- resp = openai_client.chat.completions.create(
177
- model="gpt-4o-mini",
178
- messages=[
179
- {"role": "system", "content": system_prompt},
180
- {"role": "user", "content": user_prompt}
181
- ],
182
- temperature=0.2,
183
- max_tokens=900,
184
- )
185
- return (resp.choices[0].message.content or "").strip()
186
-
187
- def call_cohere(system_prompt: str, user_prompt: str) -> str:
188
- """Cohere chat (adjust model if needed)."""
189
- if not cohere or not COHERE_API_KEY:
190
- raise RuntimeError("Cohere is not configured.")
191
- client = cohere.ClientV2(api_key=COHERE_API_KEY)  # messages-style chat matches the V2 client API
192
- resp = client.chat(
193
- model="command-r-plus",
194
- messages=[
195
- {"role": "system", "content": system_prompt},
196
- {"role": "user", "content": user_prompt}
197
- ],
198
- temperature=0.2,
199
- max_tokens=900,
200
- )
201
- text = getattr(resp, "text", None) or (getattr(resp, "output_text", None))
202
- if not text and hasattr(resp, "message") and hasattr(resp.message, "content"):
203
- parts = resp.message.content
204
- text = "".join(getattr(p, "text", "") for p in parts)
205
- return (text or "").strip()
206
-
207
- def call_llm(system_prompt: str, user_prompt: str) -> str:
208
- if LLM_PROVIDER == "openai":
209
- return call_openai(system_prompt, user_prompt)
210
- elif LLM_PROVIDER == "cohere":
211
- return call_cohere(system_prompt, user_prompt)
212
- else:
213
- raise RuntimeError("Unsupported LLM_PROVIDER")
214
-
215
- def compose_live_user_prompt(query: str, hits: List[Dict[str, str]]) -> str:
216
- citation_block = build_citation_block(hits)
217
- today = datetime.now().strftime("%B %d, %Y")
218
- return (
219
- f"User question (time-sensitive): {query}\n"
220
- f"Date today: {today}\n\n"
221
- f"You have these top search results. Answer using only what these sources support. "
222
- f"Be concise and include a 'Sources' section with numbered citations pointing to the links.\n\n"
223
- f"{citation_block}\n\n"
224
- f"Now write the answer:"
225
- )
226
-
227
- def compose_general_user_prompt(query: str) -> str:
228
- today = datetime.now().strftime("%B %d, %Y")
229
- return (
230
- f"User question: {query}\n"
231
- f"(Answer in simple, neutral English. If facts might have changed after {today}, mention that briefly.)"
232
- )
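The deleted app wired these helpers together along these lines; a minimal sketch of the routing flow (the answer() function is hypothetical, since the Flask route that performed this composition is not shown in the diff):

    # Hypothetical sketch of how the deleted helpers composed (not the original route).
    def answer(query: str) -> str:
        query = normalize_entities(query)
        if needs_live_context(query) and SERPAPI_API_KEY:
            hits = serpapi_search(query, is_news=pick_is_news(query))
            if hits:
                return call_llm(BASE_SYSTEM_PROMPT, compose_live_user_prompt(query, hits))
        return call_llm(BASE_SYSTEM_PROMPT, compose_general_user_prompt(query))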
companies.py DELETED
@@ -1,50 +0,0 @@
1
- # utils.py
2
- import csv
3
- import io
4
- import requests
5
- from typing import List, Dict
6
- from requests.exceptions import RequestException
7
- import time
8
-
9
- # List of URLs for NIFTY50 and NIFTY100
10
- NIFTY_URLS = {
11
- "NIFTY50": "https://www.niftyindices.com/IndexConstituent/ind_nifty50list.csv",
12
- "NIFTY100": "https://www.niftyindices.com/IndexConstituent/ind_nifty100list.csv"
13
- }
14
-
15
- def fetch_nifty_companies(index_code: str, retries: int = 3, delay: int = 5) -> List[Dict[str, str]]:
16
- # Get the URL for the given index_code
17
- url = NIFTY_URLS.get(index_code)
18
-
19
- if not url:
20
- raise ValueError(f"Unknown index code: {index_code}")
21
-
22
- # Retry logic
23
- for attempt in range(retries):
24
- try:
25
- # Fetch the CSV data
26
- response = requests.get(url)
27
- # Ensure the request was successful
28
- response.raise_for_status()
29
- # Read CSV data from the response text
30
- return parse_nifty_csv(response.text)
31
-
32
- except RequestException as e:
33
- print(f"Attempt {attempt + 1} failed: {e}")
34
- if attempt < retries - 1:
35
- time.sleep(delay) # Wait before retrying
36
- else:
37
- raise Exception(f"Failed to fetch data after {retries} attempts.") from e
38
-
39
- # Function to fetch companies for both NIFTY50 and NIFTY100
40
- def get_companies_from_indices() -> Dict[str, List[Dict[str, str]]]:
41
- nifty50_companies = fetch_nifty_companies("NIFTY50")
42
- nifty100_companies = fetch_nifty_companies("NIFTY100")
43
-
44
- # Combine both lists and return
45
- all_companies = {
46
- "NIFTY50": nifty50_companies,
47
- "NIFTY100": nifty100_companies
48
- }
49
-
50
- return all_companies
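Note that this deleted file calls parse_nifty_csv without ever defining it; a plausible reconstruction, assuming the standard niftyindices.com CSV headers ("Company Name", "Industry", "Symbol", "Series", "ISIN Code"), which are an assumption here:

    # Plausible sketch of the missing parse_nifty_csv (CSV column names are assumed).
    def parse_nifty_csv(csv_text: str) -> List[Dict[str, str]]:
        reader = csv.DictReader(io.StringIO(csv_text))
        return [
            {"name": row.get("Company Name", ""), "symbol": row.get("Symbol", "")}
            for row in reader
        ]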
highlow_forecast.py ADDED
@@ -0,0 +1,695 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ import talib
4
+
5
+ # Optional ML imports (graceful fallback if scikit-learn is not installed)
6
+ try:
7
+ from sklearn.ensemble import ExtraTreesRegressor
8
+ from sklearn.model_selection import TimeSeriesSplit
9
+ from sklearn.metrics import mean_absolute_error
10
+ _SKLEARN_AVAILABLE = True
11
+ except Exception:
12
+ ExtraTreesRegressor = None
13
+ TimeSeriesSplit = None
14
+ mean_absolute_error = None
15
+ _SKLEARN_AVAILABLE = False
16
+
17
+ # Optional: HistGradientBoostingRegressor for quantile regression
18
+ try:
19
+ from sklearn.ensemble import HistGradientBoostingRegressor
20
+ _HGBR_AVAILABLE = True
21
+ except Exception:
22
+ HistGradientBoostingRegressor = None
23
+ _HGBR_AVAILABLE = False
24
+
25
+ # --------------------- Configuration ---------------------
26
+
27
+ # Prefer quantile gradient boosting for extreme values (better for High/Low)
28
+ _USE_HGBR_QUANTILE = True # auto-fallback to ExtraTrees when unavailable
29
+
30
+ # Quantiles for high/low tails (in log-ratio space)
31
+ _Q_HIGH = 0.80 # upper-tail for High
32
+ _Q_LOW = 0.20 # lower-tail for Low
33
+
34
+ # Blend ML predictions with TA fallback (in log-return space)
35
+ # Set to 0.0 to disable blending
36
+ _BLEND_TA_WEIGHT = 0.20
37
+
38
+ # Log-ratio target winsorization to reduce outlier impact: [q_low, q_high] (ExtraTrees path)
39
+ _WINSOR_Q_LOW = 0.005
40
+ _WINSOR_Q_HIGH = 0.995
41
+
42
+ # Exponential recency weighting: larger = faster decay (0.0 to disable)
43
+ _RECENCY_DECAY = 0.003 # per-sample step
44
+
45
+ # ExtraTrees hyperparameters tuned for generalization
46
+ _ETR_PARAMS_CV = dict(
47
+ n_estimators=800,
48
+ max_depth=None,
49
+ min_samples_split=2,
50
+ min_samples_leaf=3,
51
+ max_features=0.6,
52
+ bootstrap=False,
53
+ n_jobs=-1,
54
+ random_state=42,
55
+ )
56
+ _ETR_PARAMS_FINAL = dict(
57
+ n_estimators=1200,
58
+ max_depth=None,
59
+ min_samples_split=2,
60
+ min_samples_leaf=3,
61
+ max_features=0.6,
62
+ bootstrap=False,
63
+ n_jobs=-1,
64
+ random_state=42,
65
+ )
66
+
67
+ # HistGradientBoosting hyperparameters for quantile regression
68
+ _HGBR_PARAMS = dict(
69
+ loss="quantile",
70
+ learning_rate=0.05,
71
+ max_iter=600,
72
+ max_depth=3,
73
+ max_leaf_nodes=31,
74
+ max_bins=255,
75
+ l2_regularization=0.0,
76
+ early_stopping=False, # avoid random holdout leaking time
77
+ random_state=42,
78
+ )
79
+
80
+ # In-memory per-ticker model cache (no disk I/O)
81
+ _MEM_CACHE = {} # key: ticker.upper(), value: bundle dict
82
+
83
+ # --------------------- OHLC Utilities ---------------------
84
+
85
+ def _ensure_ohlc_columns(df: pd.DataFrame) -> pd.DataFrame:
86
+ cols = {c.lower(): c for c in df.columns}
87
+ need = ["open", "high", "low", "close", "volume"]
88
+ mapping = {}
89
+ for n in need:
90
+ if n in cols:
91
+ mapping[cols[n]] = n
92
+ else:
93
+ # try MultiIndex column cases from yfinance
94
+ for c in df.columns:
95
+ name = c[0].lower() if isinstance(c, tuple) and len(c) > 0 else str(c).lower()
96
+ if name == n:
97
+ mapping[c] = n
98
+ break
99
+ out = df.rename(columns=mapping).copy()
100
+ missing = [c for c in need if c not in out.columns]
101
+ if missing:
102
+ raise ValueError(f"Missing OHLCV columns after normalization: {missing}")
103
+ return out[["open", "high", "low", "close", "volume"]]
104
+
105
+ # --------------------- Business day helper ---------------------
106
+
107
+ def _next_business_days(last_date: pd.Timestamp, periods: int, exchange: str = "XNYS") -> pd.DatetimeIndex:
108
+ """
109
+ Return next 'periods' business sessions after last_date.
110
+ Tries exchange calendar via pandas_market_calendars (holidays-aware), fallback to weekdays-only.
111
+ exchange examples: 'XNYS' (NYSE), 'XBOM' (BSE), 'XNAS' (NASDAQ), 'XNSE' (NSE).
112
+ """
113
+ last_date = pd.Timestamp(last_date).tz_localize(None)
114
+ try:
115
+ import pandas_market_calendars as mcal
116
+ cal = mcal.get_calendar(exchange)
117
+ # buffer long enough to cover holidays
118
+ schedule = cal.schedule(start_date=last_date + pd.Timedelta(days=1),
119
+ end_date=last_date + pd.Timedelta(days=180))
120
+ sessions = schedule.index.tz_localize(None)
121
+ if len(sessions) >= periods:
122
+ return sessions[:periods]
123
+ # If for some reason not enough sessions, extend with weekday fallback
124
+ needed = periods - len(sessions)
125
+ tail = pd.bdate_range(sessions[-1] + pd.offsets.BDay(1) if len(sessions) else last_date + pd.offsets.BDay(1),
126
+ periods=needed)
127
+ return sessions.append(tail)
128
+ except Exception:
129
+ # Weekdays-only fallback
130
+ return pd.bdate_range(last_date + pd.offsets.BDay(1), periods=periods)
131
+
132
+ # --------------------- TA Heuristic (Fallback, No ML) ---------------------
133
+
134
+ def _last_finite(values: np.ndarray, default: float = np.nan) -> float:
135
+ for x in values[::-1]:
136
+ if np.isfinite(x):
137
+ return float(x)
138
+ return float(default)
139
+
140
+ def _ta_fallback_forecast(ohlc: pd.DataFrame, horizons: int = 15):
141
+ h = ohlc["high"].astype(float).values
142
+ l = ohlc["low"].astype(float).values
143
+ c = ohlc["close"].astype(float).values
144
+
145
+ if len(c) < 60:
146
+ raise ValueError("Not enough history for TA fallback (need >=60 rows).")
147
+
148
+ base_close = _last_finite(ohlc["close"].replace(0.0, np.nan).values)
149
+ if not np.isfinite(base_close) or base_close <= 0:
150
+ raise ValueError("Invalid last close after cleaning.")
151
+
152
+ atr14 = talib.ATR(h, l, c, timeperiod=14)
153
+ atr_last = _last_finite(atr14, default=np.nan)
154
+ atr_pct = (atr_last / base_close) if np.isfinite(atr_last) and base_close > 0 else np.nan
155
+
156
+ ema20 = talib.EMA(c, timeperiod=20)
157
+ ema50 = talib.EMA(c, timeperiod=50)
158
+ ema20_last = _last_finite(ema20, default=np.nan)
159
+ ema50_last = _last_finite(ema50, default=np.nan)
160
+
161
+ trend_strength = 0.0
162
+ if np.isfinite(ema20_last) and np.isfinite(ema50_last) and ema50_last > 0:
163
+ trend_strength = np.clip(ema20_last / ema50_last - 1.0, -0.05, 0.05)
164
+ ema20_slope = 0.0
165
+ if len(ema20) >= 2 and np.isfinite(ema20[-1]) and np.isfinite(ema20[-2]) and ema20[-2] > 0:
166
+ ema20_slope = np.clip((ema20[-1] / ema20[-2]) - 1.0, -0.05, 0.05)
167
+
168
+ adx14 = talib.ADX(h, l, c, timeperiod=14)
169
+ adx = _last_finite(adx14, default=20.0) / 100.0
170
+ adx = float(np.clip(adx, 0.0, 1.0))
171
+
172
+ rsi14 = talib.RSI(c, timeperiod=14)
173
+ rsi = _last_finite(rsi14, default=50.0)
174
+ tilt = float(np.clip((rsi - 50.0) / 50.0, -1.0, 1.0))
175
+
176
+ logret = np.diff(np.log(np.maximum(c, 1e-12)))
177
+ if len(logret) >= 20 and np.isfinite(logret[-20:]).sum() >= 10:
178
+ sigma20 = float(pd.Series(logret).rolling(20).std().iloc[-1])
179
+ else:
180
+ sigma20 = float(np.nan)
181
+
182
+ components = []
183
+ if np.isfinite(sigma20):
184
+ components.append(sigma20)
185
+ if np.isfinite(atr_pct):
186
+ components.append(atr_pct)
187
+ daily_vol = 0.0
188
+ if components:
189
+ daily_vol = 0.6 * components[0] + (0.4 * components[1] if len(components) > 1 else 0.0)
190
+ daily_vol = float(np.clip(daily_vol if np.isfinite(daily_vol) else 0.02, 0.004, 0.08))
191
+
192
+ drift_per_day = float(np.clip(0.5 * trend_strength + 0.5 * ema20_slope, -0.02, 0.02))
193
+
194
+ up_weight = 1.0 - 0.3 * tilt
195
+ dn_weight = 1.0 + 0.3 * tilt
196
+ up_weight = float(np.clip(up_weight, 0.5, 1.5))
197
+ dn_weight = float(np.clip(dn_weight, 0.5, 1.5))
198
+ trend_amp = 0.75 + 0.5 * adx
199
+
200
+ pred_high, pred_low = [], []
201
+ for k in range(1, horizons + 1):
202
+ amp = daily_vol * np.sqrt(k) * trend_amp
203
+ drift = drift_per_day * k
204
+ up_move = amp * up_weight
205
+ dn_move = amp * dn_weight
206
+ hi = base_close * (1.0 + drift + up_move)
207
+ lo = base_close * (1.0 + drift - dn_move)
208
+ hi = max(0.0, hi)
209
+ lo = max(0.0, lo)
210
+ if lo > hi:
211
+ lo, hi = hi, lo
212
+ pred_high.append(hi)
213
+ pred_low.append(lo)
214
+
215
+ return base_close, np.array(pred_high), np.array(pred_low)
216
+
217
+ # --------------------- Feature Engineering for ML ---------------------
218
+
219
+ def _compute_ta_features(df: pd.DataFrame) -> pd.DataFrame:
220
+ df = _ensure_ohlc_columns(df).copy()
221
+ o, h, l, c, v = [df[k].astype(float).values for k in ("open", "high", "low", "close", "volume")]
222
+
223
+ close = df["close"].astype(float)
224
+ open_ = df["open"].astype(float)
225
+ high = df["high"].astype(float)
226
+ low = df["low"].astype(float)
227
+ vol = df["volume"].astype(float)
228
+
229
+ df_feat = pd.DataFrame(index=df.index)
230
+
231
+ # Basic price features
232
+ df_feat["ret_1"] = close.pct_change(1)
233
+ df_feat["logret_1"] = np.log(close.replace(0.0, np.nan)).diff(1)
234
+ df_feat["ret_5"] = close.pct_change(5)
235
+ df_feat["ret_10"] = close.pct_change(10)
236
+ df_feat["roll_mean_5"] = close.rolling(5).mean() / close - 1.0
237
+ df_feat["roll_mean_20"] = close.rolling(20).mean() / close - 1.0
238
+ df_feat["roll_std_10"] = close.pct_change().rolling(10).std()
239
+ df_feat["range_pct"] = (high - low) / close.replace(0.0, np.nan)
240
+
241
+ # Candle features (normalized)
242
+ with np.errstate(divide="ignore", invalid="ignore"):
243
+ body = (close - open_) / close
244
+ upper_shadow = (high - np.maximum(close, open_)) / close
245
+ lower_shadow = (np.minimum(close, open_) - low) / close
246
+ df_feat["candle_body"] = body
247
+ df_feat["candle_upper"] = upper_shadow
248
+ df_feat["candle_lower"] = lower_shadow
249
+ df_feat["gap_open"] = open_ / close.shift(1) - 1.0  # today's open vs. yesterday's close
250
+
251
+ # EMAs and distances
252
+ ema5 = talib.EMA(close.values, timeperiod=5)
253
+ ema20 = talib.EMA(close.values, timeperiod=20)
254
+ ema50 = talib.EMA(close.values, timeperiod=50)
255
+ with np.errstate(divide="ignore", invalid="ignore"):
256
+ df_feat["ema5_dist"] = (ema5 / close.values) - 1.0
257
+ df_feat["ema20_dist"] = (ema20 / close.values) - 1.0
258
+ df_feat["ema50_dist"] = (ema50 / close.values) - 1.0
259
+ # EMA slopes (1-day change)
260
+ df_feat["ema20_slope"] = (pd.Series(ema20, index=df.index).pct_change(1))
261
+
262
+ # RSI family
263
+ df_feat["rsi14"] = talib.RSI(close.values, timeperiod=14) / 100.0
264
+ df_feat["rsi5"] = talib.RSI(close.values, timeperiod=5) / 100.0
265
+
266
+ # MACD
267
+ macd, macdsig, macdhist = talib.MACD(close.values, fastperiod=12, slowperiod=26, signalperiod=9)
268
+ df_feat["macd"] = macd
269
+ df_feat["macdsig"] = macdsig
270
+ df_feat["macdhist"] = macdhist
271
+
272
+ # Bollinger Bands width
273
+ upper, middle, lower = talib.BBANDS(close.values, timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)
274
+ with np.errstate(divide="ignore", invalid="ignore"):
275
+ df_feat["bb_width"] = (upper - lower) / middle
276
+
277
+ # Volatility/Trend
278
+ atr = talib.ATR(h, l, c, timeperiod=14)
279
+ with np.errstate(divide="ignore", invalid="ignore"):
280
+ df_feat["atr14"] = atr / close.values
281
+ df_feat["adx14"] = talib.ADX(h, l, c, timeperiod=14) / 100.0
282
+
283
+ # Additional momentum/oscillators
284
+ df_feat["roc10"] = talib.ROC(close.values, timeperiod=10) / 100.0
285
+ df_feat["cci14"] = talib.CCI(h, l, c, timeperiod=14) / 100.0
286
+ df_feat["mfi14"] = talib.MFI(h, l, c, v, timeperiod=14) / 100.0
287
+ df_feat["willr14"] = talib.WILLR(h, l, c, timeperiod=14) / 100.0 # [-1, 0]
288
+
289
+ # Stochastic
290
+ slowk, slowd = talib.STOCH(h, l, c, fastk_period=14, slowk_period=3, slowk_matype=0, slowd_period=3, slowd_matype=0)
291
+ df_feat["stoch_k"] = slowk / 100.0
292
+ df_feat["stoch_d"] = slowd / 100.0
293
+
294
+ # OBV normalized (robust to missing/flat volume)
295
+ finite_vol = np.isfinite(vol.values)
296
+ if finite_vol.sum() >= max(30, int(0.5 * len(vol))):
297
+ obv = talib.OBV(close.values, vol.values)
298
+ df_feat["obv_z"] = pd.Series(obv, index=df.index).pct_change(5)
299
+ else:
300
+ df_feat["obv_z"] = 0.0
301
+
302
+ # Volume z-score and turnover proxies
303
+ vol_roll_mean = vol.rolling(20).mean()
304
+ vol_roll_std = vol.rolling(20).std()
305
+ with np.errstate(divide="ignore", invalid="ignore"):
306
+ df_feat["vol_z20"] = (vol - vol_roll_mean) / vol_roll_std
307
+ df_feat["turnover_z20"] = ((vol * close) - (vol * close).rolling(20).mean()) / (vol * close).rolling(20).std()
308
+
309
+ # Distance to rolling extremes
310
+ roll_max_20 = close.rolling(20).max()
311
+ roll_min_20 = close.rolling(20).min()
312
+ roll_max_55 = close.rolling(55).max()
313
+ roll_min_55 = close.rolling(55).min()
314
+ with np.errstate(divide="ignore", invalid="ignore"):
315
+ df_feat["dist_max20"] = roll_max_20 / close - 1.0
316
+ df_feat["dist_min20"] = close / roll_min_20 - 1.0
317
+ df_feat["dist_max55"] = roll_max_55 / close - 1.0
318
+ df_feat["dist_min55"] = close / roll_min_55 - 1.0
319
+
320
+ # Realized volatility features
321
+ logret = np.log(close.replace(0.0, np.nan)).diff(1)
322
+ df_feat["rv5"] = logret.rolling(5).std()
323
+ df_feat["rv20"] = logret.rolling(20).std()
324
+ df_feat["avg_range5"] = ((high - low) / close.replace(0.0, np.nan)).rolling(5).mean()
325
+
326
+ # Calendar (cyclical day-of-week, month-of-year)
327
+ dow = pd.Series(df.index).map(lambda d: d.weekday() if hasattr(d, "weekday") else pd.Timestamp(d).weekday())
328
+ df_feat["dow_sin"] = np.sin(2 * np.pi * dow / 7.0)
329
+ df_feat["dow_cos"] = np.cos(2 * np.pi * dow / 7.0)
330
+ moy = pd.Series(df.index).map(lambda d: (d.month if hasattr(d, "month") else pd.Timestamp(d).month))
331
+ df_feat["moy_sin"] = np.sin(2 * np.pi * (moy.astype(float) - 1.0) / 12.0)
332
+ df_feat["moy_cos"] = np.cos(2 * np.pi * (moy.astype(float) - 1.0) / 12.0)
333
+
334
+ # Lags of basic signals
335
+ df_feat["ret_1_lag1"] = df_feat["ret_1"].shift(1)
336
+ df_feat["ret_1_lag2"] = df_feat["ret_1"].shift(2)
337
+ df_feat["range_pct_lag1"] = df_feat["range_pct"].shift(1)
338
+
339
+ df_feat = df_feat.replace([np.inf, -np.inf], np.nan)
340
+ df_feat = df_feat.loc[:, df_feat.notna().any(axis=0)]
341
+ return df_feat
342
+
343
+ def _clean_features_for_training(feats: pd.DataFrame, warmup: int = 60) -> pd.DataFrame:
344
+ if feats.empty:
345
+ return feats
346
+ clean = feats.copy()
347
+ clean = clean.ffill().bfill()  # fillna(method=...) is deprecated in pandas 2.x
348
+ if len(clean) > warmup:
349
+ clean = clean.iloc[warmup:]
350
+ clean = clean.dropna()
351
+ return clean
352
+
353
+ def _winsorize_targets(Y: np.ndarray, horizons: int, q_low: float, q_high: float) -> tuple[np.ndarray, dict]:
354
+ """
355
+ Winsorize concatenated targets Y = [highs(0:h), lows(h:2h)] row-wise using global quantiles.
356
+ Returns clipped Y and thresholds used.
357
+ """
358
+ h = horizons
359
+ Yh = Y[:, :h].ravel()
360
+ Yl = Y[:, h:].ravel()
361
+
362
+ lo_h, hi_h = np.quantile(Yh, [q_low, q_high]) if Yh.size else (-np.inf, np.inf)
363
+ lo_l, hi_l = np.quantile(Yl, [q_low, q_high]) if Yl.size else (-np.inf, np.inf)
364
+
365
+ Y_clip = Y.copy()
366
+ Y_clip[:, :h] = np.clip(Y_clip[:, :h], lo_h, hi_h)
367
+ Y_clip[:, h:] = np.clip(Y_clip[:, h:], lo_l, hi_l)
368
+
369
+ return Y_clip, {"high": (float(lo_h), float(hi_h)), "low": (float(lo_l), float(hi_l))}
370
+
371
+ def _sample_weights(n: int, decay: float) -> np.ndarray:
372
+ """
373
+ Exponential recency weights. Newer samples get higher weight.
374
+ w_i = exp(-decay * (n-1-i)), i in [0..n-1]
375
+ """
376
+ if decay <= 0 or n <= 0:
377
+ return np.ones(n, dtype=float)
378
+ idx = np.arange(n, dtype=float)
379
+ w = np.exp(-decay * (n - 1 - idx))
380
+ w /= np.average(w) # normalize to mean 1.0
381
+ return w
382
+
383
+ def _make_supervised(df: pd.DataFrame, horizons: int = 15):
384
+ """
385
+ Build X, Y for multi-horizon high/low forecast.
386
+ Targets (log-ratio): y_high_h = log(High[t+h]/Close[t]), y_low_h = log(Low[t+h]/Close[t])
387
+ Log transform stabilizes variance and reduces skew.
388
+ """
389
+ ohlc = _ensure_ohlc_columns(df)
390
+ feats = _compute_ta_features(df)
391
+ feat_df = _clean_features_for_training(feats, warmup=60)
392
+
393
+ # Align to cleaned feature index
394
+ ohlc = ohlc.loc[feat_df.index]
395
+
396
+ highs = ohlc["high"].astype(float).values
397
+ lows = ohlc["low"].astype(float).values
398
+ closes = ohlc["close"].astype(float).values
399
+ X_all = feat_df.values
400
+
401
+ n = len(feat_df)
402
+ if n < horizons + 30:
403
+ raise ValueError(f"Not enough rows after feature warm-up for {horizons}-day training. Have: {n}")
404
+
405
+ X_list, Y_list = [], []
406
+ for i in range(n - horizons):
407
+ base_c = closes[i]
408
+ if not np.isfinite(base_c) or base_c <= 0:
409
+ continue
410
+
411
+ future_highs = highs[i + 1:i + horizons + 1]
412
+ future_lows = lows[i + 1:i + horizons + 1]
413
+
414
+ with np.errstate(divide="ignore", invalid="ignore"):
415
+ yh = np.log(np.maximum(future_highs, 1e-12) / base_c)
416
+ yl = np.log(np.maximum(future_lows, 1e-12) / base_c)
417
+
418
+ if np.any(~np.isfinite(yh)) or np.any(~np.isfinite(yl)):
419
+ continue
420
+
421
+ X_list.append(X_all[i, :])
422
+ Y_list.append(np.concatenate([yh, yl], axis=0))
423
+
424
+ X = np.asarray(X_list)
425
+ Y = np.asarray(Y_list)
426
+ if X.size == 0 or Y.size == 0:
427
+ raise ValueError("No valid supervised samples after cleaning. Check data quality (NaNs/zeros).")
428
+ feature_names = feat_df.columns.tolist()
429
+ return X, Y, feature_names, feat_df.index[:len(X)]
430
+
431
+ def _get_sklearn_version():
432
+ try:
433
+ import sklearn
434
+ return sklearn.__version__
435
+ except Exception:
436
+ return None
437
+
438
+ # --------------------- Model Train/Load (In-Memory Only) ---------------------
439
+
440
+ def train_or_load_highlow_15d(df: pd.DataFrame, ticker: str, horizons: int = 15):
441
+ key = ticker.upper()
442
+ if key in _MEM_CACHE:
443
+ return _MEM_CACHE[key]
444
+
445
+ # If sklearn is not available at all, keep TA fallback metadata
446
+ if not _SKLEARN_AVAILABLE:
447
+ bundle = {
448
+ "model": None,
449
+ "feature_names": None,
450
+ "horizons": horizons,
451
+ "trained_rows": int(len(df)),
452
+ "metrics": None,
453
+ "sklearn_version": None,
454
+ "ticker": key,
455
+ "model_path": None,
456
+ "winsor": None,
457
+ "blend_weight": _BLEND_TA_WEIGHT,
458
+ "transform": "logratio",
459
+ "feature_importances": None,
460
+ "algo": "NONE",
461
+ }
462
+ _MEM_CACHE[key] = bundle
463
+ return bundle
464
+
465
+ # Build supervised set
466
+ X, Y_raw, feature_names, _ = _make_supervised(df, horizons=horizons)
467
+ sw = _sample_weights(X.shape[0], _RECENCY_DECAY)
468
+
469
+ # Prefer quantile gradient boosting if available
470
+ if _USE_HGBR_QUANTILE and _HGBR_AVAILABLE and HistGradientBoostingRegressor is not None:
471
+ q_models_high, q_models_low = [], []
472
+ for k in range(horizons):
473
+ # High models (upper quantile)
474
+ mh = HistGradientBoostingRegressor(**_HGBR_PARAMS, quantile=_Q_HIGH)
475
+ mh.fit(X, Y_raw[:, k], sample_weight=sw)
476
+ q_models_high.append(mh)
477
+
478
+ # Low models (lower quantile)
479
+ ml = HistGradientBoostingRegressor(**_HGBR_PARAMS, quantile=_Q_LOW)
480
+ ml.fit(X, Y_raw[:, horizons + k], sample_weight=sw)
481
+ q_models_low.append(ml)
482
+
483
+ bundle = {
484
+ "model": None, # not used in quantile path
485
+ "q_models_high": q_models_high,
486
+ "q_models_low": q_models_low,
487
+ "feature_names": feature_names,
488
+ "horizons": horizons,
489
+ "trained_rows": int(X.shape[0]),
490
+ "metrics": None, # optional: add custom CV if desired
491
+ "sklearn_version": _get_sklearn_version(),
492
+ "ticker": key,
493
+ "model_path": None,
494
+ "winsor": None,
495
+ "blend_weight": _BLEND_TA_WEIGHT,
496
+ "transform": "logratio",
497
+ "feature_importances": None,
498
+ "algo": f"HGBR_QUANTILE(high={_Q_HIGH}, low={_Q_LOW})",
499
+ }
500
+ _MEM_CACHE[key] = bundle
501
+ return bundle
502
+
503
+ # Else fall back to ExtraTrees mean-regression (existing path)
504
+ Y_clip, winsor_info = _winsorize_targets(Y_raw, horizons, _WINSOR_Q_LOW, _WINSOR_Q_HIGH)
505
+
506
+ fold_metrics = []
507
+ feature_importances = None
508
+
509
+ if TimeSeriesSplit is not None:
510
+ tscv = TimeSeriesSplit(n_splits=5)
511
+ for train_idx, val_idx in tscv.split(X):
512
+ Xtr, Xvl = X[train_idx], X[val_idx]
513
+ Ytr_clipped = Y_clip[train_idx]
514
+ Yvl_true = Y_raw[val_idx] # evaluate on true (unclipped) targets
515
+ w_tr = sw[train_idx] if sw is not None else None
516
+
517
+ model_cv = ExtraTreesRegressor(**_ETR_PARAMS_CV)
518
+ model_cv.fit(Xtr, Ytr_clipped, sample_weight=w_tr)
519
+ Yhat = model_cv.predict(Xvl)
520
+
521
+ # Convert log-ratio back to percentage move for reporting
522
+ h = horizons
523
+ if mean_absolute_error is not None:
524
+ yh_pct = (np.exp(Yvl_true[:, :h]) - 1.0) * 100.0
525
+ yl_pct = (np.exp(Yvl_true[:, h:]) - 1.0) * 100.0
526
+ yhat_h_pct = (np.exp(Yhat[:, :h]) - 1.0) * 100.0
527
+ yhat_l_pct = (np.exp(Yhat[:, h:]) - 1.0) * 100.0
528
+ high_mae = mean_absolute_error(yh_pct, yhat_h_pct)
529
+ low_mae = mean_absolute_error(yl_pct, yhat_l_pct)
530
+ fold_metrics.append({"high_mae_pct": round(float(high_mae), 4),
531
+ "low_mae_pct": round(float(low_mae), 4)})
532
+
533
+ final_model = ExtraTreesRegressor(**_ETR_PARAMS_FINAL)
534
+ final_model.fit(X, Y_clip, sample_weight=sw)
535
+
536
+ try:
537
+ fi = final_model.feature_importances_
538
+ feature_importances = sorted(
539
+ zip(feature_names, fi),
540
+ key=lambda t: t[1],
541
+ reverse=True
542
+ )[:30]
543
+ feature_importances = [(str(n), float(v)) for n, v in feature_importances]
544
+ except Exception:
545
+ feature_importances = None
546
+
547
+ bundle = {
548
+ "model": final_model,
549
+ "feature_names": feature_names,
550
+ "horizons": horizons,
551
+ "trained_rows": int(X.shape[0]),
552
+ "metrics": fold_metrics or None,
553
+ "sklearn_version": _get_sklearn_version(),
554
+ "ticker": key,
555
+ "model_path": None,
556
+ "winsor": winsor_info,
557
+ "blend_weight": _BLEND_TA_WEIGHT,
558
+ "transform": "logratio",
559
+ "feature_importances": feature_importances,
560
+ "algo": "EXTRATREES_MEAN",
561
+ }
562
+
563
+ _MEM_CACHE[key] = bundle
564
+ return bundle
565
+
566
+ # --------------------- Forecast ---------------------
567
+
568
+ def forecast_next_15_high_low(ticker: str, stock_data: pd.DataFrame):
569
+ """
570
+ Train/load from memory and forecast next 15 business days' High/Low.
571
+ If no ML available or insufficient data, uses TA fallback.
572
+ Returns dict: dates, pred_high, pred_low, base_close, bundle_meta
573
+ """
574
+ if not isinstance(stock_data.index, pd.DatetimeIndex):
575
+ stock_data = stock_data.copy()
576
+ stock_data.index = pd.to_datetime(stock_data.index)
577
+
578
+ ohlc = _ensure_ohlc_columns(stock_data)
579
+
580
+ try:
581
+ bundle = train_or_load_highlow_15d(stock_data, ticker, horizons=15)
582
+ model = bundle.get("model", None)
583
+ horizons = bundle.get("horizons", 15)
584
+
585
+ # Build latest feature row
586
+ feats_full = _compute_ta_features(stock_data)
587
+ feats_full = feats_full.replace([np.inf, -np.inf], np.nan)
588
+ feats_full = feats_full.loc[:, feats_full.notna().any(axis=0)]
589
+ feats_full = feats_full.ffill().bfill()
590
+ if len(feats_full) > 60:
591
+ feats_full = feats_full.iloc[60:]
592
+ if feats_full.empty:
593
+ raise ValueError("No features available for inference after cleaning.")
594
+
595
+ feature_names = bundle["feature_names"]
596
+ for col in feature_names:
597
+ if col not in feats_full.columns:
598
+ feats_full[col] = 0.0
599
+ feats_full = feats_full[feature_names]
600
+ X_t = feats_full.iloc[[-1]].values
601
+
602
+ base_close = float(ohlc.iloc[-1]["close"])
603
+ if not np.isfinite(base_close) or base_close <= 0:
604
+ base_close = float(ohlc["close"].replace(0.0, np.nan).dropna().iloc[-1])
605
+
606
+ y_pred_log = None
607
+
608
+ # Path 1: ExtraTrees multi-output mean-regression
609
+ if model is not None:
610
+ y_pred_log = model.predict(X_t).reshape(-1)
611
+
612
+ # Path 2: Quantile gradient boosting per-horizon
613
+ elif "q_models_high" in bundle and "q_models_low" in bundle:
614
+ qh = bundle["q_models_high"]
615
+ ql = bundle["q_models_low"]
616
+ yh = np.array([qh[k].predict(X_t)[0] for k in range(horizons)], dtype=float)
617
+ yl = np.array([ql[k].predict(X_t)[0] for k in range(horizons)], dtype=float)
618
+ y_pred_log = np.concatenate([yh, yl], axis=0)
619
+
620
+ if y_pred_log is not None:
621
+ # Optional hybrid blend with TA fallback in log space for stability
622
+ blend_w = float(bundle.get("blend_weight", _BLEND_TA_WEIGHT) or 0.0)
623
+ if blend_w > 0.0:
624
+ try:
625
+ _, hi_ta, lo_ta = _ta_fallback_forecast(ohlc, horizons=horizons)
626
+ with np.errstate(divide="ignore", invalid="ignore"):
627
+ yh_ta_log = np.log(np.maximum(hi_ta, 1e-12) / base_close)
628
+ yl_ta_log = np.log(np.maximum(lo_ta, 1e-12) / base_close)
629
+ yh_ml_log = y_pred_log[:horizons]
630
+ yl_ml_log = y_pred_log[horizons:]
631
+ yh_blend_log = (1.0 - blend_w) * yh_ml_log + blend_w * yh_ta_log
632
+ yl_blend_log = (1.0 - blend_w) * yl_ml_log + blend_w * yl_ta_log
633
+ y_pred_log = np.concatenate([yh_blend_log, yl_blend_log], axis=0)
634
+ except Exception:
635
+ pass
636
+
637
+ # Convert back from log-ratio to price
638
+ yh = y_pred_log[:horizons]
639
+ yl = y_pred_log[horizons:]
640
+ pred_high = np.exp(yh) * base_close
641
+ pred_low = np.exp(yl) * base_close
642
+
643
+ pred_high = np.maximum(pred_high, 0.0)
644
+ pred_low = np.maximum(pred_low, 0.0)
645
+ swp = pred_low > pred_high
646
+ if np.any(swp):
647
+ tmp = pred_high.copy()
648
+ pred_high[swp] = pred_low[swp]
649
+ pred_low[swp] = tmp[swp]
650
+
651
+ last_date = feats_full.index[-1]
652
+ future_dates = _next_business_days(last_date, horizons)
653
+ date_str = [pd.Timestamp(d).strftime("%Y-%m-%d") for d in future_dates]
654
+
655
+ return {
656
+ "dates": date_str,
657
+ "pred_high": [round(float(x), 2) for x in pred_high],
658
+ "pred_low": [round(float(x), 2) for x in pred_low],
659
+ "base_close": round(float(base_close), 4),
660
+ "bundle_meta": {
661
+ "model": bundle.get("algo", "UNKNOWN"),
662
+ "trained_rows": bundle.get("trained_rows"),
663
+ "sklearn_version": bundle.get("sklearn_version"),
664
+ "metrics": bundle.get("metrics"),
665
+ "bundle_path": None,
666
+ "ticker": bundle.get("ticker"),
667
+ "winsor": bundle.get("winsor"),
668
+ "blend_weight": bundle.get("blend_weight"),
669
+ "transform": bundle.get("transform"),
670
+ "feature_importances_top30": bundle.get("feature_importances"),
671
+ "quantiles": {"high": _Q_HIGH, "low": _Q_LOW} if "q_models_high" in bundle else None,
672
+ },
673
+ }
674
+ except Exception:
675
+ pass  # fall through to the TA heuristic fallback below
676
+
677
+ base_close, pred_high, pred_low = _ta_fallback_forecast(ohlc, horizons=15)
678
+ last_date = ohlc.index[-1]
679
+ future_dates = _next_business_days(last_date, 15)
680
+ date_str = [pd.Timestamp(d).strftime("%Y-%m-%d") for d in future_dates]
681
+
682
+ return {
683
+ "dates": date_str,
684
+ "pred_high": [round(float(x), 2) for x in pred_high],
685
+ "pred_low": [round(float(x), 2) for x in pred_low],
686
+ "base_close": round(float(base_close), 4),
687
+ "bundle_meta": {
688
+ "model": "TA heuristic fallback (ATR/EMA/RSI/ADX), no ML",
689
+ "trained_rows": int(len(ohlc)),
690
+ "sklearn_version": _get_sklearn_version(),
691
+ "metrics": None,
692
+ "bundle_path": None,
693
+ "ticker": ticker.upper(),
694
+ },
695
+ }
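End to end, the new module needs only a ticker and an OHLCV frame; a minimal usage sketch, assuming yfinance is installed (the ticker and date range are examples):

    # Usage sketch for the new forecaster; yfinance and the ticker are assumptions.
    import yfinance as yf
    from highlow_forecast import forecast_next_15_high_low

    df = yf.download("RELIANCE.NS", start="2022-01-01", auto_adjust=False)
    result = forecast_next_15_high_low("RELIANCE.NS", df)

    print(result["bundle_meta"]["model"])  # quantile model, ExtraTrees, or the TA fallback
    for d, hi, lo in zip(result["dates"], result["pred_high"], result["pred_low"]):
        print(d, lo, "->", hi)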
predictedchart.py DELETED
@@ -1,126 +0,0 @@
1
- import yfinance as yf
2
- import pandas as pd
3
- import numpy as np
4
- import talib
5
- from sklearn.preprocessing import MinMaxScaler
6
- import torch
7
- import torch.nn as nn
8
- from torch.utils.data import Dataset, DataLoader
9
-
10
- # Step 1: Download data with TA indicators
11
- def fetch_stock_data_with_indicators(ticker, start="2020-01-01", end="2025-09-10"):
12
- df = yf.download(ticker, start=start, end=end)
13
- actualdata = yf.download(ticker, start=start, end="2025-09-11")
14
- df = df[["Open", "High", "Low", "Close", "Volume"]]
15
- close_prices = df['Close'].to_numpy().flatten()
16
- low_prices = df['Low'].to_numpy().flatten()
17
- high_prices = df['High'].to_numpy().flatten()
18
- # Add indicators
19
- #df["RSI"] = talib.RSI(close_prices, timeperiod=14)
20
- #df["MACD"], df["MACD_signal"], _ = talib.MACD(close_prices)
21
- df["EMA_20"] = talib.EMA(close_prices, timeperiod=20)
22
- df["ATR"] = talib.ATR(high_prices, low_prices, close_prices, timeperiod=14)
23
-
24
- df.dropna(inplace=True)
25
- return df
26
-
27
- def fetch_originaldata(ticker, start="2020-01-01", end="2025-01-03"):
28
- actualdata = yf.download(ticker, start=start, end=end)  # honor the end parameter
29
- return actualdata
30
-
31
- # Step 2: Custom Dataset
32
- class StockDataset(Dataset):
33
- def __init__(self, series, window_size):
34
- self.data = []
35
- for i in range(len(series) - window_size):
36
- self.data.append((series[i:i+window_size], series[i+window_size][3]))
37
-
38
- def __len__(self):
39
- return len(self.data)
40
-
41
- def __getitem__(self, idx):
42
- x, y = self.data[idx]
43
- return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)
44
-
45
- # Step 3: Transformer model
46
- class TransformerPredictor(nn.Module):
47
- def __init__(self, input_size, d_model=64, nhead=4, num_layers=2, dropout=0.1):
48
- super(TransformerPredictor, self).__init__()
49
- self.linear_in = nn.Linear(input_size, d_model)
50
- encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dropout=dropout)
51
- self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
52
- self.linear_out = nn.Linear(d_model, 1)
53
-
54
- def forward(self, src):
55
- x = self.linear_in(src) # [seq, batch, d_model]
56
- x = self.transformer(x) # [seq, batch, d_model]
57
- out = self.linear_out(x[-1]) # [batch, 1]
58
- return out.squeeze()
59
-
60
- # Step 4: Training function
61
- def train_model(model, dataloader, epochs, lr=0.001):
62
- optimizer = torch.optim.Adam(model.parameters(), lr=lr)
63
- loss_fn = nn.MSELoss()
64
- for epoch in range(epochs):
65
- for x, y in dataloader:
66
- x = x.permute(1, 0, 2) # [batch, seq, features] -> [seq, batch, features]
67
- pred = model(x)
68
- loss = loss_fn(pred, y)
69
- optimizer.zero_grad()
70
- loss.backward()
71
- optimizer.step()
72
- print("Epoch {}/{} - Loss: {:.4f}".format(epoch+1, epochs, loss.item()))
73
-
74
- # Step 5: Run pipeline
75
- def run_stock_prediction(ticker):
76
- df = fetch_stock_data_with_indicators(ticker)
77
- scaler = MinMaxScaler()
78
- scaled_data = scaler.fit_transform(df.values)
79
-
80
- window_size = 20
81
- dataset = StockDataset(scaled_data, window_size)
82
- dataloader = DataLoader(dataset, batch_size=32, shuffle=True)
83
-
84
- input_size = scaled_data.shape[1]
85
- model = TransformerPredictor(input_size=input_size)
86
- train_model(model, dataloader, epochs=2)
87
-
88
-
89
- # Predict the next 30 business days
90
- predictions = []
91
- input_seq = scaled_data[-window_size:].copy() # shape: [20, features]
92
-
93
- for i in range(30):
94
- seq_tensor = torch.tensor(input_seq, dtype=torch.float32).unsqueeze(1) # [seq_len, 1, features]
95
-
96
- with torch.no_grad():
97
- predicted_scaled = model(seq_tensor).item()
98
-
99
- # Create new row based on last row, replace only Close price (index 3)
100
- new_row = input_seq[-1].copy()
101
- new_row[3] = predicted_scaled
102
-
103
- # Inverse scale to get actual Close price
104
- predicted_row = scaler.inverse_transform([new_row])[0]
105
- predicted_close = predicted_row[3]
106
- predictions.append(predicted_close)
107
-
108
- # Slide window: remove first row, append new row
109
- input_seq = np.vstack([input_seq[1:], [new_row]])
110
-
111
- # Get the last date from the dataset
112
- last_date = df.index[-1]
113
- predicted_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=30, freq='B') # Business days
114
-
115
-
116
- prediction_results = pd.DataFrame({
117
- 'Date': predicted_dates,
118
- 'Predicted Close': predictions,
119
-
120
-
121
- })
122
-
123
- return prediction_results
124
-
125
-
126
-
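The core of the deleted inference loop is feeding each prediction back into the input window; the mechanism in isolation (a standalone sketch with toy shapes):

    # Sketch of the sliding-window autoregressive update used above (toy data).
    import numpy as np

    window = np.zeros((20, 7))      # [window_size, n_features], as in StockDataset
    predicted_scaled = 0.42         # stand-in for the model output
    new_row = window[-1].copy()
    new_row[3] = predicted_scaled   # only the Close column (index 3) is replaced
    window = np.vstack([window[1:], [new_row]])  # drop the oldest row, append the new one
    assert window.shape == (20, 7)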
prediction.py DELETED
@@ -1,257 +0,0 @@
1
- import os, re, joblib, numpy as np, pandas as pd, sklearn
2
- from sklearn.ensemble import ExtraTreesRegressor
3
-
4
- PRICE_COLS = ["Close Price", "Highest Price", "Lowest Price"]
5
-
6
- def _drop_unnamed(df: pd.DataFrame) -> pd.DataFrame:
7
- to_drop = [c for c in df.columns if str(c).startswith("Unnamed")]
8
- return df.drop(columns=to_drop) if to_drop else df
9
-
10
- def _read_excel_loose_header(xlsx_path: str) -> pd.DataFrame:
11
-
12
- raw = pd.read_excel(xlsx_path, engine='openpyxl', header=None)
13
- first_row = [str(x) for x in raw.iloc[0].tolist()]
14
- header_row = 0 if any("Close Price" in s for s in first_row) else 1
15
- return pd.read_excel(xlsx_path, engine='openpyxl', header=header_row)
16
-
17
- def _map_training_indicators(df: pd.DataFrame) -> pd.DataFrame:
18
-
19
- def map_series(s: pd.Series):
20
- if s.dtype == 'O':
21
- cleaned = s.astype(str).str.strip()
22
- cleaned = cleaned.replace({'nan': np.nan, 'NaN': np.nan, 'None': np.nan, '': np.nan})
23
- return cleaned.map({'Red': 0, 'Yellow': 1, 'Green': 2})
24
- return s
25
- out = df.copy()
26
- for col in out.columns:
27
- if col not in PRICE_COLS:
28
- out[col] = map_series(out[col])
29
- return out
30
-
31
- def _map_testing_indicators(df: pd.DataFrame) -> pd.DataFrame:
32
-
33
- def map_series(s: pd.Series):
34
- if s.dtype == 'O':
35
- cleaned = s.astype(str).str.strip()
36
- cleaned = cleaned.replace({'nan': np.nan, 'NaN': np.nan, 'None': np.nan, '': np.nan})
37
- return cleaned.map({'Red': 0, 'Yellow': 1, 'Green': 2})
38
-
39
- return s.replace({10: 2, 5: 1, 0: 0})
40
- out = df.copy()
41
- for col in out.columns:
42
- if col not in PRICE_COLS:
43
- out[col] = map_series(out[col])
44
- return out
45
-
46
- def _find_target_cols(df: pd.DataFrame):
47
- if "Highest Price" not in df.columns or "Lowest Price" not in df.columns:
48
- raise ValueError("Excel must contain 'Highest Price' and 'Lowest Price' columns.")
49
- return "Highest Price", "Lowest Price"
50
-
51
- def load_or_train_highlow_model(xlsx_path: str, model_path: str):
52
-
53
- def _is_cache_fresh():
54
- return os.path.exists(model_path) and os.path.getmtime(model_path) >= os.path.getmtime(xlsx_path)
55
-
56
- if os.path.exists(model_path) and _is_cache_fresh():
57
- obj = joblib.load(model_path)
58
- if isinstance(obj, dict) and {'model','features','medians'} <= set(obj.keys()):
59
- return obj
60
-
61
- if not os.path.exists(xlsx_path):
62
- raise FileNotFoundError(f"Training Excel not found at: {xlsx_path}")
63
-
64
- df = _read_excel_loose_header(xlsx_path)
65
- df = _drop_unnamed(df)
66
-
67
- y_high, y_low = _find_target_cols(df)
68
- df_mapped = _map_training_indicators(df)
69
-
70
- X = df_mapped.drop(columns=[y_high, y_low]).apply(pd.to_numeric, errors='coerce')
71
- y = df_mapped[[y_high, y_low]].apply(pd.to_numeric, errors='coerce')
72
-
73
- med = X.median(numeric_only=True)
74
- X = X.fillna(med)
75
- y = y.fillna(y.median(numeric_only=True))
76
-
77
- model = ExtraTreesRegressor(
78
- n_estimators=300,
79
- random_state=42,
80
- n_jobs=-1,
81
- max_depth=None,
82
- min_samples_leaf=2,
83
- )
84
- model.fit(X.values, y.values)
85
-
86
- bundle = {
87
- 'model': model,
88
- 'features': X.columns.tolist(),
89
- 'medians': med.to_dict(),
90
- 'sklearn_version': sklearn.__version__,
91
- 'trained_rows': int(X.shape[0]),
92
- }
93
- os.makedirs(os.path.dirname(model_path), exist_ok=True)
94
- joblib.dump(bundle, model_path)
95
- return bundle
96
-
97
- def _to_num(v):
98
- import pandas as pd
99
- if isinstance(v, (list, tuple, pd.Series, np.ndarray)):
100
- if len(v) == 0:
101
- return 0.0
102
- return _to_num(v[-1])
103
- if isinstance(v, dict):
104
- numeric_vals = [vv for vv in v.values() if isinstance(vv, (int, float, np.number))]
105
- if numeric_vals:
106
- best = max(numeric_vals)
107
- return 1.0 if float(best) > 0 else 0.0
108
- return 1.0 if any(bool(vv) for vv in v.values()) else 0.0
109
- if isinstance(v, (bool, int, float, np.number)):
110
- try:
111
- return float(v)
112
- except Exception:
113
- return 0.0
114
- if isinstance(v, str):
115
- s = v.strip().lower()
116
- if s in {"buy", "bullish", "long", "breakout", "yes", "true", "dbuy"}:
117
- return 1.0
118
- if s in {"sell", "bearish", "short", "no", "false"}:
119
- return 0.0
120
- try:
121
- return float(v)
122
- except Exception:
123
- return 0.0
124
- try:
125
- return float(v)
126
- except Exception:
127
- return 0.0
128
-
129
- def build_current_features_row_23k(
130
- ticker: str,
131
- stock_data: pd.DataFrame,
132
- rsi_trade_signal: dict,
133
- macd_trade_signal: dict,
134
- ema_trade_signal: dict,
135
- atr_trade_signal: dict,
136
- adx_trade_signal: dict,
137
- bb_trade_signal: dict,
138
- sr_trade_signal: dict,
139
- priceaction_trade_signal: dict,
140
- fibo_trade_signal: dict,
141
- overall_ta_score: float,
142
- ) -> pd.DataFrame:
143
-
144
- last_close = _to_num(stock_data['close'].iloc[-1])
145
-
146
- rsi_sig = rsi_trade_signal.get('rsi_signals', {}) or {}
147
- macd_sig = macd_trade_signal.get('macd_signals', {}) or {}
148
- atr_sig = atr_trade_signal.get('atr_signals', {}) or {}
149
- ema_sig = ema_trade_signal.get('ema_signals', {}) or {}
150
- adx_sig = adx_trade_signal.get('adx_signals', {}) or {}
151
- bb_sig = bb_trade_signal.get('bollinger_signals', {}) or {}
152
- sr_sig = sr_trade_signal.get('support_resistance_signals', {}) or {}
153
- pa_sig = priceaction_trade_signal.get('priceaction_signals', {}) or {}
154
- fib_sig = priceaction_trade_signal.get('fib_signals') or fibo_trade_signal.get('fib_signals', {})
155
-
156
- def sig_num(d, key): return _to_num(d.get(key, 0))
157
-
158
- row = {
159
- "TA Score": _to_num(overall_ta_score),
160
- "Close Price": last_close,
161
-
162
- # RSI
163
- "RSI": _to_num(rsi_trade_signal.get('rsi_score', 0)),
164
- "Overbought/Oversold": sig_num(rsi_sig, "Overbought/Oversold"),
165
- "RSI Swing Rejection": sig_num(rsi_sig, "RSI Swing Rejection"),
166
- "RSI Divergence": sig_num(rsi_sig, "RSI Divergence"),
167
- "RSI_Bollinger Band": sig_num(rsi_sig, "RSI_Bollinger Band"),
168
- "RSI 5/14 Crossover": sig_num(rsi_sig, "RSI 5/14 Crossover"),
169
- "RSI Trend 50 Confirmation": sig_num(rsi_sig, "RSI Trend 50 Confirmation"),
170
- "RSI_MA": _to_num(rsi_sig.get("RSI_MA", rsi_trade_signal.get("ma", 0))),
171
- "Mean Reversion": sig_num(rsi_sig, "Mean Reversion"),
172
-
173
- # MACD
174
- "MACD": _to_num(macd_trade_signal.get('macd_score', 0)),
175
- "MACD Line Crossover": sig_num(macd_sig, "MACD Line Crossover"),
176
- "MACD Zero-Line Crossover": sig_num(macd_sig, "MACD Zero-Line Crossover"),
177
- "MACD Divergence": sig_num(macd_sig, "MACD Divergence"),
178
- "Hidden Divergence": sig_num(macd_sig, "Hidden Divergence"),
179
- "MACD Volume": sig_num(macd_sig, "MACD Volume"),
180
- "MACD Momentum": sig_num(macd_sig, "MACD Momentum"),
181
-
182
- # ATR
183
- "ATR": _to_num(atr_trade_signal.get('atr_score', 0)),
184
- "ATR Breakout": sig_num(atr_sig, "ATR Breakout"),
185
- "ATR Expansion": sig_num(atr_sig, "ATR Expansion"),
186
- "ATR Squeeze": sig_num(atr_sig, "ATR Squeeze"),
187
- "ATR Trend Reversal": sig_num(atr_sig, "ATR Trend Reversal"),
188
-
189
- # EMA
190
- "EMA": _to_num(ema_trade_signal.get('ema_score', 0)),
191
- "EMA Crossover": sig_num(ema_sig, "EMA Crossover"),
192
- "EMA Price Crossover": sig_num(ema_sig, "EMA Price Crossover"),
193
- "EMA Slope": sig_num(ema_sig, "EMA Slope"),
194
- "Triple EMA": sig_num(ema_sig, "Triple EMA"),
195
-
196
- # ADX
197
- "ADX": _to_num(adx_trade_signal.get('adx_score', 0)),
198
- "ADX + DI Crossover": sig_num(adx_sig, "ADX + DI Crossover"),
199
- "ADX Breakout": sig_num(adx_sig, "ADX Breakout"),
200
- "ADX Slope": sig_num(adx_sig, "ADX Slope"),
201
- "ADX Divergence": sig_num(adx_sig, "ADX Divergence"),
202
-
203
- # Fibonacci
204
- "Fibo": _to_num(fibo_trade_signal.get('fib_score', 0)),
205
- "Fibonacci Retracement Bounce": sig_num(fib_sig, "Fibonacci Retracement Bounce"),
206
- "Fibonacci Breakout": sig_num(fib_sig, "Fibonacci Breakout"),
207
- "Golden Pocket Reversal": sig_num(fib_sig, "Golden Pocket Reversal"),
208
- "Fibonacci Confluence": sig_num(fib_sig, "Fibonacci Confluence"),
209
-
210
- # Bollinger
211
- "BB": _to_num(bb_trade_signal.get('bollinger_score', 0)),
212
- "BB Squeeze": sig_num(bb_sig, "BB Squeeze"),
213
- "BB Breakout": sig_num(bb_sig, "BB Breakout"),
214
- "BB Breakout Reversal": sig_num(bb_sig, "BB Breakout Reversal"),
215
- "Middle Band Pullback": sig_num(bb_sig, "Middle Band Pullback"),
216
-
217
-
218
- "SR": _to_num(sr_trade_signal.get('sr_score', 0)),
219
- "Breakout": sig_num(sr_sig, "Breakout"),
220
- "Reversal": sig_num(sr_sig, "Reversal"),
221
- "Flip": sig_num(sr_sig, "Flip"),
222
- "SR_Retest": sig_num(sr_sig, "SR_Retest"),
223
-
224
-
225
- "PA_MS": _to_num(priceaction_trade_signal.get('priceaction_score', 0)),
226
- "Candlestick Pattern": sig_num(pa_sig, "Candlestick Pattern"),
227
- "HH_HL_LL_LH": sig_num(pa_sig, "HH_HL_LL_LH"),
228
- "Triangle Breakout": sig_num(pa_sig, "Triangle Breakout"),
229
- "Fair Value Gap": sig_num(pa_sig, "Fair Value Gap"),
230
- "BOS": sig_num(pa_sig, "BOS"),
231
- "CHoCH": sig_num(pa_sig, "CHoCH"),
232
- "Order_Block": sig_num(pa_sig, "Order_Block"),
233
- }
234
-
235
- return pd.DataFrame([row]).replace([np.inf, -np.inf], np.nan)
236
-
237
- def _prepare_test_currentrow(current_row_df: pd.DataFrame, feature_cols, train_medians: dict):
238
- df = _map_testing_indicators(current_row_df.copy())
239
- X = df.reindex(columns=feature_cols).apply(pd.to_numeric, errors='coerce')
240
- X = X.fillna(pd.Series(train_medians))
241
- return X
242
-
243
- def predict_high_low_for_current_row(bundle: dict, current_row_df: pd.DataFrame, live_close: float):
244
-
245
- feature_cols = bundle['features']
246
- medians = bundle['medians']
247
- model: ExtraTreesRegressor = bundle['model']
248
-
249
- X = _prepare_test_currentrow(current_row_df, feature_cols, medians)
250
- preds = model.predict(X.values)
251
- high_pred, low_pred = float(preds[0, 0]), float(preds[0, 1])
252
-
253
- if not np.isnan(live_close):
254
- high_pred = max(high_pred, float(live_close))
255
- low_pred = min(low_pred, float(live_close))
256
-
257
- return round(high_pred, 2), round(low_pred, 2)
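For reference, the _to_num coercion above handles heterogeneous signal payloads; its behavior on the main input shapes, read directly off the code (a sketch, assuming the function is imported):

    # Behavior of the deleted _to_num coercion, per the code above.
    assert _to_num(7) == 7.0
    assert _to_num("Buy") == 1.0              # bullish strings map to 1.0
    assert _to_num("sell") == 0.0
    assert _to_num([1.0, 2.0, 3.0]) == 3.0    # sequences: recurse on the last element
    assert _to_num({"a": 5, "b": -2}) == 1.0  # dicts: 1.0 if the max numeric value > 0
    assert _to_num(None) == 0.0               # anything unparseable falls back to 0.0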