nanfangwuyu21 committed on
Commit
868a18b
·
1 Parent(s): f0fcc0f

Update library imports (especially langchain), add more APIs, complete test for generation

Browse files
.gitignore CHANGED
@@ -6,3 +6,6 @@ data/
6
  docker/
7
  k8s/
8
  scripts/
 
 
 
 
6
  docker/
7
  k8s/
8
  scripts/
9
+ app/Archived
10
+ Archived/
11
+ .env
app/apis/generator.py CHANGED
@@ -21,8 +21,10 @@ class ChapterInput(BaseModel):
21
  @router.post("/generate")
22
  def generate_chapter(input: ChapterInput):
23
  result = chapter_chain(query=input.query)
24
- summary_chain(result) # auto-call summary_chain
25
- return result
 
 
26
 
27
  class ChapterOutput(BaseModel):
28
  chapter: str
@@ -34,6 +36,28 @@ def change_chapter(input: ChapterOutput):
34
  summary_chain(input.chapter, chapter_num=input.chapter_num)
35
  return True
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  # @router.post("/generate")
39
  # def generate_text(request: GenerateRequest):
 
21
  @router.post("/generate")
22
  def generate_chapter(input: ChapterInput):
23
  result = chapter_chain(query=input.query)
24
+ print("chapter generated")
25
+ summary = summary_chain(result) # auto-call summary_chain
26
+ print("summary generated")
27
+ return {"chapter": result, "summary": summary}
28
 
29
  class ChapterOutput(BaseModel):
30
  chapter: str
 
36
  summary_chain(input.chapter, chapter_num=input.chapter_num)
37
  return True
38
 
39
+ @router.post("/get_all")
40
+ def get_all_chapters():
41
+ all_chapters = []
42
+ for filename in os.listdir("data/samples/raws"):
43
+ if filename.endswith(".txt"):
44
+ with open(os.path.join("data/samples/raws", filename), "r", encoding="utf-8") as f:
45
+ content = f.read()
46
+ all_chapters.append({"filename": filename, "content": content})
47
+ all_chapters.sort(key=lambda x: int(x["filename"].split("_")[1].split(".")[0]))
48
+ return all_chapters
49
+
50
+ @router.post("/get_one")
51
+ def get_one_chapter(chapter_num: int):
52
+ filename = f"chapter_{chapter_num:03}.txt"
53
+ filepath = os.path.join("data/samples/raws", filename)
54
+ if os.path.exists(filepath):
55
+ with open(filepath, "r", encoding="utf-8") as f:
56
+ content = f.read()
57
+ return {"filename": filename, "content": content}
58
+ else:
59
+ return {"error": "Chapter not found."}
60
+
61
 
62
  # @router.post("/generate")
63
  # def generate_text(request: GenerateRequest):
app/main.py CHANGED
@@ -5,7 +5,6 @@ from app.apis import generator, extractor
5
  app = FastAPI(title="AI Novelist RAG")
6
 
7
  app.include_router(generator.router)
8
- app.include_router(test.router)
9
  app.include_router(extractor.router)
10
 
11
 
 
5
  app = FastAPI(title="AI Novelist RAG")
6
 
7
  app.include_router(generator.router)
 
8
  app.include_router(extractor.router)
9
 
10
 
app/managers/chapter_manager.py CHANGED
@@ -1,26 +1,15 @@
1
- from langchain.chains import RetrievalQA
2
- from langchain.chains.llm import LLMChain
3
- from langchain.prompts import PromptTemplate
4
- from langchain.llms import OpenAI
5
- # from app.utils.memory import faiss_index
6
- from langchain.vectorstores import FAISS
7
- import numpy as np
8
  from app.models.model import LLM
9
  from app.utils.prompts import get_chapter_generation_prompt
10
  from app.managers import vector_manager as vm
11
  import os
12
- from langchain.docstore import InMemoryDocstore
13
- from langchain_huggingface import HuggingFaceEmbeddings
14
- from langchain.schema import Document
15
- import faiss
16
 
17
  SAVE_DIR = "data/samples/raws"
18
  os.makedirs(SAVE_DIR, exist_ok=True)
19
 
20
 
21
 
22
- def get_latest_chapter_num():
23
- vectorstore = vm.load_vectorstore("summary")
24
  all_docs = vectorstore.docstore._dict.values()
25
  chapter_numbers = [doc.metadata.get("chapter", 0) for doc in all_docs if isinstance(doc.metadata.get("chapter", 0), int)]
26
  latest_chapter_num = max(chapter_numbers) if chapter_numbers else 0
@@ -48,7 +37,7 @@ def setup_prompt(query: str, context_info):
48
 
49
 
50
  def generate_chapter(prompt):
51
- generated_chapter = LLM.generate(prompt, max_tokens=512)
52
  return generated_chapter
53
 
54
  def save_chapter_to_file(chapter: str, chapter_num: int):
@@ -78,8 +67,8 @@ def chapter_chain(query):
78
  context_info = get_latest_and_relevant_chapter_summaries(query)
79
  prompt = setup_prompt(query, context_info)
80
  chapter = generate_chapter(prompt)
81
- chapter_num = context_info[0] + 1
82
- save_chapter_to_file(chapter, chapter_num)
83
- add_chapter(chapter, chapter_num)
84
- return True
85
 
 
 
 
 
 
 
 
 
1
  from app.models.model import LLM
2
  from app.utils.prompts import get_chapter_generation_prompt
3
  from app.managers import vector_manager as vm
4
  import os
 
 
 
 
5
 
6
  SAVE_DIR = "data/samples/raws"
7
  os.makedirs(SAVE_DIR, exist_ok=True)
8
 
9
 
10
 
11
+ def get_latest_chapter_num(store_type="summary"):
12
+ vectorstore = vm.load_vectorstore(store_type)
13
  all_docs = vectorstore.docstore._dict.values()
14
  chapter_numbers = [doc.metadata.get("chapter", 0) for doc in all_docs if isinstance(doc.metadata.get("chapter", 0), int)]
15
  latest_chapter_num = max(chapter_numbers) if chapter_numbers else 0
 
37
 
38
 
39
  def generate_chapter(prompt):
40
+ generated_chapter = LLM.generate(prompt, max_tokens=2048)
41
  return generated_chapter
42
 
43
  def save_chapter_to_file(chapter: str, chapter_num: int):
 
67
  context_info = get_latest_and_relevant_chapter_summaries(query)
68
  prompt = setup_prompt(query, context_info)
69
  chapter = generate_chapter(prompt)
70
+ new_chapter_num = context_info[0] + 1
71
+ save_chapter_to_file(chapter, new_chapter_num)
72
+ add_chapter(chapter, new_chapter_num)
73
+ return chapter
74
 
app/managers/summary_manager.py CHANGED
@@ -1,5 +1,4 @@
1
  import os
2
- from langchain.schema import Document
3
  from app.models.model import Summary_Model
4
  from app.managers import vector_manager as vm
5
  from app.managers.chapter_manager import get_latest_chapter_num, chapter_chain
@@ -47,9 +46,12 @@ def get_relevant_summaries(query: str, top_k: int = 10):
47
 
48
  def summary_chain(text: str, chapter_num: int = None) -> str:
49
  new_chapter_num = get_latest_chapter_num() + 1
 
50
  if chapter_num is None or chapter_num >= new_chapter_num:
51
  chapter_num = new_chapter_num
 
52
  summary = generate_summary(text)
 
53
  save_summary_to_file(summary, chapter_num)
54
  add_summary(summary, chapter_num)
55
  else:
 
1
  import os
 
2
  from app.models.model import Summary_Model
3
  from app.managers import vector_manager as vm
4
  from app.managers.chapter_manager import get_latest_chapter_num, chapter_chain
 
46
 
47
  def summary_chain(text: str, chapter_num: int = None) -> str:
48
  new_chapter_num = get_latest_chapter_num() + 1
49
+ print("I'm here 1")
50
  if chapter_num is None or chapter_num >= new_chapter_num:
51
  chapter_num = new_chapter_num
52
+ print("I'm here 2")
53
  summary = generate_summary(text)
54
+ print("I'm here 3")
55
  save_summary_to_file(summary, chapter_num)
56
  add_summary(summary, chapter_num)
57
  else:
app/managers/vector_manager.py CHANGED
@@ -1,8 +1,9 @@
1
  import os
2
  import faiss
3
  from typing import List, Optional
4
- from langchain.vectorstores import FAISS
5
- from langchain.docstore import InMemoryDocstore
 
6
  from langchain.schema import Document
7
  from app.models.model import Embedding_model
8
 
@@ -29,11 +30,13 @@ def load_vectorstore(store_type: str) -> FAISS:
29
  assert store_type in VECTORSTORE_TYPES, "Invalid vectorstore type."
30
  path = os.path.join(BASE_PATH, VECTORSTORE_TYPES[store_type])
31
 
32
- if os.path.exists(path):
 
33
  return FAISS.load_local(path, Embedding_model, allow_dangerous_deserialization=True)
34
  else:
 
35
  vs = create_new_vectorstore(Embedding_model)
36
- vs.save_local(path)
37
  return vs
38
 
39
 
 
1
  import os
2
  import faiss
3
  from typing import List, Optional
4
+ from langchain_community.vectorstores import FAISS
5
+ # from langchain.docstore import InMemoryDocstore
6
+ from langchain_community.docstore.in_memory import InMemoryDocstore
7
  from langchain.schema import Document
8
  from app.models.model import Embedding_model
9
 
 
30
  assert store_type in VECTORSTORE_TYPES, "Invalid vectorstore type."
31
  path = os.path.join(BASE_PATH, VECTORSTORE_TYPES[store_type])
32
 
33
+ if os.path.exists(os.path.join(path, 'index.faiss')):
34
+ print("Reload existing faiss")
35
  return FAISS.load_local(path, Embedding_model, allow_dangerous_deserialization=True)
36
  else:
37
+ print("Create new faiss")
38
  vs = create_new_vectorstore(Embedding_model)
39
+ save_vectorstore(vs, store_type)
40
  return vs
41
 
42
 
app/models/model.py CHANGED
@@ -1,4 +1,4 @@
1
- from app.models.tinyllama import TinyLlamaModel
2
  from app.models.gpt4omini import GPT4OMini
3
  from app.models.bart_large_cnn import BartSummaryModel
4
  from langchain_huggingface import HuggingFaceEmbeddings
 
1
+ # from app.models.tinyllama import TinyLlamaModel
2
  from app.models.gpt4omini import GPT4OMini
3
  from app.models.bart_large_cnn import BartSummaryModel
4
  from langchain_huggingface import HuggingFaceEmbeddings