Spaces:

AlserFurma
/

LipSyncAI

Running

App Files Files Community

AlserFurma committed 6 days ago

Commit

7802c36

verified ·

1 Parent(s): bd33908

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -68

app.py CHANGED Viewed

@@ -54,8 +54,12 @@ except Exception as e:
 # Вспомогательные функции
 # =========================
 def generate_quiz(text: str):
-    """ Генерирует один вопрос и два варианта ответа на основе текста. """
-    # (Оставляем как есть, без изменений)
     try:
         sentences = [s.strip() for s in text.replace("!", ".").replace("?", ".").split(".") if s.strip()]
         if len(sentences) < 1:
@@ -63,6 +67,7 @@ def generate_quiz(text: str):
         algo = random.choice([1, 2, 3])
         if algo == 1:  # Базовый алгоритм
             question_sentence = random.choice(sentences)
             words = question_sentence.split()
@@ -76,6 +81,7 @@ def generate_quiz(text: str):
             wrong_words = wrong_sentence.split()
             wrong_answer = " ".join(wrong_words[:6]) + ("..." if len(wrong_words) > 6 else "")
         elif algo == 2:  # Пропуск ключевого слова
             question_sentence = random.choice(sentences)
             words = question_sentence.split()
@@ -85,8 +91,10 @@ def generate_quiz(text: str):
                 correct_answer = key_word
                 wrong_answer = random.choice([w for w in words if w != key_word] or ["другое"])
             else:
                 return generate_quiz(text)
         elif algo == 3:  # Вопрос о числе или дате
             import re
             question_sentence = random.choice(sentences)
@@ -97,6 +105,7 @@ def generate_quiz(text: str):
                 correct_answer = number
                 wrong_answer = str(int(number)+random.randint(1,5))
             else:
                 return generate_quiz(text)
         options = [correct_answer, wrong_answer]
@@ -107,7 +116,6 @@ def generate_quiz(text: str):
 def synthesize_audio(text_ru: str):
     """Переводит русскую строку на казахский, синтезирует аудио и возвращает путь к файлу .wav"""
-    # (Оставляем как есть, с нормализацией)
     translation = translator(text_ru, src_lang="rus_Cyrl", tgt_lang="kaz_Cyrl")
     text_kk = translation[0]["translation_text"]
@@ -129,14 +137,13 @@ def synthesize_audio(text_ru: str):
 def concatenate_audio_files(audio_files):
     """Объединяет несколько аудио файлов в один с паузами между ними"""
-    # (Оставляем как есть)
     combined = AudioSegment.empty()
     pause = AudioSegment.silent(duration=1000)  # 1 секунда паузы
     for i, audio_file in enumerate(audio_files):
         audio = AudioSegment.from_wav(audio_file)
         combined += audio
-        if i < len(audio_files) - 1:
             combined += pause
     output_file = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
@@ -146,7 +153,6 @@ def concatenate_audio_files(audio_files):
 def make_talking_head(image_path: str, audio_path: str, max_retries=3):
     """Вызывает SkyReels/Talking Head space и возвращает путь или URL видео."""
-    # (Оставляем как есть)
     for attempt in range(max_retries):
         try:
             client = Client(TALKING_HEAD_SPACE)
@@ -188,7 +194,7 @@ def make_talking_head(image_path: str, audio_path: str, max_retries=3):
 # Основные обработчики для Gradio
 # =========================
 def start_lesson(image: Image.Image, text: str, state):
-    """Генерирует все сегменты видео заранее и начинает интерактивную лекцию"""
     if image is None or not text.strip() or len(text) > 500:
         return None, "Пожалуйста, загрузите фото и введите текст лекции (до 500 символов)", gr.update(visible=False), gr.update(visible=False), state
@@ -204,58 +210,51 @@ def start_lesson(image: Image.Image, text: str, state):
         # Генерируем вопрос
         question, options, correct = generate_quiz(text)
-        # Создаем аудио для всех частей заранее
-        audio_files = []  # Для лекции + вопрос + варианты
-        reaction_audios = {}  # Для реакций
-        # 1. Аудио лекции
-        audio_lecture = synthesize_audio(text)
-        audio_files.append(audio_lecture)
-        # 2. Аудио вопроса
         question_text = f"А теперь вопрос: {question}"
-        audio_question = synthesize_audio(question_text)
-        audio_files.append(audio_question)
-        # 3. Аудио вариантов
         options_text = f"Первый вариант: {options[0]}. Второй вариант: {options[1]}"
-        audio_options = synthesize_audio(options_text)
-        audio_files.append(audio_options)
-        # Объединяем аудио для основного видео (лекция + вопрос + варианты)
-        combined_audio_main = concatenate_audio_files(audio_files)
-        # Генерируем основное видео
-        video_main = make_talking_head(image_path, combined_audio_main)
-        # Генерируем реакции заранее
-        reaction_correct_ru = "Правильно! Отлично справились!"
-        audio_correct = synthesize_audio(reaction_correct_ru)
-        video_correct = make_talking_head(image_path, audio_correct)
-        reaction_wrong_ru = f"К сожалению неправильно. Правильный ответ был: {correct}"
-        audio_wrong = synthesize_audio(reaction_wrong_ru)
-        video_wrong = make_talking_head(image_path, audio_wrong)
-        # Сохраняем состояние (пути к видео, для последовательного показа)
         state_data = {
             'image_path': image_path,
             'correct': correct,
             'options': options,
-            'question': question,
-            'video_main': video_main,  # Первое видео: лекция + вопрос + варианты
-            'video_correct': video_correct,
-            'video_wrong': video_wrong,
-            'audio_files': audio_files + [audio_correct, audio_wrong, combined_audio_main],  # Для cleanup
-            'step': 'main'  # Текущий шаг лекции (для multi-step)
         }
-        # Удаляем временные аудио (кроме тех, что в state для позднего cleanup)
-        # (Очистку перенесём в конец сессии, если нужно)
-        question_display = f"**Вопрос:** {question} (После просмотра лекции выберите ответ)"
         return (
-            state_data['video_main'],  # Показываем основное видео сначала
             question_display,
             gr.update(value=options[0], visible=True),
             gr.update(value=options[1], visible=True),
@@ -266,35 +265,31 @@ def start_lesson(image: Image.Image, text: str, state):
         return None, f"❌ Ошибка: {e}", gr.update(visible=False), gr.update(visible=False), state
 def answer_selected(selected_option: str, state):
-    """Показывает предгенерированную реакцию и завершает шаг лекции"""
     if not state:
         return None, "❌ Ошибка: отсутствует состояние урока"
     try:
         correct = state.get('correct')
         if selected_option == correct:
-            reaction_video = state['video_correct']
-            display_message = "✅ **Дұрыс! Жарайсың!** \nЛекция завершена. Можно начать новую."
         else:
-            reaction_video = state['video_wrong']
-            display_message = f"❌ **Қате!** Дұрыс жауап: **{correct}** \nЛекция завершена. Можно начать новую."
-        # Cleanup: Удаляем все временные файлы после показа
-        for audio in state.get('audio_files', []):
-            try:
-                os.remove(audio)
-            except:
-                pass
-        for video in [state.get('video_main'), state.get('video_correct'), state.get('video_wrong')]:
-            if video and os.path.exists(video):
-                try:
-                    os.remove(video)
-                except:
-                    pass
-        # Обновляем state на 'completed' для предотвращения повторений
-        state['step'] = 'completed'
         return reaction_video, display_message
     except Exception as e:
@@ -308,8 +303,8 @@ title = "🎓 Интерактивті Бейне Мұғалім TiлГен"
 description = (
     "**Қалай жұмыс істейді:**\n"
     "1. Мұғалімнің суретін жүктеп, дәріс мәтінін енгізіңіз (орыс, 500 таңбаға дейін)\n"
-    "2. 'Сабақты бастау' түймесін басыңыз — генерируется вся лекция заранее, показывается видео с текстом, вопросом и вариантами\n"
-    "3. Выберите ответ — покажется реакция (предгенерированная)"
 )
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
@@ -345,14 +340,10 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     # Обработка ответов
     def handle_answer_1(state):
-        if state.get('step') != 'main':
-            return None, "Лекция уже завершена"
         option = state.get('options', [''])[0] if state else ''
         return answer_selected(option, state)
     def handle_answer_2(state):
-        if state.get('step') != 'main':
-            return None, "Лекция уже завершена"
         option = state.get('options', [''])[1] if state and len(state.get('options', [])) > 1 else ''
         return answer_selected(option, state)

 # Вспомогательные функции
 # =========================
 def generate_quiz(text: str):
+    """ Генерирует один вопрос и два варианта ответа на основе текста.
+    Алгоритмы:
+    1. Базовый: случайное предложение и первые слова.
+    2. Пропуск ключевого слова.
+    3. Вопрос о числе/дате.
+    """
     try:
         sentences = [s.strip() for s in text.replace("!", ".").replace("?", ".").split(".") if s.strip()]
         if len(sentences) < 1:
         algo = random.choice([1, 2, 3])
+        # ------------------------
         if algo == 1:  # Базовый алгоритм
             question_sentence = random.choice(sentences)
             words = question_sentence.split()
             wrong_words = wrong_sentence.split()
             wrong_answer = " ".join(wrong_words[:6]) + ("..." if len(wrong_words) > 6 else "")
+        # ------------------------
         elif algo == 2:  # Пропуск ключевого слова
             question_sentence = random.choice(sentences)
             words = question_sentence.split()
                 correct_answer = key_word
                 wrong_answer = random.choice([w for w in words if w != key_word] or ["другое"])
             else:
+                # fallback
                 return generate_quiz(text)
+        # ------------------------
         elif algo == 3:  # Вопрос о числе или дате
             import re
             question_sentence = random.choice(sentences)
                 correct_answer = number
                 wrong_answer = str(int(number)+random.randint(1,5))
             else:
+                # fallback к базовому
                 return generate_quiz(text)
         options = [correct_answer, wrong_answer]
 def synthesize_audio(text_ru: str):
     """Переводит русскую строку на казахский, синтезирует аудио и возвращает путь к файлу .wav"""
     translation = translator(text_ru, src_lang="rus_Cyrl", tgt_lang="kaz_Cyrl")
     text_kk = translation[0]["translation_text"]
 def concatenate_audio_files(audio_files):
     """Объединяет несколько аудио файлов в один с паузами между ними"""
     combined = AudioSegment.empty()
     pause = AudioSegment.silent(duration=1000)  # 1 секунда паузы
     for i, audio_file in enumerate(audio_files):
         audio = AudioSegment.from_wav(audio_file)
         combined += audio
+        if i < len(audio_files) - 1:  # Не добавляем паузу после последнего файла
             combined += pause
     output_file = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
 def make_talking_head(image_path: str, audio_path: str, max_retries=3):
     """Вызывает SkyReels/Talking Head space и возвращает путь или URL видео."""
     for attempt in range(max_retries):
         try:
             client = Client(TALKING_HEAD_SPACE)
 # Основные обработчики для Gradio
 # =========================
 def start_lesson(image: Image.Image, text: str, state):
+    """Создает одно видео: текст лекции + вопрос с вариантами ответа"""
     if image is None or not text.strip() or len(text) > 500:
         return None, "Пожалуйста, загрузите фото и введите текст лекции (до 500 символов)", gr.update(visible=False), gr.update(visible=False), state
         # Генерируем вопрос
         question, options, correct = generate_quiz(text)
+        # Создаем три аудио файла
+        audio_files = []
+        # 1. Текст лекции
+        audio1 = synthesize_audio(text)
+        audio_files.append(audio1)
+        # 2. Вопрос
         question_text = f"А теперь вопрос: {question}"
+        audio2 = synthesize_audio(question_text)
+        audio_files.append(audio2)
+        # 3. Варианты ответа
         options_text = f"Первый вариант: {options[0]}. Второй вариант: {options[1]}"
+        audio3 = synthesize_audio(options_text)
+        audio_files.append(audio3)
+        # Объединяем все аудио в одно
+        combined_audio = concatenate_audio_files(audio_files)
+        # Создаем одно видео с полным содержанием
+        video_path = make_talking_head(image_path, combined_audio)
+        # Сохраняем состояние
         state_data = {
             'image_path': image_path,
             'correct': correct,
             'options': options,
+            'question': question
         }
+        # Удаляем временные аудио файлы
+        for audio_file in audio_files:
+            try:
+                os.remove(audio_file)
+            except:
+                pass
+        try:
+            os.remove(combined_audio)
+        except:
+            pass
+        question_display = f"**Вопрос:** {question}"
         return (
+            video_path,
             question_display,
             gr.update(value=options[0], visible=True),
             gr.update(value=options[1], visible=True),
         return None, f"❌ Ошибка: {e}", gr.update(visible=False), gr.update(visible=False), state
 def answer_selected(selected_option: str, state):
+    """Генерирует реакцию лектора и показывает в том же окне"""
     if not state:
         return None, "❌ Ошибка: отсутствует состояние урока"
     try:
         correct = state.get('correct')
+        image_path = state.get('image_path')
         if selected_option == correct:
+            reaction_ru = "Правильно! Отлично справились!"
+            display_message = "✅ **Дұрыс! Жарайсың!**"
         else:
+            reaction_ru = f"К сожалению неправильно. Правильный ответ был: {correct}"
+            display_message = f"❌ **Қате!** Дұрыс жауап: **{correct}**"
+        # Создаем аудио с реакцией
+        audio_path = synthesize_audio(reaction_ru)
+        # Создаем видео с реакцией
+        reaction_video = make_talking_head(image_path, audio_path)
+        try:
+            os.remove(audio_path)
+        except:
+            pass
         return reaction_video, display_message
     except Exception as e:
 description = (
     "**Қалай жұмыс істейді:**\n"
     "1. Мұғалімнің суретін жүктеп, дәріс мәтінін енгізіңіз (орыс, 500 таңбаға дейін)\n"
+    "2. 'Сабақты бастау' түймесін басыңыз-мұғалім мәтінді оқып, сұрақ қояды\n"
+    "3. Дұрыс жауапты таңдаңыз-мұғалім сіздің жауабыңызға жауап береді"
 )
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     # Обработка ответов
     def handle_answer_1(state):
         """Handle a click on the first answer button.
         Looks up the first entry of state['options'] and forwards it, together
         with the state, to answer_selected, returning that call's result.
         An empty string is forwarded when the state is empty/None or when it
         holds no options, mirroring the length guard in handle_answer_2.
         NOTE(review): assumes `state` is a dict-like lesson state (it is only
         accessed via .get here) - confirm against the gr.State wiring.
         """
         # Guard both a missing state and a missing/empty 'options' list so a
         # click before a lesson is generated cannot raise IndexError/TypeError.
         options = state.get('options') if state else None
         option = options[0] if options else ''
         return answer_selected(option, state)
     def handle_answer_2(state):
         """Handle a click on the second answer button.
         Forwards the second entry of state['options'] (or an empty string when
         the state is falsy or holds fewer than two options) to answer_selected
         and returns that call's result.
         """
         # Only index into the options when a second entry is known to exist.
         has_second_option = state and len(state.get('options', [])) > 1
         if has_second_option:
             chosen = state.get('options', [''])[1]
         else:
             chosen = ''
         return answer_selected(chosen, state)