import streamlit as st
from app.draw_diagram import *
from app.content import *

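# Page-rendering functions for the AudioBench leaderboard. `dashboard` shows the
# project overview; each remaining function renders one evaluation-task page.
# Note: `draw`, `metrics`, and the per-task dataset dictionaries (asr_datsets,
# sqa_datasets, ...) are expected to come from the wildcard imports above
# (app.draw_diagram / app.content); they are not defined in this file.
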
def dataset_contents(dataset, metrics):

    custom_css = """
    <style>
        .my-dataset-info {
            /* background-color: #F9EBEA; */
            /* padding: 10px; */
            color: #626567;
            font-style: italic;
            font-size: 8px;
            height: auto;
        }
    </style>
    """
    st.markdown(custom_css, unsafe_allow_html=True)
    st.markdown(f"""<div class="my-dataset-info">
                    <p>DATASET INFORMATION: {dataset}</p>
                </div>""", unsafe_allow_html=True)
    st.markdown(f"""<div class="my-dataset-info">
                    <p>METRIC INFORMATION: {metrics}</p>
                </div>""", unsafe_allow_html=True)


def dashboard():

    with st.container():
        st.title("AudioBench")

        st.markdown("""
        [gh]: https://github.com/AudioLLMs/AudioBench
        [][gh]
        [][gh]
        """)

    audio_url = "https://arxiv.org/abs/2406.16020"

    st.divider()
    st.markdown("#### [AudioBench](%s)" % audio_url)
    st.markdown("##### :dizzy: A comprehensive evaluation benchmark designed for general instruction-following audio-language models")
    st.markdown('''

    ''')

    with st.container():
        left_co, center_co, right_co = st.columns([0.5, 1, 0.5])
        with center_co:
            st.image("./style/audio_overview.png",
                     caption="Overview of the datasets in AudioBench.",
                     use_column_width=True)

        st.markdown('''

        ''')

        st.markdown("###### :dart: Our Benchmark includes: ")
        cols = st.columns(10)
        cols[1].metric(label="Tasks", value="8")
        cols[2].metric(label="Datasets", value="26")
        cols[3].metric(label="Test Models", value="5")

    st.divider()
    with st.container():
        st.markdown("##### Citations")

        st.markdown('''
        :round_pushpin: AudioBench Paper \n
        @article{wang2024audiobench,
            title={AudioBench: A Universal Benchmark for Audio Large Language Models},
            author={Wang, Bin and Zou, Xunlong and Lin, Geyu and Sun, Shuo and Liu, Zhuohan and Zhang, Wenyu and Liu, Zhengyuan and Aw, AiTi and Chen, Nancy F},
            journal={arXiv preprint arXiv:2406.16020},
            year={2024}
        }
        ''')


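# Each task page below follows the same pattern: list the available test sets,
# let the user pick one in a selectbox, describe the dataset and metric via
# dataset_contents(), then plot results with draw(category, task, dataset, metric).
# The category codes 'su', 'asu', and 'vu' presumably group the tasks into
# speech understanding, audio-scene understanding, and voice understanding
# (an assumption based on how the tasks are grouped here, not stated in this file).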
def asr():
    st.title("Automatic Speech Recognition")

    filters_levelone = ['LibriSpeech-Test-Clean',
                        'LibriSpeech-Test-Other',
                        'Common-Voice-15-En-Test',
                        'Peoples-Speech-Test',
                        'GigaSpeech-Test',
                        'Earnings21-Test',
                        'Earnings22-Test',
                        'Tedlium3-Test',
                        'Tedlium3-Long-form-Test',
                        'IMDA-Part1-ASR-Test',
                        'IMDA-Part2-ASR-Test']

    left, center, _, middle, right = st.columns([0.2, 0.2, 0.2, 0.2, 0.2])

    with left:
        filter_1 = st.selectbox('Select Dataset', filters_levelone)

    if filter_1:
        dataset_contents(asr_datsets[filter_1], metrics['wer'])
        draw('su', 'ASR', filter_1, 'wer')


def sqa():
    st.title("Speech Question Answering")

    binary = ['CN-College-Listen-MCQ-Test', 'DREAM-TTS-MCQ-Test']

    rest = ['SLUE-P2-SQA5-Test',
            'Public-SG-Speech-QA-Test',
            'Spoken-Squad-v1']

    filters_levelone = binary + rest

    left, center, _, middle, right = st.columns([0.2, 0.2, 0.2, 0.2, 0.2])

    with left:
        filter_1 = st.selectbox('Select Dataset', filters_levelone)

    if filter_1:
        if filter_1 in binary:
            dataset_contents(sqa_datasets[filter_1], metrics['llama3_70b_judge_binary'])
            draw('su', 'SQA', filter_1, 'llama3_70b_judge_binary')
        else:
            dataset_contents(sqa_datasets[filter_1], metrics['llama3_70b_judge'])
            draw('su', 'SQA', filter_1, 'llama3_70b_judge')


def si():
    st.title("Speech Instruction")

    filters_levelone = ['OpenHermes-Audio-Test',
                        'ALPACA-Audio-Test']

    left, center, _, middle, right = st.columns([0.2, 0.2, 0.2, 0.2, 0.2])

    with left:
        filter_1 = st.selectbox('Select Dataset', filters_levelone)

    if filter_1:
        dataset_contents(si_datasets[filter_1], metrics['llama3_70b_judge'])
        draw('su', 'SI', filter_1, 'llama3_70b_judge')


def ac():
    st.title("Audio Captioning")

    filters_levelone = ['WavCaps-Test',
                        'AudioCaps-Test']
    filters_leveltwo = ['Llama3-70b-judge', 'Meteor']

    left, center, _, middle, right = st.columns([0.2, 0.2, 0.2, 0.2, 0.2])

    with left:
        filter_1 = st.selectbox('Select Dataset', filters_levelone)
    with middle:
        metric = st.selectbox('Select Metric', filters_leveltwo)

    if filter_1 or metric:
        dataset_contents(ac_datasets[filter_1], metrics[metric.lower().replace('-', '_')])
        draw('asu', 'AC', filter_1, metric.lower().replace('-', '_'))


def asqa():
    st.title("Audio Scene Question Answering")

    filters_levelone = ['Clotho-AQA-Test',
                        'WavCaps-QA-Test',
                        'AudioCaps-QA-Test']

    left, center, _, middle, right = st.columns([0.2, 0.2, 0.2, 0.2, 0.2])

    with left:
        filter_1 = st.selectbox('Select Dataset', filters_levelone)

    if filter_1:
        dataset_contents(asqa_datasets[filter_1], metrics['llama3_70b_judge'])
        draw('asu', 'AQA', filter_1, 'llama3_70b_judge')


def er():
    st.title("Emotion Recognition")

    filters_levelone = ['IEMOCAP-Emotion-Test',
                        'MELD-Sentiment-Test',
                        'MELD-Emotion-Test']

    left, center, _, middle, right = st.columns([0.2, 0.2, 0.2, 0.2, 0.2])

    with left:
        filter_1 = st.selectbox('Select Dataset', filters_levelone)

    if filter_1:
        dataset_contents(er_datasets[filter_1], metrics['llama3_70b_judge_binary'])
        draw('vu', 'ER', filter_1, 'llama3_70b_judge_binary')


def ar():
    st.title("Accent Recognition")

    filters_levelone = ['VoxCeleb-Accent-Test']

    left, center, _, middle, right = st.columns([0.2, 0.2, 0.2, 0.2, 0.2])

    with left:
        filter_1 = st.selectbox('Select Dataset', filters_levelone)

    if filter_1:
        dataset_contents(ar_datsets[filter_1], metrics['llama3_70b_judge'])
        draw('vu', 'AR', filter_1, 'llama3_70b_judge')


def gr():
    st.title("Gender Recognition")

    filters_levelone = ['VoxCeleb-Gender-Test',
                        'IEMOCAP-Gender-Test']

    left, center, _, middle, right = st.columns([0.2, 0.2, 0.2, 0.2, 0.2])

    with left:
        filter_1 = st.selectbox('Select Dataset', filters_levelone)

    if filter_1:
        dataset_contents(gr_datasets[filter_1], metrics['llama3_70b_judge_binary'])
        draw('vu', 'GR', filter_1, 'llama3_70b_judge_binary')


def spt():
    st.title("Speech Translation")

    filters_levelone = ['Covost2-EN-ID-test',
                        'Covost2-EN-ZH-test',
                        'Covost2-EN-TA-test',
                        'Covost2-ID-EN-test',
                        'Covost2-ZH-EN-test',
                        'Covost2-TA-EN-test']

    left, center, _, middle, right = st.columns([0.2, 0.2, 0.2, 0.2, 0.2])

    with left:
        filter_1 = st.selectbox('Select Dataset', filters_levelone)

    if filter_1:
        dataset_contents(spt_datasets[filter_1], metrics['bleu'])
        draw('su', 'ST', filter_1, 'bleu')


def cnasr():
    st.title("Chinese Automatic Speech Recognition")

    filters_levelone = ['Aishell-ASR-ZH-Test']

    left, center, _, middle, right = st.columns([0.2, 0.2, 0.2, 0.2, 0.2])

    with left:
        filter_1 = st.selectbox('Select Dataset', filters_levelone)

    if filter_1:
        dataset_contents(cnasr_datasets[filter_1], metrics['wer'])
        draw('su', 'CNASR', filter_1, 'wer')
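

# ---------------------------------------------------------------------------
# Usage sketch (an assumption, not part of the original module): these page
# functions are written to be dispatched from a top-level Streamlit entry
# script. A minimal, hypothetical example of such a script (the module path
# `app.pages` and the page labels are illustrative only):
#
#     import streamlit as st
#     from app.pages import (dashboard, asr, sqa, si, ac, asqa,
#                            er, ar, gr, spt, cnasr)
#
#     PAGES = {
#         'Dashboard': dashboard,
#         'Automatic Speech Recognition': asr,
#         'Speech Question Answering': sqa,
#         'Speech Instruction': si,
#         'Audio Captioning': ac,
#         'Audio Scene Question Answering': asqa,
#         'Emotion Recognition': er,
#         'Accent Recognition': ar,
#         'Gender Recognition': gr,
#         'Speech Translation': spt,
#         'Chinese ASR': cnasr,
#     }
#     choice = st.sidebar.selectbox('Select a task', list(PAGES.keys()))
#     PAGES[choice]()
# ---------------------------------------------------------------------------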