Spaces:
Running
Running
| import streamlit as st | |
| from functions_preprocess import LinguisticPreprocessor, download_if_non_existent, CNN, build_vocab | |
| import pickle | |
| import nltk | |
| from datasets import load_dataset | |
| import torch | |
| nltk.download('stopwords') | |
| nltk.download('punkt') | |
| download_if_non_existent('corpora/stopwords', 'stopwords') | |
| download_if_non_existent('taggers/averaged_perceptron_tagger', 'averaged_perceptron_tagger') | |
| download_if_non_existent('corpora/wordnet', 'wordnet') | |
| from torchtext.data.utils import get_tokenizer | |
| from nltk.corpus import stopwords | |
| #################################################################### Streamlit interface | |
| st.title("Movie Reviews: An NLP Sentiment analysis") | |
| #################################################################### Cache the model loading | |
| def load_model(): | |
| model_pkl_file = "sentiment_model.pkl" | |
| with open(model_pkl_file, 'rb') as file: | |
| model = pickle.load(file) | |
| return model | |
| def load_cnn(): | |
| model = CNN(16236, 300, 128, [3, 8], 0.5, 2) | |
| model.load_state_dict(torch.load('model_cnn.pkl', map_location=torch.device('cpu'))) | |
| model.eval() | |
| return model | |
| def predict_sentiment(text, model, vocab, torch_text = False): | |
| tokenizer = get_tokenizer("basic_english") | |
| if torch_text == True: | |
| processor.transform(text) | |
| tokens = tokenizer(text) | |
| encoded = [vocab[token] for token in tokens] | |
| input_tensor = torch.tensor(encoded).unsqueeze(0).to(device) | |
| with torch.no_grad(): # No gradient needed | |
| model.eval() # Evaluation mode | |
| outputs = model(input_tensor) | |
| probs = torch.softmax(outputs, dim=1) | |
| pred_class = torch.argmax(probs, dim=1).item() | |
| return pred_class # Return the predicted class index | |
| else: | |
| processor.transform(text) | |
| prediction = model.predict([text]) | |
| return prediction | |
| model_1 = load_model() | |
| model_2 = load_cnn() | |
| processor = LinguisticPreprocessor() | |
| train_data = load_dataset('rotten_tomatoes', split='train') | |
| vocab, tokenizer = build_vocab(train_data) | |
| stop_words = set(stopwords.words('english')) | |
| device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') | |
| ############################################################# Text input | |
| with st.expander("Model 1: SGD Classifier"): | |
| st.markdown("Give it a go by writing a positive or negative text, and analyze it!") | |
| # Text input inside the expander | |
| user_input = st.text_area("Enter text here...", key='model1_input') | |
| if st.button('Analyze', key='model1_button'): | |
| # Displaying output | |
| result = predict_sentiment(user_input, model_1, vocab) | |
| if result >= 0.5: | |
| st.write('The sentiment is: Positive π', key='model1_poswrite') | |
| else: | |
| st.write('The sentiment is: Negative π', key='model1_negwrite') | |
| with st.expander("Model 2: CNN Sentiment analysis"): | |
| st.markdown("Give it a go by writing a positive or negative text, and analyze it!") | |
| # Text input inside the expander | |
| user_input = st.text_area("Enter text here...", key='model2_input') | |
| if st.button('Analyze', key='model2_button'): | |
| # Displaying output | |
| result = predict_sentiment(user_input, model_2, vocab, torch_text=True) | |
| if result >= 0.5: | |
| st.write('The sentiment is: Positive π', key='model2_poswrite') | |
| else: | |
| st.write('The sentiment is: Negative π', key='model2_negwrite') | |
| st.caption("Por @efeperro.") |