
Sentiment Analysis Model

Abstract

Sentiment Analysis Model is a Python project that uses NLP to analyze sentiment in text. The application features data preprocessing, model training, and evaluation, demonstrating best practices in text analytics and AI.

Prerequisites

  • Python 3.8 or above
  • A code editor or IDE
  • Basic understanding of NLP and sentiment analysis
  • Required libraries: nltk, scikit-learn, pandas

Before you Start

Install Python and the required libraries:

Install dependencies
pip install nltk scikit-learn pandas
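
The script downloads the NLTK stopword list automatically the first time it runs. If you prefer to fetch it up front (for example, when preparing an offline environment), you can run the download once yourself:

Download NLTK stopwords (optional; the script also does this on startup)
python -c "import nltk; nltk.download('stopwords')"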

Getting Started

Create a Project

  1. Create a folder named sentiment-analysis-model.
  2. Open the folder in your code editor or IDE.
  3. Create a file named sentiment_analysis_model.py.
  4. Copy the code below into your file.

Write the Code

sentiment_analysis_model.py
"""
Sentiment Analysis Model
 
A complete sentiment analysis pipeline using scikit-learn and NLTK. Includes data loading, preprocessing, model training, evaluation, and a command-line interface for training and prediction.
"""
import os
import pandas as pd
import argparse
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report
import joblib
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
 
stop_words = set(stopwords.words('english'))
 
def preprocess(text):
    tokens = [w for w in text.lower().split() if w.isalpha() and w not in stop_words]
    return ' '.join(tokens)
 
def load_data(csv_path):
    df = pd.read_csv(csv_path)
    df['text'] = df['text'].apply(preprocess)
    return df
 
def train_model(df, model_path=None):
    X = df['text']
    y = df['label']
    vectorizer = CountVectorizer()
    X_vec = vectorizer.fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X_vec, y, test_size=0.2, random_state=42)
    clf = MultinomialNB()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(classification_report(y_test, y_pred))
    if model_path:
        joblib.dump((clf, vectorizer), model_path)
        print(f"Model saved to {model_path}")
    return clf, vectorizer
 
def predict(model, vectorizer, texts):
    texts = [preprocess(t) for t in texts]
    X_vec = vectorizer.transform(texts)
    preds = model.predict(X_vec)
    return preds
 
def main():
    parser = argparse.ArgumentParser(description="Sentiment Analysis Model")
    parser.add_argument('--data', type=str, help='Path to CSV data file')
    parser.add_argument('--train', action='store_true', help='Train model')
    parser.add_argument('--model', type=str, default='sentiment_model.pkl', help='Path to save/load model')
    parser.add_argument('--predict', type=str, help='Text to predict sentiment')
    args = parser.parse_args()
 
    if args.train and args.data:
        df = load_data(args.data)
        train_model(df, args.model)
    elif args.predict:
        if not os.path.exists(args.model):
            print(f"Model file {args.model} not found. Train the model first.")
            return
        clf, vectorizer = joblib.load(args.model)
        result = predict(clf, vectorizer, [args.predict])
        print(f"Sentiment: {result[0]}")
    else:
        parser.print_help()
 
if __name__ == "__main__":
    main()
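
The training data is expected as a CSV file with a text column and a label column, since load_data reads df['text'] and train_model reads df['label']. A purely illustrative file (the filename and rows below are examples, not part of the project) might look like this:

reviews.csv
text,label
"I absolutely loved this product",positive
"Worst purchase I have ever made",negative
"Arrived on time and works as described",positive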
 

Example Usage

Run sentiment analysis
python sentiment_analysis_model.py
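
Running the script with no arguments prints the CLI help. To train on a labeled CSV and then classify a new piece of text, the two commands below show the intended flow (reviews.csv is the illustrative dataset described above; sentiment_model.pkl is the script's default model path):

Train a model and save it
python sentiment_analysis_model.py --train --data reviews.csv --model sentiment_model.pkl

Classify a single text with the saved model
python sentiment_analysis_model.py --predict "I absolutely loved this product" --model sentiment_model.pkl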

Explanation

Key Features

  • Sentiment Analysis: Analyzes sentiment in text using NLP.
  • Data Preprocessing: Cleans and prepares text data.
  • Model Training: Trains a model for sentiment analysis.
  • Evaluation: Assesses model performance.
  • Error Handling: Checks for a missing model file before prediction and falls back to the CLI help text when arguments are incomplete.

Code Breakdown

  1. Import Libraries and Set Up Data
sentiment_analysis_model.py
import nltk
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report
  2. Data Preprocessing and Model Training Functions
sentiment_analysis_model.py
def preprocess_data(df):
    return df.dropna()
 
def train_model(X, y):
    model = MultinomialNB()
    model.fit(X, y)
    return model
  3. Evaluation and Error Handling
sentiment_analysis_model.py
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    print(classification_report(y_test, y_pred))
 
def main():
    print("Sentiment Analysis Model")
    # df = pd.read_csv('sentiment_data.csv')
    # X, y = df['text'], df['label']
    # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    # model = train_model(X_train, y_train)
    # evaluate_model(model, X_test, y_test)
    print("[Demo] Sentiment analysis logic here.")
 
if __name__ == "__main__":
    main()
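
Note that the commented-out pipeline in step 3 skips vectorization: MultinomialNB needs numeric features, so raw text has to pass through a vectorizer first, as the full script does with CountVectorizer. One compact way to express the same flow, sketched here with a few toy sentences rather than real data, is a scikit-learn Pipeline:

pipeline_sketch.py
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline

# Toy training data, purely for illustration
texts = [
    "loved the product works great",
    "excellent quality very happy",
    "terrible experience broke immediately",
    "worst purchase ever very disappointed",
]
labels = ["positive", "positive", "negative", "negative"]

# Vectorizer and classifier chained together; fit directly on raw strings
model = make_pipeline(CountVectorizer(), MultinomialNB())
model.fit(texts, labels)

print(model.predict(["really great product"]))  # likely ['positive'] on this toy data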

Features

  • Sentiment Analysis: Data preprocessing, model training, and evaluation
  • Modular Design: Separate functions for each task
  • Error Handling: Checks for a missing model file before prediction
  • Persistence: The trained model and vectorizer are saved with joblib so they can be reloaded later

Next Steps

Enhance the project by:

  • Integrating with real sentiment datasets
  • Supporting advanced NLP models
  • Creating a GUI for analysis
  • Adding real-time analytics
  • Unit testing for reliability (see the sketch below)
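
As a starting point for the unit-testing item, here is a minimal pytest sketch for the preprocess function, assuming sentiment_analysis_model.py from this page is importable from the test's directory:

test_sentiment_analysis_model.py
from sentiment_analysis_model import preprocess

def test_preprocess_drops_stopwords():
    # "this", "is", and "the" are NLTK English stopwords
    assert preprocess("This is the movie") == "movie"

def test_preprocess_drops_non_alphabetic_tokens():
    # "great!" fails str.isalpha(), so the token is removed entirely
    assert preprocess("great!") == ""

Run the tests with: pytest test_sentiment_analysis_model.py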

Educational Value

This project teaches:

  • Text Analytics: Sentiment analysis and NLP
  • Software Design: Modular, maintainable code
  • Error Handling: Writing robust Python code

Real-World Applications

  • Social Media Analytics
  • Customer Feedback Platforms
  • Business Intelligence

Conclusion

Sentiment Analysis Model demonstrates how to build a complete sentiment analysis tool in Python using a Naive Bayes classifier and bag-of-words features. With its modular design and extensibility, this project can be adapted for real-world applications in analytics, business intelligence, and more. For more advanced projects, visit Python Central Hub.
