Data Visualization Suite with Matplotlib & Seaborn
Abstract
Create a comprehensive data visualization suite that generates professional-quality charts and statistical plots using Matplotlib, Pandas, and Seaborn. This project demonstrates data analysis techniques, statistical visualization, dashboard creation, and advanced plotting capabilities for data science applications.
Prerequisites
- Python 3.7 or above
- Text Editor or IDE
- Solid understanding of Python syntax and data structures
- Knowledge of data analysis and statistics concepts
- Familiarity with pandas and numpy libraries
- Understanding of data visualization principles
- Basic knowledge of statistical analysis and interpretation
Getting Started
Create a new project
- Create a new project folder and name it `dataVisualizationSuite`.
- Create a new file and name it `datavisualization.py`.
- Install the required dependencies: `pip install matplotlib pandas seaborn numpy`
- Prepare sample data files (CSV format recommended)
- Open the project folder in your favorite text editor or IDE.
- Copy the code below and paste it into your `datavisualization.py` file.
Write the code
- Add the following code to your `datavisualization.py` file.
⚙️ Data Visualization Suite with Matplotlib & Seaborn
# Data Visualization with Matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import seaborn as sns
import json
from typing import List, Dict, Optional
import random
class DataVisualizer:
    """Build demo charts from randomly generated sample data.

    Each ``create_*`` method draws one Matplotlib figure, optionally writes it
    to a PNG file in the current working directory, displays it with
    ``plt.show()`` and finally closes it.
    """

    # Colours reused across the charts.
    _BLUE = '#3498db'
    _RED = '#e74c3c'
    _GREEN = '#2ecc71'
    _PURPLE = '#9b59b6'

    def __init__(self):
        """Select the plotting style and generate the sample data set."""
        # Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8*".
        # Older releases only know the legacy "seaborn" name, so fall back to
        # it instead of failing with an unknown-style error.
        if 'seaborn-v0_8' in plt.style.available:
            plt.style.use('seaborn-v0_8')
        else:
            plt.style.use('seaborn')
        sns.set_palette("husl")
        self.sample_data = self.generate_sample_data()

    def generate_sample_data(self) -> Dict:
        """Generate random sample data for the charts.

        Returns:
            A dict with the keys ``'sales'``, ``'students'``, ``'stock'``,
            ``'survey'`` and ``'weather'``; each value is a dict of
            equal-length lists.
        """
        # Sales data
        months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                  'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
        sales_2022 = [random.randint(10000, 50000) for _ in months]
        sales_2023 = [random.randint(12000, 55000) for _ in months]

        # Student scores
        students = [f"Student {i}" for i in range(1, 21)]
        math_scores = [random.randint(60, 100) for _ in students]
        science_scores = [random.randint(55, 95) for _ in students]
        english_scores = [random.randint(65, 100) for _ in students]

        # Stock data: the 30 days before now, oldest first. The clock is read
        # once so consecutive dates are exactly one day apart.
        now = datetime.now()
        dates = [now - timedelta(days=offset) for offset in range(30, 0, -1)]
        stock_prices = []
        price = 100
        for _ in dates:
            price += random.uniform(-5, 5)
            stock_prices.append(max(price, 10))  # recorded price never drops below 10

        # Survey data
        age_groups = ['18-25', '26-35', '36-45', '46-55', '56-65', '65+']
        responses = [random.randint(50, 200) for _ in age_groups]

        # Weather data
        days = [f"Day {i}" for i in range(1, 8)]
        temperatures = [random.randint(15, 35) for _ in days]
        humidity = [random.randint(30, 80) for _ in days]

        return {
            'sales': {'months': months, '2022': sales_2022, '2023': sales_2023},
            'students': {'names': students, 'math': math_scores,
                         'science': science_scores, 'english': english_scores},
            'stock': {'dates': dates, 'prices': stock_prices},
            'survey': {'age_groups': age_groups, 'responses': responses},
            'weather': {'days': days, 'temperature': temperatures, 'humidity': humidity},
        }

    def _average_scores(self):
        """Return the mean Math, Science and English scores, in that order."""
        scores = self.sample_data['students']
        return [np.mean(scores[subject]) for subject in ('math', 'science', 'english')]

    @staticmethod
    def _label_bars(ax, bars, values):
        """Write each value (one decimal place) just above its bar."""
        for bar, value in zip(bars, values):
            ax.text(bar.get_x() + bar.get_width() / 2., bar.get_height() + 1,
                    f'{value:.1f}', ha='center', va='bottom', fontweight='bold')

    @staticmethod
    def _emphasise(texts):
        """Render the given text artists in bold white."""
        for text in texts:
            text.set_color('white')
            text.set_fontweight('bold')

    @staticmethod
    def _finish(fig, filename, saved_message, save):
        """Optionally save ``fig`` to ``filename``, display it, then close it."""
        if save:
            fig.savefig(filename, dpi=300, bbox_inches='tight')
            print(saved_message)
        plt.show()
        # pyplot keeps a reference to every figure until it is closed; without
        # this, figures pile up across repeated calls when show() does not block.
        plt.close(fig)

    def create_line_chart(self, save=True):
        """Plot the 2022 and 2023 monthly sales as two lines.

        Args:
            save: When true, also write ``sales_comparison.png``.
        """
        sales = self.sample_data['sales']
        fig, ax = plt.subplots(figsize=(12, 6))
        ax.plot(sales['months'], sales['2022'], marker='o', linewidth=2,
                label='2022', color=self._BLUE)
        ax.plot(sales['months'], sales['2023'], marker='s', linewidth=2,
                label='2023', color=self._RED)
        ax.set_title('Monthly Sales Comparison (2022 vs 2023)', fontsize=16, fontweight='bold')
        ax.set_xlabel('Month', fontsize=12)
        ax.set_ylabel('Sales ($)', fontsize=12)
        ax.legend(fontsize=11)
        ax.grid(True, alpha=0.3)
        # Show y tick labels as plain numbers (no scientific notation).
        ax.ticklabel_format(style='plain', axis='y')
        # Rotate x-axis labels for better readability.
        ax.tick_params(axis='x', rotation=45)
        fig.tight_layout()
        self._finish(fig, 'sales_comparison.png',
                     "Line chart saved as 'sales_comparison.png'", save)

    def create_bar_chart(self, save=True):
        """Draw per-student grouped bars next to per-subject average bars.

        Args:
            save: When true, also write ``student_scores.png``.
        """
        scores = self.sample_data['students']
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

        # Grouped bars for the first 10 students only, to keep them legible.
        names = scores['names'][:10]
        x = np.arange(len(names))
        width = 0.25
        ax1.bar(x - width, scores['math'][:10], width, label='Math',
                color=self._BLUE, alpha=0.8)
        ax1.bar(x, scores['science'][:10], width, label='Science',
                color=self._RED, alpha=0.8)
        ax1.bar(x + width, scores['english'][:10], width, label='English',
                color=self._GREEN, alpha=0.8)
        ax1.set_title('Student Scores by Subject', fontsize=14, fontweight='bold')
        ax1.set_xlabel('Students', fontsize=12)
        ax1.set_ylabel('Scores', fontsize=12)
        ax1.set_xticks(x)
        ax1.set_xticklabels(names, rotation=45, ha='right')
        ax1.legend()
        ax1.grid(True, alpha=0.3)

        # Averages are computed over all students, not just the 10 shown.
        avg_scores = self._average_scores()
        bars = ax2.bar(['Math', 'Science', 'English'], avg_scores,
                       color=[self._BLUE, self._RED, self._GREEN], alpha=0.8)
        ax2.set_title('Average Scores by Subject', fontsize=14, fontweight='bold')
        ax2.set_ylabel('Average Score', fontsize=12)
        ax2.grid(True, alpha=0.3)
        self._label_bars(ax2, bars, avg_scores)

        fig.tight_layout()
        self._finish(fig, 'student_scores.png',
                     "Bar chart saved as 'student_scores.png'", save)

    def create_pie_chart(self, save=True):
        """Draw the survey responses and a fixed market-share example as pies.

        Args:
            save: When true, also write ``pie_charts.png``.
        """
        survey = self.sample_data['survey']
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 7))

        # Age group responses
        age_groups = survey['age_groups']
        colors = plt.cm.Set3(np.linspace(0, 1, len(age_groups)))
        _, _, autotexts = ax1.pie(survey['responses'], labels=age_groups,
                                  autopct='%1.1f%%', colors=colors, startangle=90)
        ax1.set_title('Survey Responses by Age Group', fontsize=14, fontweight='bold')
        self._emphasise(autotexts)

        # Market share pie chart (hard-coded example data)
        companies = ['Company A', 'Company B', 'Company C', 'Company D', 'Others']
        market_share = [30, 25, 20, 15, 10]
        explode = (0.1, 0, 0, 0, 0)  # pull out the first slice
        _, _, autotexts2 = ax2.pie(market_share, labels=companies, autopct='%1.1f%%',
                                   explode=explode, shadow=True, startangle=90)
        ax2.set_title('Market Share Distribution', fontsize=14, fontweight='bold')
        self._emphasise(autotexts2)

        fig.tight_layout()
        self._finish(fig, 'pie_charts.png', "Pie charts saved as 'pie_charts.png'", save)

    def create_scatter_plot(self, save=True):
        """Draw score-vs-score and temperature-vs-humidity scatter plots.

        Args:
            save: When true, also write ``scatter_plots.png``.
        """
        scores = self.sample_data['students']
        weather = self.sample_data['weather']
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

        # Student scores: Math on x against the two other subjects on y.
        ax1.scatter(scores['math'], scores['science'], alpha=0.6, s=60,
                    color=self._BLUE, label='Math vs Science')
        ax1.scatter(scores['math'], scores['english'], alpha=0.6, s=60,
                    color=self._RED, label='Math vs English')
        ax1.set_xlabel('Math Scores', fontsize=12)
        ax1.set_ylabel('Other Subject Scores', fontsize=12)
        ax1.set_title('Student Scores Correlation', fontsize=14, fontweight='bold')
        ax1.legend()
        ax1.grid(True, alpha=0.3)

        # Temperature vs humidity, with points coloured by temperature.
        temperature = weather['temperature']
        points = ax2.scatter(temperature, weather['humidity'], c=temperature,
                             cmap='coolwarm', s=100, alpha=0.7,
                             edgecolors='black', linewidth=1)
        ax2.set_xlabel('Temperature (°C)', fontsize=12)
        ax2.set_ylabel('Humidity (%)', fontsize=12)
        ax2.set_title('Temperature vs Humidity', fontsize=14, fontweight='bold')
        ax2.grid(True, alpha=0.3)
        cbar = fig.colorbar(points, ax=ax2)
        cbar.set_label('Temperature (°C)', fontsize=10)

        fig.tight_layout()
        self._finish(fig, 'scatter_plots.png',
                     "Scatter plots saved as 'scatter_plots.png'", save)

    def create_histogram(self, save=True):
        """Draw a 2x2 grid of histograms of the sample data.

        Args:
            save: When true, also write ``histograms.png``.
        """
        scores = self.sample_data['students']
        math_scores = scores['math']
        all_scores = scores['math'] + scores['science'] + scores['english']
        fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))

        # (axes, values, bins, colour, title, x label) for each panel.
        panels = [
            (ax1, math_scores, 10, self._BLUE, 'Math Scores Distribution', 'Score'),
            (ax2, self.sample_data['stock']['prices'], 15, self._GREEN,
             'Stock Prices Distribution', 'Price ($)'),
            (ax3, self.sample_data['weather']['temperature'], 8, self._RED,
             'Temperature Distribution', 'Temperature (°C)'),
            (ax4, all_scores, 20, self._PURPLE, 'All Scores Distribution', 'Score'),
        ]
        for ax, values, bins, colour, title, xlabel in panels:
            ax.hist(values, bins=bins, alpha=0.7, color=colour, edgecolor='black')
            ax.set_title(title, fontsize=12, fontweight='bold')
            ax.set_xlabel(xlabel)
            ax.set_ylabel('Frequency')
            ax.grid(True, alpha=0.3)

        # Mark the mean on the Math panel.
        mean_math = np.mean(math_scores)
        ax1.axvline(mean_math, color='red', linestyle='--', linewidth=2,
                    label=f'Mean: {mean_math:.1f}')
        ax1.legend()

        fig.tight_layout()
        self._finish(fig, 'histograms.png', "Histograms saved as 'histograms.png'", save)

    def create_heatmap(self, save=True):
        """Draw the correlation matrix of the three subject scores as a heatmap.

        Args:
            save: When true, also write ``correlation_heatmap.png``.
        """
        scores = self.sample_data['students']
        correlation_matrix = pd.DataFrame({
            'Math': scores['math'],
            'Science': scores['science'],
            'English': scores['english'],
        }).corr()

        fig, ax = plt.subplots(figsize=(10, 8))
        sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0,
                    square=True, linewidths=0.5, cbar_kws={"shrink": .8}, ax=ax)
        ax.set_title('Subject Scores Correlation Matrix', fontsize=16,
                     fontweight='bold', pad=20)
        self._finish(fig, 'correlation_heatmap.png',
                     "Heatmap saved as 'correlation_heatmap.png'", save)

    def create_time_series(self, save=True):
        """Plot the stock prices over time with a fitted linear trend line.

        Args:
            save: When true, also write ``stock_timeseries.png``.
        """
        dates = self.sample_data['stock']['dates']
        prices = self.sample_data['stock']['prices']

        fig, ax = plt.subplots(figsize=(14, 8))
        ax.plot(dates, prices, linewidth=2, color=self._BLUE, marker='o', markersize=4)
        # Fill the area under the curve.
        ax.fill_between(dates, prices, alpha=0.3, color=self._BLUE)
        ax.set_title('Stock Price Movement (30 Days)', fontsize=16, fontweight='bold')
        ax.set_xlabel('Date', fontsize=12)
        ax.set_ylabel('Price ($)', fontsize=12)
        ax.grid(True, alpha=0.3)
        ax.tick_params(axis='x', rotation=45)

        # Degree-1 least-squares fit against the sample index (not the date).
        x_numeric = np.arange(len(dates))
        trend = np.poly1d(np.polyfit(x_numeric, prices, 1))
        ax.plot(dates, trend(x_numeric), "r--", alpha=0.8, linewidth=2, label='Trend')
        ax.legend()

        fig.tight_layout()
        self._finish(fig, 'stock_timeseries.png',
                     "Time series plot saved as 'stock_timeseries.png'", save)

    def create_subplots_dashboard(self, save=True):
        """Combine five charts into one figure laid out on a 3x3 grid.

        Args:
            save: When true, also write ``dashboard.png``.
        """
        fig = plt.figure(figsize=(16, 12))
        grid = fig.add_gridspec(3, 3, hspace=0.3, wspace=0.3)

        # 1. Sales line chart (top row, two columns wide)
        sales = self.sample_data['sales']
        sales_ax = fig.add_subplot(grid[0, :2])
        sales_ax.plot(sales['months'], sales['2022'], marker='o', label='2022',
                      color=self._BLUE)
        sales_ax.plot(sales['months'], sales['2023'], marker='s', label='2023',
                      color=self._RED)
        sales_ax.set_title('Monthly Sales Trend', fontweight='bold')
        sales_ax.legend()
        sales_ax.grid(True, alpha=0.3)
        sales_ax.tick_params(axis='x', rotation=45)

        # 2. Age group pie chart (top right)
        survey = self.sample_data['survey']
        pie_ax = fig.add_subplot(grid[0, 2])
        pie_ax.pie(survey['responses'], labels=survey['age_groups'],
                   autopct='%1.0f%%', textprops={'fontsize': 8})
        pie_ax.set_title('Age Distribution', fontweight='bold')

        # 3. Average scores bar chart (middle row, two columns wide)
        avg_scores = self._average_scores()
        bar_ax = fig.add_subplot(grid[1, :2])
        bars = bar_ax.bar(['Math', 'Science', 'English'], avg_scores,
                          color=[self._BLUE, self._RED, self._GREEN], alpha=0.7)
        bar_ax.set_title('Average Subject Scores', fontweight='bold')
        bar_ax.set_ylabel('Score')
        self._label_bars(bar_ax, bars, avg_scores)

        # 4. Temperature scatter (middle right)
        weather = self.sample_data['weather']
        scatter_ax = fig.add_subplot(grid[1, 2])
        scatter_ax.scatter(weather['temperature'], weather['humidity'],
                           c=weather['temperature'], cmap='coolwarm', s=50, alpha=0.7)
        scatter_ax.set_title('Temp vs Humidity', fontweight='bold')
        scatter_ax.set_xlabel('Temperature')
        scatter_ax.set_ylabel('Humidity')

        # 5. Stock prices (full-width bottom row)
        stock = self.sample_data['stock']
        stock_ax = fig.add_subplot(grid[2, :])
        stock_ax.plot(stock['dates'], stock['prices'], color=self._GREEN, linewidth=2)
        stock_ax.fill_between(stock['dates'], stock['prices'], alpha=0.3, color=self._GREEN)
        stock_ax.set_title('Stock Price Movement', fontweight='bold')
        stock_ax.set_xlabel('Date')
        stock_ax.set_ylabel('Price ($)')
        stock_ax.tick_params(axis='x', rotation=45)
        stock_ax.grid(True, alpha=0.3)

        fig.suptitle('Data Visualization Dashboard', fontsize=20, fontweight='bold', y=0.95)
        self._finish(fig, 'dashboard.png', "Dashboard saved as 'dashboard.png'", save)

    def load_custom_data(self, filename: str) -> Optional[pd.DataFrame]:
        """Load a CSV file and print its shape and column names.

        Args:
            filename: Path of the CSV file to read.

        Returns:
            The loaded DataFrame, or ``None`` (after printing the error) when
            the file cannot be read or parsed.
        """
        try:
            df = pd.read_csv(filename)
        except (OSError, ValueError) as e:
            # OSError covers missing/unreadable files; pandas' parser and
            # empty-data errors, and decoding errors, derive from ValueError.
            print(f"Error loading data: {e}")
            return None
        print(f"Loaded data from {filename}")
        print(f"Shape: {df.shape}")
        print(f"Columns: {list(df.columns)}")
        return df

    def save_sample_data(self):
        """Write the sales, student and stock sample data to three CSV files.

        The files are created in the current working directory. A failure is
        printed rather than raised.
        """
        sales = self.sample_data['sales']
        students = self.sample_data['students']
        stock = self.sample_data['stock']
        try:
            pd.DataFrame({
                'Month': sales['months'],
                'Sales_2022': sales['2022'],
                'Sales_2023': sales['2023'],
            }).to_csv('sample_sales_data.csv', index=False)

            pd.DataFrame({
                'Student': students['names'],
                'Math': students['math'],
                'Science': students['science'],
                'English': students['english'],
            }).to_csv('sample_student_data.csv', index=False)

            pd.DataFrame({
                # Dates are stored as YYYY-MM-DD strings (time of day dropped).
                'Date': [d.strftime('%Y-%m-%d') for d in stock['dates']],
                'Price': stock['prices'],
            }).to_csv('sample_stock_data.csv', index=False)
        except (OSError, ValueError) as e:
            print(f"Error saving sample data: {e}")
            return
        print("Sample data saved to CSV files:")
        print("- sample_sales_data.csv")
        print("- sample_student_data.csv")
        print("- sample_stock_data.csv")
def main():
    """Run the interactive text menu until the user chooses to exit.

    The loop ends on choice ``0``, on Ctrl-C, or when standard input is
    exhausted. Any other error raised by a menu action is printed and the
    menu is shown again.
    """
    visualizer = DataVisualizer()

    # Menu choices that map straight onto a no-argument visualizer method.
    actions = {
        '1': visualizer.create_line_chart,
        '2': visualizer.create_bar_chart,
        '3': visualizer.create_pie_chart,
        '4': visualizer.create_scatter_plot,
        '5': visualizer.create_histogram,
        '6': visualizer.create_heatmap,
        '7': visualizer.create_time_series,
        '8': visualizer.create_subplots_dashboard,
        '9': visualizer.save_sample_data,
    }
    menu_lines = (
        "\n=== Data Visualization with Matplotlib ===",
        "1. Line Chart (Sales Trends)",
        "2. Bar Chart (Student Scores)",
        "3. Pie Chart (Survey Data)",
        "4. Scatter Plot (Correlations)",
        "5. Histogram (Data Distribution)",
        "6. Heatmap (Correlation Matrix)",
        "7. Time Series (Stock Prices)",
        "8. Dashboard (Multiple Plots)",
        "9. Save Sample Data to CSV",
        "10. Load Custom Data",
        "0. Exit",
    )

    while True:
        for line in menu_lines:
            print(line)
        try:
            choice = input("\nEnter your choice: ").strip()
            if choice == '0':
                print("Thank you for using the Data Visualization app!")
                break
            if choice in actions:
                actions[choice]()
            elif choice == '10':
                filename = input("Enter CSV filename: ").strip()
                if filename:
                    df = visualizer.load_custom_data(filename)
                    if df is not None:
                        print("\nFirst 5 rows:")
                        print(df.head())
                        print("\nData types:")
                        print(df.dtypes)
            else:
                print("Invalid choice. Please try again.")
        except (KeyboardInterrupt, EOFError):
            # EOFError means stdin was closed or piped input ran out. Letting
            # the generic handler below catch it would re-prompt forever.
            print("\n\nGoodbye!")
            break
        except Exception as e:
            # Top-level boundary: report the failure and keep the menu alive.
            print(f"An error occurred: {e}")


if __name__ == "__main__":
    main()
# Data Visualization with Matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import seaborn as sns
import json
from typing import List, Dict, Optional
import random
class DataVisualizer:
def __init__(self):
# Set style
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
# Sample data
self.sample_data = self.generate_sample_data()
def generate_sample_data(self) -> Dict:
"""Generate sample data for visualization"""
# Sales data
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
sales_2022 = [random.randint(10000, 50000) for _ in months]
sales_2023 = [random.randint(12000, 55000) for _ in months]
# Student scores
students = [f"Student {i}" for i in range(1, 21)]
math_scores = [random.randint(60, 100) for _ in students]
science_scores = [random.randint(55, 95) for _ in students]
english_scores = [random.randint(65, 100) for _ in students]
# Stock data
dates = [datetime.now() - timedelta(days=x) for x in range(30, 0, -1)]
stock_prices = []
price = 100
for _ in dates:
price += random.uniform(-5, 5)
stock_prices.append(max(price, 10)) # Ensure positive price
# Survey data
age_groups = ['18-25', '26-35', '36-45', '46-55', '56-65', '65+']
responses = [random.randint(50, 200) for _ in age_groups]
# Weather data
days = [f"Day {i}" for i in range(1, 8)]
temperatures = [random.randint(15, 35) for _ in days]
humidity = [random.randint(30, 80) for _ in days]
return {
'sales': {'months': months, '2022': sales_2022, '2023': sales_2023},
'students': {'names': students, 'math': math_scores, 'science': science_scores, 'english': english_scores},
'stock': {'dates': dates, 'prices': stock_prices},
'survey': {'age_groups': age_groups, 'responses': responses},
'weather': {'days': days, 'temperature': temperatures, 'humidity': humidity}
}
def create_line_chart(self, save=True):
"""Create a line chart showing sales trends"""
plt.figure(figsize=(12, 6))
months = self.sample_data['sales']['months']
sales_2022 = self.sample_data['sales']['2022']
sales_2023 = self.sample_data['sales']['2023']
plt.plot(months, sales_2022, marker='o', linewidth=2, label='2022', color='#3498db')
plt.plot(months, sales_2023, marker='s', linewidth=2, label='2023', color='#e74c3c')
plt.title('Monthly Sales Comparison (2022 vs 2023)', fontsize=16, fontweight='bold')
plt.xlabel('Month', fontsize=12)
plt.ylabel('Sales ($)', fontsize=12)
plt.legend(fontsize=11)
plt.grid(True, alpha=0.3)
# Format y-axis to show values in thousands
plt.ticklabel_format(style='plain', axis='y')
# Rotate x-axis labels for better readability
plt.xticks(rotation=45)
plt.tight_layout()
if save:
plt.savefig('sales_comparison.png', dpi=300, bbox_inches='tight')
print("Line chart saved as 'sales_comparison.png'")
plt.show()
def create_bar_chart(self, save=True):
"""Create a bar chart showing student scores"""
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
students = self.sample_data['students']['names'][:10] # First 10 students
math_scores = self.sample_data['students']['math'][:10]
science_scores = self.sample_data['students']['science'][:10]
english_scores = self.sample_data['students']['english'][:10]
# Grouped bar chart
x = np.arange(len(students))
width = 0.25
ax1.bar(x - width, math_scores, width, label='Math', color='#3498db', alpha=0.8)
ax1.bar(x, science_scores, width, label='Science', color='#e74c3c', alpha=0.8)
ax1.bar(x + width, english_scores, width, label='English', color='#2ecc71', alpha=0.8)
ax1.set_title('Student Scores by Subject', fontsize=14, fontweight='bold')
ax1.set_xlabel('Students', fontsize=12)
ax1.set_ylabel('Scores', fontsize=12)
ax1.set_xticks(x)
ax1.set_xticklabels(students, rotation=45, ha='right')
ax1.legend()
ax1.grid(True, alpha=0.3)
# Average scores bar chart
subjects = ['Math', 'Science', 'English']
avg_scores = [
np.mean(self.sample_data['students']['math']),
np.mean(self.sample_data['students']['science']),
np.mean(self.sample_data['students']['english'])
]
colors = ['#3498db', '#e74c3c', '#2ecc71']
bars = ax2.bar(subjects, avg_scores, color=colors, alpha=0.8)
ax2.set_title('Average Scores by Subject', fontsize=14, fontweight='bold')
ax2.set_ylabel('Average Score', fontsize=12)
ax2.grid(True, alpha=0.3)
# Add value labels on bars
for bar, score in zip(bars, avg_scores):
height = bar.get_height()
ax2.text(bar.get_x() + bar.get_width()/2., height + 1,
f'{score:.1f}', ha='center', va='bottom', fontweight='bold')
plt.tight_layout()
if save:
plt.savefig('student_scores.png', dpi=300, bbox_inches='tight')
print("Bar chart saved as 'student_scores.png'")
plt.show()
def create_pie_chart(self, save=True):
"""Create pie charts showing survey data"""
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 7))
# Age group responses
age_groups = self.sample_data['survey']['age_groups']
responses = self.sample_data['survey']['responses']
colors = plt.cm.Set3(np.linspace(0, 1, len(age_groups)))
wedges, texts, autotexts = ax1.pie(responses, labels=age_groups, autopct='%1.1f%%',
colors=colors, startangle=90)
ax1.set_title('Survey Responses by Age Group', fontsize=14, fontweight='bold')
# Make percentage text bold
for autotext in autotexts:
autotext.set_color('white')
autotext.set_fontweight('bold')
# Market share pie chart (example data)
companies = ['Company A', 'Company B', 'Company C', 'Company D', 'Others']
market_share = [30, 25, 20, 15, 10]
explode = (0.1, 0, 0, 0, 0) # explode 1st slice
wedges2, texts2, autotexts2 = ax2.pie(market_share, labels=companies, autopct='%1.1f%%',
explode=explode, shadow=True, startangle=90)
ax2.set_title('Market Share Distribution', fontsize=14, fontweight='bold')
# Make percentage text bold
for autotext in autotexts2:
autotext.set_color('white')
autotext.set_fontweight('bold')
plt.tight_layout()
if save:
plt.savefig('pie_charts.png', dpi=300, bbox_inches='tight')
print("Pie charts saved as 'pie_charts.png'")
plt.show()
def create_scatter_plot(self, save=True):
"""Create scatter plots showing correlations"""
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
# Student scores correlation
math_scores = self.sample_data['students']['math']
science_scores = self.sample_data['students']['science']
english_scores = self.sample_data['students']['english']
ax1.scatter(math_scores, science_scores, alpha=0.6, s=60, color='#3498db', label='Math vs Science')
ax1.scatter(math_scores, english_scores, alpha=0.6, s=60, color='#e74c3c', label='Math vs English')
ax1.set_xlabel('Math Scores', fontsize=12)
ax1.set_ylabel('Other Subject Scores', fontsize=12)
ax1.set_title('Student Scores Correlation', fontsize=14, fontweight='bold')
ax1.legend()
ax1.grid(True, alpha=0.3)
# Temperature vs Humidity
temperature = self.sample_data['weather']['temperature']
humidity = self.sample_data['weather']['humidity']
# Create color map based on temperature
colors = plt.cm.coolwarm(np.linspace(0, 1, len(temperature)))
scatter = ax2.scatter(temperature, humidity, c=temperature, cmap='coolwarm',
s=100, alpha=0.7, edgecolors='black', linewidth=1)
ax2.set_xlabel('Temperature (°C)', fontsize=12)
ax2.set_ylabel('Humidity (%)', fontsize=12)
ax2.set_title('Temperature vs Humidity', fontsize=14, fontweight='bold')
ax2.grid(True, alpha=0.3)
# Add colorbar
cbar = plt.colorbar(scatter, ax=ax2)
cbar.set_label('Temperature (°C)', fontsize=10)
plt.tight_layout()
if save:
plt.savefig('scatter_plots.png', dpi=300, bbox_inches='tight')
print("Scatter plots saved as 'scatter_plots.png'")
plt.show()
def create_histogram(self, save=True):
"""Create histograms showing data distribution"""
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))
# Math scores distribution
math_scores = self.sample_data['students']['math']
ax1.hist(math_scores, bins=10, alpha=0.7, color='#3498db', edgecolor='black')
ax1.set_title('Math Scores Distribution', fontsize=12, fontweight='bold')
ax1.set_xlabel('Score')
ax1.set_ylabel('Frequency')
ax1.grid(True, alpha=0.3)
# Add mean line
mean_math = np.mean(math_scores)
ax1.axvline(mean_math, color='red', linestyle='--', linewidth=2, label=f'Mean: {mean_math:.1f}')
ax1.legend()
# Stock prices distribution
stock_prices = self.sample_data['stock']['prices']
ax2.hist(stock_prices, bins=15, alpha=0.7, color='#2ecc71', edgecolor='black')
ax2.set_title('Stock Prices Distribution', fontsize=12, fontweight='bold')
ax2.set_xlabel('Price ($)')
ax2.set_ylabel('Frequency')
ax2.grid(True, alpha=0.3)
# Temperature distribution
temperature = self.sample_data['weather']['temperature']
ax3.hist(temperature, bins=8, alpha=0.7, color='#e74c3c', edgecolor='black')
ax3.set_title('Temperature Distribution', fontsize=12, fontweight='bold')
ax3.set_xlabel('Temperature (°C)')
ax3.set_ylabel('Frequency')
ax3.grid(True, alpha=0.3)
# Combined scores distribution
all_scores = (self.sample_data['students']['math'] +
self.sample_data['students']['science'] +
self.sample_data['students']['english'])
ax4.hist(all_scores, bins=20, alpha=0.7, color='#9b59b6', edgecolor='black')
ax4.set_title('All Scores Distribution', fontsize=12, fontweight='bold')
ax4.set_xlabel('Score')
ax4.set_ylabel('Frequency')
ax4.grid(True, alpha=0.3)
plt.tight_layout()
if save:
plt.savefig('histograms.png', dpi=300, bbox_inches='tight')
print("Histograms saved as 'histograms.png'")
plt.show()
def create_heatmap(self, save=True):
"""Create a heatmap showing correlation matrix"""
# Create correlation data
students_df = pd.DataFrame({
'Math': self.sample_data['students']['math'],
'Science': self.sample_data['students']['science'],
'English': self.sample_data['students']['english']
})
# Calculate correlation matrix
correlation_matrix = students_df.corr()
plt.figure(figsize=(10, 8))
# Create heatmap
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0,
square=True, linewidths=0.5, cbar_kws={"shrink": .8})
plt.title('Subject Scores Correlation Matrix', fontsize=16, fontweight='bold', pad=20)
if save:
plt.savefig('correlation_heatmap.png', dpi=300, bbox_inches='tight')
print("Heatmap saved as 'correlation_heatmap.png'")
plt.show()
def create_time_series(self, save=True):
"""Create time series plot for stock prices"""
plt.figure(figsize=(14, 8))
dates = self.sample_data['stock']['dates']
prices = self.sample_data['stock']['prices']
plt.plot(dates, prices, linewidth=2, color='#3498db', marker='o', markersize=4)
# Fill area under the curve
plt.fill_between(dates, prices, alpha=0.3, color='#3498db')
plt.title('Stock Price Movement (30 Days)', fontsize=16, fontweight='bold')
plt.xlabel('Date', fontsize=12)
plt.ylabel('Price ($)', fontsize=12)
plt.grid(True, alpha=0.3)
# Format dates on x-axis
plt.xticks(rotation=45)
# Add trend line
x_numeric = np.arange(len(dates))
z = np.polyfit(x_numeric, prices, 1)
p = np.poly1d(z)
plt.plot(dates, p(x_numeric), "r--", alpha=0.8, linewidth=2, label=f'Trend')
plt.legend()
plt.tight_layout()
if save:
plt.savefig('stock_timeseries.png', dpi=300, bbox_inches='tight')
print("Time series plot saved as 'stock_timeseries.png'")
plt.show()
def create_subplots_dashboard(self, save=True):
"""Create a comprehensive dashboard with multiple plots"""
fig = plt.figure(figsize=(16, 12))
# Layout: 3x3 grid
gs = fig.add_gridspec(3, 3, hspace=0.3, wspace=0.3)
# 1. Sales line chart
ax1 = fig.add_subplot(gs[0, :2])
months = self.sample_data['sales']['months']
sales_2022 = self.sample_data['sales']['2022']
sales_2023 = self.sample_data['sales']['2023']
ax1.plot(months, sales_2022, marker='o', label='2022', color='#3498db')
ax1.plot(months, sales_2023, marker='s', label='2023', color='#e74c3c')
ax1.set_title('Monthly Sales Trend', fontweight='bold')
ax1.legend()
ax1.grid(True, alpha=0.3)
ax1.tick_params(axis='x', rotation=45)
# 2. Age group pie chart
ax2 = fig.add_subplot(gs[0, 2])
age_groups = self.sample_data['survey']['age_groups']
responses = self.sample_data['survey']['responses']
ax2.pie(responses, labels=age_groups, autopct='%1.0f%%', textprops={'fontsize': 8})
ax2.set_title('Age Distribution', fontweight='bold')
# 3. Scores bar chart
ax3 = fig.add_subplot(gs[1, :2])
subjects = ['Math', 'Science', 'English']
avg_scores = [
np.mean(self.sample_data['students']['math']),
np.mean(self.sample_data['students']['science']),
np.mean(self.sample_data['students']['english'])
]
bars = ax3.bar(subjects, avg_scores, color=['#3498db', '#e74c3c', '#2ecc71'], alpha=0.7)
ax3.set_title('Average Subject Scores', fontweight='bold')
ax3.set_ylabel('Score')
# Add value labels
for bar, score in zip(bars, avg_scores):
height = bar.get_height()
ax3.text(bar.get_x() + bar.get_width()/2., height + 1,
f'{score:.1f}', ha='center', va='bottom', fontweight='bold')
# 4. Temperature scatter
ax4 = fig.add_subplot(gs[1, 2])
temperature = self.sample_data['weather']['temperature']
humidity = self.sample_data['weather']['humidity']
ax4.scatter(temperature, humidity, c=temperature, cmap='coolwarm', s=50, alpha=0.7)
ax4.set_title('Temp vs Humidity', fontweight='bold')
ax4.set_xlabel('Temperature')
ax4.set_ylabel('Humidity')
# 5. Stock prices
ax5 = fig.add_subplot(gs[2, :])
dates = self.sample_data['stock']['dates']
prices = self.sample_data['stock']['prices']
ax5.plot(dates, prices, color='#2ecc71', linewidth=2)
ax5.fill_between(dates, prices, alpha=0.3, color='#2ecc71')
ax5.set_title('Stock Price Movement', fontweight='bold')
ax5.set_xlabel('Date')
ax5.set_ylabel('Price ($)')
ax5.tick_params(axis='x', rotation=45)
ax5.grid(True, alpha=0.3)
plt.suptitle('Data Visualization Dashboard', fontsize=20, fontweight='bold', y=0.95)
if save:
plt.savefig('dashboard.png', dpi=300, bbox_inches='tight')
print("Dashboard saved as 'dashboard.png'")
plt.show()
def load_custom_data(self, filename: str) -> Optional[pd.DataFrame]:
"""Load custom data from CSV file"""
try:
df = pd.read_csv(filename)
print(f"Loaded data from {filename}")
print(f"Shape: {df.shape}")
print(f"Columns: {list(df.columns)}")
return df
except Exception as e:
print(f"Error loading data: {e}")
return None
def save_sample_data(self):
    """Write the sales, student and stock sample data to three CSV files.

    The files are created in the current working directory. Any failure is
    reported with a printed message instead of being raised.
    """
    try:
        # Sales: one row per month with both years side by side.
        sales = self.sample_data['sales']
        pd.DataFrame({
            'Month': sales['months'],
            'Sales_2022': sales['2022'],
            'Sales_2023': sales['2023'],
        }).to_csv('sample_sales_data.csv', index=False)

        # Students: one row per student with the three subject scores.
        students = self.sample_data['students']
        pd.DataFrame({
            'Student': students['names'],
            'Math': students['math'],
            'Science': students['science'],
            'English': students['english'],
        }).to_csv('sample_student_data.csv', index=False)

        # Stock: dates are written as YYYY-MM-DD strings.
        stock = self.sample_data['stock']
        pd.DataFrame({
            'Date': [day.strftime('%Y-%m-%d') for day in stock['dates']],
            'Price': stock['prices'],
        }).to_csv('sample_stock_data.csv', index=False)

        for line in (
            "Sample data saved to CSV files:",
            "- sample_sales_data.csv",
            "- sample_student_data.csv",
            "- sample_stock_data.csv",
        ):
            print(line)
    except Exception as err:
        print(f"Error saving sample data: {err}")
def main():
    """Run the interactive text menu of the data visualization app.

    The menu is printed and a choice is read in a loop until the user
    picks ``0``, presses Ctrl+C, or standard input reaches end-of-file.
    Any other exception raised while handling a choice is printed and the
    menu is shown again.
    """
    visualizer = DataVisualizer()

    # Menu choices that map directly onto a no-argument visualizer method.
    actions = {
        '1': visualizer.create_line_chart,
        '2': visualizer.create_bar_chart,
        '3': visualizer.create_pie_chart,
        '4': visualizer.create_scatter_plot,
        '5': visualizer.create_histogram,
        '6': visualizer.create_heatmap,
        '7': visualizer.create_time_series,
        '8': visualizer.create_subplots_dashboard,
        '9': visualizer.save_sample_data,
    }
    menu_lines = (
        "\n=== Data Visualization with Matplotlib ===",
        "1. Line Chart (Sales Trends)",
        "2. Bar Chart (Student Scores)",
        "3. Pie Chart (Survey Data)",
        "4. Scatter Plot (Correlations)",
        "5. Histogram (Data Distribution)",
        "6. Heatmap (Correlation Matrix)",
        "7. Time Series (Stock Prices)",
        "8. Dashboard (Multiple Plots)",
        "9. Save Sample Data to CSV",
        "10. Load Custom Data",
        "0. Exit",
    )

    while True:
        for line in menu_lines:
            print(line)
        try:
            choice = input("\nEnter your choice: ").strip()
            if choice == '0':
                print("Thank you for using the Data Visualization app!")
                break
            if choice in actions:
                actions[choice]()
            elif choice == '10':
                filename = input("Enter CSV filename: ").strip()
                if filename:
                    df = visualizer.load_custom_data(filename)
                    if df is not None:
                        print("\nFirst 5 rows:")
                        print(df.head())
                        print("\nData types:")
                        print(df.dtypes)
            else:
                print("Invalid choice. Please try again.")
        except (KeyboardInterrupt, EOFError):
            # EOFError means stdin was closed or exhausted. If it fell through
            # to the generic handler below, every later input() would fail the
            # same way and the menu would be re-printed forever.
            print("\n\nGoodbye!")
            break
        except Exception as e:
            print(f"An error occurred: {e}")


if __name__ == "__main__":
    main()
- Save the file.
- Run the following command to run the application.
C:\Users\username\Documents\dataVisualizationSuite> python datavisualization.py

=== Data Visualization with Matplotlib ===
1. Line Chart (Sales Trends)
2. Bar Chart (Student Scores)
3. Pie Chart (Survey Data)
4. Scatter Plot (Correlations)
5. Histogram (Data Distribution)
6. Heatmap (Correlation Matrix)
7. Time Series (Stock Prices)
8. Dashboard (Multiple Plots)
9. Save Sample Data to CSV
10. Load Custom Data
0. Exit

Enter your choice: 1
Line chart saved as 'sales_comparison.png'
Explanation
- The `import matplotlib.pyplot as plt` statement imports the main plotting library for chart creation.
- The `import seaborn as sns` statement imports advanced statistical visualization capabilities.
- The `import pandas as pd` statement provides data manipulation and analysis tools.
- The `DataVisualizer` class manages all chart creation and data processing operations.
- The `create_line_chart()` method generates line plots for time series and trend analysis.
- The `create_bar_chart()` method creates bar plots for categorical data comparison.
- The `create_scatter_plot()` method produces scatter plots for correlation analysis.
- Sample data generation enables testing without external data files.
- Chart customization includes colors, themes, fonts, and styling options.
- Dashboard creation combines multiple charts into comprehensive reports.
- Data export functionality saves every chart as a 300-dpi PNG file; `plt.savefig()` can also write other formats such as PDF or SVG if you change the file extension.
- Statistical analysis features include correlation matrices and distribution plots.
Next Steps
Congratulations! You have successfully created a Data Visualization Suite in Python. Experiment with the code and see if you can modify the application. Here are a few suggestions:
- Add interactive charts with Plotly or Bokeh
- Implement real-time data streaming visualization
- Create animated charts for time-based data
- Add machine learning visualization components
- Implement web-based dashboard with Flask/Django
- Create geospatial mapping capabilities
- Add advanced statistical analysis features
- Implement chart templates and themes
- Create collaborative data visualization tools
Conclusion
In this project, you learned how to create a Data Visualization Suite in Python using Matplotlib and Seaborn. You also learned about statistical plotting, data analysis, dashboard creation, and implementing professional data visualization solutions. You can find the source code on GitHub.
⚙️ Data Visualization Suite
# Data Visualization with Matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import seaborn as sns
import json
from typing import List, Dict, Optional
import random
class DataVisualizer:
    """Build random sample datasets and render them as charts.

    Every ``create_*`` method reads from ``self.sample_data``, optionally
    saves the figure as a 300-dpi PNG in the current working directory,
    and then displays it with ``plt.show()``.
    """

    def __init__(self):
        """Configure global plot styling and generate the sample data.

        The ``seaborn-v0_8`` style sheet only exists in Matplotlib 3.6 and
        newer; older releases ship it under the name ``seaborn``, so that
        name is used as a fallback instead of failing with ``OSError``.
        """
        try:
            plt.style.use('seaborn-v0_8')
        except OSError:
            # Matplotlib < 3.6 only knows the un-prefixed style name.
            plt.style.use('seaborn')
        sns.set_palette("husl")

        # Sample data
        self.sample_data = self.generate_sample_data()

    def generate_sample_data(self) -> Dict:
        """Generate random sample data for every chart in the suite.

        Returns:
            A dict with the sections ``sales`` (12 months, two years),
            ``students`` (20 names with math/science/english scores),
            ``stock`` (30 daily dates ending yesterday and a random-walk
            price series), ``survey`` (six age groups with response
            counts) and ``weather`` (seven days of temperature and
            humidity).
        """
        # Sales data
        months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                  'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
        sales_2022 = [random.randint(10000, 50000) for _ in months]
        sales_2023 = [random.randint(12000, 55000) for _ in months]

        # Student scores
        students = [f"Student {i}" for i in range(1, 21)]
        math_scores = [random.randint(60, 100) for _ in students]
        science_scores = [random.randint(55, 95) for _ in students]
        english_scores = [random.randint(65, 100) for _ in students]

        # Stock data. The clock is read once so consecutive dates are
        # exactly one day apart rather than drifting by a few microseconds.
        now = datetime.now()
        dates = [now - timedelta(days=offset) for offset in range(30, 0, -1)]
        stock_prices = []
        price = 100
        for _ in dates:
            price += random.uniform(-5, 5)
            stock_prices.append(max(price, 10))  # Ensure positive price

        # Survey data
        age_groups = ['18-25', '26-35', '36-45', '46-55', '56-65', '65+']
        responses = [random.randint(50, 200) for _ in age_groups]

        # Weather data
        days = [f"Day {i}" for i in range(1, 8)]
        temperatures = [random.randint(15, 35) for _ in days]
        humidity = [random.randint(30, 80) for _ in days]

        return {
            'sales': {'months': months, '2022': sales_2022, '2023': sales_2023},
            'students': {'names': students, 'math': math_scores,
                         'science': science_scores, 'english': english_scores},
            'stock': {'dates': dates, 'prices': stock_prices},
            'survey': {'age_groups': age_groups, 'responses': responses},
            'weather': {'days': days, 'temperature': temperatures, 'humidity': humidity},
        }

    def create_line_chart(self, save=True):
        """Plot 2022 and 2023 monthly sales as two lines on one figure.

        Args:
            save: When true, write the figure to ``sales_comparison.png``
                before it is shown.
        """
        plt.figure(figsize=(12, 6))
        sales = self.sample_data['sales']
        month_labels, totals_2022, totals_2023 = (
            sales['months'], sales['2022'], sales['2023'])

        # One (values, marker, legend label, colour) entry per plotted year.
        for totals, marker, year, colour in (
                (totals_2022, 'o', '2022', '#3498db'),
                (totals_2023, 's', '2023', '#e74c3c')):
            plt.plot(month_labels, totals, marker=marker, linewidth=2,
                     label=year, color=colour)

        plt.title('Monthly Sales Comparison (2022 vs 2023)', fontsize=16, fontweight='bold')
        plt.xlabel('Month', fontsize=12)
        plt.ylabel('Sales ($)', fontsize=12)
        plt.legend(fontsize=11)
        plt.grid(True, alpha=0.3)
        # Plain (non-scientific) tick labels on the y-axis.
        plt.ticklabel_format(style='plain', axis='y')
        # Rotate x-axis labels for better readability
        plt.xticks(rotation=45)
        plt.tight_layout()

        if save:
            plt.savefig('sales_comparison.png', dpi=300, bbox_inches='tight')
            print("Line chart saved as 'sales_comparison.png'")
        plt.show()

    def create_bar_chart(self, save=True):
        """Draw grouped per-student bars next to per-subject average bars.

        The left panel shows the first ten students; the right panel shows
        the mean score of each subject over all students.

        Args:
            save: When true, write the figure to ``student_scores.png``
                before it is shown.
        """
        fig, (grouped_ax, average_ax) = plt.subplots(1, 2, figsize=(15, 6))
        scores = self.sample_data['students']
        shown_names = scores['names'][:10]  # First 10 students
        shown_math = scores['math'][:10]
        shown_science = scores['science'][:10]
        shown_english = scores['english'][:10]
        palette = ['#3498db', '#e74c3c', '#2ecc71']

        # Grouped bar chart: three bars per student around each tick.
        x = np.arange(len(shown_names))
        width = 0.25
        grouped = (
            (x - width, shown_math, 'Math'),
            (x, shown_science, 'Science'),
            (x + width, shown_english, 'English'),
        )
        for (positions, values, subject), colour in zip(grouped, palette):
            grouped_ax.bar(positions, values, width, label=subject,
                           color=colour, alpha=0.8)
        grouped_ax.set_title('Student Scores by Subject', fontsize=14, fontweight='bold')
        grouped_ax.set_xlabel('Students', fontsize=12)
        grouped_ax.set_ylabel('Scores', fontsize=12)
        grouped_ax.set_xticks(x)
        grouped_ax.set_xticklabels(shown_names, rotation=45, ha='right')
        grouped_ax.legend()
        grouped_ax.grid(True, alpha=0.3)

        # Average scores bar chart (over all students, not just the first ten)
        subjects = ['Math', 'Science', 'English']
        avg_scores = [np.mean(scores[key]) for key in ('math', 'science', 'english')]
        bars = average_ax.bar(subjects, avg_scores, color=palette, alpha=0.8)
        average_ax.set_title('Average Scores by Subject', fontsize=14, fontweight='bold')
        average_ax.set_ylabel('Average Score', fontsize=12)
        average_ax.grid(True, alpha=0.3)

        # Add value labels just above each bar
        for bar, score in zip(bars, avg_scores):
            label_x = bar.get_x() + bar.get_width() / 2.
            average_ax.text(label_x, bar.get_height() + 1, f'{score:.1f}',
                            ha='center', va='bottom', fontweight='bold')

        plt.tight_layout()
        if save:
            plt.savefig('student_scores.png', dpi=300, bbox_inches='tight')
            print("Bar chart saved as 'student_scores.png'")
        plt.show()

    def create_pie_chart(self, save=True):
        """Draw a survey-response pie next to a fixed market-share pie.

        Args:
            save: When true, write the figure to ``pie_charts.png`` before
                it is shown.
        """
        fig, (survey_ax, market_ax) = plt.subplots(1, 2, figsize=(15, 7))

        # Age group responses
        survey = self.sample_data['survey']
        age_groups = survey['age_groups']
        responses = survey['responses']
        colors = plt.cm.Set3(np.linspace(0, 1, len(age_groups)))
        _, _, survey_pcts = survey_ax.pie(responses, labels=age_groups, autopct='%1.1f%%',
                                          colors=colors, startangle=90)
        survey_ax.set_title('Survey Responses by Age Group', fontsize=14, fontweight='bold')

        # Market share pie chart (example data)
        companies = ['Company A', 'Company B', 'Company C', 'Company D', 'Others']
        market_share = [30, 25, 20, 15, 10]
        explode = (0.1, 0, 0, 0, 0)  # explode 1st slice
        _, _, market_pcts = market_ax.pie(market_share, labels=companies, autopct='%1.1f%%',
                                          explode=explode, shadow=True, startangle=90)
        market_ax.set_title('Market Share Distribution', fontsize=14, fontweight='bold')

        # Make the percentage text white and bold on both pies
        for pct_label in (*survey_pcts, *market_pcts):
            pct_label.set_color('white')
            pct_label.set_fontweight('bold')

        plt.tight_layout()
        if save:
            plt.savefig('pie_charts.png', dpi=300, bbox_inches='tight')
            print("Pie charts saved as 'pie_charts.png'")
        plt.show()

    def create_scatter_plot(self, save=True):
        """Draw score-vs-score and temperature-vs-humidity scatter plots.

        Args:
            save: When true, write the figure to ``scatter_plots.png``
                before it is shown.
        """
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

        # Student scores correlation
        math_scores = self.sample_data['students']['math']
        science_scores = self.sample_data['students']['science']
        english_scores = self.sample_data['students']['english']
        ax1.scatter(math_scores, science_scores, alpha=0.6, s=60,
                    color='#3498db', label='Math vs Science')
        ax1.scatter(math_scores, english_scores, alpha=0.6, s=60,
                    color='#e74c3c', label='Math vs English')
        ax1.set_xlabel('Math Scores', fontsize=12)
        ax1.set_ylabel('Other Subject Scores', fontsize=12)
        ax1.set_title('Student Scores Correlation', fontsize=14, fontweight='bold')
        ax1.legend()
        ax1.grid(True, alpha=0.3)

        # Temperature vs Humidity; point colour follows the temperature value
        # through the 'coolwarm' colormap (no separate colour array needed).
        temperature = self.sample_data['weather']['temperature']
        humidity = self.sample_data['weather']['humidity']
        scatter = ax2.scatter(temperature, humidity, c=temperature, cmap='coolwarm',
                              s=100, alpha=0.7, edgecolors='black', linewidth=1)
        ax2.set_xlabel('Temperature (°C)', fontsize=12)
        ax2.set_ylabel('Humidity (%)', fontsize=12)
        ax2.set_title('Temperature vs Humidity', fontsize=14, fontweight='bold')
        ax2.grid(True, alpha=0.3)

        # Add colorbar
        cbar = plt.colorbar(scatter, ax=ax2)
        cbar.set_label('Temperature (°C)', fontsize=10)

        plt.tight_layout()
        if save:
            plt.savefig('scatter_plots.png', dpi=300, bbox_inches='tight')
            print("Scatter plots saved as 'scatter_plots.png'")
        plt.show()

    def create_histogram(self, save=True):
        """Draw a 2x2 grid of histograms for scores, prices and temperature.

        Args:
            save: When true, write the figure to ``histograms.png`` before
                it is shown.
        """
        fig, ((math_ax, stock_ax), (temp_ax, all_ax)) = plt.subplots(2, 2, figsize=(15, 10))
        students = self.sample_data['students']

        def draw_panel(ax, values, bins, colour, title, x_label):
            # Shared styling for every histogram panel.
            ax.hist(values, bins=bins, alpha=0.7, color=colour, edgecolor='black')
            ax.set_title(title, fontsize=12, fontweight='bold')
            ax.set_xlabel(x_label)
            ax.set_ylabel('Frequency')
            ax.grid(True, alpha=0.3)

        # Math scores distribution, with a dashed line at the mean
        math_scores = students['math']
        draw_panel(math_ax, math_scores, 10, '#3498db', 'Math Scores Distribution', 'Score')
        mean_math = np.mean(math_scores)
        math_ax.axvline(mean_math, color='red', linestyle='--', linewidth=2,
                        label=f'Mean: {mean_math:.1f}')
        math_ax.legend()

        # Stock prices distribution
        draw_panel(stock_ax, self.sample_data['stock']['prices'], 15, '#2ecc71',
                   'Stock Prices Distribution', 'Price ($)')

        # Temperature distribution
        draw_panel(temp_ax, self.sample_data['weather']['temperature'], 8, '#e74c3c',
                   'Temperature Distribution', 'Temperature (°C)')

        # Combined scores distribution (the three score lists concatenated)
        all_scores = students['math'] + students['science'] + students['english']
        draw_panel(all_ax, all_scores, 20, '#9b59b6', 'All Scores Distribution', 'Score')

        plt.tight_layout()
        if save:
            plt.savefig('histograms.png', dpi=300, bbox_inches='tight')
            print("Histograms saved as 'histograms.png'")
        plt.show()

    def create_heatmap(self, save=True):
        """Draw an annotated heatmap of the subject-score correlations.

        Args:
            save: When true, write the figure to
                ``correlation_heatmap.png`` before it is shown.
        """
        scores = self.sample_data['students']
        # One column per subject; corr() then gives the 3x3 matrix.
        students_df = pd.DataFrame(
            {subject: scores[subject.lower()] for subject in ('Math', 'Science', 'English')})
        correlation_matrix = students_df.corr()

        plt.figure(figsize=(10, 8))
        sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0,
                    square=True, linewidths=0.5, cbar_kws={"shrink": .8})
        plt.title('Subject Scores Correlation Matrix', fontsize=16, fontweight='bold', pad=20)

        if save:
            plt.savefig('correlation_heatmap.png', dpi=300, bbox_inches='tight')
            print("Heatmap saved as 'correlation_heatmap.png'")
        plt.show()

    def create_time_series(self, save=True):
        """Plot the stock price series with a filled area and a trend line.

        Args:
            save: When true, write the figure to ``stock_timeseries.png``
                before it is shown.
        """
        plt.figure(figsize=(14, 8))
        stock = self.sample_data['stock']
        dates, prices = stock['dates'], stock['prices']

        plt.plot(dates, prices, linewidth=2, color='#3498db', marker='o', markersize=4)
        # Fill area under the curve
        plt.fill_between(dates, prices, alpha=0.3, color='#3498db')
        plt.title('Stock Price Movement (30 Days)', fontsize=16, fontweight='bold')
        plt.xlabel('Date', fontsize=12)
        plt.ylabel('Price ($)', fontsize=12)
        plt.grid(True, alpha=0.3)
        plt.xticks(rotation=45)

        # Trend line: degree-1 least-squares fit against the sample index.
        sample_index = np.arange(len(dates))
        trend = np.poly1d(np.polyfit(sample_index, prices, 1))
        plt.plot(dates, trend(sample_index), "r--", alpha=0.8, linewidth=2, label='Trend')
        plt.legend()
        plt.tight_layout()

        if save:
            plt.savefig('stock_timeseries.png', dpi=300, bbox_inches='tight')
            print("Time series plot saved as 'stock_timeseries.png'")
        plt.show()

    def create_subplots_dashboard(self, save=True):
        """Combine five charts into one dashboard figure on a 3x3 grid.

        Args:
            save: When true, write the figure to ``dashboard.png`` before
                it is shown.
        """
        data = self.sample_data
        fig = plt.figure(figsize=(16, 12))
        # Layout: 3x3 grid
        grid = fig.add_gridspec(3, 3, hspace=0.3, wspace=0.3)

        # 1. Sales line chart (top row, first two columns)
        sales_ax = fig.add_subplot(grid[0, :2])
        months = data['sales']['months']
        sales_2022 = data['sales']['2022']
        sales_2023 = data['sales']['2023']
        sales_ax.plot(months, sales_2022, marker='o', label='2022', color='#3498db')
        sales_ax.plot(months, sales_2023, marker='s', label='2023', color='#e74c3c')
        sales_ax.set_title('Monthly Sales Trend', fontweight='bold')
        sales_ax.legend()
        sales_ax.grid(True, alpha=0.3)
        sales_ax.tick_params(axis='x', rotation=45)

        # 2. Age group pie chart (top row, last column)
        pie_ax = fig.add_subplot(grid[0, 2])
        pie_ax.pie(data['survey']['responses'], labels=data['survey']['age_groups'],
                   autopct='%1.0f%%', textprops={'fontsize': 8})
        pie_ax.set_title('Age Distribution', fontweight='bold')

        # 3. Scores bar chart (middle row, first two columns)
        scores_ax = fig.add_subplot(grid[1, :2])
        subjects = ['Math', 'Science', 'English']
        avg_scores = [np.mean(data['students'][key])
                      for key in ('math', 'science', 'english')]
        bars = scores_ax.bar(subjects, avg_scores,
                             color=['#3498db', '#e74c3c', '#2ecc71'], alpha=0.7)
        scores_ax.set_title('Average Subject Scores', fontweight='bold')
        scores_ax.set_ylabel('Score')
        # Add value labels just above each bar
        for bar, score in zip(bars, avg_scores):
            label_x = bar.get_x() + bar.get_width() / 2.
            scores_ax.text(label_x, bar.get_height() + 1, f'{score:.1f}',
                           ha='center', va='bottom', fontweight='bold')

        # 4. Temperature scatter (middle row, last column)
        weather_ax = fig.add_subplot(grid[1, 2])
        temperature = data['weather']['temperature']
        humidity = data['weather']['humidity']
        weather_ax.scatter(temperature, humidity, c=temperature, cmap='coolwarm',
                           s=50, alpha=0.7)
        weather_ax.set_title('Temp vs Humidity', fontweight='bold')
        weather_ax.set_xlabel('Temperature')
        weather_ax.set_ylabel('Humidity')

        # 5. Stock prices (entire bottom row)
        stock_ax = fig.add_subplot(grid[2, :])
        dates = data['stock']['dates']
        prices = data['stock']['prices']
        stock_ax.plot(dates, prices, color='#2ecc71', linewidth=2)
        stock_ax.fill_between(dates, prices, alpha=0.3, color='#2ecc71')
        stock_ax.set_title('Stock Price Movement', fontweight='bold')
        stock_ax.set_xlabel('Date')
        stock_ax.set_ylabel('Price ($)')
        stock_ax.tick_params(axis='x', rotation=45)
        stock_ax.grid(True, alpha=0.3)

        plt.suptitle('Data Visualization Dashboard', fontsize=20, fontweight='bold', y=0.95)
        if save:
            plt.savefig('dashboard.png', dpi=300, bbox_inches='tight')
            print("Dashboard saved as 'dashboard.png'")
        plt.show()

    def load_custom_data(self, filename: str) -> Optional[pd.DataFrame]:
        """Read a CSV file into a DataFrame and print a short summary of it.

        Args:
            filename: Path of the CSV file to read.

        Returns:
            The loaded DataFrame, or ``None`` when anything goes wrong
            (the error is printed, not raised).
        """
        try:
            df = pd.read_csv(filename)
            print(f"Loaded data from {filename}")
            print(f"Shape: {df.shape}")
            print(f"Columns: {list(df.columns)}")
            return df
        except Exception as e:
            print(f"Error loading data: {e}")
            return None

    def save_sample_data(self):
        """Write the sales, student and stock sample data to three CSV files.

        The files are created in the current working directory. Any
        failure is reported with a printed message instead of being raised.
        """
        try:
            # Save sales data
            sales = self.sample_data['sales']
            pd.DataFrame({
                'Month': sales['months'],
                'Sales_2022': sales['2022'],
                'Sales_2023': sales['2023'],
            }).to_csv('sample_sales_data.csv', index=False)

            # Save student data
            students = self.sample_data['students']
            pd.DataFrame({
                'Student': students['names'],
                'Math': students['math'],
                'Science': students['science'],
                'English': students['english'],
            }).to_csv('sample_student_data.csv', index=False)

            # Save stock data; dates are written as YYYY-MM-DD strings
            stock = self.sample_data['stock']
            pd.DataFrame({
                'Date': [day.strftime('%Y-%m-%d') for day in stock['dates']],
                'Price': stock['prices'],
            }).to_csv('sample_stock_data.csv', index=False)

            print("Sample data saved to CSV files:")
            print("- sample_sales_data.csv")
            print("- sample_student_data.csv")
            print("- sample_stock_data.csv")
        except Exception as e:
            print(f"Error saving sample data: {e}")
def main():
    """Run the interactive text menu of the data visualization app.

    The menu is printed and a choice is read in a loop until the user
    picks ``0``, presses Ctrl+C, or standard input reaches end-of-file.
    Any other exception raised while handling a choice is printed and the
    menu is shown again.
    """
    visualizer = DataVisualizer()

    # Menu choices that map directly onto a no-argument visualizer method.
    actions = {
        '1': visualizer.create_line_chart,
        '2': visualizer.create_bar_chart,
        '3': visualizer.create_pie_chart,
        '4': visualizer.create_scatter_plot,
        '5': visualizer.create_histogram,
        '6': visualizer.create_heatmap,
        '7': visualizer.create_time_series,
        '8': visualizer.create_subplots_dashboard,
        '9': visualizer.save_sample_data,
    }
    menu_lines = (
        "\n=== Data Visualization with Matplotlib ===",
        "1. Line Chart (Sales Trends)",
        "2. Bar Chart (Student Scores)",
        "3. Pie Chart (Survey Data)",
        "4. Scatter Plot (Correlations)",
        "5. Histogram (Data Distribution)",
        "6. Heatmap (Correlation Matrix)",
        "7. Time Series (Stock Prices)",
        "8. Dashboard (Multiple Plots)",
        "9. Save Sample Data to CSV",
        "10. Load Custom Data",
        "0. Exit",
    )

    while True:
        for line in menu_lines:
            print(line)
        try:
            choice = input("\nEnter your choice: ").strip()
            if choice == '0':
                print("Thank you for using the Data Visualization app!")
                break
            if choice in actions:
                actions[choice]()
            elif choice == '10':
                filename = input("Enter CSV filename: ").strip()
                if filename:
                    df = visualizer.load_custom_data(filename)
                    if df is not None:
                        print("\nFirst 5 rows:")
                        print(df.head())
                        print("\nData types:")
                        print(df.dtypes)
            else:
                print("Invalid choice. Please try again.")
        except (KeyboardInterrupt, EOFError):
            # EOFError means stdin was closed or exhausted. If it fell through
            # to the generic handler below, every later input() would fail the
            # same way and the menu would be re-printed forever.
            print("\n\nGoodbye!")
            break
        except Exception as e:
            print(f"An error occurred: {e}")


if __name__ == "__main__":
    main()
# Data Visualization with Matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import seaborn as sns
import json
from typing import List, Dict, Optional
import random
class DataVisualizer:
def __init__(self):
    """Configure global plot styling and generate the sample data.

    The ``seaborn-v0_8`` style sheet only exists in Matplotlib 3.6 and
    newer; older releases ship it under the name ``seaborn``, so that name
    is used as a fallback instead of failing with ``OSError``.
    """
    try:
        plt.style.use('seaborn-v0_8')
    except OSError:
        # Matplotlib < 3.6 only knows the un-prefixed style name.
        plt.style.use('seaborn')
    sns.set_palette("husl")

    # Sample data
    self.sample_data = self.generate_sample_data()
def generate_sample_data(self) -> Dict:
    """Generate random sample data for every chart in the suite.

    Returns:
        A dict with the sections ``sales`` (12 months, two years),
        ``students`` (20 names with math/science/english scores),
        ``stock`` (30 daily dates ending yesterday and a random-walk price
        series), ``survey`` (six age groups with response counts) and
        ``weather`` (seven days of temperature and humidity).
    """
    # Sales data
    months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    sales_2022 = [random.randint(10000, 50000) for _ in months]
    sales_2023 = [random.randint(12000, 55000) for _ in months]

    # Student scores
    students = [f"Student {i}" for i in range(1, 21)]
    math_scores = [random.randint(60, 100) for _ in students]
    science_scores = [random.randint(55, 95) for _ in students]
    english_scores = [random.randint(65, 100) for _ in students]

    # Stock data. The clock is read once so consecutive dates are exactly
    # one day apart rather than drifting by a few microseconds per element.
    now = datetime.now()
    dates = [now - timedelta(days=offset) for offset in range(30, 0, -1)]
    stock_prices = []
    price = 100
    for _ in dates:
        price += random.uniform(-5, 5)
        stock_prices.append(max(price, 10))  # Ensure positive price

    # Survey data
    age_groups = ['18-25', '26-35', '36-45', '46-55', '56-65', '65+']
    responses = [random.randint(50, 200) for _ in age_groups]

    # Weather data
    days = [f"Day {i}" for i in range(1, 8)]
    temperatures = [random.randint(15, 35) for _ in days]
    humidity = [random.randint(30, 80) for _ in days]

    return {
        'sales': {'months': months, '2022': sales_2022, '2023': sales_2023},
        'students': {'names': students, 'math': math_scores,
                     'science': science_scores, 'english': english_scores},
        'stock': {'dates': dates, 'prices': stock_prices},
        'survey': {'age_groups': age_groups, 'responses': responses},
        'weather': {'days': days, 'temperature': temperatures, 'humidity': humidity},
    }
def create_line_chart(self, save=True):
    """Plot 2022 and 2023 monthly sales as two lines on one figure.

    Args:
        save: When true, write the figure to ``sales_comparison.png``
            before it is shown.
    """
    plt.figure(figsize=(12, 6))
    sales = self.sample_data['sales']
    month_labels, totals_2022, totals_2023 = (
        sales['months'], sales['2022'], sales['2023'])

    # One (values, marker, legend label, colour) entry per plotted year.
    for totals, marker, year, colour in (
            (totals_2022, 'o', '2022', '#3498db'),
            (totals_2023, 's', '2023', '#e74c3c')):
        plt.plot(month_labels, totals, marker=marker, linewidth=2,
                 label=year, color=colour)

    plt.title('Monthly Sales Comparison (2022 vs 2023)', fontsize=16, fontweight='bold')
    plt.xlabel('Month', fontsize=12)
    plt.ylabel('Sales ($)', fontsize=12)
    plt.legend(fontsize=11)
    plt.grid(True, alpha=0.3)
    # Plain (non-scientific) tick labels on the y-axis.
    plt.ticklabel_format(style='plain', axis='y')
    # Rotate x-axis labels for better readability
    plt.xticks(rotation=45)
    plt.tight_layout()

    if save:
        plt.savefig('sales_comparison.png', dpi=300, bbox_inches='tight')
        print("Line chart saved as 'sales_comparison.png'")
    plt.show()
def create_bar_chart(self, save=True):
    """Draw grouped per-student bars next to per-subject average bars.

    The left panel shows the first ten students; the right panel shows the
    mean score of each subject over all students.

    Args:
        save: When true, write the figure to ``student_scores.png`` before
            it is shown.
    """
    fig, (grouped_ax, average_ax) = plt.subplots(1, 2, figsize=(15, 6))
    scores = self.sample_data['students']
    shown_names = scores['names'][:10]  # First 10 students
    shown_math = scores['math'][:10]
    shown_science = scores['science'][:10]
    shown_english = scores['english'][:10]
    palette = ['#3498db', '#e74c3c', '#2ecc71']

    # Grouped bar chart: three bars per student around each tick.
    x = np.arange(len(shown_names))
    width = 0.25
    grouped = (
        (x - width, shown_math, 'Math'),
        (x, shown_science, 'Science'),
        (x + width, shown_english, 'English'),
    )
    for (positions, values, subject), colour in zip(grouped, palette):
        grouped_ax.bar(positions, values, width, label=subject,
                       color=colour, alpha=0.8)
    grouped_ax.set_title('Student Scores by Subject', fontsize=14, fontweight='bold')
    grouped_ax.set_xlabel('Students', fontsize=12)
    grouped_ax.set_ylabel('Scores', fontsize=12)
    grouped_ax.set_xticks(x)
    grouped_ax.set_xticklabels(shown_names, rotation=45, ha='right')
    grouped_ax.legend()
    grouped_ax.grid(True, alpha=0.3)

    # Average scores bar chart (over all students, not just the first ten)
    subjects = ['Math', 'Science', 'English']
    avg_scores = [np.mean(scores[key]) for key in ('math', 'science', 'english')]
    bars = average_ax.bar(subjects, avg_scores, color=palette, alpha=0.8)
    average_ax.set_title('Average Scores by Subject', fontsize=14, fontweight='bold')
    average_ax.set_ylabel('Average Score', fontsize=12)
    average_ax.grid(True, alpha=0.3)

    # Add value labels just above each bar
    for bar, score in zip(bars, avg_scores):
        label_x = bar.get_x() + bar.get_width() / 2.
        average_ax.text(label_x, bar.get_height() + 1, f'{score:.1f}',
                        ha='center', va='bottom', fontweight='bold')

    plt.tight_layout()
    if save:
        plt.savefig('student_scores.png', dpi=300, bbox_inches='tight')
        print("Bar chart saved as 'student_scores.png'")
    plt.show()
def create_pie_chart(self, save=True):
    """Draw a survey-response pie next to a fixed market-share pie.

    Args:
        save: When true, write the figure to ``pie_charts.png`` before it
            is shown.
    """
    fig, (survey_ax, market_ax) = plt.subplots(1, 2, figsize=(15, 7))

    # Age group responses
    survey = self.sample_data['survey']
    age_groups = survey['age_groups']
    responses = survey['responses']
    colors = plt.cm.Set3(np.linspace(0, 1, len(age_groups)))
    _, _, survey_pcts = survey_ax.pie(responses, labels=age_groups, autopct='%1.1f%%',
                                      colors=colors, startangle=90)
    survey_ax.set_title('Survey Responses by Age Group', fontsize=14, fontweight='bold')

    # Market share pie chart (example data)
    companies = ['Company A', 'Company B', 'Company C', 'Company D', 'Others']
    market_share = [30, 25, 20, 15, 10]
    explode = (0.1, 0, 0, 0, 0)  # explode 1st slice
    _, _, market_pcts = market_ax.pie(market_share, labels=companies, autopct='%1.1f%%',
                                      explode=explode, shadow=True, startangle=90)
    market_ax.set_title('Market Share Distribution', fontsize=14, fontweight='bold')

    # Make the percentage text white and bold on both pies
    for pct_label in (*survey_pcts, *market_pcts):
        pct_label.set_color('white')
        pct_label.set_fontweight('bold')

    plt.tight_layout()
    if save:
        plt.savefig('pie_charts.png', dpi=300, bbox_inches='tight')
        print("Pie charts saved as 'pie_charts.png'")
    plt.show()
def create_scatter_plot(self, save=True):
    """Draw score-vs-score and temperature-vs-humidity scatter plots.

    Args:
        save: When true, write the figure to ``scatter_plots.png`` before
            it is shown.
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

    # Student scores correlation
    math_scores = self.sample_data['students']['math']
    science_scores = self.sample_data['students']['science']
    english_scores = self.sample_data['students']['english']
    ax1.scatter(math_scores, science_scores, alpha=0.6, s=60,
                color='#3498db', label='Math vs Science')
    ax1.scatter(math_scores, english_scores, alpha=0.6, s=60,
                color='#e74c3c', label='Math vs English')
    ax1.set_xlabel('Math Scores', fontsize=12)
    ax1.set_ylabel('Other Subject Scores', fontsize=12)
    ax1.set_title('Student Scores Correlation', fontsize=14, fontweight='bold')
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # Temperature vs Humidity; point colour follows the temperature value
    # through the 'coolwarm' colormap (no separate colour array is needed).
    temperature = self.sample_data['weather']['temperature']
    humidity = self.sample_data['weather']['humidity']
    scatter = ax2.scatter(temperature, humidity, c=temperature, cmap='coolwarm',
                          s=100, alpha=0.7, edgecolors='black', linewidth=1)
    ax2.set_xlabel('Temperature (°C)', fontsize=12)
    ax2.set_ylabel('Humidity (%)', fontsize=12)
    ax2.set_title('Temperature vs Humidity', fontsize=14, fontweight='bold')
    ax2.grid(True, alpha=0.3)

    # Add colorbar
    cbar = plt.colorbar(scatter, ax=ax2)
    cbar.set_label('Temperature (°C)', fontsize=10)

    plt.tight_layout()
    if save:
        plt.savefig('scatter_plots.png', dpi=300, bbox_inches='tight')
        print("Scatter plots saved as 'scatter_plots.png'")
    plt.show()
def create_histogram(self, save=True):
    """Draw a 2x2 grid of histograms for scores, prices and temperature.

    Args:
        save: When true, write the figure to ``histograms.png`` before it
            is shown.
    """
    fig, ((math_ax, stock_ax), (temp_ax, all_ax)) = plt.subplots(2, 2, figsize=(15, 10))
    students = self.sample_data['students']

    def draw_panel(ax, values, bins, colour, title, x_label):
        # Shared styling for every histogram panel.
        ax.hist(values, bins=bins, alpha=0.7, color=colour, edgecolor='black')
        ax.set_title(title, fontsize=12, fontweight='bold')
        ax.set_xlabel(x_label)
        ax.set_ylabel('Frequency')
        ax.grid(True, alpha=0.3)

    # Math scores distribution, with a dashed line at the mean
    math_scores = students['math']
    draw_panel(math_ax, math_scores, 10, '#3498db', 'Math Scores Distribution', 'Score')
    mean_math = np.mean(math_scores)
    math_ax.axvline(mean_math, color='red', linestyle='--', linewidth=2,
                    label=f'Mean: {mean_math:.1f}')
    math_ax.legend()

    # Stock prices distribution
    draw_panel(stock_ax, self.sample_data['stock']['prices'], 15, '#2ecc71',
               'Stock Prices Distribution', 'Price ($)')

    # Temperature distribution
    draw_panel(temp_ax, self.sample_data['weather']['temperature'], 8, '#e74c3c',
               'Temperature Distribution', 'Temperature (°C)')

    # Combined scores distribution (the three score lists concatenated)
    all_scores = students['math'] + students['science'] + students['english']
    draw_panel(all_ax, all_scores, 20, '#9b59b6', 'All Scores Distribution', 'Score')

    plt.tight_layout()
    if save:
        plt.savefig('histograms.png', dpi=300, bbox_inches='tight')
        print("Histograms saved as 'histograms.png'")
    plt.show()
def create_heatmap(self, save=True):
    """Draw an annotated heatmap of the subject-score correlations.

    Args:
        save: When true, write the figure to ``correlation_heatmap.png``
            before it is shown.
    """
    scores = self.sample_data['students']
    # One column per subject; corr() then gives the 3x3 matrix.
    students_df = pd.DataFrame(
        {subject: scores[subject.lower()] for subject in ('Math', 'Science', 'English')})
    correlation_matrix = students_df.corr()

    plt.figure(figsize=(10, 8))
    sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0,
                square=True, linewidths=0.5, cbar_kws={"shrink": .8})
    plt.title('Subject Scores Correlation Matrix', fontsize=16, fontweight='bold', pad=20)

    if save:
        plt.savefig('correlation_heatmap.png', dpi=300, bbox_inches='tight')
        print("Heatmap saved as 'correlation_heatmap.png'")
    plt.show()
def create_time_series(self, save=True):
    """Plot the sample stock prices over time with a linear trend line.

    Reads ``self.sample_data['stock']['dates']`` and ``['prices']``, draws
    the price line with a shaded area underneath, and overlays a
    first-degree least-squares fit.

    The trend line (and its legend) is only drawn when there are at least
    two data points: ``np.polyfit`` raises on empty input and a line fit
    through a single point is not meaningful.

    Args:
        save: When true, write the figure to 'stock_timeseries.png'
            before showing it.
    """
    plt.figure(figsize=(14, 8))
    dates = self.sample_data['stock']['dates']
    prices = self.sample_data['stock']['prices']

    plt.plot(dates, prices, linewidth=2, color='#3498db', marker='o', markersize=4)
    # Fill area under the curve
    plt.fill_between(dates, prices, alpha=0.3, color='#3498db')

    plt.title('Stock Price Movement (30 Days)', fontsize=16, fontweight='bold')
    plt.xlabel('Date', fontsize=12)
    plt.ylabel('Price ($)', fontsize=12)
    plt.grid(True, alpha=0.3)
    # Rotate the date labels so they do not overlap
    plt.xticks(rotation=45)

    if len(dates) >= 2:
        # Fit against the sample index rather than the dates themselves,
        # so the fit works on plain numbers.
        x_numeric = np.arange(len(dates))
        trend = np.poly1d(np.polyfit(x_numeric, prices, 1))
        plt.plot(dates, trend(x_numeric), "r--", alpha=0.8, linewidth=2,
                 label='Trend')
        plt.legend()

    plt.tight_layout()
    if save:
        plt.savefig('stock_timeseries.png', dpi=300, bbox_inches='tight')
        print("Time series plot saved as 'stock_timeseries.png'")
    plt.show()
def create_subplots_dashboard(self, save=True):
    """Render five sample charts on one 3x3 grid figure.

    Panels: monthly sales lines (top, two cells wide), survey pie (top
    right), average subject score bars (middle, two cells wide),
    temperature/humidity scatter (middle right) and the stock price line
    across the whole bottom row. All data comes from ``self.sample_data``.

    Args:
        save: When true, write the figure to 'dashboard.png' before
            showing it.
    """
    fig = plt.figure(figsize=(16, 12))
    grid = fig.add_gridspec(3, 3, hspace=0.3, wspace=0.3)

    # Panel 1: sales per month, one line per year.
    sales_ax = fig.add_subplot(grid[0, :2])
    months = self.sample_data['sales']['months']
    sales_2022 = self.sample_data['sales']['2022']
    sales_2023 = self.sample_data['sales']['2023']
    yearly_series = (
        (sales_2022, 'o', '2022', '#3498db'),
        (sales_2023, 's', '2023', '#e74c3c'),
    )
    for series, marker, year_label, line_color in yearly_series:
        sales_ax.plot(months, series, marker=marker, label=year_label,
                      color=line_color)
    sales_ax.set_title('Monthly Sales Trend', fontweight='bold')
    sales_ax.legend()
    sales_ax.grid(True, alpha=0.3)
    sales_ax.tick_params(axis='x', rotation=45)

    # Panel 2: survey responses per age group.
    pie_ax = fig.add_subplot(grid[0, 2])
    age_groups = self.sample_data['survey']['age_groups']
    responses = self.sample_data['survey']['responses']
    pie_ax.pie(responses, labels=age_groups, autopct='%1.0f%%',
               textprops={'fontsize': 8})
    pie_ax.set_title('Age Distribution', fontweight='bold')

    # Panel 3: mean score per subject, labelled above each bar.
    bar_ax = fig.add_subplot(grid[1, :2])
    subjects = ['Math', 'Science', 'English']
    avg_scores = [
        np.mean(self.sample_data['students'][key])
        for key in ('math', 'science', 'english')
    ]
    bars = bar_ax.bar(subjects, avg_scores,
                      color=['#3498db', '#e74c3c', '#2ecc71'], alpha=0.7)
    bar_ax.set_title('Average Subject Scores', fontweight='bold')
    bar_ax.set_ylabel('Score')
    for rect, mean_score in zip(bars, avg_scores):
        label_x = rect.get_x() + rect.get_width()/2.
        label_y = rect.get_height() + 1
        bar_ax.text(label_x, label_y, f'{mean_score:.1f}',
                    ha='center', va='bottom', fontweight='bold')

    # Panel 4: humidity against temperature, coloured by temperature.
    scatter_ax = fig.add_subplot(grid[1, 2])
    temperature = self.sample_data['weather']['temperature']
    humidity = self.sample_data['weather']['humidity']
    scatter_ax.scatter(temperature, humidity, c=temperature,
                       cmap='coolwarm', s=50, alpha=0.7)
    scatter_ax.set_title('Temp vs Humidity', fontweight='bold')
    scatter_ax.set_xlabel('Temperature')
    scatter_ax.set_ylabel('Humidity')

    # Panel 5: stock price line with shaded area, full width.
    stock_ax = fig.add_subplot(grid[2, :])
    dates = self.sample_data['stock']['dates']
    prices = self.sample_data['stock']['prices']
    stock_ax.plot(dates, prices, color='#2ecc71', linewidth=2)
    stock_ax.fill_between(dates, prices, alpha=0.3, color='#2ecc71')
    stock_ax.set_title('Stock Price Movement', fontweight='bold')
    stock_ax.set_xlabel('Date')
    stock_ax.set_ylabel('Price ($)')
    stock_ax.tick_params(axis='x', rotation=45)
    stock_ax.grid(True, alpha=0.3)

    plt.suptitle('Data Visualization Dashboard', fontsize=20,
                 fontweight='bold', y=0.95)
    if save:
        plt.savefig('dashboard.png', dpi=300, bbox_inches='tight')
        print("Dashboard saved as 'dashboard.png'")
    plt.show()
def load_custom_data(self, filename: str) -> Optional[pd.DataFrame]:
    """Read a CSV file into a DataFrame and print a short summary.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        The loaded DataFrame, or None when reading fails for any reason
        (the error is printed rather than raised).
    """
    try:
        frame = pd.read_csv(filename)
        summary = (
            f"Loaded data from {filename}",
            f"Shape: {frame.shape}",
            f"Columns: {list(frame.columns)}",
        )
        for line in summary:
            print(line)
        return frame
    except Exception as e:
        print(f"Error loading data: {e}")
        return None
def save_sample_data(self):
    """Write the sales, student and stock sample data to three CSV files.

    The files are created in the current working directory, without an
    index column. Stock dates are written as 'YYYY-MM-DD' strings. Any
    error is printed instead of raised.
    """
    try:
        # Sales: one row per month with both years side by side.
        pd.DataFrame({
            'Month': self.sample_data['sales']['months'],
            'Sales_2022': self.sample_data['sales']['2022'],
            'Sales_2023': self.sample_data['sales']['2023'],
        }).to_csv('sample_sales_data.csv', index=False)

        # Students: one row per student with the three subject scores.
        pd.DataFrame({
            'Student': self.sample_data['students']['names'],
            'Math': self.sample_data['students']['math'],
            'Science': self.sample_data['students']['science'],
            'English': self.sample_data['students']['english'],
        }).to_csv('sample_student_data.csv', index=False)

        # Stock: dates are formatted as text before writing.
        formatted_dates = [
            day.strftime('%Y-%m-%d')
            for day in self.sample_data['stock']['dates']
        ]
        pd.DataFrame({
            'Date': formatted_dates,
            'Price': self.sample_data['stock']['prices'],
        }).to_csv('sample_stock_data.csv', index=False)

        print("Sample data saved to CSV files:")
        for written in ('sample_sales_data.csv',
                        'sample_student_data.csv',
                        'sample_stock_data.csv'):
            print(f"- {written}")
    except Exception as e:
        print(f"Error saving sample data: {e}")
def main():
    """Run the interactive text menu until the user exits.

    Each numbered choice calls the matching ``DataVisualizer`` method.
    Choice 10 asks for a CSV filename and prints the first rows and the
    column types of the loaded data. Choice 0 leaves the loop.

    Ctrl+C and end-of-input (closed or exhausted stdin) both end the
    program. End-of-input has to be handled explicitly: ``input()`` raises
    ``EOFError``, which the generic handler below would otherwise catch on
    every iteration and keep the loop spinning forever.
    """
    visualizer = DataVisualizer()

    menu_lines = (
        "1. Line Chart (Sales Trends)",
        "2. Bar Chart (Student Scores)",
        "3. Pie Chart (Survey Data)",
        "4. Scatter Plot (Correlations)",
        "5. Histogram (Data Distribution)",
        "6. Heatmap (Correlation Matrix)",
        "7. Time Series (Stock Prices)",
        "8. Dashboard (Multiple Plots)",
        "9. Save Sample Data to CSV",
        "10. Load Custom Data",
        "0. Exit",
    )
    # Method names are resolved with getattr at dispatch time, inside the
    # try block, so a missing method is reported like any other error.
    simple_actions = {
        '1': 'create_line_chart',
        '2': 'create_bar_chart',
        '3': 'create_pie_chart',
        '4': 'create_scatter_plot',
        '5': 'create_histogram',
        '6': 'create_heatmap',
        '7': 'create_time_series',
        '8': 'create_subplots_dashboard',
        '9': 'save_sample_data',
    }

    while True:
        print("\n=== Data Visualization with Matplotlib ===")
        for line in menu_lines:
            print(line)
        try:
            choice = input("\nEnter your choice: ").strip()
            if choice == '0':
                print("Thank you for using the Data Visualization app!")
                break
            if choice in simple_actions:
                getattr(visualizer, simple_actions[choice])()
            elif choice == '10':
                filename = input("Enter CSV filename: ").strip()
                if filename:
                    df = visualizer.load_custom_data(filename)
                    if df is not None:
                        print("\nFirst 5 rows:")
                        print(df.head())
                        print("\nData types:")
                        print(df.dtypes)
            else:
                print("Invalid choice. Please try again.")
        except (KeyboardInterrupt, EOFError):
            print("\n\nGoodbye!")
            break
        except Exception as e:
            # Keep the menu alive after a failed chart or load.
            print(f"An error occurred: {e}")


if __name__ == "__main__":
    main()
How It Works
1. DataVisualizer Class Architecture
class DataVisualizer:
    """Container for a dataset plus the default plot settings."""

    def __init__(self):
        """Start with no data, no figures and the default settings."""
        self.data = None
        self.figure_size = (12, 8)
        # Palette and style names are stored as plain strings.
        self.color_palette = 'Set2'
        self.style = 'whitegrid'
        self.figures = []
The main class manages:
- Data Storage: Pandas DataFrames for data manipulation
- Plot Configuration: Default styles, colors, and layouts
- Figure Management: Multiple chart storage and organization
- Export Options: Various output formats and settings
2. Chart Type Implementation
def create_line_chart(self, x_col, y_col, title="Line Chart"):
    """Plot one column of ``self.data`` against another as a line.

    Args:
        x_col: Column name used for the x axis and its label.
        y_col: Column name used for the y axis and its label.
        title: Text shown above the chart.

    Returns:
        The current matplotlib figure after drawing.
    """
    plt.figure(figsize=self.figure_size)
    x_values = self.data[x_col]
    y_values = self.data[y_col]
    plt.plot(x_values, y_values, linewidth=2, marker='o', markersize=6)

    plt.title(title, fontsize=16, fontweight='bold')
    for set_label, column in ((plt.xlabel, x_col), (plt.ylabel, y_col)):
        set_label(column, fontsize=12)
    plt.grid(True, alpha=0.3)
    return plt.gcf()
3. Statistical Visualizations
def create_correlation_heatmap(self, title="Correlation Matrix"):
    """Draw a heatmap of the correlations between numeric columns.

    Only the numeric columns of ``self.data`` take part in the
    correlation.

    Args:
        title: Text shown above the heatmap.

    Returns:
        The current matplotlib figure after drawing.
    """
    numeric_only = self.data.select_dtypes(include=[np.number])
    corr_matrix = numeric_only.corr()

    plt.figure(figsize=self.figure_size)
    sns.heatmap(
        corr_matrix,
        annot=True,
        cmap='coolwarm',
        center=0,
        square=True,
        linewidths=0.5,
    )
    plt.title(title, fontsize=16, fontweight='bold')
    return plt.gcf()
4. Dashboard Creation
def create_dashboard(self, charts_config):
    """Create a multi-chart dashboard, two charts per row.

    Each entry of ``charts_config`` is passed, together with its axes, to
    ``self.create_chart_in_subplot``. Up to four entries use a 2x2 grid
    of size (16, 12). More entries add rows instead of failing with an
    IndexError, and the figure height grows by 6 per row. Axes without a
    chart are hidden so the dashboard shows no empty frames.

    Args:
        charts_config: Iterable of chart configuration objects.

    Returns:
        The matplotlib figure holding the dashboard.
    """
    charts_config = list(charts_config)
    # Never fewer than two rows, so small dashboards keep the 2x2 layout.
    rows = max(2, (len(charts_config) + 1) // 2)

    fig, axes = plt.subplots(rows, 2, figsize=(16, 6 * rows), squeeze=False)
    fig.suptitle('Data Analysis Dashboard', fontsize=20, fontweight='bold')

    # axes.flat walks the grid row by row, matching the i//2, i%2 layout.
    slots = list(axes.flat)
    for ax, chart_config in zip(slots, charts_config):
        self.create_chart_in_subplot(ax, chart_config)
    for unused_ax in slots[len(charts_config):]:
        unused_ax.set_visible(False)

    plt.tight_layout()
    return fig
Chart Types Available
1. Line Charts
# Basic line chart
visualizer.create_line_chart('date', 'sales', 'Sales Trend Over Time')
# Multiple line chart
visualizer.create_multi_line_chart(['sales', 'profit', 'costs'],
'Financial Metrics Comparison')
# Basic line chart
visualizer.create_line_chart('date', 'sales', 'Sales Trend Over Time')
# Multiple line chart
visualizer.create_multi_line_chart(['sales', 'profit', 'costs'],
'Financial Metrics Comparison')
2. Bar Charts
# Vertical bar chart
visualizer.create_bar_chart('category', 'value', 'Category Performance')
# Horizontal bar chart
visualizer.create_horizontal_bar_chart('product', 'revenue',
'Product Revenue Analysis')
# Grouped bar chart
visualizer.create_grouped_bar_chart(['q1', 'q2', 'q3', 'q4'],
'Quarterly Performance')
# Vertical bar chart
visualizer.create_bar_chart('category', 'value', 'Category Performance')
# Horizontal bar chart
visualizer.create_horizontal_bar_chart('product', 'revenue',
'Product Revenue Analysis')
# Grouped bar chart
visualizer.create_grouped_bar_chart(['q1', 'q2', 'q3', 'q4'],
'Quarterly Performance')
3. Scatter Plots
# Basic scatter plot
visualizer.create_scatter_plot('height', 'weight', 'Height vs Weight')
# Colored scatter plot
visualizer.create_colored_scatter_plot('x', 'y', 'category',
'Feature Analysis by Category')
# Bubble chart
visualizer.create_bubble_chart('x', 'y', 'size', 'Market Analysis')
# Basic scatter plot
visualizer.create_scatter_plot('height', 'weight', 'Height vs Weight')
# Colored scatter plot
visualizer.create_colored_scatter_plot('x', 'y', 'category',
'Feature Analysis by Category')
# Bubble chart
visualizer.create_bubble_chart('x', 'y', 'size', 'Market Analysis')
4. Statistical Charts
# Histogram
visualizer.create_histogram('age', 'Age Distribution', bins=20)
# Box plot
visualizer.create_box_plot(['group1', 'group2', 'group3'],
'Group Comparison')
# Violin plot
visualizer.create_violin_plot('category', 'value',
'Distribution by Category')
# Histogram
visualizer.create_histogram('age', 'Age Distribution', bins=20)
# Box plot
visualizer.create_box_plot(['group1', 'group2', 'group3'],
'Group Comparison')
# Violin plot
visualizer.create_violin_plot('category', 'value',
'Distribution by Category')
Usage Examples
Basic Data Analysis
# Initialize visualizer
visualizer = DataVisualizer()
# Load data
visualizer.load_data_from_csv('sales_data.csv')
# Create basic charts
visualizer.create_line_chart('month', 'sales', 'Monthly Sales Trend')
visualizer.create_bar_chart('region', 'revenue', 'Revenue by Region')
# Display charts
visualizer.show_all_charts()
# Initialize visualizer
visualizer = DataVisualizer()
# Load data
visualizer.load_data_from_csv('sales_data.csv')
# Create basic charts
visualizer.create_line_chart('month', 'sales', 'Monthly Sales Trend')
visualizer.create_bar_chart('region', 'revenue', 'Revenue by Region')
# Display charts
visualizer.show_all_charts()
Advanced Analytics Dashboard
# Create comprehensive dashboard
dashboard_config = [
{'type': 'line', 'x': 'date', 'y': 'sales', 'title': 'Sales Trend'},
{'type': 'bar', 'x': 'product', 'y': 'profit', 'title': 'Profit by Product'},
{'type': 'scatter', 'x': 'advertising', 'y': 'sales', 'title': 'Ad Spend vs Sales'},
{'type': 'heatmap', 'data': 'correlation', 'title': 'Feature Correlations'}
]
visualizer.create_dashboard(dashboard_config)
visualizer.export_dashboard('business_analytics.pdf')
# Create comprehensive dashboard
dashboard_config = [
{'type': 'line', 'x': 'date', 'y': 'sales', 'title': 'Sales Trend'},
{'type': 'bar', 'x': 'product', 'y': 'profit', 'title': 'Profit by Product'},
{'type': 'scatter', 'x': 'advertising', 'y': 'sales', 'title': 'Ad Spend vs Sales'},
{'type': 'heatmap', 'data': 'correlation', 'title': 'Feature Correlations'}
]
visualizer.create_dashboard(dashboard_config)
visualizer.export_dashboard('business_analytics.pdf')
Sample Data Generation
# Generate sample datasets
sample_data = visualizer.generate_sample_data('sales', records=1000)
visualizer.load_data(sample_data)
# Create various visualizations
visualizer.create_comprehensive_analysis()
# Generate sample datasets
sample_data = visualizer.generate_sample_data('sales', records=1000)
visualizer.load_data(sample_data)
# Create various visualizations
visualizer.create_comprehensive_analysis()
Running the Application
Command Line Usage
python datavisualization.py
python datavisualization.py
Interactive Mode
# Run in interactive mode
visualizer = DataVisualizer()
visualizer.start_interactive_mode()
# Follow the prompts:
# 1. Load your data file
# 2. Select chart type
# 3. Choose columns
# 4. Customize appearance
# 5. Export or display
# Run in interactive mode
visualizer = DataVisualizer()
visualizer.start_interactive_mode()
# Follow the prompts:
# 1. Load your data file
# 2. Select chart type
# 3. Choose columns
# 4. Customize appearance
# 5. Export or display
Sample Output
Console Output
=== Data Visualization Suite ===
Loading data from: sales_data.csv
✓ Data loaded successfully: 1000 rows, 8 columns
Creating visualizations...
✓ Line Chart: Monthly Sales Trend
✓ Bar Chart: Regional Performance
✓ Heatmap: Correlation Matrix
✓ Dashboard: Complete Analysis
Exporting charts...
✓ Saved: monthly_sales.png
✓ Saved: regional_performance.png
✓ Saved: correlation_heatmap.png
✓ Saved: complete_dashboard.pdf
Analysis complete! 📊
=== Data Visualization Suite ===
Loading data from: sales_data.csv
✓ Data loaded successfully: 1000 rows, 8 columns
Creating visualizations...
✓ Line Chart: Monthly Sales Trend
✓ Bar Chart: Regional Performance
✓ Heatmap: Correlation Matrix
✓ Dashboard: Complete Analysis
Exporting charts...
✓ Saved: monthly_sales.png
✓ Saved: regional_performance.png
✓ Saved: correlation_heatmap.png
✓ Saved: complete_dashboard.pdf
Analysis complete! 📊
Generated Charts Examples
Sales Trend Analysis
Monthly Sales Trend
├── January: $45,000 (↗️ +12%)
├── February: $52,000 (↗️ +15%)
├── March: $48,000 (↘️ -8%)
└── April: $58,000 (↗️ +21%)
Monthly Sales Trend
├── January: $45,000 (↗️ +12%)
├── February: $52,000 (↗️ +15%)
├── March: $48,000 (↘️ -8%)
└── April: $58,000 (↗️ +21%)
Statistical Summary
Dataset Overview:
- Records: 1,000
- Features: 8
- Missing Values: 0.2%
- Date Range: 2023-01-01 to 2025-09-02
Dataset Overview:
- Records: 1,000
- Features: 8
- Missing Values: 0.2%
- Date Range: 2023-01-01 to 2025-09-02
Advanced Features
1. Custom Styling
def apply_custom_theme(self):
    """Apply the 'seaborn-v0_8' style, the 'husl' palette and named colors.

    Stores four named hex colors on ``self.colors``.
    """
    plt.style.use('seaborn-v0_8')
    sns.set_palette("husl")

    # Named colors that chart code can look up by role.
    self.colors = dict(
        primary='#3498db',
        secondary='#e74c3c',
        success='#2ecc71',
        warning='#f39c12',
    )
2. Animation Support
def create_animated_chart(self, x_col, y_col, time_col):
    """Build an animation that reveals the data step by step over time.

    One frame is produced per unique value of ``time_col``. Each frame
    clears the axes and plots the rows whose ``time_col`` value is less
    than or equal to that frame's value.

    Args:
        x_col: Column plotted on the x axis.
        y_col: Column plotted on the y axis.
        time_col: Column whose unique values drive the frames.

    Returns:
        The ``FuncAnimation`` object. The caller should keep a reference
        to it so the animation is not garbage-collected.
    """
    from matplotlib.animation import FuncAnimation

    fig, ax = plt.subplots(figsize=self.figure_size)

    def draw_up_to(moment):
        # Redraw from scratch with everything seen up to this moment.
        ax.clear()
        seen_so_far = self.data[time_col] <= moment
        visible_rows = self.data[seen_so_far]
        ax.plot(visible_rows[x_col], visible_rows[y_col])
        ax.set_title(f'Data Evolution - {moment}')

    moments = self.data[time_col].unique()
    return FuncAnimation(fig, draw_up_to, frames=moments,
                         interval=500, repeat=True)
3. Interactive Charts with Plotly
def create_interactive_chart(self, chart_type, **kwargs):
    """Create and show an interactive Plotly chart of ``self.data``.

    Args:
        chart_type: 'scatter' or 'line'.
        **kwargs: Column names. 'x' and 'y' are required. 'color' is
            optional for both chart types; 'size' and 'hover_data' are
            optional and only used for scatter charts.

    Returns:
        The Plotly figure that was shown.

    Raises:
        ValueError: If ``chart_type`` is not supported. Without this
            check the code would reach ``fig.show()`` with ``fig`` never
            assigned and fail with an UnboundLocalError.
        KeyError: If 'x' or 'y' is missing from ``kwargs``.
    """
    supported_types = ('scatter', 'line')
    # Validate before importing plotly so a bad argument is reported
    # even when the optional dependency is not installed.
    if chart_type not in supported_types:
        raise ValueError(
            f"Unsupported chart_type {chart_type!r}; "
            f"expected one of {supported_types}"
        )

    import plotly.express as px

    if chart_type == 'scatter':
        fig = px.scatter(self.data, x=kwargs['x'], y=kwargs['y'],
                         color=kwargs.get('color'),
                         size=kwargs.get('size'),
                         hover_data=kwargs.get('hover_data'))
    else:
        fig = px.line(self.data, x=kwargs['x'], y=kwargs['y'],
                      color=kwargs.get('color'))
    fig.show()
    return fig
Data Processing Features
1. Data Cleaning
def clean_data(self):
    """Fill missing numeric values and drop rows containing outliers.

    Missing values in numeric columns are replaced by the column mean.
    A row is then dropped when any numeric value lies outside
    ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]`` for its column.

    The bounds for every column are computed from the same mean-filled
    data and applied in a single pass, so the result does not depend on
    column order. Values that are still NaN (a column with no data at
    all) are not treated as outliers, so such a column cannot wipe out
    the whole dataset.
    """
    # Handle missing values
    self.data = self.data.fillna(self.data.mean(numeric_only=True))

    numeric = self.data.select_dtypes(include=[np.number])
    if numeric.empty:
        return

    q1 = numeric.quantile(0.25)
    q3 = numeric.quantile(0.75)
    iqr = q3 - q1
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr

    # The bounds are Series indexed by column name; ge/le align on columns.
    within_bounds = numeric.ge(lower_bound) & numeric.le(upper_bound)
    keep_row = (within_bounds | numeric.isna()).all(axis=1)
    self.data = self.data[keep_row]
2. Statistical Analysis
def perform_statistical_analysis(self):
    """Compute summary statistics for ``self.data`` and derive insights.

    Correlation, skewness and kurtosis are restricted to numeric columns.
    Without that restriction, recent pandas versions raise as soon as the
    data contains a text column.

    Returns:
        A tuple ``(analysis, insights)``. ``analysis`` is a dict with the
        keys 'descriptive_stats', 'correlation_matrix', 'skewness' and
        'kurtosis'. ``insights`` is whatever
        ``self.generate_insights(analysis)`` returns.
    """
    analysis = {
        'descriptive_stats': self.data.describe(),
        'correlation_matrix': self.data.corr(numeric_only=True),
        'skewness': self.data.skew(numeric_only=True),
        'kurtosis': self.data.kurtosis(numeric_only=True),
    }
    # Generate insights
    insights = self.generate_insights(analysis)
    return analysis, insights
3. Data Transformation
def transform_data(self, transformations):
    """Add transformed copies of columns to ``self.data``.

    Each entry of ``transformations`` is a mapping with a 'type' and a
    'column' key:

    * 'log' adds ``<column>_log`` holding ``log(1 + x)``.
    * 'normalize' adds ``<column>_norm`` holding the min-max scaled
      values. A column whose values are all equal is mapped to 0.0
      instead of dividing by zero.

    Entries with any other type are ignored.

    Args:
        transformations: Iterable of transformation mappings.
    """
    for transformation in transformations:
        kind = transformation['type']
        if kind == 'log':
            col = transformation['column']
            # log1p is log(1 + x) and stays accurate for values near zero.
            self.data[f'{col}_log'] = np.log1p(self.data[col])
        elif kind == 'normalize':
            col = transformation['column']
            values = self.data[col]
            lowest = values.min()
            value_range = values.max() - lowest
            if value_range == 0:
                # Constant column: multiplying by zero gives 0.0 for
                # every value while leaving missing values as NaN.
                self.data[f'{col}_norm'] = (values - lowest) * 0.0
            else:
                self.data[f'{col}_norm'] = (values - lowest) / value_range
Export and Sharing Options
1. Multiple Export Formats
def export_chart(self, chart, filename, formats=('png', 'pdf', 'svg')):
    """Save a chart once per requested file format.

    Args:
        chart: Object with a matplotlib-style ``savefig`` method.
        filename: Output path without extension; ``.<format>`` is
            appended for each format.
        formats: File formats to write. The default is a tuple rather
            than a list so that the default value cannot be changed by
            accident between calls.
    """
    for fmt in formats:
        chart.savefig(
            f"{filename}.{fmt}",
            dpi=300,
            bbox_inches='tight',
            facecolor='white',
        )
2. Report Generation
def generate_report(self, template='standard'):
    """Write a short PDF summary of ``self.data`` to 'analysis_report.pdf'.

    The page holds a heading, the generation time and one line per
    statistic from ``DataFrame.describe()``. Only the value for the first
    described column is printed on each line.

    Args:
        template: Accepted but not used by this implementation.
    """
    from reportlab.lib.pagesizes import letter
    from reportlab.pdfgen import canvas

    pdf = canvas.Canvas("analysis_report.pdf", pagesize=letter)

    # Heading lines
    pdf.drawString(100, 750, "Data Analysis Report")
    pdf.drawString(100, 720, f"Generated: {datetime.now()}")

    # One statistic per line, 20 units apart, starting at y = 680.
    summary = self.data.describe()
    for line_no, (stat_name, stat_values) in enumerate(summary.iterrows()):
        line_y = 680 - 20 * line_no
        pdf.drawString(100, line_y, f"{stat_name}: {stat_values.iloc[0]:.2f}")

    pdf.save()
3. Web Dashboard Export
def create_web_dashboard(self):
    """Build a Dash app showing a heading and three Plotly charts.

    The charts come from ``self.create_plotly_chart`` called with 'line',
    'bar' and 'scatter', in that order.

    Returns:
        The configured ``dash.Dash`` application. It is not started here.
    """
    import dash
    from dash import dcc, html

    app = dash.Dash(__name__)

    page_children = [html.H1("Data Analysis Dashboard")]
    for chart_kind in ('line', 'bar', 'scatter'):
        figure = self.create_plotly_chart(chart_kind)
        page_children.append(dcc.Graph(figure=figure))

    app.layout = html.Div(page_children)
    return app
Sample Datasets
1. Sales Data
def generate_sales_data(self, records=1000):
    """Generate a reproducible sample sales dataset.

    Uses a private random generator seeded with 42. The values are the
    same as those produced by calling ``np.random.seed(42)`` first, but
    the process-wide NumPy random state is left untouched, so calling
    this method does not change random numbers drawn elsewhere.

    Args:
        records: Number of daily rows to generate, starting 2023-01-01.

    Returns:
        DataFrame with the columns 'date', 'sales', 'region', 'product',
        'advertising' and 'temperature'.
    """
    rng = np.random.RandomState(42)
    # The draws below must stay in this order to keep the values stable.
    data = {
        'date': pd.date_range('2023-01-01', periods=records, freq='D'),
        'sales': rng.normal(50000, 10000, records),
        'region': rng.choice(['North', 'South', 'East', 'West'], records),
        'product': rng.choice(['A', 'B', 'C', 'D'], records),
        'advertising': rng.normal(5000, 1000, records),
        'temperature': rng.normal(20, 10, records),
    }
    return pd.DataFrame(data)
2. Financial Data
def generate_financial_data(self, records=252):
    """Generate sample prices for three correlated stocks plus volume.

    Returns are drawn from a three-dimensional normal distribution and
    compounded into prices, scaled by 100. No seed is set, so every call
    gives different numbers.

    Args:
        records: Number of business-day rows, starting 2024-01-01.

    Returns:
        DataFrame with the columns 'date', 'stock_a', 'stock_b',
        'stock_c' and 'volume'.
    """
    business_days = pd.date_range('2024-01-01', periods=records, freq='B')

    # Generate correlated stock prices
    mean_returns = [0.001, 0.001, 0.001]
    covariance = [
        [0.01, 0.005, 0.003],
        [0.005, 0.01, 0.004],
        [0.003, 0.004, 0.01],
    ]
    returns = np.random.multivariate_normal(mean_returns, covariance, records)
    prices = np.cumprod(1 + returns, axis=0) * 100

    columns = {'date': business_days}
    for position, column_name in enumerate(('stock_a', 'stock_b', 'stock_c')):
        columns[column_name] = prices[:, position]
    columns['volume'] = np.random.exponential(1000000, records)
    return pd.DataFrame(columns)
Troubleshooting
Common Issues
1. Memory Issues with Large Datasets
# Solution: Process data in chunks
def process_large_dataset(self, filename, chunk_size=10000):
    """Read a CSV file chunk by chunk and combine the processed chunks.

    Every chunk is passed through ``self.process_chunk`` and the results
    are concatenated with a fresh index. The reader is opened in a
    ``with`` block so the file is closed even when processing a chunk
    fails.

    Note that all processed chunks are held in memory before they are
    combined, so peak memory only drops if ``process_chunk`` returns
    something smaller than its input.

    Args:
        filename: Path of the CSV file.
        chunk_size: Number of rows per chunk.

    Returns:
        The combined DataFrame, or an empty DataFrame when the reader
        yields no chunks.
    """
    with pd.read_csv(filename, chunksize=chunk_size) as reader:
        chunks = [self.process_chunk(chunk) for chunk in reader]

    if not chunks:
        # pd.concat raises on an empty list.
        return pd.DataFrame()
    return pd.concat(chunks, ignore_index=True)
2. Font Rendering Issues
# Solution: Configure matplotlib fonts
def configure_fonts(self):
    """Use Arial when it is installed, otherwise fall back to DejaVu Sans.

    Assigning a family name to ``plt.rcParams`` never fails, even for a
    font that does not exist, so availability is checked through the
    font manager instead of with a try/except around the assignment.
    """
    import matplotlib.font_manager as fm

    plt.rcParams['font.size'] = 10

    try:
        # With the fallback disabled, findfont raises ValueError when no
        # matching font is found.
        fm.findfont('Arial', fallback_to_default=False)
    except ValueError:
        print("Arial font not available, using default")
        plt.rcParams['font.family'] = 'DejaVu Sans'
    else:
        plt.rcParams['font.family'] = 'Arial'
3. Color Palette Issues
# Solution: Robust color handling
def get_color_palette(self, n_colors):
    """Return a seaborn palette with ``n_colors`` entries.

    Uses 'tab10' for up to ten colors and 'husl' for more.

    Args:
        n_colors: Number of colors requested.

    Returns:
        The palette returned by ``sns.color_palette``.
    """
    palette_name = "tab10" if n_colors <= 10 else "husl"
    return sns.color_palette(palette_name, n_colors)
Performance Optimization
1. Efficient Data Handling
def optimize_data_types(self):
    """Convert columns of ``self.data`` to more compact data types.

    * Object columns are parsed as datetimes. If parsing fails they
      become categorical instead.
    * float64 columns are stored as uint8 only when that loses nothing:
      no missing values, only whole numbers, and all values in 0..255.
      Fractional values such as 0.5 would otherwise be truncated, and
      missing values cannot be cast to an integer type at all.
    """
    for col in self.data.columns:
        series = self.data[col]
        if series.dtype == 'object':
            try:
                self.data[col] = pd.to_datetime(series)
            except (ValueError, TypeError, OverflowError):
                # Not parseable as dates: treat as labels.
                self.data[col] = series.astype('category')
        elif series.dtype == 'float64':
            in_uint8_range = series.min() >= 0 and series.max() <= 255
            if not in_uint8_range:
                continue
            is_whole_and_complete = (
                series.notna().all() and (series % 1 == 0).all()
            )
            if is_whole_and_complete:
                self.data[col] = series.astype('uint8')
2. Caching Results
def cached_calculation(self, operation, column):
    """Return a cached statistic for one or more columns of ``self.data``.

    Results are kept in a dict on the instance, so the cache goes away
    with the object. A class-level ``functools.lru_cache`` on a method
    would keep every instance alive for as long as the cache exists.

    NOTE(review): cached results are not invalidated when ``self.data``
    changes. Clear ``self._calculation_cache`` after modifying the data.

    Args:
        operation: 'correlation' or 'describe'.
        column: A column name, or a list/tuple of column names. A
            correlation needs at least two columns, so it requires the
            list/tuple form.

    Returns:
        The correlation matrix or the describe() result, or None for any
        other operation.

    Raises:
        ValueError: If 'correlation' is requested for a single column
            name.
    """
    if operation not in ('correlation', 'describe'):
        return None

    cache = getattr(self, '_calculation_cache', None)
    if cache is None:
        cache = self._calculation_cache = {}

    several_columns = isinstance(column, (list, tuple))
    # Lists are not hashable, so column groups are stored under a tuple.
    cache_key = (operation, tuple(column) if several_columns else column)
    if cache_key in cache:
        return cache[cache_key]

    if operation == 'correlation':
        if not several_columns:
            raise ValueError(
                "correlation needs a list or tuple of at least two columns"
            )
        result = self.data[list(column)].corr()
    else:
        selection = self.data[list(column)] if several_columns else self.data[column]
        result = selection.describe()

    cache[cache_key] = result
    return result
Extensions and Improvements
1. Machine Learning Integration
def add_ml_predictions(self, feature_cols=('feature1', 'feature2'),
                       target_col='target', prediction_col='predictions'):
    """Fit a linear regression on ``self.data`` and store its predictions.

    The model is fitted and evaluated on the same rows, so the stored
    values are in-sample predictions.

    Args:
        feature_cols: Names of the input columns. The default matches the
            previously hard-coded 'feature1' and 'feature2'.
        target_col: Name of the column to predict.
        prediction_col: Name of the column that receives the predictions.
    """
    from sklearn.linear_model import LinearRegression

    # list() gives a 2-D selection even for a single feature name tuple.
    X = self.data[list(feature_cols)].values
    y = self.data[target_col].values

    model = LinearRegression()
    model.fit(X, y)
    self.data[prediction_col] = model.predict(X)
2. Real-time Data Integration
def setup_real_time_updates(self, data_source, interval=60):
    """Start a background thread that refreshes the charts periodically.

    The worker repeatedly fetches data with
    ``self.fetch_real_time_data(data_source)``, passes it to
    ``self.update_charts`` and then sleeps. It runs while
    ``self.updating`` is true; set that attribute to False to stop it.
    The flag is checked once per cycle, so stopping can take up to
    ``interval`` seconds.

    ``self.updating`` is set to True here before the thread starts.
    Otherwise the worker would read an attribute that was never set.

    Args:
        data_source: Passed through to ``self.fetch_real_time_data``.
        interval: Seconds to wait between updates (default: one minute).
    """
    import threading
    import time

    def update_data():
        while self.updating:
            new_data = self.fetch_real_time_data(data_source)
            self.update_charts(new_data)
            time.sleep(interval)

    self.updating = True
    # Daemon thread: it must not keep the program alive on exit.
    self.update_thread = threading.Thread(target=update_data, daemon=True)
    self.update_thread.start()
3. Custom Chart Types
def create_custom_chart_type(self, chart_config):
    """Dispatch a chart configuration to the matching builder method.

    Supported values of ``chart_config['type']`` are 'radar', 'sankey'
    and 'treemap'.

    Args:
        chart_config: Mapping with a 'type' key. It is passed unchanged
            to the builder.

    Returns:
        Whatever the builder returns, or None for an unsupported type.
    """
    builders = (
        ('radar', 'create_radar_chart'),
        ('sankey', 'create_sankey_diagram'),
        ('treemap', 'create_treemap'),
    )
    for chart_kind, builder_name in builders:
        if chart_config['type'] == chart_kind:
            # Look the builder up only when it is actually needed.
            return getattr(self, builder_name)(chart_config)
    return None
Next Steps
After mastering this visualization suite, consider:
- Advanced Analytics: Integrate with scikit-learn for ML visualizations
- Web Applications: Build dashboards with Streamlit or Dash
- Business Intelligence: Create executive dashboards
- Real-time Analytics: Add streaming data capabilities
- 3D Visualizations: Explore 3D plotting with Plotly
Resources
- Matplotlib Documentation
- Seaborn Tutorial
- Pandas Documentation
- Plotly Python
- Data Visualization Best Practices
Conclusion
This data visualization suite provides a comprehensive toolkit for creating professional data visualizations. It demonstrates advanced plotting techniques, statistical analysis, and dashboard creation capabilities. The modular design allows for easy extension and customization for specific visualization needs.
The suite combines the power of Matplotlib, Seaborn, and Pandas to create publication-quality visualizations that can be used for business intelligence, academic research, and data storytelling. 📊🐍
Was this page helpful?
Let us know how we did