"""Plot which leak states were observed at each frequency of the EC-valve data.

Reads the measurements CSV, determines for every frequency whether only
state 0, only state 1, or a mix of states was recorded, prints a summary
and saves a scatter plot of frequency versus state category.
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Input data and output figure locations.
CSV_PATH = "/home/fferreira/Documents/teste_ecvalve.csv"
PLOT_PATH = (
    "/home/fferreira/Documents/HarvardX/teste/frequency_state_categories.png"
)

# Fixed colour per category so the legend is stable between runs.
CATEGORY_PALETTE = {'0': '#36A2EB', '1': '#FF6384', 'Both': '#FFCE56'}


def _categorize_states(states):
    """Return the category label for the unique states seen at one frequency.

    Args:
        states: Array of the unique ``state`` values of a frequency group.

    Returns:
        ``'0'`` if the only state is 0, ``'1'`` if the only state is 1,
        otherwise ``'Both'`` (this fallback also covers any single state
        that is neither 0 nor 1).
    """
    if len(states) == 1 and states[0] == 0:
        return '0'
    if len(states) == 1 and states[0] == 1:
        return '1'
    return 'Both'


# Set seaborn style for better visuals
sns.set(style="whitegrid")

# Load CSV
# NOTE(review): passing `names=` treats the first row as data, i.e. this
# assumes the CSV has no header row -- confirm against the file.
ecvalve = pd.read_csv(
    CSV_PATH,
    names=["frequency", "state", "pressure", "capacity"],
)

print("Data shape:", ecvalve.shape)
print("Label distribution:", ecvalve['state'].value_counts())

# Group by frequency and get unique states
freq_states = ecvalve.groupby('frequency')['state'].unique().reset_index()
freq_states['category'] = freq_states['state'].apply(_categorize_states)

print("\nFrequencies and their state categories:")
print(freq_states[['frequency', 'category']].sort_values('frequency'))

# Create scatter plot
plt.figure(figsize=(10, 6))
sns.scatterplot(
    x='frequency',
    y='category',
    hue='category',
    style='category',
    data=freq_states,
    palette=CATEGORY_PALETTE,
    s=100,
)
plt.title('Frequencies by State Category (0 = No Leak, 1 = Leak, Both = Mixed)')
plt.xlabel('Frequency (Hz)')
plt.ylabel('State Category')
plt.savefig(PLOT_PATH)
# Close the figure so it is released once it has been written to disk.
plt.close()
print(f"Graph saved: {PLOT_PATH}")