PAC 2 VISUALITZACIÓ DE DADES¶
HISTOGRAM¶
Descripció del dataset: This is a simple dataset to start with. It contains only the heights (inches) and weights (pounds) of 25,000 different humans of 18 years of age. This dataset can be used to build a model that predicts the height or weight of a human. Font: https://www.kaggle.com/datasets/burnoutminer/heights-and-weights-dataset
In [ ]:
from google.colab import drive

# Location of the heights/weights CSV inside the mounted Google Drive.
dataset_histogram = '/content/drive/My Drive/Colab Notebooks/SOCR-HeightWeight.csv'

# Mount Google Drive at /content/drive so the path above becomes readable.
drive.mount('/content/drive')
Mounted at /content/drive
In [ ]:
import pandas as pd

# Read the heights/weights CSV into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer=dataset_histogram)
print(df.head(n=5))
Index Height(Inches) Weight(Pounds) 0 1 65.78331 112.9925 1 2 71.51521 136.4873 2 3 69.39874 153.0269 3 4 68.21660 142.3354 4 5 67.78781 144.2971
In [ ]:
import matplotlib.pyplot as plt

# Histogram of the 'Height(Inches)' column, split into 50 bins.
height_values = df['Height(Inches)']
plt.hist(
    height_values,
    bins=50,
    color='blue',
    edgecolor='black',
)

# Axis labels and title (user-facing text kept exactly as in the notebook).
plt.xlabel('Alçada (Polsades)')
plt.ylabel('Freqüencia')
plt.title('Distribució de l\' alçada')

# Render the histogram.
plt.show()
In [ ]:
# Histogram of the 'Weight(Pounds)' column. Fewer bins (25) are used than for
# the height plot, so each bar covers a wider interval of values.
weight_values = df['Weight(Pounds)']
plt.hist(
    weight_values,
    bins=25,
    color='green',
    edgecolor='black',
)

# Axis labels and title (user-facing text kept exactly as in the notebook).
plt.xlabel('Pes (Pounds)')
plt.ylabel('Freqüencia')
plt.title('Distribució del pes')

# Render the histogram.
plt.show()
Diagrama de xarxa¶
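Descripció del dataset: conté les interaccions entre els personatges de Joc de trons; cada fila és una aresta no dirigida (Source, Target) amb el seu pes (weight) i el llibre (book). Font: https://www.kaggle.com/code/mmmarchetti/game-of-thrones-network-analysis/input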
In [ ]:
# Notebook shell command: upgrade networkx to the newest release pip can find.
!pip install networkx --upgrade
Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (3.3)
In [ ]:
import networkx as nx
import matplotlib.pyplot as plt

# Path of the Game of Thrones edge list inside the mounted Google Drive.
dataset_network = '/content/drive/My Drive/Colab Notebooks/gameofthrons.csv'

# Load the edge list (replacing the previous `df`) and preview the first five rows.
df = pd.read_csv(filepath_or_buffer=dataset_network)
print(df.head(n=5))
Source Target Type weight \ 0 Addam-Marbrand Jaime-Lannister Undirected 3 1 Addam-Marbrand Tywin-Lannister Undirected 6 2 Aegon-I-Targaryen Daenerys-Targaryen Undirected 5 3 Aegon-I-Targaryen Eddard-Stark Undirected 4 4 Aemon-Targaryen-(Maester-Aemon) Alliser-Thorne Undirected 4 book 0 1 1 1 2 1 3 1 4 1
In [ ]:
# Build an undirected graph with one edge per row of `df`, storing the row's
# 'weight' column as the edge attribute 'weight'.
#
# The columns are zipped and handed to networkx in a single call instead of
# looping with DataFrame.iterrows(): iterrows() builds a Series per row (slow)
# and does not preserve column dtypes across the row. If the same
# (Source, Target) pair appears more than once, the last row's weight wins,
# exactly as with repeated add_edge() calls.
G = nx.Graph()
G.add_weighted_edges_from(
    zip(df['Source'], df['Target'], df['weight']),
    weight='weight',
)
In [ ]:
# Selecciono els més representatius
graus = dict(G.degree())
nodes_top = sorted(graus, key=graus.get, reverse=True)[:15]
subG = G.subgraph(nodes_top)
pos = nx.spring_layout(subG, k=0.15, iterations=20)
plt.figure(figsize=(12, 10))
nx.draw(subG, pos, with_labels=True, node_color='skyblue', node_size=700, edge_color='gray', font_size=10)
plt.show()
In [ ]:
import matplotlib.pyplot as plt
import networkx as nx

# Recompute the layout for the top-node subgraph and open a new figure.
pos = nx.spring_layout(subG, k=0.15, iterations=20)
plt.figure(figsize=(12, 10))

# Base drawing: nodes, node labels and gray edges.
nx.draw(
    subG,
    pos,
    with_labels=True,
    node_color='skyblue',
    node_size=700,
    edge_color='gray',
    font_size=10,
)

# Map each edge (u, v) to its 'weight' attribute and print it on the edge.
edge_weights = {
    (u, v): data['weight']
    for u, v, data in subG.edges(data=True)
    if 'weight' in data
}
nx.draw_networkx_edge_labels(subG, pos, edge_labels=edge_weights)
plt.show()
In [ ]:
import matplotlib.pyplot as plt
import networkx as nx

# Recompute the layout for the top-node subgraph and open a new figure.
pos = nx.spring_layout(subG, k=0.15, iterations=20)
plt.figure(figsize=(12, 10))

# Edge thickness encodes the edge weight, scaled down by a factor of 0.05.
# The list follows the order of subG.edges(), which is the order networkx
# uses when it draws the edges.
weights = [w for _, _, w in subG.edges(data='weight')]
weights_norm = [w * 0.05 for w in weights]

# Draw everything in a single call. Previously nx.draw painted every edge at
# the default width and draw_networkx_edges then painted them again on top,
# so edges thinner than the default were hidden by the first pass and the
# thickness encoding was lost for low weights.
nx.draw(
    subG,
    pos,
    with_labels=True,
    node_color='skyblue',
    node_size=700,
    font_size=10,
    edge_color='gray',
    width=weights_norm,
)
plt.show()
In [ ]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors
import networkx as nx

# Recompute the layout for the top-node subgraph.
pos = nx.spring_layout(subG, k=0.15, iterations=20)

# Degree of every node inside the subgraph, and its extremes.
graus = dict(subG.degree())
max_grau = max(graus.values())
min_grau = min(graus.values())

fig, ax = plt.subplots(figsize=(12, 10))

# Colour nodes by degree. The raw degrees are passed together with vmin/vmax,
# so matplotlib does the linear scaling itself. This avoids the manual
# (x - min) / (max - min) division, which raised ZeroDivisionError when every
# node had the same degree, and it no longer rebinds the name `colors`, which
# used to shadow the matplotlib.colors module imported above.
node_colors = [graus[node] for node in subG.nodes()]
nx.draw(
    subG,
    pos,
    ax=ax,
    node_color=node_colors,
    cmap=cm.viridis,
    vmin=min_grau,
    vmax=max_grau,
    with_labels=True,
    node_size=700,
    edge_color='gray',
    font_size=10,
)

# Colour bar legend built from a ScalarMappable that uses the same colormap
# and the same degree range as the nodes.
norm = colors.Normalize(vmin=min_grau, vmax=max_grau)
sm = cm.ScalarMappable(cmap=cm.viridis, norm=norm)
sm.set_array([])
cbar = plt.colorbar(sm, ax=ax, orientation='horizontal', pad=0.05, aspect=40, shrink=0.5)
cbar.set_label('Graus del node')
plt.show()
In [ ]:
import networkx as nx
import matplotlib.pyplot as plt
import random

# Rebuild the subgraph of the 15 highest-degree nodes of the full graph.
graus = dict(G.degree())
nodes_top = sorted(graus, key=graus.get, reverse=True)[:15]
subG = G.subgraph(nodes_top)

# Directed copy: same nodes (with their attributes), edges added below.
dirSubG = nx.DiGraph()
dirSubG.add_nodes_from(subG.nodes(data=True))

# Each undirected edge gets a direction chosen by a random draw, so the
# arrows are arbitrary and differ between runs (no seed is set here).
for u, v in subG.edges():
    oriented = (u, v) if random.random() > 0.5 else (v, u)
    dirSubG.add_edge(*oriented)

# Draw the directed graph with arrowheads.
pos = nx.spring_layout(dirSubG, k=0.15, iterations=20)
plt.figure(figsize=(12, 10))
nx.draw(
    dirSubG,
    pos,
    with_labels=True,
    node_color='skyblue',
    node_size=700,
    edge_color='gray',
    font_size=10,
    arrows=True,
)
plt.show()
Marimekko Chart¶
Descripció dataset: Supermarket sales data https://www.kaggle.com/datasets/aungpyaeap/supermarket-sales
In [ ]:
# Path of the supermarket sales CSV inside the mounted Google Drive.
dataset_Marimekko = '/content/drive/My Drive/Colab Notebooks/supermarket_sales - Sheet1.csv'

# Load the sales data (replacing the previous `df`) and preview the first five rows.
df = pd.read_csv(filepath_or_buffer=dataset_Marimekko)
print(df.head(n=5))
Invoice ID Branch City Customer type Gender \ 0 750-67-8428 A Yangon Member Female 1 226-31-3081 C Naypyitaw Normal Female 2 631-41-3108 A Yangon Normal Male 3 123-19-1176 A Yangon Member Male 4 373-73-7910 A Yangon Normal Male Product line Unit price Quantity Tax 5% Total Date \ 0 Health and beauty 74.69 7 26.1415 548.9715 1/5/2019 1 Electronic accessories 15.28 5 3.8200 80.2200 3/8/2019 2 Home and lifestyle 46.33 7 16.2155 340.5255 3/3/2019 3 Health and beauty 58.22 8 23.2880 489.0480 1/27/2019 4 Sports and travel 86.31 7 30.2085 634.3785 2/8/2019 Time Payment cogs gross margin percentage gross income Rating 0 13:08 Ewallet 522.83 4.761905 26.1415 9.1 1 10:29 Cash 76.40 4.761905 3.8200 9.6 2 13:23 Credit card 324.31 4.761905 16.2155 7.4 3 20:33 Ewallet 465.76 4.761905 23.2880 8.4 4 10:37 Ewallet 604.17 4.761905 30.2085 5.3
In [ ]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# NOTE(review): `grouped` and `color_dict` are not created in any visible
# cell. This cell reads the columns 'Branch', 'Product line' and 'proportion'
# from `grouped` and looks up a colour per product line in `color_dict`;
# confirm both are defined before this cell runs.

fig, ax = plt.subplots(figsize=(10, 6))

# Running height of each branch's stack; every stack starts at 0.
bottom_dict = {branch: 0 for branch in grouped['Branch'].unique()}

# One bar segment per row, stacked on top of the segments already drawn for
# the same branch.
for _, row in grouped.iterrows():
    branch = row['Branch']
    product_line = row['Product line']
    share = row['proportion']
    ax.bar(
        branch,
        share,
        bottom=bottom_dict[branch],
        color=color_dict[product_line],
        edgecolor='white',
        width=0.98,
        label=product_line,
    )
    bottom_dict[branch] = bottom_dict[branch] + share

# Every segment carries a label, so the legend entries are de-duplicated by
# label (for a repeated label the last handle is kept).
handles, labels = plt.gca().get_legend_handles_labels()
by_label = dict(zip(labels, handles))
legend = ax.legend(
    by_label.values(),
    by_label.keys(),
    title="Tipus de producte",
    bbox_to_anchor=(1.05, 1),
    loc='upper left',
)

ax.set_title('Distribució de Ventes por tipus de producte i sucursal')
ax.set_xlabel('Sucursal')
ax.set_ylabel('Proporció de Ventes')
plt.tight_layout()
plt.show()
In [ ]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# NOTE(review): `grouped` and `color_dict` are not created in any visible
# cell. This cell reads the columns 'Branch', 'Product line' and 'proportion'
# from `grouped` and looks up a colour per product line in `color_dict`;
# confirm both are defined before this cell runs.

fig, ax = plt.subplots(figsize=(10, 6))

# Running height of each branch's stack; every stack starts at 0.
bottom_dict = dict.fromkeys(grouped['Branch'].unique(), 0)

# First bar container seen for each product line, used later for the legend.
legend_labels = {}

for _, row in grouped.iterrows():
    branch, product_line, proportion = row['Branch'], row['Product line'], row['proportion']
    bottom = bottom_dict[branch]

    # Stack this segment on top of what is already drawn for the branch.
    bar = ax.bar(
        branch,
        proportion,
        bottom=bottom,
        color=color_dict[product_line],
        edgecolor='white',
        width=0.98,
    )
    bottom_dict[branch] += proportion
    legend_labels.setdefault(product_line, bar)

    # Percentage text centred horizontally and vertically inside the segment.
    height = proportion / 2 + bottom
    percentage = f"{proportion * 100:.1f}%"
    segment = bar[0]
    ax.text(
        segment.get_x() + segment.get_width() / 2,
        height,
        percentage,
        ha='center',
        va='center',
        color='white',
        fontsize=8,
    )

# One legend entry per product line, using the first rectangle of its container.
handles, labels = zip(
    *((container[0], name) for name, container in legend_labels.items())
)
ax.legend(handles, labels, title="Tipus de producte", bbox_to_anchor=(1.05, 1), loc='upper left')

ax.set_title('Distribució de Ventes por tipus de producte i sucursal')
ax.set_xlabel('Sucursal')
ax.set_ylabel('Proporció de Ventes')
plt.tight_layout()
plt.show()