PAC 2 VISUALITZACIÓ DE DADES¶

HISTOGRAM¶

Descripció del dataset: This is a simple dataset to start with. It contains only the heights (inches) and weights (pounds) of 25,000 different 18-year-old humans. It can be used to build a model that predicts a person's height or weight. https://www.kaggle.com/datasets/burnoutminer/heights-and-weights-dataset

In [ ]:
# Mount Google Drive at /content/drive so files stored there can be read by path.
from google.colab import drive
drive.mount('/content/drive')

# Path of the heights/weights CSV inside the mounted Drive.
dataset_histogram = '/content/drive/My Drive/Colab Notebooks/SOCR-HeightWeight.csv'
Mounted at /content/drive
In [ ]:
import pandas as pd

# Read the heights/weights CSV and print its first five rows as a quick check.
df = pd.read_csv(dataset_histogram)
print(df.head(n=5))
   Index  Height(Inches)  Weight(Pounds)
0      1        65.78331        112.9925
1      2        71.51521        136.4873
2      3        69.39874        153.0269
3      4        68.21660        142.3354
4      5        67.78781        144.2971
In [ ]:
import matplotlib.pyplot as plt

# Histogram of the height column, drawn through an explicit Axes object
# instead of pyplot's implicit "current axes".
fig, ax = plt.subplots()
ax.hist(df['Height(Inches)'], bins=50, color='blue', edgecolor='black')
ax.set(
    title='Distribució de l\' alçada',
    xlabel='Alçada (Polsades)',
    ylabel='Freqüencia',
)

# Render the histogram
plt.show()
No description has been provided for this image
In [ ]:
# Histogram of the weight column. `bins` is the number of equal-width
# intervals; it is lowered to 25 here, so each bar covers a wider range.
fig, ax = plt.subplots()
ax.hist(df['Weight(Pounds)'], bins=25, color='green', edgecolor='black')
ax.set(
    title='Distribució del pes',
    xlabel='Pes (Pounds)',
    ylabel='Freqüencia',
)

# Render the histogram
plt.show()
No description has been provided for this image

Diagrama de xarxa¶

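Descripció del dataset: Conté les interaccions entre els personatges de Joc de trons: cada fila és una aresta no dirigida entre dos personatges (Source, Target), amb el seu pes (weight) i el llibre on apareix (book). https://www.kaggle.com/code/mmmarchetti/game-of-thrones-network-analysis/input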

In [ ]:
!pip install networkx --upgrade
Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (3.3)
In [ ]:
import networkx as nx
import matplotlib.pyplot as plt
# Path of the character edge-list CSV inside the mounted Drive.
dataset_network = '/content/drive/My Drive/Colab Notebooks/gameofthrons.csv'

# Read the edge list and print its first five rows as a quick check.
df = pd.read_csv(dataset_network)
print(df.head(n=5))
                            Source              Target        Type  weight  \
0                   Addam-Marbrand     Jaime-Lannister  Undirected       3   
1                   Addam-Marbrand     Tywin-Lannister  Undirected       6   
2                Aegon-I-Targaryen  Daenerys-Targaryen  Undirected       5   
3                Aegon-I-Targaryen        Eddard-Stark  Undirected       4   
4  Aemon-Targaryen-(Maester-Aemon)      Alliser-Thorne  Undirected       4   

   book  
0     1  
1     1  
2     1  
3     1  
4     1  
In [ ]:
# Build the undirected graph directly from the edge list: one edge per row
# between `Source` and `Target`, with the row's `weight` column stored as the
# edge attribute "weight". This replaces the row-by-row `iterrows()` loop.
# NOTE(review): as with the original loop, if a (Source, Target) pair appears
# in more than one row only the last row's weight is kept -- confirm the CSV
# has no repeated pairs (or that overwriting is intended).
G = nx.from_pandas_edgelist(df, source='Source', target='Target', edge_attr='weight')
In [ ]:
# Keep only the 15 nodes with the highest degree so the drawing stays readable.
graus = dict(G.degree())
nodes_top = sorted(graus, key=graus.get, reverse=True)[:15]
subG = G.subgraph(nodes_top)

# spring_layout starts from random positions; a fixed seed makes the figure
# identical on every run.
pos = nx.spring_layout(subG, k=0.15, iterations=20, seed=42)
plt.figure(figsize=(12, 10))
nx.draw(subG, pos, with_labels=True, node_color='skyblue', node_size=700, edge_color='gray', font_size=10)

plt.show()
No description has been provided for this image
In [ ]:
import matplotlib.pyplot as plt
import networkx as nx

# spring_layout starts from random positions; a fixed seed makes the figure
# identical on every run.
pos = nx.spring_layout(subG, k=0.15, iterations=20, seed=42)

plt.figure(figsize=(12, 10))
nx.draw(subG, pos, with_labels=True, node_color='skyblue', node_size=700, edge_color='gray', font_size=10)

# Annotate every edge with the value of its "weight" attribute.
edge_weights = nx.get_edge_attributes(subG, 'weight')
nx.draw_networkx_edge_labels(subG, pos, edge_labels=edge_weights)

plt.show()
No description has been provided for this image
In [ ]:
import matplotlib.pyplot as plt
import networkx as nx

# spring_layout starts from random positions; a fixed seed makes the figure
# identical on every run.
pos = nx.spring_layout(subG, k=0.15, iterations=20, seed=42)
plt.figure(figsize=(12, 10))

# One line width per edge, in the same order as subG.edges(): the edge weight
# scaled by a fixed factor of 0.05 (a plain rescaling, not a normalisation).
weights = [subG[u][v]['weight'] for u, v in subG.edges()]
weights_norm = [w * 0.05 for w in weights]

# Draw the edges once, already at their weighted width. Previously nx.draw
# painted default-width gray edges and a second call overlaid the weighted
# ones in black, so the two sets of lines were stacked on top of each other.
nx.draw(subG, pos, with_labels=True, node_color='skyblue', node_size=700, font_size=10,
        edge_color='gray', width=weights_norm)
plt.show()
No description has been provided for this image
In [ ]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors
import networkx as nx

# spring_layout starts from random positions; a fixed seed makes the figure
# identical on every run.
pos = nx.spring_layout(subG, k=0.15, iterations=20, seed=42)

# Degree of every node inside the sub-graph, and the range used for colouring.
graus = dict(subG.degree())
max_grau = max(graus.values())
min_grau = min(graus.values())

fig, ax = plt.subplots(figsize=(12, 10))

# Colour assignment: pass the raw degrees and let matplotlib map them onto the
# colormap between vmin and vmax. This avoids the manual
# (x - min) / (max - min) step, which divides by zero when every node has the
# same degree, and it no longer rebinds the name `colors`, which this cell
# imports as the `matplotlib.colors` module.
node_degrees = [graus[node] for node in subG.nodes()]
degree_norm = plt.Normalize(vmin=min_grau, vmax=max_grau)

nx.draw(subG, pos, ax=ax, node_color=node_degrees, cmap=plt.cm.viridis,
        vmin=min_grau, vmax=max_grau, with_labels=True, node_size=700,
        edge_color='gray', font_size=10)

# Colorbar legend: a ScalarMappable built from the same colormap and the same
# normalisation as the nodes, so the bar matches the node colours.
sm = plt.cm.ScalarMappable(cmap=plt.cm.viridis, norm=degree_norm)
sm.set_array([])

cbar = fig.colorbar(sm, ax=ax, orientation='horizontal', pad=0.05, aspect=40, shrink=0.5)
cbar.set_label('Graus del node')

plt.show()
No description has been provided for this image
In [ ]:
import networkx as nx
import matplotlib.pyplot as plt
import random


# Rebuild the sub-graph of the 15 highest-degree nodes from the full graph.
graus = dict(G.degree())
nodes_top = sorted(graus, key=graus.get, reverse=True)[:15]
subG = G.subgraph(nodes_top)

# Directed copy: same nodes (with their attributes); edges are added below.
dirSubG = nx.DiGraph()
dirSubG.add_nodes_from(subG.nodes(data=True))

# Each undirected edge gets a randomly chosen direction. A dedicated, seeded
# generator keeps the chosen directions identical on every run and leaves the
# global `random` state untouched.
rng = random.Random(42)
for u, v in subG.edges():
    if rng.random() > 0.5:
        dirSubG.add_edge(u, v)
    else:
        dirSubG.add_edge(v, u)

# spring_layout also starts from random positions; seed it for the same reason.
pos = nx.spring_layout(dirSubG, k=0.15, iterations=20, seed=42)

plt.figure(figsize=(12, 10))
nx.draw(dirSubG, pos, with_labels=True, node_color='skyblue', node_size=700, edge_color='gray', font_size=10, arrows=True)

plt.show()
No description has been provided for this image

Marimekko Chart¶

Descripció dataset: Supermarket sales data https://www.kaggle.com/datasets/aungpyaeap/supermarket-sales

In [ ]:
# Path of the supermarket sales CSV inside the mounted Drive.
dataset_Marimekko = '/content/drive/My Drive/Colab Notebooks/supermarket_sales - Sheet1.csv'

# Read the sales data and print its first five rows as a quick check.
df = pd.read_csv(dataset_Marimekko)
print(df.head(n=5))
    Invoice ID Branch       City Customer type  Gender  \
0  750-67-8428      A     Yangon        Member  Female   
1  226-31-3081      C  Naypyitaw        Normal  Female   
2  631-41-3108      A     Yangon        Normal    Male   
3  123-19-1176      A     Yangon        Member    Male   
4  373-73-7910      A     Yangon        Normal    Male   

             Product line  Unit price  Quantity   Tax 5%     Total       Date  \
0       Health and beauty       74.69         7  26.1415  548.9715   1/5/2019   
1  Electronic accessories       15.28         5   3.8200   80.2200   3/8/2019   
2      Home and lifestyle       46.33         7  16.2155  340.5255   3/3/2019   
3       Health and beauty       58.22         8  23.2880  489.0480  1/27/2019   
4       Sports and travel       86.31         7  30.2085  634.3785   2/8/2019   

    Time      Payment    cogs  gross margin percentage  gross income  Rating  
0  13:08      Ewallet  522.83                 4.761905       26.1415     9.1  
1  10:29         Cash   76.40                 4.761905        3.8200     9.6  
2  13:23  Credit card  324.31                 4.761905       16.2155     7.4  
3  20:33      Ewallet  465.76                 4.761905       23.2880     8.4  
4  10:37      Ewallet  604.17                 4.761905       30.2085     5.3  
In [ ]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np


# Stacked bar chart: one bar per branch, one coloured segment per product line.
# NOTE(review): `grouped` and `color_dict` are not defined in any cell visible
# in this notebook. This cell reads the columns 'Branch', 'Product line' and
# 'proportion' from `grouped` and looks colours up in `color_dict` by product
# line; both must be created in an earlier cell for a clean Restart & Run All.
fig, ax = plt.subplots(figsize=(10, 6))

# Running height of each branch's stack, starting at zero.
bottom_dict = {branch: 0 for branch in grouped['Branch'].unique()}

for _, segment in grouped.iterrows():
    branch = segment['Branch']
    product_line = segment['Product line']
    share = segment['proportion']
    # Place this segment on top of what has already been stacked for the branch.
    ax.bar(branch, share, bottom=bottom_dict[branch],
           color=color_dict[product_line], edgecolor='white', width=0.98,
           label=product_line)
    bottom_dict[branch] += share

# Every segment carries a label, so the same product line appears once per
# branch; keying by label collapses the repeats into one legend entry each.
handles, labels = ax.get_legend_handles_labels()
by_label = {}
for text, handle in zip(labels, handles):
    by_label[text] = handle
legend = ax.legend(by_label.values(), by_label.keys(), title="Tipus de producte", bbox_to_anchor=(1.05, 1), loc='upper left')

ax.set(
    xlabel='Sucursal',
    ylabel='Proporció de Ventes',
    title='Distribució de Ventes por tipus de producte i sucursal',
)

fig.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Stacked bar chart per branch with the percentage written inside each segment.
# NOTE(review): `grouped` and `color_dict` are not defined in any cell visible
# in this notebook. This cell reads the columns 'Branch', 'Product line' and
# 'proportion' from `grouped` and looks colours up in `color_dict` by product
# line; both must be created in an earlier cell for a clean Restart & Run All.
fig, ax = plt.subplots(figsize=(10, 6))

# Running height of each branch's stack, starting at zero.
bottom_dict = {name: 0 for name in grouped['Branch'].unique()}

# First bar container drawn for each product line; used to build the legend.
legend_labels = {}

for _, row in grouped.iterrows():
    branch, product_line, proportion = row['Branch'], row['Product line'], row['proportion']
    bottom = bottom_dict[branch]

    bar = ax.bar(branch, proportion, bottom=bottom, color=color_dict[product_line],
                 edgecolor='white', width=0.98)
    bottom_dict[branch] = bottom + proportion

    # Remember only the first container seen for this product line.
    legend_labels.setdefault(product_line, bar)

    # Centre the percentage text horizontally and vertically in the segment.
    rect = bar[0]
    height = bottom + proportion / 2
    percentage = f"{proportion * 100:.1f}%"
    ax.text(rect.get_x() + rect.get_width() / 2, height, percentage,
            ha='center', va='center', color='white', fontsize=8)

# One legend entry per product line, using the first rectangle of each container.
handles, labels = zip(*((container[0], name) for name, container in legend_labels.items()))
ax.legend(handles, labels, title="Tipus de producte", bbox_to_anchor=(1.05, 1), loc='upper left')

ax.set(
    xlabel='Sucursal',
    ylabel='Proporció de Ventes',
    title='Distribució de Ventes por tipus de producte i sucursal',
)

fig.tight_layout()
plt.show()
No description has been provided for this image