Graph Based Recommender

Course Project: Recommender System by Link Prediction using Algorithms

Importing necessary libraries

import networkx as nx
from networkx.algorithms.community.centrality import girvan_newman
from networkx.algorithms.community import kernighan_lin_bisection, louvain_communities
from networkx.algorithms.community.label_propagation import label_propagation_communities
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pylab import rcParams

Miscellaneous functions

def newColorHex():
    color = '#'
    for i in range(6):
        color += np.random.choice(list("6789ABCD"))
    return color

Reading CSV files containing relations between people and the products they bought

edgelist_df = pd.read_csv('data/relations.csv')
people_df = pd.read_csv('data/people.csv')
names = people_df.set_index('id').T.to_dict('list')
names = {k: v[0] for k, v in names.items()}


purchases = pd.read_csv('data/purchases.csv')
products = pd.read_csv('data/products.csv').set_index('id').T.to_dict()
product_names = {k: v['product'] for k, v in products.items() if k in purchases['product_id'].unique()}
product_prices = {k: int(v['price']) for k, v in products.items() if k in purchases['product_id'].unique()}

Plotting relations between people

G = nx.Graph()
G = nx.from_pandas_edgelist(edgelist_df, source='id1', target='id2')
G = nx.relabel_nodes(G, names)

rcParams['figure.figsize'] = 14, 10
pos = nx.spring_layout(G, scale=20, k=3/np.sqrt(G.order()))
d = dict(G.degree)
nx.draw(G, pos, node_color=newColorHex(),
        with_labels=True,
        nodelist=d,
        node_size=[d[k]*300 for k in d])

Finding communities

Partitioning the set of people via centraility measures (Girvan-Newman algorithm)

G = nx.Graph()
G = nx.from_pandas_edgelist(edgelist_df, source='id1', target='id2')
G = nx.relabel_nodes(G, names)

communities = girvan_newman(G)

node_groups = []
for com in next(communities):
    node_groups.append(set(com))

group_colors = [newColorHex() for _ in node_groups]

color_map = []
for node in G:
    for group_num in range(len(node_groups)):
        if node in node_groups[group_num]:
            color_map.append(group_colors[group_num])

plt.figure(5, figsize=(14, 10))
nx.draw(G, node_color=color_map, with_labels=True, node_size=2000)

Finding bipartitions (Kernighan-Lin bipartition algorithm)

G = nx.Graph()
G = nx.from_pandas_edgelist(edgelist_df, source='id1', target='id2')
G = nx.relabel_nodes(G, names)

communities = kernighan_lin_bisection(G)

node_groups = []
for com in communities:
    node_groups.append(com)

group_colors = [newColorHex() for _ in node_groups]

color_map = []
for node in G:
    for group_num in range(len(node_groups)):
        if node in node_groups[group_num]:
            color_map.append(group_colors[group_num])

plt.figure(5, figsize=(14, 10))
nx.draw(G, node_color=color_map, with_labels=True, node_size=2000)

Finding communities using label propagation community detection algorithm

G = nx.Graph()
G = nx.from_pandas_edgelist(edgelist_df, source='id1', target='id2')
G = nx.relabel_nodes(G, names)

communities = label_propagation_communities(G)

node_groups = []
for com in communities:
    node_groups.append(com)

group_colors = [newColorHex() for _ in node_groups]

color_map = []
for node in G:
    for group_num in range(len(node_groups)):
        if node in node_groups[group_num]:
            color_map.append(group_colors[group_num])

plt.figure(5, figsize=(14, 10))
nx.draw(G, node_color=color_map, with_labels=True, node_size=2000)

Detecting communities using the Louvain community detection algorithm

G = nx.Graph()
G = nx.from_pandas_edgelist(edgelist_df, source='id1', target='id2')
G = nx.relabel_nodes(G, names)

communities = louvain_communities(G)

node_groups = []
for com in communities:
    node_groups.append(com)

group_colors = [newColorHex() for _ in node_groups]

color_map = []
for node in G:
    for group_num in range(len(node_groups)):
        if node in node_groups[group_num]:
            color_map.append(group_colors[group_num])

plt.figure(5, figsize=(14, 10))
nx.draw(G, node_color=color_map, with_labels=True, node_size=2000)

Plotting people and their purchases

G = nx.Graph()
G.add_edges_from(edgelist_df.values)
G.add_edges_from(purchases.values)

person_nodes = set(people_df.id.tolist())
product_nodes = set(purchases.product_id)

person_color = newColorHex()
product_color = newColorHex()

color_map = []
for node in G:
    if node in person_nodes:
        color_map.append(person_color)
    elif node in product_nodes:
        color_map.append(product_color)

G = nx.relabel_nodes(G, names)
G = nx.relabel_nodes(G, product_names)

plt.figure(5, figsize=(14, 10))
nx.draw(G, node_color=color_map, with_labels=True, node_size=2000)

Link Prediction using Algorithms

Using resource allocation index

G = nx.Graph()
G.add_edges_from(edgelist_df.values)
G.add_edges_from(purchases.values)

ebunch = []
for node1 in G:
    for node2 in G:
        if node1 != node2 and (node1, node2) not in G.edges and (node2, node1) not in G.edges:
            if node1 in person_nodes and node2 in product_nodes:
                ebunch.append((node1, node2))

iterator_list = list(nx.resource_allocation_index(G, ebunch=ebunch))
iterator_list_sorted = sorted(iterator_list, key=lambda x: x[2], reverse=True)
rai_predictions = list(map(lambda x: (names[x[0]], product_names[x[1]], x[2]), iterator_list_sorted))

df_predictions = pd.DataFrame(rai_predictions, columns=['person', 'product', 'resource_allocation_index'])
df_predictions

	person	product	resource_allocation_index
0	Ruben Shaw	Sony Alpha a7 III Mirrorless Camera	0.500000
1	Sasha Howard	Google Nest Mini (2nd Gen)	0.416667
2	Dereon Bright	Google Nest Mini (2nd Gen)	0.375000
3	Paola Whitaker	Canon EOS Rebel T7 DSLR Camera	0.366667
4	Sasha Howard	Nintendo Switch	0.366667
...	...	...	...
229	Katrina Barr	iPhone XR 64GB	0.000000
230	Katrina Barr	PlayStation 5	0.000000
231	Katrina Barr	Xbox Series X	0.000000
232	Katrina Barr	Dell XPS 13	0.000000
233	Katrina Barr	Garmin Forerunner 945	0.000000

234 rows × 3 columns

Using Jaccard coefficient

G = nx.Graph()
G.add_edges_from(edgelist_df.values)
G.add_edges_from(purchases.values)

ebunch = []
for node1 in G:
    for node2 in G:
        if node1 != node2 and (node1, node2) not in G.edges and (node2, node1) not in G.edges:
            if node1 in person_nodes and node2 in product_nodes:
                ebunch.append((node1, node2))

jaccard_coefficients_list = list(nx.jaccard_coefficient(G, ebunch=ebunch))
jaccard_coefficients_sorted = sorted(jaccard_coefficients_list, key=lambda x: x[2], reverse=True)
jaccard_predictions = list(map(lambda x: (names[x[0]], product_names[x[1]], x[2]), jaccard_coefficients_sorted))

df_jaccard_predictions = pd.DataFrame(jaccard_predictions, columns=['person', 'product', 'jaccard_coefficient'])
df_jaccard_predictions

	person	product	jaccard_coefficient
0	Lilliana Keith	PlayStation 5	0.500000
1	Lilliana Keith	Nintendo Switch	0.333333
2	Camille Khan	Garmin Forerunner 945	0.333333
3	Kenny Kidd	PlayStation 5	0.333333
4	Damon Webb	iPhone XR 64GB	0.333333
...	...	...	...
229	Katrina Barr	iPhone XR 64GB	0.000000
230	Katrina Barr	PlayStation 5	0.000000
231	Katrina Barr	Xbox Series X	0.000000
232	Katrina Barr	Dell XPS 13	0.000000
233	Katrina Barr	Garmin Forerunner 945	0.000000

234 rows × 3 columns

Using Adamic-Adar index

G = nx.Graph()
G.add_edges_from(edgelist_df.values)
G.add_edges_from(purchases.values)

ebunch = []
for node1 in G:
    for node2 in G:
        if node1 != node2 and (node1, node2) not in G.edges and (node2, node1) not in G.edges:
            if node1 in person_nodes and node2 in product_nodes:
                ebunch.append((node1, node2))

iterator_list = list(nx.resource_allocation_index(G, ebunch=ebunch))
iterator_list_sorted = sorted(iterator_list, key=lambda x: x[2], reverse=True)
aai_predictions = list(map(lambda x: (names[x[0]], product_names[x[1]], x[2]), iterator_list_sorted))

df_predictions = pd.DataFrame(aai_predictions, columns=['person', 'product', 'adamic_adar_index'])
df_predictions

	person	product	adamic_adar_index
0	Ruben Shaw	Sony Alpha a7 III Mirrorless Camera	0.500000
1	Sasha Howard	Google Nest Mini (2nd Gen)	0.416667
2	Dereon Bright	Google Nest Mini (2nd Gen)	0.375000
3	Paola Whitaker	Canon EOS Rebel T7 DSLR Camera	0.366667
4	Sasha Howard	Nintendo Switch	0.366667
...	...	...	...
229	Katrina Barr	iPhone XR 64GB	0.000000
230	Katrina Barr	PlayStation 5	0.000000
231	Katrina Barr	Xbox Series X	0.000000
232	Katrina Barr	Dell XPS 13	0.000000
233	Katrina Barr	Garmin Forerunner 945	0.000000

234 rows × 3 columns

Using common neighbour and centrality based parameterized algorithm score

G = nx.Graph()
G.add_edges_from(edgelist_df.values)
G.add_edges_from(purchases.values)

ebunch = []
for node1 in G:
    for node2 in G:
        if node1 != node2 and (node1, node2) not in G.edges and (node2, node1) not in G.edges:
            if node1 in person_nodes and node2 in product_nodes:
                ebunch.append((node1, node2))

iterator_list = list(nx.common_neighbor_centrality(G, ebunch=ebunch, alpha=0.8)) # default value alpha = 0.8
iterator_list_sorted = sorted(iterator_list, key=lambda x: x[2], reverse=True)
ccpa_predictions = list(map(lambda x: (names[x[0]], product_names[x[1]], x[2]), iterator_list_sorted))

df_predictions = pd.DataFrame(ccpa_predictions, columns=['person', 'product', 'ccpa_score'])
df_predictions

	person	product	ccpa_score
0	Paola Whitaker	Nintendo Switch	5.000000
1	Paola Whitaker	Canon EOS Rebel T7 DSLR Camera	5.000000
2	Lilliana Keith	MacBook Air M1	5.000000
3	Lilliana Keith	Nintendo Switch	5.000000
4	Lilliana Keith	Samsung Galaxy S10	5.000000
...	...	...	...
229	Yusuf Wagner	Amazon Echo Dot (4th Gen)	1.360000
230	Brennan Trevino	Google Nest Mini (2nd Gen)	1.360000
231	Brennan Trevino	PlayStation 5	1.360000
232	Brennan Trevino	Amazon Echo Dot (4th Gen)	1.360000
233	Damon Webb	Amazon Echo Dot (4th Gen)	1.133333

234 rows × 3 columns>

Ashish Kulkarni

Course Project: Recommender System by Link Prediction using Algorithms

Importing necessary libraries

Miscellaneous functions

Reading CSV files containing relations between people and the products they bought

Plotting relations between people

Finding communities

Partitioning the set of people via centraility measures (Girvan-Newman algorithm)

Finding bipartitions (Kernighan-Lin bipartition algorithm)

Finding communities using label propagation community detection algorithm

Detecting communities using the Louvain community detection algorithm

Plotting people and their purchases

Link Prediction using Algorithms

Using resource allocation index

Using Jaccard coefficient

Using Adamic-Adar index

Using common neighbour and centrality based parameterized algorithm score