“This is the 25th day of my participation in the Gwen Challenge in November. See details: The Last Gwen Challenge in 2021”

Practice social network visualization with Python-Networkx

01 Use NetworkX to make graph network model

  • Use NetworkX to create diagrams, add nodes and edges
  • View adjacency matrix/adjacency list/graph thermal map
  • Graph network empowerment
  • Graph network statistics (out and in, shortest path)

Guide package

import networkx as nx 
import matplotlib.pyplot as plt 
import collections 
import random 
import numpy as np 
from pylab import rcParams 
import warnings
warnings.filterwarnings("ignore")
# Support Chinese
plt.rcParams['font.sans-serif'] = ['SimHei']  # used to display Chinese labels normally
plt.rcParams['axes.unicode_minus'] = False  # is used to display the minus sign normally
Copy the code

Add a node

# add node
G = nx.Graph() Create a DiGraph using nx.digraph ()
G.add_node(1) Create a single node
G.add_nodes_from(range(10)) Create a group of nodes
nx.draw(G,node_color="darkblue")
Copy the code

Add edge

G.add_edge(1.2)
e = [(2.3), (9.3), (8.4), (3.5), (8.6), (0.8), (1.9)]
G.add_edges_from(e)
nx.draw(G,with_labels=True,node_color="darkblue",font_color="white")
Copy the code

Prints the adjacency matrix and adjacency list

Print the adjacency matrix
print(Adjacency matrix)
print(nx.adjacency_matrix(G))
print("="*50)
print(Adjacency list)
for i in G.adj.items():
    print(i)
Copy the code

Heat map

Thermal map of adjacency matrix: reflecting sparsity
print(nx.to_numpy_matrix(G)) Convert the adjacency matrix to numpy format
plt.imshow(nx.to_numpy_matrix(G)) # Create a 2-d thermal map
cbar = plt.colorbar() #--> Set colorbar thermal map caption
cbar.set_ticks([0.1]) #--> Set the scale range of colorbar
cbar.ax.set_yticklabels(['Zero'.'One'],) #-- Set the coordinate name of the colorbar scale axis
cbar.set_label('link', rotation=0) #-- Set the colorbar coordinate name
plt.xlabel('node idx') #-- Set the X-axis
plt.ylabel('node idx') Set the Y-axis
Copy the code

empowerment

# empowerment
for e in G.edges():
    G[e[0]][e[1]] ['weight'] = random.uniform(0.1) Reassign existing relationships with uniform distribution
    print(nx.to_numpy_matrix(G))
plt.imshow(nx.to_numpy_matrix(G))
cbar = plt.colorbar() #--> Set colorbar thermal map caption
cbar.set_ticks([0.1]) #--> Set the scale range of colorbar
cbar.ax.set_yticklabels(['Zero'.'One'],) #-- Set the coordinate name of the colorbar scale axis
cbar.set_label('link', rotation=0) #-- Set the colorbar coordinate name
plt.xlabel('node idx') #-- Set the X-axis
plt.ylabel('node idx') Set the Y-axis
Copy the code

Network statistics

##### Network statistics
# degrees
G =nx.karate_club_graph() # Create a Karate member club chart: This is a very popular social network chart
degree_sequence = sorted([d for n, d in G.degree()], reverse=True) Save the degree of each node in descending order
print("Degree arrangement", degree_sequence)
print("="*50)
degreeCount = collections.Counter(degree_sequence) # Count the number of each degree
print("Degree frequency", degreeCount)
print("="*50)

deg, cnt = zip(*degreeCount.items()) Create an iterator
rcParams['figure.figsize'] = 12.8 # Set artboard global size
fig, ax = plt.subplots() Create a subgraph

plt.bar(deg, cnt, width=0.80, color='darkblue')
plt.title("Degree histogram")
plt.ylabel("Frequency")
plt.xlabel("度")
plt.axes([0.4.0.4.0.5.0.5])
plt.axis('off')
ax.set_xticks([d + 0.4 for d in deg])
ax.set_xticklabels(deg)
pos = nx.spring_layout(G) #-- Set the network layout
nx.draw_networkx_nodes(G, pos, node_color= 'darkblue',node_size=20) # drawing
nx.draw_networkx_edges(G, pos, alpha=0.5) # picture edge
plt.show()
print('Degree mean of undirected graph:', np.mean(G.degree()))
Copy the code

The mean degree of undirected graphs: 10.544117647058824

The shortest path

# shortest path
R = nx.karate_club_graph() 
source=14 # Pick a starting point
target=16 # Pick a point to finish

# shortest path graph function
def short_path_plot(G,source,target) :
    pos = nx.spring_layout(G) #-- Set the network layout
    nx.draw(G,pos,node_color='k', with_labels=True, font_color='white') #-- Draw the structure

    path = nx.shortest_path(G,source=14,target=16) Call shortest Path to calculate the shortest path
    print("Shortest path:",path) 
    
    path_edges = list(zip(path,path[1:))Create the shortest path edge
    nx.draw_networkx_nodes(G,pos,nodelist=path,node_color='r', label=True)  # - node
    nx.draw_networkx_edges(G,pos,edgelist=path_edges,edge_color='r',width=10)  # - side
    plt.axis('equal')
    plt.show()
    return

# call function
short_path_plot(R,source,target)
Copy the code

02 Network Structure

  • Random graph
  • Small world model
  • Scale-free network
# Download power law distribution package
# !pip install powerlaw

import re
import powerlaw
import seaborn as sns
from operator import itemgetter

# Histogram drawing
def draw_hist_network(G) :
    degree_sequence = sorted([d for n, d in G.degree()], reverse=True) 
    degreeCount = collections.Counter(degree_sequence) 
    deg, cnt = zip(*degreeCount.items()) 
    rcParams['figure.figsize'] = 10.5 
    fig, ax = plt.subplots() 
    plt.bar(deg, cnt, width=0.80, color='darkblue') 
    plt.title("Degree histogram")
    plt.ylabel("Frequency")
    plt.xlabel("度")
    ax.set_xticks([d + 0.4 for d in deg])
    ax.set_xticklabels(deg)
    plt.axes([0.4.0.4.0.5.0.5])
    Gcc = G.subgraph(sorted(nx.connected_components(G), key=len, reverse=True) [0])
    pos = nx.spring_layout(G) 
    plt.axis('off') 
    nx.draw_networkx_nodes(G, pos, node_color= 'darkblue',node_size=20)
    nx.draw_networkx_edges(G, pos, alpha=0.4)
    plt.show()
    pass
# network rendering
def draw_network(G,name) :
    plt.title(name)
    pos = nx.circular_layout(G) 
    nx.draw_networkx_nodes(G, pos, node_color= 'darkblue',node_size=20)
    nx.draw_networkx_edges(G, pos, alpha=0.4)
    plt.axis('off') 
    plt.show()
    pass
Copy the code

Random graph

# Generate random graph
nodes_n=50
degree=20
simulation_number=10
rcParams['figure.figsize'] = 5.5

# Connectivity = 0.01:
# -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
G = nx.random_regular_graph(degree,nodes_n) #<-- Generate the Small World Network
draw_network(G,name="Random normal distribution")
Copy the code

Small World network

# Small World Network
nodes_n=1000
degree=20
rcParams['figure.figsize'] = 5.5

# Simple Small World model ------------------------------
G = nx.watts_strogatz_graph(20.5,p=0.01) #<-- Generates the small world model
draw_network(G,name="Small World Model")
# Connectivity = 0.01:
# -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
G = nx.watts_strogatz_graph(nodes_n,degree, p = 0.01) #<-- Generates the small world model
draw_hist_network(G)

# Connectivity = 0.05:
# -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
G = nx.watts_strogatz_graph(nodes_n,degree, p = 0.05) #<-- Generates the small world model
draw_hist_network(G)

# Connectivity = 0.1:
# -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
G = nx.watts_strogatz_graph(nodes_n,degree, p = 0.1) #<-- Generates the small world model
draw_hist_network(G)
Copy the code

Scale-free network

Scale – free network presents power – rate distribution

# scale-free network
node_n = 100000 # - the number of nodes
m=3   #-- The number of edges that the new node attaches to the existing node
G = nx.barabasi_albert_graph(node_n, m)
# draw_network(G,name=" scale-free network ") # draw_network(G,name=" scale-free network"
degree_freq = nx.degree_histogram(G)
degrees = range(len(degree_freq))
plt.figure(figsize=(12.8)) 
plt.loglog(degrees[m:], degree_freq[m:],'go-') 
plt.xlabel('度', fontsize = 'x-large')
plt.ylabel('frequency', fontsize = 'x-large')
plt.show()
Copy the code

Measurement at node 03 – centrality

What is centrality?

Centrality is a term that describes the importance of each node in a diagram. To answer the question “Which node is the most important node (vertex) in the diagram?” For this problem, a large number of experiments have been carried out. The list of experiments is as follows:

  • Degree Centrality
  • Betweenness Centrality
  • Closeness to and Centrality
  • Eigenvector Centrality
  • Katz Centrality

Point centrality/intermediate centrality/approach centrality all use absolute centrality

import pandas as pd
import matplotlib.colors as mcolors
from pylab import rcParams
Copy the code

Zachary Karate Club is a widely used social network where nodes represent members of the karate club and edges represent relationships between members.

# Zachary Karate Club
G = nx.karate_club_graph() 
pos = nx.circular_layout(G) #-- Disk layout
nodes = nx.draw_networkx_nodes(G, pos, node_size=400,node_color='White',edgecolors='b')
nodes.set_norm(mcolors.SymLogNorm(linthresh=0.03, linscale=2))
labels = {i:i for i in G.nodes()}
labels = nx.draw_networkx_labels(G, pos, labels, font_size=12)
edges = nx.draw_networkx_edges(G, pos, edge_color = 'grey')
plt.axis('off')
plt.show()

def draw(G, pos, measures, measure_name) :
    nodes = nx.draw_networkx_nodes(G, pos, node_size=400, cmap=plt.cm.plasma, 
                                   node_color=list(measures.values()),
                                   nodelist=list(measures.keys())) 
    nodes.set_norm(mcolors.SymLogNorm(linthresh=0.03, linscale=1))
    labels = nx.draw_networkx_labels(G, pos, font_color='white')
    edges = nx.draw_networkx_edges(G, pos)
    plt.title(measure_name,size=18)
    cbar = plt.colorbar(nodes)
    cbar.set_label('Centrality weight', rotation=0)
    plt.axis('off')
    plt.show()
    pass
Copy the code

Point degree center degree

Degree Centrality
draw(G, pos,nx.degree_centrality(G),'Point degree center degree')
Copy the code

Intermediate centrality

Betweenness Centrality
draw(G, pos, nx.betweenness_centrality(G), 'Intermediate centrality')
Copy the code

Proximity to center

# Closeness and Centrality
draw(G, pos, nx.closeness_centrality(G), 'Near center')
Copy the code

Eigenvector centrality

Eigenvector Centrality
draw(G, pos, nx.eigenvector_centrality(G), 'Eigenvector centrality')
Copy the code

Katz centrality

# Katz Centrality
draw(G, pos, nx.katz_centrality(G, alpha=0.1, beta=1.0), 'Katz centrality')
Copy the code

Dataset1 =[] for k in nx.degree_centrality(G).keys(): dataset1.append([k,nx.degree_centrality(G)[k],nx.katz_centrality(G)[k],nx.eigenvector_centrality(G)[k], nx.closeness_centrality(G)[k],nx.betweenness_centrality(G)[k]]) rcParams['figure.figsize'] = 5, Sns.set (style="ticks") # create dataframe: df=pd.DataFrame(dataset1,columns=['id', 'Degree centrality','Katz centrality','Eigenvector centrality', 'Clossness centrality','Betweenness centrality']) # Output the relationship between two variables: sns.pairplot(df)Copy the code

04. Overall measurement

  • The density of
  • The center for potential
# density
G = nx.karate_club_graph()
print('Karate_CLUB_graph density:', nx.density(G))

G = nx.watts_strogatz_graph(21.7,p=0.05) #<-- Generates the small world model
print('Generated small world model density:', nx.density(G))
Copy the code

Karate_club_graph density: 0.13903743315508021 Generated small world model density: 0.3

The density of

1. Density The density of undirected graph is D= 2Mn (n−1)D=\frac{2m}{n(n-1)} D=n(n−1)2m directed graph is D= MN (n−1)D=\frac{m}{n(n-1)}D=n(n−1)m where n is the number of nodes in G and m is the number of edges in G. The density of the boundless graph is 0, the density of the complete graph is 1, and the density of the multiple graph can be greater than 1.

The center for potential

2. Central potential: The central potential is used to measure the overall centrality of the network

  1. Firstly, find the maximum centrality value MAX in the network.
  2. Then the “difference” between the MAX value and the centrality of other points is calculated.
  3. The sum of these “differences” is calculated;
  4. And then you divide that sum by the maximum possible theoretical sum of the differences.

The absolute central potential is CAD=∑ I =1n(CADmax−CADi)(n−1)(n−2)C_{AD}= \ frac {\ sum_ {I = 1} ^ {n} (C_ {ADmax} – C_ {ADi})} {(n – 1) (n – 2)} CAD = (n – 1) n (n – 2) ∑ I = 1 (CADmax – CADi)

Relative center potential is CRD = ∑ I = 1 n (CRDmax – CRDi) (n – 2) C_ {RD} = \ frac {\ sum_ {I = 1} ^ {n} (C_ {RDmax} – C_ {RDi})} {(n – 2)} CRD = n (n – 2) ∑ I = 1 (CRDmax – CRDi)

Def Degree_Centralization(G,mode="AD"): N = nx.number_of_nodes(G) #-- n Number of nodes centrality_list = Np.array (list(nx.degree_centrality(G).values())) # Centrality list Max_centrality = Max (centrality_list) # if mode=='AD': degree_centralization = sum(max_centrality - centrality_list) / (n-1)*(n-2) if mode=='RD': degree_centralization = sum(max_centrality - centrality_list) / (n-2) return degree_centralizationCopy the code
G = nx.club_graph () print(' club_graph ') Degree_Centralization(G,mode='AD')) print('karate_club_graph: ', Degree_Centralization(G,mode='RD') draw_network(G,name="") G = nx.watts_strogatz_graph(21,7,p=0.05) # Degree_Centralization(G,mode='AD') ', Degree_Centralization(G,mode='RD')) draw_network(G,name="")Copy the code

Condensed subgroups

  • K- nucleus condensed subgroup
G = nx.karate_club_graph()
nx.core_number(G)

# output
{0: 4.1: 4.2: 4.3: 4.4: 3.5: 3.6: 3.7: 4.8: 4.9: 2.10: 3.11: 1.12: 2.13: 4.14: 2.15: 2.16: 2.17: 2.18: 2.19: 3.20: 2.21: 2.22: 2.23: 3.24: 3.25: 3.26: 2.27: 3.28: 3.29: 3.30: 4.31: 3.32: 4.33: 4}
Copy the code
def draw_clu(G, pos, measures, measure_name) :
    clusters=np.array(list(set(measures.values())))
    plt.figure()
    nodes = nx.draw_networkx_nodes(G, pos, node_size=250, cmap=mcolors.ListedColormap(plt.cm.Set3(clusters)), 
                                   node_color=np.array(list(measures.values()))-1,
                                   nodelist=list(measures.keys()))
    print(measures.values())
    print(measures.keys())
    labels = nx.draw_networkx_labels(G, pos)
    edges = nx.draw_networkx_edges(G, pos)
    plt.title(measure_name)
    rcParams['figure.figsize'] = 12.8
    rcParams['font.sans-serif'] = ['SimHei']
    cb = plt.colorbar(nodes,ticks=range(0.len(clusters)), label='Subgroup tag')
    cb.ax.tick_params(length=0)
    cb.set_ticklabels(list(set(measures.values())))
    nodes.set_clim(-0.5.len(clusters)-0.5)
    plt.axis('off')
    plt.show()
    
G = nx.karate_club_graph()
pos = nx.spring_layout(G)
draw_clu(G, pos, nx.core_number(G),'k-Core')
Copy the code

draw(G, pos, nx.core_number(G), 'k-Core')
Copy the code

06. Link prediction

import networkx as nx
import numpy as np
import urllib.request
urllib.request.urlretrieve("http://snap.stanford.edu/data/ca-GrQc.txt.gz"."ca-GrQc.txt.gz")
graph = nx.read_edgelist('ca-GrQc.txt.gz')
graph.order()
draw_network(graph,name="")
Copy the code

degrees = dict(graph.degree())
author = sorted(degrees.items(),key=lambda x: x[1],reverse=True) [500] [0]
print(The "degree" of scholar %s is %d'. % (author, graph.degree()[author]))

Get the subgraph
def get_subgraph(graph, nodes, n=100) :
    neighbors = set(a)for ni in nodes:
        neighbors |= set(graph.neighbors(ni))
    # plot at least the target node and his neighbors.
    result = set(nodes) | neighbors
    # add "friends of friends" up to n total nodes.
    for x in neighbors:
        # how many more nodes can we add?
        maxsize = n - len(result) 
        toadd = set(graph.neighbors(x)) - result
        result.update(list(toadd)[:maxsize])
        if len(result) > n:
            break
    return graph.subgraph(result)

subgraph = get_subgraph(graph, [author], n=30)
print('Subgraph has a %d node' % len(subgraph.nodes()))
Copy the code

Scholar 13813 has a degree of 13 and a subgraph of 30 nodes

# Draw a subgraph
def plot_subgraph(subgraph, target_nodes) :
    nodes = list(subgraph.nodes())
    colors = ['c'] * len(nodes)
    for n in target_nodes:
        idx = nodes.index(n)
        colors[idx] = 'r'
    sizes = [800] * len(nodes)
    sizes[idx] = 1000
    plt.figure(figsize=(8.8))
    plt.axis('off')
    nx.draw_networkx(subgraph, nodelist=nodes, with_labels=True,
                     width=. 5, node_color=colors,
                     node_size=sizes, alpha=. 5)

plot_subgraph(subgraph, [author])
Copy the code

Whom should author 13813 be recommended to cooperate with?

Link prediction method

  • The link prediction task is regarded as a sorting problem in information retrieval
  • Compute the new fraction s(X,Y)s(X,Y)s(X,Y) for each edge.
  • Arrange all possible fractional edges s(X,Y)s(X,Y)s(X,Y) in descending order.
  • Select the highest fraction edge s(X,Y)s(X,Y)s(X,Y) →\rightarrow→

1.) Shortest path: s(X,Y)=s(X,Y) =s(X,Y) = length of the shortest path from XXX to YYY.

# Shortest path algorithm
def rank_by_shortest_path(graph, node) :
    paths = nx.shortest_path_length(graph, node)
    return sorted(paths.items(), key=lambda x: x[1])

shortest_paths = rank_by_shortest_path(graph, author)
print('Shortest path top-20 :')
shortest_paths[:20]
print("Starting at author 13813, the cooperator with shortest path 2 in the network has %s bits." % len([s for s in shortest_paths if s[1] = =2]))
print('Link prediction Top-10 :')
[s for s in shortest_paths if s[1] = =2] [:10]
Copy the code

With author 13813 as the starting point, there are 57 collaborators with shortest path 2 in the network

Visualization, recommended effect

pair = set([author, '5227','5543','10931','639','18736'])
plot_subgraph(get_subgraph(graph, pair, n=30), pair)

Copy the code

Another approach: if XXX and YYy have many co-authors, then XXX and YYy are more likely to co-write

2.) jaccard: S = ∣ N (X) studying N (Y) ∣ ∣ N (X) ∪ N (Y) ∣ S = \ frac {| N (X) \ cap N (Y) |} {| N (X) \ cup N (Y) |} S = ∣ N (X) ∪ N (Y) ∣ ∣ N (X) studying N ∣ (Y)

def rank_by_jaccard(graph, node):
    neighbors = set(graph.neighbors(node))
    scores = []
    for n in graph.nodes():
        neighbors2 = set(graph.neighbors(n))
        scores.append((n, len(neighbors & neighbors2) / len(neighbors | neighbors2)))
    return sorted(scores, key=lambda x: x[1], reverse=True)

common_jaccard = rank_by_jaccard(graph, author)
common_jaccard[:20]
plt.plot(sorted([x[1] for x in common_jaccard if x[1] > 0.]))
plt.show()
Copy the code

def dataframe_scores(scores_list, names, n) :
    ids = set(a)for scores in scores_list:
        ids.update([x[0] for x in scores[:n]])
    print('Top % D' for link prediction by Jaccard algorithm % (n))
    results = []
    for id_ in ids:
        result = {'id': id_}
        for scores, name in zip(scores_list, names):
            for rank, (id2, score) in enumerate(scores):
                if id2 == id_:
                    result[name + '_rank'] = rank
                    result[name + '_score'] = score
                    break
        results.append(result)
    headers = ['id']
    for name in names:
        headers.append(name + '_rank')
        headers.append(name + '_score')
    return pd.DataFrame(results, columns=headers)
    
df = dataframe_scores([common_jaccard],['jaccard'].20)
df.sort_values('jaccard_rank').head(10)

pair = set([author, '23204'.'19204'.'17559'.'409'.'6746'])
plot_subgraph(get_subgraph(graph, pair, n=30), pair)
Copy the code