在NetworkX中,计算有向图节点的PageRank节点重要度。
参考资料
networkx官方教程:https://networkx.org/documentation/stable/tutorial.html
nx.Graph https://networkx.org/documentation/stable/reference/classes/graph.html#networkx.Graph
给图、节点、连接添加属性:https://networkx.org/documentation/stable/tutorial.html#attributes
读写图:https://networkx.org/documentation/stable/reference/readwrite/index.html
导入工具包
1
2
3
4
5
6
7
8
9
10
|
import networkx as nx # 图数据挖掘
import numpy as np # 数据分析
import random # 随机数
import pandas as pd
# 数据可视化
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['font.sans-serif']=['SimHei'] # 用来正常显示中文标签
plt.rcParams['axes.unicode_minus']=False # 用来正常显示负号
|
1
|
nx.draw(G, with_labels = True)
|
数据下载地址:http://www.openkg.cn/dataset/ch4masterpieces
1
|
pagerank = nx.pagerank(G, alpha=0.8)
|
{0: 0.4583348922684132,
1: 0.07738072967594098,
2: 0.07738072967594098,
3: 0.07738072967594098,
4: 0.07738072967594098,
5: 0.07738072967594098,
6: 0.07738072967594098,
7: 0.07738072967594098}
1
2
|
# 导入 csv 文件定义的有向图
df = pd.read_csv('./triples.csv')
|
|
head |
tail |
relation |
label |
0 |
金蝉子 |
唐僧 |
past_life |
前世 |
1 |
孙悟空 |
唐僧 |
apprentice |
徒弟 |
2 |
猪八戒 |
唐僧 |
apprentice |
徒弟 |
3 |
沙僧 |
唐僧 |
apprentice |
徒弟 |
4 |
白龙马 |
唐僧 |
apprentice |
徒弟 |
... |
... |
... |
... |
... |
104 |
毗蓝婆菩萨 |
昴日星官 |
mother |
母亲 |
105 |
嫦娥 |
后羿 |
wife |
妻 |
106 |
敖摩昂 |
敖闰 |
son |
儿 |
107 |
哪吒 |
李靖 |
son |
儿 |
108 |
哪吒 |
如来 |
apprentice |
徒弟 |
109 rows × 4 columns
1
2
3
4
|
edges = [edge for edge in zip(df['head'], df['tail'])]
G = nx.DiGraph()
G.add_edges_from(edges)
|
1
2
3
4
5
|
# 可视化
plt.figure(figsize=(15,14))
pos = nx.spring_layout(G, iterations=3, seed=5) #设置为基于弹簧布局,迭代次数为3,次数越多,根据节点之间的相互作用力找到的节点位置越合理
nx.draw(G, pos, with_labels=True)
plt.show()
|
1
2
3
4
5
6
7
8
|
pagerank = nx.pagerank(G, # NetworkX graph 有向图,如果是无向图则自动转为双向有向图
alpha=0.85, # Damping Factor,阻尼系数,理解这个得理解谷歌矩阵,也就是明白算法本身才行
personalization=None, # 是否开启Personalized PageRank,随机传送至指定节点集合的概率更高或更低
max_iter=100, # 最大迭代次数
tol=1e-06, # 判定收敛的误差
nstart=None, # 每个节点初始PageRank值
dangling=None, # Dead End死胡同节点
)
|
1
|
sorted(pagerank.items(), key=lambda x: x[1], reverse=True)
|
[('唐僧', 0.13349105557884888),
('孙悟空', 0.10498354112014094),
('白龙马', 0.09531260474698808),
('猪八戒', 0.09247797536009736),
('沙僧', 0.07627154154696374),
('李世民', 0.052002919751408624),
('观音菩萨', 0.026625716774094633),
('高翠兰', 0.02579183411604112),
('卵二姐', 0.01860884001045803),
('太上老君', 0.014430996933862522),
('如来', 0.013334300311185142),
('牛魔王', 0.010256020230003658),
('哪吒', 0.009171370913926254),
('灵吉菩萨', 0.007800320258156309),
('宼栋', 0.007432108638238391),
('昴日星官', 0.007432108638238391),
('后羿', 0.007432108638238391),
('李靖', 0.006787403654483575),
('殷温娇', 0.005344620286308959),
('寇梁', 0.005344620286308959),
('袁天罡', 0.005344620286308959),
('金角', 0.005344620286308959),
('银角', 0.005344620286308959),
('西海龙王太子', 0.005344620286308959),
('弥勒佛', 0.005344620286308959),
('毗蓝婆菩萨', 0.005344620286308959),
('文殊菩萨', 0.005344620286308959),
('普贤菩萨', 0.005344620286308959),
('太乙救苦天尊', 0.005344620286308959),
('嫦娥', 0.005344620286308959),
('南极寿星', 0.005344620286308959),
('东来佛祖笑和尚', 0.005344620286308959),
('敖闰', 0.005344620286308959),
('木吒', 0.004812288400108432),
('金吒', 0.004812288400108432),
('高玉兰', 0.004116770300385284),
('金蝉子', 0.0028889203144616088),
('陈光蕊', 0.0028889203144616088),
('法明和尚', 0.0028889203144616088),
('殷开山', 0.0028889203144616088),
('菩提老祖', 0.0028889203144616088),
('镇元子', 0.0028889203144616088),
('蛟魔王', 0.0028889203144616088),
('鹏魔王', 0.0028889203144616088),
('狮驼王', 0.0028889203144616088),
('猕猴王', 0.0028889203144616088),
('禺狨王', 0.0028889203144616088),
('天蓬元帅', 0.0028889203144616088),
('卷帘大将', 0.0028889203144616088),
('西海龙王', 0.0028889203144616088),
('西海龙母', 0.0028889203144616088),
('敖摩昂太子', 0.0028889203144616088),
('西海龙女', 0.0028889203144616088),
('李渊', 0.0028889203144616088),
('李建成', 0.0028889203144616088),
('李元吉', 0.0028889203144616088),
('王珪', 0.0028889203144616088),
('秦琼', 0.0028889203144616088),
('萧瑀', 0.0028889203144616088),
('傅奕', 0.0028889203144616088),
('魏征', 0.0028889203144616088),
('李玉英', 0.0028889203144616088),
('房玄龄', 0.0028889203144616088),
('杜如晦', 0.0028889203144616088),
('徐世绩', 0.0028889203144616088),
('徐茂公', 0.0028889203144616088),
('许敬宗', 0.0028889203144616088),
('马三宝', 0.0028889203144616088),
('段志贤', 0.0028889203144616088),
('程咬金', 0.0028889203144616088),
('虞世南', 0.0028889203144616088),
('张道源', 0.0028889203144616088),
('张士衡', 0.0028889203144616088),
('高太公', 0.0028889203144616088),
('高香兰', 0.0028889203144616088),
('寇洪', 0.0028889203144616088),
('袁守诚', 0.0028889203144616088),
('正元龙', 0.0028889203144616088),
('二十四路诸天', 0.0028889203144616088),
('守山大神', 0.0028889203144616088),
('善财童子', 0.0028889203144616088),
('捧珠龙女', 0.0028889203144616088),
('红孩儿', 0.0028889203144616088),
('黑风怪', 0.0028889203144616088),
('黄风怪', 0.0028889203144616088),
('黄毛貂鼠', 0.0028889203144616088),
('铁扇公主', 0.0028889203144616088),
('九尾狐狸', 0.0028889203144616088),
('狐阿七', 0.0028889203144616088),
('鼍龙怪', 0.0028889203144616088),
('灵感大王', 0.0028889203144616088),
('独角兕大王', 0.0028889203144616088),
('玉面公主', 0.0028889203144616088),
('金毛犼', 0.0028889203144616088),
('黄眉道童', 0.0028889203144616088),
('百眼魔君', 0.0028889203144616088),
('青狮', 0.0028889203144616088),
('白象', 0.0028889203144616088),
('大鹏金翅雕', 0.0028889203144616088),
('九头狮子', 0.0028889203144616088),
('玉兔精', 0.0028889203144616088),
('白鹿精', 0.0028889203144616088),
('黄眉大王', 0.0028889203144616088),
('敖摩昂', 0.0028889203144616088)]
1
2
|
# 节点尺寸
node_sizes = (np.array(list(pagerank.values())) * 8000).astype(int)
|
array([ 23, 1067, 839, 739, 610, 762, 23, 42, 23, 416, 23,
23, 23, 82, 23, 23, 23, 23, 23, 23, 148, 206,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 32, 23, 42, 59, 23, 42,
54, 38, 73, 38, 23, 213, 23, 23, 23, 23, 106,
23, 23, 23, 62, 23, 42, 115, 42, 23, 23, 23,
23, 42, 23, 23, 23, 23, 23, 42, 23, 42, 23,
42, 23, 42, 23, 23, 42, 23, 42, 23, 42, 23,
42, 59, 59, 23, 42])
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
|
# 节点颜色
M = G.number_of_edges()
edge_colors = range(2, M + 2)
plt.figure(figsize=(15,14))
# 绘制节点
nodes = nx.draw_networkx_nodes(G, pos, node_size=node_sizes, node_color=node_sizes)
# 绘制连接
edges = nx.draw_networkx_edges(
G,
pos,
node_size=node_sizes, # 节点尺寸
arrowstyle="->", # 箭头样式
arrowsize=20, # 箭头尺寸
edge_color=edge_colors, # 连接颜色
edge_cmap=plt.cm.plasma,# 连接配色方案,可选:plt.cm.Blues
width=4 # 连接线宽
)
# 设置每个连接的透明度
edge_alphas = [(5 + i) / (M + 4) for i in range(M)]
for i in range(M):
edges[i].set_alpha(edge_alphas[i])
# # 图例
# pc = mpl.collections.PatchCollection(edges, cmap=cmap)
# pc.set_array(edge_colors)
# plt.colorbar(pc)
ax = plt.gca()
ax.set_axis_off()
plt.show()
|
/tmp/ipykernel_206894/3554403709.py:12: DeprecationWarning: `alltrue` is deprecated as of NumPy 1.25.0, and will be removed in NumPy 2.0. Please use `all` instead.
edges = nx.draw_networkx_edges(