# Visualizing a social network with third-party Python libraries.
# The connections data is read with the pandas module.
import datetime
import html

import janitor  # pyjanitor; the cleaning chain below calls .clean_names() / .to_datetime() on the DataFrame
import networkx as nx
import pandas as pd
# display/HTML come from the public IPython.display module rather than the
# internal IPython.core.display path.
from IPython.display import HTML, display
from pyvis import network as net
# Load the connections export. The first three lines of the file are skipped
# (presumably notes written above the header row -- confirm against your file).
df_ori = pd.read_csv("Connections.csv", skiprows=3)
df_ori.head()

# Clean the raw export in one method chain.
df = (
    df_ori
    # Normalize column names: spaces removed, upper case converted to lower case.
    .clean_names()
    # Drop the three personally identifying columns.
    .drop(columns=['first_name', 'last_name', 'email_address'])
    # Discard rows where company or position is missing.
    .dropna(subset=['company', 'position'])
    # Parse the connection date. The pattern needs the separating spaces
    # (day, abbreviated month, year -- e.g. "15 Aug 2021"); the space-free
    # form '%d%b%Y' cannot match such values.
    # NOTE(review): verify the date layout against your own export.
    .to_datetime('connected_on', format='%d %b %Y')
)
   company  position  connected_on
0  xxxxxxxxxx  Talent Acquisition  2021-08-15
1  xxxxxxxxxxxx  Associate Partner  2021-08-14
2  xxxxx  猎头顾问  2021-08-14
3  xxxxxxxxxxxxxxxxxxxxxxxxx  Consultant  2021-07-26
4  xxxxxxxxxxxxxxxxxxxxxx  Account Manager  2021-07-19
# Horizontal bar charts of the ten most frequent companies and positions.
# The y-axis is inverted on each chart after plotting.
top_companies = df['company'].value_counts().head(10)
top_companies.plot(kind="barh").invert_yaxis()

top_positions = df['position'].value_counts().head(10)
top_positions.plot(kind="barh").invert_yaxis()
# Minimal example using the networkx and pyvis modules.
g = nx.Graph()
g.add_node(0, label="root")  # initialize yourself as the central node
g.add_node(1, label="Company1", size=10, title="info1")
g.add_node(2, label="Company2", size=40, title="info2")
g.add_node(3, label="Company3", size=60, title="info3")

# `size` is the size of each node; next, connect every node to the central one.
g.add_edge(0, 1)
g.add_edge(0, 2)
g.add_edge(0, 3)
# Tally connections per company into a two-column frame ('company', 'count'),
# ordered from the most to the least frequent company.
df_company = (
    df['company']
    .value_counts()
    .rename_axis('company')
    .reset_index(name='count')
    .sort_values(by="count", ascending=False)
)
df_company.head(10)
    company  count
0   Amazon  xx
1   Google  xx
2   Facebook  xx
3   Stevens Institute of Technology  xx
4   Microsoft  xx
5   JPMorgan Chase & Co.  xx
6   Amazon Web Services (AWS)  xx
9   Apple  x
10  Goldman Sachs  x
8   Oracle  x
# --- Company network ---------------------------------------------------------
# `df_company_reduced` is not defined anywhere else in this file, so derive it
# here: keep only companies with at least `min_company_connections` contacts.
# NOTE(review): the threshold of 5 is an assumption -- tune it to your data.
min_company_connections = 5
df_company_reduced = df_company.loc[df_company['count'] >= min_company_connections]

# Instantiate the network.
g = nx.Graph()

# Put yourself at the centre of the network. The same id must be used for the
# node and for every edge; otherwise the centre node stays isolated and the
# edges attach to a second, implicitly created node.
center = 'root'
g.add_node(center)

# Walk over every row of the reduced data set.
for _, row in df_company_reduced.iterrows():
    # Company name and number of connections for this row.
    company = row['company']
    count = int(row['count'])  # plain int for the node size

    # Hover text: bold company name and count, then a bullet list of the
    # distinct positions held at that company. Values come from the CSV, so
    # they are HTML-escaped (names such as "A & B" would otherwise be emitted
    # as raw markup). Sorting keeps the list order stable between runs.
    title = f"<b>{html.escape(str(company))}</b>–{count}"
    company_positions = sorted(
        {str(p) for p in df.loc[df['company'] == company, 'position']}
    )
    positions = ''.join(
        '<li>{}</li>'.format(html.escape(p)) for p in company_positions
    )
    position_list = f"<ul>{positions}</ul>"
    hover_info = title + position_list

    g.add_node(company, size=count * 2, title=hover_info, color='#3449eb')
    g.add_edge(center, company, color='grey')

# Generate the network chart.
nt = net.Network(height='700px', width='700px', bgcolor="black", font_color='white')
nt.from_nx(g)
nt.hrepulsion()
nt.show('company_graph.html')
# HTML() treats a positional string as markup; `filename=` loads the file.
display(HTML(filename='company_graph.html'))
# Tally connections per position into a two-column frame ('position', 'count'),
# ordered from the most to the least frequent position.
df_position = (
    df['position']
    .value_counts()
    .rename_axis('position')
    .reset_index(name='count')
    .sort_values(by="count", ascending=False)
)
df_position.head(10)
   position  count
0  Software Engineer  xx
1  Data Scientist  xx
2  Senior Software Engineer  xx
3  Data Analyst  xx
4  Senior Data Scientist  xx
5  Software Development Engineer  xx
6  Software Development Engineer II  xx
7  Founder  xx
8  Data Engineer  xx
9  Business Analyst  xx
# --- Position network --------------------------------------------------------
# `df_position_reduced` is not defined anywhere else in this file, so derive
# it here: keep only positions held by at least `min_position_connections`
# contacts.
# NOTE(review): the threshold of 5 is an assumption -- tune it to your data.
min_position_connections = 5
df_position_reduced = df_position.loc[df_position['count'] >= min_position_connections]

g = nx.Graph()

# Put yourself at the centre of the network. The same id must be used for the
# node and for every edge; otherwise the centre node stays isolated and the
# edges attach to a second, implicitly created node.
center = 'root'
g.add_node(center)

for _, row in df_position_reduced.iterrows():
    # Position name and number of connections for this row.
    position = row['position']
    count = int(row['count'])  # plain int for the node size

    # Hover text: bold position name and count, then a bullet list. Values
    # come from the CSV, so they are HTML-escaped before being embedded.
    title = f"<b>{html.escape(str(position))}</b>–{count}"
    # NOTE(review): this selects the 'position' column of the rows that match
    # `position`, so the list only ever holds the position itself. Listing the
    # matching df['company'] values may have been intended -- confirm.
    matching_positions = sorted(
        {str(p) for p in df.loc[df['position'] == position, 'position']}
    )
    positions = ''.join(
        '<li>{}</li>'.format(html.escape(p)) for p in matching_positions
    )
    position_list = f"<ul>{positions}</ul>"
    hover_info = title + position_list

    g.add_node(position, size=count * 2, title=hover_info, color='#3449eb')
    g.add_edge(center, position, color='grey')

# Generate the network chart.
nt = net.Network(height='700px', width='700px', bgcolor="black", font_color='white')
nt.from_nx(g)
nt.hrepulsion()
nt.show('position_graph.html')
分享
点收藏
点点赞
点在看
文章转发自AI科技大本营微信公众号,版权归其所有。文章内容不代表本站立场和任何投资暗示。
Copyright © 2021.Company 元宇宙YITB.COM All rights reserved.元宇宙YITB.COM