
Analyzing WeChat friend data with Python

Reposted. Author: qq735679552. Updated: 2022-09-27 22:32:09



Using itchat, a Python library built on WeChat's personal-account web interface, this article retrieves the friend list and analyzes friends' provinces, gender, and personal signatures.
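For orientation, here is a minimal sketch of the two itchat calls the full script below is built on: auto_login() pops up a QR code to scan with the WeChat app, and get_friends(update=True) returns the friend list as dict-like records whose NickName, Sex, Province and Signature fields are the ones analyzed later. The snippet assumes only that itchat is installed; the printed fields mirror what the script reads.

# Minimal sketch: log in and fetch the friend list with itchat
import itchat

itchat.auto_login()                        # opens a QR code to scan with the WeChat app
friends = itchat.get_friends(update=True)  # index 0 is the logged-in account itself
print("friend count:", len(friends) - 1)
for f in friends[1:4]:                     # each friend is a dict-like record
    print(f['NickName'], f['Sex'], f['Province'], f['Signature'])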

Sample output: a bar chart of friends' gender, a bar chart of the top provinces, and a word cloud built from friends' signatures.

Straight to the code: create three empty text files, stopwords.txt, newdit.txt and unionWords.txt, and download the font simhei.ttf (or delete the code that references the font), and the script can be run directly.
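All three files are plain UTF-8 text. A hypothetical example of their contents, inferred from how segmentWords() reads them below (one stop word per line; a jieba user dictionary with optional frequency and part-of-speech tag; merge lines where the first word replaces the aliases after it, joined by "*") might look like this; the specific words are illustrative only.

# stopwords.txt  - one stop word per line
的
了
哈哈

# newdit.txt     - jieba user dictionary: word [frequency] [POS tag]
朋友圈 5 n

# unionWords.txt - one merge group per line: target*alias1*alias2
人生*生活*生命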

# wxfriends.py  2018-07-09
import itchat
import sys
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei']  # use a Chinese font so chart labels render correctly
plt.rcParams['axes.unicode_minus'] = False    # keep the minus sign readable with a Chinese font
import jieba
import jieba.posseg as pseg
from imageio import imread  # scipy.misc.imread was removed in SciPy 1.2+; imageio.imread is used as the replacement here
from wordcloud import WordCloud
from os import path

# Map characters above the Basic Multilingual Plane (emoji etc.) to U+FFFD to avoid encoding errors
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)


# Fetch the friend list
def getFriends():
    friends = itchat.get_friends(update=True)[0:]
    flists = []
    for i in friends:
        fdict = {}
        fdict['NickName'] = i['NickName'].translate(non_bmp_map)
        if i['Sex'] == 1:
            fdict['Sex'] = '男'
        elif i['Sex'] == 2:
            fdict['Sex'] = '女'
        else:
            fdict['Sex'] = '雌雄同体'
        if i['Province'] == '':
            fdict['Province'] = '未知'
        else:
            fdict['Province'] = i['Province']
        fdict['City'] = i['City']
        fdict['Signature'] = i['Signature']
        flists.append(fdict)
    return flists


# Save the friend list to a CSV file
def saveCSV(lists):
    df = pd.DataFrame(lists)
    try:
        df.to_csv("wxfriends.csv", index=True, encoding='gb18030')
    except Exception as ret:
        print(ret)
    return df


# Count the gender and province fields; keep the signature column for word analysis
def anysys(df):
    df_sex = pd.DataFrame(df['Sex'].value_counts())
    df_province = pd.DataFrame(df['Province'].value_counts()[:15])
    df_signature = pd.DataFrame(df['Signature'])
    return df_sex, df_province, df_signature


# Draw a bar chart and save it as an image named after the feature
def draw_chart(df_list, x_feature):
    try:
        x = list(df_list.index)
        ylist = df_list.values
        y = []
        for i in ylist:
            for j in i:
                y.append(j)
        plt.bar(x, y, label=x_feature)
        plt.legend()
        plt.savefig(x_feature)
        plt.close()
    except Exception as ret:
        print("Failed to draw chart:", ret)


# Flatten the signature column into a list of strings
def getSignList(signature):
    sig_list = []
    for i in signature.values:
        for j in i:
            sig_list.append(j.translate(non_bmp_map))
    return sig_list


# Word segmentation; stop words, user-defined words and merge rules come from the three text files
def segmentWords(txtlist):
    stop_words = set(line.strip() for line in open('stopwords.txt', encoding='utf-8'))
    newslist = []
    # Load user-defined words
    jieba.load_userdict("newdit.txt")
    for subject in txtlist:
        if subject.isspace():
            continue
        word_list = pseg.cut(subject)
        for word, flag in word_list:
            # keep nouns and English words that are not stop words or HTML leftovers
            # (parentheses added: the original condition mixed 'and'/'or' without them)
            if word not in stop_words and (flag == 'n' or flag == 'eng') and word != 'span' and word != 'class':
                newslist.append(word)
    # Merge the specified similar words; each line of unionWords.txt is "target*alias1*alias2..."
    for line in open('unionWords.txt', encoding='utf-8'):
        newline = line.encode('utf-8').decode('utf-8-sig')  # strip the BOM (\ufeff)
        unionlist = newline.strip().split("*")
        for j in range(1, len(unionlist)):
            for index, value in enumerate(newslist):
                if value == unionlist[j]:
                    newslist[index] = unionlist[0]
    return newslist


# Print the most frequent words
def countWords(newslist):
    wordDict = {}
    for item in newslist:
        wordDict[item] = wordDict.get(item, 0) + 1
    itemList = list(wordDict.items())
    itemList.sort(key=lambda x: x[1], reverse=True)  # the original line was missing the closing parenthesis
    for i in range(min(100, len(itemList))):         # print at most 100 words, fewer if the list is short
        word, count = itemList[i]
        print("{}:{}".format(word, count))


# Draw the word cloud
def drawPlant(newslist):
    d = path.dirname(__file__)
    mask_file = path.join(d, "timg.png")
    # optional shape mask: only loaded if timg.png sits next to the script
    mask_image = imread(mask_file) if path.exists(mask_file) else None
    content = ' '.join(newslist)
    # pass mask=mask_image to shape the cloud like the image
    wordcloud = WordCloud(font_path='simhei.ttf', background_color="white",
                          width=1300, height=620, max_words=200).generate(content)
    plt.imshow(wordcloud)
    plt.axis("off")
    wordcloud.to_file('wordcloud.jpg')
    plt.show()


def main():
    # Log in to WeChat by scanning the QR code (pass hotReload=True to skip scanning next time)
    itchat.auto_login()
    flists = getFriends()
    fdf = saveCSV(flists)
    df_sex, df_province, df_signature = anysys(fdf)
    draw_chart(df_sex, "性别")
    draw_chart(df_province, "省份")
    wordList = segmentWords(getSignList(df_signature))
    countWords(wordList)
    drawPlant(wordList)


main()
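A short usage note: the imports above require itchat, pandas, matplotlib, jieba, wordcloud and imageio, all installable with pip. The comment in main() refers to itchat's hotReload option; assuming the standard itchat parameter, logging in with

itchat.auto_login(hotReload=True)  # caches the login session in a local itchat.pkl file

lets the script restore the session on later runs instead of asking for a new QR-code scan each time.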

That is all for this article. I hope it helps with your studies, and I hope you will continue to support me.

Original article: https://blog.csdn.net/zenobia119/article/details/80990970

