基于微信开放的个人号接口python库itchat,实现对微信好友的获取,并对省份、性别、微信签名做数据分析。
效果:
直接上代码:先新建三个空文本文件 stopwords.txt、newdit.txt、unionWords.txt,再下载字体 simhei.ttf(或删除代码中指定字体的部分),即可直接运行。
# wxfriends.py 2018-07-09
"""Fetch WeChat friends through itchat and analyse sex, province and signature.

Running the script logs in with itchat, dumps the friend list to
``wxfriends.csv``, saves one bar chart for the sex distribution and one for
the 15 most common provinces, prints the most frequent signature words and
renders them as a word cloud (``wordcloud.jpg``).

Files read from the current working directory: ``stopwords.txt``,
``newdit.txt`` (jieba user dictionary), ``unionWords.txt`` (synonym groups,
``canonical*alias1*alias2`` per line) and the font ``simhei.ttf``.
"""
import sys
from collections import Counter
from os import path

import itchat
import jieba
import jieba.posseg as pseg
import matplotlib.pyplot as plt
import pandas as pd
from wordcloud import WordCloud

try:
    from scipy.misc import imread
except ImportError:
    # scipy.misc.imread was removed in SciPy 1.2; only the optional mask
    # feature of drawPlant() needs it, so the script must still start.
    imread = None

plt.rcParams['font.sans-serif'] = ['SimHei']  # lets charts display Chinese text
plt.rcParams['axes.unicode_minus'] = False  # keeps the minus sign displayable with that font

# Translation table that maps every code point above the BMP (U+10000 and up)
# to U+FFFD; used to work around encoding problems with such characters.
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)

# itchat 'Sex' codes with a dedicated label; any other code falls back to the
# default label used in getFriends().
_SEX_LABELS = {1: '男', 2: '女'}

# English tokens that are always dropped by segmentWords()
# (presumably leftovers of HTML markup inside signatures -- TODO confirm).
_IGNORED_ENG_WORDS = frozenset({'span', 'class'})


def getFriends():
    """Return the friend list as a list of plain dicts.

    Each dict has the keys ``NickName``, ``Sex``, ``Province``, ``City`` and
    ``Signature``. Nicknames are passed through ``non_bmp_map``; a missing
    province is reported as '未知'.
    """
    friends = itchat.get_friends(update=True)[0:]
    flists = []
    for friend in friends:
        flists.append({
            'NickName': friend['NickName'].translate(non_bmp_map),
            'Sex': _SEX_LABELS.get(friend['Sex'], '雌雄同体'),
            'Province': friend['Province'] or '未知',
            'City': friend['City'],
            'Signature': friend['Signature'],
        })
    return flists


def saveCSV(lists):
    """Build a DataFrame from *lists*, write it to ``wxfriends.csv``, return it.

    Writing is best effort: a failure is printed and the DataFrame is still
    returned so the analysis can continue.
    """
    df = pd.DataFrame(lists)
    try:
        df.to_csv("wxfriends.csv", index=True, encoding='gb18030')
    except Exception as ret:  # deliberate best effort, see docstring
        print(ret)
    return df


def anysys(df):
    """Return ``(sex counts, top-15 province counts, signatures)`` as DataFrames."""
    df_sex = pd.DataFrame(df['Sex'].value_counts())
    df_province = pd.DataFrame(df['Province'].value_counts()[:15])
    df_signature = pd.DataFrame(df['Signature'])
    return df_sex, df_province, df_signature


def draw_chart(df_list, x_feature):
    """Draw a bar chart of *df_list* and save it under the name *x_feature*.

    The index of *df_list* supplies the bar labels and its flattened values
    the bar heights. Failures are reported on stdout instead of raised.
    """
    try:
        labels = list(df_list.index)
        heights = df_list.values.ravel().tolist()
        plt.bar(labels, heights, label=x_feature)
        plt.legend()
        plt.savefig(x_feature)
    except Exception as err:
        # Still best effort, but say why instead of hiding the cause.
        print("绘图失败", err)
    finally:
        # Close the figure even on failure so the next chart starts clean.
        plt.close()


def getSignList(signature):
    """Flatten the signature DataFrame into a list of strings.

    Every value is passed through ``non_bmp_map``; non-string cells are
    skipped.
    """
    return [
        text.translate(non_bmp_map)
        for row in signature.values
        for text in row
        if isinstance(text, str)
    ]


def segmentWords(txtlist):
    """Segment the texts with jieba and return the kept words.

    A word is kept when it is not a stop word and is tagged either ``n`` or
    ``eng`` (``eng`` words listed in ``_IGNORED_ENG_WORDS`` are dropped).
    Afterwards every alias listed in ``unionWords.txt`` is replaced by the
    first word of its line.
    """
    # utf-8-sig reads plain UTF-8 too and drops a leading BOM if present.
    with open('stopwords.txt', encoding='utf-8-sig') as stop_file:
        stop_words = {line.strip() for line in stop_file}

    # Register the user-defined words before segmenting.
    jieba.load_userdict("newdit.txt")

    newslist = []
    for subject in txtlist:
        if not subject.strip():
            continue
        for word, flag in pseg.cut(subject):
            # The stop-word filter applies to every token; the original
            # condition lacked parentheses and only applied it to nouns.
            if word in stop_words:
                continue
            if flag == 'n' or (flag == 'eng' and word not in _IGNORED_ENG_WORDS):
                newslist.append(word)

    # Merge the listed similar words, one group per line, in file order.
    with open('unionWords.txt', encoding='utf-8-sig') as union_file:
        for line in union_file:
            # Strip each part: the trailing newline used to stay glued to
            # the last alias, so that alias never matched.
            canonical, *aliases = [part.strip() for part in line.split("*")]
            alias_set = {alias for alias in aliases if alias}
            if not canonical or not alias_set:
                continue
            newslist = [canonical if word in alias_set else word
                        for word in newslist]
    return newslist


def countWords(newslist, top_n=100):
    """Print and return the *top_n* most frequent words as ``(word, count)``.

    Fewer than *top_n* distinct words is fine: only the existing ones are
    reported.
    """
    ranking = Counter(newslist).most_common(top_n)
    for word, count in ranking:
        print("{}:{}".format(word, count))
    return ranking


def drawPlant(newslist, mask_path=None):
    """Render *newslist* as a word cloud, save it to ``wordcloud.jpg``, show it.

    mask_path: optional path of a mask image. The default (``None``) draws
    the plain 1300x620 cloud, exactly as before; no mask file is required.
    """
    content = ' '.join(newslist)
    if not content.strip():
        # Nothing to draw; generating a cloud from no words would fail.
        print("没有可用的词语,跳过词云绘制")
        return

    mask_image = None
    if mask_path is not None:
        if imread is None:
            raise RuntimeError(
                "scipy.misc.imread is unavailable in this SciPy version; "
                "cannot load the mask image")
        mask_image = imread(mask_path)

    wordcloud = WordCloud(font_path='simhei.ttf', background_color="white",
                          width=1300, height=620, max_words=200,
                          mask=mask_image).generate(content)
    # Display the generated image:
    plt.imshow(wordcloud)
    plt.axis("off")
    wordcloud.to_file('wordcloud.jpg')
    plt.show()


def main():
    """Log in, export the friend list and produce all charts and statistics."""
    # Log in to WeChat; pass hotReload=True to skip the QR scan on later runs.
    itchat.auto_login()
    flists = getFriends()
    fdf = saveCSV(flists)
    df_sex, df_province, df_signature = anysys(fdf)
    draw_chart(df_sex, "性别")
    draw_chart(df_province, "省份")
    wordList = segmentWords(getSignList(df_signature))
    countWords(wordList)
    drawPlant(wordList)


if __name__ == "__main__":
    main()
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持。
华山资源网 Design By www.eoogi.com
广告合作:本站广告合作请联系QQ:858582 申请时备注:广告合作(否则不回)
免责声明:本站资源来自互联网收集,仅供用于学习和交流,请遵循相关法律法规,本站一切资源不代表本站立场,如有侵权、后门、不妥请联系本站删除!
免责声明:本站资源来自互联网收集,仅供用于学习和交流,请遵循相关法律法规,本站一切资源不代表本站立场,如有侵权、后门、不妥请联系本站删除!
华山资源网 Design By www.eoogi.com
暂无评论...
更新日志
2024年11月15日
2024年11月15日
- 黄乙玲1988-无稳定的爱心肝乱糟糟[日本东芝1M版][WAV+CUE]
- 群星《我们的歌第六季 第3期》[320K/MP3][70.68MB]
- 群星《我们的歌第六季 第3期》[FLAC/分轨][369.48MB]
- 群星《燃!沙排少女 影视原声带》[320K/MP3][175.61MB]
- 乱斗海盗瞎6胜卡组推荐一览 深暗领域乱斗海盗瞎卡组分享
- 炉石传说乱斗6胜卡组分享一览 深暗领域乱斗6胜卡组代码推荐
- 炉石传说乱斗本周卡组合集 乱斗模式卡组最新推荐
- 佟妍.2015-七窍玲珑心【万马旦】【WAV+CUE】
- 叶振棠陈晓慧.1986-龙的心·俘虏你(2006复黑限量版)【永恒】【WAV+CUE】
- 陈慧琳.1998-爱我不爱(国)【福茂】【WAV+CUE】
- 咪咕快游豪礼放送,百元京东卡、海量欢乐豆就在咪咕咪粉节!
- 双11百吋大屏焕新“热”,海信AI画质电视成最大赢家
- 海信电视E8N Ultra:真正的百吋,不止是大!
- 曾庆瑜1990-曾庆瑜历年精选[派森][WAV+CUE]
- 叶玉卿1999-深情之选[飞图][WAV+CUE]