# 微博数据客户端与手机端获取数据有差,有部分数据丢失。
# (NOTE: client-side weibo data differs from the mobile feed; some posts are missing.)
import re
import time

import pymysql
import requests  # was missing: requests.get() is called in data_store()
from pyquery import PyQuery as py
'''
Scraper stage: fetch posts of the HFUT account from the m.weibo.cn JSON API
and store them into MySQL (inital_weibo_data / inital_weibo_data_special).
'''
# MySQL connection for the scraper stage.
# NOTE(review): credentials are hard-coded; positional connect() args are
# host, user, password, database (removed in pymysql >= 1.0 -- confirm version).
db = pymysql.connect('localhost', 'root', '123456', 'hfut_weibo',use_unicode=True, charset="utf8")
cursor = db.cursor()
# Force utf8 on the session so Chinese text round-trips correctly.
cursor.execute('SET NAMES utf8;')
cursor.execute('SET CHARACTER SET utf8;')
cursor.execute('SET character_set_connection=utf8;')
# Mobile-browser UA so m.weibo.cn serves the JSON API used below.
headers = {'user-agent':'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Mobile Safari/537.36'}
def mysql(date, attitudes_count, comments_count, reposts_count, index, title, data):
    """Insert one post into MySQL.

    A post whose title contains one of the special column hashtags goes into
    inital_weibo_data_special (with *title* normalized to the bare hashtag);
    everything else goes into inital_weibo_data.

    Values are passed as query parameters, which fixes the SQL injection /
    quoting bugs of the old %-interpolated statements and makes the manual
    transferContent() quote escaping unnecessary (MySQL previously unescaped
    those backslashes, so the stored bytes are unchanged).
    """
    special_tags = (
        '#冰糖说英语#', '#冰糖点歌台#', '#冰糖小百科#', '#冰糖的树洞#', '#冰糖么么答#',
        '#冰糖赠书#', '#冰糖书影音#', '#老照片#', '#直播#', '#送福利#')
    for tag in special_tags:
        # The tags contain no regex metacharacters, so a plain substring test
        # is equivalent to the old re.compile/re.search dance.
        if tag in title:
            print(data)
            cursor.execute(
                'INSERT INTO inital_weibo_data_special VALUES (%s, %s, %s, %s, %s, %s, %s)',
                (date, attitudes_count, comments_count, reposts_count, index, tag, data))
            db.commit()
            return
    cursor.execute(
        'INSERT INTO inital_weibo_data VALUES (%s, %s, %s, %s, %s, %s, %s)',
        (date, attitudes_count, comments_count, reposts_count, index, title, data))
    db.commit()
    return
def transferContent(content):
    """Return *content* with double and single quotes backslash-escaped.

    Used to make text safe for embedding in %-formatted SQL strings.
    NOTE(review): parameterized queries make this escaping unnecessary.
    """
    # str.replace runs in C and avoids the old quadratic `string += c` loop.
    return content.replace('"', '\\"').replace("'", "\\'")
def error_com(data):
    """Return the text of the <a> elements inside a post's HTML *data*.

    The column hashtag of a weibo post is carried inside an anchor tag, so
    this recovers the '#栏目#' title from the raw post markup.
    (Dead commented-out regex matching code removed.)
    """
    return py(data).find('a').text()
def data_store():
    """Crawl up to 29 feed pages of the account and store every post.

    Pages that fail (short page, malformed JSON, DB error) are counted and
    skipped -- best effort, matching the original policy; the failure count
    is printed at the end.
    """
    failed_pages = 0
    for page in range(1, 30):
        time.sleep(0.5)  # throttle: stay under the API's rate limit
        url = "https://m.weibo.cn/api/container/getIndex?uid=2348668214&luicode=10000011&lfid=100103type%3D1%26q%3D%E5%90%88%E8%82%A5%E5%B7%A5%E4%B8%9A%E5%A4%A7%E5%AD%A6&featurecode=20000320&type=uid&value=2348668214&containerid=1076032348668214&page={num}".format(num=page)
        response = requests.get(url, headers=headers)
        try:
            # Parse the JSON once per page (the old code re-parsed it per card).
            cards = response.json()['data']['cards']
            # Page 1 carries pinned/profile entries in cards 0-1, so the real
            # posts start at index 2 there; later pages start at index 0.
            indices = range(2, 11) if page == 1 else range(0, 10)
            for j in indices:
                time.sleep(0.05)
                _store_card(cards[j]['mblog'])
        except Exception:
            failed_pages += 1
    print(failed_pages)


def _store_card(post):
    """Extract one mblog card's fields, compute its score, and insert it."""
    date = post['created_at']
    attitudes_count = post['attitudes_count']
    comments_count = post['comments_count']
    reposts_count = post['reposts_count']
    data = post['text']
    # Weighted engagement score: likes + 0.5 * comments + 2 * reposts.
    index = attitudes_count + comments_count * 0.5 + reposts_count * 2
    # The column hashtag lives inside an <a> tag of the post HTML.
    title = py(data).find('a').text()
    print(data, attitudes_count, comments_count, reposts_count, index, date, title)
    try:
        mysql(date, attitudes_count, comments_count, reposts_count, index, title, data)
    except Exception:
        # Fallback kept from the original: retry with the title as the body
        # (handles rows where the raw text broke the insert).
        mysql(date, attitudes_count, comments_count, reposts_count, index, title, title)
def main():
    """Entry point for the scraper stage: crawl the feed and store all posts."""
    data_store()
# Run the scraper, then release the connection.
# NOTE(review): db.close() is unconditional, so importing this module also
# closes the connection after (not) running main -- confirm that is intended.
if __name__ == '__main__':
    main()
db.close()
import xlsxwriter
import pymysql
'''
Exporter stage: read the scraped statistics back from MySQL and write them
into an xlsx workbook -- one sheet for daily duty, one for special columns.
'''
##################################################################
from config_155 import *
##################################################################
# Second connection, for the exporter stage (same database as the scraper).
db = pymysql.connect('localhost', 'root', '123456', 'hfut_weibo',use_unicode=True, charset="utf8")
cursor = db.cursor()
# Force utf8 so the Chinese titles/names read back correctly.
cursor.execute('SET NAMES utf8;')
cursor.execute('SET CHARACTER SET utf8;')
cursor.execute('SET character_set_connection=utf8;')
# Output workbook; raw string so the backslashes in the Windows path are
# taken literally (byte-identical to the old non-raw literal, since neither
# \合 nor \% was a valid escape).
workbook = xlsxwriter.Workbook(r"E:\合肥工业大学微博值班数据\%s.xlsx" % document_name)

# Sheet 1: daily duty.  Columns: date, person, reposts, comments, likes, score.
worksheet1 = workbook.add_worksheet("日常值班")
for _col, _label in enumerate(('日期', '值班人', '转发数', '评论数', '点赞数', '指标数')):
    worksheet1.write(0, _col, _label)

# Sheet 2: special columns.  Columns: column, owner, reposts, comments, likes, score.
worksheet2 = workbook.add_worksheet("特色栏目")
for _col, _label in enumerate(('栏目', '负责人', '转发数', '评论数', '点赞数', '指标数')):
    worksheet2.write(0, _col, _label)
def mysql_read(time, name, i, sig):
    """Build the SELECT statement for one report row.

    time -- a 'MM-DD' date (sig == 1) or a '#栏目#' column title (otherwise)
    name -- person on duty; unused here, kept for the caller's uniform call shape
    i    -- target worksheet row; also unused here
    sig  -- 1 selects daily rows by date; anything else selects special-column
            rows by title within [timt_start, time_conclude] (module globals;
            'timt_start' is the typo'd name actually defined below)

    NOTE(review): values are %-interpolated, not parameterized -- SQL injection
    risk if *time* ever contains a quote.
    """
    if sig == 1:
        sql = '''
        SELECT * FROM inital_weibo_data WHERE date='%s'
        ''' % time
    else:
        sql = '''
        SELECT * FROM inital_weibo_data_special WHERE title='%s' AND date BETWEEN '%s' AND '%s'
        ''' % (time, timt_start, time_conclude)
    return sql
def excel_work(sql, i, time, name, worksheet):
    """Execute *sql* and write its rows, plus a totals line, to *worksheet*.

    sql       -- SELECT statement built by mysql_read()
    i         -- first worksheet row to write into
    time/name -- labels written into columns 0 and 1 of the first row
    Returns the next free row index (one blank row after the totals line).
    On a query error the row index is returned unchanged -- the old code
    returned None, which made the caller's counter None and crashed the
    next group's writes.
    """
    try:
        # Keep the try body minimal: only the DB calls can reasonably fail.
        cursor.execute(sql)
        results = cursor.fetchall()
    except Exception:
        print("Error: unable to fetch data")  # typo 'fecth' fixed
        return i
    print(results)
    worksheet.write(i, 0, time)
    worksheet.write(i, 1, name)
    index_all = 0
    attitudes_count_all = 0
    comments_count_all = 0
    reposts_count_all = 0
    for row in results:
        # Row layout: date, likes, comments, reposts, score, (title, text).
        date, attitudes_count, comments_count, reposts_count, index = row[:5]
        index_all += index
        attitudes_count_all += attitudes_count
        comments_count_all += comments_count
        reposts_count_all += reposts_count
        worksheet.write(i, 2, reposts_count)
        worksheet.write(i, 3, comments_count)
        worksheet.write(i, 4, attitudes_count)
        worksheet.write(i, 5, index)
        i = i + 1
        print(date, attitudes_count, comments_count, reposts_count, index)
    # Totals line directly under the per-row data.
    worksheet.write(i, 2, reposts_count_all)
    worksheet.write(i, 3, comments_count_all)
    worksheet.write(i, 4, attitudes_count_all)
    worksheet.write(i, 5, index_all)
    return i + 2
def main():
    """Fill both worksheets: daily duty rows first, then special columns."""
    row = 1
    for duty_date, person in work_1.items():
        query = mysql_read(duty_date, person, row, 1)
        row = excel_work(query, row, duty_date, person, worksheet1)
    row = 1
    for column_tag, owner in work_0.items():
        query = mysql_read(column_tag, owner, row, 0)
        row = excel_work(query, row, column_tag, owner, worksheet2)
# Build the workbook, then release the DB connection and flush the xlsx file.
if __name__ == '__main__':
    main()
db.close()
workbook.close()
# --- report configuration ---------------------------------------------------
# NOTE(review): main() above already ran at import time BEFORE these
# assignments execute, so this block is presumably the template for
# config_155.py (imported via `from config_155 import *`) -- confirm.

# Weekday duty roster: weekday -> person (not referenced by this script).
work = {'星期一':'张壮','星期二':'王修齐','星期三':'徐露露','星期四':'张雪辰','星期五':'孙艳丽','星期六':'马玉龙','星期日':'陈之羽',}
# Special columns: hashtag -> owner.
# NOTE(review): '#冰糖说英语#' appears twice; the duplicate key is harmless
# (same value, last one wins) but should be removed.
work_0 = \
{'#冰糖说英语#':'彭二丹','#冰糖点歌台#':'董令','#冰糖小百科#':'周成旭','#冰糖的树洞#':'张雪辰','#冰糖么么答#':'',\
'#冰糖赠书#':'蒋媛媛','#冰糖书影音#':'云曈','#老照片#':'彭二丹','#直播#':'蒋媛媛','#送福利#':'彭二丹','#冰糖说英语#':'彭二丹',}
# Daily duty roster for the reported week: 'MM-DD' -> person.
work_1 = {'01-08':'张壮','01-09':'王修齐','01-10':'徐露露','01-11':'张雪辰','01-12':'孙艳丽','01-13':'马玉龙','01-14':'陈之羽',}
# Output file name and the date range covered by the special-column query
# ('timt_start' is a typo, but the name is referenced as-is elsewhere).
document_name = '150周(第十九教学周)数据统计(2018.1.08-2018.1.14)'
timt_start = '01-08'
time_conclude = '01-14'