#coding:utf-8
import star
from bs4 import BeautifulSoup
import win32clipboard, win32con
def getJuzi(urlroot):
    """Collect the quote texts from a paginated listing, following "next" links.

    Starting at ``urlroot``, each page is downloaded with ``star.fetch`` and
    parsed with BeautifulSoup (lxml parser). The text of every
    ``<a class="xlistju">`` element is collected, then the first
    ``<a rel="next">`` link is followed. Crawling stops at the first page
    that has no usable "next" link.

    Args:
        urlroot: URL of the first listing page.

    Returns:
        The collected texts in page order, each one followed by CRLF CRLF.
        An empty string when no matching element was found.
    """
    # Function-scope import so the block stays self-contained; the fallback
    # keeps the script usable on Python 2, which the file was written for.
    try:
        from urllib.parse import urljoin
    except ImportError:
        from urlparse import urljoin

    nexturl = urlroot
    ipage = 0
    quotes = []  # collected per-quote texts; joined once at the end
    while True:
        ipage += 1
        print(u'正在获取第 ' + str(ipage) + u' 页金句……')
        print(nexturl)
        html = star.fetch(nexturl)
        soup = BeautifulSoup(html, "lxml")
        quotes.extend(
            text.getText() for text in soup.select('a[class="xlistju"]')
        )
        nexturls = soup.select('a[rel="next"]')
        if not nexturls:
            break
        href = nexturls[0].get('href')
        if not href:
            # A "next" anchor without a target cannot be followed.
            break
        # Resolve against the current page so root-relative, page-relative
        # and absolute hrefs all produce a valid URL.
        nexturl = urljoin(nexturl, href)
    return ''.join(quote + "\r\n\r\n" for quote in quotes)
# Script body: fetch every quote for one article and place the combined text
# on the Windows clipboard.
s = getJuzi('http://www.juzimi.com/article/%E8%BF%BD%E9%A3%8E%E7%AD%9D%E7%9A%84%E4%BA%BA')
win32clipboard.OpenClipboard()
try:
    win32clipboard.EmptyClipboard()
    # Request the Unicode clipboard format explicitly so the Chinese text is
    # not pushed through the ANSI code page.
    win32clipboard.SetClipboardText(s, win32con.CF_UNICODETEXT)
finally:
    # Always release the clipboard, even if setting the text failed;
    # otherwise it stays locked for other applications.
    win32clipboard.CloseClipboard()
print('got it')
# 获取句子迷的经典语录[Python版本]
# 本文转载:CSDN博客