#coding:utf-8

import star
from bs4 import BeautifulSoup
import win32clipboard, win32con

def getJuzi(urlroot):
    """Collect the quote text from every page of a paginated listing.

    Starting at ``urlroot``, each page is downloaded with ``star.fetch``,
    parsed with BeautifulSoup (lxml parser), and the text of every
    ``<a class="xlistju">`` element is gathered.  Pagination follows the
    first ``<a rel="next">`` link on each page and stops at the first page
    that has no such link.  Progress (page number and URL) is printed for
    every page fetched.

    Args:
        urlroot: URL of the first page to fetch.

    Returns:
        The text of all matched elements in page order, each one followed
        by two CRLF line breaks; an empty string if nothing matched.
    """
    nexturl = urlroot
    ipage = 0
    # Gather the pieces in a list and join once at the end: repeated string
    # concatenation inside the loop is quadratic in the total output size.
    chunks = []
    while True:
        ipage += 1
        print(u'正在获取第 ' + str(ipage) + u' 页金句……')
        print(nexturl)
        soup = BeautifulSoup(star.fetch(nexturl), "lxml")
        chunks.extend(
            link.getText() + "\r\n\r\n"
            for link in soup.select('a[class="xlistju"]')
        )
        nexturls = soup.select('a[rel="next"]')
        if not nexturls:
            # No "next" link: this was the last page.
            break
        # The href is appended to the site root as-is -- assumes the link is
        # site-relative (starts with "/"); TODO confirm against the markup.
        nexturl = 'http://www.juzimi.com' + nexturls[0].attrs['href']
    return ''.join(chunks)
# Scrape every quote page for the article and put the combined text on the
# Windows clipboard.
s = getJuzi('http://www.juzimi.com/article/%E8%BF%BD%E9%A3%8E%E7%AD%9D%E7%9A%84%E4%BA%BA')
win32clipboard.OpenClipboard()
try:
    win32clipboard.EmptyClipboard()
    # Request the Unicode clipboard format explicitly so the non-ASCII quote
    # text is not pushed through the ANSI (CF_TEXT) format.
    win32clipboard.SetClipboardText(s, win32con.CF_UNICODETEXT)
finally:
    # Always release the clipboard, even if writing to it failed; otherwise
    # it stays open under this process.
    win32clipboard.CloseClipboard()
# Parenthesised form prints the same text on Python 2 and Python 3.
print('got it')


# Reposted from: CSDN blog