#coding=utf-8
#python 2.x
import re
import urllib
def getHtml(url):
    """Fetch *url* and return the raw response body.

    Args:
        url: Address to open; anything the standard-library ``urlopen``
            accepts (``http://``, ``file://``, ...).

    Returns:
        The body exactly as returned by the response's ``read()``.
        No decoding is performed here.

    Raises:
        IOError: If the resource cannot be opened or read.
    """
    # Resolve ``urlopen`` locally so the function works on the interpreter
    # this script was written for (Python 2) and also on Python 3, where
    # the function lives in ``urllib.request``.
    try:
        from urllib import urlopen  # Python 2
    except ImportError:
        from urllib.request import urlopen  # Python 3

    page = urlopen(url)
    try:
        return page.read()
    finally:
        # Always release the underlying socket/file, even if read() fails.
        page.close()
# Fetch the Jianshu front page and print the link and title of every
# article heading found in it.
html = getHtml("http://www.jianshu.com")
# On Python 3 the body arrives as bytes and must be decoded before a text
# pattern can be applied. On Python 2 the body is already ``str``, so this
# branch is skipped and the raw byte string is matched unchanged.
if not isinstance(html, str):
    html = html.decode("utf-8", "replace")
# Group 1 captures the href, group 2 the anchor text of each title link.
reg = r'<h4 class="title"><a target="_blank" href="(.*?)">(.*?)</a>'
hotre = re.compile(reg)
# With two groups, findall returns one (href, title) tuple per match.
artlist = hotre.findall(html)
for article in artlist:
    for com in article:
        # Single-argument parenthesised form prints the same on Python 2 and 3.
        print(com)
#python 2.x
import re
import urllib
def getHtml(url):
    """Fetch *url* and return the raw response body.

    Args:
        url: Address to open; anything the standard-library ``urlopen``
            accepts (``http://``, ``file://``, ...).

    Returns:
        The body exactly as returned by the response's ``read()``.
        No decoding is performed here.

    Raises:
        IOError: If the resource cannot be opened or read.
    """
    # Resolve ``urlopen`` locally so the function works on the interpreter
    # this script was written for (Python 2) and also on Python 3, where
    # the function lives in ``urllib.request``.
    try:
        from urllib import urlopen  # Python 2
    except ImportError:
        from urllib.request import urlopen  # Python 3

    page = urlopen(url)
    try:
        return page.read()
    finally:
        # Always release the underlying socket/file, even if read() fails.
        page.close()
# Fetch the Jianshu front page and print the link and title of every
# article heading found in it.
html = getHtml("http://www.jianshu.com")
# On Python 3 the body arrives as bytes and must be decoded before a text
# pattern can be applied. On Python 2 the body is already ``str``, so this
# branch is skipped and the raw byte string is matched unchanged.
if not isinstance(html, str):
    html = html.decode("utf-8", "replace")
# Group 1 captures the href, group 2 the anchor text of each title link.
reg = r'<h4 class="title"><a target="_blank" href="(.*?)">(.*?)</a>'
hotre = re.compile(reg)
# With two groups, findall returns one (href, title) tuple per match.
artlist = hotre.findall(html)
for article in artlist:
    for com in article:
        # Single-argument parenthesised form prints the same on Python 2 and 3.
        print(com)
评论
发表评论