今年春节忘了带无线网卡回去,想到在家里没有条件,临走前写了个脚本,每天抓取酷讯网上的机票打折信息发给自己。这个脚本以后肯定还用得着。
以后可以改进的地方:
1、增加发邮件通知的方式。
2、增加一个变量来记录每日机票的历史最低价。
其中,引用了小熊(cocobear.cn@gmail.com)写的PyFetion模块,非常感谢他的开源模块。
以下是源码,加了些许注释。
# -*- coding: utf-8 -*-
#定时抓取酷讯机票信息并发送短信通知
#author : yobin
#date 2009-1-19
import re
import urllib
#import sys
import time
from datetime import datetime, timedelta
import PyFetion
###==================================================================
# Fetch a web page and return its source.
def get_url_data(url):
    """Download ``url`` and return the raw response body.

    The request is attempted up to five times in a row, without any delay
    between attempts.  Every failed attempt prints a ``get url fail`` line.

    Args:
        url: Address of the page to fetch.

    Returns:
        The body exactly as read from the connection, or ``None`` when all
        five attempts failed.
    """
    # Imported locally so the function works on both interpreter lines:
    # Python 2 (which this script was written for) provides urllib.urlopen,
    # Python 3 moved it to urllib.request.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen
    from contextlib import closing

    max_attempts = 5
    for _ in range(max_attempts):
        try:
            # closing() releases the connection even when read() raises
            # half-way through the download.
            with closing(urlopen(url)) as sock:
                return sock.read()
        except Exception:
            # Best-effort retry on any fetch error.  Unlike the former bare
            # ``except:`` this lets KeyboardInterrupt/SystemExit through, so
            # Ctrl-C during a download still stops the program.
            print("get url fail:%s" % (url))
    return None
###==================================================================
# Thin wrapper around the Fetion SMS interface.
def sendFetion(phoneNo='151********', FetionPasswd='000', destNo='138********', msg='hello'):
    """Send ``msg`` as an SMS through a Fetion account.

    Args:
        phoneNo: Mobile number used to create the PyFetion session.
        FetionPasswd: Password for that number.  The placeholder ``'000'``
            means "not configured" and turns the call into a no-op.
        destNo: Number the message is sent to.
        msg: Text of the message.

    Returns:
        ``-1`` when PyFetion raises ``PyFetionInfoError`` while the session
        is being created, otherwise ``None``.
    """
    if FetionPasswd == '000':
        # No real password configured: nothing to send.
        return None
    try:
        phone = PyFetion.PyFetion(phoneNo, FetionPasswd, "TCP")
    except PyFetion.PyFetionInfoError:
        print("corrent your mobile NO. and password")
        return -1
    phone.login()
    phone.send_sms(msg, destNo, long=True)
    return None
###==================================================================
# Regular expressions that pull the per-day fare tabs out of a Kuxun result
# page.  They are unicode patterns applied to decoded text, so that "." after
# "周" consumes one whole character (the weekday) instead of a single byte of
# its UTF-8 encoding.
_KUXUN_SELECTED_DAY_RE = re.compile(
    u'<span class="tab_a1 checked">(\\d\\d-\\d\\d)(周.)<br />'
    u'<strong>(\\d+)元</strong></span>'
)
# The "?" that starts the query string is escaped here.  Left unescaped it
# made the preceding "p" optional, so the pattern could never match the
# literal "fast_air_search_wrap?From=..." link.
_KUXUN_OTHER_DAY_RE = re.compile(
    u'<a href="/fcgi-bin/fast_air_search_wrap\\?From=\\w+&to=\\w+'
    u'&date=\\d+-\\d+-\\d+" class="tab_a2">(\\d+-\\d+)(周.)<br />'
    u'<strong>(\\d+)元</strong></a>'
)


def parseKuxun(desturl='http://jipiao.kuxun.cn/fcgi-bin/fast_air_search_wrap?type=1&t=%E5%8D%97%E5%AE%81&From=NNG&q=%E4%B8%8A%E6%B5%B7&to=SHA&date=2009-02-01&Rtdate=2009-02-04&x=25&y=19'):
    """Fetch a Kuxun search page and summarise the fares shown in its day tabs.

    The currently selected day is listed first, followed by the neighbouring
    days in page order.  Each entry has the form ``[MM-DD,weekday,price] ``.
    The price was captured by the original patterns but never included in the
    message; it is included now.

    Args:
        desturl: URL of the Kuxun search result page.

    Returns:
        The concatenated entries as a native string, or ``''`` when the page
        could not be downloaded or nothing matched.
    """
    page = get_url_data(desturl)
    if not page:
        return ''
    if isinstance(page, bytes):
        # NOTE(review): assumes the page is served as UTF-8, which is what
        # the original UTF-8 source literals implied - confirm.
        page = page.decode('utf-8', 'replace')

    entries = [
        u'[%s,%s,%s] ' % (day, weekday, price)
        for pattern in (_KUXUN_SELECTED_DAY_RE, _KUXUN_OTHER_DAY_RE)
        for day, weekday, price in pattern.findall(page)
    ]
    smsContent = u''.join(entries)
    if str is bytes:
        # Python 2: hand back a UTF-8 byte string so callers can keep
        # concatenating the result with their byte-string labels.
        smsContent = smsContent.encode('utf-8')
    return smsContent
###==================================================================
def ticket_getstage(now=None):
    """Tell whether fare queries should be sent at this time of day.

    Args:
        now: Optional naive ``datetime`` expressed in UTC+8.  Defaults to the
            current UTC time shifted by eight hours; the parameter exists so
            the quiet window can be checked against a fixed clock.

    Returns:
        ``False`` from midnight up to (not including) 08:00, ``True`` for the
        rest of the day.
    """
    if now is None:
        now = datetime.utcnow() + timedelta(hours=8)
    # Between midnight and 8 a.m. prices barely change and messages are only
    # a nuisance, so no queries are sent.  The previous test, ``hour <= 8``,
    # also silenced 08:00-08:59, contrary to this stated intent.
    return now.hour >= 8
def main():
    """Poll the configured Kuxun searches forever and text every result.

    Outside the quiet hours reported by ``ticket_getstage`` each URL in
    ``urlLists`` is parsed with ``parseKuxun``; a non-empty summary is
    prefixed with the route label and sent through ``sendFetion``.  The loop
    runs until the program is interrupted (Ctrl-C) or the machine shuts down.
    A goodbye message is always sent on the way out.  Errors other than a
    keyboard interrupt are no longer reported as "user interrput"; they
    propagate after the goodbye message.
    """
    # Fetion login number, Fetion password and recipient number (the
    # recipient can only be a Fetion friend or the sender itself).
    phoneNo = '151********'  # sending to yourself also works
    FetionPasswd = 'your password'
    destNo = '138********'  # sending to yourself also works
    # Polling intervals, in seconds.
    Timeout1 = 60  # pause between two URLs of urlLists
    Timeout2 = 60 * 30  # one pass every half hour; more often would be SMS bombing
    # Searches to watch, as (label, url) pairs.  Only one-way searches are
    # handled; add as many URLs as needed.
    urlLists = (
        ('北京-上海', 'http://jipiao.kuxun.cn/fcgi-bin/fast_air_search_wrap?From=PEK&to=SHA&date=2009-02-05'),
        ('北京-上海', 'http://jipiao.kuxun.cn/fcgi-bin/fast_air_search_wrap?type=1&t=%E5%8C%97%E4%BA%AC&From=PEK&q=%E4%B8%8A%E6%B5%B7&to=SHA&date=2009-02-20&Rtdate=2009-02-23&x=36&y=9'),
    )
    # Query in a loop until the program is stopped by hand.
    try:
        while True:
            if ticket_getstage():
                for label, desturl in urlLists:
                    smsContent = parseKuxun(desturl)
                    if smsContent:
                        sendFetion(phoneNo, FetionPasswd, destNo, label + smsContent)
                    time.sleep(Timeout1)
            # Sleep on every pass, quiet hours included, so the loop never
            # spins without pausing.
            time.sleep(Timeout2)
    except KeyboardInterrupt:
        print("user interrput\n")
        sendFetion(phoneNo, FetionPasswd, destNo, 'user interrput')
    finally:
        sendFetion(phoneNo, FetionPasswd, destNo, 'Goodbye, Program is over!!!')
        print('Goodbye')
# Run the poller only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
(python源码) 定时抓取酷讯机票信息并发送短信通知
(python源码) 定时抓取酷讯机票信息并发送短信通知
...