今年春节忘了带无线网卡回去,想到在家里没有上网条件,临走前写了个脚本,每天抓取酷讯网上的机票打折信息发给自己。这个脚本以后肯定还用得着。

以后可以改进的地方:
1、增加发邮件通知的方式。
2、增加一个变量来记录每日机票的历史最低价。


其中,引用了小熊(cocobear.cn@gmail.com)写的pyFetion模块,非常感谢他的开源模块。

以下是源码,加了些许注释。

# -*- coding: utf-8 -*-
#定时抓取酷讯机票信息并发送短信通知
#author : yobin
#date 2009-1-19

import re
import urllib
#import sys
import time
from datetime import datetime, timedelta
import PyFetion
   
###==================================================================
#打开网页,获得页面源代码
def get_url_data(url):
    """Download *url* and return the raw response body.

    The request is attempted up to five times. If every attempt fails, a
    diagnostic line is printed and ``None`` is returned.

    Only ``Exception`` subclasses trigger a retry, so ``KeyboardInterrupt``
    and ``SystemExit`` propagate to the caller instead of being swallowed
    by the retry loop.
    """
    # Function-scope import so the block works with both the Python 2
    # location of urlopen and the Python 3 one.
    try:
        from urllib import urlopen  # Python 2
    except ImportError:
        from urllib.request import urlopen  # Python 3

    max_attempts = 5
    for _attempt in range(max_attempts):
        try:
            sock = urlopen(url)
            try:
                return sock.read()
            finally:
                # Release the connection even when read() raises.
                sock.close()
        except Exception:
            # Best-effort fetch: any ordinary failure just means "try again".
            continue

    print("get url fail:%s" % (url,))
    return None


###==================================================================
#飞信发送短信的接口包装
def sendFetion(phoneNo='151********', FetionPasswd='000', destNo='138********', msg='hello'):
    """Send *msg* as an SMS through the PyFetion module.

    phoneNo      -- number passed to PyFetion as the login account.
    FetionPasswd -- password for that account. The placeholder value
                    ``'000'`` means "not configured": nothing is sent.
    destNo       -- number the message is sent to.
    msg          -- message text (sent with ``long=True``).

    Returns ``-1`` when PyFetion raises ``PyFetionInfoError`` while the
    client object is being created, otherwise ``None``.
    """
    # Placeholder password: skip sending entirely.
    if FetionPasswd == '000':
        return None

    try:
        phone = PyFetion.PyFetion(phoneNo, FetionPasswd, "TCP")
    except PyFetion.PyFetionInfoError:
        print("corrent your mobile NO. and password")
        return -1

    phone.login()
    phone.send_sms(msg, destNo, long=True)


###==================================================================
#用正则去解析页面获取想要的信息
def parseKuxun(desturl='http://jipiao.kuxun.cn/fcgi-bin/fast_air_search_wrap?type=1&t=%E5%8D%97%E5%AE%81&From=NNG&q=%E4%B8%8A%E6%B5%B7&to=SHA&date=2009-02-01&Rtdate=2009-02-04&x=25&y=19'):
    """Fetch a Kuxun search page and summarise the fares found in it.

    The page is downloaded with ``get_url_data`` and scanned with two
    regular expressions: one for the currently selected date tab
    (``tab_a1 checked``) and one for the other date tabs (``tab_a2``).

    Returns a string made of ``'[date,weekday,price] '`` entries, selected
    tab first. Returns ``''`` when the download fails or nothing matches.
    """
    htmlContent = get_url_data(desturl)
    if not htmlContent:
        return ''

    # The weekday is matched with ``[^<]+`` rather than ``.``: in a byte
    # string one ``.`` only consumes a single byte, which cannot cover a
    # multi-byte encoded weekday character.
    # NOTE(review): assumes the page bytes use the same encoding as this
    # source file (UTF-8) -- TODO confirm against the live page.
    patterns = (
        r'<span class="tab_a1 checked">(\d\d-\d\d)(周[^<]+)<br />'
        r'<strong>(\d+)元</strong></span>',
        # ``?`` is escaped so it matches the literal query separator, and
        # ``&`` is accepted both raw and HTML-escaped as ``&amp;``.
        r'<a href="/fcgi-bin/fast_air_search_wrap\?From=\w+&(?:amp;)?to=\w+'
        r'&(?:amp;)?date=\d+-\d+-\d+" class="tab_a2">(\d+-\d+)(周[^<]+)<br />'
        r'<strong>(\d+)元</strong></a>',
    )

    entries = []
    for pattern in patterns:
        for day, weekday, price in re.findall(pattern, htmlContent):
            # The price was captured but never reported before; without it
            # the notification carries no fare information.
            entries.append('[%s,%s,%s] ' % (day, weekday, price))
    return ''.join(entries)

###==================================================================
def ticket_getstage(now=None):
    """Tell whether fares should be queried at this time of day.

    now -- optional ``datetime`` to evaluate. When omitted, the current
           UTC time shifted by +8 hours is used.

    Returns ``False`` while the hour is 0 through 8 inclusive (prices
    rarely change overnight and the messages would only be a nuisance),
    ``True`` otherwise.
    """
    if now is None:
        now = datetime.utcnow() + timedelta(hours=8)

    # An hour is never negative, so only the upper bound needs checking.
    # NOTE(review): the original comment said "before 8 a.m.", but the
    # condition also covers 08:00-08:59; behaviour is kept as written.
    return now.hour > 8


def main():
    """Poll the configured Kuxun URLs forever and text the results.

    Each pass checks ``ticket_getstage()``; when it allows querying, every
    URL in the list is parsed with ``parseKuxun`` and any non-empty result
    is sent through ``sendFetion``, prefixed with the route label. The loop
    runs until the user interrupts it; a farewell message is sent on the
    way out whatever the reason for leaving.
    """
    # Fetion login number, Fetion password and destination number.
    # The destination must be a Fetion friend or the account itself.
    phoneNo = '151********'        # may be the same as destNo
    FetionPasswd = 'your password'
    destNo = '138********'         # may be the same as phoneNo

    # Polling intervals, in seconds.
    Timeout1 = 60        # pause after each URL in the list
    Timeout2 = 60 * 30   # pause between passes; shorter would flood the phone

    # (label, url) pairs to watch. Only one-way searches were needed by the
    # author; more entries can be appended.
    urlLists = (
        ('北京-上海',
         'http://jipiao.kuxun.cn/fcgi-bin/fast_air_search_wrap?From=PEK&to=SHA&date=2009-02-05'),
        ('北京-上海',
         'http://jipiao.kuxun.cn/fcgi-bin/fast_air_search_wrap?type=1&t=%E5%8C%97%E4%BA%AC&From=PEK&q=%E4%B8%8A%E6%B5%B7&to=SHA&date=2009-02-20&Rtdate=2009-02-23&x=36&y=9'),
    )

    # Loop until the program is stopped by hand or the machine shuts down.
    try:
        while True:
            if ticket_getstage():
                for label, desturl in urlLists:
                    smsContent = parseKuxun(desturl)
                    if smsContent:
                        sendFetion(phoneNo, FetionPasswd, destNo, label + smsContent)
                    time.sleep(Timeout1)
            time.sleep(Timeout2)
    except KeyboardInterrupt:
        # Only a real Ctrl-C is reported as a user interrupt; any other
        # error propagates with its traceback after the farewell below.
        print("user interrput\n")
        sendFetion(phoneNo, FetionPasswd, destNo, 'user interrput')
    finally:
        sendFetion(phoneNo, FetionPasswd, destNo, 'Goodbye, Program is over!!!')
        print('Goodbye')

# Run the poller only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()