本文基于一个真实的项目,使用 Python 3.6 和官方最新的 smtplib 接口。项目的目的是爬取网站,一旦发现新文章,就给自己发送邮件提醒。最后使用 Linux 系统的 crond 服务定时执行。
发邮件方法
在定义发邮件方法之前,我们已经定义了一个类以及类中的爬虫方法;这里单独把发邮件的方法拿出来说明,代码如下:
    def sent_email(self):
        """Send the entries in ``self.result`` to the configured address.

        ``self.result`` is read as a mapping ``{link: [date, title]}``; each
        entry becomes one date/title/link paragraph of a UTF-8 plain-text
        message, which is submitted through ``smtp.gmail.com`` on port 587
        using STARTTLS.

        Delivery failures are logged (with traceback) instead of raised, so
        a mail problem does not abort the caller.
        """
        fromaddr = 'princelailai@gmail.com'
        toaddrs = ['princelailai@gmail.com']

        # Build the date from the numeric fields rather than passing the
        # non-ASCII characters through strftime(), whose handling of such
        # format strings can depend on the process locale (cron jobs often
        # run under the C/POSIX locale).
        today = datetime.now()
        subject = '{:04d}年{:02d}月{:02d}日{}'.format(
            today.year, today.month, today.day, '共有产权房信息')

        msg = ''.join(
            '日期:\t{}\n标题:\t{}\n地址:\t{}\n\n'.format(v[0], v[1], k)
            for k, v in self.result.items()
        )

        message = MIMEText(msg, 'plain', 'utf-8')
        message['From'] = Header(fromaddr, 'utf-8')
        message['To'] = Header(','.join(toaddrs), 'utf-8')
        message['Subject'] = Header(subject, 'utf-8')

        username = 'princelailai@gmail.com'
        # With 2-step verification enabled this must be an app password,
        # not the account's login password.
        password = 'app password'

        try:
            # The context manager sends QUIT and closes the socket even when
            # a step fails; the timeout keeps an unattended run from hanging.
            with smtplib.SMTP('smtp.gmail.com', 587, timeout=30) as server:
                server.ehlo()
                server.starttls()
                server.login(username, password)
                server.sendmail(fromaddr, toaddrs, message.as_string())
        except (smtplib.SMTPException, OSError):
            # OSError covers connection, DNS, TLS and timeout errors.
            logging.exception('Send Email Failed.')
        else:
            logging.info('Send Email Successful.')
需要注意的有几点:
- 邮件正文需要是 MIMEText 格式的
- 发信人、收信人、主题要用 Header 添加
- 如果你的 Google 账号开启了两步验证,那么登录 SMTP 时使用的密码就不是账号的登录密码,而是 app 密码。关于 app 密码如何生成,可以查看 Google 官方帮助文章《Sign in using App Passwords》
- 其他关于 SMTP 地址和端口的问题,可以查看 Google 官方帮助文章《Use IMAP to check Gmail on other email clients》
定时启动
创建一个文本文件,作为当前用户的 crontab 配置文件,内容如下:
0 6 */3 * * /root/miniconda3/bin/python /root/monitor_house_info/monitor_house_info.py
关于 crond 的配置,网上教程很多,也可以执行 man 5 crontab 查看详细的用法。
最后执行 crontab file(file 为上面创建的文本文件)导入配置,就可以坐等收邮件了。
全部代码
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from requests_html import HTMLSession
import smtplib
import os
import json
import logging
from datetime import datetime
from email.mime.text import MIMEText
from email.header import Header
# Root logger setup: timestamp, level and message for INFO and above.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s : %(levelname)s : %(message)s',
)
class monitor_house_info:
    """Scrape notice list pages and e-mail the entries not seen before.

    Instance attributes:
        realpath: directory that contains this script.
        realdb:   path of ``db.json`` (next to the script), which stores the
                  links already recorded by earlier runs.
        result:   ``{link: [date, title]}`` collected during the current run.
        url:      list pages to scrape.
        db:       contents of ``db.json``; set by ``read_json()``.
    """

    def __init__(self):
        """Resolve the db path next to this script and set the page list."""
        self.realpath = os.path.split(os.path.realpath(__file__))[0]
        self.realdb = os.path.join(self.realpath, 'db.json')
        self.result = {}
        self.url = ['http://cpzjw.bjchp.gov.cn/cpzjw/336693/index.html',
                    'http://cpzjw.bjchp.gov.cn/cpzjw/336551/336554/index.html']

    def read_json(self):
        """Load ``self.db`` from ``self.realdb``; use ``{}`` if it is absent."""
        if not os.path.exists(self.realdb):
            self.db = {}
        else:
            # The file is written with ensure_ascii=False, so read it as
            # UTF-8 explicitly instead of relying on the locale default.
            with open(self.realdb, encoding='utf-8') as f:
                self.db = json.load(f)
        logging.info('Readed json db.')

    def get_news(self, url):
        """Fetch one list page and add its entries to ``self.result``.

        Dates come from ``span.date04`` and titles/links from
        ``span.title04`` under ``div.easysite-article-content > ul > li``.
        The three lists are paired positionally with ``zip``.

        Args:
            url: address of the list page to scrape.
        """
        session = HTMLSession()
        # A timeout keeps an unattended (cron) run from blocking forever on
        # an unresponsive server.
        resp = session.get(url, timeout=30)
        element_date = resp.html.find('div.easysite-article-content > ul > li > span.date04')
        # Drop the first and last character of the date text (presumably
        # surrounding brackets -- confirm against the page markup).
        date = [i.text[1:-1] for i in element_date]
        element_content = resp.html.find('div.easysite-article-content > ul > li > span.title04')
        content = [i.text.strip() for i in element_content]
        # Take one absolute link from each title element.
        link = [list(i.absolute_links)[0] for i in element_content]
        for l, d, c in zip(link, date, content):
            self.result[l] = [d, c]
        logging.info('geted web content.')

    def valid_news(self):
        """Keep only unseen entries in ``self.result`` and persist the db.

        Links already present in ``self.db`` are removed from
        ``self.result``; the remaining entries are merged into ``self.db``,
        which is then written back to ``self.realdb``.

        NOTE(review): ``run()`` calls this before ``sent_email()``, so
        entries are recorded as seen even when the e-mail later fails.
        """
        # Iterate over a snapshot of the keys: deleting from a dict while
        # iterating over it raises RuntimeError.
        for k in list(self.result):
            if k in self.db:
                del self.result[k]
        self.db.update(self.result)
        # Serialize before opening the file so a serialization error cannot
        # leave a truncated db behind.
        payload = json.dumps(self.db, ensure_ascii=False)
        with open(self.realdb, 'w', encoding='utf-8') as fp:
            fp.write(payload)
        logging.info('valided news.')

    def sent_email(self):
        """Send the entries in ``self.result`` to the configured address.

        Each entry becomes one date/title/link paragraph of a UTF-8
        plain-text message, which is submitted through ``smtp.gmail.com``
        on port 587 using STARTTLS.

        Delivery failures are logged (with traceback) instead of raised.
        """
        fromaddr = 'princelailai@gmail.com'
        toaddrs = ['princelailai@gmail.com']

        # Build the date from the numeric fields rather than passing the
        # non-ASCII characters through strftime(), whose handling of such
        # format strings can depend on the process locale (cron jobs often
        # run under the C/POSIX locale).
        today = datetime.now()
        subject = '{:04d}年{:02d}月{:02d}日{}'.format(
            today.year, today.month, today.day, '共有产权房信息')

        msg = ''.join(
            '日期:\t{}\n标题:\t{}\n地址:\t{}\n\n'.format(v[0], v[1], k)
            for k, v in self.result.items()
        )

        message = MIMEText(msg, 'plain', 'utf-8')
        message['From'] = Header(fromaddr, 'utf-8')
        message['To'] = Header(','.join(toaddrs), 'utf-8')
        message['Subject'] = Header(subject, 'utf-8')

        username = 'princelailai@gmail.com'
        # With 2-step verification enabled this must be an app password,
        # not the account's login password.
        password = 'app password'

        try:
            # The context manager sends QUIT and closes the socket even when
            # a step fails; the timeout keeps an unattended run from hanging.
            with smtplib.SMTP('smtp.gmail.com', 587, timeout=30) as server:
                server.ehlo()
                server.starttls()
                server.login(username, password)
                server.sendmail(fromaddr, toaddrs, message.as_string())
        except (smtplib.SMTPException, OSError):
            # OSError covers connection, DNS, TLS and timeout errors.
            logging.exception('Send Email Failed.')
        else:
            logging.info('Send Email Successful.')

    def run(self):
        """Load the db, scrape every page, filter, and mail any new entries."""
        self.read_json()
        for u in self.url:
            self.get_news(u)
        self.valid_news()
        if self.result:
            self.sent_email()
if __name__ == '__main__':
    # Script entry point: perform a single scrape-and-notify cycle.
    monitor_house_info().run()