本文基于一个真实的项目,使用 Python 3.6 和官方标准库 smtplib 的最新接口。项目的目的是爬取网站,发现新文章后通过邮件提醒自己,最后使用 Linux 系统的 crond 服务定时执行。
发邮件方法
在定义发邮件方法之前,我们还定义了一个类和类中的爬虫方法;这里单独把发邮件的方法拿出来说明,代码如下:
def sent_email(self):
    """E-mail the articles collected in ``self.result`` through Gmail SMTP.

    ``self.result`` is read as a mapping of article URL -> [date, title].
    Failures are logged (with traceback) and swallowed, so a mail problem
    never aborts the caller.
    """
    fromaddr = 'princelailai@gmail.com'
    toaddrs = ['princelailai@gmail.com']
    subject = "{}{}".format(datetime.now().strftime('%Y年%m月%d日'), '共有产权房信息')
    msg = ''.join(
        '日期:\t{}\n标题:\t{}\n地址:\t{}\n\n'.format(v[0], v[1], k)
        for k, v in self.result.items()
    )
    message = MIMEText(msg, 'plain', 'utf-8')
    # Address headers stay plain strings: wrapping a bare address in
    # Header(..., 'utf-8') RFC 2047-encodes the address itself, which is not
    # a valid From/To value.
    message['From'] = fromaddr
    message['To'] = ','.join(toaddrs)
    # The subject contains non-ASCII text, so it does need Header encoding.
    message['Subject'] = Header(subject, 'utf-8')
    username = 'princelailai@gmail.com'
    # Placeholder: use a Gmail app password here and keep it out of version control.
    password = 'app password'
    try:
        # The context manager issues QUIT and closes the socket even when
        # login/sendmail raise; the timeout keeps an unattended cron run
        # from hanging on a dead connection.
        with smtplib.SMTP('smtp.gmail.com', 587, timeout=30) as server:
            server.ehlo()
            server.starttls()
            # Re-identify over the now-encrypted channel.
            server.ehlo()
            server.login(username, password)
            server.sendmail(fromaddr, toaddrs, message.as_string())
    except (smtplib.SMTPException, OSError):
        # logging.exception records the traceback so the cause is visible.
        logging.exception('Send Email Failed.')
    else:
        logging.info('Send Email Successful.')
需要注意的有几点:
- 邮件正文需要是 MIMEText 格式的
- 主题等包含非 ASCII 字符(例如中文)的头部要用 Header 编码后再添加;发信人、收信人如果是纯 ASCII 的邮箱地址,直接赋值字符串即可(对整个地址做 Header 编码会得到不规范的地址头)
- 如果你的 Google 账号开启了两步验证,那么你的邮箱密码就不是登录密码,而是 app 密码,关于 app 密码怎么生成可以查看这篇文章Sign in using App Passwords
- 其他关于 smtp 地址和端口的问题,可以查看这篇文章Use IMAP to check Gmail on other email clients
定时启动
创建一个文本文件,写入当前用户的 crontab 配置,内容如下:
0 6 */3 * * /root/miniconda3/bin/python /root/monitor_house_info/monitor_house_info.py
关于 crond 配置,网上教程很多,也可以执行 man 5 crontab 查看详细的用法。
最后输入 crontab file(file 为上面创建的文本文件)导入配置,就可以坐等收邮件了。
全部代码
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from requests_html import HTMLSession
import smtplib
import os
import json
import logging
from datetime import datetime
from email.mime.text import MIMEText
from email.header import Header
# Configure the root logger once at import time: each record is rendered as
# "<timestamp> : <level> : <message>", and records below INFO are dropped.
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
class monitor_house_info:
    """Scrape notice list pages and e-mail the articles not seen before.

    Articles already reported are persisted in a ``db.json`` file stored next
    to this script, so repeated runs only report new entries.
    """

    def __init__(self):
        """Set up the database path, the result buffer and the pages to watch."""
        # Resolve paths relative to this file so the script works from any cwd.
        self.realpath = os.path.split(os.path.realpath(__file__))[0]
        self.realdb = os.path.join(self.realpath, 'db.json')
        # Article URL -> [date, title] for everything found in the current run.
        self.result = {}
        self.url = ['http://cpzjw.bjchp.gov.cn/cpzjw/336693/index.html',
                    'http://cpzjw.bjchp.gov.cn/cpzjw/336551/336554/index.html']

    def read_json(self):
        """Load the seen-article database into ``self.db`` ({} if no file yet)."""
        if not os.path.exists(self.realdb):
            self.db = {}
        else:
            # The file holds raw non-ASCII text (written with ensure_ascii=False),
            # so the encoding is pinned rather than left to the process locale.
            with open(self.realdb, encoding='utf-8') as f:
                self.db = json.load(f)
        logging.info('Readed json db.')

    def get_news(self, url):
        """Fetch one list page and record its articles in ``self.result``.

        Args:
            url: Address of the list page to scrape.
        """
        # Close the session when done; the timeout keeps an unattended run
        # from blocking forever on an unresponsive server.
        with HTMLSession() as session:
            resp = session.get(url, timeout=30)
            element_date = resp.html.find('div.easysite-article-content > ul > li > span.date04')
            element_content = resp.html.find('div.easysite-article-content > ul > li > span.title04')
            # Drop the first and last character of the date text
            # (presumably surrounding brackets -- confirm against the page).
            date = [i.text[1:-1] for i in element_date]
            content = [i.text.strip() for i in element_content]
            link = [list(i.absolute_links)[0] for i in element_content]
        for l, d, c in zip(link, date, content):
            self.result[l] = [d, c]
        logging.info('geted web content.')

    def valid_news(self):
        """Drop already-known articles from ``self.result`` and persist the rest.

        After this call ``self.result`` holds only new articles, and both
        ``self.db`` and the JSON file on disk include them.
        """
        # Collect the stale keys first: deleting from a dict while iterating
        # it raises RuntimeError.
        known = [k for k in self.result if k in self.db]
        for k in known:
            del self.result[k]
        self.db.update(self.result)
        # Serialise before opening the file so a serialisation error cannot
        # leave a truncated database behind.
        payload = json.dumps(self.db, ensure_ascii=False)
        with open(self.realdb, 'w', encoding='utf-8') as fp:
            fp.write(payload)
        logging.info('valided news.')

    def _build_message(self, fromaddr, toaddrs):
        """Return the MIMEText notification for the articles in ``self.result``."""
        subject = "{}{}".format(datetime.now().strftime('%Y年%m月%d日'), '共有产权房信息')
        msg = ''.join(
            '日期:\t{}\n标题:\t{}\n地址:\t{}\n\n'.format(v[0], v[1], k)
            for k, v in self.result.items()
        )
        message = MIMEText(msg, 'plain', 'utf-8')
        # Address headers stay plain strings: wrapping a bare address in
        # Header(..., 'utf-8') RFC 2047-encodes the address itself, which is
        # not a valid From/To value.
        message['From'] = fromaddr
        message['To'] = ','.join(toaddrs)
        # The subject contains non-ASCII text, so it does need Header encoding.
        message['Subject'] = Header(subject, 'utf-8')
        return message

    def sent_email(self):
        """E-mail the articles in ``self.result`` through Gmail SMTP.

        Failures are logged (with traceback) and swallowed, so a mail problem
        never aborts the run.
        """
        fromaddr = 'princelailai@gmail.com'
        toaddrs = ['princelailai@gmail.com']
        message = self._build_message(fromaddr, toaddrs)
        username = 'princelailai@gmail.com'
        # Placeholder: use a Gmail app password here and keep it out of version control.
        password = 'app password'
        try:
            # The context manager issues QUIT and closes the socket even when
            # login/sendmail raise.
            with smtplib.SMTP('smtp.gmail.com', 587, timeout=30) as server:
                server.ehlo()
                server.starttls()
                # Re-identify over the now-encrypted channel.
                server.ehlo()
                server.login(username, password)
                server.sendmail(fromaddr, toaddrs, message.as_string())
        except (smtplib.SMTPException, OSError):
            # logging.exception records the traceback so the cause is visible.
            logging.exception('Send Email Failed.')
        else:
            logging.info('Send Email Successful.')

    def run(self):
        """Load the database, scrape every page, then mail whatever is new."""
        self.read_json()
        for u in self.url:
            self.get_news(u)
        # Filter once, after all pages are scraped: filtering per page would
        # store the first page's articles and then discard them as "known".
        self.valid_news()
        if self.result:
            self.sent_email()
if __name__ == '__main__':
    # Script entry point: build the monitor and perform a single
    # scrape -> filter -> notify pass.
    monitor_house_info().run()