first commit

This commit is contained in:
cyy_mac
2026-03-24 22:30:11 +08:00
commit 69a660bfeb
10 changed files with 593 additions and 0 deletions

13
Dockerfile Normal file
View File

@@ -0,0 +1,13 @@
FROM python:3.11-slim
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
# 配置文件不打包,由 volumes 挂载
VOLUME ["/app/config.yaml", "/app/last_notifications.json"]
CMD ["python", "main.py"]

84
README.md Normal file
View File

@@ -0,0 +1,84 @@
# 教务处通知监控工具
## 功能特点
- 定时监控教务处网站通知
- 有新通知时自动发送邮件
- 可配置监控频率(分钟)
- 状态持久化,记录已发送通知避免重复
## 部署步骤
### 1. 安装依赖
```bash
pip install -r requirements.txt
```
### 2. 配置
编辑 `config.yaml`
```yaml
monitor:
url: "https://jwc.your.edu.cn/tzgg.htm" # 改为你的教务处地址
frequency_minutes: 30 # 监控频率
encoding: "utf-8"
notification:
smtp_host: "smtp.gmail.com" # 邮件服务器
smtp_port: 587
smtp_user: "your_email@gmail.com"
smtp_password: "your_app_password" # 推荐使用应用专用密码
to_email: "notify@example.com"
use_tls: true
```
### 3. 运行
```bash
python main.py
```
### 4. 后台运行(Linux)
```bash
nohup python main.py > output.log 2>&1 &
```
或使用 systemd 服务:
```ini
[Unit]
Description=JWC Monitor
After=network.target
[Service]
Type=simple
User=your_user
WorkingDirectory=/path/to/project
ExecStart=/usr/bin/python3 main.py
Restart=always
[Install]
WantedBy=multi-user.target
```
### 5. 常见邮件服务商 SMTP 设置
| 服务商 | SMTP Host | 端口 |
|--------|-----------|------|
| Gmail | smtp.gmail.com | 587 |
| QQ | smtp.qq.com | 587 |
| 163 | smtp.163.com | 465/994 |
**Gmail 提示**:需要先开启两步验证,再生成并使用"应用专用密码";Google 已停用"安全性较低的应用"选项,无法再通过降低账户安全级别登录。
## 适配不同网站
如果抓取不到通知,可能需要调整选择器。参考 `main.py` 中的 `_parse_page` 方法,根据实际网页结构修改选择器。
常见问题:
1. 通知列表在哪个标签里?(ul/div/table)
2. 每条通知的标题和链接在哪?
3. 网页编码是什么?

Binary file not shown.

20
config.yaml Normal file
View File

@@ -0,0 +1,20 @@
monitor:
frequency_minutes: 30
sites:
- name: "物理与电子科学学院"
url: "https://www.csust.edu.cn/wdxy/xytz.htm"
encoding: "utf-8"
- name: "教务处"
url: "https://www.csust.edu.cn/jwc/index/tzgg.htm"
encoding: "utf-8"
notification:
smtp_host: "smtp.163.com"
smtp_port: 25
smtp_user: "chenyouyuan0505@163.com"
smtp_password: "AEszrLBtZZK5fJSv"
to_email: "3289288508@qq.com"
use_tls: true
state_file: "last_notifications.json"

16
docker-compose.yml Normal file
View File

@@ -0,0 +1,16 @@
version: '3'
services:
monitor:
build: .
restart: unless-stopped
volumes:
- ./config.yaml:/app/config.yaml:ro
- ./last_notifications.json:/app/last_notifications.json
- ./monitor.log:/app/monitor.log
# 环境变量方式配置邮件(可选,设计上优先级高于 config.yaml;注意:当前 main.py / notifier.py 尚未读取这些环境变量)
# environment:
# - SMTP_HOST=smtp.gmail.com
# - SMTP_PORT=587
# - SMTP_USER=your_email@gmail.com
# - SMTP_PASSWORD=your_app_password

12
environment.yml Normal file
View File

@@ -0,0 +1,12 @@
name: jwc-monitor
channels:
- defaults
- conda-forge
dependencies:
- python=3.11
- pip
- pip:
- requests>=2.28.0
- beautifulsoup4>=4.11.0
- pyyaml>=6.0
- schedule>=1.1.0

245
main.py Normal file
View File

@@ -0,0 +1,245 @@
#!/usr/bin/env python3
"""
教务处通知监控系统
支持多网站监控,有新通知时发送邮件提醒
"""
import json
import logging
import re
import sys
import time
from datetime import datetime
from pathlib import Path
from urllib.parse import urljoin
import requests
from bs4 import BeautifulSoup
import schedule
import yaml
from notifier import EmailNotifier
# Send every log record both to monitor.log and to stdout (the latter is what
# `docker logs` / systemd capture).
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        # Log messages contain Chinese text; pin the file encoding so the log
        # is written correctly regardless of the platform's default locale.
        logging.FileHandler('monitor.log', encoding='utf-8'),
        logging.StreamHandler(sys.stdout),
    ],
)
logger = logging.getLogger(__name__)
class JWCMonitor:
    """Poll configured notice-list pages and e-mail entries not seen before.

    Configuration is loaded from a YAML file. For every monitored URL the ids
    (raw ``href`` values) of already-reported notifications are kept in
    ``self.state`` and persisted as JSON in ``self.state_file``.
    """

    # ISO-style date; group 1 captures the four-digit year.
    _DATE_RE = re.compile(r'(\d{4})-\d{2}-\d{2}')

    # (tag, CSS class) pairs tried in order when looking for the notice list.
    _LIST_SELECTORS = (
        ('ul', 'list'),
        ('ul', 'news-list'),
        ('ul', 'tzgg'),
        ('div', 'list'),
        ('div', 'news'),
        ('div', 'article-list'),
        ('table', 'list'),
    )

    def __init__(self, config_path='config.yaml'):
        """Load the config, then set up the HTTP session, notifier and state.

        Args:
            config_path: Path of the YAML configuration file.
        """
        self.config = self._load_config(config_path)
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        })
        self.notifier = EmailNotifier(self.config['notification'])
        self.state_file = Path(self.config.get('state_file', 'last_notifications.json'))
        self.state = self._load_state()

    def _load_config(self, path):
        """Return the parsed YAML document stored at ``path``."""
        with open(path, 'r', encoding='utf-8') as f:
            return yaml.safe_load(f)

    def _load_state(self):
        """Return the persisted ``{site_url: [ids]}`` mapping, or ``{}``.

        An unreadable, empty or malformed state file is treated as "no state"
        instead of aborting start-up; the problem is logged.
        """
        if not self.state_file.exists():
            return {}
        try:
            with open(self.state_file, 'r', encoding='utf-8') as f:
                state = json.load(f)
        except (OSError, json.JSONDecodeError) as e:
            logger.warning(f"状态文件读取失败,将重新开始记录: {e}")
            return {}
        # Anything but a mapping cannot be used by check_updates().
        return state if isinstance(state, dict) else {}

    def _save_state(self):
        """Write ``self.state`` to ``self.state_file`` as UTF-8 JSON."""
        # Written in place (no temp-file rename) so it keeps working when the
        # state file itself is a bind mount, as in docker-compose.yml.
        with open(self.state_file, 'w', encoding='utf-8') as f:
            json.dump(self.state, f, ensure_ascii=False, indent=2)

    def fetch_notifications(self, site):
        """Fetch and parse the notice list of one site.

        Args:
            site: Mapping with ``name``, ``url`` and optional ``encoding``
                (defaults to ``utf-8``).

        Returns:
            A list of notification dicts (see ``_parse_page``); an empty list
            if the request or the parsing failed.
        """
        url = site['url']
        encoding = site.get('encoding', 'utf-8')
        try:
            logger.info(f"正在抓取 [{site['name']}]: {url}")
            response = self.session.get(url, timeout=30)
            # Without this an HTTP error page would be parsed and its links
            # reported as "notifications" by the fallback in _parse_page.
            response.raise_for_status()
            response.encoding = encoding
            soup = BeautifulSoup(response.text, 'html.parser')
            notifications = self._parse_page(soup, url)
            logger.info(f"[{site['name']}] 获取到 {len(notifications)} 条通知")
            return notifications
        except Exception as e:
            # Per-site boundary: one failing site must not stop the others.
            logger.error(f"[{site['name']}] 抓取失败: {e}")
            return []

    @staticmethod
    def _skip_href(href):
        """Return True for empty, ``mailto:`` and ``javascript`` hrefs."""
        return not href or 'mailto:' in href or 'javascript' in href.lower()

    def _parse_page(self, soup, base_url):
        """Extract notifications from a parsed notice-list page.

        The known list containers are tried in order and the first selector
        that yields anything wins. If none matches, every link on the page is
        considered instead (without date extraction).

        Args:
            soup: Parsed page.
            base_url: URL of the page, used to resolve relative links.

        Returns:
            At most 20 dicts with ``title``, ``link``, ``date`` and ``id``
            (the raw href), de-duplicated by title in document order.
        """
        notifications = []
        for tag, class_name in self._LIST_SELECTORS:
            for container in soup.find_all(tag, class_=class_name):
                for a_tag in container.find_all('a', href=True):
                    href = a_tag['href']
                    if self._skip_href(href):
                        continue
                    title = a_tag.get_text(strip=True)
                    # Short texts are navigation items rather than notices.
                    if len(title) > 8 and not title.startswith(('#', 'http')):
                        notifications.append({
                            'title': title,
                            'link': self._abs_url(href, base_url),
                            'date': self._extract_date_from_element(a_tag),
                            'id': href,
                        })
            if notifications:
                break

        if not notifications:
            # Fallback: no known container matched, scan every link.
            for a_tag in soup.find_all('a', href=True):
                href = a_tag['href']
                if self._skip_href(href):
                    continue
                title = a_tag.get_text(strip=True)
                if len(title) > 8 and not title.startswith('http'):
                    notifications.append({
                        'title': title,
                        'link': self._abs_url(href, base_url),
                        'date': '',
                        'id': href,
                    })

        seen_titles = set()
        unique = []
        for n in notifications:
            if n['link'] and n['title'] not in seen_titles:
                seen_titles.add(n['title'])
                unique.append(n)
        return unique[:20]

    def _extract_date_from_element(self, element):
        """Return the first ``YYYY-MM-DD`` found near a link, or ``''``.

        The text of the parent is searched first, then the following
        siblings, then the link text itself.
        """
        parent = element.parent
        if parent:
            match = self._DATE_RE.search(parent.get_text(strip=True))
            if match:
                return match.group(0)
        for sibling in element.find_next_siblings():
            match = self._DATE_RE.search(sibling.get_text(strip=True))
            if match:
                return match.group(0)
        match = self._DATE_RE.search(element.get_text(strip=True))
        return match.group(0) if match else ''

    def _abs_url(self, href, base_url):
        """Resolve ``href`` against ``base_url``.

        Returns ``''`` for empty hrefs, in-page anchors and ``javascript``
        pseudo-links.
        """
        if not href or href.startswith('#') or 'javascript' in href.lower():
            return ''
        if href.startswith('http'):
            return href
        return urljoin(base_url, href)

    def _is_valid_date(self, notification, min_year=2026):
        """Return False for notifications dated before ``min_year``.

        The year is taken from the ``date`` field, or from a ``YYYY-MM-DD``
        embedded in the title when ``date`` is empty. Notifications without
        any recognisable date are kept.

        Args:
            notification: Dict with optional ``date`` and ``title`` keys.
            min_year: Earliest year that is still reported.
        """
        text = notification.get('date') or notification.get('title', '') or ''
        match = self._DATE_RE.search(text)
        if not match:
            return True
        # Compare only the captured year; int() on the whole 'YYYY-MM-DD'
        # string would always fail and silently disable the filter.
        return int(match.group(1)) >= min_year

    def check_updates(self):
        """Check every configured site and mail the notifications that are new.

        State is only committed and saved when the notifier did not report a
        failure (``send`` returning ``False``), so undelivered notifications
        are picked up again on the next run.
        """
        monitor_cfg = self.config.get('monitor') or {}
        sites = self.config.get('sites', [])
        if not sites and 'url' in monitor_cfg:
            # Backward compatibility with the single-site configuration.
            sites = [{
                'name': '默认网站',
                'url': monitor_cfg['url'],
                'encoding': monitor_cfg.get('encoding', 'utf-8'),
            }]
        # Optional override of the date cutoff; defaults to the former constant.
        min_year = monitor_cfg.get('min_year', 2026)

        all_new = []
        pending_state = {}
        for site in sites:
            current = self.fetch_notifications(site)
            if not current:
                continue
            current = [n for n in current if self._is_valid_date(n, min_year)]

            site_url = site['url']
            known_ids = set(self.state.get(site_url, []))
            new_notifications = []
            for n in current:
                if n['id'] not in known_ids:
                    new_notifications.append(n)
                    known_ids.add(n['id'])

            if new_notifications:
                for n in new_notifications:
                    n['source'] = site['name']
                all_new.extend(new_notifications)
                logger.info(f"[{site['name']}] 发现 {len(new_notifications)} 条新通知")
                # Sorted so the state file is stable between runs.
                pending_state[site_url] = sorted(known_ids)

        if not all_new:
            logger.info("没有新通知")
            return

        logger.info(f"共发现 {len(all_new)} 条新通知")
        sent = self.notifier.send(all_new)
        # Only an explicit False counts as failure, so notifiers whose send()
        # returns None keep the previous "always persist" behaviour.
        if sent is False:
            logger.warning("邮件发送失败,本次新通知将在下次检查时重试")
            return
        self.state.update(pending_state)
        self._save_state()

    def run(self):
        """Run one check immediately, then keep checking on the schedule."""
        frequency = self.config.get('monitor', {}).get('frequency_minutes', 30)
        sites = self.config.get('sites', [])
        site_names = [s['name'] for s in sites] if sites else ['默认网站']
        logger.info(f"启动监控,每 {frequency} 分钟检查一次")
        logger.info(f"监控网站: {', '.join(site_names)}")
        self.check_updates()
        schedule.every(frequency).minutes.do(self.check_updates)
        while True:
            schedule.run_pending()
            time.sleep(30)
# Script entry point: build the monitor from ./config.yaml and poll forever.
if __name__ == '__main__':
    monitor = JWCMonitor()
    try:
        monitor.run()
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the endless polling loop; exit
        # with a log line instead of a traceback.
        logger.info("监控已停止")

102
monitor.log Normal file
View File

@@ -0,0 +1,102 @@
2026-03-24 21:07:26,073 - INFO - 启动监控,每 30 分钟检查一次
2026-03-24 21:07:26,073 - INFO - 监控地址: https://www.csust.edu.cn/wdxy/xytz.htm
2026-03-24 21:07:26,073 - INFO - 正在抓取: https://www.csust.edu.cn/wdxy/xytz.htm
2026-03-24 21:08:06,509 - INFO - 获取到 15 条通知
2026-03-24 21:08:06,509 - INFO - 发现 15 条新通知
2026-03-24 21:08:10,437 - ERROR - 邮件发送失败: (535, b'5.7.8 Username and Password not accepted. For more information, go to\n5.7.8 https://support.google.com/mail/?p=BadCredentials d2e1a72fcca58-82b03bbebbfsm15412218b3a.18 - gsmtp')
2026-03-24 21:31:12,546 - INFO - 启动监控,每 30 分钟检查一次
2026-03-24 21:31:12,546 - INFO - 监控地址: https://www.csust.edu.cn/wdxy/xytz.htm
2026-03-24 21:31:12,546 - INFO - 正在抓取: https://www.csust.edu.cn/wdxy/xytz.htm
2026-03-24 21:31:42,963 - INFO - 获取到 15 条通知
2026-03-24 21:31:42,964 - INFO - 发现 15 条新通知
2026-03-24 21:32:05,402 - ERROR - 邮件发送失败: Connection unexpectedly closed
2026-03-24 21:36:52,466 - INFO - 启动监控,每 30 分钟检查一次
2026-03-24 21:36:52,466 - INFO - 监控地址: https://www.csust.edu.cn/wdxy/xytz.htm
2026-03-24 21:36:52,466 - INFO - 正在抓取: https://www.csust.edu.cn/wdxy/xytz.htm
2026-03-24 21:37:22,930 - INFO - 获取到 15 条通知
2026-03-24 21:37:22,930 - INFO - 发现 15 条新通知
2026-03-24 21:37:43,247 - ERROR - 邮件发送失败: Connection unexpectedly closed
2026-03-24 21:38:59,146 - INFO - 启动监控,每 30 分钟检查一次
2026-03-24 21:38:59,146 - INFO - 监控地址: https://www.csust.edu.cn/wdxy/xytz.htm
2026-03-24 21:38:59,146 - INFO - 正在抓取: https://www.csust.edu.cn/wdxy/xytz.htm
2026-03-24 21:39:29,536 - INFO - 获取到 15 条通知
2026-03-24 21:39:29,536 - INFO - 发现 15 条新通知
2026-03-24 21:39:37,605 - INFO - 邮件发送成功: 【教务通知】发现 15 条新通知
2026-03-24 21:46:33,382 - INFO - 启动监控,每 30 分钟检查一次
2026-03-24 21:46:33,382 - INFO - 监控地址: https://www.csust.edu.cn/wdxy/xytz.htm
2026-03-24 21:46:33,382 - INFO - 正在抓取: https://www.csust.edu.cn/wdxy/xytz.htm
2026-03-24 21:47:03,777 - INFO - 获取到 15 条通知
2026-03-24 21:47:03,777 - INFO - 过滤后剩余 15 条通知
2026-03-24 21:47:03,778 - INFO - 发现 15 条新通知
2026-03-24 21:47:08,020 - INFO - 邮件发送成功: 【教务通知】发现 15 条新通知
2026-03-24 21:52:11,622 - INFO - 启动监控,每 30 分钟检查一次
2026-03-24 21:52:11,622 - INFO - 监控地址: https://www.csust.edu.cn/wdxy/xytz.htm
2026-03-24 21:52:11,622 - INFO - 正在抓取: https://www.csust.edu.cn/wdxy/xytz.htm
2026-03-24 21:53:11,704 - ERROR - 抓取失败: HTTPSConnectionPool(host='www.csust.edu.cn', port=443): Read timed out. (read timeout=30)
2026-03-24 21:53:11,705 - WARNING - 未获取到通知
2026-03-24 21:53:35,395 - INFO - 启动监控,每 30 分钟检查一次
2026-03-24 21:53:35,395 - INFO - 监控地址: https://www.csust.edu.cn/wdxy/xytz.htm
2026-03-24 21:53:35,396 - INFO - 正在抓取: https://www.csust.edu.cn/wdxy/xytz.htm
2026-03-24 21:54:05,791 - INFO - 通知: 物理与电子科学学院2025年度人才引进公告2024-03-27 -> info/1011/6500.htm
2026-03-24 21:54:05,792 - INFO - 通知: 关于举办2026年全国大学生光电设计竞赛校内选拔赛的通知2026-03-19 -> info/1011/11927.htm
2026-03-24 21:54:05,792 - INFO - 通知: 物理与电子科学学院2025级本科生转专业拟接收名单公示2026-03-11 -> info/1011/11911.htm
2026-03-24 21:54:05,792 - INFO - 获取到 15 条通知
2026-03-24 21:54:05,792 - INFO - 过滤后剩余 15 条通知
2026-03-24 21:54:05,792 - INFO - 发现 15 条新通知
2026-03-24 21:54:11,803 - INFO - 邮件发送成功: 【教务通知】发现 15 条新通知
2026-03-24 21:55:47,362 - INFO - 启动监控,每 30 分钟检查一次
2026-03-24 21:55:47,362 - INFO - 监控地址: https://www.csust.edu.cn/wdxy/xytz.htm
2026-03-24 21:55:47,362 - INFO - 正在抓取: https://www.csust.edu.cn/wdxy/xytz.htm
2026-03-24 21:56:17,816 - INFO - 获取到 15 条通知
2026-03-24 21:56:17,816 - INFO - 过滤后剩余 15 条通知
2026-03-24 21:56:17,816 - INFO - 发现 15 条新通知
2026-03-24 21:56:20,188 - INFO - 邮件发送成功: 【教务通知】发现 15 条新通知
2026-03-24 21:59:02,678 - INFO - 启动监控,每 30 分钟检查一次
2026-03-24 21:59:02,678 - INFO - 监控地址: https://www.csust.edu.cn/wdxy/xytz.htm
2026-03-24 21:59:02,678 - INFO - 正在抓取: https://www.csust.edu.cn/wdxy/xytz.htm
2026-03-24 22:00:03,068 - ERROR - 抓取失败: HTTPSConnectionPool(host='www.csust.edu.cn', port=443): Read timed out. (read timeout=30)
2026-03-24 22:00:03,070 - WARNING - 未获取到通知
2026-03-24 22:00:20,261 - INFO - 启动监控,每 30 分钟检查一次
2026-03-24 22:00:20,262 - INFO - 监控地址: https://www.csust.edu.cn/wdxy/xytz.htm
2026-03-24 22:00:20,262 - INFO - 正在抓取: https://www.csust.edu.cn/wdxy/xytz.htm
2026-03-24 22:00:50,819 - INFO - 获取到 15 条通知
2026-03-24 22:00:50,820 - INFO - 过滤后剩余 8 条通知
2026-03-24 22:00:50,820 - INFO - 发现 8 条新通知
2026-03-24 22:00:52,165 - INFO - 邮件发送成功: 【教务通知】发现 8 条新通知
2026-03-24 22:15:44,640 - INFO - 启动监控,每 30 分钟检查一次
2026-03-24 22:15:44,640 - INFO - 监控网站: 物理与电子科学学院, 教务处
2026-03-24 22:15:44,640 - INFO - 正在抓取 [物理与电子科学学院]: https://www.csust.edu.cn/wdxy/xytz.htm
2026-03-24 22:16:15,123 - INFO - [物理与电子科学学院] 获取到 15 条通知
2026-03-24 22:16:15,124 - INFO - [物理与电子科学学院] 发现 8 条新通知
2026-03-24 22:16:15,124 - INFO - 正在抓取 [教务处]: https://www.csust.edu.cn/jwc/index/tzgg.htm
2026-03-24 22:16:15,191 - INFO - [教务处] 获取到 13 条通知
2026-03-24 22:16:15,191 - INFO - [教务处] 发现 13 条新通知
2026-03-24 22:16:15,191 - INFO - 共发现 21 条新通知
2026-03-24 22:16:19,954 - INFO - 邮件发送成功: 【教务通知】发现 21 条新通知
2026-03-24 22:22:02,051 - INFO - 启动监控,每 30 分钟检查一次
2026-03-24 22:22:02,051 - INFO - 监控网站: 物理与电子科学学院, 教务处
2026-03-24 22:22:02,051 - INFO - 正在抓取 [物理与电子科学学院]: https://www.csust.edu.cn/wdxy/xytz.htm
2026-03-24 22:22:32,383 - INFO - [物理与电子科学学院] 获取到 15 条通知
2026-03-24 22:22:32,384 - INFO - [物理与电子科学学院] 发现 8 条新通知
2026-03-24 22:22:32,385 - INFO - 正在抓取 [教务处]: https://www.csust.edu.cn/jwc/index/tzgg.htm
2026-03-24 22:22:32,460 - INFO - [教务处] 获取到 8 条通知
2026-03-24 22:22:32,460 - INFO - [教务处] 发现 8 条新通知
2026-03-24 22:22:32,460 - INFO - 共发现 16 条新通知
2026-03-24 22:22:33,731 - INFO - 邮件发送成功: 【教务通知】发现 16 条新通知
2026-03-24 22:25:16,791 - INFO - 启动监控,每 30 分钟检查一次
2026-03-24 22:25:16,791 - INFO - 监控网站: 物理与电子科学学院, 教务处
2026-03-24 22:25:16,792 - INFO - 正在抓取 [物理与电子科学学院]: https://www.csust.edu.cn/wdxy/xytz.htm
2026-03-24 22:25:47,063 - INFO - [物理与电子科学学院] 获取到 15 条通知
2026-03-24 22:25:47,065 - INFO - 正在抓取 [教务处]: https://www.csust.edu.cn/jwc/index/tzgg.htm
2026-03-24 22:25:47,139 - INFO - [教务处] 获取到 8 条通知
2026-03-24 22:25:47,139 - INFO - 没有新通知
2026-03-24 22:26:28,009 - INFO - 启动监控,每 30 分钟检查一次
2026-03-24 22:26:28,009 - INFO - 监控网站: 物理与电子科学学院, 教务处
2026-03-24 22:26:28,009 - INFO - 正在抓取 [物理与电子科学学院]: https://www.csust.edu.cn/wdxy/xytz.htm
2026-03-24 22:26:58,282 - INFO - [物理与电子科学学院] 获取到 15 条通知
2026-03-24 22:26:58,283 - INFO - [物理与电子科学学院] 发现 8 条新通知
2026-03-24 22:26:58,283 - INFO - 正在抓取 [教务处]: https://www.csust.edu.cn/jwc/index/tzgg.htm
2026-03-24 22:26:58,364 - INFO - [教务处] 获取到 8 条通知
2026-03-24 22:26:58,364 - INFO - [教务处] 发现 8 条新通知
2026-03-24 22:26:58,364 - INFO - 共发现 16 条新通知
2026-03-24 22:26:59,775 - INFO - 邮件发送成功: 【通知监控】发现 16 条新通知

97
notifier.py Normal file
View File

@@ -0,0 +1,97 @@
#!/usr/bin/env python3
"""
邮件通知模块
"""
import logging
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
logger = logging.getLogger(__name__)


class EmailNotifier:
    """Send a digest of new notifications by e-mail over SMTP."""

    def __init__(self, config):
        """Store the SMTP settings.

        Args:
            config: Mapping with the required keys ``smtp_host``,
                ``smtp_port``, ``smtp_user``, ``smtp_password`` and
                ``to_email``. Optional keys: ``use_tls`` (STARTTLS, default
                True), ``use_ssl`` (implicit TLS, default True only for port
                465) and ``timeout`` in seconds (default 30).
        """
        self.smtp_host = config['smtp_host']
        self.smtp_port = config['smtp_port']
        self.smtp_user = config['smtp_user']
        self.smtp_password = config['smtp_password']
        self.to_email = config['to_email']
        self.use_tls = config.get('use_tls', True)
        # Port 465 expects TLS from the first byte; STARTTLS on a plain
        # connection cannot work there.
        self.use_ssl = config.get('use_ssl', str(self.smtp_port) == '465')
        self.timeout = config.get('timeout', 30)

    def send(self, notifications):
        """Mail the given notifications as one multipart (text + HTML) message.

        Args:
            notifications: List of dicts with ``title`` and ``link`` and
                optionally ``date`` and ``source``.

        Returns:
            True if the message was handed to the server or there was nothing
            to send, False if sending failed (the error is logged, not raised).
        """
        if not notifications:
            return True

        subject = f"【通知监控】发现 {len(notifications)} 条新通知"
        msg = MIMEMultipart('alternative')
        msg['Subject'] = subject
        msg['From'] = self.smtp_user
        msg['To'] = self.to_email
        # Plain text first, HTML last: clients prefer the last alternative.
        msg.attach(MIMEText(self._build_text(notifications), 'plain', 'utf-8'))
        msg.attach(MIMEText(self._build_html(notifications), 'html', 'utf-8'))

        smtp_cls = smtplib.SMTP_SSL if self.use_ssl else smtplib.SMTP
        try:
            # The context manager closes the connection even when starttls,
            # login or send_message raises; the timeout keeps a stalled
            # server from blocking the caller indefinitely.
            with smtp_cls(self.smtp_host, self.smtp_port, timeout=self.timeout) as server:
                if self.use_tls and not self.use_ssl:
                    server.starttls()
                server.login(self.smtp_user, self.smtp_password)
                server.send_message(msg)
        except Exception as e:
            # Best-effort delivery: report the failure to the caller instead
            # of crashing the monitoring loop.
            logger.error(f"邮件发送失败: {e}")
            return False
        logger.info(f"邮件发送成功: {subject}")
        return True

    @staticmethod
    def _group_by_source(notifications):
        """Group notifications by their ``source`` key, keeping input order."""
        from collections import defaultdict

        by_site = defaultdict(list)
        for n in notifications:
            by_site[n.get('source', '未知来源')].append(n)
        return by_site

    def _build_html(self, notifications):
        """Return the HTML body: one table of title/date rows per source site.

        Titles, links, dates and site names come from scraped pages, so they
        are HTML-escaped before being interpolated.
        """
        from html import escape

        html_parts = []
        for site_name, items in self._group_by_source(notifications).items():
            rows = ''.join(
                f'<tr><td><a href="{escape(str(n["link"]))}">{escape(str(n["title"]))}</a></td>'
                f'<td>{escape(str(n.get("date", "")))}</td></tr>'
                for n in items
            )
            html_parts.append(f'''
<h3>{escape(str(site_name))}</h3>
<table border="1" cellpadding="5" cellspacing="0">
<thead><tr><th>标题</th><th>日期</th></tr></thead>
<tbody>{rows}</tbody>
</table>
''')
        return f'''
<html>
<body>
<h2>新通知 (共 {len(notifications)} 条)</h2>
{''.join(html_parts)}
</body>
</html>
'''

    def _build_text(self, notifications):
        """Return the plain-text body: title, date and link grouped by source."""
        lines = [f'新通知 (共 {len(notifications)} 条)\n']
        for site_name, items in self._group_by_source(notifications).items():
            lines.append(f'\n=== {site_name} ===')
            for n in items:
                date = n.get('date', '')
                lines.append(f"- {n['title']} {date}")
                lines.append(f"  {n['link']}")
        return '\n'.join(lines)

4
requirements.txt Normal file
View File

@@ -0,0 +1,4 @@
requests>=2.28.0
beautifulsoup4>=4.11.0
pyyaml>=6.0
schedule>=1.1.0