131 lines
4.3 KiB
Python
131 lines
4.3 KiB
Python
import os
|
||
import re
|
||
import yaml
|
||
import markdown
|
||
from wordpress_xmlrpc import Client, WordPressPost
|
||
from wordpress_xmlrpc.methods import posts
|
||
from datetime import datetime
|
||
|
||
# WordPress settings.
# Every value can be overridden through an environment variable; the literals
# below are only the fallbacks used when the variable is not set.
# NOTE(review): a real-looking password is committed here in plain text.
# Rotate it and supply WP_PASSWORD through the environment instead.
wp_url = os.environ.get("WP_URL", "https://blog.lnf1.skybyte.me/xmlrpc.php")
wp_username = os.environ.get("WP_USERNAME", "songtianlun")
wp_password = os.environ.get("WP_PASSWORD", "sotilu,WP2024")

# Hexo posts directory: the root that is walked for Markdown files.
hexo_root_dir = os.environ.get(
    "HEXO_ROOT_DIR", "/home/songtianlun/Sync/Develop/frytea/source"
)
|
||
|
||
|
||
def parse_hexo_post(file_path):
    """Split a Hexo Markdown file into its front matter and its body.

    The front matter is the text enclosed by two delimiters made of three or
    more ``-`` or ``*`` characters, the first of which must be at the very
    start of the file. It is parsed with ``yaml.safe_load``; if that raises
    ``yaml.YAMLError`` a lenient line-based parser is used instead.

    Args:
        file_path: Path of the Markdown file to read (decoded as UTF-8).

    Returns:
        A ``(front_matter, post_content)`` tuple. ``front_matter`` is always
        a dict; it is empty when the file has no front matter or when the
        front matter is not a YAML mapping. ``post_content`` is the stripped
        text after the closing delimiter, or the whole file content when no
        front matter was found.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Extract the front matter, allowing flexible delimiters.
    front_matter_match = re.match(
        r'^[-*]{3,}\s+(.*?)\s+[-*]{3,}\s*(.*)', content, re.DOTALL
    )
    if not front_matter_match:
        return {}, content

    front_matter_str = front_matter_match.group(1)
    post_content = front_matter_match.group(2).strip()

    # Parse the front matter with PyYAML, falling back to manual parsing
    # when the YAML is malformed.
    try:
        front_matter = yaml.safe_load(front_matter_str)
    except yaml.YAMLError:
        front_matter = _parse_front_matter_leniently(front_matter_str)

    # safe_load returns None for an empty document and a scalar or list for
    # non-mapping YAML; callers rely on dict methods such as .get().
    if not isinstance(front_matter, dict):
        front_matter = {}

    return front_matter, post_content


def _parse_front_matter_leniently(front_matter_str):
    """Parse ``key: value`` lines by hand when the YAML parser rejects them."""
    front_matter = {}
    current_key = None
    for line in front_matter_str.split('\n'):
        stripped = line.strip()
        if ':' in line and not stripped.startswith('-'):
            key, value = line.split(':', 1)
            current_key = key.strip()
            front_matter[current_key] = value.strip() or None
        elif current_key and stripped:
            # Drop the YAML list marker so "- foo" is stored as "foo".
            if stripped == '-' or stripped.startswith('- '):
                stripped = stripped[1:].strip()
                if not stripped:
                    continue
            existing = front_matter[current_key]
            if isinstance(existing, list):
                existing.append(stripped)
            elif existing is None:
                front_matter[current_key] = stripped
            else:
                # Second continuation line: promote the scalar to a list.
                front_matter[current_key] = [existing, stripped]
    return front_matter
|
||
|
||
|
||
def get_categories_from_path(file_path, root_dir):
    """Derive category names from the directories between root_dir and the file.

    Args:
        file_path: Path of the post file.
        root_dir: Directory the path is made relative to.

    Returns:
        The directory names of ``file_path`` relative to ``root_dir``, in
        order, with empty segments and ``_posts`` left out.
    """
    relative_dir = os.path.dirname(os.path.relpath(file_path, root_dir))

    names = []
    for segment in relative_dir.split(os.sep):
        # A file directly under root_dir yields one empty segment.
        if not segment or segment == '_posts':
            continue
        names.append(segment)
    return names
|
||
|
||
|
||
def migrate_to_wordpress(title, content, categories, tags, date, author):
    """Publish one post to WordPress over XML-RPC.

    A new ``Client`` is created on every call from the module-level
    ``wp_url``, ``wp_username`` and ``wp_password``.

    Args:
        title: Post title.
        content: Markdown source; it is rendered with ``markdown.markdown``
            before being sent.
        categories: Names assigned to the ``category`` taxonomy.
        tags: Names assigned to the ``post_tag`` taxonomy.
        date: Publication date; only set on the post when truthy.
        author: Accepted for interface compatibility but currently not
            applied to the post.

    Returns:
        Whatever ``client.call(posts.NewPost(...))`` returns (presumably the
        ID of the created post).
    """
    wp_client = Client(wp_url, wp_username, wp_password)

    entry = WordPressPost()
    entry.title = title
    entry.content = markdown.markdown(content)
    entry.post_status = 'publish'

    taxonomy_terms = {'category': categories, 'post_tag': tags}
    entry.terms_names = taxonomy_terms

    if date:
        entry.date = date

    return wp_client.call(posts.NewPost(entry))
|
||
|
||
|
||
def parse_date(date_value):
    """Convert a front-matter date value into a ``datetime``.

    Args:
        date_value: A ``datetime`` (returned unchanged), a ``date`` (as a
            YAML parser produces for date-only values; converted to midnight
            of that day), or a string in one of the supported formats:
            ``YYYY-MM-DD HH:MM:SS`` with an optional UTC offset that may be
            separated by a space, or ``YYYY-MM-DD``.

    Returns:
        The parsed ``datetime``, or ``None`` when the value has another type
        or the string matches none of the formats.
    """
    # Imported locally because the module only imports the datetime class.
    from datetime import date

    # datetime is a subclass of date, so it must be tested first.
    if isinstance(date_value, datetime):
        return date_value

    if isinstance(date_value, date):
        return datetime(date_value.year, date_value.month, date_value.day)

    if not isinstance(date_value, str):
        return None

    text = date_value.strip()
    date_formats = (
        "%Y-%m-%d %H:%M:%S",
        "%Y-%m-%d %H:%M:%S%z",
        "%Y-%m-%d %H:%M:%S %z",
        "%Y-%m-%d",
    )
    for fmt in date_formats:
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            continue
    return None
|
||
|
||
|
||
def main():
    """Walk ``hexo_root_dir`` and publish every ``.md`` file to WordPress.

    For each Markdown file the front matter supplies the title, tags, date,
    author and categories. Categories from the front matter are combined
    with those derived from the file's directory, duplicates are removed
    while keeping first-seen order, and the post is sent through
    ``migrate_to_wordpress``. A short summary is printed per migrated post.
    """
    for root, _dirs, files in os.walk(hexo_root_dir):
        for filename in files:
            if not filename.endswith('.md'):
                continue

            file_path = os.path.join(root, filename)
            front_matter, content = parse_hexo_post(file_path)
            # Front matter that is empty or not a mapping has no keys to read.
            if not isinstance(front_matter, dict):
                front_matter = {}

            # YAML may yield None ("title:") or a non-string ("title: 2024").
            raw_title = front_matter.get('title')
            title = 'Untitled' if raw_title is None else str(raw_title).strip()
            tags = _as_term_list(front_matter.get('tags'), split_commas=True)
            date = parse_date(front_matter.get('date'))
            author = front_matter.get('author')

            dir_categories = get_categories_from_path(file_path, hexo_root_dir)

            categories = _as_term_list(front_matter.get('categories'))
            # dict.fromkeys de-duplicates while keeping a stable order.
            categories = list(dict.fromkeys(categories + dir_categories))

            post_id = migrate_to_wordpress(title, content, categories, tags, date, author)
            print(f"Migrated post: {title} (ID: {post_id})")
            print(f" filename: {filename}")
            print(f" Categories: {categories}")
            print(f" Tags: {tags}")
            print(f" Date: {date}")


def _as_term_list(value, split_commas=False):
    """Coerce a front-matter tags/categories value into a flat list of strings."""
    if value is None:
        return []
    if isinstance(value, str):
        if split_commas:
            return [part.strip() for part in value.split(',') if part.strip()]
        return [value] if value.strip() else []
    if isinstance(value, (list, tuple)):
        flat = []
        for item in value:
            # Nested lists such as "- [a, b]" are flattened by one level.
            if isinstance(item, (list, tuple)):
                flat.extend(str(sub) for sub in item if sub is not None)
            elif item is not None:
                flat.append(str(item))
        return flat
    return [str(value)]
|
||
|
||
|
||
# Run the migration only when this file is executed as a script.
if __name__ == "__main__":
    main()