131 lines
4.3 KiB
Python
131 lines
4.3 KiB
Python
|
import os
|
|||
|
import re
|
|||
|
import yaml
|
|||
|
import markdown
|
|||
|
from wordpress_xmlrpc import Client, WordPressPost
|
|||
|
from wordpress_xmlrpc.methods import posts
|
|||
|
from datetime import datetime
|
|||
|
|
|||
|
# WordPress settings.
# NOTE(review): the defaults below embed real credentials in source control.
# Each value can be overridden with an environment variable so the secrets can
# be moved out of the file; the literals are kept only as backward-compatible
# fallbacks and should be removed (and the password rotated) once callers set
# the variables.
wp_url = os.environ.get("WP_URL", "https://blog.lnf1.skybyte.me/xmlrpc.php")
wp_username = os.environ.get("WP_USERNAME", "songtianlun")
wp_password = os.environ.get("WP_PASSWORD", "sotilu,WP2024")

# Root directory of the Hexo posts to migrate.
hexo_root_dir = os.environ.get(
    "HEXO_ROOT_DIR", "/home/songtianlun/Sync/Develop/frytea/source")
|
|||
|
|
|||
|
|
|||
|
def _parse_front_matter_fallback(front_matter_str):
    """Leniently parse ``key: value`` front matter that PyYAML rejected.

    Lines containing a colon (and not starting with ``-``) start a new key.
    Following ``- item`` lines are collected into a list under the most
    recent key, with the leading dash removed. Other non-empty continuation
    lines are attached to the most recent key as plain text.
    """
    front_matter = {}
    current_key = None
    for raw_line in front_matter_str.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        if ':' in line and not line.startswith('-'):
            key, value = line.split(':', 1)
            current_key = key.strip()
            front_matter[current_key] = value.strip() or None
            continue

        # Continuation line with no usable key to attach it to.
        if not current_key:
            continue

        is_list_item = line.startswith('-')
        item = line[1:].strip() if is_list_item else line
        if is_list_item and not item:
            # A bare "-" carries no value.
            continue

        existing = front_matter[current_key]
        if isinstance(existing, list):
            existing.append(item)
        elif existing is None:
            # A dash item always yields a list, even when it is the only one.
            front_matter[current_key] = [item] if is_list_item else item
        else:
            front_matter[current_key] = [existing, item]
    return front_matter


def parse_hexo_post(file_path):
    """Split a Hexo Markdown file into its front matter and body.

    The front matter is the text between an opening and a closing run of
    three or more ``-`` or ``*`` characters at the start of the file. It is
    parsed with ``yaml.safe_load``; if that raises ``yaml.YAMLError`` a
    lenient line-based parser is used instead.

    Args:
        file_path: Path of the Markdown file, read as UTF-8 (a leading BOM
            is ignored so it cannot hide the opening delimiter).

    Returns:
        A ``(front_matter, post_content)`` tuple. ``front_matter`` is always
        a dict. When the file has no front matter, or the delimited section
        does not parse to a mapping (e.g. the file merely starts with a
        horizontal rule), the dict is empty and ``post_content`` is the whole
        file so no text is lost. Otherwise ``post_content`` is the stripped
        text after the closing delimiter.
    """
    # 'utf-8-sig' decodes plain UTF-8 too, but drops a BOM if one is present.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        content = f.read()

    # Extract the front matter, allowing flexible delimiters.
    front_matter_match = re.match(
        r'^[-*]{3,}\s+(.*?)\s+[-*]{3,}\s*(.*)', content, re.DOTALL)
    if not front_matter_match:
        return {}, content

    front_matter_str = front_matter_match.group(1)
    post_content = front_matter_match.group(2).strip()

    try:
        front_matter = yaml.safe_load(front_matter_str)
    except yaml.YAMLError:
        # Fall back to manual parsing when the YAML is malformed.
        front_matter = _parse_front_matter_fallback(front_matter_str)

    if front_matter is None:
        # Empty front matter section: safe_load returns None, not {}.
        return {}, post_content
    if not isinstance(front_matter, dict):
        # A scalar/list is not front matter; keep the full text as the body.
        return {}, content
    return front_matter, post_content
|
|||
|
|
|||
|
|
|||
|
def get_categories_from_path(file_path, root_dir):
    """Derive category names from a post's directory below ``root_dir``.

    Each directory component between ``root_dir`` and the file becomes one
    category, in path order. Empty components and the literal ``_posts``
    directory name are skipped.

    Args:
        file_path: Path of the post file.
        root_dir: Directory the relative path is computed against.

    Returns:
        A list of directory names; empty when the file sits directly in
        ``root_dir`` or only under ``_posts``.
    """
    relative_dir = os.path.dirname(os.path.relpath(file_path, root_dir))

    categories = []
    for part in relative_dir.split(os.sep):
        if not part or part == '_posts':
            continue
        categories.append(part)
    return categories
|
|||
|
|
|||
|
|
|||
|
def migrate_to_wordpress(title, content, categories, tags, date, author):
    """Create and publish one WordPress post through XML-RPC.

    A new ``Client`` is built from the module-level ``wp_url``,
    ``wp_username`` and ``wp_password`` on every call. The Markdown
    ``content`` is converted to HTML with ``markdown.markdown`` before upload.

    Args:
        title: Post title.
        content: Post body as Markdown text.
        categories: Names assigned to the ``category`` taxonomy.
        tags: Names assigned to the ``post_tag`` taxonomy.
        date: Publication date; only set on the post when truthy.
        author: Accepted for interface compatibility; currently unused.

    Returns:
        The result of the ``posts.NewPost`` call (presumably the ID of the
        created post -- callers print it as such).
    """
    # Connect first, exactly as before, so failures surface in the same order.
    wp_client = Client(wp_url, wp_username, wp_password)

    taxonomy_terms = {
        'category': categories,
        'post_tag': tags
    }

    new_post = WordPressPost()
    new_post.title = title
    new_post.content = markdown.markdown(content)
    new_post.post_status = 'publish'
    new_post.terms_names = taxonomy_terms

    if date:
        new_post.date = date

    return wp_client.call(posts.NewPost(new_post))
|
|||
|
|
|||
|
|
|||
|
def parse_date(date_value):
    """Convert a front matter date value into a ``datetime``.

    Args:
        date_value: A ``datetime`` (returned unchanged), a ``datetime.date``
            (as produced by YAML for date-only values such as
            ``2024-01-02``; converted to midnight of that day), or a string
            in one of the supported formats. Surrounding whitespace in
            strings is ignored.

    Returns:
        The parsed ``datetime``, or ``None`` when the value has an
        unsupported type or matches none of the formats.
    """
    # Imported locally because the module only imports the datetime class.
    from datetime import date as date_type

    # datetime is a subclass of date, so it must be checked first.
    if isinstance(date_value, datetime):
        return date_value

    if isinstance(date_value, date_type):
        return datetime(date_value.year, date_value.month, date_value.day)

    if not isinstance(date_value, str):
        return None

    text = date_value.strip()
    date_formats = (
        "%Y-%m-%d %H:%M:%S",
        "%Y-%m-%d %H:%M:%S%z",
        "%Y-%m-%d %H:%M:%S %z",
        "%Y-%m-%d",
    )
    for fmt in date_formats:
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            continue
    return None
|
|||
|
|
|||
|
|
|||
|
def _normalize_terms(value, split_commas=False):
    """Return a front matter value as a flat list of non-empty strings.

    ``None`` becomes an empty list, a string becomes one entry (or several
    when ``split_commas`` is true), nested lists are flattened, and any other
    scalar is converted with ``str``.
    """
    if value is None:
        return []
    if isinstance(value, str):
        parts = value.split(',') if split_commas else [value]
        return [part.strip() for part in parts if part.strip()]
    if isinstance(value, (list, tuple, set)):
        flat = []
        for item in value:
            # Items inside a list are taken verbatim, never comma-split.
            flat.extend(_normalize_terms(item))
        return flat
    text = str(value).strip()
    return [text] if text else []


def main():
    """Migrate every ``.md`` file under ``hexo_root_dir`` to WordPress.

    For each file the front matter supplies title, tags, date, author and
    categories; directory names below ``hexo_root_dir`` are added as extra
    categories. Front matter values are normalised first so that a missing,
    empty, numeric or nested value cannot abort the run. A summary is printed
    for every migrated post.
    """
    for root, _dirs, files in os.walk(hexo_root_dir):
        for filename in files:
            if not filename.endswith('.md'):
                continue

            file_path = os.path.join(root, filename)
            front_matter, content = parse_hexo_post(file_path)
            if not isinstance(front_matter, dict):
                # Empty or scalar YAML does not give a mapping.
                front_matter = {}

            # The title may be absent, null, or a non-string (e.g. a number).
            raw_title = front_matter.get('title')
            title = '' if raw_title is None else str(raw_title).strip()
            if not title:
                title = 'Untitled'

            tags = _normalize_terms(front_matter.get('tags'), split_commas=True)
            date = parse_date(front_matter.get('date'))
            author = front_matter.get('author')

            dir_categories = get_categories_from_path(file_path, hexo_root_dir)

            categories = _normalize_terms(front_matter.get('categories'))
            # De-duplicate while keeping a stable, front-matter-first order.
            categories = list(dict.fromkeys(categories + dir_categories))

            post_id = migrate_to_wordpress(title, content, categories, tags, date, author)
            print(f"Migrated post: {title} (ID: {post_id})")
            print(f" filename: {filename}")
            print(f" Categories: {categories}")
            print(f" Tags: {tags}")
            print(f" Date: {date}")
|
|||
|
|
|||
|
|
|||
|
# Run the migration only when this file is executed as a script, so that
# importing the module does not start uploading posts.
if __name__ == "__main__":
    main()
|