PracticeDev/migrate-blog/migrate_from_hexo_to_wordpress.py
2024-09-04 16:20:19 +08:00

131 lines
4.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import re
import yaml
import markdown
from wordpress_xmlrpc import Client, WordPressPost
from wordpress_xmlrpc.methods import posts
from datetime import datetime
# WordPress connection settings.
# Each value may be overridden through an environment variable; the literals
# are kept as defaults so running the script without any environment set up
# behaves exactly as before.
# NOTE(security): a real credential is committed here as the default. Prefer
# setting WP_PASSWORD in the environment, rotating this password, and removing
# the literal from version control.
wp_url = os.environ.get("WP_URL", "https://blog.lnf1.skybyte.me/xmlrpc.php")
wp_username = os.environ.get("WP_USERNAME", "songtianlun")
wp_password = os.environ.get("WP_PASSWORD", "sotilu,WP2024")
# Root directory of the Hexo sources that will be walked for *.md posts.
hexo_root_dir = os.environ.get("HEXO_ROOT_DIR", "/home/songtianlun/Sync/Develop/frytea/source")
def _parse_front_matter_fallback(front_matter_str):
    """Parse simple ``key: value`` front matter by hand.

    Used when PyYAML cannot turn the front matter into a mapping.

    Args:
        front_matter_str: Raw text found between the front-matter delimiters.

    Returns:
        A dict. A key with an empty value maps to ``None``; a key followed by
        one continuation line maps to that line as a string; a key followed
        by several continuation lines maps to a list of strings.
    """
    front_matter = {}
    current_key = None
    for raw_line in front_matter_str.split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        if ':' in line and not line.startswith('-'):
            key, value = line.split(':', 1)
            current_key = key.strip()
            front_matter[current_key] = value.strip() or None
            continue
        if not current_key:
            # Continuation line before any key was seen: nothing to attach to.
            continue
        # Drop the YAML list marker so "- linux" is stored as "linux".
        if line == '-' or line.startswith('- '):
            line = line[1:].strip()
            if not line:
                continue
        existing = front_matter[current_key]
        if isinstance(existing, list):
            existing.append(line)
        elif existing is None:
            front_matter[current_key] = line
        else:
            front_matter[current_key] = [existing, line]
    return front_matter


def parse_hexo_post(file_path):
    """Read a Hexo Markdown post and split it into front matter and body.

    Args:
        file_path: Path of the Markdown file to read.

    Returns:
        A ``(front_matter, post_content)`` tuple. ``front_matter`` is always
        a dict (empty when the file has no front matter). ``post_content`` is
        the stripped text after the closing delimiter, or the whole file
        content when no front matter was found.
    """
    # utf-8-sig reads plain UTF-8 unchanged but drops a leading BOM, which
    # would otherwise stop the front-matter pattern from matching at all.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        content = f.read()

    # Front matter is delimited by runs of three or more '-' or '*'.
    # NOTE(review): the closing delimiter is not anchored to the start of a
    # line, so a value containing " ---" would end the front matter early.
    front_matter_match = re.match(r'^[-*]{3,}\s+(.*?)\s+[-*]{3,}\s*(.*)', content, re.DOTALL)
    if not front_matter_match:
        return {}, content

    front_matter_str = front_matter_match.group(1)
    post_content = front_matter_match.group(2).strip()

    try:
        front_matter = yaml.safe_load(front_matter_str)
    except yaml.YAMLError:
        front_matter = None

    # safe_load yields None for empty input and a scalar/list for text that
    # is not a mapping; callers rely on a dict, so fall back to manual parsing.
    if not isinstance(front_matter, dict):
        front_matter = _parse_front_matter_fallback(front_matter_str)

    return front_matter, post_content
def get_categories_from_path(file_path, root_dir):
    """Derive category names from a post's directory, relative to ``root_dir``.

    Every directory component between ``root_dir`` and the file becomes one
    category, except empty components and the literal ``_posts`` directory.

    Args:
        file_path: Path of the post file.
        root_dir: Directory the relative path is computed against.

    Returns:
        A list of directory names, outermost first.
    """
    parent_dir = os.path.dirname(os.path.relpath(file_path, root_dir))
    found = []
    for segment in parent_dir.split(os.sep):
        if not segment or segment == '_posts':
            continue
        found.append(segment)
    return found
def migrate_to_wordpress(title, content, categories, tags, date, author):
    """Publish one post to WordPress over XML-RPC and return its post ID.

    Args:
        title: Post title.
        content: Post body as Markdown; it is converted to HTML before upload.
        categories: Category names, or ``None``/empty for no categories.
        tags: Tag names, or ``None``/empty for no tags.
        date: Publication date; left unset on the post when falsy.
        author: Accepted for interface compatibility. It is not applied to
            the post, so the post is created under the account the client
            logs in with.

    Returns:
        The value returned by the ``NewPost`` XML-RPC call.
    """
    # NOTE(review): a new client is built for every post; reuse one client
    # across calls if migration speed becomes a concern.
    client = Client(wp_url, wp_username, wp_password)

    post = WordPressPost()
    post.title = title
    # Blog posts commonly use ``` fenced code blocks and pipe tables, which
    # core Markdown does not render; both extensions ship with Python-Markdown.
    post.content = markdown.markdown(content or '', extensions=['fenced_code', 'tables'])
    post.post_status = 'publish'
    # A bare "tags:" / "categories:" key in front matter arrives as None;
    # send an empty list instead of None.
    post.terms_names = {
        'category': categories or [],
        'post_tag': tags or [],
    }
    if date:
        post.date = date

    return client.call(posts.NewPost(post))
def parse_date(date_value):
    """Convert a front-matter date value into a ``datetime``.

    Args:
        date_value: A ``datetime`` (returned unchanged), a ``date``
            (converted to midnight of that day), or a string in one of the
            supported formats. Surrounding whitespace in strings is ignored.

    Returns:
        A ``datetime``, or ``None`` when the value is of another type or
        matches none of the supported formats.
    """
    # Imported locally because the module only imports the datetime class.
    from datetime import date as _date

    # datetime is a subclass of date, so it must be checked first.
    if isinstance(date_value, datetime):
        return date_value
    # YAML loaders yield a plain date for date-only values such as
    # "date: 2024-09-04"; keep it instead of discarding the post date.
    if isinstance(date_value, _date):
        return datetime(date_value.year, date_value.month, date_value.day)
    if not isinstance(date_value, str):
        return None

    text = date_value.strip()
    date_formats = (
        "%Y-%m-%d %H:%M:%S",
        "%Y-%m-%d %H:%M:%S%z",
        "%Y-%m-%d %H:%M:%S %z",
        "%Y-%m-%d",
    )
    for fmt in date_formats:
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            continue
    return None
def _normalize_terms(value, split_commas=False):
    """Coerce a front-matter tags/categories value into a flat list of strings.

    ``None`` becomes an empty list, a string becomes a one-item list (or is
    split on commas when ``split_commas`` is true), nested lists are
    flattened, and any other scalar is converted with ``str``. Empty items
    are dropped.
    """
    if value is None:
        return []
    if isinstance(value, str):
        pieces = value.split(',') if split_commas else [value]
        return [piece.strip() for piece in pieces if piece.strip()]
    if isinstance(value, (list, tuple)):
        flat = []
        for item in value:
            flat.extend(_normalize_terms(item))
        return flat
    text = str(value).strip()
    return [text] if text else []


def main():
    """Walk the Hexo source tree and publish every ``.md`` file to WordPress.

    For each post, the title, tags, date, author and categories are read from
    the front matter, directory-derived categories are appended, and a
    summary of the migrated post is printed.
    """
    for root, _dirs, files in os.walk(hexo_root_dir):
        for filename in files:
            if not filename.endswith('.md'):
                continue

            file_path = os.path.join(root, filename)
            front_matter, content = parse_hexo_post(file_path)

            # A title may be missing, empty (None) or a non-string scalar
            # such as a number; never call .strip() on those directly.
            raw_title = front_matter.get('title')
            title = 'Untitled' if raw_title is None else str(raw_title).strip()

            # A string of tags is treated as comma-separated.
            tags = _normalize_terms(front_matter.get('tags'), split_commas=True)

            date = parse_date(front_matter.get('date'))
            author = front_matter.get('author')

            dir_categories = get_categories_from_path(file_path, hexo_root_dir)
            categories = _normalize_terms(front_matter.get('categories'))
            # De-duplicate while keeping front-matter order followed by
            # directory order, so output is stable between runs.
            categories = list(dict.fromkeys(categories + dir_categories))

            post_id = migrate_to_wordpress(title, content, categories, tags, date, author)
            print(f"Migrated post: {title} (ID: {post_id})")
            print(f" filename: {filename}")
            print(f" Categories: {categories}")
            print(f" Tags: {tags}")
            print(f" Date: {date}")


if __name__ == "__main__":
    main()