"""Migrate Hexo Markdown posts to a WordPress site over XML-RPC.

The script walks ``hexo_root_dir``, parses the front matter of every ``.md``
file, converts the Markdown body to HTML and publishes it as a WordPress post.
"""
import os
import re
from datetime import date as _date
from datetime import datetime

import markdown
import yaml
from wordpress_xmlrpc import Client, WordPressPost
from wordpress_xmlrpc.methods import posts

# WordPress settings.
# NOTE(review): the credentials below are committed in plain text. They are
# kept only as defaults so existing invocations keep working; prefer setting
# WP_URL / WP_USERNAME / WP_PASSWORD in the environment, then rotate the
# password and remove the literal from source.
wp_url = os.environ.get("WP_URL", "https://blog.lnf1.skybyte.me/xmlrpc.php")
wp_username = os.environ.get("WP_USERNAME", "songtianlun")
wp_password = os.environ.get("WP_PASSWORD", "sotilu,WP2024")

# Hexo post directory (override with HEXO_ROOT_DIR).
hexo_root_dir = os.environ.get(
    "HEXO_ROOT_DIR", "/home/songtianlun/Sync/Develop/frytea/source"
)

# Front matter: a run of 3+ '-' or '*', the metadata, another such run, then
# the body. Deliberately lenient about the delimiter characters.
_FRONT_MATTER_RE = re.compile(
    r'^[-*]{3,}\s+(.*?)\s+[-*]{3,}\s*(.*)', re.DOTALL
)

# Formats tried in order by parse_date(); the first four are the original set.
_DATE_FORMATS = (
    "%Y-%m-%d %H:%M:%S",
    "%Y-%m-%d %H:%M:%S%z",
    "%Y-%m-%d %H:%M:%S %z",
    "%Y-%m-%d",
    "%Y-%m-%dT%H:%M:%S",
    "%Y-%m-%dT%H:%M:%S%z",
    "%Y-%m-%dT%H:%M:%S.%f",
    "%Y-%m-%dT%H:%M:%S.%f%z",
    "%Y-%m-%d %H:%M",
)


def _parse_front_matter_fallback(front_matter_str):
    """Parse ``key: value`` front matter by hand when PyYAML rejects it.

    Lines of the form ``key: value`` start a new key. Following ``- item``
    lines are collected into a list under that key (without the dash); any
    other non-blank line is treated as a continuation of the current value.
    """
    front_matter = {}
    current_key = None
    for line in front_matter_str.split('\n'):
        stripped = line.strip()
        if not stripped:
            continue
        if ':' in line and not stripped.startswith('-'):
            key, value = line.split(':', 1)
            current_key = key.strip()
            front_matter[current_key] = value.strip() or None
        elif current_key:
            is_list_item = stripped.startswith('-')
            item = stripped[1:].strip() if is_list_item else stripped
            if not item:
                continue  # a bare "-" carries no value
            existing = front_matter[current_key]
            if isinstance(existing, list):
                existing.append(item)
            elif existing is None:
                # A single "- item" is still a list, not a scalar.
                front_matter[current_key] = [item] if is_list_item else item
            else:
                front_matter[current_key] = [existing, item]
    return front_matter


def parse_hexo_post(file_path):
    """Read a Hexo post and split it into front matter and body.

    Args:
        file_path: Path of the Markdown file to read.

    Returns:
        A ``(front_matter, post_content)`` tuple. ``front_matter`` is always a
        dict (empty when the file has no usable front matter) and
        ``post_content`` is the Markdown body.
    """
    # utf-8-sig also reads plain UTF-8 but drops a leading BOM, which would
    # otherwise stop the '^' anchor from matching the opening delimiter.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        content = f.read()

    # Extract the front matter, allowing flexible delimiters.
    match = _FRONT_MATTER_RE.match(content)
    if not match:
        return {}, content

    front_matter_str = match.group(1)
    post_content = match.group(2).strip()

    # Parse the front matter with PyYAML; fall back to manual parsing if the
    # YAML is malformed.
    try:
        front_matter = yaml.safe_load(front_matter_str)
    except yaml.YAMLError:
        front_matter = _parse_front_matter_fallback(front_matter_str)

    if front_matter is None:
        # Empty front-matter section.
        return {}, post_content
    if not isinstance(front_matter, dict):
        # The delimiters enclosed something that is not a mapping (e.g. the
        # post merely starts with a horizontal rule): keep the whole text.
        return {}, content
    return front_matter, post_content


def get_categories_from_path(file_path, root_dir):
    """Derive category names from the post's directory relative to the root.

    Args:
        file_path: Path of the post file.
        root_dir: Root directory the categories are taken relative to.

    Returns:
        The directory components between ``root_dir`` and the file, without
        empty parts, ``_posts``, or the ``.``/``..`` pseudo-directories.
    """
    rel_path = os.path.relpath(file_path, root_dir)
    dir_path = os.path.dirname(rel_path)
    skipped = {'', '_posts', os.curdir, os.pardir}
    return [cat for cat in dir_path.split(os.sep) if cat not in skipped]


def migrate_to_wordpress(title, content, categories, tags, date, author,
                         client=None):
    """Publish one post to WordPress and return the new post ID.

    Args:
        title: Post title.
        content: Post body in Markdown; converted to HTML before upload.
        categories: Category names for the post.
        tags: Tag names for the post.
        date: Publication ``datetime``; left unset on the post when falsy.
        author: Accepted for interface compatibility; currently not sent.
        client: Optional existing ``Client`` to reuse. When omitted, a new
            one is built from the module-level settings.

    Returns:
        The value returned by the ``NewPost`` XML-RPC call.
    """
    if client is None:
        client = Client(wp_url, wp_username, wp_password)

    post = WordPressPost()
    post.title = title
    post.content = markdown.markdown(content)
    post.post_status = 'publish'
    post.terms_names = {
        'category': categories,
        'post_tag': tags,
    }
    if date:
        post.date = date

    return client.call(posts.NewPost(post))


def parse_date(date_value):
    """Convert a front-matter date value into a ``datetime``.

    Args:
        date_value: A ``datetime``, a ``date`` (what PyYAML yields for a
            date-only value), or a string in one of ``_DATE_FORMATS``.

    Returns:
        A ``datetime``, or ``None`` when the value cannot be interpreted.
    """
    if isinstance(date_value, datetime):
        return date_value
    if isinstance(date_value, _date):
        # Date-only value: use midnight of that day.
        return datetime(date_value.year, date_value.month, date_value.day)
    if not isinstance(date_value, str):
        return None

    text = date_value.strip()
    for fmt in _DATE_FORMATS:
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            continue
    return None


def _as_str_list(value, split_commas=False):
    """Normalise a tags/categories front-matter value to a flat list of str.

    ``None`` becomes ``[]``; a string becomes a one-element list (or is split
    on commas when ``split_commas`` is true); lists are flattened and their
    items converted to strings.
    """
    if value is None:
        return []
    if isinstance(value, str):
        if split_commas:
            return [part.strip() for part in value.split(',') if part.strip()]
        return [value.strip()] if value.strip() else []
    if isinstance(value, (list, tuple, set)):
        flat = []
        for item in value:
            # NOTE(review): nested lists are flattened, so any hierarchy they
            # were meant to express is not preserved.
            flat.extend(_as_str_list(item))
        return flat
    return [str(value)]


def main():
    """Walk ``hexo_root_dir`` and publish every ``.md`` file found."""
    client = None
    for root, dirs, files in os.walk(hexo_root_dir):
        for filename in files:
            if not filename.endswith('.md'):
                continue

            file_path = os.path.join(root, filename)
            front_matter, content = parse_hexo_post(file_path)

            # The title may be missing, empty, or a non-string scalar.
            raw_title = front_matter.get('title')
            title = '' if raw_title is None else str(raw_title).strip()
            if not title:
                title = 'Untitled'

            tags = _as_str_list(front_matter.get('tags'), split_commas=True)
            date = parse_date(front_matter.get('date'))
            author = front_matter.get('author')

            dir_categories = get_categories_from_path(file_path, hexo_root_dir)
            categories = _as_str_list(front_matter.get('categories'))
            # De-duplicate while keeping a stable order.
            categories = list(dict.fromkeys(categories + dir_categories))

            # Build the client on first use and share it across all posts.
            if client is None:
                client = Client(wp_url, wp_username, wp_password)

            post_id = migrate_to_wordpress(
                title, content, categories, tags, date, author, client=client
            )
            print(f"Migrated post: {title} (ID: {post_id})")
            print(f" filename: {filename}")
            print(f" Categories: {categories}")
            print(f" Tags: {tags}")
            print(f" Date: {date}")


if __name__ == "__main__":
    main()