From 4a1628a4f94b0a3c720295b8d534c3562f1161c6 Mon Sep 17 00:00:00 2001
From: songtianlun
Date: Wed, 4 Sep 2024 16:20:19 +0800
Subject: [PATCH] add migrate blog

---
 migrate-blog/.python-version                  |   1 +
 migrate-blog/fix_hexo_md_pre_frontmatter.py   |  46 ++++
 .../migrate_from_hexo_to_wordpress.py         | 225 +++++++++++++++++
 .../migrate_from_hexo_to_wordpress_2.py       | 237 ++++++++++++++++++
 4 files changed, 509 insertions(+)
 create mode 100644 migrate-blog/.python-version
 create mode 100644 migrate-blog/fix_hexo_md_pre_frontmatter.py
 create mode 100644 migrate-blog/migrate_from_hexo_to_wordpress.py
 create mode 100644 migrate-blog/migrate_from_hexo_to_wordpress_2.py

diff --git a/migrate-blog/.python-version b/migrate-blog/.python-version
new file mode 100644
index 0000000..78c9a28
--- /dev/null
+++ b/migrate-blog/.python-version
@@ -0,0 +1 @@
+3.9.12
diff --git a/migrate-blog/fix_hexo_md_pre_frontmatter.py b/migrate-blog/fix_hexo_md_pre_frontmatter.py
new file mode 100644
index 0000000..7cbc202
--- /dev/null
+++ b/migrate-blog/fix_hexo_md_pre_frontmatter.py
@@ -0,0 +1,46 @@
+"""Prepend a missing front-matter delimiter to Hexo Markdown posts."""
+import os
+import sys
+
+# Default Hexo posts directory, used when no path is given on the command line.
+DEFAULT_POSTS_DIR = '/home/songtianlun/Sync/Develop/frytea/source/_posts'
+
+
+def fix_frontmatter(directory):
+    """Ensure every ``.md`` file under *directory* starts with ``---``.
+
+    Walks *directory* recursively. A Markdown file whose content does not
+    already begin with ``---`` is rewritten in place with ``---`` and a
+    newline prepended; all other files are left untouched.
+
+    Args:
+        directory: Root directory to scan.
+
+    Returns:
+        The number of files that were rewritten.
+    """
+    fixed = 0
+    for root, _, files in os.walk(directory):
+        for file in files:
+            if not file.endswith('.md'):
+                continue
+            file_path = os.path.join(root, file)
+            with open(file_path, 'r+', encoding='utf-8') as f:
+                content = f.read()
+                # Nothing to do when the opening delimiter is already there.
+                if content.startswith('---'):
+                    continue
+                f.seek(0)
+                f.write('---\n' + content)
+                # Text-mode reads collapse CRLF to LF, so the rewritten text
+                # can be shorter than the file; drop any leftover bytes.
+                f.truncate()
+            fixed += 1
+            print(f"Fixed frontmatter in: {file_path}")
+    return fixed
+
+
+if __name__ == "__main__":
+    # An optional first argument overrides the default posts directory.
+    hexo_posts_dir = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_POSTS_DIR
+    fix_frontmatter(hexo_posts_dir)
diff --git a/migrate-blog/migrate_from_hexo_to_wordpress.py b/migrate-blog/migrate_from_hexo_to_wordpress.py
new file mode 100644
index 0000000..1ccfdd0
--- /dev/null
+++ b/migrate-blog/migrate_from_hexo_to_wordpress.py
@@ -0,0 +1,225 @@
+"""Migrate Hexo Markdown posts to WordPress over XML-RPC."""
+import os
+import re
+import yaml
+import markdown
+from wordpress_xmlrpc import Client, WordPressPost
+from wordpress_xmlrpc.methods import posts
+from datetime import date as _date, datetime
+
+# WordPress settings
+wp_url = "https://blog.lnf1.skybyte.me/xmlrpc.php"
+wp_username = "songtianlun"
+# Never commit credentials: the password is read from the environment.
+wp_password = os.environ.get("WP_PASSWORD", "")
+
+# Hexo source directory
+hexo_root_dir = "/home/songtianlun/Sync/Develop/frytea/source"
+
+# Front matter: a run of 3+ '-' or '*', the metadata, a closing run, the body.
+FRONT_MATTER_RE = re.compile(r'^[-*]{3,}\s+(.*?)\s+[-*]{3,}\s*(.*)', re.DOTALL)
+
+
+def _parse_front_matter_loosely(front_matter_str):
+    """Parse ``key: value`` lines by hand when the YAML parser rejects them.
+
+    Continuation lines, including ``- item`` list entries, are attached to the
+    most recent key; a key that collects several values becomes a list.
+    """
+    front_matter = {}
+    current_key = None
+    for line in front_matter_str.split('\n'):
+        stripped = line.strip()
+        if ':' in line and not stripped.startswith('-'):
+            key, value = line.split(':', 1)
+            current_key = key.strip()
+            front_matter[current_key] = value.strip() or None
+            continue
+        # Drop the list marker so "- foo" is stored as "foo".
+        item = stripped[1:].strip() if stripped.startswith('-') else stripped
+        if not current_key or not item:
+            continue
+        existing = front_matter[current_key]
+        if isinstance(existing, list):
+            existing.append(item)
+        elif existing is None:
+            front_matter[current_key] = item
+        else:
+            front_matter[current_key] = [existing, item]
+    return front_matter
+
+
+def parse_hexo_post(file_path):
+    """Split a Hexo post into its front matter and Markdown body.
+
+    Args:
+        file_path: Path of the UTF-8 Markdown file to read.
+
+    Returns:
+        A ``(front_matter, post_content)`` tuple. ``front_matter`` is always
+        a dict, empty when the file has no front matter or the front matter
+        is not a mapping. ``post_content`` is the text after the front
+        matter, or the whole file when none was found.
+    """
+    with open(file_path, 'r', encoding='utf-8') as f:
+        content = f.read()
+
+    front_matter_match = FRONT_MATTER_RE.match(content)
+    if not front_matter_match:
+        return {}, content
+
+    front_matter_str = front_matter_match.group(1)
+    post_content = front_matter_match.group(2).strip()
+
+    try:
+        front_matter = yaml.safe_load(front_matter_str)
+    except yaml.YAMLError:
+        front_matter = _parse_front_matter_loosely(front_matter_str)
+
+    # safe_load yields None or a scalar for empty or non-mapping input, but
+    # callers use dict access on the result.
+    if not isinstance(front_matter, dict):
+        front_matter = {}
+
+    return front_matter, post_content
+
+
+def get_categories_from_path(file_path, root_dir):
+    """Derive category names from a post's directory below *root_dir*.
+
+    Every directory component between *root_dir* and the file becomes a
+    category, except components named ``_posts``.
+    """
+    rel_path = os.path.relpath(file_path, root_dir)
+    dir_path = os.path.dirname(rel_path)
+    categories = dir_path.split(os.sep)
+    return [cat for cat in categories if cat and cat != '_posts']
+
+
+def _as_str_list(value, split_commas=False):
+    """Normalise a front-matter value into a flat list of non-empty strings.
+
+    ``None`` becomes ``[]``, nested lists are flattened, and other scalars are
+    converted with ``str``. With *split_commas*, a top-level string is split
+    on commas.
+    """
+    if value is None:
+        return []
+    if isinstance(value, str):
+        parts = value.split(',') if split_commas else [value]
+        return [part.strip() for part in parts if part.strip()]
+    if isinstance(value, (list, tuple)):
+        flat = []
+        for item in value:
+            flat.extend(_as_str_list(item))
+        return flat
+    return [str(value)]
+
+
+def migrate_to_wordpress(title, content, categories, tags, date, author,
+                         client=None):
+    """Publish one post to WordPress and return its new post ID.
+
+    Args:
+        title: Post title.
+        content: Post body in Markdown; converted to HTML before upload.
+        categories: Category names for the post.
+        tags: Tag names for the post.
+        date: Publication ``datetime``; falsy values leave the date unset.
+        author: Accepted for interface compatibility; currently unused.
+        client: Optional XML-RPC ``Client`` to reuse. When omitted, a new
+            one is created for this call.
+
+    Returns:
+        The value returned by the ``NewPost`` XML-RPC call.
+    """
+    if client is None:
+        client = Client(wp_url, wp_username, wp_password)
+
+    post = WordPressPost()
+    post.title = title
+    post.content = markdown.markdown(content)
+    post.post_status = 'publish'
+    post.terms_names = {
+        'category': categories,
+        'post_tag': tags
+    }
+
+    if date:
+        post.date = date
+
+    return client.call(posts.NewPost(post))
+
+
+def parse_date(date_value):
+    """Convert a front-matter date into a ``datetime``.
+
+    Args:
+        date_value: A ``datetime``, a ``date`` (what YAML produces for a
+            bare ``YYYY-MM-DD`` value), or a string in a supported format.
+
+    Returns:
+        The corresponding ``datetime``, or ``None`` when the value is
+        missing or not recognised.
+    """
+    if isinstance(date_value, datetime):
+        return date_value
+
+    if isinstance(date_value, _date):
+        # Promote a plain date to midnight instead of silently dropping it.
+        return datetime.combine(date_value, datetime.min.time())
+
+    if not isinstance(date_value, str):
+        return None
+
+    date_formats = [
+        "%Y-%m-%d %H:%M:%S",
+        "%Y-%m-%d %H:%M:%S%z",
+        "%Y-%m-%d %H:%M:%S %z",
+        "%Y-%m-%d"
+    ]
+    for fmt in date_formats:
+        try:
+            return datetime.strptime(date_value, fmt)
+        except ValueError:
+            continue
+    return None
+
+
+def main():
+    """Walk the Hexo source tree and upload every Markdown post found."""
+    if not wp_password:
+        raise SystemExit("WP_PASSWORD is not set; export it before running.")
+
+    # Reuse one client for the whole run instead of building one per post.
+    client = Client(wp_url, wp_username, wp_password)
+
+    for root, dirs, files in os.walk(hexo_root_dir):
+        for filename in files:
+            if not filename.endswith('.md'):
+                continue
+            file_path = os.path.join(root, filename)
+            front_matter, content = parse_hexo_post(file_path)
+
+            # The title may be missing, empty, or a non-string scalar.
+            title = str(front_matter.get('title') or 'Untitled').strip()
+            tags = _as_str_list(front_matter.get('tags'), split_commas=True)
+            date = parse_date(front_matter.get('date'))
+            author = front_matter.get('author')
+
+            dir_categories = get_categories_from_path(file_path, hexo_root_dir)
+            categories = _as_str_list(front_matter.get('categories'))
+            # De-duplicate while keeping a stable order.
+            categories = list(dict.fromkeys(categories + dir_categories))
+
+            post_id = migrate_to_wordpress(
+                title, content, categories, tags, date, author, client=client)
+            print(f"Migrated post: {title} (ID: {post_id})")
+            print(f" filename: {filename}")
+            print(f" Categories: {categories}")
+            print(f" Tags: {tags}")
+            print(f" Date: {date}")
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/migrate-blog/migrate_from_hexo_to_wordpress_2.py b/migrate-blog/migrate_from_hexo_to_wordpress_2.py
new file mode 100644
index 0000000..6448f5d
--- /dev/null
+++ b/migrate-blog/migrate_from_hexo_to_wordpress_2.py
@@ -0,0 +1,237 @@
+"""Upload Hexo posts to WordPress, skipping titles that already exist."""
+import os
+import frontmatter
+from wordpress_xmlrpc import Client, WordPressPost
+from wordpress_xmlrpc.methods.posts import GetPosts, NewPost
+from datetime import date as _date, datetime
+
+# Configuration
+hexo_posts_dir = '/home/songtianlun/Sync/Develop/frytea/source/_posts'  # Hexo posts directory
+wordpress_url = "https://blog.lnf1.skybyte.me/xmlrpc.php"
+wordpress_username = 'songtianlun'
+# Never commit credentials: the password is read from the environment.
+wordpress_password = os.environ.get('WP_PASSWORD', '')
+
+# Initialise the WordPress client
+wp = Client(wordpress_url, wordpress_username, wordpress_password)
+
+# Upload statistics
+total_posts = 0
+successful_posts = 0
+failed_posts = 0
+duplicate_posts = []
+
+# Layouts accepted for string-valued front-matter dates.
+DATE_FORMATS = (
+    '%Y-%m-%d %H:%M:%S',
+    '%Y-%m-%d %H:%M:%S%z',
+    '%Y-%m-%d %H:%M:%S %z',
+    '%Y-%m-%d',
+)
+
+
+def get_all_wp_posts():
+    """Fetch every existing WordPress post, one page at a time.
+
+    Returns:
+        A dict mapping post title to post ID. When several posts share a
+        title, the last one fetched wins.
+    """
+    print("Fetching existing posts from WordPress...")
+    all_posts = []
+    offset = 0
+    increment = 20  # Posts requested per call; adjustable.
+    while True:
+        batch = wp.call(GetPosts({'number': increment, 'offset': offset}))
+        if not batch:
+            break
+        all_posts.extend(batch)
+        offset += increment
+    print(f"Total existing posts fetched: {len(all_posts)}")
+    return {post.title: post.id for post in all_posts}
+
+
+def scan_directory_for_posts(directory, category_prefix=""):
+    """Collect every Markdown post under *directory*.
+
+    Args:
+        directory: Root directory to walk.
+        category_prefix: Text prepended to each folder-derived category.
+
+    Returns:
+        A list of ``(title, post_data, category, post_number)`` tuples.
+        ``category`` is the containing folder's name, or ``""`` for files
+        directly in *directory* so the root folder itself never becomes a
+        category. ``post_number`` counts from 1 in discovery order.
+    """
+    posts = []
+    post_count = 1
+    top = os.path.abspath(directory)
+    for root, dirs, files in os.walk(directory):
+        if os.path.abspath(root) == top:
+            category = ""
+        else:
+            category = category_prefix + os.path.basename(root)
+        for file in files:
+            if not file.endswith('.md'):
+                continue
+            file_path = os.path.join(root, file)
+            post_title, post_data = process_md_file(file_path)
+            if post_title and post_data:
+                posts.append((post_title, post_data, category, post_count))
+                post_count += 1
+    return posts
+
+
+def process_md_file(file_path):
+    """Load one Markdown file and determine its title.
+
+    Returns:
+        ``(title, post_data)``, where the title falls back to the file name
+        when the front matter has none; ``(None, None)`` when the file
+        cannot be read or parsed.
+    """
+    try:
+        with open(file_path, 'r', encoding='utf-8') as f:
+            post_data = frontmatter.load(f)
+    except Exception as e:
+        # One malformed file must not abort the whole scan; the caller skips
+        # entries whose title or data is None.
+        print(f"Skipping '{file_path}': {e}")
+        return None, None
+
+    title = post_data['title'] if 'title' in post_data else None
+    if title is None or not str(title).strip():
+        return os.path.basename(file_path), post_data
+    return str(title).strip(), post_data
+
+
+def _parse_post_date(value):
+    """Return *value* as a ``datetime``, or ``None`` if it cannot be parsed."""
+    if value is None:
+        return None
+    if isinstance(value, datetime):
+        return value
+    if isinstance(value, _date):
+        return datetime.combine(value, datetime.min.time())
+    text = str(value).strip()
+    for fmt in DATE_FORMATS:
+        try:
+            return datetime.strptime(text, fmt)
+        except ValueError:
+            continue
+    return None
+
+
+def _normalize_tags(value):
+    """Return front-matter tags as a list of non-empty strings."""
+    if value is None:
+        return []
+    if isinstance(value, str):
+        value = value.split(',')
+    elif not isinstance(value, (list, tuple)):
+        value = [value]
+    tags = (str(tag).strip() for tag in value if tag is not None)
+    return [tag for tag in tags if tag]
+
+
+def create_wordpress_post(post_title, post_data, category):
+    """Build a ``WordPressPost`` from parsed front matter and content.
+
+    Args:
+        post_title: Title to publish under.
+        post_data: Parsed post exposing ``content`` and front-matter keys.
+        category: Category name; an empty value assigns no category.
+
+    Returns:
+        The populated, not yet uploaded, ``WordPressPost``.
+    """
+    post = WordPressPost()
+    post.title = post_title
+    # NOTE(review): the body is sent as stored, with no Markdown-to-HTML
+    # conversion here - confirm the target site renders Markdown.
+    post.content = post_data.content
+    post.terms_names = {
+        'category': [category] if category else [],
+        'post_tag': _normalize_tags(
+            post_data['tags'] if 'tags' in post_data else None),
+    }
+
+    if 'date' in post_data:
+        raw_date = post_data['date']
+        parsed_date = _parse_post_date(raw_date)
+        if parsed_date is not None:
+            post.date = parsed_date
+        elif raw_date is not None:
+            # An unparseable date should not abort the run; leave it unset.
+            print(f"Post '{post_title}': unrecognised date {raw_date!r}, ignored.")
+
+    post.post_status = 'publish'  # or 'draft' to save as a draft
+
+    return post
+
+
+def upload_post_with_retries(post, post_number, retries=3):
+    """Upload *post*, retrying on any error up to *retries* attempts.
+
+    Updates the module-level ``successful_posts`` / ``failed_posts`` counters.
+
+    Returns:
+        ``True`` once an upload succeeds, ``False`` when every attempt fails.
+    """
+    global successful_posts, failed_posts
+    attempt = 0
+    while attempt < retries:
+        try:
+            post_id = wp.call(NewPost(post))
+            print(f"Post {post_number}: '{post.title}' - Upload successful. WordPress ID: {post_id}")
+            successful_posts += 1
+            return True
+        except Exception as e:
+            attempt += 1
+            print(f"Post {post_number}: '{post.title}' - Attempt {attempt} failed with error: {e}")
+            if attempt == retries:
+                print(f"Post {post_number}: '{post.title}' - Failed to upload after {retries} attempts.")
+                failed_posts += 1
+    return False
+
+
+def upload_posts_to_wordpress(posts_to_upload):
+    """Upload each ``(post, post_number)`` pair and record the total count."""
+    global total_posts
+    total_posts = len(posts_to_upload)
+    for post, post_number in posts_to_upload:
+        try:
+            upload_post_with_retries(post, post_number)
+        except Exception:
+            print(f"Post {post_number}: '{post.title}' - Failed permanently.")
+
+
+if __name__ == "__main__":
+    if not wordpress_password:
+        raise SystemExit("WP_PASSWORD is not set; export it before running.")
+
+    existing_wp_posts = get_all_wp_posts()
+    posts = scan_directory_for_posts(hexo_posts_dir)
+    posts_to_upload = []
+
+    for post_title, post_data, category, post_number in posts:
+        if post_title in existing_wp_posts:
+            print(f"Post '{post_title}' skipped: already exists.")
+            duplicate_posts.append((post_title, post_number))
+        else:
+            post = create_wordpress_post(post_title, post_data, category)
+            posts_to_upload.append((post, len(posts_to_upload) + 1))
+
+    upload_posts_to_wordpress(posts_to_upload)
+
+    print("Migration completed!")
+    print(f"Total posts processed: {total_posts}")
+    print(f"Successfully uploaded: {successful_posts}")
+    print(f"Failed uploads: {failed_posts}")
+    if duplicate_posts:
+        print("\nDuplicate posts found:")
+        for title, post_number in duplicate_posts:
+            print(f"Title: {title} - File Number: {post_number}")
+    else:
+        print("\nNo duplicate posts found.")