From f9c247264f7f5daa9ba1d7f7fff545ded410eb8a Mon Sep 17 00:00:00 2001 From: songtianlun Date: Thu, 5 Sep 2024 00:46:22 +0800 Subject: [PATCH] add mi --- migrate-blog/hexo_to_typecho/migrate.py | 131 -------------------- migrate-blog/hexo_to_typecho/migrate2.py | 147 +++++++++++++++++++++++ 2 files changed, 147 insertions(+), 131 deletions(-) delete mode 100644 migrate-blog/hexo_to_typecho/migrate.py create mode 100644 migrate-blog/hexo_to_typecho/migrate2.py diff --git a/migrate-blog/hexo_to_typecho/migrate.py b/migrate-blog/hexo_to_typecho/migrate.py deleted file mode 100644 index 9d11a0e..0000000 --- a/migrate-blog/hexo_to_typecho/migrate.py +++ /dev/null @@ -1,131 +0,0 @@ -import os -import frontmatter -from pytypecho import Typecho -from datetime import datetime - -# 配置部分 -hexo_posts_dir = '/home/songtianlun/Sync/Develop/frytea/source/_posts' # Hexo 文章目录 -wordpress_url = "https://blog.lnf1.skybyte.me/xmlrpc.php" -wordpress_username = 'songtianlun' -wordpress_password = 'sotilu,WP2024' - -typecho_url = "https://oskyla.com/action/xmlrpc" -typecho_username = 'songtianlun' -typecho_password = 'sotilu,typecho2019' - -tp = Typecho(typecho_url, typecho_username, typecho_password) - -# 上传统计信息 -total_posts = 0 -successful_posts = 0 -failed_posts = 0 -duplicate_posts = [] - - -def get_all_posts(): - print("Fetching existing posts from WordPress...") - max_posts = 1000 - all_posts = tp.get_posts(max_posts=max_posts) - print(f"Total existing posts fetched: {len(all_posts)}") - return {post.title: post.postid for post in all_posts} - - -def scan_directory_for_posts(directory, category_prefix=""): - posts = [] - post_count = 1 - for root, dirs, files in os.walk(directory): - category = category_prefix + os.path.basename(root) # 使用文件夹作为类别 - for file in files: - if file.endswith('.md'): - file_path = os.path.join(root, file) - post_title, post_data = process_md_file(file_path) - if post_title and post_data: - posts.append((post_title, post_data, category, post_count)) - 
post_count += 1 - return posts - - -def process_md_file(file_path): - with open(file_path, 'r', encoding='utf-8') as f: - post_data = frontmatter.load(f) - - # 构造 WordPress 文章 - post_title = post_data['title'] if 'title' in post_data else os.path.basename(file_path) - return post_title, post_data - - -def create_typecho_post(post_title, post_data, category): - post = Typecho.Post( - title=post_title, - categories=[category], - - ) - post.title = post_title - post.content = post_data.content - post.terms_names = { - 'category': [category], - 'post_tag': post_data['tags'] if 'tags' in post_data else [], - } - - if 'date' in post_data: - post.date = datetime.strptime(str(post_data['date']), '%Y-%m-%d %H:%M:%S') - - post.post_status = 'publish' # 或 'draft' 保存为草稿 - - return post - - -def upload_post_with_retries(post, post_number, retries=3): - global successful_posts, failed_posts - attempt = 0 - while attempt < retries: - try: - post_id = wp.call(NewPost(post)) - print(f"Post {post_number}: '{post.title}' - Upload successful. 
WordPress ID: {post_id}") - successful_posts += 1 - return True - except Exception as e: - attempt += 1 - print(f"Post {post_number}: '{post.title}' - Attempt {attempt} failed with error: {e}") - if attempt == retries: - print(f"Post {post_number}: '{post.title}' - Failed to upload after {retries} attempts.") - failed_posts += 1 - return False - - -def upload_posts_to_wordpress(posts_to_upload): - global total_posts - total_posts = len(posts_to_upload) - for post, post_number in posts_to_upload: - try: - upload_post_with_retries(post, post_number) - except Exception: - print(f"Post {post_number}: '{post.title}' - Failed permanently.") - - -if __name__ == "__main__": - existing_wp_posts = get_all_posts() - posts = scan_directory_for_posts(hexo_posts_dir) - posts_to_upload = [] - - for post_title, post_data, category, post_number in posts: - if post_title in existing_wp_posts: - print(f"Post '{post_title}' skipped: already exists.") - duplicate_posts.append((post_title, post_number)) - else: - post = create_wordpress_post(post_title, post_data, category) - posts_to_upload.append((post, len(posts_to_upload) + 1)) - - upload_posts_to_wordpress(posts_to_upload) - - print("Migration completed!") - print(f"Total posts processed: {total_posts}") - print(f"Successfully uploaded: {successful_posts}") - print(f"Failed uploads: {failed_posts}") - if duplicate_posts: - print("\nDuplicate posts found:") - for title, post_number in duplicate_posts: - print(f"Title: {title} - File Number: {post_number}") - else: - print("\nNo duplicate posts found.") - diff --git a/migrate-blog/hexo_to_typecho/migrate2.py b/migrate-blog/hexo_to_typecho/migrate2.py new file mode 100644 index 0000000..cf1371f --- /dev/null +++ b/migrate-blog/hexo_to_typecho/migrate2.py @@ -0,0 +1,147 @@ +import os +import yaml +import frontmatter +from pytypecho import Typecho, Post +import datetime + +# Typecho 博客配置 +TYPECHO_URL = 'https://your_domain/action/xmlrpc' # 替换为你的 Typecho API URL +USERNAME = 'your_name' 
PASSWORD = 'your_pwd'

# Directory that holds the Hexo Markdown posts to migrate.
HEXO_BLOG_DIR = '/root/frytea/source/source/_posts'

# Encodings tried, in order, when reading a post. 'iso-8859-1' maps every
# byte to a character, so decoding with it never raises UnicodeDecodeError;
# it therefore acts as the last-resort fallback.
_CANDIDATE_ENCODINGS = ('utf-8', 'gbk', 'iso-8859-1')

# String layouts accepted when a front-matter date was not already turned
# into a date/datetime object by the YAML parser.
_DATE_FORMATS = ('%Y-%m-%d %H:%M:%S', '%Y-%m-%d')


def connect_to_typecho():
    """Build the Typecho client from the module-level connection settings.

    Returns:
        The ``Typecho`` client, or ``None`` when constructing it raised.
    """
    try:
        typecho = Typecho(TYPECHO_URL, USERNAME, PASSWORD)
    except Exception as e:  # script boundary: report and let main() bail out
        print(f"连接到 Typecho 失败: {e}")
        return None
    print("成功连接到 Typecho")
    return typecho


def get_existing_posts(typecho):
    """Return the titles of the posts the Typecho client reports.

    Calls ``typecho.get_posts(1000)`` (presumably a cap on the number of
    posts fetched -- confirm against pytypecho) and collects each post's
    ``'title'``.

    Returns:
        A list of titles. Any failure is printed and an empty list is
        returned, so the caller carries on as if no post existed yet.
    """
    print("正在获取现有的 Typecho 文章...")
    try:
        existing_titles = [post['title'] for post in typecho.get_posts(1000)]
    except Exception as e:
        print(f"获取现有文章失败: {e}")
        return []
    print(f"成功获取到 {len(existing_titles)} 篇现有文章")
    return existing_titles


def _as_list(value):
    """Coerce a front-matter field to a new list, dropping empty entries.

    Hexo allows ``categories: Tech`` as well as a YAML list, so a scalar is
    wrapped instead of being treated as a sequence. A copy is returned so
    the parsed front matter is never mutated.
    """
    if value is None or value == '':
        return []
    if isinstance(value, (list, tuple, set)):
        return [item for item in value if item is not None and item != '']
    return [value]


def _directory_categories(file_path):
    """Return the sub-directory names between HEXO_BLOG_DIR and the file.

    A post stored directly in HEXO_BLOG_DIR yields an empty list (the plain
    ``''.split(os.sep)`` would yield ``['']``, i.e. an empty category).
    """
    rel_dir = os.path.dirname(os.path.relpath(file_path, HEXO_BLOG_DIR))
    ignored = ('', os.curdir, os.pardir)
    return [part for part in rel_dir.split(os.sep) if part not in ignored]


def _post_to_dict(post, file_path):
    """Flatten a parsed front-matter post into the dict used by the importer."""
    if 'title' not in post:
        # Leftover diagnostic: dump the post whose front matter has no title.
        print(post)
    fallback_title = os.path.splitext(os.path.basename(file_path))[0]
    title = post.get('title', fallback_title)
    date = post.get('date')

    categories = _as_list(post.get('categories', []))
    categories.extend(_directory_categories(file_path))

    print(f"成功解析文章: {title}, time {date}")
    return {
        'title': title,
        'date': date,
        'text': post.content,
        'categories': categories,
        'tags': _as_list(post.get('tags', [])),
    }


def parse_hexo_post(file_path):
    """Read one Hexo Markdown file and extract what the importer needs.

    The file is decoded with each entry of ``_CANDIDATE_ENCODINGS`` in turn;
    a ``UnicodeDecodeError`` moves on to the next encoding.

    Args:
        file_path: Path of the ``.md`` file.

    Returns:
        A dict with the keys ``'title'``, ``'date'``, ``'text'``,
        ``'categories'`` and ``'tags'``, or ``None`` when the file could not
        be read or parsed. ``'date'`` is ``None`` when the front matter has
        no date. ``'categories'`` is the front-matter categories followed by
        the directory names relative to ``HEXO_BLOG_DIR``.
    """
    print(f"正在解析 Hexo 文章: {file_path}")
    for encoding in _CANDIDATE_ENCODINGS:
        try:
            with open(file_path, 'r', encoding=encoding) as f:
                post = frontmatter.load(f)
            return _post_to_dict(post, file_path)
        except UnicodeDecodeError:
            continue
        except Exception as e:  # one bad file must not abort the whole run
            print(f"解析文章失败: {file_path} - {e}")
            return None

    print(f"无法使用任何编码解析文章: {file_path}")
    return None


def _to_datetime(value):
    """Turn a front-matter date into a ``datetime``, or ``None`` if unusable.

    YAML may deliver a ``datetime``, a date-only ``date``, or (when the value
    was quoted) a plain string; anything else is treated as "no date".
    """
    if isinstance(value, datetime.datetime):
        return value
    if isinstance(value, datetime.date):
        return datetime.datetime.combine(value, datetime.time.min)
    if isinstance(value, str):
        for layout in _DATE_FORMATS:
            try:
                return datetime.datetime.strptime(value.strip(), layout)
            except ValueError:
                continue
    return None


def _as_utc(moment):
    """Return ``moment`` as a timezone-aware UTC datetime.

    A naive value is labelled as UTC (the script's original behaviour --
    NOTE(review): Hexo dates are usually site-local time, confirm this is
    intended). An aware value is converted rather than relabelled.
    """
    if moment.tzinfo is None:
        return moment.replace(tzinfo=datetime.timezone.utc)
    return moment.astimezone(datetime.timezone.utc)


def _build_post(post_data):
    """Create the ``Post`` object to upload from a parsed post dict."""
    post = Post(
        title=post_data['title'],
        description=post_data['text'][:200],  # first 200 characters as summary
    )
    post.text = post_data['text']

    if post_data['categories']:
        post.categories = post_data['categories']
    if post_data['tags']:
        post.tags = post_data['tags']

    # The 'date' key is always present, so test the value, not the key:
    # posts without a date simply keep the Post defaults.
    created = _to_datetime(post_data.get('date'))
    if created is not None:
        post.dateCreated = created
        post.date_created_gmt = _as_utc(created)
    return post


def _confirm_import(title):
    """Ask the operator what to do with one post.

    Returns:
        ``'c'`` to import this and all remaining posts without asking,
        ``'s'`` to import only this post, anything else to skip it.
    """
    print(f"准备导入文章: {title}")
    return input("输入 'c' 继续导入所有, 's' 导入当前文章, 其他键跳过: ").lower()


def import_posts(typecho, existing_posts):
    """Walk HEXO_BLOG_DIR and upload every new ``.md`` post to Typecho.

    Posts whose title is already in ``existing_posts`` are skipped. Until the
    operator answers ``'c'``, each post is confirmed interactively. A failed
    upload is printed and the run continues with the next file.

    Args:
        typecho: Client object exposing ``new_post(post, publish=True)``.
        existing_posts: Collection of titles already present on the site.
    """
    print(f"开始导入文章,Hexo 博客目录: {HEXO_BLOG_DIR}")
    continue_all = False
    imported_count = 0
    skipped_count = 0

    for root, _, files in os.walk(HEXO_BLOG_DIR):
        for filename in files:
            if not filename.endswith('.md'):
                continue

            post_data = parse_hexo_post(os.path.join(root, filename))
            if not post_data:
                continue

            title = post_data['title']
            if title in existing_posts:
                print(f"跳过已存在的文章: {title}")
                skipped_count += 1
                continue

            if not continue_all:
                action = _confirm_import(title)
                if action == 'c':
                    continue_all = True
                elif action != 's':
                    print(f"跳过: {title}")
                    skipped_count += 1
                    continue

            try:
                result = typecho.new_post(_build_post(post_data), publish=True)
            except Exception as e:  # keep migrating the remaining posts
                print(f"导入失败: {title} - {e}")
            else:
                print(f"成功导入: {title} (ID: {result})")
                imported_count += 1

    print(f"导入完成。成功导入 {imported_count} 篇文章,跳过 {skipped_count} 篇文章。")


def main():
    """Connect to Typecho, list existing titles, then import the Hexo posts."""
    print("开始执行导入脚本...")
    typecho = connect_to_typecho()
    if not typecho:
        print("无法连接到 Typecho,脚本退出。")
        return

    import_posts(typecho, get_existing_posts(typecho))
    print("脚本执行完毕。")


if __name__ == "__main__":
    main()