# PracticeDev/migrate-blog/hexo_to_typecho/migrate2.py

import os
import yaml
import frontmatter
from pytypecho import Typecho, Post
import datetime

# Typecho blog settings.
# Each value may be overridden through an environment variable so that real
# credentials do not have to be written into this file; the placeholder
# literals below are used when the variable is not set.
TYPECHO_URL = os.environ.get('TYPECHO_URL', 'https://your_domain/action/xmlrpc')  # replace with your Typecho API URL
USERNAME = os.environ.get('TYPECHO_USERNAME', 'your_name')
PASSWORD = os.environ.get('TYPECHO_PASSWORD', 'your_pwd')

# Hexo blog posts directory (walked recursively for *.md files).
HEXO_BLOG_DIR = os.environ.get('HEXO_BLOG_DIR', '/root/frytea/source/source/_posts')

def connect_to_typecho():
    """Build a Typecho client from the module-level URL and credentials.

    Returns:
        The ``Typecho`` client object, or ``None`` if anything inside the
        attempt raised (the error is printed rather than propagated).
    """
    try:
        client = Typecho(TYPECHO_URL, USERNAME, PASSWORD)
        print("成功连接到 Typecho")
    except Exception as err:
        print(f"连接到 Typecho 失败: {err}")
        client = None
    return client


def get_existing_posts(typecho, limit=1000):
    """Fetch the titles of posts that already exist on the Typecho side.

    Args:
        typecho: Client object exposing ``get_posts(count)``; each returned
            item must support ``item['title']``.
        limit: Maximum number of posts to request. Defaults to 1000, the
            value that used to be hard-coded; raise it for larger blogs so
            that older posts are not missed by the duplicate check.

    Returns:
        A list of titles in the order returned by the client. An empty list
        is returned when the request or the title extraction fails (the
        error is printed), which means the caller will see no existing posts.
    """
    print("正在获取现有的 Typecho 文章...")
    try:
        posts = typecho.get_posts(limit)
        existing_titles = [post['title'] for post in posts]
        print(f"成功获取到 {len(existing_titles)} 篇现有文章")
        return existing_titles
    except Exception as e:
        # Best-effort by design: a failure here must not abort the script.
        print(f"获取现有文章失败: {e}")
        return []

def _as_list(value):
    """Return a front-matter field as a fresh list.

    Hexo front matter allows ``categories``/``tags`` to be missing, left
    empty (parsed as ``None``), a single scalar, or a sequence. Callers need
    a real list they can extend without touching the parsed metadata.
    """
    if value is None:
        return []
    if isinstance(value, (list, tuple, set)):
        return list(value)
    return [value]


def parse_hexo_post(file_path):
    """Parse one Hexo Markdown file into a plain dict.

    The file is opened with each of several encodings in turn; the first one
    that decodes without error is used. Directory components of the file's
    path relative to ``HEXO_BLOG_DIR`` are appended to the categories.

    Args:
        file_path: Path of the Markdown file to read.

    Returns:
        A dict with the keys ``title``, ``date``, ``text``, ``categories``
        and ``tags``, or ``None`` when the file cannot be read or parsed
        (the reason is printed). ``categories`` and ``tags`` are always
        lists. ``date`` is whatever the front matter held, or ``None``.
    """
    print(f"正在解析 Hexo 文章: {file_path}")
    fallback_title = os.path.splitext(os.path.basename(file_path))[0]
    encodings = ['utf-8', 'gbk', 'iso-8859-1']
    for encoding in encodings:
        try:
            with open(file_path, 'r', encoding=encoding) as f:
                post = frontmatter.load(f)

                if 'title' not in post:
                    print(post)
                # Fall back to the file name when the title is missing or
                # left empty in the front matter.
                title = post.get('title') or fallback_title
                date = post.get('date')
                content = post.content
                # Scalars and empty values are normalized so the extend
                # below cannot fail and drop the post.
                categories = _as_list(post.get('categories'))
                tags = _as_list(post.get('tags'))

                rel_path = os.path.relpath(file_path, HEXO_BLOG_DIR)
                # A file directly under the root has an empty dirname, and
                # ''.split(sep) yields [''], so empty parts are filtered out.
                dir_categories = [
                    part
                    for part in os.path.dirname(rel_path).split(os.sep)
                    if part
                ]
                categories.extend(dir_categories)

                print(f"成功解析文章: {title}, time {date}")
                return {
                    'title': title,
                    'date': date,
                    'text': content,
                    'categories': categories,
                    'tags': tags
                }
        except UnicodeDecodeError:
            # Wrong guess for this file; try the next encoding.
            continue
        except Exception as e:
            print(f"解析文章失败: {file_path} - {e}")
            return None

    print(f"无法使用任何编码解析文章: {file_path}")
    return None

def _coerce_datetime(value):
    """Return *value* as a ``datetime.datetime``, or ``None`` if unusable.

    Front-matter dates may arrive as a datetime, a plain date, an ISO-like
    string (when quoted in YAML), or be absent altogether.
    """
    # datetime is a subclass of date, so it must be checked first.
    if isinstance(value, datetime.datetime):
        return value
    if isinstance(value, datetime.date):
        return datetime.datetime.combine(value, datetime.time())
    if isinstance(value, str):
        try:
            return datetime.datetime.fromisoformat(value.strip())
        except ValueError:
            return None
    return None


def import_posts(typecho, existing_posts):
    """Walk ``HEXO_BLOG_DIR`` and publish every new ``.md`` post to Typecho.

    For each Markdown file the post is parsed, skipped if its title is in
    ``existing_posts``, and otherwise (after an interactive confirmation,
    unless the user chose to continue for all) sent with
    ``typecho.new_post(post, publish=True)``. A summary of imported and
    skipped counts is printed at the end; failed parses and failed uploads
    are reported individually but not counted.

    Args:
        typecho: Client object exposing ``new_post(post, publish=...)``.
        existing_posts: Container of titles already present on the target;
            only membership tests are performed on it.
    """
    print(f"开始导入文章，Hexo 博客目录: {HEXO_BLOG_DIR}")
    continue_all = False
    imported_count = 0
    skipped_count = 0

    for root, _, files in os.walk(HEXO_BLOG_DIR):
        for file in files:
            if not file.endswith('.md'):
                continue

            file_path = os.path.join(root, file)
            post_data = parse_hexo_post(file_path)
            if not post_data:
                continue

            title = post_data['title']
            if title in existing_posts:
                print(f"跳过已存在的文章: {title}")
                skipped_count += 1
                continue

            if not continue_all:
                print(f"准备导入文章: {title}")
                action = input("输入 'c' 继续导入所有, 's' 导入当前文章, 其他键跳过: ").lower()

                if action == 'c':
                    continue_all = True
                elif action != 's':
                    print(f"跳过: {title}")
                    skipped_count += 1
                    continue

            try:
                post = Post(
                    title=title,
                    description=post_data['text'][:200],  # first 200 characters as the description
                )
                # Full article body.
                post.text = post_data['text']

                # Categories and tags are only set when non-empty.
                if post_data['categories']:
                    post.categories = post_data['categories']
                if post_data['tags']:
                    post.tags = post_data['tags']

                # Publication time. The 'date' key is always present in
                # post_data, so the value itself has to be inspected; a
                # missing or unparseable date must not abort the import.
                raw_date = post_data.get('date')
                created = _coerce_datetime(raw_date)
                if created is not None:
                    post.dateCreated = created
                    if created.tzinfo is None:
                        # Naive value: label it as UTC, as before.
                        post.date_created_gmt = created.replace(tzinfo=datetime.timezone.utc)
                    else:
                        # Aware value: convert rather than relabel, so the
                        # instant in time is preserved.
                        post.date_created_gmt = created.astimezone(datetime.timezone.utc)
                elif raw_date is not None:
                    print(f"忽略无法识别的日期: {title} - {raw_date!r}")

                # Publish the post.
                result = typecho.new_post(post, publish=True)
                print(f"成功导入: {title} (ID: {result})")
                imported_count += 1
            except Exception as e:
                # One failed post must not stop the rest of the migration.
                print(f"导入失败: {title} - {e}")

    print(f"导入完成。成功导入 {imported_count} 篇文章，跳过 {skipped_count} 篇文章。")

def main():
    """Script entry point: connect, list existing titles, run the import.

    Prints a notice and stops early when no Typecho client could be created.
    """
    print("开始执行导入脚本...")
    client = connect_to_typecho()
    if client:
        known_titles = get_existing_posts(client)
        import_posts(client, known_titles)
        print("脚本执行完毕。")
    else:
        print("无法连接到 Typecho，脚本退出。")


if __name__ == "__main__":
    main()