PracticeDev/migrate-blog/hexo_to_typecho/migrate2.py
2024-09-05 00:46:22 +08:00

148 lines
5.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import yaml
import frontmatter
from pytypecho import Typecho, Post
import datetime
# Typecho blog configuration.
# These three values are passed, in this order, to Typecho(...) in
# connect_to_typecho(); the placeholders must be edited before running.
TYPECHO_URL = 'https://your_domain/action/xmlrpc' # Replace with your Typecho API URL
USERNAME = 'your_name'
PASSWORD = 'your_pwd'
# Hexo blog directory: walked recursively by import_posts() for '*.md' files,
# and used by parse_hexo_post() as the base for directory-derived categories.
HEXO_BLOG_DIR = '/root/frytea/source/source/_posts'
def connect_to_typecho():
    """Build a Typecho client from the module-level settings.

    Returns:
        The object returned by ``Typecho(TYPECHO_URL, USERNAME, PASSWORD)``,
        or ``None`` when anything in the attempt raised (the error is
        printed, not propagated).
    """
    # NOTE(review): whether the constructor performs a network round-trip is
    # not visible here -- the "connected" message only means it did not raise.
    try:
        client = Typecho(TYPECHO_URL, USERNAME, PASSWORD)
        print("成功连接到 Typecho")
    except Exception as err:
        print(f"连接到 Typecho 失败: {err}")
        return None
    else:
        return client
def get_existing_posts(typecho, limit=1000):
    """Fetch the titles of the posts already present on the Typecho blog.

    Args:
        typecho: Client object exposing ``get_posts(count)``; every item it
            returns is indexed with ``['title']``.
        limit: Value handed to ``get_posts``. Defaults to 1000, the number
            that used to be hard-coded; raise it for blogs with more posts so
            that older articles are still recognised as existing.

    Returns:
        A list of titles. An empty list is returned when fetching or reading
        the posts raised; the error is printed rather than propagated, so
        callers cannot distinguish "no posts" from "lookup failed".
    """
    print("正在获取现有的 Typecho 文章...")
    try:
        # Only the remote call and the title extraction can fail here.
        posts = typecho.get_posts(limit)
        existing_titles = [post['title'] for post in posts]
    except Exception as e:
        print(f"获取现有文章失败: {e}")
        return []
    print(f"成功获取到 {len(existing_titles)} 篇现有文章")
    return existing_titles
def _as_term_list(value):
    """Return a front-matter ``categories``/``tags`` value as a new list."""
    if value is None:
        # e.g. an empty ``tags:`` key in the front matter
        return []
    if isinstance(value, (list, tuple, set)):
        return list(value)
    # A scalar such as ``categories: linux`` is a single term.
    return [value]


def parse_hexo_post(file_path):
    """Parse one Hexo Markdown file into a plain dict.

    The file is opened with each candidate encoding in turn; a
    ``UnicodeDecodeError`` moves on to the next one.

    Args:
        file_path: Path of the Markdown file to read.

    Returns:
        A dict with the keys ``title``, ``date``, ``text``, ``categories``
        and ``tags``, or ``None`` when the file could not be parsed.

        * ``title`` falls back to the file name without its extension.
        * ``date`` is whatever the front matter holds (``None`` if absent).
        * ``categories`` is the front-matter value followed by the directory
          components of the file relative to ``HEXO_BLOG_DIR``.
        * ``categories`` and ``tags`` are always lists.
    """
    print(f"正在解析 Hexo 文章: {file_path}")
    # iso-8859-1 maps every byte to a character, so it is the catch-all
    # fallback after the two stricter encodings.
    encodings = ['utf-8', 'gbk', 'iso-8859-1']
    for encoding in encodings:
        try:
            with open(file_path, 'r', encoding=encoding) as f:
                post = frontmatter.load(f)
            if 'title' not in post:
                # Dump the post so files without a title are easy to spot.
                print(post)
            title = post.get('title', os.path.splitext(os.path.basename(file_path))[0])
            date = post.get('date')
            content = post.content
            # Front matter may hold a scalar or nothing at all for these keys;
            # normalising avoids failing on ``.extend`` and losing the post.
            categories = _as_term_list(post.get('categories'))
            tags = _as_term_list(post.get('tags'))
            rel_dir = os.path.dirname(os.path.relpath(file_path, HEXO_BLOG_DIR))
            # ``''.split(os.sep)`` yields ``['']``; drop empty parts so files
            # sitting directly in HEXO_BLOG_DIR get no blank category.
            categories.extend(part for part in rel_dir.split(os.sep) if part)
            print(f"成功解析文章: {title}, time {date}")
            return {
                'title': title,
                'date': date,
                'text': content,
                'categories': categories,
                'tags': tags
            }
        except UnicodeDecodeError:
            continue
        except Exception as e:
            # Best effort: report the file and let the caller skip it.
            print(f"解析文章失败: {file_path} - {e}")
            return None
    # Reached only if every encoding raised UnicodeDecodeError.
    print(f"无法使用任何编码解析文章: {file_path}")
    return None
def _coerce_post_date(value):
    """Return *value* as a ``datetime.datetime``, or ``None`` if impossible.

    Accepts a ``datetime.datetime`` (returned unchanged), a
    ``datetime.date`` (midnight of that day) or a string in one of a few
    ``YYYY-MM-DD[ HH:MM[:SS]]`` layouts.
    """
    if isinstance(value, datetime.datetime):
        return value
    if isinstance(value, datetime.date):
        return datetime.datetime(value.year, value.month, value.day)
    if isinstance(value, str):
        text = value.strip()
        for fmt in ('%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M', '%Y-%m-%d'):
            try:
                return datetime.datetime.strptime(text, fmt)
            except ValueError:
                continue
    return None


def _build_post(post_data):
    """Create the ``Post`` object for one parsed Hexo article."""
    # NOTE(review): in the MetaWeblog API ``description`` normally carries the
    # post body -- confirm that pytypecho really sends ``text`` and not only
    # this 200-character excerpt.
    post = Post(
        title=post_data['title'],
        description=post_data['text'][:200],  # first 200 characters as the description
    )
    # Article body
    post.text = post_data['text']
    # Categories and tags, only when there are any
    if post_data['categories']:
        post.categories = post_data['categories']
    if post_data['tags']:
        post.tags = post_data['tags']
    # Publication time. The parsed dict may carry ``None``, a string or a
    # date-only value; calling ``.replace(tzinfo=...)`` on those raises.
    raw_date = post_data.get('date')
    if raw_date is not None:
        created = _coerce_post_date(raw_date)
        if created is None:
            print(f"忽略无法识别的日期: {raw_date!r}")
        else:
            post.dateCreated = created
            if created.tzinfo is not None and created.utcoffset() is not None:
                # Aware value: convert the instant instead of relabelling it.
                post.date_created_gmt = created.astimezone(datetime.timezone.utc)
            else:
                # Naive value: keep treating the wall time as UTC.
                post.date_created_gmt = created.replace(tzinfo=datetime.timezone.utc)
    return post


def import_posts(typecho, existing_posts):
    """Walk ``HEXO_BLOG_DIR`` and publish every new ``.md`` article.

    For each Markdown file the article is parsed with ``parse_hexo_post``,
    skipped when its title is in *existing_posts*, and otherwise published
    through ``typecho.new_post(post, publish=True)``. Until the user answers
    ``c`` the function prompts before every article.

    Args:
        typecho: Client object exposing ``new_post(post, publish=...)``.
        existing_posts: Container of titles already on the blog; only
            membership tests are performed on it.

    Returns:
        None. Progress and a final summary are printed. Articles that fail
        to parse or to publish are reported but counted neither as imported
        nor as skipped.
    """
    print(f"开始导入文章Hexo 博客目录: {HEXO_BLOG_DIR}")
    continue_all = False
    imported_count = 0
    skipped_count = 0
    for root, _, files in os.walk(HEXO_BLOG_DIR):
        for file in files:
            if not file.endswith('.md'):
                continue
            file_path = os.path.join(root, file)
            post_data = parse_hexo_post(file_path)
            if not post_data:
                continue
            title = post_data['title']
            if title in existing_posts:
                print(f"跳过已存在的文章: {title}")
                skipped_count += 1
                continue
            if not continue_all:
                print(f"准备导入文章: {title}")
                action = input("输入 'c' 继续导入所有, 's' 导入当前文章, 其他键跳过: ").lower()
                if action == 'c':
                    # Import this article and stop asking from now on.
                    continue_all = True
                elif action != 's':
                    print(f"跳过: {title}")
                    skipped_count += 1
                    continue
            try:
                post = _build_post(post_data)
                # Publish the article
                result = typecho.new_post(post, publish=True)
                print(f"成功导入: {title} (ID: {result})")
                imported_count += 1
            except Exception as e:
                # Best effort: one failing article must not abort the run.
                print(f"导入失败: {title} - {e}")
    print(f"导入完成。成功导入 {imported_count} 篇文章,跳过 {skipped_count} 篇文章。")
def main():
    """Run the whole migration from the command line.

    Connects via ``connect_to_typecho``; when that yields a falsy value a
    message is printed and nothing else happens. Otherwise the existing
    titles are fetched with ``get_existing_posts`` and handed to
    ``import_posts``.
    """
    print("开始执行导入脚本...")
    client = connect_to_typecho()
    if client:
        known_titles = get_existing_posts(client)
        import_posts(client, known_titles)
        print("脚本执行完毕。")
    else:
        print("无法连接到 Typecho脚本退出。")


if __name__ == "__main__":
    main()