PracticeDev/migrate-blog/hexo_to_typecho/migrate2.py

148 lines
5.1 KiB
Python
Raw Normal View History

2024-09-05 00:46:22 +08:00
import os
import yaml
import frontmatter
from pytypecho import Typecho, Post
import datetime
# Typecho blog configuration.
# NOTE(review): these look like placeholder credentials; avoid committing real
# ones to source control.
TYPECHO_URL = 'https://your_domain/action/xmlrpc' # Replace with your Typecho API URL
USERNAME = 'your_name'
PASSWORD = 'your_pwd'
# Directory holding the Hexo posts; walked recursively by import_posts().
HEXO_BLOG_DIR = '/root/frytea/source/source/_posts'
def connect_to_typecho():
    """Build a Typecho client from the module-level connection settings.

    Returns:
        The ``Typecho`` object created from ``TYPECHO_URL``, ``USERNAME`` and
        ``PASSWORD``, or ``None`` if creating it (or reporting success)
        raises any ``Exception``.
    """
    client = None
    try:
        client = Typecho(TYPECHO_URL, USERNAME, PASSWORD)
        print("成功连接到 Typecho")
    except Exception as err:
        # Best-effort: report the problem and let the caller decide to stop.
        print(f"连接到 Typecho 失败: {err}")
        return None
    return client
def get_existing_posts(typecho):
    """Collect the titles of the posts already stored in Typecho.

    Args:
        typecho: Client object exposing ``get_posts(count)``.

    Returns:
        A list holding the ``'title'`` entry of every post returned by
        ``typecho.get_posts(1000)``. If fetching or reading the posts raises,
        the error is printed and an empty list is returned.
    """
    print("正在获取现有的 Typecho 文章...")
    try:
        titles = []
        for entry in typecho.get_posts(1000):
            titles.append(entry['title'])
        print(f"成功获取到 {len(titles)} 篇现有文章")
        return titles
    except Exception as err:
        print(f"获取现有文章失败: {err}")
        return []
def _as_list(value):
    """Return a front-matter field as a fresh list.

    Hexo front matter allows a bare scalar (``tags: foo``) or an empty key
    (``tags:``) in addition to a YAML list, so the parsed value may be
    ``None``, a single scalar, or a list.
    """
    if value is None:
        return []
    if isinstance(value, (list, tuple)):
        return list(value)
    return [value]


def parse_hexo_post(file_path):
    """Parse one Hexo Markdown post into a plain dict.

    The file is read with each of ``utf-8``, ``gbk`` and ``iso-8859-1`` in
    turn; the first encoding that decodes without ``UnicodeDecodeError`` is
    used.

    Args:
        file_path: Path of the Markdown file to parse.

    Returns:
        A dict with the keys ``'title'``, ``'date'``, ``'text'``,
        ``'categories'`` and ``'tags'``, or ``None`` when the file cannot be
        read or parsed. ``'title'`` falls back to the file name without its
        extension when the front matter has no (or an empty) title.
        ``'categories'`` contains the front-matter categories followed by the
        directory components of the file relative to ``HEXO_BLOG_DIR``.
        ``'date'`` is passed through exactly as the front-matter parser
        produced it and is ``None`` when absent.
    """
    print(f"正在解析 Hexo 文章: {file_path}")
    for encoding in ('utf-8', 'gbk', 'iso-8859-1'):
        try:
            with open(file_path, 'r', encoding=encoding) as f:
                post = frontmatter.load(f)

            fallback_title = os.path.splitext(os.path.basename(file_path))[0]
            # ``or`` also covers an empty ``title:`` key, which parses to None.
            title = post.get('title') or fallback_title
            if title is fallback_title:
                print(f"文章缺少 title,使用文件名作为标题: {fallback_title}")

            date = post.get('date')
            content = post.content

            # Copy into new lists so the parsed metadata is never mutated and
            # scalar / empty front-matter values do not break the merge below.
            categories = _as_list(post.get('categories'))
            tags = _as_list(post.get('tags'))

            # Sub-directories under the blog root act as extra categories.
            # A post directly in the root has an empty dirname, which must not
            # become an empty-string category.
            rel_dir = os.path.dirname(os.path.relpath(file_path, HEXO_BLOG_DIR))
            for part in rel_dir.split(os.sep):
                if part in ('', os.curdir, os.pardir):
                    continue
                if part not in categories:
                    categories.append(part)

            print(f"成功解析文章: {title}, time {date}")
            return {
                'title': title,
                'date': date,
                'text': content,
                'categories': categories,
                'tags': tags
            }
        except UnicodeDecodeError:
            # Wrong guess; try the next candidate encoding.
            continue
        except Exception as e:
            print(f"解析文章失败: {file_path} - {e}")
            return None
    # iso-8859-1 can decode any byte sequence, so this is only a safety net.
    print(f"无法使用任何编码解析文章: {file_path}")
    return None
def _coerce_post_date(value):
    """Return *value* as a ``datetime.datetime``, or ``None`` if unusable.

    Front-matter dates may arrive as a ``datetime``, a plain ``date`` (when
    only a day is given) or an unparsed string.
    """
    # datetime is a subclass of date, so it must be checked first.
    if isinstance(value, datetime.datetime):
        return value
    if isinstance(value, datetime.date):
        return datetime.datetime.combine(value, datetime.time.min)
    if isinstance(value, str):
        text = value.strip()
        for fmt in (
            '%Y-%m-%d %H:%M:%S',
            '%Y-%m-%dT%H:%M:%S',
            '%Y-%m-%d %H:%M',
            '%Y-%m-%d',
            '%Y/%m/%d %H:%M:%S',
            '%Y/%m/%d',
        ):
            try:
                return datetime.datetime.strptime(text, fmt)
            except ValueError:
                continue
    return None


def import_posts(typecho, existing_posts):
    """Walk ``HEXO_BLOG_DIR`` and publish every new ``.md`` post to Typecho.

    Posts whose title is already in *existing_posts* are skipped. Until the
    user answers ``c`` (continue with all), each post is confirmed
    interactively: ``s`` imports just that post, any other key skips it.

    Args:
        typecho: Client object exposing ``new_post(post, publish=True)``.
        existing_posts: Container of titles already present in Typecho.

    Returns:
        None. Progress and a final summary are printed.
    """
    print(f"开始导入文章Hexo 博客目录: {HEXO_BLOG_DIR}")
    continue_all = False
    imported_count = 0
    skipped_count = 0
    for root, _, files in os.walk(HEXO_BLOG_DIR):
        for file in files:
            if not file.endswith('.md'):
                continue
            file_path = os.path.join(root, file)
            post_data = parse_hexo_post(file_path)
            if not post_data:
                continue
            title = post_data['title']
            if title in existing_posts:
                print(f"跳过已存在的文章: {title}")
                skipped_count += 1
                continue
            if not continue_all:
                print(f"准备导入文章: {title}")
                action = input("输入 'c' 继续导入所有, 's' 导入当前文章, 其他键跳过: ").lower()
                if action == 'c':
                    continue_all = True
                elif action != 's':
                    print(f"跳过: {title}")
                    skipped_count += 1
                    continue
            try:
                # NOTE(review): in the MetaWeblog API ``description`` normally
                # carries the post body; confirm that pytypecho really sends
                # ``post.text``, otherwise only these 200 characters are
                # published.
                post = Post(
                    title=title,
                    description=post_data['text'][:200],
                )
                # Full post content.
                post.text = post_data['text']
                # Categories and tags are only set when non-empty.
                if post_data['categories']:
                    post.categories = post_data['categories']
                if post_data['tags']:
                    # NOTE(review): verify ``tags`` is the attribute pytypecho
                    # serializes (MetaWeblog uses ``mt_keywords``).
                    post.tags = post_data['tags']
                # Publish time. The 'date' key is always present but may be
                # None, a plain date or a string, so normalize it first
                # instead of calling .replace() on whatever was parsed.
                raw_date = post_data.get('date')
                created = _coerce_post_date(raw_date)
                if created is not None:
                    post.dateCreated = created
                    # Same wall-clock time labelled as UTC (no conversion).
                    post.date_created_gmt = created.replace(tzinfo=datetime.timezone.utc)
                else:
                    print(f"文章日期缺失或无法识别,将不设置发布时间: {title} ({raw_date!r})")
                # Publish the post.
                result = typecho.new_post(post, publish=True)
                print(f"成功导入: {title} (ID: {result})")
                imported_count += 1
            except Exception as e:
                # Best-effort: one failing post must not abort the whole run.
                print(f"导入失败: {title} - {e}")
    print(f"导入完成。成功导入 {imported_count} 篇文章,跳过 {skipped_count} 篇文章。")
def main():
    """Run the migration: connect, list existing titles, import new posts.

    Prints a message and returns early when no Typecho client could be
    created.
    """
    print("开始执行导入脚本...")
    client = connect_to_typecho()
    if client:
        known_titles = get_existing_posts(client)
        import_posts(client, known_titles)
        print("脚本执行完毕。")
    else:
        print("无法连接到 Typecho脚本退出。")
# Run the migration only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()