148 lines
5.1 KiB
Python
148 lines
5.1 KiB
Python
import os
|
||
import yaml
|
||
import frontmatter
|
||
from pytypecho import Typecho, Post
|
||
import datetime
|
||
|
||
# Typecho blog configuration.
# Every setting can be overridden through an environment variable so that real
# credentials do not have to be written into this file; the literals below are
# the placeholder defaults used when the variable is not set.
TYPECHO_URL = os.environ.get('TYPECHO_URL', 'https://your_domain/action/xmlrpc')  # replace with your Typecho API URL
USERNAME = os.environ.get('TYPECHO_USERNAME', 'your_name')
PASSWORD = os.environ.get('TYPECHO_PASSWORD', 'your_pwd')

# Hexo blog posts directory (walked recursively by the importer).
HEXO_BLOG_DIR = os.environ.get('HEXO_BLOG_DIR', '/root/frytea/source/source/_posts')
|
||
|
||
def connect_to_typecho():
    """Build a Typecho client from the module-level connection settings.

    Returns:
        The ``Typecho`` instance when construction succeeds, otherwise
        ``None``. Any exception is printed instead of being propagated.
    """
    try:
        client = Typecho(TYPECHO_URL, USERNAME, PASSWORD)
        print("成功连接到 Typecho")
    except Exception as err:
        print(f"连接到 Typecho 失败: {err}")
        client = None
    return client
|
||
|
||
|
||
def get_existing_posts(typecho, limit=1000):
    """Fetch the titles of posts that already exist on the Typecho blog.

    Args:
        typecho: Client object exposing ``get_posts(count)``, which returns an
            iterable of mappings that each carry a ``'title'`` key.
        limit: Maximum number of posts to request from the server. Defaults
            to 1000, the value this script has always used.

    Returns:
        A list of titles in the order the server returned them. If fetching
        or reading the posts raises, the error is printed and an empty list
        is returned.
    """
    print("正在获取现有的 Typecho 文章...")
    try:
        # Only the remote call and the title extraction can fail here, so the
        # try body is limited to them.
        posts = typecho.get_posts(limit)
        existing_titles = [post['title'] for post in posts]
    except Exception as e:
        print(f"获取现有文章失败: {e}")
        return []

    print(f"成功获取到 {len(existing_titles)} 篇现有文章")
    return existing_titles
|
||
|
||
def _as_list(value):
    """Return a front-matter field as a new list (None -> [], scalar -> [scalar])."""
    if value is None:
        return []
    if isinstance(value, (list, tuple)):
        return list(value)
    return [value]


def _directory_categories(file_path, base_dir):
    """Return the sub-directory names between ``base_dir`` and the file."""
    rel_dir = os.path.dirname(os.path.relpath(file_path, base_dir))
    # ''.split(os.sep) yields [''], so drop empty components; otherwise a post
    # stored directly in base_dir would gain an empty-string category.
    return [part for part in rel_dir.split(os.sep) if part]


def parse_hexo_post(file_path):
    """Parse one Hexo markdown post into a plain dictionary.

    The file is read with each candidate encoding in turn until one decodes
    without a ``UnicodeDecodeError``. The front matter supplies title, date,
    categories and tags; the names of the sub-directories between
    ``HEXO_BLOG_DIR`` and the file are appended as extra categories.

    Args:
        file_path: Path to the markdown file.

    Returns:
        A dict with the keys ``'title'``, ``'date'``, ``'text'``,
        ``'categories'`` and ``'tags'``, or ``None`` if the file could not be
        parsed. ``'categories'`` and ``'tags'`` are always lists, even when
        the front matter holds a single scalar or an empty value.
    """
    print(f"正在解析 Hexo 文章: {file_path}")
    # iso-8859-1 maps every byte value, so it serves as the last-resort fallback.
    encodings = ['utf-8', 'gbk', 'iso-8859-1']
    for encoding in encodings:
        try:
            with open(file_path, 'r', encoding=encoding) as f:
                post = frontmatter.load(f)

            if 'title' not in post:
                # Debug aid: show the post whose front matter has no title.
                print(post)

            # Fall back to the file name (without extension) as the title.
            title = post.get('title', os.path.splitext(os.path.basename(file_path))[0])
            date = post.get('date')
            content = post.content

            # The front matter may hold a single string or an empty value
            # instead of a list; normalise so the values can be combined.
            categories = _as_list(post.get('categories'))
            tags = _as_list(post.get('tags'))
            categories.extend(_directory_categories(file_path, HEXO_BLOG_DIR))

            print(f"成功解析文章: {title}, time {date}")
            return {
                'title': title,
                'date': date,
                'text': content,
                'categories': categories,
                'tags': tags,
            }
        except UnicodeDecodeError:
            # Wrong encoding guess: try the next candidate.
            continue
        except Exception as e:
            print(f"解析文章失败: {file_path} - {e}")
            return None

    print(f"无法使用任何编码解析文章: {file_path}")
    return None
|
||
|
||
def _normalize_post_date(value):
    """Coerce a front-matter date value into a ``datetime.datetime``.

    Returns ``None`` when ``value`` is ``None`` (the post has no date). A
    date-only value becomes midnight of that day. Any other type raises
    ``ValueError`` so the caller reports the post as failed.
    """
    if value is None:
        return None
    # datetime.datetime is a subclass of datetime.date, so test it first.
    if isinstance(value, datetime.datetime):
        return value
    if isinstance(value, datetime.date):
        return datetime.datetime(value.year, value.month, value.day)
    raise ValueError(f"unsupported date value: {value!r}")


def import_posts(typecho, existing_posts):
    """Walk ``HEXO_BLOG_DIR`` and publish every new ``.md`` post to Typecho.

    For each markdown file the post is parsed, skipped if its title is already
    in ``existing_posts``, and otherwise published. Until the user answers
    ``'c'`` (continue with all), each post is confirmed interactively:
    ``'s'`` imports only the current post and any other answer skips it.

    Args:
        typecho: Client object exposing ``new_post(post, publish=True)``.
        existing_posts: Collection of titles already present on the blog.

    Returns:
        None. Progress and a final summary are printed. Posts that fail to
        parse or fail to publish are counted neither as imported nor as
        skipped.
    """
    print(f"开始导入文章,Hexo 博客目录: {HEXO_BLOG_DIR}")
    continue_all = False
    imported_count = 0
    skipped_count = 0

    for root, _, files in os.walk(HEXO_BLOG_DIR):
        for file in files:
            if not file.endswith('.md'):
                continue

            file_path = os.path.join(root, file)
            post_data = parse_hexo_post(file_path)
            if not post_data:
                continue

            if post_data['title'] in existing_posts:
                print(f"跳过已存在的文章: {post_data['title']}")
                skipped_count += 1
                continue

            if not continue_all:
                print(f"准备导入文章: {post_data['title']}")
                action = input("输入 'c' 继续导入所有, 's' 导入当前文章, 其他键跳过: ").lower()

                if action == 'c':
                    continue_all = True
                elif action != 's':
                    print(f"跳过: {post_data['title']}")
                    skipped_count += 1
                    continue

            try:
                post = Post(
                    title=post_data['title'],
                    description=post_data['text'][:200],  # first 200 characters as the description
                )
                # Full article body.
                post.text = post_data['text']

                # Categories and tags are only set when non-empty.
                if post_data['categories']:
                    post.categories = post_data['categories']
                if post_data['tags']:
                    post.tags = post_data['tags']

                # Publication time. The 'date' key is always present but may
                # be None or a date-only value, so normalise before using it.
                created = _normalize_post_date(post_data.get('date'))
                if created is not None:
                    post.dateCreated = created
                    if created.tzinfo is None:
                        # Naive value: label it as UTC, as this script always did.
                        post.date_created_gmt = created.replace(tzinfo=datetime.timezone.utc)
                    else:
                        # Aware value: convert it instead of overwriting its offset.
                        post.date_created_gmt = created.astimezone(datetime.timezone.utc)

                # Publish the post.
                result = typecho.new_post(post, publish=True)
                print(f"成功导入: {post_data['title']} (ID: {result})")
                imported_count += 1
            except Exception as e:
                print(f"导入失败: {post_data['title']} - {e}")

    print(f"导入完成。成功导入 {imported_count} 篇文章,跳过 {skipped_count} 篇文章。")
|
||
|
||
def main():
    """Script entry point: connect, list existing titles, then import.

    Prints a message and returns early when no Typecho client could be
    created.
    """
    print("开始执行导入脚本...")
    client = connect_to_typecho()
    if client:
        known_titles = get_existing_posts(client)
        import_posts(client, known_titles)
        print("脚本执行完毕。")
    else:
        print("无法连接到 Typecho,脚本退出。")


if __name__ == "__main__":
    main()
|