This commit is contained in:
songtianlun 2024-09-05 00:46:22 +08:00
parent 2ba2ffe890
commit f9c247264f
2 changed files with 147 additions and 131 deletions

View File

@ -1,131 +0,0 @@
import os
import frontmatter
from pytypecho import Typecho
from datetime import datetime
# --- Configuration ---
# Directory containing the Hexo Markdown posts to migrate.
hexo_posts_dir = '/home/songtianlun/Sync/Develop/frytea/source/_posts'

# Secrets must never be committed to version control: passwords are taken
# from the environment (empty string when unset). URLs and user names keep
# their previous values as defaults but can be overridden the same way.
wordpress_url = os.environ.get('WORDPRESS_URL', "https://blog.lnf1.skybyte.me/xmlrpc.php")
wordpress_username = os.environ.get('WORDPRESS_USERNAME', 'songtianlun')
wordpress_password = os.environ.get('WORDPRESS_PASSWORD', '')

typecho_url = os.environ.get('TYPECHO_URL', "https://oskyla.com/action/xmlrpc")
typecho_username = os.environ.get('TYPECHO_USERNAME', 'songtianlun')
typecho_password = os.environ.get('TYPECHO_PASSWORD', '')

# Shared Typecho XML-RPC client used by the functions below.
tp = Typecho(typecho_url, typecho_username, typecho_password)

# --- Upload statistics (updated through `global` by the upload helpers) ---
total_posts = 0
successful_posts = 0
failed_posts = 0
duplicate_posts = []  # (title, file number) of posts skipped as already present
def get_all_posts():
    """Fetch existing posts through the module-level ``tp`` client.

    Returns:
        dict: maps each fetched post's ``title`` to its ``postid``. When
        several posts share a title, the last one fetched wins.
    """
    # NOTE(review): assumes ``get_posts`` accepts a ``max_posts`` keyword and
    # returns objects exposing ``.title`` / ``.postid`` -- confirm against the
    # pytypecho client API.
    print("Fetching existing posts from WordPress...")
    fetch_limit = 1000
    fetched = tp.get_posts(max_posts=fetch_limit)
    print(f"Total existing posts fetched: {len(fetched)}")

    title_to_id = {}
    for entry in fetched:
        key = entry.title
        title_to_id[key] = entry.postid
    return title_to_id
def scan_directory_for_posts(directory, category_prefix=""):
    """Walk ``directory`` recursively and collect every parsable ``.md`` post.

    Args:
        directory: Root directory to walk.
        category_prefix: Text prepended to the containing folder's name to
            form the post's category.

    Returns:
        list: ``(title, post_data, category, number)`` tuples, where
        ``number`` is a 1-based counter over the collected posts.
    """
    collected = []
    next_number = 1
    for current_dir, _subdirs, filenames in os.walk(directory):
        # The folder that directly contains the file names the category.
        category = category_prefix + os.path.basename(current_dir)
        markdown_names = (name for name in filenames if name.endswith('.md'))
        for name in markdown_names:
            title, data = process_md_file(os.path.join(current_dir, name))
            if not (title and data):
                continue
            collected.append((title, data, category, next_number))
            next_number += 1
    return collected
def process_md_file(file_path):
    """Load a Markdown file with front matter and determine its title.

    Args:
        file_path: Path of the Markdown file, read as UTF-8.

    Returns:
        tuple: ``(title, post_data)`` where ``post_data`` is the object
        returned by ``frontmatter.load``.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        parsed = frontmatter.load(handle)

    # Without a front-matter title, fall back to the file name
    # (extension included).
    if 'title' in parsed:
        title = parsed['title']
    else:
        title = os.path.basename(file_path)
    return title, parsed
def _parse_post_date(value):
    """Convert a front-matter ``date`` value into a ``datetime``.

    YAML loaders hand back ``datetime`` objects for full timestamps and
    ``date`` objects for date-only values; anything else is parsed from its
    string form.

    Raises:
        ValueError: if the string form matches none of the supported formats.
    """
    # Local import: the file only imports the ``datetime`` class itself.
    from datetime import date as _date

    if isinstance(value, datetime):
        return value
    if isinstance(value, _date):
        # Date-only front matter: use midnight of that day.
        return datetime(value.year, value.month, value.day)

    text = str(value).strip()
    for fmt in ('%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M', '%Y-%m-%d'):
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            continue
    raise ValueError(f"Unsupported date value in front matter: {value!r}")


def create_typecho_post(post_title, post_data, category):
    """Build the post object to upload from parsed Hexo front matter.

    Args:
        post_title: Title to give the post.
        post_data: Parsed front-matter object; its ``content`` becomes the
            body, and the optional ``tags`` and ``date`` keys are consulted.
        category: Category name assigned to the post.

    Returns:
        The populated post object with ``post_status`` set to ``'publish'``.

    Raises:
        ValueError: if ``date`` is present but cannot be interpreted.
    """
    # NOTE(review): assumes the client class exposes ``Post`` and that it
    # accepts these keyword arguments -- confirm against pytypecho.
    post = Typecho.Post(
        title=post_title,
        categories=[category],
    )
    post.title = post_title
    post.content = post_data.content
    post.terms_names = {
        'category': [category],
        'post_tag': post_data['tags'] if 'tags' in post_data else [],
    }
    if 'date' in post_data:
        post.date = _parse_post_date(post_data['date'])
    post.post_status = 'publish'  # or 'draft' to save as a draft
    return post
def upload_post_with_retries(post, post_number, retries=3):
    """Upload ``post`` through the module-level ``tp`` client, retrying on error.

    Updates the module-level ``successful_posts`` / ``failed_posts`` counters.

    Args:
        post: Post object to upload; its ``title`` is used in log output.
        post_number: Sequence number shown in log output.
        retries: Maximum number of upload attempts.

    Returns:
        bool: ``True`` as soon as one attempt succeeds, ``False`` once all
        attempts have failed (including when ``retries`` is not positive).
    """
    global successful_posts, failed_posts

    for attempt in range(1, retries + 1):
        try:
            # Upload via the Typecho client; the previous code referenced
            # undefined WordPress helpers, so every attempt raised NameError.
            post_id = tp.new_post(post, publish=True)
        except Exception as e:
            # Best-effort migration: log the error and try again.
            print(f"Post {post_number}: '{post.title}' - Attempt {attempt} failed with error: {e}")
        else:
            print(f"Post {post_number}: '{post.title}' - Upload successful. WordPress ID: {post_id}")
            successful_posts += 1
            return True

    print(f"Post {post_number}: '{post.title}' - Failed to upload after {retries} attempts.")
    failed_posts += 1
    return False
def upload_posts_to_wordpress(posts_to_upload):
    """Upload each queued post and record how many were queued.

    Args:
        posts_to_upload: Sequence of ``(post, post_number)`` pairs.

    Stores ``len(posts_to_upload)`` in the module-level ``total_posts``.
    Any exception escaping the per-post upload is logged and the loop moves
    on to the next post.
    """
    global total_posts
    total_posts = len(posts_to_upload)

    for pair in posts_to_upload:
        post, post_number = pair
        try:
            upload_post_with_retries(post, post_number)
        except Exception:
            print(f"Post {post_number}: '{post.title}' - Failed permanently.")
if __name__ == "__main__":
    # Titles that already exist remotely are used to skip duplicates.
    existing_wp_posts = get_all_posts()
    posts = scan_directory_for_posts(hexo_posts_dir)

    posts_to_upload = []
    for post_title, post_data, category, post_number in posts:
        if post_title in existing_wp_posts:
            print(f"Post '{post_title}' skipped: already exists.")
            duplicate_posts.append((post_title, post_number))
        else:
            # Use the builder defined in this file; the old call targeted a
            # function name that does not exist here.
            post = create_typecho_post(post_title, post_data, category)
            # Number uploads consecutively, independent of skipped files.
            posts_to_upload.append((post, len(posts_to_upload) + 1))

    upload_posts_to_wordpress(posts_to_upload)

    print("Migration completed!")
    print(f"Total posts processed: {total_posts}")
    print(f"Successfully uploaded: {successful_posts}")
    print(f"Failed uploads: {failed_posts}")
    if duplicate_posts:
        print("\nDuplicate posts found:")
        for title, post_number in duplicate_posts:
            print(f"Title: {title} - File Number: {post_number}")
    else:
        print("\nNo duplicate posts found.")

View File

@ -0,0 +1,147 @@
import os
import yaml
import frontmatter
from pytypecho import Typecho, Post
import datetime
# Typecho blog configuration
TYPECHO_URL = 'https://your_domain/action/xmlrpc' # replace with your Typecho API URL
USERNAME = 'your_name'
PASSWORD = 'your_pwd'
# Hexo blog directory (root that is walked for Markdown posts)
HEXO_BLOG_DIR = '/root/frytea/source/source/_posts'
def connect_to_typecho():
    """Create the Typecho client from the module-level connection settings.

    Returns:
        The client object, or ``None`` if creating it (or reporting
        success) raised an exception; the error is printed.
    """
    try:
        client = Typecho(TYPECHO_URL, USERNAME, PASSWORD)
        print("成功连接到 Typecho")
    except Exception as err:
        print(f"连接到 Typecho 失败: {err}")
        client = None
    return client
def get_existing_posts(typecho):
    """Return the titles of posts already present on the Typecho site.

    Args:
        typecho: Client whose ``get_posts(1000)`` yields mappings that have
            a ``'title'`` key.

    Returns:
        list: the fetched titles, or an empty list if fetching failed
        (the error is printed).
    """
    print("正在获取现有的 Typecho 文章...")
    try:
        titles = []
        for entry in typecho.get_posts(1000):
            titles.append(entry['title'])
        print(f"成功获取到 {len(titles)} 篇现有文章")
    except Exception as err:
        print(f"获取现有文章失败: {err}")
        titles = []
    return titles
def _as_list(value):
    """Return a front-matter field (missing, scalar or sequence) as a new list."""
    if value is None:
        return []
    if isinstance(value, (list, tuple, set)):
        return list(value)
    return [value]


def parse_hexo_post(file_path):
    """Parse a Hexo Markdown post into a plain dict.

    The file is tried with several encodings in turn; the first one that
    decodes without error is used.

    Args:
        file_path: Path of the Markdown file.

    Returns:
        dict | None: a dict with keys ``title``, ``date``, ``text``,
        ``categories`` and ``tags``, or ``None`` when the file could not be
        parsed. ``categories`` holds the front-matter categories followed by
        the file's directory components relative to ``HEXO_BLOG_DIR``.
    """
    print(f"正在解析 Hexo 文章: {file_path}")
    encodings = ['utf-8', 'gbk', 'iso-8859-1']
    for encoding in encodings:
        try:
            with open(file_path, 'r', encoding=encoding) as f:
                post = frontmatter.load(f)

            if 'title' not in post:
                # Debug aid kept from the original: dump posts lacking a title.
                print(post)
            # Fall back to the file name without its extension.
            title = post.get('title', os.path.splitext(os.path.basename(file_path))[0])
            date = post.get('date')
            content = post.content

            # Front matter may give a single scalar (``categories: foo``) or
            # an empty key (None); normalize so list operations cannot fail.
            categories = _as_list(post.get('categories'))
            tags = _as_list(post.get('tags'))

            rel_path = os.path.relpath(file_path, HEXO_BLOG_DIR)
            # Posts directly under the root have an empty dirname; drop empty
            # components so no blank category is created.
            dir_categories = [
                part for part in os.path.dirname(rel_path).split(os.sep) if part
            ]
            categories.extend(dir_categories)

            print(f"成功解析文章: {title}, time {date}")
            return {
                'title': title,
                'date': date,
                'text': content,
                'categories': categories,
                'tags': tags,
            }
        except UnicodeDecodeError:
            # Wrong encoding guess: try the next candidate.
            continue
        except Exception as e:
            print(f"解析文章失败: {file_path} - {e}")
            return None

    print(f"无法使用任何编码解析文章: {file_path}")
    return None
def _coerce_datetime(value):
    """Turn a front-matter date value into a ``datetime.datetime`` or ``None``.

    Returns ``None`` when the value is missing or cannot be interpreted.
    """
    if value is None:
        return None
    # ``datetime`` is a subclass of ``date``, so it must be tested first.
    if isinstance(value, datetime.datetime):
        return value
    if isinstance(value, datetime.date):
        # Date-only front matter: use midnight of that day.
        return datetime.datetime(value.year, value.month, value.day)
    try:
        return datetime.datetime.fromisoformat(str(value).strip())
    except ValueError:
        print(f"无法解析发布时间,已忽略: {value!r}")
        return None


def _build_typecho_post(post_data):
    """Create the ``Post`` to upload from a dict produced by ``parse_hexo_post``."""
    post = Post(
        title=post_data['title'],
        description=post_data['text'][:200],  # first 200 characters as the description
    )
    # Post body.
    post.text = post_data['text']
    # Categories and tags are only set when non-empty.
    if post_data['categories']:
        post.categories = post_data['categories']
    if post_data['tags']:
        post.tags = post_data['tags']
    # Publication time: the key is always present in ``post_data``, so test
    # the value rather than the key's existence.
    created = _coerce_datetime(post_data.get('date'))
    if created is not None:
        post.dateCreated = created
        post.date_created_gmt = created.replace(tzinfo=datetime.timezone.utc)
    return post


def import_posts(typecho, existing_posts):
    """Interactively import every Hexo post under ``HEXO_BLOG_DIR`` into Typecho.

    Posts whose title is already in ``existing_posts`` are skipped. For each
    remaining post the user is prompted until they choose to continue with
    all posts.

    Args:
        typecho: Client providing ``new_post(post, publish=True)``.
        existing_posts: Collection of titles already present on the site.
    """
    print(f"开始导入文章Hexo 博客目录: {HEXO_BLOG_DIR}")
    continue_all = False
    imported_count = 0
    skipped_count = 0
    failed_count = 0

    for root, _, files in os.walk(HEXO_BLOG_DIR):
        for file in files:
            if not file.endswith('.md'):
                continue

            file_path = os.path.join(root, file)
            post_data = parse_hexo_post(file_path)
            if not post_data:
                continue

            if post_data['title'] in existing_posts:
                print(f"跳过已存在的文章: {post_data['title']}")
                skipped_count += 1
                continue

            if not continue_all:
                print(f"准备导入文章: {post_data['title']}")
                action = input("输入 'c' 继续导入所有, 's' 导入当前文章, 其他键跳过: ").lower()
                if action == 'c':
                    continue_all = True
                elif action != 's':
                    print(f"跳过: {post_data['title']}")
                    skipped_count += 1
                    continue

            try:
                post = _build_typecho_post(post_data)
                # Publish the post.
                result = typecho.new_post(post, publish=True)
                print(f"成功导入: {post_data['title']} (ID: {result})")
                imported_count += 1
            except Exception as e:
                # Best-effort migration: report the failure and carry on.
                print(f"导入失败: {post_data['title']} - {e}")
                failed_count += 1

    print(f"导入完成。成功导入 {imported_count} 篇文章,跳过 {skipped_count} 篇文章。")
    if failed_count:
        print(f"导入失败 {failed_count} 篇文章。")
def main():
    """Script entry point: connect, fetch existing titles, then run the import."""
    print("开始执行导入脚本...")
    client = connect_to_typecho()
    if client:
        known_titles = get_existing_posts(client)
        import_posts(client, known_titles)
        print("脚本执行完毕。")
    else:
        # No usable client: nothing else can be done.
        print("无法连接到 Typecho脚本退出。")


if __name__ == "__main__":
    main()