[Python]Word文章をPowerPointにコンバート

指定されたマイクロソフトWordドキュメント「サンプル.docx」の内容全体を、PowerPointプレゼンテーションへ自動変換するPythonスクリプトを作成します。

このスクリプトは、ドキュメントの各章やセクションを個別のスライドに割り当て、主要なテキストや表を自動で抽出し、レイアウトします。

実行前の準備
ソースコード
実行方法

実行前の準備

このスクリプトを実行するには、お使いのコンピュータにpython-pptxとpython-docxという2つのライブラリをインストールする必要があります。ターミナルやコマンドプロンプトで以下のコマンドを実行してください。

pip install python-pptx python-docx

ソースコード

準備ができたら、以下のPythonコードを実行してください。

# -*- coding: utf-8 -*-
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.dml.color import RGBColor
from pptx.enum.text import PP_ALIGN
from docx import Document
import re

# --- Configuration ---
# Path to the Word document that will be converted.
DOCX_FILE_PATH = "サンプル.docx"
# File name of the PowerPoint presentation that will be written.
OUTPUT_PPTX_FILE = "Sample.pptx"

# --- Helper Functions ---

def parse_docx(file_path):
    """
    Parse a Word document into a list of slide descriptions.

    A paragraph whose text matches a chapter/section heading (for example
    "第1章 ..." or "1.1. ...") starts a new slide. Every following non-empty
    paragraph becomes body text of that slide. Paragraphs that appear before
    the first heading are not placed on any content slide.

    Tables are read separately from paragraphs, so their position within the
    text is not known here; as a simplification, every table is attached to
    the last section found in the document.

    A title slide, built from the first non-empty paragraph of the document,
    is inserted at index 0 of the result.

    Args:
        file_path: Path to the .docx file to read.

    Returns:
        A list of dicts with the keys 'title' (str), 'content' (list of str)
        and 'tables' (list of tables, each a list of rows of cell strings).
        The first entry additionally has 'is_title_slide' set to True.
        Returns None, after printing a message, if the document cannot be
        read or contains no text.
    """
    try:
        document = Document(file_path)
        slides_data = []
        current_slide = None

        # Regex to identify chapter titles (e.g., 第1章, 1.1.)
        title_pattern = re.compile(r"^(第\d+章|\d+\.\d+\.)\s.*")

        paragraphs = document.paragraphs
        for para in paragraphs:
            text = para.text.strip()
            if title_pattern.match(text):
                # A new heading closes the section collected so far.
                if current_slide:
                    slides_data.append(current_slide)
                current_slide = {
                    'title': text,
                    'content': [],
                    'tables': []
                }
            elif current_slide and text:  # Skip empty paragraphs
                current_slide['content'].append(text)

        # Close the final section BEFORE attaching tables; otherwise
        # slides_data[-1] would still be the previous section (or nothing at
        # all when the document has a single section).
        if current_slide:
            slides_data.append(current_slide)

        # Simplified table handling: all tables go to the last section.
        if slides_data:
            for table in document.tables:
                table_data = [
                    [cell.text for cell in row.cells]
                    for row in table.rows
                ]
                slides_data[-1]['tables'].append(table_data)

        # Use the first paragraph that actually contains text as the main
        # title, so leading blank paragraphs do not yield an empty title.
        main_title = next(
            (p.text.strip() for p in paragraphs if p.text.strip()),
            None,
        )
        if main_title is None:
            print("Error: The document contains no text to use as a title.")
            return None

        title_slide = {
            'title': main_title,
            'content': ["サンプル文章"],
            'tables': [],
            'is_title_slide': True
        }
        slides_data.insert(0, title_slide)

        return slides_data

    except FileNotFoundError:
        print(f"Error: The file '{file_path}' was not found.")
        return None
    except Exception as e:
        # Any other failure (including library-specific errors) is reported
        # here so the caller only has to check for None.
        print(f"An error occurred while parsing the document: {e}")
        return None

def add_title_slide(pres, title, subtitle):
    """Add a title slide to the presentation.

    Args:
        pres: Presentation object the slide is appended to.
        title: Text placed in the slide's title placeholder.
        subtitle: Text placed in the placeholder with index 1 of the layout.
    """
    slide_layout = pres.slide_layouts[0]  # Title Slide layout
    slide = pres.slides.add_slide(slide_layout)

    title_shape = slide.shapes.title
    title_shape.text = title

    subtitle_shape = slide.placeholders[1]
    subtitle_shape.text = subtitle

    # Common font and colour for all text on the slide.
    for shape in (title_shape, subtitle_shape):
        for paragraph in shape.text_frame.paragraphs:
            for run in paragraph.runs:
                run.font.name = 'メイリオ'
                run.font.color.rgb = RGBColor(34, 34, 34)

    # Emphasise the title. Iterating over the runs (instead of indexing
    # runs[0]) avoids an IndexError when the title text is empty, in which
    # case the paragraph has no runs, and also covers multi-run titles.
    for paragraph in title_shape.text_frame.paragraphs:
        for run in paragraph.runs:
            run.font.bold = True
            run.font.size = Pt(44)

def _add_slide_title(slide, text):
    """Add the styled title text box shared by content and continuation slides."""
    title_box = slide.shapes.add_textbox(Inches(0.5), Inches(0.2), Inches(15), Inches(1.0))
    # A new text box already holds one empty paragraph; writing into it
    # (rather than calling add_paragraph()) avoids a blank line above the title.
    p = title_box.text_frame.paragraphs[0]
    p.text = text
    p.font.name = 'メイリオ'
    p.font.size = Pt(32)
    p.font.bold = True
    p.font.color.rgb = RGBColor(68, 84, 106)


def add_content_slide(pres, title, content_list, tables_data):
    """Add a content slide with a title, body text and tables.

    The title and body are drawn in text boxes positioned for a 16 x 9 inch
    slide. Tables are stacked below the body text; when a table would not
    fit on the current slide, a continuation slide titled
    "<title> (続き)" is started and the table is placed there.

    Args:
        pres: Presentation object the slide(s) are appended to.
        title: Slide title text.
        content_list: List of strings, one body paragraph each.
        tables_data: List of tables; each table is a list of rows and each
            row a list of cell strings. Empty tables are skipped.
    """
    # Index 5 is the "Title Only" layout in python-pptx's default template.
    # NOTE(review): the layout's own title placeholder is left unused; the
    # title is drawn in a separate text box instead.
    slide_layout = pres.slide_layouts[5]
    slide = pres.slides.add_slide(slide_layout)

    _add_slide_title(slide, title)

    # Body text
    content_shape = slide.shapes.add_textbox(Inches(0.5), Inches(1.2), Inches(15), Inches(2.5))
    content_frame = content_shape.text_frame
    content_frame.word_wrap = True
    for index, item in enumerate(content_list):
        # Reuse the text frame's initial empty paragraph for the first item
        # so the body does not start with a blank line.
        if index == 0:
            p = content_frame.paragraphs[0]
        else:
            p = content_frame.add_paragraph()
        p.text = item
        p.font.name = 'メイリオ'
        p.font.size = Pt(16)
        p.level = 0
        p.space_after = Pt(8)

    # Tables
    row_height = Inches(0.5)
    continuation_top = Inches(1.5)  # First table position on a continuation slide
    top_position = Inches(4.0)      # First table position below the body text
    for table_data in tables_data:
        if not table_data:
            continue
        rows = len(table_data)
        # Use the widest row so that ragged rows cannot index past the grid.
        cols = max(len(row) for row in table_data)
        if cols == 0:
            continue
        table_height = row_height * rows

        # Start a continuation slide when the table would begin too close to
        # the bottom edge, or would run past it. A table that is taller than
        # a fresh continuation slide is still placed (it cannot be split here).
        starts_too_low = top_position > pres.slide_height - Inches(1.0)
        overflows = top_position + table_height > pres.slide_height
        if starts_too_low or (overflows and top_position > continuation_top):
            slide = pres.slides.add_slide(slide_layout)
            _add_slide_title(slide, title + " (続き)")
            top_position = continuation_top

        table_shape = slide.shapes.add_table(rows, cols, Inches(0.5), top_position, Inches(15), table_height)
        table = table_shape.table

        # Populate the table and format every paragraph of each cell
        # (cell text containing line breaks produces several paragraphs).
        for r_idx, row_data in enumerate(table_data):
            for c_idx, cell_data in enumerate(row_data):
                cell = table.cell(r_idx, c_idx)
                cell.text = cell_data

                for para in cell.text_frame.paragraphs:
                    para.font.name = 'メイリオ'
                    para.font.size = Pt(12)
                    if r_idx == 0:  # Header row
                        para.font.bold = True
                if r_idx == 0:
                    cell.fill.solid()
                    cell.fill.fore_color.rgb = RGBColor(220, 230, 241)

        top_position += table_shape.height + Inches(0.5)


def create_full_presentation():
    """
    Build the PowerPoint file from the configured Word document.

    Parses DOCX_FILE_PATH with parse_docx(), creates a 16 x 9 inch
    presentation, adds one slide per parsed entry (a title slide for the
    entry flagged 'is_title_slide', a content slide otherwise) and saves the
    result to OUTPUT_PPTX_FILE. Progress and errors are reported with print().
    """
    parsed_slides = parse_docx(DOCX_FILE_PATH)
    if not parsed_slides:
        print("Could not generate presentation due to parsing errors.")
        return

    presentation = Presentation()
    # Widescreen canvas; the slide builders position shapes for this size.
    presentation.slide_width, presentation.slide_height = Inches(16), Inches(9)

    for entry in parsed_slides:
        heading = entry['title']
        body = entry['content']
        if entry.get('is_title_slide', False):
            # The title slide shows its content lines as a multi-line subtitle.
            add_title_slide(presentation, heading, "\n".join(body))
            continue
        add_content_slide(presentation, heading, body, entry['tables'])

    try:
        presentation.save(OUTPUT_PPTX_FILE)
        print(f"Successfully created presentation: '{OUTPUT_PPTX_FILE}'")
    except Exception as save_error:
        print(f"An error occurred while saving the presentation: {save_error}")

# --- Execute the script ---
# Run the conversion only when this file is executed directly (not on import).
if __name__ == "__main__":
    create_full_presentation()

実行方法

スクリプトの実行後、作成されたファイルを開くと、内容が各スライドに分割されて表示されます。必要に応じて、デザインの微調整や画像の追加を行ってください。実行手順は以下のとおりです。

ファイルの準備:
- 上記のPythonコードを、create_presentation.py のような名前で保存します。
- Wordファイル（サンプル.docx）を、保存したPythonスクリプトと同じフォルダに置いてください。
スクリプトの実行:
- ターミナルやコマンドプロンプトで、ファイルが保存されているフォルダに移動します。
- python create_presentation.py というコマンドを実行します。
完了:
- 実行が完了すると、同じフォルダに Sample.pptx という名前のPowerPointファイルが作成されます。