使用方法：

python strip_comments.py --root "源码目录" --outdir "去除注释后输出的文件目录"

strip_comments.py:

from __future__ import annotations

import argparse
import os
import stat
import shutil
from pathlib import Path

# Path components that exclude a file from processing: any file whose path
# contains one of these names is neither stripped nor copied.
SKIP_PARTS = {'.git'}
# File suffixes (matched case-sensitively) whose contents get their comments
# stripped; files with any other suffix are copied to the output unchanged.
EXTENSIONS = {'.c', '.h'}


def strip_comments(text: str) -> str:
    """Return C/C++ source *text* with ``//`` and ``/* */`` comments removed.

    The text is scanned once with a small state machine so that comment
    markers inside string and character literals are left untouched.

    Behavior:
      * Newlines inside removed comments are kept, so line numbers in the
        output still match the input.
      * A block comment that sits directly between two non-whitespace
        characters on the same line is replaced by a single space, as the C
        language does, so ``int/**/x`` becomes ``int x`` rather than ``intx``.
      * A ``//`` comment whose line ends with a backslash continues onto the
        next line (C line splicing); the continuation is removed as well.

    Args:
        text: Source code using ``\\n`` line endings.

    Returns:
        The source code without comments.
    """
    result = []
    index = 0
    length = len(text)
    state = 'code'
    quote = ''  # closing delimiter of the literal currently being scanned
    comment_spans_lines = False  # did the current block comment emit a newline?

    while index < length:
        char = text[index]
        next_char = text[index + 1] if index + 1 < length else ''

        if state == 'code':
            if char in ('"', "'"):
                result.append(char)
                quote = char
                state = 'literal'
            elif char == '/' and next_char == '/':
                state = 'line_comment'
                index += 1  # skip the second '/'
            elif char == '/' and next_char == '*':
                state = 'block_comment'
                comment_spans_lines = False
                index += 1  # skip the '*' so that "/*/" does not close itself
            else:
                result.append(char)
        elif state == 'literal':
            result.append(char)
            if char == '\\' and index + 1 < length:
                # Copy the escaped character verbatim so an escaped quote
                # does not terminate the literal.
                result.append(text[index + 1])
                index += 1
            elif char == quote:
                state = 'code'
        elif state == 'line_comment':
            if char == '\\' and next_char == '\n':
                # Backslash-newline splices the next line into this comment;
                # keep the newline to preserve line numbering.
                result.append(next_char)
                index += 1
            elif char == '\n':
                result.append(char)
                state = 'code'
        elif state == 'block_comment':
            if char == '\n':
                result.append(char)
                comment_spans_lines = True
            elif char == '*' and next_char == '/':
                state = 'code'
                index += 1  # skip the closing '/'
                following = text[index + 1] if index + 1 < length else ''
                # Only insert a separator when removing the comment would
                # otherwise glue two tokens together.
                if (
                    not comment_spans_lines
                    and result
                    and not result[-1].isspace()
                    and following
                    and not following.isspace()
                ):
                    result.append(' ')

        index += 1

    return ''.join(result)


def collapse_blank_lines(text: str) -> str:
    """Squeeze every run of blank lines in *text* down to one empty line.

    A line counts as blank when it is empty or holds only whitespace; such
    lines are emitted as truly empty lines. Non-blank lines are kept as they
    are. Lines are split with ``str.splitlines`` and re-joined with ``\\n``,
    and a single trailing newline is written when the input ended with
    ``\\n`` or ``\\r``.

    Args:
        text: The text to compact.

    Returns:
        The compacted text; input that contains no lines is returned unchanged.
    """
    rows = text.splitlines()
    if not rows:
        return text

    kept = []
    for row in rows:
        if row.strip():
            kept.append(row)
        elif not kept or kept[-1] != '':
            # First blank line of a run: keep exactly one empty placeholder.
            # (Kept non-blank rows are never '', so '' marks a previous blank.)
            kept.append('')

    tail = '\n' if text[-1] in ('\n', '\r') else ''
    return '\n'.join(kept) + tail


def remove_readonly(func, path, _exc_info):
    """Make *path* writable and call *func* on it again.

    Used as the ``onerror`` hook of ``shutil.rmtree`` so that a removal which
    failed on a read-only entry can be retried.

    Args:
        func: The callable to re-invoke with *path*.
        path: Filesystem path whose mode is reset before retrying.
        _exc_info: Exception details supplied by the caller; unused.
    """
    writable_mode = stat.S_IWRITE
    os.chmod(path, writable_mode)
    func(path)

def main() -> int:
    """Mirror a project tree into an output directory with C comments removed.

    Command-line options:
        --root:   project directory to read (default: current directory).
        --outdir: directory to write (default: ``commentless-src``). It is
                  deleted and recreated on every run.

    Files whose suffix is in ``EXTENSIONS`` are written with their comments
    stripped and blank-line runs collapsed; all other files are copied as-is.
    Files with a path component (relative to the root) listed in
    ``SKIP_PARTS`` are ignored, as is anything already inside the output
    directory.

    Returns:
        0 on success. Invalid arguments terminate the process through
        ``argparse`` (exit status 2).
    """
    parser = argparse.ArgumentParser(description='Strip C/C++ comments from project sources.')
    parser.add_argument('--root', default='.', help='Project root directory.')
    parser.add_argument('--outdir', default='commentless-src', help='Output directory.')
    args = parser.parse_args()

    root = Path(args.root).resolve()
    outdir = Path(args.outdir).resolve()

    # Validate before touching the filesystem: the output directory is wiped
    # below, so a bad root or an overlapping outdir must be rejected first.
    if not root.is_dir():
        parser.error(f'--root is not a directory: {root}')
    if outdir == root or outdir in root.parents:
        parser.error(f'--outdir must not be the project root or one of its parents: {outdir}')

    if outdir.exists():
        shutil.rmtree(outdir, onerror=remove_readonly)

    outdir.mkdir(parents=True, exist_ok=True)

    for path in root.rglob('*'):
        # The output directory may live inside the root (it does with the
        # defaults); never feed freshly written output back into the walk.
        if outdir in path.parents or not path.is_file():
            continue

        relative_path = path.relative_to(root)
        # Match against root-relative parts so that directories above the
        # project root cannot trigger a skip.
        if any(part in SKIP_PARTS for part in relative_path.parts):
            continue

        target = outdir / relative_path
        target.parent.mkdir(parents=True, exist_ok=True)

        if path.suffix in EXTENSIONS:
            # Bytes that are not valid UTF-8 are dropped while decoding.
            source = path.read_text(encoding='utf-8', errors='ignore')
            stripped = strip_comments(source)
            target.write_text(collapse_blank_lines(stripped), encoding='utf-8')
        else:
            shutil.copy(path, target)

    return 0


# Run only when executed as a script; main()'s return value becomes the
# process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
看都看了，点个赞吧！