使用方法:
python strip_comments.py --root "源码目录" --outdir "去除注释后输出的文件目录"

strip_comments.py:
from __future__ import annotations
import argparse
import os
import stat
import shutil
from pathlib import Path
# Path components that exclude a file from processing when they appear
# anywhere in its path (version-control metadata).
SKIP_PARTS = {
    '.git',
}

# File suffixes whose contents get their comments stripped; files with any
# other suffix are copied to the output unchanged.
EXTENSIONS = {
    '.c',
    '.h',
}
def strip_comments(text: str) -> str:
    """Return *text* with C-style ``//`` and ``/* */`` comments removed.

    The input is scanned once with a small state machine whose states are
    ``code``, ``string`` (inside ``"..."``), ``char`` (inside ``'...'``),
    ``line_comment`` and ``block_comment``.  Comment markers that appear
    inside string or character literals are left untouched, and backslash
    escapes inside literals are copied verbatim so an escaped quote does not
    end the literal.

    Newlines that occur inside comments are kept, so the number of lines in
    the result equals the number of lines in the input.

    Args:
        text: Source text to process.

    Returns:
        The text with all comments removed.
    """
    result = []
    index = 0
    length = len(text)
    state = 'code'
    while index < length:
        char = text[index]
        next_char = text[index + 1] if index + 1 < length else ''
        if state == 'code':
            if char == '"':
                result.append(char)
                state = 'string'
            elif char == "'":
                result.append(char)
                state = 'char'
            elif char == '/' and next_char == '/':
                state = 'line_comment'
                index += 1  # skip the second '/'
            elif char == '/' and next_char == '*':
                state = 'block_comment'
                index += 1  # skip the '*' so "/*/" is not seen as closed
            else:
                result.append(char)
        elif state == 'string':
            result.append(char)
            if char == '\\' and index + 1 < length:
                # Copy the escaped character too so \" cannot end the literal.
                result.append(text[index + 1])
                index += 1
            elif char == '"':
                state = 'code'
        elif state == 'char':
            result.append(char)
            if char == '\\' and index + 1 < length:
                # Copy the escaped character too so \' cannot end the literal.
                result.append(text[index + 1])
                index += 1
            elif char == "'":
                state = 'code'
        elif state == 'line_comment':
            if char == '\n':
                result.append(char)
                # A backslash directly before the newline (optionally with a
                # '\r' in between) splices the next line onto this one, so the
                # comment continues.  The '//' opener guarantees the look-back
                # indices are valid.
                before = index - 1
                if text[before] == '\r':
                    before -= 1
                if text[before] != '\\':
                    state = 'code'
        elif state == 'block_comment':
            if char == '\n':
                result.append(char)
            elif char == '*' and next_char == '/':
                state = 'code'
                index += 1  # skip the closing '/'
                following = text[index + 1] if index + 1 < length else ''
                # C replaces a comment with one space.  Only emit it when the
                # characters on both sides would otherwise fuse into a single
                # token (e.g. "int/**/x"), to avoid adding stray whitespace.
                if (result and not result[-1].isspace()
                        and following and not following.isspace()):
                    result.append(' ')
        index += 1
    return ''.join(result)
def collapse_blank_lines(text: str) -> str:
    """Squeeze every run of blank lines in *text* down to one empty line.

    A line counts as blank when it is empty or contains only whitespace;
    the single line kept for each run is emitted as an empty string.  Lines
    are rejoined with ``'\\n'``, and a final ``'\\n'`` is emitted only when
    the input ended with ``'\\n'`` or ``'\\r'``.  Input that contains no
    lines at all is returned unchanged.
    """
    rows = text.splitlines()
    if len(rows) == 0:
        return text

    kept = []
    for row in rows:
        if row.strip():
            kept.append(row)
        elif not kept or kept[-1] != '':
            # First blank line of a run: non-blank rows are never '', so an
            # empty last entry means the previous row was already blank.
            kept.append('')

    # `text` is non-empty here because splitlines() produced at least one row.
    if text[-1] in ('\n', '\r'):
        trailer = '\n'
    else:
        trailer = ''
    return '\n'.join(kept) + trailer
def remove_readonly(func, path, _exc_info):
    """Error handler for ``shutil.rmtree``: make *path* writable and retry.

    Args:
        func: The callable that failed; it is invoked again with *path*.
        path: Filesystem path the failed call was operating on.
        _exc_info: Exception details supplied by the caller (unused).
    """
    writable = stat.S_IWRITE
    os.chmod(path, writable)
    func(path)
def main() -> int:
    """Copy a source tree to an output directory with C comments stripped.

    Command-line options:
        --root    Project root directory to read from (default ``.``).
        --outdir  Output directory (default ``commentless-src``).  Any
                  existing directory at this path is deleted first.

    Every regular file under the root is mirrored into the output directory
    at the same relative path.  Files whose suffix is in ``EXTENSIONS`` are
    passed through ``strip_comments`` and ``collapse_blank_lines``; all other
    files are copied unchanged.  Files whose root-relative path contains a
    component listed in ``SKIP_PARTS`` are ignored, as is anything located
    inside the output directory itself.

    Returns:
        0 on success.  Invalid arguments terminate the process through
        ``argparse`` (exit status 2).
    """
    parser = argparse.ArgumentParser(description='Strip C/C++ comments from project sources.')
    parser.add_argument('--root', default='.', help='Project root directory.')
    parser.add_argument('--outdir', default='commentless-src', help='Output directory.')
    args = parser.parse_args()
    root = Path(args.root).resolve()
    outdir = Path(args.outdir).resolve()

    # Validate before touching the filesystem: outdir is wiped below.
    if not root.is_dir():
        parser.error(f'--root is not a directory: {root}')
    if outdir == root or outdir in root.parents:
        # Deleting outdir would delete the sources we are about to read.
        parser.error(f'--outdir must not be the root directory or one of its parents: {outdir}')

    if outdir.exists():
        shutil.rmtree(outdir, onerror=remove_readonly)
    outdir.mkdir(parents=True, exist_ok=True)

    for path in root.rglob('*'):
        if not path.is_file():
            continue
        if outdir in path.parents:
            # outdir may live inside root (it does with the default
            # arguments); never feed our own output back in as input.
            continue
        relative_path = path.relative_to(root)
        # Match against the root-relative parts so that a directory named
        # like a skip entry *above* root does not exclude the whole tree.
        if any(part in SKIP_PARTS for part in relative_path.parts):
            continue
        target = outdir / relative_path
        target.parent.mkdir(parents=True, exist_ok=True)
        if path.suffix in EXTENSIONS:
            # NOTE(review): errors='ignore' silently drops bytes that are not
            # valid UTF-8, which alters non-UTF-8 sources - confirm intended.
            stripped = strip_comments(path.read_text(encoding='utf-8', errors='ignore'))
            target.write_text(collapse_blank_lines(stripped), encoding='utf-8')
        else:
            shutil.copy(path, target)
    return 0
# Run the CLI only when this file is executed as a script; the value returned
# by main() becomes the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())