Code Comment Extractor with Regex

Extracting comments from source code using regular expressions

Python

import re

def extract_comments(code, language):
    """Return the comments found in *code*, in source order.

    This is a regex-based heuristic, not a real parser. Ordinary quoted
    string literals are recognised and skipped, so comment markers that
    appear inside them (for example the ``//`` in ``"http://example.com"``
    or the ``#`` in ``"#fff"``) are not reported as comments.

    Args:
        code: Source text to scan.
        language: One of 'python', 'java', 'javascript', 'c' or 'cpp'.

    Returns:
        A list of comment strings including their delimiters. For Python,
        triple-quoted blocks are reported alongside ``#`` comments.

    Raises:
        ValueError: If *language* is not one of the supported names.
    """
    # One-line quoted literals with backslash escapes. They are matched but
    # not captured, which lets the scanner step over their contents.
    double_quoted = r'"(?:\\.|[^"\\\n])*"'
    single_quoted = r"'(?:\\.|[^'\\\n])*'"

    if language == 'python':
        # Triple-quoted alternatives come first: otherwise ''' would be read
        # as an empty '' literal followed by a stray quote.
        comment = r'\'\'\'[\s\S]*?\'\'\'|"""[\s\S]*?"""|#.*?$'
        literals = [double_quoted, single_quoted]
    elif language in ('java', 'javascript', 'c', 'cpp'):
        comment = r'//.*?$|/\*[\s\S]*?\*/'
        literals = [double_quoted, single_quoted]
        if language == 'javascript':
            # Template literals may span lines and may contain // or /*.
            literals.append(r'`(?:\\.|[^`\\])*`')
    else:
        raise ValueError("Unsupported language")

    # The comment alternatives are tried first at every position; a literal
    # only wins when no comment starts there, and it then consumes its whole
    # body so markers inside it are never examined.
    pattern = '(?P<comment>' + comment + ')|' + '|'.join(literals)
    return [
        match.group('comment')
        for match in re.finditer(pattern, code, re.MULTILINE)
        if match.group('comment') is not None
    ]

# Sample Python source containing a full-line comment, an inline comment
# and a triple-quoted block.
python_code = """
def hello_world():
    # This is a single-line comment
    print("Hello, World!")  # This is an inline comment
    
'''
This is a multi-line comment
It can span multiple lines
'''

"""

# Sample Java source containing a full-line comment, an inline comment
# and a /* ... */ block.
java_code = """
public class HelloWorld {
    // This is a single-line comment
    public static void main(String[] args) {
        System.out.println("Hello, World!");  // This is an inline comment
    }
    
    /*
    This is a multi-line comment
    It can span multiple lines
    */
}
"""

# Run the extractor over each sample and print every comment it returns,
# stripped of surrounding whitespace, under its heading.
for heading, sample, sample_language in (
    ("Python comments:", python_code, 'python'),
    ("\nJava comments:", java_code, 'java'),
):
    print(heading)
    for comment in extract_comments(sample, sample_language):
        print(comment.strip())

Click Run or press Shift + Enter to run the code.