import re

# Anchor-tag matcher. Groups: 1 = double-quoted href value, 2 = single-quoted
# href value (exactly one of the two participates in a match), 3 = everything
# between the opening tag and the next closing </a>.
_LINK_RE = re.compile(
    r"<a\s+(?:[^>]*?\s+)?"         # "<a" plus any attributes preceding href
    r"href\s*=\s*"                 # HTML permits whitespace around "="
    r"(?:\"([^\"]*)\"|'([^']*)')"  # quoted value, either quote style
    r"[^>]*>"                      # remaining attributes up to end of tag
    r"(.*?)</a>",                  # inner content, non-greedy
    re.IGNORECASE | re.DOTALL,
)


def extract_links_and_text(html):
    """Return ``(url, text)`` pairs for every ``<a href=...>`` link in *html*.

    Matching is regex-based, case-insensitive, and spans newlines. The
    ``href`` value may be wrapped in double or single quotes; unquoted
    values are not matched. ``text`` is the raw content between the opening
    tag and the next ``</a>``, so any nested markup is returned as-is.

    Args:
        html: The HTML source to scan.

    Returns:
        A list of ``(url, text)`` tuples in document order; empty when no
        link is found.
    """
    return [
        # Compare against None rather than truthiness so that an empty
        # double-quoted href ("") is reported as "" and not as None.
        (m.group(1) if m.group(1) is not None else m.group(2), m.group(3))
        for m in _LINK_RE.finditer(html)
    ]


html_content = """
<html>
<body>
<h1>Sample Page</h1>
<p>Check out these links:</p>
<ul>
<a href="https://www.example.com">Example Site</a>
<a href="/page">Internal Page</a>
<a href="https://www.github.com">GitHub</a>
</ul>
</body>
</html>
"""

links = extract_links_and_text(html_content)

for url, text in links:
    print(f"URL: {url}")
    print(f"Text: {text}")
    print("---")