import re
def extract_links_and_text(html):
    """Return ``(url, text)`` pairs for every ``<a ... href=...>`` in *html*.

    The scan is regex-based, case-insensitive, and lets link text span
    multiple lines. The ``href`` value may be wrapped in double or single
    quotes, and whitespace around the ``=`` is tolerated.

    Args:
        html: The markup to scan, as a string.

    Returns:
        A list of ``(href, inner)`` tuples in the order the anchors appear.
        ``inner`` is the raw text between the opening tag and the nearest
        following ``</a>``; nested markup and whitespace are left untouched.
        The list is empty when no anchor with a quoted ``href`` is found.
    """
    link_pattern = (
        # "<a", then optionally other attributes, then a whitespace-delimited
        # href (so attributes such as data-href are not mistaken for it).
        r'<a\s+(?:[^>]*?\s+)?href\s*=\s*'
        # The value, either double-quoted (group 1) or single-quoted (group 2).
        r'(?:"([^"]*)"|\'([^\']*)\')'
        # Rest of the opening tag, then the link body up to the first </a>.
        r'[^>]*>(.*?)</a>'
    )
    matches = re.findall(link_pattern, html, re.IGNORECASE | re.DOTALL)
    # findall yields '' for the quote-style group that did not participate,
    # so `or` selects whichever one actually captured the URL.
    return [
        (double_quoted or single_quoted, text)
        for double_quoted, single_quoted, text in matches
    ]
# Sample document used to demonstrate the extractor: three anchors inside a
# list, mixing absolute and site-relative URLs.
html_content = """
<html>
<body>
<h1>Sample Page</h1>
<p>Check out these links:</p>
<ul>
<a href="https://www.example.com">Example Site</a>
<a href="/page">Internal Page</a>
<a href="https://www.github.com">GitHub</a>
</ul>
</body>
</html>
"""

# Run the extractor and print each link as a URL line, a text line, and a
# "---" separator line.
links = extract_links_and_text(html_content)
for url, text in links:
    print(f"URL: {url}", f"Text: {text}", "---", sep="\n")
# Click Run or press Shift + Enter to run the code.