Скрипт на Python : поиск фразы на сайте

Сегодня изучаем скрипт, написанный на Python 3, который «ходит по сайту» http://surgery.moscow и ищет заданную фразу «магнитный коврик».



import requests
from urllib.parse import urljoin
from bs4 import BeautifulSoup

# Crawl one page of a website, follow every link found on it, and report
# which of the linked pages contain the search phrase.

# Define the URL of the website to crawl
base_url = 'http://surgery.moscow'
url = base_url + '/page-to-crawl'

# Define the phrase to search for
search_phrase = 'магнитный коврик'

# Seconds to wait for a server before giving up on a request; without a
# timeout requests.get() may block forever on an unresponsive host.
request_timeout = 10

# Make a GET request to the website and parse the HTML content with BeautifulSoup
response = requests.get(url, timeout=request_timeout)
soup = BeautifulSoup(response.content, 'html.parser')

# Find all the links on the page and store them in a list (absolute URLs,
# each one only once, in the order they appear on the page)
links = []
seen_links = set()
for anchor in soup.find_all('a'):
    href = anchor.get('href')
    if not href or not href.strip():
        continue
    # Resolve every href against the URL of the page it came from, so that
    # relative links ('/path', 'page.html', '#anchor', '//host/x') become
    # absolute; already-absolute links pass through urljoin unchanged.
    # The fragment is dropped because it never changes the fetched document.
    link_url = urljoin(url, href.strip()).split('#', 1)[0]
    # Skip schemes that requests cannot fetch (mailto:, tel:, javascript:, ...)
    if not link_url.startswith(('http://', 'https://')):
        continue
    if link_url in seen_links:
        continue
    seen_links.add(link_url)
    links.append(link_url)

# Loop through each link and search for the phrase on the linked page
for link in links:
    try:
        link_response = requests.get(link, timeout=request_timeout)
    except requests.RequestException as exc:
        # One unreachable page must not abort the whole search
        print(f'Could not fetch {link}: {exc}')
        continue
    link_soup = BeautifulSoup(link_response.content, 'html.parser')
    if search_phrase in link_soup.get_text():
        print(f'The phrase "{search_phrase}" was found on {link}')

Описанный выше скрипт можно применять для любых сайтов, на которых нужно найти что-нибудь такое, что не отображается в результатах поиска стандартных поисковиков.

Результат выдачи примерно такой:

The phrase "магнитный коврик" was found on http://surgery.moscow/smos/2023/05/12/%d1%81%d0%ba%d1%80%d0%b8%d0%bf%d1%82-%d0%bd%d0%b0-python-%d0%bf%d0%be%d0%b8%d1%81%d0%ba-%d1%84%d1%80%d0%b0%d0%b7%d1%8b-%d0%bd%d0%b0-%d1%81%d0%b0%d0%b9%d1%82%d0%b5/
The phrase "магнитный коврик" was found on http://surgery.moscow/smos/2023/05/12/%d0%bf%d1%80%d0%b8%d0%bc%d0%b5%d1%80-%d0%ba%d0%be%d1%82%d0%be%d1%80%d1%8b%d0%b9-%d0%bc%d0%be%d0%b6%d0%b5%d1%82-%d0%bf%d0%be%d0%b1%d1%83%d0%b4%d0%b8%d1%82%d1%8c-%d0%b2%d0%b0%d1%81-%d0%bf%d1%80%d0%b8/
The phrase "магнитный коврик" was found on http://surgery.moscow/smos/2023/05/12/%d1%85%d0%b8%d1%80%d1%83%d1%80%d0%b3-%d1%85%d0%be%d1%87%d0%b5%d1%82-%d0%ba%d1%83%d0%bf%d0%b8%d1%82%d1%8c-%d0%bc%d0%b0%d0%b3%d0%bd%d0%b8%d1%82%d0%bd%d1%8b%d0%b9-%d0%ba%d0%be%d0%b2%d1%80%d0%b8%d0%ba/
The phrase "магнитный коврик" was found on http://surgery.moscow/smos/2023/05/12/%d0%ba%d0%be%d0%b3%d0%b4%d0%b0-%d1%85%d0%b8%d1%80%d1%83%d1%80%d0%b3%d0%b8%d1%87%d0%b5%d1%81%d0%ba%d0%b8%d0%b9-%d0%bc%d0%b0%d0%b3%d0%bd%d0%b8%d1%82%d0%bd%d1%8b%d0%b9-%d0%ba%d0%be%d0%b2%d1%80%d0%b8/
The phrase "магнитный коврик" was found on http://surgery.moscow/smos/2023/05/12/%d0%b5%d1%89%d0%b5-%d0%be%d0%b4%d0%b8%d0%bd-%d0%ba%d0%be%d0%bd%d0%ba%d1%80%d0%b5%d1%82%d0%bd%d1%8b%d0%b9-%d1%81%d0%bb%d1%83%d1%87%d0%b0%d0%b9-%d0%ba%d0%be%d0%b3%d0%b4%d0%b0-%d0%bc%d0%b0%d0%b3%d0%bd/