"""Scrape a Netflix genre page and export its rows to an Excel sheet.

The script opens the genre page in Chrome via Selenium, parses the rendered
HTML with BeautifulSoup, and collects one record per program:
(category, program title, image URL, link). The records are printed as they
are found and finally written to an .xlsx file with pandas.
"""
from bs4 import BeautifulSoup
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service

s = Service('c:/users/smile/desktop/chromedriver.exe')
driver = webdriver.Chrome(service=s)
url = 'https://www.netflix.com/ph/browse/genre/839338'
try:
    driver.get(url)
    html = driver.page_source
finally:
    # Always shut the browser down, even if the page load fails; otherwise
    # every run leaves a Chrome/chromedriver process behind.
    driver.quit()

soup = BeautifulSoup(html, 'html.parser')
section_list = soup.select('section.nm-collections-row')

results = []
for section in section_list:
    # The heading text carries a trailing "Explore more" link label; strip it
    # so only the category name remains.
    heading = section.select_one('section > h2.nm-collections-row-name')
    section_title = heading.text.replace('Explore more', '') if heading else ''

    program_list = section.select('li')
    for program in program_list:
        title_tag = program.select_one('span.nm-collections-title-name')
        if title_tag is None:
            # Not a program entry (no title element) -- nothing to record.
            continue
        program_title = title_tag.text

        # Look the image and link up inside *this* <li>. Querying the whole
        # section here would return the first program's values for every row.
        image_tag = program.select_one('img.nm-collections-title-img')
        program_image = image_tag.get('src') if image_tag else None
        link_tag = program.select_one(
            'a.nm-collections-title.nm-collections-link')
        program_link = link_tag.get('href') if link_tag else None

        print(section_title, program_title, program_image, program_link,
              sep='\n')
        data = [section_title, program_title, program_image, program_link]
        results.append(data)

# Number of program records collected.
count = len(results)

# Passing the column names to the constructor keeps this working when nothing
# was scraped (assigning df.columns on an empty frame raises ValueError).
df = pd.DataFrame(results, columns=['카테고리', '프로그램명', '이미지', '링크'])
df.to_excel('엑셀파일저장하기.xlsx', index=False)
'코딩공부' 카테고리의 다른 글
220123 [코딩공부] 워드크라우드 만들기 (0) | 2022.01.23 |
---|---|
220120 [코딩공부] 넷플릭스크롤링 영화제목가져오기 (0) | 2022.01.22 |
220121 [코딩공부] 넷플릭스 크롤링-카테고리별 프로그램명 찾기 (0) | 2022.01.22 |
220119 [코딩공부] daum 뉴스기사 타이틀 가져오기 (0) | 2022.01.19 |
220118 [코딩공부] DeprecationWarning: executable_path has been deprecated, please pass in a Service object 오류 해결 공부 (0) | 2022.01.18 |
댓글