1. 크롤링 준비하기
import requests
from bs4 import BeautifulSoup
# Request headers: send a browser-like User-Agent string with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
}
# Download the page, passing the headers defined above.
data = requests.get(
    'https://movie.naver.com/movie/sdb/rank/rmovie.naver?sel=pnt&date=20210829',
    headers=headers,
)
# Parse the response text with the 'html.parser' backend.
soup = BeautifulSoup(data.text, 'html.parser')
URL만 크롤링할 사이트로 변경한 후, print(soup)이 잘 나오는지 실행시켜 보기
import requests
from bs4 import BeautifulSoup
# Request headers: send a browser-like User-Agent string with the request.
headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'}
data = requests.get('https://www.genie.co.kr/chart/top200?ditc=M&rtm=N&ymd=20210701',headers=headers)

# CSS selectors of the parts to crawl, listed in order (rank, title, artist).
# They share the prefix up to "tr" and differ only from "td" onwards:
#body-content > div.newest-list > div > table > tbody > tr:nth-child(1) > td.number
#body-content > div.newest-list > div > table > tbody > tr:nth-child(1) > td.info > a.title.ellipsis
#body-content > div.newest-list > div > table > tbody > tr:nth-child(1) > td.info > a.artist.ellipsis
soup = BeautifulSoup(data.text, 'html.parser')

# Select every table row using the shared part of the selectors above.
musics = soup.select('#body-content > div.newest-list > div > table > tbody > tr')

# The loop body must be indented; without indentation Python raises IndentationError.
for music in musics:
    # Keep the first two characters of the rank cell's text, then strip whitespace.
    rank = music.select_one('td.number').text[0:2].strip()
    title = music.select_one('td.info > a.title.ellipsis').text.strip()
    artist = music.select_one('td.info > a.artist.ellipsis').text
    print(rank,title,artist)
musics = soup.select(' ') 부분에는 세 선택자에서 동일한 부분(#body-content ~ tr)까지만 복사, 붙여넣기
for문 작성
rank = music.select_one(' ')에는 선택자에서 서로 다른 부분(td부터)을 복사, 붙여넣기. title, artist도 동일하게 작성
# Successive versions of the same statement, each adding one more step.
# Version 1: take the full text of the rank cell.
rank = music.select_one('td.number').text
# Version 2: keep only the first two characters of that text.
rank = music.select_one('td.number').text[0:2]
# Version 3: also strip leading/trailing whitespace from the two-character slice.
rank = music.select_one('td.number').text[0:2].strip()
import requests
from bs4 import BeautifulSoup
# Request headers: send a browser-like User-Agent string with the request.
headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'}
data = requests.get('https://www.genie.co.kr/chart/top200?ditc=M&rtm=N&ymd=20210701',headers=headers)
soup = BeautifulSoup(data.text, 'html.parser')

# Select every row of the chart table.
musics = soup.select('#body-content > div.newest-list > div > table > tbody > tr')

# The loop body must be indented; without indentation Python raises IndentationError.
for music in musics:
    # Keep the first two characters of the rank cell's text, then strip whitespace.
    rank = music.select_one('td.number').text[0:2].strip()
    title = music.select_one('td.info > a.title.ellipsis').text.strip()
    artist = music.select_one('td.info > a.artist.ellipsis').text
    print(rank, title, artist)
# Print rank, title and artist for every row in `musics`, cleaning the
# title of the row whose rank is "15" before printing it.
for music in musics:
    rank = music.select_one('td.number').text[0:2].strip()
    title = music.select_one('td.info > a.title.ellipsis').text.strip()
    artist = music.select_one('td.info > a.artist.ellipsis').text
    if rank == "15":
        # Remove every newline, then every space, from this row's title.
        # NOTE(review): the original indentation was lost; both replace calls
        # are assumed to belong to this `if` and `print` to run for every row
        # -- confirm against the intended output.
        title = title.replace("\n", "")
        title = title.replace(" ", "")
    print(rank, title, artist)
"\n"은 줄바꿈을 ""로 바꿔서 제거해주고 그 아래 부분은 공백을 ""으로 변환해 제거
'Sparta 웹개발 종합반' 카테고리의 다른 글
2023-01-13 4주차 프로젝트 [스파르타피디아] GET, POST (2) | 2023.01.13 |
---|---|
2023-01-12 4주차 Flask, GET, POST, API (0) | 2023.01.12 |
2023-01-10 파이썬 웹스크래핑(크롤링) (0) | 2023.01.11 |
2023-01-10 파이썬 (0) | 2023.01.10 |
2023-01-10 3주차 Open API 붙여보기 (0) | 2023.01.10 |