PROGRAMMING

HTML 실습 - Naver 영화 예매 본문

HTML/Flask

HTML 실습 - Naver 영화 예매

Raccoon2125 2021. 1. 28. 13:50

Naver 영화
Youtube 시도 / 결과: None

※ Google 자체에서 유튜브에 대한 웹 크롤링을 막아서 requests로는 데이터 수집이 불가

   : selenium을 활용하여 크롤링을 진행해야 함

- requests, selenium 차이점

: selenium의 경우 실제 브라우저를 구동하므로, 로그인이 필요한 페이지나 JavaScript로 동적으로 렌더링된 data도 가져올 수 있음. (requests는 서버가 내려준 원본 HTML만 받아옴)

 

 

 

CSS : ' > ' 연산자(자식 결합자, child combinator)를 통해서 바로 아래 자식 태그로 이동 가능

 

 

태그 계층 확인 ( Copy - Copy selector )
메모장에 붙인 결과

1. app.py

import requests
from bs4 import BeautifulSoup
from flask import Flask, render_template

# Flask application object; the @app.route decorators below register views on it.
app = Flask(__name__)
# Naver Movie listing page (running/current) that both views download and parse.
url = 'https://movie.naver.com/movie/running/current.nhn#'


class NaverMovie:
    """One movie entry scraped from the Naver Movie listing page.

    Plain value holder: every constructor argument is stored unchanged under
    the attribute of the same name. No validation or conversion is performed.

    Args:
        img_src: value of the poster image's ``src`` attribute.
        age: age-rating texts for the movie.
        movie_name: movie title text.
        netizen: audience score text.
        netizen_num: audience vote-count text.
        actor_list: actor names.
        directer: director name (spelling kept for backward compatibility).
    """

    def __init__(self, img_src, age, movie_name, netizen, netizen_num,
                 actor_list, directer):
        self.img_src = img_src
        self.age = age
        self.movie_name = movie_name
        self.netizen = netizen
        self.netizen_num = netizen_num
        self.actor_list = actor_list
        self.directer = directer

    def __repr__(self):
        """Return a constructor-style representation listing every field."""
        # Without this, printing an instance only shows its memory address.
        return (
            '{}(img_src={!r}, age={!r}, movie_name={!r}, netizen={!r}, '
            'netizen_num={!r}, actor_list={!r}, directer={!r})'
        ).format(
            type(self).__name__, self.img_src, self.age, self.movie_name,
            self.netizen, self.netizen_num, self.actor_list, self.directer,
        )


@app.route('/naver')
def naver_movie():
    """Scrape the movie listing into NaverMovie objects and render the page.

    Downloads ``url``, collects one ``NaverMovie`` per ``<li>`` found under
    ``ul.lst_detail_t1`` and renders ``data_gathering.html``.

    Returns:
        The rendered template. ``movie_list`` holds the ``NaverMovie``
        objects; ``total_movie`` holds the same data as rows
        ``[img_src, age, movie_name, netizen, netizen_num, actor_list,
        directer]``, which is the variable the template iterates over.

    Raises:
        requests.exceptions.RequestException: if the download fails or
            exceeds the timeout.
    """
    def first_text(nodes, default='???'):
        # select() returns an empty list when nothing matches; use a
        # placeholder instead of raising IndexError on nodes[0].
        return nodes[0].text if nodes else default

    # A timeout keeps the request handler from hanging on a stalled connection.
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.content, 'html.parser')
    ul = soup.find('ul', class_='lst_detail_t1')
    # find() returns None when the container is missing from the response.
    entries = ul.find_all('li') if ul is not None else []

    movie_list = []
    for piece in entries:
        img = piece.find('img')
        img_src = img.get('src') if img is not None else ''
        actor_list = [act.text for act in piece.select('dl > dd:nth-child(3) > dl > dd:nth-child(6) > span > a')]
        directer = first_text(piece.select('dl > dd:nth-child(3) > dl > dd:nth-child(4) > span > a'))
        age = [ag.text for ag in piece.select('dl > dt > span')]
        movie_name = first_text(piece.select(' dl > dt > a'))
        netizen = first_text(piece.select('dl > dd.star > dl > dd > div > a > span.num'))
        netizen_num = first_text(piece.select('dl > dd.star > dl.info_star > dd > div > a > span.num2 > em'))
        movie_list.append(
            NaverMovie(img_src, age, movie_name, netizen, netizen_num, actor_list, directer)
        )

    # The template loops over ``total_movie`` and indexes each item, so
    # provide row lists in addition to the objects.
    total_movie = [
        [m.img_src, m.age, m.movie_name, m.netizen, m.netizen_num, m.actor_list, m.directer]
        for m in movie_list
    ]
    return render_template('data_gathering.html', movie_list=movie_list, total_movie=total_movie)


# Helpers meant to be passed to map()
def get_image(img):
    """Return whatever ``img.get('src')`` yields for the given element."""
    source = img.get('src')
    return source


def get_text(list):
    """Return ``list.text``, or the placeholder "???" when *list* is falsy.

    Args:
        list: an object exposing a ``text`` attribute, or a falsy value
            (for example ``None``) when nothing was found.

    Returns:
        The object's ``text``, or ``"???"`` for a falsy argument.
    """
    # NOTE: the parameter name shadows the builtin ``list``; it is kept so
    # existing callers are unaffected.
    if not list:
        # Assigning ``.text`` on None or an empty list raises AttributeError,
        # so return the placeholder directly.
        return "???"
    return list.text
# End of map() helpers


@app.route('/')
def data_gather():
    """Scrape the movie listing and render it as rows for data_gathering.html.

    Each row is ``[img, age_limit, movie_name, netizen, netizen_score,
    netizen_num]``. Text fields that cannot be found on the page are
    replaced with ``"???"``.

    Returns:
        The rendered ``data_gathering.html`` template with ``total_movie``
        set to the list of rows (empty when the listing container is absent).

    Raises:
        requests.exceptions.RequestException: if the download fails or
            exceeds the timeout.
    """
    def first_text(node, selector, default="???"):
        # Run the selector once; select() yields an empty list on no match.
        found = node.select(selector)
        return found[0].text if found else default

    # A timeout keeps the request handler from hanging on a stalled connection.
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.content, 'html.parser')
    # Debugging aid: print(soup.prettify()) pretty-prints the parsed document.
    movieList = soup.find('ul', class_="lst_detail_t1")
    # find() returns None when the container is missing from the response.
    movie_data = movieList.find_all('li') if movieList is not None else []
    total_movie = []

    # tag.get('alt') / tag.get('src') read attributes; tag.text reads the text.

    for item in movie_data:
        _img = item.select('li > div > a > img')
        img = _img[0].get('src') if _img else ''
        age_limit = first_text(item, 'li > dl > dt > span')
        movie_name = first_text(item, 'li > dl > dt > a')
        netizen = first_text(item, 'dl > dd.star > dl.info_star > dt')
        netizen_score = first_text(item, 'dl > dd.star > dl.info_star > dd > div > a > span.num')
        netizen_num = first_text(item, 'dl > dd.star > dl.info_star > dd > div > a > span.num2')
        total_movie.append([img, age_limit, movie_name, netizen, netizen_score, netizen_num])

    return render_template('data_gathering.html', total_movie=total_movie)


# Start the Flask server only when this file is executed directly, not on import.
if __name__ == '__main__':
    app.run()

2. data_gathering.html

<!DOCTYPE html>
<html lang="ko">
<head>
    <meta charset="UTF-8">
    <title>Naver Movie</title>

    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@4.6.0/dist/css/bootstrap.min.css" integrity="sha384-B0vP5xmATw1+K9KRQjQERJvTumQW0nPEzvF6L/Z6nronJ3oUOFUFpCjEUQouq2+l" crossorigin="anonymous">

{#    <style>#}
{#        .flex_container {#}
{#            display: flex;#}
{#            flex-direction: row;#}
{#            margin-top: 10px;#}
{#        }#}
{#    </style>#}

</head>
<body>

    <table class="table table-hover">
      <thead>
        <tr>
          <th scope="col">영화 포스터</th>
          <th scope="col">영화 상세내용</th>
        </tr>
      </thead>
      <tbody>
        {# Each item is one row: item[0] is the poster URL, item[2] the movie title, item[1:] the detail texts. #}
        {% for item in total_movie %}
        <tr>
          {# Attribute values built from Jinja expressions are quoted: autoescaping does not protect unquoted attributes. #}
          <td><img style="border-radius: 15%" src="{{ item[0] }}" alt="{{ item[2] }}"></td>
          <td>
              <div style="margin-left: 10px">
                {% for text in item[1:] %}
                <li> {{ text }} </li>
                {% endfor %}
              </div>
          </td>
        </tr>
        {% endfor %}
      </tbody>
    </table>

{#{% for item in total_movie %}#}
{#<div class="card border-warning mb-3" style="max-width: 540px; border-radius: 3%">#}
{#  <div class="row g-0">#}
{#    <div class="col-md-4">#}
{#      <img style="border-radius: 15%" src={{ item[0] }}>#}
{#    </div>#}
{#    <div class="col-md-8">#}
{#      <div class="card-body">#}
{#        <h5 class="card-title">{{ item[2] }}</h5>#}
{#        <p class="card-text">#}
{#            {% for text in item[3:] %}#}
{#                <ul> {{ text }} </ul>#}
{#            {% endfor %}#}
{#        </p>#}
{#        <p class="card-text"><small class="text-muted">{{ item[1] }}</small></p>#}
{#      </div>#}
{#    </div>#}
{#  </div>#}
{#</div>#}
{#{% endfor %}#}

<script src="https://code.jquery.com/jquery-3.5.1.slim.min.js" integrity="sha384-DfXdz2htPH0lsSSs5nCTpuj/zy4C+OGpamoFVy38MVBnE+IbbVYUew+OrCXaRkfj" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/popper.js@1.16.1/dist/umd/popper.min.js" integrity="sha384-9/reFTGAW83EW2RDu2S0VKaIzap3H66lZH81PoYlFhbGU+6BZp6G7niu735Sk7lN" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@4.6.0/dist/js/bootstrap.min.js" integrity="sha384-+YQ4JLhjyBLPDQt//I+STsc9iw4uQqACwlvpslubQzn4u2UU2UFM80nGisd026JF" crossorigin="anonymous"></script>

</body>
</html>

3. 결과 화면

Comments