Beautiful Soup implementation

 import pandas as pd

import requests

from bs4 import BeautifulSoup

import random

import mysql.connector

# Scrape pages 1-3 of the paginated listing and insert the record rows found
# in the third <table> of each page into the `uppnrs` MySQL table.

PAGE_URL = "http://somewebsite?page={}"

# Pool of Firefox user agents; one is chosen at random for every request.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0",
    "Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0",
    "Mozilla/5.0 (X11; Linux x86_64; rv:95.0) Gecko/20100101 Firefox/95.0",
]

HEADER_ROW_COUNT = 5   # the first five <tr> rows are never inserted
LAST_ROW_INDEX = 29    # rows after this index are never inserted
FIELD_COUNT = 8        # number of <td> cells stored per record
REQUEST_TIMEOUT = 30   # seconds; without a timeout a stalled server hangs the run

INSERT_SQL = "INSERT INTO uppnrs (pnumber,name,dob,doj,prank,punit,sunit,hdistrict) VALUES (%s, %s,%s, %s,%s, %s,%s,%s)"

mydb = mysql.connector.connect(
    host="localhost",
    user="root",
    password="password",
    database="dbname"
)

# One cursor is enough for the whole run; it is closed in the `finally` below.
mycursor = mydb.cursor()

try:
    for iurl in range(1, 4):
        url = PAGE_URL.format(iurl)
        headers = {'User-Agent': random.choice(USER_AGENTS)}

        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        # Fail with a clear HTTP error instead of parsing an error page.
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'lxml')
        data = soup.body.find_all('table')[2].find_all('tr')

        # Rows whose text matches one of the leading rows are skipped.
        # Slicing (rather than indexing data[0]..data[4] and data[29]) keeps
        # pages with fewer rows from raising IndexError.
        header_texts = {row.text.strip() for row in data[:HEADER_ROW_COUNT]}

        records = []
        for row in data[:LAST_ROW_INDEX + 1]:
            if row.text.strip() in header_texts:
                continue
            cells = row.find_all('td')
            # Rows without all eight cells cannot form a complete record.
            if len(cells) < FIELD_COUNT:
                continue
            # Column order: pnumber, name, dob, doj, prank, punit, sunit,
            # hdistrict (matches INSERT_SQL).
            records.append(
                tuple(cell.text.strip() for cell in cells[:FIELD_COUNT])
            )

        if records:
            # Insert the page in one batch and commit once per page.
            mycursor.executemany(INSERT_SQL, records)
            mydb.commit()

        print(url+' done')
finally:
    mycursor.close()
    mydb.close()

print('done')










*********************************************

Export from the database to Excel

import mysql.connector

import pandas as pd

import numpy as np


# Export every row of `tablename` to the Excel workbook data.xlsx.

mydb = mysql.connector.connect(
    host="localhost",
    user="root",
    password="password",
    database="dbname"
)

# Release the cursor and the connection even if the query fails.
try:
    mycursor = mydb.cursor()
    try:
        mycursor.execute("SELECT * FROM tablename")
        myresult = mycursor.fetchall()
        # Keep the table's column names so the sheet gets a meaningful header
        # row instead of the positional labels 0..n-1.
        column_names = list(mycursor.column_names)
    finally:
        mycursor.close()
finally:
    mydb.close()

df = pd.DataFrame(myresult, columns=column_names)

# The DataFrame index is still written as the first column, as before.
df.to_excel('data.xlsx')

Comments

Popular posts from this blog

Create table in mysql database in phpmyadmin panel