"""Scraper for eGyanKosh (https://egyankosh.ac.in) course-material pages.

Fetches a DSpace "handle" page, discovers its sub-pages/collections, and
resolves the direct PDF download link for individual items.
"""
import os

import requests
from bs4 import BeautifulSoup

baseURL = 'https://egyankosh.ac.in'
# mainURL = input('Please enter the URL of eGyanKosh page\n: ')
mainURL = 'https://egyankosh.ac.in/handle/123456789/404'
print(mainURL)


def getDataFromURL(url):
    """Fetch *url* and return the response body as text.

    Raises:
        RuntimeError: if the server responds with anything other than 200.
            (The original used a bare ``assert False``, which is stripped
            under ``python -O`` and carries no diagnostic message.)
    """
    # NOTE(review): verify=False disables TLS certificate verification —
    # presumably kept because of the site's certificate chain, but this is
    # open to man-in-the-middle attacks; confirm whether a CA bundle works.
    r = requests.get(url, verify=False)
    if r.status_code != 200:
        raise RuntimeError(
            'Request to {} failed with status {}'.format(url, r.status_code)
        )
    return r.text


def stripEmptySpace(html):
    """Collapse newlines and per-line indentation, then re-parse as a soup.

    Accepts either a string or a bs4 element (``str()`` is applied first).
    """
    joined = "".join(line.strip() for line in str(html).split("\n"))
    return BeautifulSoup(joined, 'html.parser')


def getPageInfo(html):
    """Return ``{'pageTitle', 'pageType'}`` scraped from a handle page.

    ``pageTitle`` is the first line of the first ``<h2>``; ``pageType`` is
    the first ``<small>`` inside ``.col-md-8``.
    """
    soup = BeautifulSoup(html, 'html.parser')
    pageTitle = soup.h2.text.split('\n')[0]
    pageType = soup.select(".col-md-8 small")[0].string
    return {'pageTitle': pageTitle, 'pageType': pageType}


def getSubPages(html, baseUrl=baseURL):
    """Return a list of ``{'name', 'url'}`` dicts for each listed sub-page.

    Bug fix: the original ignored its ``baseUrl`` parameter and always used
    the module-level ``baseURL``, so a caller-supplied base had no effect.
    """
    soup = BeautifulSoup(html, 'html.parser')
    listing = soup.select('.col-md-9 .list-group')[0]
    subpages = []
    for row in listing.select(".list-group-item.row"):
        # Re-parse with whitespace stripped so .string yields the clean name.
        row = stripEmptySpace(row)
        subpages.append({
            'name': row.string,
            'url': baseUrl + row.a.get('href'),
        })
    return subpages


def getDownloadPageLinks(html, baseURL=baseURL):
    """Return ``{'name', 'url'}`` for every anchor in the page's first table."""
    soup = BeautifulSoup(html, 'html.parser')
    return [
        {'name': anchor.string, 'url': baseURL + anchor.get('href')}
        for anchor in soup.table.findAll('a')
    ]


def downloadPdf(name, pageHtml, baseURL=baseURL):
    """Resolve the PDF link on an item page and report it.

    NOTE(review): despite the name, this only prints the resolved URL and
    target filename — the actual file download was never implemented.
    """
    soup = BeautifulSoup(pageHtml, 'html.parser')
    pdfUrl = baseURL + soup.select(".break-all a")[0].get('href')
    fileName = name + '.pdf'
    print({'url': pdfUrl, 'fileName': fileName})


# print(getPageInfo(getDataFromURL(mainURL)))
# collectionPage = getDataFromURL('https://egyankosh.ac.in/handle/123456789/1576')
# print(getDownloadPageLinks(collectionPage))
# downloadPage = getDataFromURL('https://egyankosh.ac.in/handle/123456789/10976')
# downloadPdf('Unit-4 Mathematical Induction', downloadPage)