# Scrape climate-technology demand counts from CTIS and merge them into df.
# Scrape the subcategory (소분류) column from every page of the CTIS
# climate-technology demand list, following the numbered pagination links.
url = 'https://www.ctis.re.kr/ko/dmandTchnlgy/dmandTchnlgyList.do?key=1543#{%22tchnlgyNm%22:%22%22,%22sclasSn%22:%22%22,%22keyword%22:%22[]%22,%22lang%22:%22ko%22,%22sortOrder%22:%22desc%22,%22total%22:%22%22,%22ltN2Cd%22:%22%22,%22infoPrvd%22:%22%22,%22size%22:%2250%22,%22hasParam%22:true}'
driver = webdriver.Chrome('../../Downloads/chromedriver')
driver.get(url)
time.sleep(1)

# XPath template for the numbered pagination links; only the span index varies.
pager_xpath = '//*[@id="m_content"]/div[9]/div/div[2]/div[2]/div/span[{}]/a'

total = []  # collected 소분류 cell texts, one per listed technology
# Span index of the pagination link to click on each iteration; iterations
# 4, 10 and 16 advance the pager block itself without scraping.
click_list = [3, 4, 5, 6, 7, 3, 4, 5, 6, 7, 8, 3, 4, 5, 6, 7, 8, 7]
try:
    for i in range(50):
        time.sleep(1)
        # Use a membership test, not bitwise `|` on comparison results.
        if i in (4, 10, 16):
            # Pager-advancing iteration: click through without scraping.
            link = driver.find_elements(By.XPATH, pager_xpath.format(click_list[i]))
            link[0].click()
            continue
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        cells = soup.select('#_grid td.jsgrid-cell.jsgrid-align-center')
        time.sleep(1)
        # Every 4th centered cell starting at offset 1 holds the 소분류 value.
        total.extend(cell.text for cell in cells[1::4])
        link = driver.find_elements(By.XPATH, pager_xpath.format(click_list[i]))
        link[0].click()
except Exception:
    # click_list is exhausted (IndexError) or the pager link is missing once
    # the last page is reached; keep whatever was scraped so far.
    print('No page')
# Normalize the 소분류 (subcategory) strings so they match the scraped values:
# drop the 3-character prefix, unify '&' and ',' separators to '·', strip
# spaces, and fix the 'Non-CO2' casing.
cleaned = df['소분류'].str[3:]
for old, new in (('&', '·'), (',', '·'), (' ', ''), ('Non-Co2', 'Non-CO2')):
    cleaned = cleaned.str.replace(old, new)
df['소분류'] = cleaned
# Aggregate the scraped subcategory list into per-category demand counts.
df_web = pd.DataFrame(total)
df_web_val = (
    pd.DataFrame(df_web.value_counts())
    .rename(columns={0: '기후 기술 수요량'})  # count column
    .reset_index()
    .rename(columns={0: '소분류'})  # index column exposed by reset_index
)
# Strip spaces so the key matches the cleaned 소분류 column in df.
df_web_val['소분류'] = df_web_val['소분류'].str.replace(' ', '')
# Attach the scraped demand counts to df; outer join keeps categories that
# appear on only one side.
df = df.merge(df_web_val, on='소분류', how='outer')