-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathscraping.py
49 lines (35 loc) · 1.56 KB
/
scraping.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
from datetime import datetime
import json
def news_verification(url='https://br.investing.com/economic-calendar/'):
    """Summarise the economic calendar by currency and time slot.

    Downloads the page at *url*, walks every element carrying the
    ``js-event-item`` class and groups those rows by "<currency>_<time>".
    For each group it counts how many sentiment cells show exactly one,
    two or three ``grayFullBullishIcon`` elements.

    Args:
        url: Page to scrape. Defaults to the br.investing.com economic
            calendar, so existing zero-argument callers are unaffected.

    Returns:
        A list of dicts, one per distinct currency/time pair in order of
        first appearance, each shaped like
        ``{'currency': str, 'time': str,
        'intensity': {'1': int, '2': int, '3': int}}``.

    Raises:
        Whatever ``urlopen`` raises on a failed request (for example
        ``urllib.error.URLError``); network errors are not swallowed.
    """
    request = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    # Use the response as a context manager so the connection is closed
    # deterministically instead of being left to the garbage collector.
    with urlopen(request) as response:
        html = response.read()

    soup = BeautifulSoup(html, "html.parser")
    base = {}
    for row in soup.find_all(class_="js-event-item"):
        time_cell = row.find(class_="first left time js-time")
        currency_cell = row.find(class_="left flagCur noWrap")
        if time_cell is None or currency_cell is None:
            # The row does not have the expected cells; skip it rather
            # than aborting the whole scrape with an AttributeError.
            continue

        # The currency code is taken as the second space-separated token of
        # the cell text; the first token is not used.
        currency_parts = currency_cell.text.split(' ')
        if len(currency_parts) < 2:
            continue
        currency = currency_parts[1]
        event_time = time_cell.text

        id_hour = currency + '_' + event_time
        if id_hour not in base:
            base[id_hour] = {
                'currency': currency,
                'time': event_time,
                'intensity': {"1": 0, "2": 0, "3": 0},
            }
        counts = base[id_hour]['intensity']

        for cell in row.find_all(class_="left textNum sentiment noWrap"):
            # Number of filled icons == importance level; only levels 1-3
            # are tracked, anything else (0 or more than 3) is ignored.
            level = str(len(cell.find_all(class_="grayFullBullishIcon")))
            if level in counts:
                counts[level] += 1

    # dicts preserve insertion order, so this matches first-seen order.
    return list(base.values())
# Script body: scrape the calendar once, keep the summary in the module-level
# name ``news``, and print it to stdout as JSON indented by two spaces.
news = news_verification()
print(json.dumps(obj=news, indent=2))