https://github.com/leandrocosta16/scraper-MajorMinors
Tip revision: 1dc70829cf6797d9f27b249a9b9a2039202255a7 authored by Leandro Costa on 20 April 2021, 10:36:45 UTC
scraper repo
scraper repo
Tip revision: 1dc7082
entitiesMarker.py
#!./env/bin/python3
from bs4 import BeautifulSoup
import re
import requests
import fileinput
import os
import datetime
#from selenium import webdriver
# Terms to search for in a text, grouped by entity category.
# Each category starts with its own (unshared) empty list.
entities_toSearch_dictionary = {
    category: []
    for category in (
        "politicos",
        "famosos_geral",
        "animais",
        "religioes",
        "etnias",
        "cidades",
        "paises",
        "keywords",
    )
}
# Terms that were actually found in a text, grouped by entity category.
# Each category starts with its own (unshared) empty list.
entities_found_dictionary = {
    category: []
    for category in (
        "politicos",
        "famosos_geral",
        "animais",
        "religioes",
        "etnias",
        "cidades",
        "paises",
        "keywords",
    )
}
def intialize_dictionary(text):
    """Record every search term that occurs in *text*.

    For each category of ``entities_toSearch_dictionary``, the terms that
    pass a plain ``term in text`` containment check are appended, in their
    listed order, to the same category's list in
    ``entities_found_dictionary``.  Matches accumulate across calls and
    nothing is returned.
    """
    for category, terms in entities_toSearch_dictionary.items():
        matches = [term for term in terms if term in text]
        # Only touch the results dictionary when there is something to add.
        if matches:
            entities_found_dictionary[category].extend(matches)