# Ont.py
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')
import owlready2 as OWL
import numpy as np
import pandas as pd
import sklearn
import tensorflow as tf
import os
import datetime
import json
from DataReader import Reader
class Ont():
    """Ontology-driven sentiment predictor for (sentence, aspect) pairs.

    Words are matched against the ``lex`` attribute of the ontology classes and
    their polarity is read from the ontology's ``*Positive`` / ``*Negative``
    classes. When the matched words do not give a positive-only or
    negative-only signal, the majority label of the training data is returned.
    """

    # Tokens that negate a word when found among the three tokens preceding it.
    _NEGATIONS = frozenset({
        "not", "no", "never", "isnt", "arent", "wont", "wasnt", "werent",
        "havent", "hasnt", "nt", "cant", "couldnt", "dont", "doesnt",
    })

    # First letter of the tag returned by ``nltk.pos_tag`` -> WordNet POS code.
    _WORDNET_POS = {"V": "v", "J": "a", "R": "r"}

    def __init__(self, ontology):
        """Load the ontology and index its classes.

        Args:
            ontology: Location of the ontology, passed to
                ``owlready2.get_ontology`` and also used as its base IRI.
        """
        self.ontology = OWL.get_ontology(ontology)
        self.ontology.base_iri = ontology
        self.ontology = self.ontology.load()

        # Root classes used by getClassPolarity; the first search hit is used.
        self.polarity_categories = {
            'positive': self.ontology.search(iri='*Positive')[0],
            'negative': self.ontology.search(iri='*Negative')[0],
        }

        self.type1, self.type2, self.type3 = {}, {}, {}
        classes = set(self.ontology.classes())
        # Ontology class -> its ``lex`` value (the words that trigger the class).
        self.classes_dict = {onto_class: onto_class.lex for onto_class in classes}
        self.classesIntoTypes(classes)
        # Number of predictions that fell back to the majority class.
        self.majority_count = 0

    def classesIntoTypes(self, classes):
        """Fill ``type1`` / ``type2`` / ``type3`` with the given classes.

        Classes whose lower-cased name contains one of the structural words
        (property, mention, positive, neutral, negative) are skipped. For the
        rest, ancestor names are scanned in alphabetical order and the first
        name that matches a rule decides the type:

        * contains ``Generic``                  -> ``type1``
        * contains ``Positive`` or ``Negative`` -> ``type2``
        * contains ``PropertyMention``          -> ``type3``

        Args:
            classes: Iterable of ontology classes.
        """
        remove_words = ['property', 'mention', 'positive', 'neutral', 'negative']
        for ont_class in classes:
            class_name = ont_class.__name__.lower()
            if any(word in class_name for word in remove_words):
                continue
            for name in sorted(x.__name__ for x in ont_class.ancestors()):
                if 'Generic' in name:
                    self.type1[class_name] = ont_class
                    break
                elif any(x in name for x in ['Positive', 'Negative']):
                    self.type2[class_name] = ont_class
                    break
                elif 'PropertyMention' in name:
                    self.type3[class_name] = ont_class
                    break

    @staticmethod
    def _oneHotLabels(labels):
        """One-hot encode polarity labels, one row per label.

        The three known labels are prepended before encoding and stripped
        afterwards so that the negative / neutral / positive columns exist
        even when a label is absent from ``labels``.
        """
        known = np.array(['negative', 'neutral', 'positive'])
        frame = pd.DataFrame(np.concatenate([known, labels]))
        return pd.get_dummies(frame).values[3:, :]

    def getMajorityClass(self, polarity):
        """Return the most frequent label of ``polarity`` as a one-hot row.

        Args:
            polarity: 1-D array of 'negative' / 'neutral' / 'positive' labels.

        Returns:
            Array of shape (1, 3) ordered (negative, neutral, positive). Ties
            go to the first label in that order.
        """
        index = int(self._oneHotLabels(polarity).sum(0).argmax())
        one_hot = np.zeros((1, 3), dtype=int)
        # An index above 2 can only occur when ``polarity`` holds labels other
        # than the three known ones; it falls back to the last slot.
        one_hot[0, min(index, 2)] = 1
        return one_hot

    def getClassPolarity(self, word_lemma_class, negated, type3):
        """Read the polarity of a class from the ontology.

        Args:
            word_lemma_class: Class to test against the polarity root classes.
            negated: Whether the word was negated; flips the polarity.
            type3: When true, the reasoner is run first so that classes created
                by ``addSubclass`` get their relations set.

        Returns:
            Tuple ``(positive, negative)`` of booleans.
        """
        positive, negative = False, False
        if type3:
            OWL.sync_reasoner(debug=False)  # To set relations of all newly created classes
        if self.polarity_categories['positive'].__subclasscheck__(word_lemma_class):
            positive, negative = (False, True) if negated else (True, negative)
        # NOTE(review): for a negated class that is both positive and negative
        # this second check overwrites the first, giving (True, False) rather
        # than (True, True). Kept as-is; confirm whether that is intended.
        if self.polarity_categories['negative'].__subclasscheck__(word_lemma_class):
            positive, negative = (True, False) if negated else (positive, True)
        return positive, negative

    @staticmethod
    def _mentionCategories(onto_class):
        """Names of the ``*Mention`` ancestors of a class, suffix stripped."""
        return {
            ancestor.__name__.rsplit('Mention', 1)[0]
            for ancestor in onto_class.ancestors()
            if 'Mention' in ancestor.__name__
        }

    def categoryMatch(self, aspect_class, word_class):
        """Tell whether the aspect and the word belong to the same category.

        Returns:
            True when the two classes share more than two ``*Mention``
            ancestors, False otherwise or when ``aspect_class`` is None.
        """
        if aspect_class is None:
            return False
        common = self._mentionCategories(aspect_class) & self._mentionCategories(word_class)
        # If they have more than 2 ancestors in common
        # (ontology.Mention, ontology.EntityMention and something else)
        return len(common) > 2

    def addSubclass(self, word_class, aspect_class):
        """Create a class inheriting from both arguments and register it in ``type3``.

        Returns:
            The newly created class, named after both parents.
        """
        class_name = word_class.__name__ + aspect_class.__name__
        new_class = OWL.types.new_class(class_name, (word_class, aspect_class))
        self.type3[new_class.__name__.lower()] = new_class
        return new_class

    def isNegated(self, word, words_in_sentence):
        """Tell whether a negation token occurs within three tokens before ``word``.

        Args:
            word: Token to look up; its first occurrence is used.
            words_in_sentence: List of the sentence's tokens.

        Returns:
            True if one of the (up to) three preceding tokens is a negation.
            False otherwise, including when ``word`` is not in the list, which
            happens when the tokenizer splits differently from ``str.split``.
        """
        try:
            index = words_in_sentence.index(word)
        except ValueError:
            return False
        preceding = words_in_sentence[max(index - 3, 0):index]
        return not self._NEGATIONS.isdisjoint(preceding)

    @classmethod
    def _lemmatize(cls, lemmatizer, word, tag):
        """Lemmatize ``word`` as verb, adjective or adverb according to ``tag``.

        Any other tag uses the lemmatizer's default part of speech.
        """
        pos = cls._WORDNET_POS.get(tag[:1])
        if pos is None:
            return lemmatizer.lemmatize(word)
        return lemmatizer.lemmatize(word, pos)

    def predictSentiment(self, sentence, aspect):
        """Predict the polarity of ``aspect`` within ``sentence``.

        Args:
            sentence: Sentence text.
            aspect: Aspect text.

        Returns:
            Array of shape (1, 3) ordered (negative, neutral, positive):
            positive when only positive evidence was found, negative when only
            negative evidence was found, otherwise ``self.majority_class``
            (set by ``run``), in which case ``majority_count`` is incremented.
        """
        lemmatizer = nltk.WordNetLemmatizer()
        positive_list, negative_list = [], []
        words_in_sentence = sentence.split()

        # The last ontology class whose lexicon contains an aspect lemma wins.
        aspect_class = None
        for word, tag in nltk.pos_tag(nltk.word_tokenize(aspect)):
            aspect_lemma = self._lemmatize(lemmatizer, word, tag)
            for onto_class, lexicon in self.classes_dict.items():
                if aspect_lemma in lexicon:
                    aspect_class = onto_class

        for word, tag in nltk.pos_tag(nltk.word_tokenize(sentence)):
            word_lemma = self._lemmatize(lemmatizer, word, tag)
            for word_class, lexicon in self.classes_dict.items():
                if word_lemma not in lexicon:
                    continue
                if word == aspect:
                    aspect_class = word_class
                is_negated = self.isNegated(word, words_in_sentence)

                if word_lemma in self.type1:
                    # Polarity applies regardless of the aspect.
                    polarity = self.getClassPolarity(word_class, is_negated, False)
                elif word_lemma in self.type2:
                    # Polarity applies only if the aspect is in the same category.
                    if not self.categoryMatch(aspect_class, word_class):
                        continue
                    polarity = self.getClassPolarity(word_class, is_negated, False)
                elif word_lemma in self.type3:
                    # Polarity depends on the aspect: combine both classes and
                    # let the reasoner classify the combination.
                    if aspect_class is None or not (aspect_class != word_class):
                        continue
                    new_class = self.addSubclass(word_class, aspect_class)
                    polarity = self.getClassPolarity(new_class, is_negated, True)
                else:
                    continue

                positive_list.append(polarity[0])
                negative_list.append(polarity[1])

        found_positive = True in positive_list
        found_negative = True in negative_list
        if found_positive and not found_negative:
            return np.array([[0, 0, 1]])
        if found_negative and not found_positive:
            return np.array([[1, 0, 0]])
        self.majority_count += 1
        return self.majority_class

    def run(self, train_data, test_data, purpose):
        """Predict every row of ``test_data`` and evaluate the result.

        Args:
            train_data: 2-D array; column 2 holds the labels used to determine
                the majority class.
            test_data: 2-D array whose rows unpack as (sentence, aspect, label).
            purpose: Name of the run, passed on to ``evaluation``.

        Returns:
            Float array of shape (len(test_data), 3) with one prediction per row.
        """
        self.majority_class = self.getMajorityClass(train_data[:, 2])
        rows = [self.predictSentiment(sentence, aspect) for sentence, aspect, _ in test_data]
        # Concatenate once instead of growing the array row by row.
        predictions = np.concatenate(rows).astype(float) if rows else np.zeros((0, 3))
        self.evaluation(test_data[:, 2], predictions, purpose)
        print('')
        return predictions

    def evaluation(self, Y, pred, purpose, print_results=True):
        """Score predictions, write the results to disk and optionally print them.

        The results (accuracy, log loss and a confusion table) are written to
        ``./Results/logs/<class name>/<purpose>/results.json``. The table only
        has rows for negative and positive predictions; rows predicted as
        neutral are not counted in it.

        Args:
            Y: 1-D array of true labels.
            pred: Array of shape (len(Y), 3) with the predictions.
            purpose: Name of the run; used in the output path and the heading.
            print_results: Whether to print the table and the scores.
        """
        # ``import sklearn`` alone does not guarantee the submodule is loaded.
        from sklearn import metrics

        real = self._oneHotLabels(Y)
        pred_index = np.argmax(pred, 1)
        real_index = np.argmax(real, 1)

        def count(pred_label, real_label):
            hits = (pred_index == pred_label) & (real_index == real_label)
            return int(np.count_nonzero(hits))

        # Names read <predicted>_<real>; column indices are 0=neg, 1=neu, 2=pos.
        neg_neg, neg_neu, neg_pos = count(0, 0), count(0, 1), count(0, 2)
        pos_neg, pos_neu, pos_pos = count(2, 0), count(2, 1), count(2, 2)

        pos_pred = pos_pos + pos_neu + pos_neg
        neg_pred = neg_pos + neg_neu + neg_neg
        pos_true = pos_pos + neg_pos
        neu_true = pos_neu + neg_neu
        neg_true = pos_neg + neg_neg
        total = pos_true + neu_true + neg_true
        table = pd.DataFrame(columns = ['Negative', 'Neutral', 'Positive', '|', 'total'],
                             data = [[ neg_neg, neg_neu, neg_pos, '|', neg_pred],
                                     [ pos_neg, pos_neu, pos_pos, '|', pos_pred],
                                     ['--------', '-------', '--------', '-', '-----'],
                                     [ neg_true, neu_true, pos_true, '|', total]],
                             index = ['Negative', 'Positive', '--------', 'total'])

        acc = np.round(100*metrics.accuracy_score(pred_index, real_index), 2)
        loss = metrics.log_loss(real, pred)

        path = './Results/logs/{0}/{1}/'.format(self.__class__.__name__, purpose)
        results = {'accuracy' : acc, 'loss' : loss, 'table' : table.to_json()}
        os.makedirs(path, exist_ok=True)
        with open(path+'results.json', 'w') as file:
            file.write(json.dumps(results))

        if print_results:
            print(" "+purpose+" Data")
            print(table)
            print('Accuracy:', acc)
            print('Loss:', loss)
if __name__ == '__main__':
    # Script entry point: evaluate the ontology model on the train and test sets.
    np.set_printoptions(threshold=np.inf, edgeitems=3, linewidth=120)

    # Remove every previously defined flag except 'showprefixforinfo' before
    # defining this script's flags (presumably so the script can be re-run in
    # the same interpreter session — confirm).
    FLAGS = tf.app.flags.FLAGS
    for name in list(FLAGS):
        if name != 'showprefixforinfo':
            delattr(FLAGS, name)

    # NOTE(review): the 'f' flag looks like it absorbs the -f argument passed
    # by a notebook kernel — confirm.
    tf.app.flags.DEFINE_string('f', '', 'kernel')
    tf.app.flags.DEFINE_float(
        'train_proportion', 1.0, 'Train proportion for train/validation split')
    tf.app.flags.DEFINE_integer('seed', None, 'Random seed')
    tf.app.flags.DEFINE_string(
        'ontology_path', './Ontology/Ontology_restaurants.owl', 'Ontology path')
    tf.app.flags.DEFINE_boolean('train_model', True, 'Run Ont on train data')
    tf.app.flags.DEFINE_boolean('predict_values', True, 'Run Ont on test data')
    tf.app.flags.DEFINE_string(
        'train_data_path', './Data/ABSA-15_Restaurants_Train_Final.xml', 'Train data path')
    tf.app.flags.DEFINE_string(
        'test_data_path', './Data/ABSA15_Restaurants_Test.xml', 'Test data path')

    reader = Reader(FLAGS)
    data_train, _ = reader.readData(FLAGS.train_data_path)
    data_test, _ = reader.readData(FLAGS.test_data_path)
    model = Ont(FLAGS.ontology_path)

    # In both runs the training data supplies the majority class.
    if FLAGS.train_model:
        print('Training...')
        started = datetime.datetime.now()
        predictions_train = model.run(data_train, data_train, 'Train')
        print('Time:', datetime.datetime.now() - started)

    if FLAGS.predict_values:
        print('Prediction...')
        started = datetime.datetime.now()
        predictions_test = model.run(data_train, data_test, 'Test')
        print('Time:', datetime.datetime.now() - started)