Content - c17794cba3de15743b5713dc38f3008c1f5c16be - 4821d57/Ont.py

Ont.py
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')

import owlready2 as OWL
import numpy as np
import pandas as pd
import sklearn
import tensorflow as tf
import os
import datetime
import json
from DataReader import Reader


class Ont():
  """Ontology-driven sentiment classifier for (sentence, aspect) pairs.

  Words of a sentence are lemmatised and looked up in the lexicons (``lex``
  annotations) of the classes of an OWL ontology loaded through owlready2.
  Matching classes vote positive/negative; when the votes are missing or
  contradictory the majority label of the training data is predicted.

  Predictions are one-hot rows ordered as ``LABELS``:
  ``[negative, neutral, positive]``.
  """

  # Column order of every one-hot vector produced or consumed by this class.
  LABELS = ('negative', 'neutral', 'positive')

  # Tokens that flip the polarity of a word they precede.
  NEGATIONS = frozenset({"not", "no", "never", "isnt", "arent", "wont", "wasnt", "werent",
                         "havent", "hasnt", "nt", "cant", "couldnt", "dont", "doesnt"})

  # How many words before the target word are searched for a negation.
  NEGATION_WINDOW = 3

  # First letter of a Penn Treebank tag -> WordNet part-of-speech code.
  _WORDNET_POS = {'V': 'v', 'J': 'a', 'R': 'r'}

  def __init__(self, ontology):
    """Load the ontology and index its classes.

    Args:
      ontology: location of the ontology, passed to ``owlready2.get_ontology``
        and also used as the ontology's ``base_iri``.

    Raises:
      IndexError: if the ontology has no entity whose IRI ends in
        ``Positive`` or ``Negative``.
    """
    self.ontology = OWL.get_ontology(ontology)
    self.ontology.base_iri = ontology
    self.ontology = self.ontology.load()
    self.polarity_categories = {
      'positive': self.ontology.search(iri='*Positive')[0],
      'negative': self.ontology.search(iri='*Negative')[0],
    }
    # lower-cased class name -> ontology class, filled by classesIntoTypes.
    self.type1, self.type2, self.type3 = {}, {}, {}
    classes = set(self.ontology.classes())
    # ontology class -> its lexicon (the values of the ``lex`` annotation).
    self.classes_dict = {onto_class: onto_class.lex for onto_class in classes}
    self.classesIntoTypes(classes)
    # Number of predictions that fell back to the majority class.
    self.majority_count = 0

  def classesIntoTypes(self, classes):
    """Sort ontology classes into ``type1`` / ``type2`` / ``type3``.

    Classes whose own name contains one of the structural keywords are
    skipped. For the others the ancestor names are visited in alphabetical
    order and the first name that matches a rule decides the type:
    'Generic' -> type1, 'Positive'/'Negative' -> type2,
    'PropertyMention' -> type3. Classes matching no rule are left out.

    Args:
      classes: iterable of ontology classes to categorise.
    """
    remove_words = ('property', 'mention', 'positive', 'neutral', 'negative')
    for ont_class in classes:
      class_name = ont_class.__name__.lower()
      if any(word in class_name for word in remove_words):
        continue
      for name in sorted(x.__name__ for x in ont_class.ancestors()):
        if 'Generic' in name:
          self.type1[class_name] = ont_class
          break
        if 'Positive' in name or 'Negative' in name:
          self.type2[class_name] = ont_class
          break
        if 'PropertyMention' in name:
          self.type3[class_name] = ont_class
          break

  @classmethod
  def _oneHot(cls, labels):
    """Return an (n, 3) int matrix one-hot encoding `labels` in LABELS order.

    Labels that are not in LABELS produce an all-zero row.
    """
    # dtype=object keeps the comparison element-wise even for empty input.
    column = np.asarray(labels, dtype=object).reshape(-1, 1)
    return (column == np.array(cls.LABELS, dtype=object)).astype(int)

  def getMajorityClass(self, polarity):
    """Return the most frequent label of `polarity` as a (1, 3) one-hot array.

    Labels outside LABELS are ignored. Ties (and empty input) resolve to the
    first label in LABELS order.

    Args:
      polarity: sequence of label strings.
    """
    counts = self._oneHot(polarity).sum(axis=0)
    majority = np.zeros((1, len(self.LABELS)), dtype=int)
    majority[0, counts.argmax()] = 1
    return majority

  def getClassPolarity(self, word_lemma_class, negated, type3):
    """Return ``(positive, negative)`` flags for an ontology class.

    Args:
      word_lemma_class: class to test against the polarity categories.
      negated: when true the two flags are swapped.
      type3: when true the reasoner is synchronised first, so that classes
        created on the fly get their inferred relations.

    Returns:
      Tuple of two booleans. A class that belongs to both categories yields
      ``(True, True)`` whether or not it is negated.
    """
    if type3:
      OWL.sync_reasoner(debug=False)  # To set relations of all newly created classes
    positive = bool(self.polarity_categories['positive'].__subclasscheck__(word_lemma_class))
    negative = bool(self.polarity_categories['negative'].__subclasscheck__(word_lemma_class))
    if negated:
      # Negation mirrors the polarity; swapping keeps the "both" case symmetric.
      positive, negative = negative, positive
    return positive, negative

  @staticmethod
  def _mentionRoots(onto_class):
    """Return the ancestor names containing 'Mention', cut at the last 'Mention'."""
    return {ancestor.__name__.rsplit('Mention', 1)[0]
            for ancestor in onto_class.ancestors()
            if 'Mention' in ancestor.__name__}

  def categoryMatch(self, aspect_class, word_class):
    """Tell whether the aspect and the word share a specific mention category.

    Args:
      aspect_class: ontology class of the aspect, or None.
      word_class: ontology class of the sentiment word.

    Returns:
      True when the two classes have more than two '...Mention' ancestors in
      common, False otherwise (including when `aspect_class` is None).
    """
    if aspect_class is None:
      return False
    common = self._mentionRoots(aspect_class) & self._mentionRoots(word_class)
    # If they have more than 2 ancestors in common (ontology.Mention, ontology.EntityMention and something else)
    return len(common) > 2

  def addSubclass(self, word_class, aspect_class):
    """Create a class inheriting from both arguments and register it as type3.

    The new class is named by concatenating the two class names.

    Returns:
      The newly created class.
    """
    class_name = word_class.__name__ + aspect_class.__name__
    new_class = OWL.types.new_class(class_name, (word_class, aspect_class))
    self.type3[new_class.__name__.lower()] = new_class
    return new_class

  def isNegated(self, word, words_in_sentence):
    """Tell whether a negation appears shortly before `word`.

    Only the first occurrence of `word` is considered, and the
    NEGATION_WINDOW words preceding it are searched.

    Args:
      word: the word to inspect.
      words_in_sentence: list of the words of the sentence.

    Returns:
      True if a negation is found in the window; False otherwise, including
      when `word` does not occur in `words_in_sentence` (tokenisation may
      split words differently from the whitespace split of the caller).
    """
    try:
      index = words_in_sentence.index(word)
    except ValueError:
      return False
    window = words_in_sentence[max(index - self.NEGATION_WINDOW, 0):index]
    return not self.NEGATIONS.isdisjoint(window)

  @classmethod
  def _lemmatizedTokens(cls, text, lemmatizer):
    """Yield ``(token, lemma)`` for every nltk token of `text`.

    Verbs, adjectives and adverbs are lemmatised with their part of speech;
    every other token uses the lemmatizer's default.
    """
    for token, tag in nltk.pos_tag(nltk.word_tokenize(text)):
      wordnet_pos = cls._WORDNET_POS.get(tag[:1])
      if wordnet_pos is None:
        yield token, lemmatizer.lemmatize(token)
      else:
        yield token, lemmatizer.lemmatize(token, wordnet_pos)

  def _matchingClasses(self, lemma):
    """Return the ontology classes whose lexicon contains `lemma`."""
    return [onto_class for onto_class, lexicon in self.classes_dict.items()
            if lemma in lexicon]

  def predictSentiment(self, sentence, aspect):
    """Predict the sentiment of `sentence` towards `aspect`.

    ``self.majority_class`` must have been set (``run`` does this) because it
    is the fallback prediction.

    Args:
      sentence: the sentence text.
      aspect: the aspect term the sentiment refers to.

    Returns:
      A (1, 3) one-hot array: positive when only positive evidence was
      found, negative when only negative evidence was found, otherwise the
      majority class (and ``majority_count`` is incremented).
    """
    lemmatizer = nltk.WordNetLemmatizer()
    words_in_sentence = sentence.split()
    positive_list, negative_list = [], []

    # The last ontology class matching any aspect token becomes the aspect class.
    aspect_class = None
    for _, aspect_lemma in self._lemmatizedTokens(aspect, lemmatizer):
      for onto_class in self._matchingClasses(aspect_lemma):
        aspect_class = onto_class

    for word, word_lemma in self._lemmatizedTokens(sentence, lemmatizer):
      for word_class in self._matchingClasses(word_lemma):
        if word == aspect:
          aspect_class = word_class
        is_negated = self.isNegated(word, words_in_sentence)
        if word_lemma in self.type1:
          # Generic sentiment: counts regardless of the aspect.
          polarity = self.getClassPolarity(word_class, is_negated, False)
        elif word_lemma in self.type2:
          # Only counts when word and aspect share a mention category.
          if not self.categoryMatch(aspect_class, word_class):
            continue
          polarity = self.getClassPolarity(word_class, is_negated, False)
        elif word_lemma in self.type3:
          # Polarity depends on the aspect: combine both classes and let the
          # reasoner classify the combination.
          if aspect_class is None or aspect_class == word_class:
            continue
          new_class = self.addSubclass(word_class, aspect_class)
          polarity = self.getClassPolarity(new_class, is_negated, True)
        else:
          continue
        positive_list.append(polarity[0])
        negative_list.append(polarity[1])

    has_positive = True in positive_list
    has_negative = True in negative_list
    if has_positive and not has_negative:
      prediction = np.array([[0, 0, 1]])
    elif has_negative and not has_positive:
      prediction = np.array([[1, 0, 0]])
    else:
      prediction = self.majority_class
      self.majority_count += 1
    return prediction

  def run(self, train_data, test_data, purpose):
    """Predict every row of `test_data` and evaluate the predictions.

    Args:
      train_data: 2-D array whose third column holds the training labels;
        only used to determine the majority class.
      test_data: 2-D array with rows ``(sentence, aspect, label)``.
      purpose: name of the run, forwarded to ``evaluation``.

    Returns:
      Float array of shape (len(test_data), 3) with one one-hot row per
      test example.
    """
    self.majority_class = self.getMajorityClass(train_data[:, 2])

    rows = [self.predictSentiment(sentence, aspect) for sentence, aspect, _ in test_data]
    # The empty float block fixes dtype and shape even when there are no rows.
    predictions = np.concatenate([np.zeros((0, 3))] + rows)

    self.evaluation(test_data[:, 2], predictions, purpose)
    print('')
    return predictions

  def evaluation(self, Y, pred, purpose, print_results=True):
    """Score predictions, store the results as JSON and optionally print them.

    Writes ``./Results/logs/<class name>/<purpose>/results.json`` containing
    the accuracy (in percent, rounded to 2 decimals), the log loss and a
    confusion table. The table only has rows for predicted Negative and
    predicted Positive; examples predicted as neutral are not tabulated,
    although they do count towards accuracy and loss.

    Args:
      Y: sequence of true label strings, each one of LABELS.
      pred: (n, 3) array of one-hot predictions.
      purpose: name of the run; used in the output path and the printout.
      print_results: when true, print the table, accuracy and loss.

    Raises:
      ValueError: if `Y` contains a label that is not in LABELS.
    """
    # A bare "import sklearn" is not guaranteed to load the metrics submodule.
    import sklearn.metrics

    real = self._oneHot(Y)
    if not real.sum(axis=1).all():
      raise ValueError('Y contains labels other than {0}'.format(self.LABELS))

    pred_idx = np.argmax(pred, axis=1)
    real_idx = np.argmax(real, axis=1)
    # confusion[p, r] = number of examples predicted p whose true label is r.
    confusion = np.zeros((3, 3), dtype=int)
    np.add.at(confusion, (pred_idx, real_idx), 1)
    neg_neg, neg_neu, neg_pos = confusion[0].tolist()
    pos_neg, pos_neu, pos_pos = confusion[2].tolist()

    pos_pred = pos_pos + pos_neu + pos_neg
    neg_pred = neg_pos + neg_neu + neg_neg

    pos_true = pos_pos + neg_pos
    neu_true = pos_neu + neg_neu
    neg_true = pos_neg + neg_neg
    total = pos_true + neu_true + neg_true

    table = pd.DataFrame(columns = ['Negative', 'Neutral',  'Positive',  '|', 'total'],
                         data =   [[ neg_neg,    neg_neu,     neg_pos,   '|', neg_pred],
                                   [ pos_neg,    pos_neu,     pos_pos,   '|', pos_pred],
                                   ['--------', '-------',   '--------', '-', '-----'],
                                   [ neg_true,   neu_true,    pos_true,  '|',  total]],
                         index =   ['Negative', 'Positive', '--------', 'total'])

    acc = float(np.round(100 * sklearn.metrics.accuracy_score(real_idx, pred_idx), 2))
    loss = sklearn.metrics.log_loss(real, pred)
    path = './Results/logs/{0}/{1}/'.format(self.__class__.__name__, purpose)
    results = {'accuracy' : acc, 'loss' : loss, 'table' : table.to_json()}
    os.makedirs(path, exist_ok=True)
    with open(path+'results.json', 'w') as handle:
      json.dump(results, handle)
    if print_results:
      print("                  "+purpose+" Data")
      print(table)
      print('Accuracy:', acc)
      print('Loss:', loss)


if __name__ == '__main__':
  # Script entry point: configure flags, read the data and run the ontology
  # classifier on the train and/or test set, timing each run.
  np.set_printoptions(threshold=np.inf, edgeitems=3, linewidth=120)

  flags = tf.app.flags
  FLAGS = flags.FLAGS

  # Drop every previously registered flag except 'showprefixforinfo', so the
  # definitions below can be registered again without clashing.
  stale_flags = [flag_name for flag_name in FLAGS if flag_name != 'showprefixforinfo']
  for flag_name in stale_flags:
    delattr(FLAGS, flag_name)

  flags.DEFINE_string('f', '', 'kernel')
  flags.DEFINE_float('train_proportion', 1.0, 'Train proportion for train/validation split')
  flags.DEFINE_integer('seed', None, 'Random seed')
  flags.DEFINE_string('ontology_path', './Ontology/Ontology_restaurants.owl', 'Ontology path')
  flags.DEFINE_boolean('train_model', True, 'Run Ont on train data')
  flags.DEFINE_boolean('predict_values', True, 'Run Ont on test data')
  flags.DEFINE_string('train_data_path', './Data/ABSA-15_Restaurants_Train_Final.xml', 'Train data path')
  flags.DEFINE_string('test_data_path', './Data/ABSA15_Restaurants_Test.xml', 'Test data path')

  reader = Reader(FLAGS)

  # readData returns a pair; only its first element is used here.
  data_train, _ = reader.readData(FLAGS.train_data_path)
  data_test, _ = reader.readData(FLAGS.test_data_path)

  model = Ont(FLAGS.ontology_path)

  if FLAGS.train_model:
    print('Training...')
    train_started = datetime.datetime.now()
    predictions_train = model.run(data_train, data_train, 'Train')
    train_finished = datetime.datetime.now()
    print('Time:', train_finished - train_started)

  if FLAGS.predict_values:
    print('Prediction...')
    test_started = datetime.datetime.now()
    predictions_test = model.run(data_train, data_test, 'Test')
    test_finished = datetime.datetime.now()
    print('Time:', test_finished - test_started)