https://github.com/Cwaterschoot/Interactivity_scoring
Tip revision: f613f94f3e6c03b199fbd851fdbd198c8274e2d3 authored by Cwaterschoot on 17 May 2021, 13:27:26 UTC
Dutch cheatsheet during annotation
Dutch cheatsheet during annotation
Tip revision: f613f94
scoring.py
import pandas as pd
import math
import matplotlib.pyplot as plt
import argparse
import numpy as np
# Plot styling. Matplotlib 3.6 renamed its bundled seaborn styles to
# "seaborn-v0_8-*" and later releases dropped the old names, so use whichever
# spelling this installation provides instead of failing on an unknown style.
for _style_name in ("seaborn-v0_8-whitegrid", "seaborn-whitegrid"):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break

# Command-line interface:
#   --dataset  Excel file with the labelled messages
#   --weight   extra weight for replies repeating the parent argument
#   --level1   Excel file with a "pro" and a "con" column of arguments
parser = argparse.ArgumentParser()
parser.add_argument("--dataset", default="testdf.xlsx", help="labelled messages")
parser.add_argument('--weight', type=float, default=1.1, help='extra weight repetitive parent argument')
parser.add_argument("--level1", default="arg_pro_con.xlsx", help="argument dataframe, variable for pro and con needed")
args = parser.parse_args()

# Load the inputs named on the command line.
df = pd.read_excel(args.dataset)
weight = args.weight
level1 = pd.read_excel(args.level1)
# Arguments belonging to each camp. The "pro" and "con" columns may have
# different lengths, in which case the shorter one contains missing values;
# dropna() removes exactly those cells.
con = list(level1["con"].dropna())
pro = list(level1["pro"].dropna())

# Variables needed for the computation, taken from the input dataframe:
# the argument label of every message and the number of messages.
l3 = df["argument"]
arg_num = len(df.index)
#################################################################
#################################################################
# Prereq assignments:
#################################################################
#################################################################
# Assign each message to either pro or con (level 1)
# One camp label per message. Built with a comprehension so the module-level
# `level1` DataFrame is not overwritten by a string.
# NOTE: every argument that is not listed under "pro" is labelled "con",
# including labels that appear in neither list.
l1 = ["pro" if argument in pro else "con" for argument in l3]
df["level1"] = l1
# Running count of each argument: 1 the first time an argument appears,
# 2 the second time, and so on. Both log operators are derived from it.
cumsum_arg = df.groupby('argument').cumcount() + 1
df["cumsum_l3"] = cumsum_arg

# Individual log operator, 1 - log10(count): repeating an argument weighs less
# for the interactive contribution of a single message.
# math.log10 is used rather than math.log(x, 10) because the latter is inexact
# for powers of ten.
log_op = [1 - math.log10(count) for count in cumsum_arg]
df["log_operator_ind"] = log_op

# Cumulative log operator, count - log10(count): repeating an argument weighs
# more towards the extremes of the thread score.
log_op_cumul = [count - math.log10(count) for count in cumsum_arg]
df["log_operator_cumul"] = log_op_cumul
# The row labelled 0 is the parent message; keep its argument (level 3) and
# its camp (level 1) as the reference every message is compared against.
parent_arg = df.argument[0]
parent_l1 = df.level1[0]

# Per-message flags: does the message share the parent's argument / camp?
l3_equal = [bool(argument == parent_arg) for argument in l3]
l1_equal = [bool(camp == parent_l1) for camp in l1]

df["l1_equal"] = l1_equal
df["l3_equal"] = l3_equal
#################################################################
#################################################################
# SCORING:
#################################################################
#################################################################
def MIC(df, parent_weight=None):
    """Compute the interactivity value of every message in the thread.

    The first row (the parent message) always scores 0. The reply at
    position ``i`` (``i >= 1``) scores ``log_operator_ind / (i + 1)``; if the
    reply uses the same argument as the parent (``l3_equal`` is true) that
    value is additionally multiplied by ``2 - weight``.

    Args:
        df: DataFrame with the boolean column ``l3_equal`` and the numeric
            column ``log_operator_ind``, one row per message.
        parent_weight: Weight applied to replies repeating the parent
            argument. When omitted, the module-level ``weight`` is used.

    Returns:
        The same DataFrame object, with the column
        ``message_interactivity_value`` added in place.
    """
    if parent_weight is None:
        parent_weight = weight  # value of the --weight command-line option

    message_score = []
    for position, row in enumerate(df.itertuples()):
        if position == 0:
            # Parent message receives a fixed score.
            message_score.append(0)
            continue
        # The first reply is divided by 2, the second by 3, and so on.
        m_interact = row.log_operator_ind / (position + 1)
        if row.l3_equal:
            m_interact = m_interact * (2 - parent_weight)
        message_score.append(m_interact)

    df["message_interactivity_value"] = message_score
    return df
####################################################################
def _interact_share(row, size, parent_weight):
    """Return one message's share within the first ``size`` messages.

    ``row`` is an ``itertuples`` record exposing ``l3_equal``, ``cumsum_l3``
    and ``log_operator_cumul``.
    """
    if size == 1:
        # Only the parent message exists so far.
        return 0
    if not row.l3_equal:
        return row.log_operator_cumul / size
    if row.cumsum_l3 == size:
        # All messages so far use the parent argument and this is the latest.
        return row.cumsum_l3 / size
    if row.log_operator_cumul == 1:
        return 1 / size
    earlier_uses = row.cumsum_l3 - 1
    # math.log10 instead of math.log(x, 10): the latter is inexact for
    # powers of ten.
    return ((earlier_uses - math.log10(earlier_uses)) / size * parent_weight) + (1 / size)


def TIS(df, parent_weight=None):
    """Compute the thread interactivity score after every message.

    For each prefix of the thread (the first 1, 2, ... n messages) every
    message in the prefix receives an interaction share. The shares are
    summed separately for messages whose ``l1_equal`` flag is true ("echo")
    and false ("opposite"); the score at that point is opposite minus echo.

    Args:
        df: DataFrame with the columns ``l3_equal``, ``l1_equal``,
            ``cumsum_l3`` and ``log_operator_cumul``, one row per message.
        parent_weight: Weight applied to repeated uses of the parent
            argument. When omitted, the module-level ``weight`` is used.

    Returns:
        The same DataFrame object, modified in place. Added columns: one
        per prefix named ``"0"`` ... ``"n-1"`` holding the shares (zero for
        messages outside the prefix), plus ``echo``, ``opposite`` and
        ``dynamic_score``.
    """
    if parent_weight is None:
        parent_weight = weight  # value of the --weight command-line option

    rows = len(df)
    # Materialise the records once; every prefix below is a slice of them.
    records = list(df.itertuples())

    dynamics = []
    for size in range(1, rows + 1):
        shares = [_interact_share(row, size, parent_weight) for row in records[:size]]
        shares.extend([0] * (rows - size))  # messages not yet posted
        dynamics.append(shares)

    for x, shares in enumerate(dynamics):
        df[str(x)] = shares

    # Echo and opposition totals per prefix, split on the camp flag.
    same_camp = df["l1_equal"] == True  # noqa: E712 (element-wise comparison)
    other_camp = df["l1_equal"] == False  # noqa: E712
    echo_dyna = [df.loc[same_camp, str(x)].sum() for x in range(rows)]
    oppo_dyna = [df.loc[other_camp, str(x)].sum() for x in range(rows)]
    df["echo"] = echo_dyna
    df["opposite"] = oppo_dyna

    # Thread score at post x: opposition - echo.
    df["dynamic_score"] = [oppo - echo for oppo, echo in zip(oppo_dyna, echo_dyna)]
    return df
#####################################################################
def valuable(df):
    """Flag which messages count as valuable interaction.

    A reply is valuable when its ``message_interactivity_value`` is greater
    than that of the message directly before it. Two exceptions are never
    valuable: the first row (the parent message), and the first reply when
    its value is below 0.5.

    Values are read by position, so the function does not depend on the
    DataFrame's index labels.

    Args:
        df: DataFrame with the column ``message_interactivity_value``.

    Returns:
        The same DataFrame object, with the boolean column ``Valuable``
        added in place.
    """
    scores = df["message_interactivity_value"].tolist()
    var_val = []
    for position, cur in enumerate(scores):
        if position == 0:
            var_val.append(False)
        elif position == 1 and cur < 0.5:
            # Exception: the first reply has the same L3 as the parent,
            # which is no valuable interaction.
            var_val.append(False)
        else:
            # Valuable if the distance is greater than the previous comment's.
            var_val.append(cur > scores[position - 1])
    df["Valuable"] = var_val
    return df
def val_index(df):
    """Return the positions of the messages flagged as valuable.

    Positions are counted from 0 (the first row) and are independent of the
    DataFrame's index labels.

    Args:
        df: DataFrame with the boolean column ``Valuable``.

    Returns:
        A list of integer positions, in ascending order.
    """
    flags = df["Valuable"].tolist()
    return [position for position, flag in enumerate(flags) if flag]
#################################################################
#################################################################
# CALCULATING
#################################################################
#################################################################
# Scoring pipeline: message scores first, then the thread scores, then the
# valuable-interaction flags (which read the message scores).
df = valuable(TIS(MIC(df)))

# Short names for the result columns used in the printed report.
# Note: from here on `valuable` no longer refers to the function but to the
# list of valuable reply positions.
valuable = val_index(df)
message_score = df["message_interactivity_value"]
echo_dyna = df["echo"]
oppo_dyna = df["opposite"]
dynamic_score = df["dynamic_score"]
#################################################################
#################################################################
# OUTPUT
#################################################################
#################################################################
# Write the scored dataframe, with every computed column, next to the script.
df.to_excel("output.xlsx")

# Values that several report lines share.
divider = '*'*100
parent_argument = l3[0]
parent_camp = l1[0]
last = arg_num - 1  # label of the final message in the thread

# --- thread summary ---
print("This thread contains", arg_num, "messages, with the parent message stating the argument of", parent_argument, "which belongs to the", parent_camp, "camp.", "\n")
print("The thread contains", l1_equal.count(True), parent_camp, "messages and", l1_equal.count(False), "comments of the opposition camp.")
print(divider)

# --- thread-level scores after the final message ---
print("echo score =", echo_dyna[last])
print("opposite score =", oppo_dyna[last])
final_score = dynamic_score[last]
print("full thread interactivity score =", final_score, "\n")

# Verdict: above 0.5 the opposition dominates, below -0.5 the parent's camp
# dominates, anything in between counts as balanced.
if final_score > 0.5:
    print("This thread experiences a flood of opposition messaging drowning out the messages of standpoint", parent_camp, ". \n")
elif final_score < -0.5:
    print("This thread is an echo chamber in terms of", parent_camp, "messaging.", "\n")
else:
    print("This thread is fairly balanced.")
print(divider)

# --- per-message contributions ---
print("The closer the individual score to 0, the smaller the interactive contribution that the message makes.", "\n")
print("individual interactivity contributions", "\n", message_score.to_string())
print(divider)

# --- valuable replies ---
print("Messages receiving an individual score with a greater distance from 0 compared to the previous reply are deemed valuable interaction.", "\n")
print("The following replies are valuable (parent message has label 0):", valuable, "\n")
print("The corresponding arguments to these replies are:", list(l3[valuable]), "\n", "keeping in mind that the parent reply has the label", l3[0])
print(divider)
#################################################################
#################################################################
# PLOTS
#################################################################
#################################################################
# The format strings below carry only the line and marker style ('-o').
# Naming the colour both in the format string ('k') and through `color=`
# makes matplotlib warn that the colour is redundantly defined; the keyword
# took precedence, so the plotted colours are unchanged.

# 1: thread interactivity score after each message
x = list(range(0, arg_num))
y = df.message_interactivity_value
f = plt.figure()
z = df.dynamic_score
plt.plot(x, z, '-o', color='blue', label="TIS score")
# Shaded bands follow the +/-0.5 thresholds used for the printed verdict:
# red outside them, green in between.
plt.axhspan(-2, -0.5, facecolor='r', alpha=0.05)
plt.axhspan(0.5, 2, facecolor='r', alpha=0.05)
plt.axhspan(-0.5, 0.5, facecolor='g', alpha=0.05)
plt.ylim(-2, 2)
plt.xlabel("Message index (i)")
plt.ylabel("TIS score")
plt.legend()
f.savefig("plot-dTIS.png", bbox_inches='tight')

# 2: interactivity value of each individual message
k = plt.figure()
plt.plot(x, y, '-o', color='black', label='MIC score')
plt.ylim(0, 1)
plt.xlabel("Message index (i)")
plt.ylabel("MIC score")
plt.legend()
k.savefig("plot-MIC.png", bbox_inches='tight')

# 3: echo and opposition totals after each message
x = list(range(0, arg_num))
y = df.opposite
g = plt.figure()
plt.plot(x, y, '-o', color='black', label="Opposite score at post X")
z = df.echo
plt.plot(x, z, '-o', color='blue', label="Echo score at post X")
plt.ylim(-1, 2)
plt.legend()
g.savefig("plot-echo-oppo.png", bbox_inches='tight')