https://hal.archives-ouvertes.fr/hal-02963802
Tip revision: 6b9bf3964385d0c16d262796d9e4a3a30a52dafd authored by Software Heritage on 12 October 2020, 00:00:00 UTC
hal: Deposit 1045 in collection hal
hal: Deposit 1045 in collection hal
Tip revision: 6b9bf39
compareFile.py
# Tool to grep for file1 in file2, given the column numbers in each file
import sys
import ipdb
def _column_key(row, col, delimiter):
    """Return field number `col` of `row` split on `delimiter`.

    Returns None when the row has no such field, so callers can treat a
    short row as "no key" instead of handling IndexError themselves.
    """
    try:
        return row.split(delimiter)[col]
    except IndexError:
        return None


def _index_by_column(path, col, delimiter):
    """Map each key found in column `col` of the file at `path` to the
    list of stripped lines carrying that key, in file order.

    Lines that do not have a column `col` are skipped: they have no key
    and therefore can never be matched.
    """
    index = {}
    with open(path) as fid:
        for line in fid:
            row = line.strip()
            key = _column_key(row, col, delimiter)
            if key is None:
                continue
            index.setdefault(key, []).append(row)
    return index


def compare_files(file1_path, file2_path, col_file1, col_file2,
                  hit_or_miss, delimiter=' '):
    """Look up a column of file1 in a column of file2 and yield results.

    file2 is read completely first and indexed by column `col_file2`;
    file1 is then streamed line by line and column `col_file1` of each
    line is looked up in that index.

    Args:
        file1_path: path of the file whose lines are looked up.
        file2_path: path of the file that is indexed.
        col_file1: 0-based column of file1 holding the key.
        col_file2: 0-based column of file2 holding the key.
        hit_or_miss: 1 yields, for every file1 line whose key is found,
            all file2 lines with that key; 0 yields every file1 line
            whose key is not found. Any other value yields nothing.
        delimiter: string the lines are split on (default: one space).

    Yields:
        Stripped lines (from file2 in hit mode, from file1 in miss mode).

    A file1 line that does not have column `col_file1` counts as a miss.
    Keys are compared exactly as read from the files, without decoding.
    """
    index = _index_by_column(file2_path, col_file2, delimiter)
    with open(file1_path) as fid:
        for line in fid:
            row = line.strip()
            # A None key (short row) is never in the index, so it falls
            # through to the miss branch like any unknown key.
            matches = index.get(_column_key(row, col_file1, delimiter))
            if matches is None:
                if hit_or_miss == 0:
                    yield row
            elif hit_or_miss == 1:
                for match in matches:
                    yield match


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: argument list laid out like sys.argv (program name first);
            defaults to sys.argv.

    Prints the usage text and returns when fewer than five positional
    arguments are given; otherwise writes one result line per match or
    miss to standard output.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 6:
        sys.stdout.write("Usage : " + argv[0] + " <file1> <file2> <col in file 1> <col in file2> <hit or miss 1/0> <delimiter>" + "\n")
        sys.stdout.write(" Col index starts from 0" + "\n")
        return
    file1_path = argv[1]
    file2_path = argv[2]
    col_file1 = int(argv[3])
    col_file2 = int(argv[4])
    hit_or_miss = int(argv[5])
    # The delimiter is optional; a single space is used when it is absent.
    delimiter = argv[6] if len(argv) > 6 else ' '
    for line in compare_files(file1_path, file2_path, col_file1, col_file2,
                              hit_or_miss, delimiter):
        sys.stdout.write(line + "\n")


if __name__ == "__main__":
    main()