swh:1:snp:b958e3aa64f6b1663929789c8cf28d019f55f57d
Raw File
Tip revision: 6b9bf3964385d0c16d262796d9e4a3a30a52dafd authored by Software Heritage on 12 October 2020, 00:00:00 UTC
hal: Deposit 1045 in collection hal
Tip revision: 6b9bf39
compareFile.py
# Tool to grep for the entries of file1 in file2, given the column number to compare in each file

import sys
import ipdb


def _to_text(field):
    """Return *field* as text.

    Byte strings (the plain ``str`` type on Python 2) are decoded as UTF-8;
    values that are already text are returned unchanged.
    """
    if isinstance(field, bytes):
        return field.decode('utf-8')
    return field


def _key_of(line, col, delimiter):
    """Return the key found in column *col* of *line*, or None.

    None is returned when the line has no such column or when the column
    is not valid UTF-8, so callers can treat the line as "has no key".
    """
    try:
        return _to_text(line.split(delimiter)[col])
    except (IndexError, UnicodeDecodeError):
        return None


def index_lines(lines, col, delimiter):
    """Group the stripped *lines* by the key in column *col*.

    Args:
        lines: iterable of raw lines (e.g. an open file).
        col: index of the key column after splitting on *delimiter*.
        delimiter: separator passed to ``str.split``.

    Returns:
        A dict mapping each key to the list of stripped lines carrying it,
        in input order. Lines without a usable key column are skipped:
        they could never be matched by a lookup anyway.
    """
    index = {}
    for raw in lines:
        # NOTE: strip() also removes leading/trailing whitespace delimiters
        # (e.g. a tab before an empty first field), which shifts the column
        # numbers of such lines. Kept for backward compatibility.
        line = raw.strip()
        key = _key_of(line, col, delimiter)
        if key is None:
            continue
        index.setdefault(key, []).append(line)
    return index


def grep_lines(lines, index, col, delimiter, hit_or_miss):
    """Yield the output lines for *lines* looked up in *index*.

    Args:
        lines: iterable of raw lines to look up (e.g. an open file).
        index: mapping built by ``index_lines``.
        col: index of the key column in *lines*.
        delimiter: separator passed to ``str.split``.
        hit_or_miss: 1 to yield, for every line whose key is in *index*,
            all indexed lines stored under that key; 0 to yield the
            stripped input lines whose key is absent (or that have no
            usable key column). Any other value yields nothing.
    """
    for raw in lines:
        line = raw.strip()
        key = _key_of(line, col, delimiter)
        matches = None if key is None else index.get(key)
        if matches is None:
            if hit_or_miss == 0:
                yield line
        elif hit_or_miss == 1:
            for match in matches:
                yield match


def main(argv):
    """Run the command-line tool and return its exit status.

    Args:
        argv: full argument vector, program name first, followed by
            <file1> <file2> <col in file 1> <col in file2>
            <hit or miss 1/0> and an optional <delimiter> (default: one
            space).

    Returns:
        0 on success, 1 when too few arguments were given.
    """
    if len(argv) < 6:
        # Usage errors go to stderr with a non-zero status so that shell
        # pipelines can detect them.
        sys.stderr.write("Usage : " + argv[0] + " <file1> <file2> <col in file 1> <col in file2> <hit or miss 1/0> <delimiter>\n")
        sys.stderr.write(" Col index starts from 0\n")
        return 1

    file1_path = argv[1]
    file2_path = argv[2]
    col_file1 = int(argv[3])
    col_file2 = int(argv[4])
    hit_or_miss = int(argv[5])
    # The delimiter is optional; extra trailing arguments are ignored.
    delimiter = argv[6] if len(argv) > 6 else ' '

    with open(file2_path) as fid:
        file2_dict = index_lines(fid, col_file2, delimiter)

    with open(file1_path) as fid:
        for line in grep_lines(fid, file2_dict, col_file1, delimiter, hit_or_miss):
            # Single-argument print() behaves the same on Python 2 and 3.
            print(line)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
back to top