"""Re-key a question/answer JSON file by a MurmurHash3 digest of each question.

Reads ``testdata.json.original`` (a JSON object mapping question -> answer)
and writes two files:

* ``questions.json`` -- hashed question -> original question text
* ``testdata.json``  -- hashed question -> answer
"""
import binascii
import json

import pymmh3


def question_hash(question: str) -> str:
    """Return the 128-bit MurmurHash3 of *question* as 32 lowercase hex chars.

    The integer returned by ``pymmh3.hash128`` is split into its upper and
    lower 64-bit halves. The LOWER half is emitted first, followed by the
    upper half, each serialized big-endian.
    """
    # MurmurHash3 128-bit hash (x64 variant), returned as a single integer.
    hash_value = pymmh3.hash128(question, x64arch=True)

    # Split the 128-bit integer into two 64-bit halves.
    mask64 = 0xFFFFFFFFFFFFFFFF
    high = (hash_value >> 64) & mask64
    low = hash_value & mask64

    # Low half first, then high half.
    # NOTE(review): presumably this order matches the digest layout expected
    # by whatever consumes these keys -- confirm before changing it.
    hash_bytes = (
        low.to_bytes(8, byteorder='big') + high.to_bytes(8, byteorder='big')
    )

    # Convert the byte string to a hexadecimal text string.
    return binascii.hexlify(hash_bytes).decode('utf-8')


# Read the original (un-hashed) test data. The encoding is pinned to UTF-8 so
# question text, and therefore its hash, does not depend on the platform's
# locale default.
with open('testdata.json.original', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Build the two hash-keyed dictionaries.
questions = {}
hashed_data = {}
for question, answer in data.items():
    hashed_question = question_hash(question)
    questions[hashed_question] = question
    hashed_data[hashed_question] = answer

# Write the hash -> question lookup table.
with open('questions.json', 'w', encoding='utf-8') as f:
    json.dump(questions, f, indent=4)

# Write (overwriting if present) the hash -> answer test data file.
with open('testdata.json', 'w', encoding='utf-8') as f:
    json.dump(hashed_data, f, indent=4)