# caddy-keydb-codebreaker/testdata.py

import json
import pymmh3
import binascii

def question_hash(question):
    """Return a 32-character lowercase hex digest of *question*.

    The digest is built from the 128-bit MurmurHash3 value returned by
    ``pymmh3.hash128(question, x64arch=True)``: the integer is split into
    two 64-bit halves, and the low half is emitted first, followed by the
    high half, each as 8 big-endian bytes.

    NOTE(review): the low-half-first ordering is presumably chosen to match
    the byte layout expected by whatever consumes these keys -- confirm
    before changing it.
    """
    mask64 = 0xFFFFFFFFFFFFFFFF
    digest = pymmh3.hash128(question, x64arch=True)

    # Upper and lower 64 bits of the 128-bit integer.
    upper = (digest >> 64) & mask64
    lower = digest & mask64

    # Low half first, then high half; each half is big-endian.
    raw = lower.to_bytes(8, byteorder='big') + upper.to_bytes(8, byteorder='big')
    return raw.hex()

# Read the original question -> answer mapping. JSON text is UTF-8, so the
# encoding is given explicitly instead of relying on the platform's locale
# default (which would mis-decode non-ASCII questions and change their hashes).
with open('testdata.json.original', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Create the new dictionaries, both keyed by the hashed question:
#   questions:   hash -> original question text
#   hashed_data: hash -> answer
questions = {}
hashed_data = {}

for question, answer in data.items():
    hashed_question = question_hash(question)
    questions[hashed_question] = question
    hashed_data[hashed_question] = answer

# Write the questions.json file
with open('questions.json', 'w', encoding='utf-8') as f:
    json.dump(questions, f, indent=4)

# Overwrite the testdata.json file
with open('testdata.json', 'w', encoding='utf-8') as f:
    json.dump(hashed_data, f, indent=4)