caddy-keydb-codebreaker/testdata.py
2024-08-14 13:11:08 -07:00

35 lines
1.1 KiB
Python

import json
import pymmh3
import binascii
def question_hash(question):
    """Return the MurmurHash3 (x64, 128-bit) digest of *question* as hex.

    The 128-bit integer produced by ``pymmh3.hash128`` is split into two
    64-bit halves.  Each half is serialised as 8 big-endian bytes, and the
    LOW half is emitted before the HIGH half.  The result is a 32-character
    lowercase hexadecimal string.

    Args:
        question: The value to hash; passed unchanged to ``pymmh3.hash128``.

    Returns:
        The 32-character lowercase hex string described above.
    """
    mask64 = 0xFFFFFFFFFFFFFFFF
    digest = pymmh3.hash128(question, x64arch=True)

    upper_half = (digest >> 64) & mask64
    lower_half = digest & mask64

    # Order matters: lower 64 bits first, then upper 64 bits.
    ordered_halves = (lower_half, upper_half)
    raw = b''.join(half.to_bytes(8, byteorder='big') for half in ordered_halves)
    return raw.hex()
# Load the original question -> answer mapping.
# The encoding is given explicitly so that non-ASCII questions are decoded
# the same way on every platform; otherwise the locale default is used and
# the resulting hashes could differ between machines.
with open('testdata.json.original', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Build two mappings keyed by the hashed question:
#   questions   : hash -> original question text
#   hashed_data : hash -> answer
questions = {}
hashed_data = {}
for question, answer in data.items():
    hashed_question = question_hash(question)
    questions[hashed_question] = question
    hashed_data[hashed_question] = answer

# Write the hash -> question lookup table to questions.json.
with open('questions.json', 'w', encoding='utf-8') as f:
    json.dump(questions, f, indent=4)

# Write the hash -> answer mapping to testdata.json (overwriting any
# existing file of that name).
with open('testdata.json', 'w', encoding='utf-8') as f:
    json.dump(hashed_data, f, indent=4)