You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
import multiprocessing
import multiprocessing.shared_memory as shm

import numpy as np
import pyarrow as pa


def create_shared_arrow_array():
    """Create a large Arrow array and copy its data buffer into shared memory.

    Returns:
        A ``(name, type, length)`` tuple: the name of the shared memory
        segment, the Arrow data type of the array, and its element count.
        The caller is responsible for unlinking the segment by name.
    """
    data = np.arange(1_000_000, dtype=np.int32)  # Example large dataset
    array = pa.array(data)

    # First buffer is the null bitmap, second is the data.
    buf = array.buffers()[1]
    shared_mem = shm.SharedMemory(create=True, size=buf.size)
    try:
        # Slice assignment on a memoryview requires both sides to have the
        # same item format; casting to unsigned bytes makes the copy work
        # regardless of the byte format the Arrow buffer exports.
        shared_mem.buf[:buf.size] = memoryview(buf).cast("B")
    except BaseException:
        # Do not leave an orphaned segment behind if the copy fails.
        shared_mem.close()
        shared_mem.unlink()
        raise

    name = shared_mem.name
    # NOTE(review): only the name is handed back, so this process's handle is
    # closed here. On POSIX the segment lives until unlink(); on Windows a
    # segment is presumably destroyed once its last handle closes -- confirm
    # before relying on this script there.
    shared_mem.close()
    return name, array.type, len(array)


def worker(shared_mem_name, dtype, start, end):
    """Attach to the shared segment and sum the elements in ``[start, end)``.

    Args:
        shared_mem_name: Name of the shared memory segment to attach to.
        dtype: Arrow data type of the values stored in the segment.
        start: Index of the first element of this worker's slice.
        end: Index one past the last element of this worker's slice.

    Returns:
        The sum of the slice as a Python ``int``.
    """
    existing_shm = shm.SharedMemory(name=shared_mem_name)
    buf = array = None
    try:
        buf = pa.py_buffer(existing_shm.buf)  # Wrap in Arrow buffer (no copy)
        # Reconstruct an Arrow array view over the shared buffer.
        array = pa.Array.from_buffers(
            dtype, end - start, [None, buf], offset=start
        )
        # Accumulate in int64 so the int32 data cannot overflow on platforms
        # whose default NumPy integer is 32-bit.
        return int(array.to_numpy().sum(dtype=np.int64))
    finally:
        # close() raises BufferError while anything still exports the
        # segment's memoryview, so drop the Arrow objects first.
        array = buf = None
        existing_shm.close()


def main():
    """Split the shared array across a process pool and print the sums."""
    shared_mem_name, dtype, length = create_shared_arrow_array()
    try:
        num_workers = 4
        chunk_size = length // num_workers
        # The last chunk runs to ``length`` so that no trailing elements are
        # dropped when the length is not a multiple of the worker count.
        tasks = [
            (
                shared_mem_name,
                dtype,
                i * chunk_size,
                length if i == num_workers - 1 else (i + 1) * chunk_size,
            )
            for i in range(num_workers)
        ]

        with multiprocessing.Pool(num_workers) as pool:
            results = pool.starmap(worker, tasks)

        print("Results:", results)
        print("Final Sum:", sum(results))
    finally:
        # Cleanup shared memory even if a worker failed.
        segment = shm.SharedMemory(name=shared_mem_name)
        segment.close()
        segment.unlink()


if __name__ == "__main__":
    main()
The text was updated successfully, but these errors were encountered:
The text was updated successfully, but these errors were encountered: