Python 代码实现高性能异构分子动力学模拟系统

初始化模块:

负责读取输入数据(如分子初始位置、速度等),并将这些数据初始化到主机内存和设备内存。

#include <iostream>
#include <vector>
#include <cuda_runtime.h>

// 原子结构体定义
struct Atom {
    float x, y, z;  // 位置
    float vx, vy, vz;  // 速度
};

// 初始化原子数据
void initializeAtoms(std::vector<Atom> &atoms, int numAtoms) {
    for (int i = 0; i < numAtoms; ++i) {
        atoms[i].x = rand() / (float)RAND_MAX;
        atoms[i].y = rand() / (float)RAND_MAX;
        atoms[i].z = rand() / (float)RAND_MAX;
        atoms[i].vx = rand() / (float)RAND_MAX;
        atoms[i].vy = rand() / (float)RAND_MAX;
        atoms[i].vz = rand() / (float)RAND_MAX;
    }
}

int main() {
    int numAtoms = 1000;
    std::vector<Atom> atoms(numAtoms);

    // 初始化原子数据
    initializeAtoms(atoms, numAtoms);

    // 将数据传输到 GPU
    Atom *d_atoms;
    cudaMalloc(&d_atoms, numAtoms * sizeof(Atom));
    cudaMemcpy(d_atoms, atoms.data(), numAtoms * sizeof(Atom), cudaMemcpyHostToDevice);

    // 其他处理...
}

力计算模块:

在 GPU 上并行计算原子之间的相互作用力。

__global__ void computeForces(Atom *atoms, float *forces, int numAtoms) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < numAtoms) {
        // Lennard-Jones 力计算
        float fx = 0.0f, fy = 0.0f, fz = 0.0f;
        for (int j = 0; j < numAtoms; ++j) {
            if (j != idx) {
                float dx = atoms[idx].x - atoms[j].x;
                float dy = atoms[idx].y - atoms[j].y;
                float dz = atoms[idx].z - atoms[j].z;
                float r2 = dx * dx + dy * dy + dz * dz;
                float invR2 = 1.0f / r2;
                float invR6 = invR2 * invR2 * invR2;
                float f = 24.0f * invR2 * invR6 * (2.0f * invR6 - 1.0f);
                fx += dx * f;
                fy += dy * f;
                fz += dz * f;
            }
        }
        forces[3 * idx] = fx;
        forces[3 * idx + 1] = fy;
        forces[3 * idx + 2] = fz;
    }
}

int main() {
    int numAtoms = 1000;
    std::vector<Atom> atoms(numAtoms);
    initializeAtoms(atoms, numAtoms);

    Atom *d_atoms;
    cudaMalloc(&d_atoms, numAtoms * sizeof(Atom));
    cudaMemcpy(d_atoms, atoms.data(), numAtoms * sizeof(Atom), cudaMemcpyHostToDevice);

    float *d_forces;
    cudaMalloc(&d_forces, 3 * numAtoms * sizeof(float));

    int blockSize = 256;
    int numBlocks = (numAtoms + blockSize - 1) / blockSize;

    // 计算力
    computeForces<<<numBlocks, blockSize>>>(d_atoms, d_forces, numAtoms);

    // 其他处理...
}

时间积分模块:

更新原子的位置和速度,使用如 Velocity-Verlet 算法,这部分计算可以在 GPU 上执行。

__global__ void integrate(Atom *atoms, float *forces, int numAtoms, float dt) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < numAtoms) {
        // 更新位置
        atoms[idx].x += atoms[idx].vx * dt + 0.5f * forces[3 * idx] * dt * dt;
        atoms[idx].y += atoms[idx].vy * dt + 0.5f * forces[3 * idx + 1] * dt * dt;
        atoms[idx].z += atoms[idx].vz * dt + 0.5f * forces[3 * idx + 2] * dt * dt;

        // 更新速度
        atoms[idx].vx += 0.5f * forces[3 * idx] * dt;
        atoms[idx].vy += 0.5f * forces[3 * idx + 1] * dt;
        atoms[idx].vz += 0.5f * forces[3 * idx + 2] * dt;
    }
}

int main() {
    int numAtoms = 1000;
    std::vector<Atom> atoms(numAtoms);
    initializeAtoms(atoms, numAtoms);

    Atom *d_atoms;
    cudaMalloc(&d_atoms, numAtoms * sizeof(Atom));
    cudaMemcpy(d_atoms, atoms.data(), numAtoms * sizeof(Atom), cudaMemcpyHostToDevice);

    float *d_forces;
    cudaMalloc(&d_forces, 3 * numAtoms * sizeof(float));

    int blockSize = 256;
    int numBlocks = (numAtoms + blockSize - 1) / blockSize;
    float dt = 0.001f;

    for (int step = 0; step < 1000; ++step) {
        // 计算力
        computeForces<<<numBlocks, blockSize>>>(d_atoms, d_forces, numAtoms);

        // 时间积分
        integrate<<<numBlocks, blockSize>>>(d_atoms, d_forces, numAtoms, dt);
    }

    // 将结果从 GPU 传回 CPU
    cudaMemcpy(atoms.data(), d_atoms, numAtoms * sizeof(Atom), cudaMemcpyDeviceToHost);

    // 其他处理...
}

数据传输模块:

在主机和设备之间传输数据。

在主机和设备之间传输数据已经在上面的例子中演示过了,主要使用 cudaMemcpy 来实现。

输出模块:

将模拟结果写入文件。

#include <fstream>

void saveResults(const std::vector<Atom> &atoms, const std::string &filename) {
    std::ofstream file(filename);
    if (file.is_open()) {
        for (const auto &atom : atoms) {
            file << atom.x << " " << atom.y << " " << atom.z << "\n";
        }
        file.close();
    } else {
        std::cerr << "无法打开文件:" << filename << std::endl;
    }
}

int main() {
    int numAtoms = 1000;
    std::vector<Atom> atoms(numAtoms);
    initializeAtoms(atoms, numAtoms);

    Atom *d_atoms;
    cudaMalloc(&d_atoms, numAtoms * sizeof(Atom));
    cudaMemcpy(d_atoms, atoms.data(), numAtoms * sizeof(Atom), cudaMemcpyHostToDevice);

    float *d_forces;
    cudaMalloc(&d_forces, 3 * numAtoms * sizeof(float));

    int blockSize = 256;
    int numBlocks = (numAtoms + blockSize - 1) / blockSize;
    float dt = 0.001f;

    for (int step = 0; step < 1000; ++step) {
        computeForces<<<numBlocks, blockSize>>>(d_atoms, d_forces, numAtoms);
        integrate<<<numBlocks, blockSize>>>(d_atoms, d_forces, numAtoms, dt);
    }

    cudaMemcpy(atoms.data(), d_atoms, numAtoms * sizeof(Atom), cudaMemcpyDeviceToHost);

    // 保存

C++ 代码实现高性能异构分子动力学模拟系统

初始化模块:

负责读取输入数据(如分子初始位置、速度等),并将这些数据初始化到主机内存和设备内存。

#include <iostream>
#include <vector>
#include <cuda_runtime.h>

// 原子结构体定义
struct Atom {
    float x, y, z;  // 位置
    float vx, vy, vz;  // 速度
};

// 初始化原子数据
void initializeAtoms(std::vector<Atom> &atoms, int numAtoms) {
    for (int i = 0; i < numAtoms; ++i) {
        atoms[i].x = rand() / (float)RAND_MAX;
        atoms[i].y = rand() / (float)RAND_MAX;
        atoms[i].z = rand() / (float)RAND_MAX;
        atoms[i].vx = rand() / (float)RAND_MAX;
        atoms[i].vy = rand() / (float)RAND_MAX;
        atoms[i].vz = rand() / (float)RAND_MAX;
    }
}

int main() {
    int numAtoms = 1000;
    std::vector<Atom> atoms(numAtoms);

    // 初始化原子数据
    initializeAtoms(atoms, numAtoms);

    // 将数据传输到 GPU
    Atom *d_atoms;
    cudaMalloc(&d_atoms, numAtoms * sizeof(Atom));
    cudaMemcpy(d_atoms, atoms.data(), numAtoms * sizeof(Atom), cudaMemcpyHostToDevice);

    // 其他处理...
}

力计算模块:

在 GPU 上并行计算原子之间的相互作用力。

__global__ void computeForces(Atom *atoms, float *forces, int numAtoms) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < numAtoms) {
        // Lennard-Jones 力计算
        float fx = 0.0f, fy = 0.0f, fz = 0.0f;
        for (int j = 0; j < numAtoms; ++j) {
            if (j != idx) {
                float dx = atoms[idx].x - atoms[j].x;
                float dy = atoms[idx].y - atoms[j].y;
                float dz = atoms[idx].z - atoms[j].z;
                float r2 = dx * dx + dy * dy + dz * dz;
                float invR2 = 1.0f / r2;
                float invR6 = invR2 * invR2 * invR2;
                float f = 24.0f * invR2 * invR6 * (2.0f * invR6 - 1.0f);
                fx += dx * f;
                fy += dy * f;
                fz += dz * f;
            }
        }
        forces[3 * idx] = fx;
        forces[3 * idx + 1] = fy;
        forces[3 * idx + 2] = fz;
    }
}

int main() {
    int numAtoms = 1000;
    std::vector<Atom> atoms(numAtoms);
    initializeAtoms(atoms, numAtoms);

    Atom *d_atoms;
    cudaMalloc(&d_atoms, numAtoms * sizeof(Atom));
    cudaMemcpy(d_atoms, atoms.data(), numAtoms * sizeof(Atom), cudaMemcpyHostToDevice);

    float *d_forces;
    cudaMalloc(&d_forces, 3 * numAtoms * sizeof(float));

    int blockSize = 256;
    int numBlocks = (numAtoms + blockSize - 1) / blockSize;

    // 计算力
    computeForces<<<numBlocks, blockSize>>>(d_atoms, d_forces, numAtoms);

    // 其他处理...
}

时间积分模块:

更新原子的位置和速度,使用如 Velocity-Verlet 算法,这部分计算可以在 GPU 上执行。

__global__ void integrate(Atom *atoms, float *forces, int numAtoms, float dt) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < numAtoms) {
        // 更新位置
        atoms[idx].x += atoms[idx].vx * dt + 0.5f * forces[3 * idx] * dt * dt;
        atoms[idx].y += atoms[idx].vy * dt + 0.5f * forces[3 * idx + 1] * dt * dt;
        atoms[idx].z += atoms[idx].vz * dt + 0.5f * forces[3 * idx + 2] * dt * dt;

        // 更新速度
        atoms[idx].vx += 0.5f * forces[3 * idx] * dt;
        atoms[idx].vy += 0.5f * forces[3 * idx + 1] * dt;
        atoms[idx].vz += 0.5f * forces[3 * idx + 2] * dt;
    }
}

int main() {
    int numAtoms = 1000;
    std::vector<Atom> atoms(numAtoms);
    initializeAtoms(atoms, numAtoms);

    Atom *d_atoms;
    cudaMalloc(&d_atoms, numAtoms * sizeof(Atom));
    cudaMemcpy(d_atoms, atoms.data(), numAtoms * sizeof(Atom), cudaMemcpyHostToDevice);

    float *d_forces;
    cudaMalloc(&d_forces, 3 * numAtoms * sizeof(float));

    int blockSize = 256;
    int numBlocks = (numAtoms + blockSize - 1) / blockSize;
    float dt = 0.001f;

    for (int step = 0; step < 1000; ++step) {
        // 计算力
        computeForces<<<numBlocks, blockSize>>>(d_atoms, d_forces, numAtoms);

        // 时间积分
        integrate<<<numBlocks, blockSize>>>(d_atoms, d_forces, numAtoms, dt);
    }

    // 将结果从 GPU 传回 CPU
    cudaMemcpy(atoms.data(), d_atoms, numAtoms * sizeof(Atom), cudaMemcpyDeviceToHost);

    // 其他处理...
}

数据传输模块:

在主机和设备之间传输数据已经在上面的例子中演示过了,主要使用 cudaMemcpy 来实现。

输出模块:

将模拟结果写入文件。

#include <fstream>

void saveResults(const std::vector<Atom> &atoms, const std::string &filename) {
    std::ofstream file(filename);
    if (file.is_open()) {
        for (const auto &atom : atoms) {
            file << atom.x << " " << atom.y << " " << atom.z << "\n";
        }
        file.close();
    } else {
        std::cerr << "无法打开文件:" << filename << std::endl;
    }
}

int main() {
    int numAtoms = 1000;
    std::vector<Atom> atoms(numAtoms);
    initializeAtoms(atoms, numAtoms);

    Atom *d_atoms;
    cudaMalloc(&d_atoms, numAtoms * sizeof(Atom));
    cudaMemcpy(d_atoms, atoms.data(), numAtoms * sizeof(Atom), cudaMemcpyHostToDevice);

    float *d_forces;
    cudaMalloc(&d_forces, 3 * numAtoms * sizeof(float));

    int blockSize = 256;
    int numBlocks = (numAtoms + blockSize - 1) / blockSize;
    float dt = 0.001f;

    for (int step = 0; step < 1000; ++step) {
        computeForces<<<numBlocks, blockSize>>>(d_atoms, d_forces, numAtoms);
        integrate<<<numBlocks, blockSize>>>(d_atoms, d_forces, numAtoms, dt);
    }

    cudaMemcpy(atoms.data(), d_atoms, numAtoms * sizeof(Atom), cudaMemcpyDeviceToHost);

    // 保存结果
    saveResults(atoms, "results.txt");

    // 清理资源
    cudaFree(d_atoms);
    cudaFree(d_forces);
}