在VPS上安装tensorflow开发环境

2024年12月25日　作者：unix2go

趁着圣诞节活动，我买了一台新的VPS，配置还算可以：4GB内存、80GB磁盘。当然，它没有GPU，只有CPU，所以只安装CPU版本的TensorFlow。

VPS安装的是Ubuntu系统，按如下步骤操作，就可以装好TensorFlow开发环境。

# Update the system
sudo apt update && sudo apt upgrade -y

# Install the basic development tools
sudo apt install build-essential wget curl git software-properties-common -y

# Install Python 3 and pip
sudo apt install python3 python3-pip python3-dev python3-venv -y

# Check the Python version
python3 --version

# Create the project directory (-p: do not fail if it already exists)
mkdir -p ~/tensorflow_project
cd ~/tensorflow_project

# Create the virtual environment
python3 -m venv tf_env

# Activate the virtual environment
source tf_env/bin/activate

# Upgrade pip
pip install --upgrade pip

# Install the basic scientific computing packages
pip install numpy scipy pandas matplotlib pydot

# Install tensorflow-cpu (the CPU-only build is recommended because this VPS has no GPU)
pip install tensorflow-cpu

# Jupyter Notebook (optional)
pip install jupyter

# Other commonly used data science tools
pip install scikit-learn

# test_tf.py: save the lines below as this file and run it to check the install.
import tensorflow as tf

# Installed TensorFlow version.
version = tf.__version__
print("TensorFlow version:", version)

# Whether this TensorFlow build was compiled with CUDA support.
cuda_build = tf.test.is_built_with_cuda()
print("TensorFlow is built with CUDA:", cuda_build)

# Physical devices TensorFlow can see on this machine.
devices = tf.config.list_physical_devices()
print("Available devices:", devices)

# Register environment variables: set TensorFlow's log level and hide CUDA devices
echo 'export TF_CPP_MIN_LOG_LEVEL=3' >> ~/.bashrc
echo 'export CUDA_VISIBLE_DEVICES=-1' >> ~/.bashrc
source ~/.bashrc

# Install graphviz (-y keeps it non-interactive, like the other installs)
sudo apt install graphviz -y

# Install a memory monitoring tool
sudo apt install htop -y

# Install compression tools
sudo apt install zip unzip -y

# Install system monitoring tools
sudo apt install sysstat -y

因为TensorFlow安装在虚拟环境中，每次使用前都需要先激活虚拟环境，运行如下命令。

source ~/tensorflow_project/tf_env/bin/activate

使用完毕后，运行如下命令退出虚拟环境。

deactivate

如果要启动Jupyter Notebook，运行如下命令。注意：--ip=0.0.0.0 会让服务监听所有网络接口，在公网VPS上请妥善保管启动时输出的访问令牌（token），或者改用SSH隧道访问。

jupyter notebook --ip=0.0.0.0 --port=8888 --no-browser

最后，可以使用如下脚本来测试tensorflow对cpu的处理能力（浮点计算能力）。

import tensorflow as tf
import time
import numpy as np

def cpu_benchmark(matrix_size=1000, iterations=10):
    """Benchmark square matrix multiplication with TensorFlow on the CPU.

    Multiplies two random ``matrix_size`` x ``matrix_size`` matrices
    ``iterations`` times (after one untimed warm-up multiplication) and
    derives an approximate GFLOPS figure from the average time of one
    multiplication.

    Args:
        matrix_size: Side length N of the square matrices.
        iterations: Number of timed multiplications; must be at least 1.

    Returns:
        A dict with the keys ``matrix_size``, ``iterations``,
        ``total_time`` (seconds), ``avg_time_per_iteration`` (seconds)
        and ``gflops``.

    Raises:
        ValueError: If ``iterations`` is less than 1.
    """
    if iterations < 1:
        # Zero would divide by zero below; a negative count would time nothing.
        raise ValueError("iterations must be at least 1")

    # Hide any GPUs so the work runs on the CPU. TensorFlow raises
    # RuntimeError if the visible devices are changed after its runtime has
    # been initialized; in that case the explicit CPU placement below still
    # keeps the benchmark on the CPU.
    try:
        tf.config.set_visible_devices([], 'GPU')
    except RuntimeError:
        pass

    with tf.device('/CPU:0'):
        # Two random input matrices.
        matrix_a = tf.random.normal([matrix_size, matrix_size])
        matrix_b = tf.random.normal([matrix_size, matrix_size])

        # Warm-up run so one-time setup cost is not included in the timing.
        _ = tf.matmul(matrix_a, matrix_b)

        # perf_counter is monotonic and high-resolution, unlike time.time(),
        # which can jump when the system clock is adjusted.
        start_time = time.perf_counter()
        for _ in range(iterations):
            _ = tf.matmul(matrix_a, matrix_b)
        total_time = time.perf_counter() - start_time

    # Average time of a single multiplication.
    avg_time = total_time / iterations

    # An N x N matrix multiplication costs roughly 2 * N^3 floating-point
    # operations.
    flops = 2 * (matrix_size ** 3)
    flops_per_sec = flops / avg_time

    return {
        'matrix_size': matrix_size,
        'iterations': iterations,
        'total_time': total_time,
        'avg_time_per_iteration': avg_time,
        'gflops': flops_per_sec / 1e9  # convert FLOPS to GFLOPS
    }

# Run the benchmark for a few increasingly large matrix sizes and report
# the timings of each run.
for size in (500, 1000, 2000):
    # Announce the size first so the header appears before the (slow) run.
    print(f"\n测试矩阵大小: {size}x{size}")
    results = cpu_benchmark(matrix_size=size, iterations=5)
    # One call, one report line per argument.
    print(
        f"总耗时: {results['total_time']:.2f} 秒",
        f"每次迭代平均时间: {results['avg_time_per_iteration']:.2f} 秒",
        f"性能: {results['gflops']:.2f} GFLOPS",
        sep="\n",
    )