Contents
Linux
- Folder disk usage (total for the current directory, then per item):
du -sh
du -sh -- *
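To sort those per-item sizes, pipe through sort with human-readable comparison:
du -sh -- * | sort -h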
- File system disk space:
df -h
- Change permissions (a+rws adds read/write for all users plus the setuid/setgid bits):
sudo chmod a+rws ./squall.py
- Set an environment variable:
export HF_DATASETS_CACHE="./transformers_cache"
- Start ssh-agent in the current shell:
eval `ssh-agent`
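Then add a key to the agent (the key path here is just an example):
ssh-add ~/.ssh/id_rsa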
- Check whether a port is in use:
lsof -i -P -n | grep LISTEN
netstat -tulpn | grep LISTEN
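To check a single port (e.g. 6667, used by the Picard container below):
lsof -i :6667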
- SSH config entry (in ~/.ssh/config):
Host nyu
StrictHostKeyChecking no
UserKnownHostsFile /dev/null
HostName greene.hpc.nyu.edu
User
LogLevel QUIET
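With this entry in place, all of the options above apply automatically:
ssh nyu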
Git
- Initialize and update submodules:
git submodule update --init --recursive
- Discard all local changes and match the remote master:
git fetch origin
git reset --hard origin/master
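Note that reset --hard leaves untracked files in place; to delete those as well:
git clean -fd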
SLURM
- List your jobs:
squeue -u sz4651
- Request an interactive GPU session, then set up the environment:
srun -t10:00:00 --cpus-per-task=4 --mem=40000 --gres=gpu:4 --pty /bin/bash
srun -t10:00:00 --cpus-per-task=1 --mem=30000 --gres=gpu:2 --pty /bin/bash
cd /scratch/sz4651/
conda activate /scratch/sz4651/torch
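To end a session early, exit the shell or cancel by job id (id from squeue; the id here is a placeholder):
scancel 12345678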
- Example batch script:
#!/bin/bash
#SBATCH --job-name=tableqa
#SBATCH --open-mode=append
#SBATCH --output=./omnitab_squall.out
#SBATCH --error=./omnitab_squall.err
#SBATCH --export=ALL
#SBATCH --time=10:00:00
#SBATCH --gres=gpu:4
#SBATCH --mem=64G
#SBATCH -c 4
singularity exec --nv --overlay $SCRATCH/overlay-50G-10M.ext3:rw /scratch/work/public/singularity/cuda12.1.1-cudnn8.9.0-devel-ubuntu22.04.2.sif /bin/bash -c "
source /ext3/env.sh
conda activate /scratch/sz4651/torch
cd /scratch/sz4651/tableQA_text_to_SQL/models/RTR
source ./omnitab_train_.sh
"
Wandb
export WANDB_API_KEY=
export WANDB_PROJECT=
export WANDB_ENTITY=
- 3b48e9a5063a5c5906e7bde4fef1cac8aeeb8aad
- Restrict visible GPUs from Python (must run before CUDA is initialized, e.g. before importing torch):
os.environ["CUDA_VISIBLE_DEVICES"] = '1'
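The shell equivalent, set before launching the process:
export CUDA_VISIBLE_DEVICES=1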
JSON
- Read (open() takes the path positionally, not as a path= keyword):
with open('./a.json') as json_file:
    data = json.load(json_file)
- Write:
with open(output_prediction_file, "w") as json_file:
    json.dump(outputs, json_file, indent=4)
Docker
- To use GPUs in Docker, you must first install the NVIDIA Container Toolkit:
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
&& curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list \
&& \
sudo apt-get update
sudo apt-get install -y nvidia-container-toolkit
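After installing, configure the Docker runtime and restart the daemon (as described in the toolkit docs):
sudo nvidia-ctk runtime configure --runtime=docker
sudo systemctl restart docker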
- Build docker image:
docker build --build-arg BASE_IMAGE=pytorch/pytorch:1.9.0-cuda11.1-cudnn8-devel --tag local_picard .
- Run docker container (all options must come before the image id):
docker run --gpus 2 -p 6667:6667 -v $(pwd):/app/my_picard \
  -e NVIDIA_DRIVER_CAPABILITIES=compute,utility -e NVIDIA_VISIBLE_DEVICES=all \
  --name my_picard_dev -it 4b699d75f63a
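To open another shell in the running container:
docker exec -it my_picard_dev /bin/bash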
- If apt-get update fails inside older CUDA base images (expired NVIDIA GPG keys), drop the stale apt sources in the Dockerfile:
RUN rm /etc/apt/sources.list.d/cuda.list
RUN rm /etc/apt/sources.list.d/nvidia-ml.list
Devcontainer
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-dockerfile
{
"name": "Existing Dockerfile",
"build": {
// Sets the run context to one level up instead of the .devcontainer folder.
"context": "..",
// Update the 'dockerFile' property if you aren't using the standard 'Dockerfile' filename.
"dockerfile": "../Dockerfile"
},
"containerEnv": {
"NVIDIA_VISIBLE_DEVICES": "2,3"
},
"runArgs": [
"--runtime=nvidia"
],
// "runArgs": [
// "--gpus",
// "2"
// ],
"customizations": {
"vscode": {
"extensions": [
"ms-python.python",
"ms-azuretools.vscode-docker"]
}
},
"mounts": [
"source=/home/siyue/Projects/table_qa_analysis/data,target=/workspaces/rat-sql/data,type=bind,consistency=cached"
]
}
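To start the container, use the VS Code command "Dev Containers: Reopen in Container", or the Dev Containers CLI from the project root:
devcontainer up --workspace-folder .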