Commit bfa1d479 authored by Patrick Michalsky's avatar Patrick Michalsky
Browse files

added GPU solution

parent 7fc4178d
# Ignore rules for the IDE settings directory.
# Patterns with a leading slash are anchored to the directory that contains this file.
# Default ignored files
/shelf/
/workspace.xml
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/
<!--
  IDE inspection profile "Project Default".
  * PyPackageRequirementsInspection: reported as WARNING; the three packages
    listed under "ignoredPackages" are excluded.
  * PyPep8Inspection: reported as WEAK WARNING; error code E712 is ignored.
-->
<component name="InspectionProjectProfileManager">
  <profile version="1.0">
    <option name="myName" value="Project Default" />
    <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
      <option name="ignoredPackages">
        <value>
          <list size="3">
            <item index="0" class="java.lang.String" itemvalue="quickshift" />
            <item index="1" class="java.lang.String" itemvalue="tslearn" />
            <item index="2" class="java.lang.String" itemvalue="tensorflow-gpu" />
          </list>
        </value>
      </option>
    </inspection_tool>
    <inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
      <option name="ignoredErrors">
        <list>
          <option value="E712" />
        </list>
      </option>
    </inspection_tool>
  </profile>
</component>
\ No newline at end of file
<!--
  Inspection profile settings for this project.
  NOTE(review): USE_PROJECT_PROFILE is "false", which presumably makes the IDE
  use its own default profile instead of a project-level one - confirm this is intended.
-->
<component name="InspectionProjectProfileManager">
  <settings>
    <option name="USE_PROJECT_PROFILE" value="false" />
    <version value="1.0" />
  </settings>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!-- IDE module descriptor: a Python module whose content root is the module directory. -->
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!-- Project-level SDK selection: the project uses the SDK registered as "Python 3.6". -->
<project version="4">
  <component name="ProjectRootManager"
             version="2"
             project-jdk-name="Python 3.6"
             project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!-- Registers the single module file (.idea/justus2-demo.iml) that belongs to this project. -->
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/justus2-demo.iml"
              filepath="$PROJECT_DIR$/.idea/justus2-demo.iml" />
    </modules>
  </component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maps the project root directory to Git as its version control system. -->
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$" vcs="Git" />
  </component>
</project>
\ No newline at end of file
# Image for the TensorFlow demo job: TensorFlow GPU base image, extra system
# packages, NVIDIA driver libraries converted from an RPM, a few Python
# packages, and the program code copied to /opt/program.
FROM tensorflow/tensorflow:latest-gpu

# Build-time settings only (ARG values are not present in the running container).
ARG DEBIAN_FRONTEND=noninteractive
# Version of the NVIDIA driver library package to download.
# NOTE(review): presumably this has to match the driver on the host - confirm.
ARG VERSION=455.32.00
ARG LINK=https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64

# System packages. The apt package lists are removed in the same layer so
# they do not end up in the image.
RUN add-apt-repository universe && \
    apt-get update -y && \
    apt-get install -y \
        linux-generic \
        python3-tk \
        python3-h5py \
        llvm-8 \
        alien \
        dpkg \
        kmod \
        wget \
        git && \
    rm -rf /var/lib/apt/lists/*

# Download the driver libraries as an RPM, convert it to a .deb with alien and
# install it. The converted package carries release "-2" in its file name.
# Both package files are deleted afterwards to keep the layer small.
RUN wget ${LINK}/nvidia-driver-latest-cuda-libs-${VERSION}-1.el7.x86_64.rpm --quiet && \
    alien --scripts nvidia-driver-latest-cuda-libs-${VERSION}-1.el7.x86_64.rpm && \
    dpkg --install nvidia-driver-latest-cuda-libs_${VERSION}-2_amd64.deb && \
    rm -f nvidia-driver-latest-cuda-libs-${VERSION}-1.el7.x86_64.rpm \
          nvidia-driver-latest-cuda-libs_${VERSION}-2_amd64.deb

# Put /usr/lib64 on the loader path.
# NOTE(review): presumably this is where the converted RPM installs its libraries - confirm.
ENV LD_LIBRARY_PATH=/usr/lib64/:$LD_LIBRARY_PATH

# Python packages. pip is upgraded first; the former "--use-feature=2020-resolver"
# flag is not passed because that resolver has been pip's default since 20.3.
RUN pip3 install --no-cache-dir --upgrade pip && \
    pip3 --no-cache-dir install \
        tables \
        matplotlib \
        google-cloud-storage \
        Cython

# Program directory layout: code in /opt/program, output in /opt/program/out.
RUN mkdir -p /opt/program/out
COPY ./code/. /opt/program/

# run.sh calls "python"; -f keeps the build from failing when the link already exists.
RUN ln -sf /usr/bin/python3 /usr/bin/python

WORKDIR /opt/program/
ENTRYPOINT ["/bin/bash", "./run.sh"]
from __future__ import print_function
import tensorflow as tf
import os
# Enable eager execution.
# Note that eager execution is enabled by default in TensorFlow 2.0.
# ----------------------------------------------------------------------------------------------------------------------
tf.compat.v1.enable_eager_execution()

# Load both training and test data sets.
# ----------------------------------------------------------------------------------------------------------------------
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
# Scale the image arrays by 1/255 before they are fed to the network.
x_train, x_test = x_train / 255.0, x_test / 255.0

# Define neural network structure.
# ----------------------------------------------------------------------------------------------------------------------
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10)
])

# The final Dense layer has no activation, so the loss is configured to expect logits.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

# Train neural network.
# ----------------------------------------------------------------------------------------------------------------------
training_history = model.fit(x_train, y_train, epochs=5)
model.evaluate(x_test, y_test, verbose=2)

# Save neural network and latest network accuracy to file.
# ----------------------------------------------------------------------------------------------------------------------
# makedirs with exist_ok=True also creates './out' when it is missing and does
# not raise when the results directory is already there.
os.makedirs('./out/results', exist_ok=True)
model.save('./out/results/model.h5')
# Append the training accuracy of the last epoch, one value per line.
with open('./out/results/accuracy.txt', 'a+') as f:
    f.write(str(training_history.history['accuracy'][-1]) + '\n')
#!/usr/bin/env sh
# Container entry script: run the training program, then fix the ownership of
# everything it wrote into the output directory.

python ./main.py
# Remember the program's result; the chown below would otherwise overwrite it.
status=$?

# Give every file under the output directory the same uid:gid as the directory
# itself (stat prints "<uid>:<gid>" of /opt/program/out).
chown -R "$(stat -c "%u:%g" /opt/program/out)" /opt/program/out
chown_status=$?

# Report the program's failure if it failed, otherwise the chown result.
if [ "${status}" -eq 0 ]; then
    status=${chown_status}
fi
exit "${status}"
\ No newline at end of file
#!/usr/bin/env sh
# Build the Docker image from ./Image/Dockerfile and write it as a gzipped
# tarball (<IMAGE_NAME>.tar.gz) into the directory the script was started from.

# VARIABLES--------------------------------------------------------------------------------------
FOLDER_NAME=Image
IMAGE_NAME=docker_image

# CHECK IF SCRIPT IS RUN AS ROOT-----------------------------------------------------------------
# Checked before anything else; exits non-zero so callers can detect the failure.
if [ "$(/usr/bin/id -u)" -ne 0 ]; then
    echo "Not running as root. Please run this script as root: 'sudo bash create_image.sh'" >&2
    exit 1
fi

# CHANGE FOLDER----------------------------------------------------------------------------------
cd "./${FOLDER_NAME}" || exit 1

# DELETE IMAGES----------------------------------------------------------------------------------
# "docker images -q" prints image IDs only, so empty output means no such image.
if [ -n "$(docker images -q "${IMAGE_NAME}")" ]; then
    echo Remove old image:
    docker image rm "${IMAGE_NAME}":latest
fi

# BUILD NEW IMAGE--------------------------------------------------------------------------------
# Stop here on a failed build instead of exporting a missing image.
if ! docker build -t "${IMAGE_NAME}" -f Dockerfile .; then
    echo "docker build failed; no image exported." >&2
    exit 1
fi

# SAVE IMAGE TO TAR BALL-------------------------------------------------------------------------
cd ..
docker save "${IMAGE_NAME}" | gzip > "${IMAGE_NAME}".tar.gz
#!/usr/bin/env sh
# Download the udocker tarball for VERSION into the current directory, extract
# the udocker executable from it, run its installer and delete the tarball.

VERSION=1.1.4
LINK=https://raw.githubusercontent.com/jorge-lip/udocker-builds/master/tarballs
# Single source for the tarball file name so it always follows VERSION.
TARBALL_NAME=udocker-${VERSION}.tar.gz

# PULL UDOCKER INSTALLATION----------------------------------------------------------------------
# --fail makes curl return an error on an HTTP failure instead of saving the error page.
if ! curl --fail --location --output "${TARBALL_NAME}" "${LINK}/${TARBALL_NAME}"; then
    echo "Download of ${TARBALL_NAME} failed." >&2
    exit 1
fi

# EXPORT ENVIRONMENT VARIABLE--------------------------------------------------------------------
# NOTE(review): presumably read by "./udocker install" to locate the local tarball - confirm.
UDOCKER_TARBALL=$(pwd)/${TARBALL_NAME}
export UDOCKER_TARBALL

# UNZIP TARBALL----------------------------------------------------------------------------------
# Only the "udocker" member is extracted.
tar xzvf "$UDOCKER_TARBALL" udocker || exit 1

# PREPARE AND EXECUTE INSTALLATION---------------------------------------------------------------
chmod u+rx udocker
./udocker install

# CLEAN UP---------------------------------------------------------------------------------------
rm "${TARBALL_NAME}"
\ No newline at end of file
#!/usr/bin/env sh
# Job script: clean up leftovers of earlier runs, load the image tarball into
# udocker, create a container from it and run it with the current directory
# mounted as the program's output directory.

IMAGE_NAME=docker_image
CONTAINER_NAME=udocker_instance
RESULTS=./results
# File that records created container IDs. It is written below and read by the
# cleanup step, so both must use this one name.
FILE=containerIDs.txt

# REMOVE PRIOR RESULTS---------------------------------------------------------------------------
if [ -d "${RESULTS}" ]; then
    echo Delete prior results:
    rm -r "${RESULTS}"
fi

# REMOVE BROKEN CONTAINERS-----------------------------------------------------------------------
# Remove every container recorded by an earlier run, then drop the record.
if [ -f "${FILE}" ]; then
    echo Delete prior container:
    while read -r CONTAINER_ID; do
        ./udocker rm "$CONTAINER_ID"
    done < "${FILE}"
    rm "${FILE}"
fi

# DELETE FORMER IMAGE----------------------------------------------------------------------------
# Any output from "udocker inspect" is taken to mean the image exists.
if [ "$(./udocker inspect "${IMAGE_NAME}")" != "" ]; then
    echo Delete former image:
    ./udocker rmi "${IMAGE_NAME}":latest
fi

# LOAD NEW IMAGE---------------------------------------------------------------------------------
echo Load Image:
./udocker load -i "${IMAGE_NAME}".tar.gz

# CREATE CONTAINER-------------------------------------------------------------------------------
echo Create Container:
id=$(./udocker create --name="${CONTAINER_NAME}" "${IMAGE_NAME}":latest)

# SAVE CONTAINER ID------------------------------------------------------------------------------
echo "${id}" >> "${FILE}"

# ADD GPU CONFIGURATION--------------------------------------------------------------------------
# (did not work locally)
# ./udocker setup --nvidia "${id}"

# RUN CONTAINER----------------------------------------------------------------------------------
# The current directory is mounted at /opt/program/out inside the container.
echo Run Container:
./udocker run --rm \
    --user="$USER" \
    --volume "$(pwd)":/opt/program/out \
    "${CONTAINER_NAME}"
#!/usr/bin/env sh
# Submit the job script with sbatch to the "gpu" partition, requesting
# NUM_GPUS GPUs as a generic resource.
NUM_GPUS=1
SCRIPT=jobscript.sh

sbatch --partition=gpu --gres="gpu:${NUM_GPUS}" "${SCRIPT}"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment