Commit e448e048 authored by ana

adding shtools for charrnn

parent 7797d84e
#! /bin/bash
# Upload a raw text file into the rawdata folder on the algolit server
scp path/to.txt algolit@UVW:char-rnn/rawdata
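# A hypothetical invocation, assuming a local file frankenstein.txt and the
# same placeholder host 'UVW' as above:
# scp ./frankenstein.txt algolit@UVW:char-rnn/rawdata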
#! /bin/bash
# Script to concatenate all the raw data in the rawdata folder
# and train the network
cd /home/algolit/torch-rnn/ || exit 1;
if [ ! -d ./raw-data ]; then
mkdir ./raw-data
fi
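# The dataset name comes in as the first argument ($1); the Unix timestamp
# below keeps repeated runs on the same dataset from overwriting each other.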
name="$(date +%s)";
txt="./raw-data/${1}.txt"
h5="./data/${1}-${name}.h5"
json="./data/${1}-${name}.json"
cv="cvs/${1}/${name}"
mkdir -p "${cv}";
# Combine raw texts and merge into one file
#cat ./texts/*.txt > $txt;
# Make a copy of the data-set for processing outside
#cp -f $txt ./raw-data/latest.txt;
# Activate the venv and preprocess the data
source ~/venvs/torch-rnn/bin/activate;
python scripts/preprocess.py \
--input_txt "${txt}" \
--output_h5 "${h5}" \
--output_json "${json}"
# Start the training
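# 3-layer RNN, 256 units per layer, 75-character sequences, 150 epochs;
# checkpoints are written into the per-run folder created above.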
th train.lua -input_h5 "${h5}" -input_json "${json}" -num_layers 3 -rnn_size 256 -seq_length 75 -max_epochs 150 -checkpoint_name "${cv}/checkpoint";
# After training, print the command to sample from the newest checkpoint
echo "Training done. Use the following command to sample from the latest checkpoint:"
files=$(ls -t "${cv}"/checkpoint*.t7);
set -- $files;
echo "th sample.lua -checkpoint ${1}";
#! /bin/bash
# Script to unzip all books and put them in a folder 'merged'
# (relative paths below assume the script is run from /home/algolit/gutenberg)
#cd /home/algolit/gutenberg;
if [ ! -d ./merged ]; then
mkdir ./merged
fi
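# Gutenberg mirrors shard books by the digits of their id: a hypothetical
# book 12345 would sit at aleph.gutenberg.org/1/2/3/4/12345/12345.zip,
# hence the five path wildcards below.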
for path in /home/algolit/gutenberg/aleph.gutenberg.org/*/*/*/*/*/*.zip; do
unzip "$path" -d ./merged;
done;
# Remove books with a '-8' suffix: Project Gutenberg ships these as 8-bit
# (Latin-1) encoded copies of the plain ASCII files; both the copy and its
# ASCII counterpart are removed here.
for file in /home/algolit/gutenberg/merged/*-8.txt; do
name=$(basename "$file");
num="${name%-8.txt}.txt";
rm "./merged/$num";
rm "./merged/$name";
done;
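# Worked example (hypothetical book id 12345): if unzipping produced both
# ./merged/12345.txt and ./merged/12345-8.txt, the loop above removes both
# files, leaving only books that were shipped in a single encoding.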