#!/bin/bash
# Copyright (c) 2010, Elmar Pruesse
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#     * Neither the name of the nor the
#       names of its contributors may be used to endorse or promote products
#       derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Usage: <this-script> <file> [size] [max]
#
# Splits the multi-FASTA file <file> into chunk files of [size] sequences each
# (default 1000). A sequence starts at every line beginning with ">". Chunk
# files are created in the current working directory and named
# "<name>.<k><ext>", where <name><ext> is the base name of <file> split at its
# last dot and <k> counts chunks from 1. Lines that precede the first ">"
# header end up in chunk 0. Reading stops once [max] chunks (default 99999999)
# have been filled.
#
# Exit status:
#   1  no input file given (usage is printed)
#   2  input file is not readable
#   3  [size] is not a positive integer
#   4  [max] is not a non-negative integer

set -eu

if [ -z "${1:-}" ]; then
  {
    echo "${0##*/} <file> [size] [max]"
    echo " Splits multi-fasta file into chunks of [size] sequences."
    echo " If size is not specified, a default of 1000 is used."
    echo " At most [max] chunks are written; remaining input is ignored."
  } >&2
  exit 1
fi

FILENAME="$1"
if [ ! -r "$FILENAME" ]; then
  echo "Cannot read from file '$FILENAME'!" >&2
  exit 2
fi

FILEBASE=$(basename -- "$FILENAME")
PREFIX=${FILEBASE%.*}
# Quote the prefix so it is removed literally rather than matched as a glob.
SUFFIX=${FILEBASE#"$PREFIX"}

# Both numbers are checked textually: this rejects junk before it reaches awk
# and avoids shell integer parsing of very large or zero-padded values.
CHUNK=${2:-1000}
if [[ ! "$CHUNK" =~ ^0*[1-9][0-9]*$ ]]; then
  echo "Chunk size must be a positive integer!" >&2
  exit 3
fi

MAX=${3:-99999999}
if [[ ! "$MAX" =~ ^[0-9]+$ ]]; then
  echo "Maximum number of chunks must be a non-negative integer!" >&2
  exit 4
fi

# The file name parts travel through the environment instead of being pasted
# into the program text or passed with -v, so quotes and backslashes in the
# name are taken literally. The input is attached to stdin so that a file name
# containing "=" or starting with "-" is not misread by awk as an assignment
# or an option.
SPLIT_PREFIX="$PREFIX" SPLIT_SUFFIX="$SUFFIX" \
  awk -v chunk="$CHUNK" -v max="$MAX" '
  BEGIN {
    prefix = ENVIRON["SPLIT_PREFIX"]
    suffix = ENVIRON["SPLIT_SUFFIX"]
    n = 0    # sequence headers seen so far
    on = 0   # index of the chunk currently being written
    out = prefix "." on suffix
  }
  /^>/ {
    if (n % chunk == 0) {
      # Chunks are written strictly one after another, so the finished file
      # can be closed; this keeps the number of open files constant.
      close(out)
      ++on
      out = prefix "." on suffix
    }
    ++n
    if (n > max * chunk)
      exit 0
  }
  { print > out }
' < "$FILENAME"