Retrosheet Spider

This Linux script pulls down all the play-by-play files that have been posted at Retrosheet.org and pushes them through Chadwick's cwbox application, thereby generating one SportsML file for every game in every "Retrofile."

This script was donated by XML Team, to be used with no strings attached or obligations of any kind. No warranties, either! But it's simple enough that any Linux rookie should be able to see what's going on.

Note that as of March 6, 2008, Retrosheet still has not posted game data for the 1999 season.

# retrosheet-spider.sh
#
# by Alan Karben, XML Team Solutions


# Start from a clean slate by deleting the trees left behind by an earlier run.
# Comment out the rm line below if you do not want your directories cleared.
echo "Removing Old Directories"
rm -rf source sportsml

# Append a start marker and the current date to log.txt.
{
   echo "Started"
   date
} >> log.txt

echo "Starting to pull RetroFiles"
# source/zips receives a copy of every downloaded archive; sportsml receives
# the generated XML files.
mkdir -p source/zips sportsml

# Seasons 1957-1997: two archives per season, <year>al.zip and <year>nl.zip.
# For each season this downloads both archives, keeps a copy of each in
# source/zips/, unpacks them into source/<year>/, and runs cwbox once per
# game id found in the event files, writing sportsml/<year>/<gameid>.xml.
for YEAR in $(seq 1957 1997)
do
   echo "Processing Year ${YEAR}"
   mkdir -p "source/${YEAR}" "sportsml/${YEAR}"

   for LEAGUE in al nl
   do
      ZIPFILE="${YEAR}${LEAGUE}.zip"
      # Only copy/move an archive that was actually fetched; a failed download
      # is reported once instead of cascading into cp/mv/unzip errors.
      if wget -q "http://www.retrosheet.org/${YEAR}/${ZIPFILE}"
      then
         cp "${ZIPFILE}" source/zips/
         mv "${ZIPFILE}" "source/${YEAR}/"
      else
         echo "Could not download ${ZIPFILE}; skipping it" >&2
         # Drop any partial file so a later run does not pick it up.
         rm -f "${ZIPFILE}"
      fi
   done

   # Work in a subshell: the directory change cannot leak into the next
   # iteration, and a failed cd aborts this season before rm or cwbox can
   # run in the wrong directory.
   (
      cd "source/${YEAR}" || {
         echo "Could not enter source/${YEAR}; skipping year ${YEAR}" >&2
         exit 1
      }

      if [ -f "${YEAR}al.zip" ]; then unzip -q "${YEAR}al.zip"; fi
      # The second archive keeps the original -u (update) flag.
      if [ -f "${YEAR}nl.zip" ]; then unzip -qu "${YEAR}nl.zip"; fi
      rm -f -- *.zip

      # Glob directly rather than parsing ls output.
      for EVFILE in *.EV*
      do
         # An unmatched glob stays literal; skip it when no event files exist.
         [ -e "${EVFILE}" ] || continue

         echo "Processing Event File ${EVFILE}"
         # Strip CR line endings first so the game ids cut out below are clean.
         dos2unix "${EVFILE}"

         # Field 2 of every "id," line is a game id. The ids are expected to be
         # single whitespace-free tokens, so word-splitting the list is safe.
         for GAMEID in $(grep '^id,' "${EVFILE}" | cut -d , -f 2)
         do
            echo "Processing Year: ${YEAR} | Event File: ${EVFILE} | Game: ${GAMEID}"
            cwbox -q -i "${GAMEID}" -y "${YEAR}" -S "${EVFILE}" > "../../sportsml/${YEAR}/${GAMEID}.xml"
         done
      done
   )
done

# Seasons 1998-2007: a single archive per season, <year>ml.zip.
# For each season this downloads the archive, keeps a copy in source/zips/,
# unpacks it into source/<year>/, and runs cwbox once per game id found in
# the event files, writing sportsml/<year>/<gameid>.xml.
for YEAR in $(seq 1998 2007)
do
   echo "Processing Year ${YEAR}"
   mkdir -p "source/${YEAR}" "sportsml/${YEAR}"

   ZIPFILE="${YEAR}ml.zip"
   # A season whose archive cannot be fetched is skipped with one message
   # instead of cascading into cp/mv/unzip/rm errors.
   if ! wget -q "http://www.retrosheet.org/${YEAR}/${ZIPFILE}"
   then
      echo "Could not download ${ZIPFILE}; skipping year ${YEAR}" >&2
      # Drop any partial file so a later run does not pick it up.
      rm -f "${ZIPFILE}"
      continue
   fi
   cp "${ZIPFILE}" source/zips/
   mv "${ZIPFILE}" "source/${YEAR}/"

   # Work in a subshell: the directory change cannot leak into the next
   # iteration, and a failed cd aborts this season before rm or cwbox can
   # run in the wrong directory.
   (
      cd "source/${YEAR}" || {
         echo "Could not enter source/${YEAR}; skipping year ${YEAR}" >&2
         exit 1
      }

      unzip -q "${ZIPFILE}"
      rm -f -- *.zip

      # Glob directly rather than parsing ls output.
      for EVFILE in *.EV*
      do
         # An unmatched glob stays literal; skip it when no event files exist.
         [ -e "${EVFILE}" ] || continue

         echo "Processing Event File ${EVFILE}"
         # Strip CR line endings first so the game ids cut out below are clean.
         dos2unix "${EVFILE}"

         # Field 2 of every "id," line is a game id. The ids are expected to be
         # single whitespace-free tokens, so word-splitting the list is safe.
         for GAMEID in $(grep '^id,' "${EVFILE}" | cut -d , -f 2)
         do
            echo "Processing Year: ${YEAR} | Event File: ${EVFILE} | Game: ${GAMEID}"
            cwbox -q -i "${GAMEID}" -y "${YEAR}" -S "${EVFILE}" > "../../sportsml/${YEAR}/${GAMEID}.xml"
         done
      done
   )
done

# Append an end marker and the current date to log.txt.
{
   echo "Ended"
   date
} >> log.txt