Skip to content

Commit

Permalink
Merge pull request #147 from griidc/release/5.11.0
Browse files Browse the repository at this point in the history
Release/5.11.0
  • Loading branch information
mickel1138 authored Jan 29, 2019
2 parents 7173d46 + 2cf4676 commit e64650e
Show file tree
Hide file tree
Showing 25 changed files with 868 additions and 29 deletions.
5 changes: 3 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ before-install:
# Install composer dependencies,
# Create schema and fixtures
install:
- pear install pear/PHP_CodeSniffer-2.5.1
- export SYMFONY_ENV=drupal_dev
- composer install
- phpenv rehash
Expand All @@ -65,7 +64,9 @@ before-script:
# Run script
script:
- phpunit
- phpcs --standard=GRIIDC --colors --ignore=bak --extensions=php -n -s ./src
- git clone --depth=1 https://github.com/squizlabs/PHP_CodeSniffer.git
- ./PHP_CodeSniffer/bin/phpcs --version
- ./PHP_CodeSniffer/bin/phpcs --config-set installed_paths ./src/GRIIDC/ --colors --ignore=bak --extensions=php -i -n -s ./src/

notifications:
email: false
Expand Down
10 changes: 3 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,22 @@

Pelagos is a system for maintaining a repository of scientific research data.
Developed and maintained by The Gulf of Mexico Research Initiative Information and Data Cooperative (GRIIDC).
[URL] (https://data.gulfresearchinitiative.org/)
URL: https://data.gulfresearchinitiative.org/
## Getting Started

These instructions will get you a copy of the project up and running on your local machine for development and testing purposes. See deployment for notes on how to deploy the project on a live system.

### Prerequisites

* [CENTOS 7](https://wiki.centos.org/)
* [PHP 5.6](http://php.net/docs.php )
* [CENTOS 6/7](https://wiki.centos.org/)
* [PHP 7.1](http://php.net/docs.php )
* [Symfony 3.4](https://symfony.com/doc/3.4/index.html) - PHP framework for web application
* [PostgreSQL 9.6](https://www.postgresql.org/docs/9.6/static/release-9-6.html) - ORDBMS
* [FOSElasticaBundle](https://github.com/FriendsOfSymfony/FOSElasticaBundle) - PHP integration for Symfony with ElasticSearch
* [RabbitMQ](https://www.rabbitmq.com/documentation.html) - Open source message broker software

### Installation

## Documentation

## Roadmap

## Contributors

* **Michael Van Den Eijnden** - (2012 - present) [Github](https://github.com/mickel1138)
Expand Down
38 changes: 38 additions & 0 deletions share/bash/create-manifest.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/bin/bash

#
# Creates zip file with manifest files.
#
# Usage: create-manifest.sh <hashdeep-file>
#   The argument must contain a UDI (e.g. R1.x137.108:0001) so the
#   output filenames can be derived from it.
#

if [ "$1" == "" ]
then
    echo "No argument provided!"
    # Bug fix: previously the script kept running with no argument and
    # produced garbage "-ReadMe.txt"/"-file-manifest.txt" files.
    exit 1
fi

for arg in "$@"
do
    if [ "$arg" == "--help" ] || [ "$arg" == "-h" ]
    then
        echo "Help argument detected."
        # Bug fix: exit after handling --help instead of falling through.
        exit 0
    fi
done

path=$(pwd)
# Extract the UDI from the argument; dots are escaped so they match
# literally (previously '.' matched any character).
udi=$(echo "$1" | grep -oP "([A-Za-z0-9]{2}\.x[0-9]{3}\.[0-9]{3})[.:]([0-9]{4})")

if [ "$udi" == "" ]
then
    echo "Could not extract a UDI from: $1"
    exit 1
fi

echo "Processing UDI: $udi"

readmefile="$udi-ReadMe.txt"
manifestfile="$udi-file-manifest.txt"
zipfile="$udi-manifest.zip"

echo "Generating file: $path/$udi-ReadMe.txt"
python share/python/create-tree.py -d "$1" > "$readmefile"
unix2dos "$readmefile"

echo "Generating file: $path/$udi-file-manifest.txt"
python share/python/create-tree.py "$1" > "$manifestfile"
unix2dos "$manifestfile"

# Bundle the original hashdeep file plus both generated manifests
# (-m removes the generated files after adding), then list the archive.
zip "$zipfile" "$1"
zip "$zipfile" -m "$manifestfile" "$readmefile"
unzip -l "$zipfile"
174 changes: 174 additions & 0 deletions share/python/create-tree.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
import csv
import argparse
import operator
import os
import re
import sys
import textwrap
from directory_tree_node import DirectoryTreeNode
from collections import OrderedDict

def check_header(filename):
    """Validate the hashdeep header of *filename*.

    A valid header looks like:

        %%%% HASHDEEP-1.0
        %%%% size,md5,sha256,filename
        ## Invoked from: /mnt/LTFS/R1.x137.108.0001
        ## $ hashdeep -r .
        ##

    Returns the invocation path (the text after '## Invoked from: ')
    when the header is valid, otherwise None.
    """
    with open(filename) as f:
        first = f.readline().rstrip() == '%%%% HASHDEEP-1.0'
        second = f.readline().rstrip() == '%%%% size,md5,sha256,filename'
        third_line = f.readline().rstrip()
        third = re.match('^## Invoked from: ', third_line) is not None
        fourth_line = f.readline().rstrip()
        fourth = re.match(r'^## \$ hashdeep -r ', fourth_line) is not None
        fifth = f.readline().rstrip() == '##'
    if first and second and third and fourth and fifth:
        # The hardcoded offset (len('## Invoked from: ') == 17) is safe
        # because the re.match() above guarantees the prefix is present.
        return third_line[17:]
    else:
        return None

# Adapted from the Python Cookbook:
# https://www.oreilly.com/library/view/python-cookbook/0596001673/ch04s16.html
def splitall(path):
    """Split *path* into a list of all of its components."""
    pieces = []
    while True:
        head, tail = os.path.split(path)
        if head == path:
            # Absolute-path sentinel: os.path.split stops shrinking the head.
            pieces.insert(0, head)
            break
        if tail == path:
            # Relative-path sentinel: only the final component remains.
            pieces.insert(0, tail)
            break
        path = head
        pieces.insert(0, tail)
    return pieces


def generate_tree(filename, short):
    """Print a dataset manifest built from a hashdeep listing.

    filename -- path to a hashdeep output file with a valid 5-line header
    short    -- True prints a directory summary plus a file-extension
                summary; False prints the full file listing

    Prints "Error in header. Stopping" and returns if the header is invalid.
    """
    path = check_header(filename)
    # Bug fix: the None check must come before using `path` — previously
    # udi_pattern.findall(None) raised TypeError on an invalid header
    # instead of reaching the error message below.
    if path is None:
        print("Error in header. Stopping")
        return

    # Extract the UDI from the invocation path recorded in the header.
    # Dots are escaped so they match literally.
    udi_pattern = re.compile(r'([A-Za-z0-9]{2}\.x[0-9]{3}\.[0-9]{3})[.:]([0-9]{4})')
    udi_parts = udi_pattern.findall(path)
    udi = udi_parts[0][0] + ':' + udi_parts[0][1]

    filetypes = {}
    sizes = OrderedDict()
    # Text mode: csv.reader requires it on Python 3 (was 'rb').
    with open(filename, 'r') as f:
        reader = csv.reader(f)
        rownum = 1
        for row in reader:
            # Skip the 5-line hashdeep header.
            if rownum > 5:
                object_filename = re.sub(path + '/', '', row[3])
                object_size = row[0]

                # Count files by extension.
                filetype = os.path.splitext(row[3])[1]
                filetypes[filetype] = filetypes.get(filetype, 0) + 1

                # Accumulate the file's size into every ancestor directory.
                parts = splitall(object_filename)
                for i in range(len(parts)):
                    if i == 0:
                        my_str = parts[i]
                    else:
                        my_str = my_str + '/' + parts[i]
                    if i == len(parts) - 1:
                        # '|EOL:' marks the leaf (file) entry so that plain
                        # directories can be told apart later.  Bug fix: the
                        # marker is now also applied to root-level files
                        # (single-component paths), which previously went
                        # unmarked and were misclassified as directories.
                        my_str = my_str + '|EOL:'
                    sizes[my_str] = sizes.get(my_str, 0) + int(object_size)
            rownum += 1

    # ---- Output section ----
    if short:
        print("Dataset Directory Summary for " + udi)
    else:
        print("Dataset File Manifest for " + udi)
    print(textwrap.dedent("""\
        This dataset is greater than 25 GB and therefore too large to be downloaded
        through direct download. In order to obtain this dataset, please email
        [email protected] to make arrangements. If you would like a subset of the
        dataset files, please indicate which directories and/or files.
        """))

    # Display the file-extension summary in short mode.
    if short:
        extensions = []
        for file_type in filetypes:
            if file_type == '':
                file_type = 'no extension'
            extensions.append(file_type)
        print("File Extensions:")
        extensions.sort()
        print(','.join(extensions))
        print('')

    # Per-extension counts, sorted by count descending.
    for file_type, type_count in sorted(filetypes.items(), reverse=True,
                                        key=lambda kv: (kv[1], kv[0])):
        if file_type == '':
            file_type = '<no extension>'
        print('%10s %15s' % (str(type_count), file_type))
    print('')
    print("Total Files - " + str(rownum - 5 - 1))
    print('')
    if short:
        print('Directories Structure:')
    else:
        print('File Listing:')
    print('')

    for entry, size in sizes.items():
        if short and re.search(r'\|EOL:$', entry):
            # Short mode shows directories only; skip leaf (file) entries.
            continue
        DirectoryTreeNode.buildTree(directoryTreeNodeRoot,
                                    re.sub(r'\|EOL:', '', entry), size)
    # Print the tree starting with the children of the root; the root
    # node itself carries no data.
    for child in directoryTreeNodeRoot.getChildren():
        child.printTree(0)


directoryTreeNodeRoot = DirectoryTreeNode('root',0)

def main(argv, script_name):
    """CLI entry point.

    argv        -- argument list to parse (e.g. sys.argv[1:])
    script_name -- name of the invoking script, used for usage output
    """
    parser = argparse.ArgumentParser(prog=script_name)
    # Stores args.d boolean, true if -d is set, false otherwise.
    parser.add_argument('-d', action='store_true', help='Print only directories.')
    parser.add_argument('hashfile')
    # Bug fix: parse the argv that was passed in — previously both
    # parameters were ignored and parse_args() re-read sys.argv, making
    # main() impossible to call programmatically.
    args = parser.parse_args(argv)
    generate_tree(args.hashfile, args.d)

# Script entry point: pass the CLI arguments and the script name separately.
if __name__ == "__main__":
    main(sys.argv[1:], sys.argv[0])

Loading

0 comments on commit e64650e

Please sign in to comment.