Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature job execution detail page #62

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,34 @@ Visit:

```

## Feed params

You may specify the following `scrapy` feed params in `config.py`:
- `FEED_URI` - URI that scrapy uses to store the exported feed.
  All storages (s3, ftp, local filesystem) are supported.
- `FEED_FORMAT` - format of the exported file.
- `EXPORT_URI` - URI from which the exported feed can be retrieved.

`FEED_URI` and `EXPORT_URI` can contain the following params:
- `%(name)s` - spider name
- `%(create_time)s` - time the job execution was created, formatted as `YYYY-MM-DD_HH-MM-SS`
- `%(job_id)s` - job execution id
- any other parameter from the `Args` set when adding the job (the first value of each argument is used).

If `EXPORT_URI` is not defined, the export URI defaults to `FEED_URI`.
If `FEED_URI` is not defined either, no feed URI is passed to the spider.
The same applies to `FEED_FORMAT`: it is only passed to the spider when it is defined.

Example:
```
FEED_FORMAT = 'csv'
FEED_URI = 's3://bucket/%(name)s/%(job_id)s_%(create_time)s.csv'
EXPORT_URI = 'https://s3.amazonaws.com/bucket/%(name)s/%(job_id)s_%(create_time)s.csv'
```
If a job has an export URI (i.e. `FEED_URI` is defined in `config.py`), an `Export` button is displayed on the job detail page once the job has finished.

Note: uploading to `s3` requires `boto3` to be installed.

## TODO
- [ ] Job dashboard support filter
- [x] User Authentication
Expand Down
58 changes: 55 additions & 3 deletions SpiderKeeper/app/proxy/spiderctrl.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import datetime
import random
from functools import reduce

import SpiderKeeper.config as config
from SpiderKeeper.app import db
from SpiderKeeper.app.spider.model import SpiderStatus, JobExecution, JobInstance, Project, JobPriority

Expand Down Expand Up @@ -147,16 +147,68 @@ def start_spider(self, job_instance):
for i in range(threshold):
leaders.append(random.choice(candidates))
for leader in leaders:
serviec_job_id = leader.start_spider(project.project_name, spider_name, arguments)
job_execution = JobExecution()
job_execution.project_id = job_instance.project_id
job_execution.service_job_execution_id = serviec_job_id
job_execution.job_instance_id = job_instance.id
job_execution.create_time = datetime.datetime.now()
job_execution.running_on = leader.server
db.session.add(job_execution)
db.session.commit()

feed_settings = self.get_feed_params(
job_execution,
spider_name,
arguments
)
if feed_settings:
arguments['setting'] = feed_settings

service_job_id = leader.start_spider(
project.project_name,
spider_name,
arguments
)

job_execution.service_job_execution_id = service_job_id
db.session.commit()

def get_feed_params(self, job_execution, spider_name, args):
    """Build the feed-related settings that are handed to the spider.

    Returns a list of ``KEY=value`` strings: a ``FEED_URI`` entry when a
    feed URI could be resolved, and a ``FEED_FORMAT`` entry when that
    option is configured. The list is empty when neither applies.

    Side effect: when a feed URI is resolved, the matching export URI is
    stored on ``job_execution.export_uri``. This method does not commit;
    persisting the change is left to the caller.
    """
    resolved_feed_uri, resolved_export_uri = self.get_feed_uri(
        job_execution, spider_name, args)

    settings = []
    if resolved_feed_uri:
        # Remember where the feed can be fetched from once the job is done.
        job_execution.export_uri = resolved_export_uri
        settings.append('FEED_URI={}'.format(resolved_feed_uri))

    feed_format = config.FEED_FORMAT
    if feed_format:
        settings.append('FEED_FORMAT={}'.format(feed_format))

    return settings

@staticmethod
def get_feed_uri(job_execution, spider_name, args):
    """Expand the configured URI templates for a single job execution.

    Returns a ``(feed_uri, export_uri)`` tuple, or ``(None, None)`` when
    ``config.FEED_URI`` is not set. Both templates are filled with
    %-style named substitution using:

    * ``name`` - the spider name,
    * ``job_id`` - ``job_execution.id``,
    * ``create_time`` - ``job_execution.create_time`` formatted as
      ``%Y-%m-%d_%H-%M-%S``,
    * every key in ``args``, mapped to the first element of its value
      (these take precedence over the three built-in names).

    ``config.EXPORT_URI`` is used as the export template when set;
    otherwise the feed template is reused.
    """
    if not config.FEED_URI:
        return None, None

    substitutions = dict(
        name=spider_name,
        job_id=job_execution.id,
        create_time=job_execution.create_time.strftime('%Y-%m-%d_%H-%M-%S'),
    )
    # Job arguments are applied last, so they override the built-in names.
    for key, value in args.items():
        substitutions[key] = value[0]

    export_template = config.EXPORT_URI or config.FEED_URI
    feed_uri = config.FEED_URI % substitutions
    return feed_uri, export_template % substitutions

def cancel_spider(self, job_execution):
job_instance = JobInstance.find_job_instance_by_id(job_execution.job_instance_id)
project = Project.find_project_by_id(job_instance.project_id)
Expand Down
12 changes: 12 additions & 0 deletions SpiderKeeper/app/spider/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,18 @@ def job_dashboard(project_id):
return render_template("job_dashboard.html", job_status=JobExecution.list_jobs(project_id))


@app.route("/job/<job_execution_id>/detail")
def job_detail(job_execution_id):
    """Render the detail page for one job execution.

    Looks up the execution by id and, when it exists, its job instance.
    If no execution matches, the template is still rendered with both
    ``job`` and ``job_instance`` set to ``None``.
    """
    execution = JobExecution.query.filter_by(id=job_execution_id).first()

    instance = None
    if execution:
        instance = JobInstance.find_job_instance_by_id(execution.job_instance_id)

    return render_template('job_detail.html', job=execution, job_instance=instance)


@app.route("/project/<project_id>/job/periodic")
def job_periodic(project_id):
project = Project.find_project_by_id(project_id)
Expand Down
3 changes: 2 additions & 1 deletion SpiderKeeper/app/spider/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,13 +151,14 @@ class JobExecution(Base):
__tablename__ = 'sk_job_execution'

project_id = db.Column(db.INTEGER, nullable=False, index=True)
service_job_execution_id = db.Column(db.String(50), nullable=False, index=True)
service_job_execution_id = db.Column(db.String(50), index=True)
job_instance_id = db.Column(db.INTEGER, nullable=False, index=True)
create_time = db.Column(db.DATETIME)
start_time = db.Column(db.DATETIME)
end_time = db.Column(db.DATETIME)
running_status = db.Column(db.INTEGER, default=SpiderStatus.PENDING)
running_on = db.Column(db.Text)
export_uri = db.Column(db.Text, nullable=True)

def to_dict(self):
job_instance = JobInstance.query.filter_by(id=self.job_instance_id).first()
Expand Down
24 changes: 18 additions & 6 deletions SpiderKeeper/app/templates/job_dashboard.html
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,12 @@ <h3 class="box-title">Next Jobs</h3>
{% for job in job_status.PENDING %}
{% if job.job_instance %}
<tr>
<td>{{ job.job_execution_id }}</td>
<td><a href="/project/1/job/periodic#{{ job.job_instance_id }}">{{ job.job_instance_id }}</a></td>
<td>
<a href="/job/{{ job.job_execution_id}}/detail">
{{ job.job_execution_id }}
</a>
</td>
<td><a href="/project/{{ project.id }}/job/periodic#{{ job.job_instance_id }}">{{ job.job_instance_id }}</a></td>
<td>{{ job.job_instance.spider_name }}</td>
<td class="txt-args" data-toggle="tooltip" data-placement="right"
title="{{ job.job_instance.spider_arguments }}">{{ job.job_instance.spider_arguments }}
Expand Down Expand Up @@ -94,8 +98,12 @@ <h3 class="box-title">Running Jobs</h3>
{% for job in job_status.RUNNING %}
{% if job.job_instance %}
<tr>
<td>{{ job.job_execution_id }}</td>
<td><a href="/project/1/job/periodic#{{ job.job_instance_id }}">{{ job.job_instance_id }}</a></td>
<td>
<a href="/job/{{ job.job_execution_id}}/detail">
{{ job.job_execution_id }}
</a>
</td>
<td><a href="/project/{{ project.id }}/job/periodic#{{ job.job_instance_id }}">{{ job.job_instance_id }}</a></td>
<td>{{ job.job_instance.spider_name }}</td>
<td class="txt-args" data-toggle="tooltip" data-placement="right"
title="{{ job.job_instance.spider_arguments }}">{{ job.job_instance.spider_arguments }}
Expand Down Expand Up @@ -159,8 +167,12 @@ <h3 class="box-title">Completed Jobs</h3>
{% for job in job_status.COMPLETED %}
{% if job.job_instance %}
<tr>
<td>{{ job.job_execution_id }}</td>
<td><a href="/project/1/job/periodic#{{ job.job_instance_id }}">{{ job.job_instance_id }}</a></td>
<td>
<a href="/job/{{ job.job_execution_id}}/detail">
{{ job.job_execution_id }}
</a>
</td>
<td><a href="/project/{{ project.id }}/job/periodic#{{ job.job_instance_id }}">{{ job.job_instance_id }}</a></td>
<td>{{ job.job_instance.spider_name }}</td>
<td class="txt-args" data-toggle="tooltip" data-placement="right"
title="{{ job.job_instance.spider_arguments }}">{{ job.job_instance.spider_arguments }}
Expand Down
97 changes: 97 additions & 0 deletions SpiderKeeper/app/templates/job_detail.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
{% extends "base.html" %}
{% block content_header %}
{# Page header: title plus an optional "Export" button on the right. #}
{# NOTE(review): the heading still reads "Job Dashboard" on the job detail page - confirm this is intended. #}
<h1>Job Dashboard</h1>
<ol style="float: right;
margin-top: 0;
margin-bottom: 0;
font-size: 12px;
padding: 7px 5px;
position: absolute;
top: 15px;
right: 10px;">


{# Show the export link only when the job has running_status 2 and an export URI was stored for it. #}
{% if job.running_status == 2 and job.export_uri %}
<a href="{{ job.export_uri }}" type="button" class="btn btn-success btn-flat"
style="margin-top: -10px;">
Export
</a>
{% endif %}

</ol>
{% endblock %}
{% block content_body %}
{# Single-row table describing one job execution (`job`) and its job instance (`job_instance`). #}
{# NOTE(review): `project`, `now` and `timedelta` are used below but are not passed by the job_detail view; presumably they come from the shared template context - TODO confirm. #}

<!--======job detail======-->
<div class="box">
<div class="box-header">
<h3 class="box-title">Job</h3>
</div>
<div class="box-body table-responsive">
<table class="table table-striped">
<tr>
<th style="width: 10px">#</th>
<th style="width: 30px">Job</th>
<th style="width: 160px">Spider</th>
<th style="width: 100px">Args</th>
<th style="width: 20px">Priority</th>
<th style="width: 40px">Runtime</th>
<th style="width: 120px">Started</th>
<th style="width: 10px">Log</th>
<th style="width: 10px">Status</th>
</tr>
{# `job` is falsy when no execution was found; only the header row is rendered then. #}
{% if job %}
<tr>
<td>{{ job.id }}</td>
<td><a href="/project/{{ project.id }}/job/periodic#{{ job.job_instance_id }}">{{ job.job_instance_id }}</a></td>
<td>{{ job_instance.spider_name }}</td>
<td class="txt-args" data-toggle="tooltip" data-placement="right"
title="{{ job_instance.spider_arguments }}">{{ job_instance.spider_arguments }}
</td>
{# Priority label: -1 LOW, 0 NORMAL, 1 HIGH, 2 HIGHEST; any other value renders no cell. #}
{% if job_instance.priority == -1 %}
<td>
<span class="label label-default">LOW</span>
</td>
{% elif job_instance.priority == 0 %}
<td>
<span class="label label-info">NORMAL</span>
</td>
{% elif job_instance.priority == 1 %}
<td>
<span class="label label-warning">HIGH</span>
</td>
{% elif job_instance.priority == 2 %}
<td>
<span class="label label-danger">HIGHEST</span>
</td>
{% endif %}
<td>
{# Runtime: measured up to `now` for status 0/1 (pending/running), otherwise up to the recorded end time. #}
{% if job.running_status in [0, 1] %}
{{ timedelta(now,job.start_time) }}
{% else %}
{{ timedelta(job.end_time,job.start_time) }}
{% endif %}
</td>
<td>{{ job.start_time }}</td>
<td>
<a href="/project/{{ project.id }}/jobexecs/{{ job.id }}/log" target="_blank"
data-toggle="tooltip" data-placement="top"
title="{{ job.service_job_execution_id }}">Log</a>
</td>
<td>
{# Status label: 0 PENDING, 1 RUNNING, 2 FINISHED; every other value is shown as CANCELED. #}
{% if job.running_status == 0 %}
<span class="label label-info">PENDING</span>
{% elif job.running_status == 1 %}
<span class="label label-info">RUNNING</span>
{% elif job.running_status == 2 %}
<span class="label label-success">FINISHED</span>
{% else %}
<span class="label label-danger">CANCELED</span>
{% endif %}
</td>
</tr>
{% endif %}
</table>
</div>
</div>
{% endblock %}
5 changes: 5 additions & 0 deletions SpiderKeeper/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,8 @@
BASIC_AUTH_USERNAME = 'admin'
BASIC_AUTH_PASSWORD = 'admin'
BASIC_AUTH_FORCE = True

# feed params
# Feed export format handed to the spider as FEED_FORMAT (e.g. 'csv').
# When left as None, no FEED_FORMAT setting is passed.
FEED_FORMAT = None
# %-style URI template handed to the spider as FEED_URI, e.g.
# 's3://bucket/%(name)s/%(job_id)s_%(create_time)s.csv'.
# When left as None, no feed URI is passed and no export URI is stored.
FEED_URI = None
# %-style URI template stored on the job execution as its export URI.
# Falls back to FEED_URI when left as None.
EXPORT_URI = None