forked from serge-sans-paille/pythran-stories
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetting-the-best-of-every-world-cython-and-pythran-working-together.html
567 lines (473 loc) · 51.4 KB
/
getting-the-best-of-every-world-cython-and-pythran-working-together.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Getting the best of every world: Cython and Pythran working together</title>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="description" content="">
<meta name="author" content="serge-sans-paille and other pythraners">
<!-- Le styles -->
<link rel="stylesheet" href="./theme/css/bootstrap.min.css" type="text/css" />
<style type="text/css">
body {
padding-top: 60px;
padding-bottom: 40px;
}
.sidebar-nav {
padding: 9px 0;
}
.tag-1 {
font-size: 13pt;
}
.tag-2 {
font-size: 10pt;
}
.tag-3 {
font-size: 8pt;
}
.tag-4 {
font-size: 6pt;
}
</style>
<link href="./theme/css/bootstrap-responsive.min.css" rel="stylesheet">
<link href="./theme/css/font-awesome.css" rel="stylesheet">
<link href="./theme/css/pygments.css" rel="stylesheet">
<!-- Le HTML5 shim, for IE6-8 support of HTML5 elements -->
<!--[if lt IE 9]>
<script src="//html5shim.googlecode.com/svn/trunk/html5.js"></script>
<![endif]-->
<!-- Le fav and touch icons -->
<link rel="shortcut icon" href="./theme/images/favicon.ico">
<link rel="apple-touch-icon" href="./theme/images/apple-touch-icon.png">
<link rel="apple-touch-icon" sizes="72x72" href="./theme/images/apple-touch-icon-72x72.png">
<link rel="apple-touch-icon" sizes="114x114" href="./theme/images/apple-touch-icon-114x114.png">
<link href="./" type="application/atom+xml" rel="alternate" title="Pythran stories ATOM Feed" />
</head>
<body>
<div class="navbar navbar-fixed-top">
<div class="navbar-inner">
<div class="container-fluid">
<a class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</a>
<a class="brand" href="./index.html">Pythran stories </a>
<div class="nav-collapse">
<ul class="nav">
<li class="divider-vertical"></li>
<li >
<a href="./category/benchmark.html">
<i class="icon-folder-open icon-large"></i>benchmark
</a>
</li>
<li >
<a href="./category/compilation.html">
<i class="icon-folder-open icon-large"></i>compilation
</a>
</li>
<li class="active">
<a href="./category/cython.html">
<i class="icon-folder-open icon-large"></i>cython
</a>
</li>
<li >
<a href="./category/engineering.html">
<i class="icon-folder-open icon-large"></i>engineering
</a>
</li>
<li >
<a href="./category/examples.html">
<i class="icon-folder-open icon-large"></i>examples
</a>
</li>
<li >
<a href="./category/optimisation.html">
<i class="icon-folder-open icon-large"></i>optimisation
</a>
</li>
<li >
<a href="./category/release.html">
<i class="icon-folder-open icon-large"></i>release
</a>
</li>
</ul>
<ul class="nav pull-right">
<li><a href="./archives.html"><i class="icon-th-list"></i>Archives</a></li>
</ul>
<!--<p class="navbar-text pull-right">Logged in as <a href="#">username</a></p>-->
</div><!--/.nav-collapse -->
</div>
</div>
</div>
<div class="container-fluid">
<div class="row">
<div class="span9" id="content">
<section id="content">
<article>
<header>
<h1>
<a href=""
rel="bookmark"
title="Permalink to Getting the best of every world: Cython and Pythran working together">
Getting the best of every world: Cython and Pythran working together
</a>
</h1>
</header>
<div class="entry-content">
<div class="well">
<footer class="post-info">
<span class="label">Date</span>
<abbr class="published" title="2018-11-25T00:00:00+01:00">
<i class="icon-calendar"></i>Sun 25 November 2018
</abbr>
<span class="label">By</span>
<a href="./author/adrien-guinet.html"><i class="icon-user"></i>Adrien Guinet</a>
<span class="label">Category</span>
<a href="./category/cython.html"><i class="icon-folder-open"></i>cython</a>.
</footer><!-- /.post-info --> </div>
<p>Once upon a time, on IRC, Serge Guelton asked me whether I wanted to work on
having Cython using Pythran for Numpy-related computation. I wasn't really sure
what I was getting into, but I've always liked optimizing software, and that
sounded like an interesting challenge to understand both projects.
As an important side note, this whole project has been financed by
the <a href="http://opendreamkit.org">OpenDreamKit</a> project!</p>
<p>That's the end of the small story, now let's get to the real stuff!</p>
<h2>Why mix Cython and Pythran?</h2>
<p>On one side, when Cython code contains operations which are done on Numpy
arrays, Cython relies on the original Numpy package to compute them. This
involves a fall back to the Python interpreter. It thus misses several
optimization opportunities, especially with complex expressions: even if each Numpy call is decently optimized, their combination is not.</p>
<p>On the other side, Pythran has a full C++ implementation of a large part of
the Numpy API. One of the advantages of this implementation is that it supports
expression templates and SIMD instructions. Expression templates make it possible to
<em>fuse</em> the loops that occur when expressions with multiple operators are
computed. For instance, the expression <code>a + b * c</code> is transformed by
Cython into two calls: one for the multiplication of <code>b</code> by <code>c</code>, and one for the
addition of the result of this multiplication to <code>a</code>. Each call
ends up in one loop, that reads memory, computes the operation and writes
back to (newly allocated) memory. The second loop has the same pattern. On today's
architectures, memory bandwidth is often the limiting factor in this kind of
operation. It is thus really interesting to merge these loops, and load/store
the memory only once.</p>
<p>Expression templating is a C++ technique that makes it possible to
evaluate expressions only when they are stored to memory. Thus, in this case,
the two loops are automatically <em>fused</em> by the C++ compiler, and we get
an optimized version of this code. Note that this technique is used for
instance by the C++ wrapper of the GMP library. Using <a href="https://github.com/QuantStack/xsimd">xsimd</a>, it is even possible to automagically vectorize these computations.</p>
<p>The project has been focused on using this Pythran backend for Numpy arrays in
Cython when possible. At the time of writing this integration, Pythran had a
few limitations regarding the Numpy arrays it can handle:</p>
<ul>
<li>array "views" are not supported. That means that arrays must be stored in
contiguous memory. Fortran and C-style format are supported.</li>
<li>the endianness of the integers must be the same as that of the targeted
architecture (note that Cython has the same limitation, and that it is still
true today)</li>
</ul>
<p>However, we still achieve interesting speedups, without the need for manual loops.</p>
<h2>Implementation details within Cython</h2>
<p>The overall idea of the implementation of this feature is to generate code that
is using the pythonic backend instead of calls to the Numpy Python functions.
Moreover, as Pythran didn't support every Numpy array type, we need a
mechanism to switch back to the original implementation if necessary.</p>
<p>In order to explain this, let's take an example with this simple Cython function:</p>
<div class="highlight"><pre><span></span> <span class="kn">import</span> <span class="nn">numpy</span> <span class="kn">as</span> <span class="nn">np</span>
<span class="n">cimport</span> <span class="n">numpy</span> <span class="k">as</span> <span class="n">np</span>
<span class="k">def</span> <span class="nf">add</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">[</span><span class="n">double</span><span class="p">,</span> <span class="n">ndim</span><span class="o">=</span><span class="mi">1</span><span class="p">]</span> <span class="n">a</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">[</span><span class="n">double</span><span class="p">,</span> <span class="n">ndim</span><span class="o">=</span><span class="mi">1</span><span class="p">]</span> <span class="n">b</span><span class="p">):</span>
<span class="k">return</span> <span class="n">a</span><span class="o">+</span><span class="n">b</span>
</pre></div>
<p>When we encounter such a definition, we want to generate various functions, depending on the shapes of <code>a</code> and <code>b</code> at runtime:</p>
<ul>
<li>the original Cython code, if <code>a</code> and <code>b</code> can't be handled by Pythran</li>
<li>the version when only <code>a</code> can be handled by Pythran</li>
<li>the version when only <code>b</code> can be handled by Pythran</li>
<li>the version when both <code>a</code> and <code>b</code> can be handled by Pythran</li>
</ul>
<p>Note that, in the case of the <code>add</code> function, only the first and last
versions are really of value. For now, we don't try to be smart about this and
generate all of these versions.</p>
<p>In order to do that, we rely on the type infrastructure that already exists in
Cython. For every argument that is a potentially Pythran-supported Numpy array,
we convert its type into a Cython <code>FusedType</code>. A <code>FusedType</code> makes it possible to declare a
union of types. Multiple <code>FusedType</code> can be specified within a function. In
the case of our <code>add</code> function, this will generate the four aforementioned
versions, and the dispatching is done at runtime. What's nice is that we just
need to declare these <code>FusedType</code> types, and Cython already handles all this
dispatching and the generation of the various functions.</p>
<p>Once this is done, we use these rules to know when we can generate
pythonic-based code:</p>
<ul>
<li>unary and binary operators applied to pythonic arrays are supported operations</li>
<li>calling a function of a module implemented in Pythran is a supported operation</li>
</ul>
<p>If none of these rules work out, we fall back to Python objects and use the
original Cython implementation.</p>
<h2>How to use it</h2>
<p>The Pythran-Numpy backend isn't activated by default within Cython. There are
multiple ways to activate it:</p>
<ul>
<li>if you are using Cython directly from the command line, you can pass the
<code>--np-pythran</code> flag to the <code>cython</code> program</li>
<li>if you are using <code>distutils</code>, you can just add a comment with <code># cython:
np_pythran=True</code> at the top of the necessary Cython files</li>
</ul>
<p>More detailed information can be found within the <a href="https://cython.readthedocs.io/en/latest/src/userguide/numpy_pythran.html">Cython documentation</a>.</p>
<h2>Benchmarks and Examples</h2>
<h3><code>cos norm</code></h3>
<div class="highlight"><pre><span></span><span class="o">>>></span> <span class="kn">import</span> <span class="nn">numpy</span> <span class="kn">as</span> <span class="nn">np</span>
<span class="o">...</span>
<span class="o">>>></span> <span class="n">n</span> <span class="o">=</span> <span class="mi">10000</span>
<span class="o">>>></span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">random</span><span class="p">((</span><span class="mi">2</span><span class="p">,</span> <span class="n">n</span><span class="p">))</span>
</pre></div>
<div class="highlight"><pre><span></span><span class="o">>>></span> <span class="k">def</span> <span class="nf">np_cos_norm</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">):</span>
<span class="o">...</span> <span class="n">val</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="mf">1.</span> <span class="o">-</span> <span class="n">np</span><span class="o">.</span><span class="n">cos</span><span class="p">(</span><span class="n">a</span><span class="o">-</span><span class="n">b</span><span class="p">))</span>
<span class="o">...</span> <span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">val</span> <span class="o">/</span> <span class="mf">2.</span> <span class="o">/</span> <span class="n">a</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
</pre></div>
<div class="highlight"><pre><span></span><span class="o">>>></span> <span class="o">%</span><span class="n">timeit</span> <span class="n">np_cos_norm</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
</pre></div>
<div class="highlight"><pre><span></span>138 µs ± 1.47 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
</pre></div>
<p>Ok, that's our baseline.</p>
<div class="highlight"><pre><span></span><span class="o">>>></span> <span class="o">%</span><span class="n">load_ext</span> <span class="n">Cython</span>
</pre></div>
<div class="highlight"><pre><span></span><span class="o">>>></span> <span class="o">%%</span><span class="n">cython</span>
<span class="o">>>></span> <span class="kn">import</span> <span class="nn">numpy</span> <span class="kn">as</span> <span class="nn">np</span>
<span class="o">>>></span> <span class="k">def</span> <span class="nf">cy_np_cos_norm</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">):</span>
<span class="o">...</span> <span class="n">val</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="mf">1.</span> <span class="o">-</span> <span class="n">np</span><span class="o">.</span><span class="n">cos</span><span class="p">(</span><span class="n">a</span><span class="o">-</span><span class="n">b</span><span class="p">))</span>
<span class="o">...</span> <span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">val</span> <span class="o">/</span> <span class="mf">2.</span> <span class="o">/</span> <span class="n">a</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
</pre></div>
<div class="highlight"><pre><span></span><span class="o">>>></span> <span class="o">%</span><span class="n">timeit</span> <span class="n">cy_np_cos_norm</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
</pre></div>
<div class="highlight"><pre><span></span>137 µs ± 2.2 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
</pre></div>
<p>Nothing surprising: there's no type annotation and not much of an interpretation step anyway.</p>
<div class="highlight"><pre><span></span><span class="o">>>></span> <span class="o">%%</span><span class="n">cython</span>
<span class="o">>>></span> <span class="kn">import</span> <span class="nn">numpy</span> <span class="kn">as</span> <span class="nn">np</span>
<span class="o">>>></span> <span class="n">cimport</span> <span class="n">numpy</span> <span class="k">as</span> <span class="n">np</span>
<span class="o">>>></span> <span class="k">def</span> <span class="nf">cy_np_typed_cos_norm</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">[</span><span class="n">double</span><span class="p">,</span> <span class="n">ndim</span><span class="o">=</span><span class="mi">1</span><span class="p">]</span> <span class="n">a</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">[</span><span class="n">double</span><span class="p">,</span> <span class="n">ndim</span><span class="o">=</span><span class="mi">1</span><span class="p">]</span> <span class="n">b</span><span class="p">):</span>
<span class="o">...</span> <span class="n">val</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="mf">1.</span> <span class="o">-</span> <span class="n">np</span><span class="o">.</span><span class="n">cos</span><span class="p">(</span><span class="n">a</span><span class="o">-</span><span class="n">b</span><span class="p">))</span>
<span class="o">...</span> <span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">val</span> <span class="o">/</span> <span class="mf">2.</span> <span class="o">/</span> <span class="n">a</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
</pre></div>
<div class="highlight"><pre><span></span><span class="o">>>></span> <span class="o">%</span><span class="n">timeit</span> <span class="n">cy_np_typed_cos_norm</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
</pre></div>
<div class="highlight"><pre><span></span>140 µs ± 765 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
</pre></div>
<p>Same here: adding types is not enough, Cython still uses numpy's implementation for each individual operation.</p>
<div class="highlight"><pre><span></span><span class="o">>>></span> <span class="o">%%</span><span class="n">cython</span>
<span class="o">>>></span> <span class="c1">#cython: np_pythran=True</span>
<span class="o">>>></span> <span class="c1">#cython: cxx=True</span>
<span class="o">>>></span> <span class="kn">import</span> <span class="nn">numpy</span> <span class="kn">as</span> <span class="nn">np</span>
<span class="o">>>></span> <span class="n">cimport</span> <span class="n">numpy</span> <span class="k">as</span> <span class="n">np</span>
<span class="o">...</span>
<span class="o">>>></span> <span class="k">def</span> <span class="nf">cy_np_typed_pythran_cos_norm</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">[</span><span class="n">double</span><span class="p">,</span> <span class="n">ndim</span><span class="o">=</span><span class="mi">1</span><span class="p">]</span> <span class="n">a</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">[</span><span class="n">double</span><span class="p">,</span> <span class="n">ndim</span><span class="o">=</span><span class="mi">1</span><span class="p">]</span> <span class="n">b</span><span class="p">):</span>
<span class="o">...</span> <span class="n">cdef</span> <span class="nb">int</span> <span class="n">n</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">a</span><span class="p">)</span>
<span class="o">...</span> <span class="n">val</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="mf">1.</span> <span class="o">-</span> <span class="n">np</span><span class="o">.</span><span class="n">cos</span><span class="p">(</span><span class="n">a</span><span class="o">-</span><span class="n">b</span><span class="p">))</span>
<span class="o">...</span> <span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">val</span> <span class="o">/</span> <span class="mf">2.</span> <span class="o">/</span> <span class="n">n</span><span class="p">)</span>
</pre></div>
<div class="highlight"><pre><span></span><span class="o">>>></span> <span class="o">%</span><span class="n">timeit</span> <span class="n">cy_np_typed_pythran_cos_norm</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
</pre></div>
<div class="highlight"><pre><span></span>131 µs ± 2.92 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
</pre></div>
<p>That's slightly better, but not really impressive. That's because the execution time of the kernel is dominated by the <code>cos</code> call, and Pythran cannot do much about it.</p>
<div class="highlight"><pre><span></span><span class="o">>>></span> <span class="o">%%</span><span class="n">cython</span>
<span class="o">...</span>
<span class="o">>>></span> <span class="kn">import</span> <span class="nn">numpy</span> <span class="kn">as</span> <span class="nn">np</span>
<span class="o">>>></span> <span class="n">cimport</span> <span class="n">numpy</span> <span class="k">as</span> <span class="n">np</span>
<span class="o">>>></span> <span class="kn">from</span> <span class="nn">libc.math</span> <span class="nn">cimport</span> <span class="nn">cos</span><span class="p">,</span> <span class="n">sqrt</span>
<span class="o">...</span>
<span class="o">>>></span> <span class="n">cimport</span> <span class="n">cython</span>
<span class="o">>>></span> <span class="nd">@cython.boundscheck</span><span class="p">(</span><span class="bp">False</span><span class="p">)</span> <span class="c1"># turn off bounds-checking for entire function</span>
<span class="o">>>></span> <span class="nd">@cython.wraparound</span><span class="p">(</span><span class="bp">False</span><span class="p">)</span> <span class="c1"># turn off negative index wrapping for entire function</span>
<span class="o">>>></span> <span class="k">def</span> <span class="nf">cy_np_typed_python_cos_norm</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">[</span><span class="n">double</span><span class="p">,</span> <span class="n">ndim</span><span class="o">=</span><span class="mi">1</span><span class="p">]</span> <span class="n">a</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">[</span><span class="n">double</span><span class="p">,</span> <span class="n">ndim</span><span class="o">=</span><span class="mi">1</span><span class="p">]</span> <span class="n">b</span><span class="p">):</span>
<span class="o">...</span> <span class="n">cdef</span> <span class="nb">int</span> <span class="n">n</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">a</span><span class="p">),</span> <span class="n">i</span>
<span class="o">...</span> <span class="n">cdef</span> <span class="n">double</span> <span class="n">acc</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span><span class="n">res</span>
<span class="o">...</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
<span class="o">...</span> <span class="n">acc</span> <span class="o">+=</span> <span class="mi">1</span> <span class="o">-</span> <span class="n">cos</span><span class="p">(</span><span class="n">a</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">-</span><span class="n">b</span><span class="p">[</span><span class="n">i</span><span class="p">])</span>
<span class="o">...</span> <span class="n">res</span> <span class="o">=</span> <span class="n">sqrt</span><span class="p">(</span><span class="n">acc</span> <span class="o">/</span> <span class="mf">2.</span> <span class="o">/</span> <span class="n">n</span><span class="p">)</span>
<span class="o">...</span> <span class="k">return</span> <span class="n">res</span>
</pre></div>
<div class="highlight"><pre><span></span><span class="o">>>></span> <span class="o">%</span><span class="n">timeit</span> <span class="n">cy_np_typed_python_cos_norm</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
</pre></div>
<div class="highlight"><pre><span></span>130 µs ± 2.58 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
</pre></div>
<p>Indeed even the C loop does not give us a great speedup...</p>
<div class="highlight"><pre><span></span><span class="o">>>></span> <span class="o">%%</span><span class="n">cython</span> <span class="o">-</span><span class="n">c</span><span class="o">=-</span><span class="n">DUSE_XSIMD</span> <span class="o">-</span><span class="n">c</span><span class="o">=-</span><span class="n">march</span><span class="o">=</span><span class="n">native</span>
<span class="o">>>></span> <span class="c1">#cython: np_pythran=True</span>
<span class="o">>>></span> <span class="c1">#cython: cxx=True</span>
<span class="o">>>></span> <span class="kn">import</span> <span class="nn">numpy</span> <span class="kn">as</span> <span class="nn">np</span>
<span class="o">>>></span> <span class="n">cimport</span> <span class="n">numpy</span> <span class="k">as</span> <span class="n">np</span>
<span class="o">...</span>
<span class="o">>>></span> <span class="k">def</span> <span class="nf">cy_np_typed_pythran_xsimd_cos_norm</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">[</span><span class="n">double</span><span class="p">,</span> <span class="n">ndim</span><span class="o">=</span><span class="mi">1</span><span class="p">]</span> <span class="n">a</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">[</span><span class="n">double</span><span class="p">,</span> <span class="n">ndim</span><span class="o">=</span><span class="mi">1</span><span class="p">]</span> <span class="n">b</span><span class="p">):</span>
<span class="o">...</span> <span class="n">cdef</span> <span class="nb">int</span> <span class="n">n</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">a</span><span class="p">)</span>
<span class="o">...</span> <span class="n">val</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="mf">1.</span> <span class="o">-</span> <span class="n">np</span><span class="o">.</span><span class="n">cos</span><span class="p">(</span><span class="n">a</span><span class="o">-</span><span class="n">b</span><span class="p">))</span>
<span class="o">...</span> <span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">val</span> <span class="o">/</span> <span class="mf">2.</span> <span class="o">/</span> <span class="n">n</span><span class="p">)</span>
</pre></div>
<div class="highlight"><pre><span></span><span class="o">>>></span> <span class="o">%</span><span class="n">timeit</span> <span class="n">cy_np_typed_pythran_xsimd_cos_norm</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
</pre></div>
<div class="highlight"><pre><span></span>34.8 µs ± 1.99 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
</pre></div>
<p><strong>That</strong> is interesting. By using the <code>-DUSE_XSIMD</code> flag and allowing the use of machine-specific instruction set (in our case, AVX), we get a great <code>x4</code> speedup. And we still use the nice and high-level syntax of Numpy.</p>
<h3><code>laplacien</code></h3>
<div class="highlight"><pre><span></span><span class="o">>>></span> <span class="k">def</span> <span class="nf">np_laplacien</span><span class="p">(</span><span class="n">image</span><span class="p">):</span>
<span class="o">...</span> <span class="n">out_image</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">abs</span><span class="p">(</span><span class="mi">4</span><span class="o">*</span><span class="n">image</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">-</span>
<span class="o">...</span> <span class="n">image</span><span class="p">[</span><span class="mi">0</span><span class="p">:</span><span class="o">-</span><span class="mi">2</span><span class="p">,</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">-</span> <span class="n">image</span><span class="p">[</span><span class="mi">2</span><span class="p">:,</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">-</span>
<span class="o">...</span> <span class="n">image</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span><span class="mi">0</span><span class="p">:</span><span class="o">-</span><span class="mi">2</span><span class="p">]</span> <span class="o">-</span> <span class="n">image</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">:])</span>
<span class="o">...</span> <span class="n">valmax</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">max</span><span class="p">(</span><span class="n">out_image</span><span class="p">)</span>
<span class="o">...</span> <span class="n">valmax</span> <span class="o">=</span> <span class="nb">max</span><span class="p">(</span><span class="mf">1.</span><span class="p">,</span><span class="n">valmax</span><span class="p">)</span><span class="o">+</span><span class="mf">1.E-9</span>
<span class="o">...</span> <span class="n">out_image</span> <span class="o">/=</span> <span class="n">valmax</span>
<span class="o">...</span> <span class="k">return</span> <span class="n">out_image</span>
</pre></div>
<div class="highlight"><pre><span></span><span class="o">>>></span> <span class="n">N</span> <span class="o">=</span> <span class="mi">500</span> <span class="p">;</span> <span class="n">image</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randn</span><span class="p">(</span><span class="n">N</span><span class="p">,</span><span class="n">N</span><span class="p">)</span>
</pre></div>
<div class="highlight"><pre><span></span><span class="o">>>></span> <span class="o">%</span><span class="n">timeit</span> <span class="n">np_laplacien</span><span class="p">(</span><span class="n">image</span><span class="p">)</span>
</pre></div>
<div class="highlight"><pre><span></span>2.61 ms ± 167 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
</pre></div>
<p>Again, some high-level numpy baseline.</p>
<div class="highlight"><pre><span></span><span class="o">>>></span> <span class="o">%%</span><span class="n">cython</span>
<span class="o">>>></span> <span class="kn">import</span> <span class="nn">numpy</span> <span class="kn">as</span> <span class="nn">np</span>
<span class="o">>>></span> <span class="k">def</span> <span class="nf">cy_np_laplacien</span><span class="p">(</span><span class="n">image</span><span class="p">):</span>
<span class="o">...</span> <span class="n">out_image</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">abs</span><span class="p">(</span><span class="mi">4</span><span class="o">*</span><span class="n">image</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">-</span>
<span class="o">...</span> <span class="n">image</span><span class="p">[</span><span class="mi">0</span><span class="p">:</span><span class="o">-</span><span class="mi">2</span><span class="p">,</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">-</span> <span class="n">image</span><span class="p">[</span><span class="mi">2</span><span class="p">:,</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">-</span>
<span class="o">...</span> <span class="n">image</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span><span class="mi">0</span><span class="p">:</span><span class="o">-</span><span class="mi">2</span><span class="p">]</span> <span class="o">-</span> <span class="n">image</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">:])</span>
<span class="o">...</span> <span class="n">valmax</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">max</span><span class="p">(</span><span class="n">out_image</span><span class="p">)</span>
<span class="o">...</span> <span class="n">valmax</span> <span class="o">=</span> <span class="nb">max</span><span class="p">(</span><span class="mf">1.</span><span class="p">,</span><span class="n">valmax</span><span class="p">)</span><span class="o">+</span><span class="mf">1.E-9</span>
<span class="o">...</span> <span class="n">out_image</span> <span class="o">/=</span> <span class="n">valmax</span>
<span class="o">...</span> <span class="k">return</span> <span class="n">out_image</span>
</pre></div>
<div class="highlight"><pre><span></span><span class="o">>>></span> <span class="o">%</span><span class="n">timeit</span> <span class="n">cy_np_laplacien</span><span class="p">(</span><span class="n">image</span><span class="p">)</span>
</pre></div>
<div class="highlight"><pre><span></span>2.83 ms ± 397 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
</pre></div>
<p>And it comes as no surprise that just cythonizing it does not help much.</p>
<div class="highlight"><pre><span></span><span class="o">>>></span> <span class="o">%%</span><span class="n">cython</span>
<span class="o">>>></span> <span class="c1">#cython: np_pythran=True</span>
<span class="o">>>></span> <span class="c1">#cython: cxx=True</span>
<span class="o">>>></span> <span class="kn">import</span> <span class="nn">numpy</span> <span class="kn">as</span> <span class="nn">np</span>
<span class="o">>>></span> <span class="n">cimport</span> <span class="n">numpy</span> <span class="k">as</span> <span class="n">np</span>
<span class="o">...</span>
<span class="o">>>></span> <span class="k">def</span> <span class="nf">cy_np_pythran_laplacien</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">[</span><span class="n">double</span><span class="p">,</span> <span class="n">ndim</span><span class="o">=</span><span class="mi">2</span><span class="p">]</span> <span class="n">image</span><span class="p">):</span>
<span class="o">...</span>
<span class="o">...</span> <span class="n">out_image</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">abs</span><span class="p">(</span><span class="mi">4</span><span class="o">*</span><span class="n">image</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">-</span>
<span class="o">...</span> <span class="n">image</span><span class="p">[</span><span class="mi">0</span><span class="p">:</span><span class="o">-</span><span class="mi">2</span><span class="p">,</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">-</span> <span class="n">image</span><span class="p">[</span><span class="mi">2</span><span class="p">:,</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">-</span>
<span class="o">...</span> <span class="n">image</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span><span class="mi">0</span><span class="p">:</span><span class="o">-</span><span class="mi">2</span><span class="p">]</span> <span class="o">-</span> <span class="n">image</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">:])</span>
<span class="o">...</span> <span class="n">valmax</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">max</span><span class="p">(</span><span class="n">out_image</span><span class="p">)</span>
<span class="o">...</span> <span class="n">valmax</span> <span class="o">=</span> <span class="nb">max</span><span class="p">(</span><span class="mf">1.</span><span class="p">,</span><span class="n">valmax</span><span class="p">)</span><span class="o">+</span><span class="mf">1.E-9</span>
<span class="o">...</span> <span class="n">out_image</span> <span class="o">/=</span> <span class="n">valmax</span>
<span class="o">...</span> <span class="k">return</span> <span class="n">out_image</span>
</pre></div>
<div class="highlight"><pre><span></span><span class="o">>>></span> <span class="o">%</span><span class="n">timeit</span> <span class="n">cy_np_pythran_laplacien</span><span class="p">(</span><span class="n">image</span><span class="p">)</span>
</pre></div>
<div class="highlight"><pre><span></span>640 µs ± 68.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
</pre></div>
<p>This time, plain Pythran without vectorization is already a nice catch, as there is no costly operation that hides other optimizations.</p>
<div class="highlight"><pre><span></span><span class="o">>>></span> <span class="o">%%</span><span class="n">cython</span>
<span class="o">>>></span> <span class="kn">import</span> <span class="nn">numpy</span> <span class="kn">as</span> <span class="nn">np</span>
<span class="o">>>></span> <span class="n">cimport</span> <span class="n">numpy</span> <span class="k">as</span> <span class="n">np</span>
<span class="o">>>></span> <span class="kn">from</span> <span class="nn">libc.math</span> <span class="nn">cimport</span> <span class="nn">fabs</span>
<span class="o">...</span>
<span class="o">>>></span> <span class="n">cimport</span> <span class="n">cython</span>
<span class="o">>>></span> <span class="nd">@cython.boundscheck</span><span class="p">(</span><span class="bp">False</span><span class="p">)</span> <span class="c1"># turn off bounds-checking for entire function</span>
<span class="o">>>></span> <span class="nd">@cython.wraparound</span><span class="p">(</span><span class="bp">False</span><span class="p">)</span> <span class="c1"># turn off negative index wrapping for entire function</span>
<span class="o">>>></span> <span class="k">def</span> <span class="nf">cy_py_laplacien</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">[</span><span class="n">double</span><span class="p">,</span> <span class="n">ndim</span><span class="o">=</span><span class="mi">2</span><span class="p">]</span> <span class="n">image</span><span class="p">):</span>
<span class="o">...</span> <span class="n">cdef</span> <span class="nb">int</span> <span class="n">i</span><span class="p">,</span> <span class="n">j</span>
<span class="o">...</span> <span class="n">cdef</span> <span class="nb">int</span> <span class="n">n</span> <span class="o">=</span> <span class="n">image</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">m</span> <span class="o">=</span> <span class="n">image</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
<span class="o">...</span> <span class="n">cdef</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">[</span><span class="n">double</span><span class="p">,</span> <span class="n">ndim</span><span class="o">=</span><span class="mi">2</span><span class="p">]</span> <span class="n">out_image</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">empty</span><span class="p">((</span><span class="n">n</span><span class="o">-</span><span class="mi">2</span><span class="p">,</span><span class="n">m</span><span class="o">-</span><span class="mi">2</span><span class="p">))</span>
<span class="o">...</span> <span class="n">cdef</span> <span class="n">double</span> <span class="n">valmax</span>
<span class="o">...</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n</span><span class="o">-</span><span class="mi">2</span><span class="p">):</span>
<span class="o">...</span> <span class="k">for</span> <span class="n">j</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">m</span><span class="o">-</span><span class="mi">2</span><span class="p">):</span>
<span class="o">...</span> <span class="n">out_image</span><span class="p">[</span><span class="n">i</span><span class="p">,</span><span class="n">j</span><span class="p">]</span> <span class="o">=</span> <span class="n">fabs</span><span class="p">(</span><span class="mi">4</span><span class="o">*</span><span class="n">image</span><span class="p">[</span><span class="mi">1</span><span class="o">+</span><span class="n">i</span><span class="p">,</span><span class="mi">1</span><span class="o">+</span><span class="n">j</span><span class="p">]</span> <span class="o">-</span>
<span class="o">...</span> <span class="n">image</span><span class="p">[</span><span class="n">i</span><span class="p">,</span><span class="mi">1</span><span class="o">+</span><span class="n">j</span><span class="p">]</span> <span class="o">-</span> <span class="n">image</span><span class="p">[</span><span class="mi">2</span><span class="o">+</span><span class="n">i</span><span class="p">,</span><span class="mi">1</span><span class="o">+</span><span class="n">j</span><span class="p">]</span> <span class="o">-</span>
<span class="o">...</span> <span class="n">image</span><span class="p">[</span><span class="mi">1</span><span class="o">+</span><span class="n">i</span><span class="p">,</span><span class="n">j</span><span class="p">]</span> <span class="o">-</span> <span class="n">image</span><span class="p">[</span><span class="mi">1</span><span class="o">+</span><span class="n">i</span><span class="p">,</span><span class="mi">2</span><span class="o">+</span><span class="n">j</span><span class="p">])</span>
<span class="o">...</span> <span class="n">valmax</span> <span class="o">=</span> <span class="n">out_image</span><span class="p">[</span><span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">]</span>
<span class="o">...</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n</span><span class="o">-</span><span class="mi">2</span><span class="p">):</span>
<span class="o">...</span> <span class="k">for</span> <span class="n">j</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">m</span><span class="o">-</span><span class="mi">2</span><span class="p">):</span>
<span class="o">...</span> <span class="k">if</span> <span class="n">out_image</span><span class="p">[</span><span class="n">i</span><span class="p">,</span><span class="n">j</span><span class="p">]</span> <span class="o">></span> <span class="n">valmax</span><span class="p">:</span>
<span class="o">...</span> <span class="n">valmax</span> <span class="o">=</span> <span class="n">out_image</span><span class="p">[</span><span class="n">i</span><span class="p">,</span><span class="n">j</span><span class="p">]</span>
<span class="o">...</span> <span class="n">valmax</span> <span class="o">=</span> <span class="nb">max</span><span class="p">(</span><span class="mf">1.</span><span class="p">,</span><span class="n">valmax</span><span class="p">)</span><span class="o">+</span><span class="mf">1.E-9</span>
<span class="o">...</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n</span><span class="o">-</span><span class="mi">2</span><span class="p">):</span>
<span class="o">...</span> <span class="k">for</span> <span class="n">j</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">m</span><span class="o">-</span><span class="mi">2</span><span class="p">):</span>
<span class="o">...</span> <span class="n">out_image</span><span class="p">[</span><span class="n">i</span><span class="p">,</span><span class="n">j</span><span class="p">]</span> <span class="o">/=</span> <span class="n">valmax</span>
<span class="o">...</span> <span class="k">return</span> <span class="n">out_image</span>
</pre></div>
<div class="highlight"><pre><span></span><span class="o">>>></span> <span class="o">%</span><span class="n">timeit</span> <span class="n">cy_py_laplacien</span><span class="p">(</span><span class="n">image</span><span class="p">)</span>
</pre></div>
<div class="highlight"><pre><span></span>852 µs ± 47.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
</pre></div>
<p>And the <em>plain loop</em> cython version is not even as good as the Pythran one :-) It's probably due to <code>fabs</code>, which implies a call to <code>libm</code>, while pythran's <code>np.abs</code> does <em>not</em> have this overhead.</p>
<h2>Future work</h2>
<p>Pythran does not yet support memory views, while Cython does.
Memory views are an official CPython API to transparently forward buffers back
and forth from native world to Python. Like Numpy arrays, they support various
shapes and orderings. Numpy arrays can also be processed as memory views. The
next move for Pythran would be to support this, and for Cython to be able to
use the Pythran backend for memory views!</p>
<p>This blog post was originally a Jupyter Notebook. You can <a href="notebooks/cython%20and%20pythran.ipynb">download it</a> if you want. The conversion was done using <code>nbconvert</code> and a <a href="notebooks/nbmarkdown.tpl">custom template</a> to match the style of the rest of the blog.</p>
</div><!-- /.entry-content -->
</article>
</section>
</div><!--/span-->
<div class="span3 well sidebar-nav" id="sidebar">
<ul class="nav nav-list">
<li class="nav-header"><h4><i class="icon-external-link"></i>blogroll</h4></li>
<li><a href="http://pythonhosted.org/pythran"><i class="icon-external-link"></i>Pythran Doc</a></li>
<li><a href="https://pypi.python.org/pypi/pythran"><i class="icon-external-link"></i>Pythran on PyPI</a></li>
<li class="nav-header"><h4><i class="icon-home icon-large"></i> social</h4></li>
<li><a href="./feeds/all.atom.xml" rel="alternate"><i class="icon-bookmark icon-large"></i>atom feed</a></li>
<li><a href="https://github.com/serge-sans-paille/pythran"><i class="icon-github-sign icon-large"></i>github</a></li>
<li class="nav-header"><h4><i class="icon-folder-close icon-large"></i>Categories</h4></li>
<li>
<a href="./category/benchmark.html">
<i class="icon-folder-open icon-large"></i>benchmark
</a>
</li>
<li>
<a href="./category/compilation.html">
<i class="icon-folder-open icon-large"></i>compilation
</a>
</li>
<li>
<a href="./category/cython.html">
<i class="icon-folder-open icon-large"></i>cython
</a>
</li>
<li>
<a href="./category/engineering.html">
<i class="icon-folder-open icon-large"></i>engineering
</a>
</li>
<li>
<a href="./category/examples.html">
<i class="icon-folder-open icon-large"></i>examples
</a>
</li>
<li>
<a href="./category/optimisation.html">
<i class="icon-folder-open icon-large"></i>optimisation
</a>
</li>
<li>
<a href="./category/release.html">
<i class="icon-folder-open icon-large"></i>release
</a>
</li>
<li class="nav-header"><h4><i class="icon-tags icon-large"></i>Tags</h4></li>
</ul> </div><!--/.well -->
</div><!--/row-->
<hr>
<footer>
<address id="about">
Proudly powered by <a href="http://pelican.notmyidea.org/">Pelican <i class="icon-external-link"></i></a>,
which takes great advantage of <a href="http://python.org">Python <i class="icon-external-link"></i></a>.
</address><!-- /#about -->
<p>The theme is from <a href="http://twitter.github.com/bootstrap/">Bootstrap from Twitter <i class="icon-external-link"></i></a>,
and <a href="http://fortawesome.github.com/Font-Awesome/">Font-Awesome <i class="icon-external-link"></i></a>, thanks!</p>
</footer>
</div><!--/.fluid-container-->
<!-- Le javascript -->
<!-- Placed at the end of the document so the pages load faster -->
<script src="./theme/js/jquery-1.7.2.min.js"></script>
<script src="./theme/js/bootstrap.min.js"></script>
</body>
</html>