docs/html/guide/topics/renderscript/compute.jd


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434

page.title=RenderScript
parent.title=Computation
parent.link=index.html

@jd:body

<div id="qv-wrapper">
  <div id="qv">
    <h2>In this document</h2>

    <ol>
      <li><a href="#writing-an-rs-kernel">Writing a RenderScript Kernel</a></li>
      <li><a href="#access-rs-apis">Accessing RenderScript APIs from Java</a>
        <ol>
          <li><a href="#ide-setup">Using the RenderScript Support Library APIs</a></li>
        </ol>
      </li>
      <li><a href="#using-rs-from-java">Using RenderScript from Java Code</a></li>
      <li><a href="#single-source-rs">Single-Source RenderScript</a></li>
      <li><a href="#reduction-in-depth">Reduction Kernels in Depth</a>
        <ol>
          <li><a href="#writing-reduction-kernel">Writing a reduction kernel</a></li>
          <li><a href="#calling-reduction-kernel">Calling a reduction kernel from Java code</a></li>
          <li><a href="#more-example">More example reduction kernels</a></li>
        </ol>
      </li>
    </ol>

    <h2>Related Samples</h2>

    <ol>
      <li><a class="external-link" href="https://github.com/android/platform_development/tree/master/samples/RenderScript/HelloCompute">Hello
      Compute</a></li>
    </ol>
  </div>
</div>

<p>RenderScript is a framework for running computationally intensive tasks at high performance on
Android. RenderScript is primarily oriented for use with data-parallel computation, although serial
workloads can benefit as well. The RenderScript runtime parallelizes
work across processors available on a device, such as multi-core CPUs and GPUs. This allows
you to focus on expressing algorithms rather than scheduling work. RenderScript is
especially useful for applications performing image processing, computational photography, or
computer vision.</p>

<p>To begin with RenderScript, there are two main concepts you should understand:</p>
<ul>

<li>The <em>language</em> itself is a C99-derived language for writing high-performance compute
code. <a href="#writing-an-rs-kernel">Writing a RenderScript Kernel</a> describes
how to use it to write compute kernels.</li>

<li>The <em>control API</em> is used for managing the lifetime of RenderScript resources and
controlling kernel execution. It is available in three different languages: Java, C++ in Android
NDK, and the C99-derived kernel language itself.
<a href="#using-rs-from-java">Using RenderScript from Java Code</a> and
<a href="#single-source-rs">Single-Source RenderScript</a> describe the first and the third
options, respectively.</li>
</ul>

<h2 id="writing-an-rs-kernel">Writing a RenderScript Kernel</h2>

<p>A RenderScript kernel typically resides in a <code>.rs</code> file in the
<code>&lt;project_root&gt;/src/</code> directory; each <code>.rs</code> file is called a
<i>script</i>. Every script contains its own set of kernels, functions, and variables. A script can
contain:</p>

<ul>
<li>A pragma declaration (<code>#pragma version(1)</code>) that declares the version of the
RenderScript kernel language used in this script. Currently, 1 is the only valid value.</li>

<li>A pragma declaration (<code>#pragma rs java_package_name(com.example.app)</code>) that
declares the package name of the Java classes reflected from this script.
Note that your <code>.rs</code> file must be part of your application package, and not in a
library project.</li>

<li>Zero or more <strong><i>invokable functions</i></strong>. An invokable function is a single-threaded RenderScript
function that you can call from your Java code with arbitrary arguments. These are often useful for
initial setup or serial computations within a larger processing pipeline.</li>

<li><p>Zero or more <strong><i>script globals</i></strong>. A script global is equivalent to a global variable in C. You can
access script globals from Java code, and these are often used for parameter passing to RenderScript
kernels.</p></li>

<li><p>Zero or more <strong><i>compute kernels</i></strong>. A compute kernel is a function
or collection of functions that you can direct the RenderScript runtime to execute in parallel
across a collection of data. There are two kinds of compute
kernels: <i>mapping</i> kernels (also called <i>foreach</i> kernels)
and <i>reduction</i> kernels.</p>

<p>A <em>mapping kernel</em> is a parallel function that operates on a collection of {@link
  android.renderscript.Allocation Allocations} of the same dimensions. By default, it executes
  once for every coordinate in those dimensions. It is typically (but not exclusively) used to
  transform a collection of input {@link android.renderscript.Allocation Allocations} to an
  output {@link android.renderscript.Allocation} one {@link android.renderscript.Element} at a
  time.</p>

<ul>
<li><p>Here is an example of a simple <strong>mapping kernel</strong>:</p>

<pre>uchar4 RS_KERNEL invert(uchar4 in, uint32_t x, uint32_t y) {
  uchar4 out = in;
  out.r = 255 - in.r;
  out.g = 255 - in.g;
  out.b = 255 - in.b;
  return out;
}</pre>

<p>In most respects, this is identical to a standard C
  function. The <a href="#RS_KERNEL"><code>RS_KERNEL</code></a> property applied to the
  function prototype specifies that the function is a RenderScript mapping kernel instead of an
  invokable function. The <code>in</code> argument is automatically filled in based on the
  input {@link android.renderscript.Allocation} passed to the kernel launch. The
  arguments <code>x</code> and <code>y</code> are
  discussed <a href="#special-arguments">below</a>. The value returned from the kernel is
  automatically written to the appropriate location in the output {@link
  android.renderscript.Allocation}. By default, this kernel is run across its entire input
  {@link android.renderscript.Allocation}, with one execution of the kernel function per {@link
  android.renderscript.Element} in the {@link android.renderscript.Allocation}.</p>

<p>A mapping kernel may have one or more input {@link android.renderscript.Allocation
  Allocations}, a single output {@link android.renderscript.Allocation}, or both. The
  RenderScript runtime checks to ensure that all input and output Allocations have the same
  dimensions, and that the {@link android.renderscript.Element} types of the input and output
  Allocations match the kernel's prototype; if either of these checks fails, RenderScript
  throws an exception.</p>

<p class="note"><strong>NOTE:</strong> Before Android 6.0 (API level 23), a mapping kernel may
  not have more than one input {@link android.renderscript.Allocation}.</p>

<p>If you need more input or output {@link android.renderscript.Allocation Allocations} than
  the kernel has, those objects should be bound to <code>rs_allocation</code> script globals
  and accessed from a kernel or invokable function
  via <code>rsGetElementAt_<i>type</i>()</code> or <code>rsSetElementAt_<i>type</i>()</code>.</p>

<p><strong>NOTE:</strong> <a id="RS_KERNEL"><code>RS_KERNEL</code></a> is a macro
  defined automatically by RenderScript for your convenience:</p>
<pre>
#define RS_KERNEL __attribute__((kernel))
</pre>
</li>
</ul>

<p>A <em>reduction kernel</em> is a family of functions that operates on a collection of input
  {@link android.renderscript.Allocation Allocations} of the same dimensions. By default,
  its <a href="#accumulator-function">accumulator function</a> executes once for every
  coordinate in those dimensions.  It is typically (but not exclusively) used to "reduce" a
  collection of input {@link android.renderscript.Allocation Allocations} to a single
  value.</p>

<ul>
<li><p>Here is an <a id="example-addint">example</a> of a simple <strong>reduction
kernel</strong> that adds up the {@link android.renderscript.Element Elements} of its
input:</p>

<pre>#pragma rs reduce(addint) accumulator(addintAccum)

static void addintAccum(int *accum, int val) {
  *accum += val;
}</pre>

<p>A reduction kernel consists of one or more user-written functions.
<code>#pragma rs reduce</code> is used to define the kernel by specifying its name
(<code>addint</code>, in this example) and the names and roles of the functions that make
up the kernel (an <code>accumulator</code> function <code>addintAccum</code>, in this
example). All such functions must be <code>static</code>. A reduction kernel always
requires an <code>accumulator</code> function; it may also have other functions, depending
on what you want the kernel to do.</p>

<p>A reduction kernel accumulator function must return <code>void</code> and must have at least
two arguments. The first argument (<code>accum</code>, in this example) is a pointer to
an <i>accumulator data item</i> and the second (<code>val</code>, in this example) is
automatically filled in based on the input {@link android.renderscript.Allocation} passed to
the kernel launch. The accumulator data item is created by the RenderScript runtime; by
default, it is initialized to zero. By default, this kernel is run across its entire input
{@link android.renderscript.Allocation}, with one execution of the accumulator function per
{@link android.renderscript.Element} in the {@link android.renderscript.Allocation}. By
default, the final value of the accumulator data item is treated as the result of the
reduction, and is returned to Java.  The RenderScript runtime checks to ensure that the {@link
android.renderscript.Element} type of the input Allocation matches the accumulator function's
prototype; if it does not match, RenderScript throws an exception.</p>

<p>A reduction kernel has one or more input {@link android.renderscript.Allocation
Allocations} but no output {@link android.renderscript.Allocation Allocations}.</p>

<p>Reduction kernels are explained in more detail <a href="#reduction-in-depth">here</a>.</p>

<p>Reduction kernels are supported in Android 7.0 (API level 24) and later.</p>
</li>
</ul>

<p>A mapping kernel function or a reduction kernel accumulator function may access the coordinates
of the current execution using the <a id="special-arguments">special arguments</a> <code>x</code>,
<code>y</code>, and <code>z</code>, which must be of type <code>int</code> or <code>uint32_t</code>.
These arguments are optional.</p>

<p>A mapping kernel function or a reduction kernel accumulator
function may also take the optional special argument
<code>context</code> of type <a
href='reference/rs_for_each.html#android_rs:rs_kernel_context'>rs_kernel_context</a>.
It is needed by a family of runtime APIs that are used to query
certain properties of the current execution -- for example, <a
href='reference/rs_for_each.html#android_rs:rsGetDimX'>rsGetDimX</a>.
(The <code>context</code> argument is available in Android 6.0 (API level 23) and later.)</p>
</li>

<li>An optional <code>init()</code> function. An <code>init()</code> function is a special type of
invokable function that RenderScript runs when the script is first instantiated. This allows for some
computation to occur automatically at script creation.</li>

<li>Zero or more <strong><i>static script globals and functions</i></strong>. A static script global is equivalent to a
script global except that it cannot be accessed from Java code. A static function is a standard C
function that can be called from any kernel or invokable function in the script but is not exposed
to the Java API. If a script global or function does not need to be called from Java code, it is
highly recommended that it be declared <code>static</code>.</li> </ul>

<h4>Setting floating point precision</h4>

<p>You can control the required level of floating point precision in a script. This is useful if
the full IEEE 754-2008 standard (used by default) is not required. The following pragmas can set a
different level of floating point precision:</p>

<ul>

<li><code>#pragma rs_fp_full</code> (default if nothing is specified): For apps that require
  floating point precision as outlined by the IEEE 754-2008 standard.

</li>

  <li><code>#pragma rs_fp_relaxed</code>: For apps that don’t require strict IEEE 754-2008
    compliance and can tolerate less precision. This mode enables flush-to-zero for denorms and
    round-towards-zero.

</li>

  <li><code>#pragma rs_fp_imprecise</code>: For apps that don’t have stringent precision
    requirements. This mode enables everything in <code>rs_fp_relaxed</code> along with the
    following:

<ul>

  <li>Operations resulting in -0.0 can return +0.0 instead.</li>
  <li>Operations on INF and NAN are undefined.</li>
</ul>
</li>
</ul>

<p>Most applications can use <code>rs_fp_relaxed</code> without any side effects. This may be very
beneficial on some architectures due to additional optimizations only available with relaxed
precision (such as SIMD CPU instructions).</p>


<h2 id="access-rs-apis">Accessing RenderScript APIs from Java</h2>

<p>When developing an Android application that uses RenderScript, you can access its API from Java in
  one of two ways:</p>

<ul>
  <li><strong>{@link android.renderscript}</strong> - The APIs in this package are
    available on devices running Android 3.0 (API level 11) and higher. </li>
  <li><strong>{@link android.support.v8.renderscript}</strong> - The APIs in this package are
    available through a <a href="{@docRoot}tools/support-library/features.html#v8">Support
    Library</a>, which allows you to use them on devices running Android 2.3 (API level 9) and
    higher.</li>
</ul>

<p>Here are the tradeoffs:</p>

<ul>
<li>If you use the Support Library APIs, the RenderScript portion of your application will be
  compatible with devices running Android 2.3 (API level 9) and higher, regardless of which RenderScript
  features you use. This allows your application to work on more devices than if you use the
  native (<strong>{@link android.renderscript}</strong>) APIs.</li>
<li>Certain RenderScript features are not available through the Support Library APIs.</li>
<li>If you use the Support Library APIs, you will get (possibly significantly) larger APKs than
if you use the native (<strong>{@link android.renderscript}</strong>) APIs.</li>
</ul>

<h3 id="ide-setup">Using the RenderScript Support Library APIs</h3>

<p>In order to use the Support Library RenderScript APIs, you must configure your development
  environment to be able to access them. The following Android SDK tools are required for using
  these APIs:</p>

<ul>
  <li>Android SDK Tools revision 22.2 or higher</li>
  <li>Android SDK Build-tools revision 18.1.0 or higher</li>
</ul>

<p>You can check and update the installed version of these tools in the
  <a href="{@docRoot}tools/help/sdk-manager.html">Android SDK Manager</a>.</p>


<p>To use the Support Library RenderScript APIs:</p>

<ol>
  <li>Make sure you have the required Android SDK version and Build Tools version installed.</li>
  <li> Update the settings for the Android build process to include the RenderScript settings:

    <ul>
      <li>Open the {@code build.gradle} file in the app folder of your application module. </li>
      <li>Add the following RenderScript settings to the file:

<pre>
android {
    compileSdkVersion 23
    buildToolsVersion "23.0.3"

    defaultConfig {
        minSdkVersion 9
        targetSdkVersion 19
<strong>
        renderscriptTargetApi 18
        renderscriptSupportModeEnabled true
</strong>
    }
}
</pre>


    <p>The settings listed above control specific behavior in the Android build process:</p>

    <ul>
      <li>{@code renderscriptTargetApi} - Specifies the bytecode version to be generated. We
      recommend you set this value to the lowest API level able to provide all the functionality
      you are using and set {@code renderscriptSupportModeEnabled} to {@code true}.
      Valid values for this setting are any integer value
      from 11 to the most recently released API level. If your minimum SDK version specified in your
      application manifest is set to a different value, that value is ignored and the target value
      in the build file is used to set the minimum SDK version.</li>
      <li>{@code renderscriptSupportModeEnabled} - Specifies that the generated bytecode should fall
      back to a compatible version if the device it is running on does not support the target
      version.
      </li>
      <li>{@code buildToolsVersion} - The version of the Android SDK build tools to use. This value
      should be set to {@code 18.1.0} or higher. If this option is not specified, the highest
      installed build tools version is used. You should always set this value to ensure the
      consistency of builds across development machines with different configurations.</li>
    </ul>
    </li>
   </ul>
  </li>

  <li>In your application classes that use RenderScript, add an import for the Support Library
    classes:

<pre>
import android.support.v8.renderscript.*;
</pre>

  </li>

</ol>

<h2 id="using-rs-from-java">Using RenderScript from Java Code</h2>

<p>Using RenderScript from Java code relies on the API classes located in the
{@link android.renderscript} or the {@link android.support.v8.renderscript} package. Most
applications follow the same basic usage pattern:</p>

<ol>

<li><strong>Initialize a RenderScript context.</strong> The {@link
android.renderscript.RenderScript} context, created with {@link
android.renderscript.RenderScript#create}, ensures that RenderScript can be used and provides an
object to control the lifetime of all subsequent RenderScript objects. You should consider context
creation to be a potentially long-running operation, since it may create resources on different
pieces of hardware; it should not be in an application's critical path if at all
possible. Typically, an application will have only a single RenderScript context at a time.</li>

<li><strong>Create at least one {@link android.renderscript.Allocation} to be passed to a
script.</strong> An {@link android.renderscript.Allocation} is a RenderScript object that provides
storage for a fixed amount of data. Kernels in scripts take {@link android.renderscript.Allocation}
objects as their input and output, and {@link android.renderscript.Allocation} objects can be
accessed in kernels using <code>rsGetElementAt_<i>type</i>()</code> and
<code>rsSetElementAt_<i>type</i>()</code> when bound as script globals. {@link
android.renderscript.Allocation} objects allow arrays to be passed from Java code to RenderScript
code and vice-versa. {@link android.renderscript.Allocation} objects are typically created using
{@link android.renderscript.Allocation#createTyped createTyped()} or {@link
android.renderscript.Allocation#createFromBitmap createFromBitmap()}.</li>

<li><strong>Create whatever scripts are necessary.</strong> There are two types of scripts available
to you when using RenderScript:

<ul>

<li><strong>ScriptC</strong>: These are the user-defined scripts as described in <a
href="#writing-an-rs-kernel"><i>Writing a RenderScript Kernel</i></a> above. Every script has a Java class
reflected by the RenderScript compiler in order to make it easy to access the script from Java code;
this class has the name <code>ScriptC_<i>filename</i></code>. For example, if the mapping kernel
above were located in <code>invert.rs</code> and a RenderScript context were already located in
<code>mRenderScript</code>, the Java code to instantiate the script would be:

<pre>ScriptC_invert invert = new ScriptC_invert(mRenderScript);</pre></li>

<li><strong>ScriptIntrinsic</strong>: These are built-in RenderScript kernels for common operations,
such as Gaussian blur, convolution, and image blending. For more information, see the subclasses of
{@link android.renderscript.ScriptIntrinsic}.</li>

</ul></li>

<li><strong>Populate Allocations with data.</strong> Except for Allocations created with {@link
android.renderscript.Allocation#createFromBitmap createFromBitmap()}, an Allocation is populated with empty data when it is
first created. To populate an Allocation, use one of the "copy" methods in {@link
android.renderscript.Allocation}. The "copy" methods are <a href="#asynchronous-model">synchronous</a>.</li>

<li><strong>Set any necessary script globals.</strong> You may set globals using methods in the
  same <code>ScriptC_<i>filename</i></code> class named <code>set_<i>globalname</i></code>. For
  example, in order to set an <code>int</code> variable named <code>threshold</code>, use the
  Java method <code>set_threshold(int)</code>; and in order to set
  an <code>rs_allocation</code> variable named <code>lookup</code>, use the Java
  method <code>set_lookup(Allocation)</code>. The <code>set</code> methods
  are <a href="#asynchronous-model">asynchronous</a>.</li>

<li id="launching_kernels"><strong>Launch the appropriate kernels and invokable functions.</strong>
<p>Methods to launch a given kernel are
reflected in the same <code>ScriptC_<i>filename</i></code> class with methods named
<code>forEach_<i>mappingKernelName</i>()</code>
or <code>reduce_<i>reductionKernelName</i>()</code>.
These launches are <a href="#asynchronous-model">asynchronous</a>.
Depending on the arguments to the kernel, the
method takes one or more Allocations, all of which must have the same dimensions. By default, a
kernel executes over every coordinate in those dimensions; to execute a kernel over a subset of those coordinates,
pass an appropriate {@link
android.renderscript.Script.LaunchOptions} as the last argument to the <code>forEach</code> or <code>reduce</code> method.</p>

<p>Launch invokable functions using the <code>invoke_<i>functionName</i></code> methods
reflected in the same <code>ScriptC_<i>filename</i></code> class.
These launches are <a href="#asynchronous-model">asynchronous</a>.</p></li>

<li><strong>Retrieve data from {@link android.renderscript.Allocation} objects
and <i><a href="#javaFutureType">javaFutureType</a></i> objects.</strong>
In order to
access data from an {@link android.renderscript.Allocation} from Java code, you must copy that data
back to Java using one of the "copy" methods in {@link
android.renderscript.Allocation}.
In order to obtain the result of a reduction kernel, you must use the <code><i>javaFutureType</i>.get()</code> method.
The "copy" and <code>get()</code> methods are <a href="#asynchronous-model">synchronous</a>.</li>

<li><strong>Tear down the RenderScript context.</strong> You can destroy the RenderScript context
with {@link android.renderscript.RenderScript#destroy} or by allowing the RenderScript context
object to be garbage collected. This causes any further use of any object belonging to that
context to throw an exception.</li> </ol>

<h3 id="asynchronous-model">Asynchronous execution model</h3>

<p>The reflected <code>forEach</code>, <code>invoke</code>, <code>reduce</code>,
  and <code>set</code> methods are asynchronous -- each may return to Java before completing the
  requested action.  However, the individual actions are serialized in the order in which they are launched.</p>

<p>The {@link android.renderscript.Allocation} class provides "copy" methods to copy data to
  and from Allocations.  A "copy" method is synchronous, and is serialized with respect to any
  of the asynchronous actions above that touch the same Allocation.</p>

<p>The reflected <i><a href="#javaFutureType">javaFutureType</a></i> classes provide
  a <code>get()</code> method to obtain the result of a reduction. <code>get()</code> is
  synchronous, and is serialized with respect to the reduction (which is asynchronous).</p>

<h2 id="single-source-rs">Single-Source RenderScript</h2>

<p>Android 7.0 (API level 24) introduces a new programming feature called <em>Single-Source
RenderScript</em>, in which kernels are launched from the script where they are defined, rather than
from Java. This approach is currently limited to mapping kernels, which are simply referred to as "kernels"
in this section for conciseness. This new feature also supports creating allocations of type
<a href="{@docRoot}guide/topics/renderscript/reference/rs_object_types.html#android_rs:rs_allocation">
<code>rs_allocation</code></a> from inside the script. It is now possible to
implement a whole algorithm solely within a script, even if multiple kernel launches are required.
The benefit is twofold: more readable code, because it keeps the implementation of an algorithm in
one language; and potentially faster code, because of fewer transitions between Java and
RenderScript across multiple kernel launches.</p>

<p>In Single-Source RenderScript, you write kernels as described in <a href="#writing-an-rs-kernel">
Writing a RenderScript Kernel</a>. You then write an invokable function that calls
<a href="{@docRoot}guide/topics/renderscript/reference/rs_for_each.html#android_rs:rsForEach">
<code>rsForEach()</code></a> to launch them. That API takes a kernel function as the first
parameter, followed by input and output allocations. A similar API
<a href="{@docRoot}guide/topics/renderscript/reference/rs_for_each.html#android_rs:rsForEachWithOptions">
<code>rsForEachWithOptions()</code></a> takes an extra argument of type
<a href="{@docRoot}guide/topics/renderscript/reference/rs_for_each.html#android_rs:rs_script_call_t">
<code>rs_script_call_t</code></a>, which specifies a subset of the elements from the input and
output allocations for the kernel function to process.</p>

<p>To start RenderScript computation, you call the invokable function from Java.
Follow the steps in <a href="#using-rs-from-java">Using RenderScript from Java Code</a>.
In the step <a href="#launching_kernels">launch the appropriate kernels</a>, call
the invokable function using <code>invoke_<i>function_name</i>()</code>, which will start the
whole computation, including launching kernels.</p>

<p>Allocations are often needed to save and pass
intermediate results from one kernel launch to another. You can create them using
<a href="{@docRoot}guide/topics/renderscript/reference/rs_allocation_create.html#android_rs:rsCreateAllocation">
rsCreateAllocation()</a>. One easy-to-use form of that API is <code>
rsCreateAllocation_&lt;T&gt;&lt;W&gt;(&hellip;)</code>, where <i>T</i> is the data type for an
element, and <i>W</i> is the vector width for the element. The API takes the sizes in
dimensions X, Y, and Z as arguments. For 1D or 2D allocations, the size for dimension Y or Z can
be omitted. For example, <code>rsCreateAllocation_uchar4(16384)</code> creates a 1D allocation of
16384 elements, each of which is of type <code>uchar4</code>.</p>

<p>Allocations are managed by the system automatically. You
do not have to explicitly release or free them. However, you can call
<a href="{@docRoot}guide/topics/renderscript/reference/rs_object_info.html#android_rs:rsClearObject">
<code>rsClearObject(rs_allocation* alloc)</code></a> to indicate you no longer need the handle
<code>alloc</code> to the underlying allocation,
so that the system can free up resources as early as possible.</p>

<p>The <a href="#writing-an-rs-kernel">Writing a RenderScript Kernel</a> section contains an example
kernel that inverts an image. The example below expands that to apply more than one effect to an image,
using Single-Source RenderScript. It includes another kernel, <code>greyscale</code>, which turns a
color image into black-and-white. An invokable function <code>process()</code> then applies those two kernels
consecutively to an input image, and produces an output image. Allocations for both the input and
the output are passed in as arguments of type
<a href="{@docRoot}guide/topics/renderscript/reference/rs_object_types.html#android_rs:rs_allocation">
<code>rs_allocation</code></a>.</p>

<pre>
// File: singlesource.rs

#pragma version(1)
#pragma rs java_package_name(com.android.rssample)

// Per-channel weights (r, g, b, a) that greyscale() combines with a pixel via dot().
// The alpha weight is 0, so alpha does not contribute to the weighted sum.
static const float4 weight = {0.299f, 0.587f, 0.114f, 0.0f};

// Kernel: inverts the red, green, and blue channels of one pixel.
// The alpha channel is copied through unchanged. The x and y coordinates
// are accepted but not used.
uchar4 RS_KERNEL invert(uchar4 in, uint32_t x, uint32_t y) {
  const uchar4 inverted = { 255 - in.r, 255 - in.g, 255 - in.b, in.a };
  return inverted;
}

// Kernel: converts one pixel to greyscale.
// The pixel is unpacked to floating point, reduced to a single luminance
// value using the weights in `weight`, and repacked with that luminance in
// the red, green, and blue channels. The input alpha is preserved.
uchar4 RS_KERNEL greyscale(uchar4 in) {
  const float4 inF = rsUnpackColor8888(in);
  // weight.a is 0, so this is the weighted sum of r, g, and b only.
  const float luminance = dot(inF, weight);
  // List every component explicitly: a one-element initializer such as
  // (float4){ luminance } would set only the first component and zero the
  // remaining ones, producing a red-only, fully transparent pixel.
  const float4 outF = { luminance, luminance, luminance, inF.a };
  return rsPackColorTo8888(outF);
}

// Invokable function: runs the invert kernel on inputImage, then runs the
// greyscale kernel on the inverted image, writing the result to outputImage.
void process(rs_allocation inputImage, rs_allocation outputImage) {
  // Scratch allocation for the inverted image, sized from the input's
  // X and Y dimensions.
  rs_allocation inverted = rsCreateAllocation_uchar4(
      rsAllocationGetDimX(inputImage), rsAllocationGetDimY(inputImage));

  rsForEach(invert, inputImage, inverted);
  rsForEach(greyscale, inverted, outputImage);
}
</pre>

<p>You can call the <code>process()</code> function from Java as follows:</p>

<pre>
// File SingleSource.java

// Create the RenderScript context and the reflected script object.
RenderScript RS = RenderScript.create(context);
ScriptC_singlesource script = new ScriptC_singlesource(RS);

// Load the input image; the output allocation reuses the input's Type.
Allocation inputAllocation = Allocation.createFromBitmapResource(
    RS, getResources(), R.drawable.image);
final int outputUsage = Allocation.USAGE_SCRIPT | Allocation.USAGE_IO_OUTPUT;
Allocation outputAllocation = Allocation.createTyped(
    RS, inputAllocation.getType(), outputUsage);

// A single call starts the whole computation, including both kernel launches.
script.invoke_process(inputAllocation, outputAllocation);
</pre>

<p>This example shows how an algorithm that involves two kernel launches can be implemented completely
in the RenderScript language itself. Without Single-Source
RenderScript, you would have to launch both kernels from the Java code, separating kernel launches
from kernel definitions and making it harder to understand the whole algorithm. Not only is the
Single-Source RenderScript code easier to read, it also eliminates the transitioning
between Java and the script across kernel launches. Some iterative algorithms may launch kernels
hundreds of times, making the overhead of such transitioning considerable.</p>

<h2 id="reduction-in-depth">Reduction Kernels in Depth</h2>

<p><i>Reduction</i> is the process of combining a collection of data into a single
value. This is a useful primitive in parallel programming, with applications such as the
following:</p>
<ul>
  <li>computing the sum or product over all the data</li>
  <li>computing logical operations (<code>and</code>, <code>or</code>, <code>xor</code>)
  over all the data</li>
  <li>finding the minimum or maximum value within the data</li>
  <li>searching for a specific value or for the coordinate of a specific value within the data</li>
</ul>

<p>In Android 7.0 (API level 24) and later, RenderScript supports <i>reduction kernels</i> to allow
efficient user-written reduction algorithms. You may launch reduction kernels on inputs with
1, 2, or 3 dimensions.</p>

<p>An example above shows a simple <a href="#example-addint">addint</a> reduction kernel.
Here is a more complicated <a id="example-findMinAndMax">findMinAndMax</a> reduction kernel
that finds the locations of the minimum and maximum <code>long</code> values in a
1-dimensional {@link android.renderscript.Allocation}:</p>

<pre>
// Largest and smallest values of a 64-bit long. Each expansion is fully
// parenthesized so that it behaves as a single operand wherever it is used.
#define LONG_MAX ((long)((1UL << 63) - 1))
#define LONG_MIN ((long)(1UL << 63))

#pragma rs reduce(findMinAndMax) \
  initializer(fMMInit) accumulator(fMMAccumulator) \
  combiner(fMMCombiner) outconverter(fMMOutConverter)

// Either a value and the location where it was found, or <a href="#INITVAL">INITVAL</a>.
typedef struct {
  long val;    // the value itself (LONG_MAX or LONG_MIN while still INITVAL)
  int idx;     // -1 indicates <a href="#INITVAL">INITVAL</a>
} IndexedVal;

// Accumulator data item for findMinAndMax: the minimum and maximum values
// found so far, each paired with the location where it was found.
typedef struct {
  IndexedVal min, max;
} MinAndMax;

// In discussion below, this initial value { { LONG_MAX, -1 }, { LONG_MIN, -1 } }
// is called <a id="INITVAL">INITVAL</a>.
static void fMMInit(MinAndMax *accum) {
  // Index -1 marks an entry that does not correspond to any input element.
  // The extreme values guarantee that the first accumulated input replaces it.
  const IndexedVal noMin = { .val = LONG_MAX, .idx = -1 };
  const IndexedVal noMax = { .val = LONG_MIN, .idx = -1 };

  accum->min = noMin;
  accum->max = noMax;
}

//----------------------------------------------------------------------
// In describing the behavior of the accumulator and combiner functions,
// it is helpful to describe hypothetical functions
//   IndexedVal min(IndexedVal a, IndexedVal b)
//   IndexedVal max(IndexedVal a, IndexedVal b)
//   MinAndMax  minmax(MinAndMax a, MinAndMax b)
//   MinAndMax  minmax(MinAndMax accum, IndexedVal val)
//
// The effect of
//   IndexedVal min(IndexedVal a, IndexedVal b)
// is to return the IndexedVal from among the two arguments
// whose val is lesser, except that when an IndexedVal
// has a negative index, that IndexedVal is never less than
// any other IndexedVal; therefore, if exactly one of the
// two arguments has a negative index, the min is the other
// argument. Like ordinary arithmetic min and max, this function
// is commutative and associative; that is,
//
//   min(A, B) == min(B, A)                  // commutative
//   min(A, min(B, C)) == min(min(A, B), C)  // associative
//
// The effect of
//   IndexedVal max(IndexedVal a, IndexedVal b)
// is analogous (greater . . . never greater than).
//
// Then there is
//
//   MinAndMax minmax(MinAndMax a, MinAndMax b) {
//     return MinAndMax(min(a.min, b.min), max(a.max, b.max));
//   }
//
// Like ordinary arithmetic min and max, the above function
// is commutative and associative; that is:
//
//   minmax(A, B) == minmax(B, A)                        // commutative
//   minmax(A, minmax(B, C)) == minmax(minmax(A, B), C)  // associative
//
// Finally define
//
//   MinAndMax minmax(MinAndMax accum, IndexedVal val) {
//     return minmax(accum, MinAndMax(val, val));
//   }
//----------------------------------------------------------------------

// This function can be explained as doing:
//   *accum = minmax(*accum, IndexedVal(in, x))
//
// This function simply computes minimum and maximum values as if
// INITVAL.min were greater than any other minimum value and
// INITVAL.max were less than any other maximum value.  Note that if
// *accum is INITVAL, then this function sets
//   *accum = IndexedVal(in, x)
//
// After this function is called, both accum->min.idx and accum->max.idx
// will have nonnegative values:
// - x is always nonnegative, so if this function ever sets one of the
//   idx fields, it will set it to a nonnegative value
// - if one of the idx fields is negative, then the corresponding
//   val field must be LONG_MAX or LONG_MIN, so the function will always
//   set both the val and idx fields
static void fMMAccumulator(MinAndMax *accum, long in, int x) {
  // The current input value paired with the coordinate it was read from.
  const IndexedVal candidate = { .val = in, .idx = x };

  // Both comparisons are inclusive, so an entry that still holds its initial
  // LONG_MAX / LONG_MIN value is always replaced by the first input.
  if (accum->min.val >= candidate.val) {
    accum->min = candidate;
  }
  if (accum->max.val <= candidate.val) {
    accum->max = candidate;
  }
}

// This function can be explained as doing:
//   *accum = minmax(*accum, *val)
//
// This function simply computes minimum and maximum values as if
// INITVAL.min were greater than any other minimum value and
// INITVAL.max were less than any other maximum value.  Note that if
// one of the two accumulator data items is INITVAL, then this
// function sets *accum to the other one.
static void fMMCombiner(MinAndMax *accum,
                        const MinAndMax *val) {
  // Minimum: take *val's entry when *accum has none yet (negative idx),
  // or when *val's minimum is strictly smaller.
  if (accum->min.idx < 0) {
    accum->min = val->min;
  } else if (val->min.val < accum->min.val) {
    accum->min = val->min;
  }

  // Maximum: same rule, with the comparison reversed.
  if (accum->max.idx < 0) {
    accum->max = val->max;
  } else if (val->max.val > accum->max.val) {
    accum->max = val->max;
  }
}

// Produces the reduction result from the final accumulator data item:
// x receives the location of the minimum, y the location of the maximum.
static void fMMOutConverter(int2 *result,
                            const MinAndMax *val) {
  const int2 locations = { val->min.idx, val->max.idx };
  *result = locations;
}
</pre>

<p class="note"><strong>NOTE:</strong> There are more example reduction
  kernels <a href="#more-example">here</a>.</p>

<p>In order to run a reduction kernel, the RenderScript runtime creates <em>one or more</em>
variables called <a id="accumulator-data-items"><strong><i>accumulator data
items</i></strong></a> to hold the state of the reduction process. The RenderScript runtime
picks the number of accumulator data items in such a way as to maximize performance. The type
of the accumulator data items (<i>accumType</i>) is determined by the kernel's <i>accumulator
function</i> -- the first argument to that function is a pointer to an accumulator data
item. By default, every accumulator data item is initialized to zero (as if
by <code>memset</code>); however, you may write an <i>initializer function</i> to do something
different.</p>

<p class="note"><strong>Example:</strong> In the <a href="#example-addint">addint</a>
kernel, the accumulator data items (of type <code>int</code>) are used to add up input
values. There is no initializer function, so each accumulator data item is initialized to
zero.</p>

<p class="note"><strong>Example:</strong> In
the <a href="#example-findMinAndMax">findMinAndMax</a> kernel, the accumulator data items
(of type <code>MinAndMax</code>) are used to keep track of the minimum and maximum values
found so far. There is an initializer function to set these to <code>LONG_MAX</code> and
<code>LONG_MIN</code>, respectively; and to set the locations of these values to -1, indicating that
the values are not actually present in the (empty) portion of the input that has been
processed.</p>

<p>RenderScript calls your accumulator function once for every coordinate in the
input(s). Typically, your function should update the accumulator data item in some way
according to the input.</p>

<p class="note"><strong>Example:</strong> In the <a href="#example-addint">addint</a>
kernel, the accumulator function adds the value of an input Element to the accumulator
data item.</p>

<p class="note"><strong>Example:</strong> In
the <a href="#example-findMinAndMax">findMinAndMax</a> kernel, the accumulator function
checks to see whether the value of an input Element is less than or equal to the minimum
value recorded in the accumulator data item and/or greater than or equal to the maximum
value recorded in the accumulator data item, and updates the accumulator data item
accordingly.</p>

<p>After the accumulator function has been called once for every coordinate in the input(s),
RenderScript must <strong>combine</strong> the <a href="#accumulator-data-items">accumulator
data items</a> together into a single accumulator data item. You may write a <i>combiner
function</i> to do this. If the accumulator function has a single input and
no <a href="#special-arguments">special arguments</a>, then you do not need to write a combiner
function; RenderScript will use the accumulator function to combine the accumulator data
items. (You may still write a combiner function if this default behavior is not what you
want.)</p>

<p class="note"><strong>Example:</strong> In the <a href="#example-addint">addint</a>
kernel, there is no combiner function, so the accumulator function will be used. This is
the correct behavior, because if we split a collection of values into two pieces, and we
add up the values in those two pieces separately, adding up those two sums is the same as
adding up the entire collection.</p>

<p class="note"><strong>Example:</strong> In
the <a href="#example-findMinAndMax">findMinAndMax</a> kernel, the combiner function
checks to see whether the minimum value recorded in the "source" accumulator data
item <code>*val</code> is less than the minimum value recorded in the "destination"
accumulator data item <code>*accum</code>, and updates <code>*accum</code>
accordingly. It does similar work for the maximum value. This updates <code>*accum</code>
to the state it would have had if all of the input values had been accumulated into
<code>*accum</code> rather than some into <code>*accum</code> and some into
<code>*val</code>.</p>

<p>After all of the accumulator data items have been combined, RenderScript determines
the result of the reduction to return to Java. You may write an <i>outconverter
function</i> to do this. You do not need to write an outconverter function if you want
the final value of the combined accumulator data items to be the result of the reduction.</p>

<p class="note"><strong>Example:</strong> In the <a href="#example-addint">addint</a> kernel,
there is no outconverter function.  The final value of the combined data items is the sum of
all Elements of the input, which is the value we want to return.</p>

<p class="note"><strong>Example:</strong> In
the <a href="#example-findMinAndMax">findMinAndMax</a> kernel, the outconverter function
initializes an <code>int2</code> result value to hold the locations of the minimum and
maximum values resulting from the combination of all of the accumulator data items.</p>

<h3 id="writing-reduction-kernel">Writing a reduction kernel</h3>

<p><code>#pragma rs reduce</code> defines a reduction kernel by
specifying its name and the names and roles of the functions that make
up the kernel.  All such functions must be
<code>static</code>. A reduction kernel always requires an <code>accumulator</code>
function; you can omit some or all of the other functions, depending on what you want the
kernel to do.</p>

<pre>#pragma rs reduce(<i>kernelName</i>) \
  initializer(<i>initializerName</i>) \
  accumulator(<i>accumulatorName</i>) \
  combiner(<i>combinerName</i>) \
  outconverter(<i>outconverterName</i>)
</pre>

<p>The meaning of the items in the <code>#pragma</code> is as follows:</p>
<ul>

<li><code>reduce(<i>kernelName</i>)</code> (mandatory): Specifies that a reduction kernel is
being defined. A reflected Java method <code>reduce_<i>kernelName</i></code> will launch the
kernel.</li>

<li><p><code>initializer(<i>initializerName</i>)</code> (optional): Specifies the name of the
initializer function for this reduction kernel. When you launch the kernel, RenderScript calls
this function once for each <a href="#accumulator-data-items">accumulator data item</a>. The
function must be defined like this:</p>

<pre>static void <i>initializerName</i>(<i>accumType</i> *accum) { … }</pre>

<p><code>accum</code> is a pointer to an accumulator data item for this function to
initialize.</p>

<p>If you do not provide an initializer function, RenderScript initializes every accumulator
data item to zero (as if by <code>memset</code>), behaving as if there were an initializer
function that looks like this:</p>
<pre>static void <i>initializerName</i>(<i>accumType</i> *accum) {
  memset(accum, 0, sizeof(*accum));
}</pre>
</li>

<li><p><code><a id="accumulator-function">accumulator(<i>accumulatorName</i>)</a></code>
(mandatory): Specifies the name of the accumulator function for this
reduction kernel. When you launch the kernel, RenderScript calls
this function once for every coordinate in the input(s), to update an
accumulator data item in some way according to the input(s). The function
must be defined like this:</p>

<pre>
static void <i>accumulatorName</i>(<i>accumType</i> *accum,
                            <i>in1Type</i> in1, <i>&hellip;,</i> <i>inNType</i> in<i>N</i>
                            <i>[, specialArguments]</i>) { &hellip; }
</pre>

<p><code>accum</code> is a pointer to an accumulator data item for this function to
modify. <code>in1</code> through <code>in<i>N</i></code> are one <em>or more</em> arguments that
are automatically filled in based on the inputs passed to the kernel launch, one argument
per input. The accumulator function may optionally take any of the <a
href="#special-arguments">special arguments</a>.</p>

<p>An example kernel with multiple inputs is <a href="#dot-product"><code>dotProduct</code></a>.</p>
</li>

<li><p><code><a id="combiner-function">combiner(<i>combinerName</i>)</a></code>
(optional): Specifies the name of the combiner function for this
reduction kernel. After RenderScript calls the accumulator function
once for every coordinate in the input(s), it calls this function as many
times as necessary to combine all accumulator data items into a single
accumulator data item. The function must be defined like this:</p>

<pre>static void <i>combinerName</i>(<i>accumType</i> *accum, const <i>accumType</i> *other) { … }</pre>

<p><code>accum</code> is a pointer to a "destination" accumulator data item for this
function to modify. <code>other</code> is a pointer to a "source" accumulator data item
for this function to "combine" into <code>*accum</code>.</p>

<p class="note"><strong>NOTE:</strong> It is possible
  that <code>*accum</code>, <code>*other</code>, or both have been initialized but have never
  been passed to the accumulator function; that is, one or both have never been updated
  according to any input data. For example, in
  the <a href="#example-findMinAndMax">findMinAndMax</a> kernel, the combiner
  function <code>fMMCombiner</code> explicitly checks for <code>idx &lt; 0</code> because that
  indicates such an accumulator data item, whose value is <a href="#INITVAL">INITVAL</a>.</p>

<p>If you do not provide a combiner function, RenderScript uses the accumulator function in its
place, behaving as if there were a combiner function that looks like this:</p>

<pre>static void <i>combinerName</i>(<i>accumType</i> *accum, const <i>accumType</i> *other) {
  <i>accumulatorName</i>(accum, *other);
}</pre>

<p>A combiner function is mandatory if the kernel has more than one input, if the input data
  type is not the same as the accumulator data type, or if the accumulator function takes one
  or more <a href="#special-arguments">special arguments</a>.</p>
</li>

<li><p><code><a id="outconverter-function">outconverter(<i>outconverterName</i>)</a></code>
(optional): Specifies the name of the outconverter function for this
reduction kernel. After RenderScript combines all of the accumulator
data items, it calls this function to determine the result of the
reduction to return to Java. The function must be defined like
this:</p>

<pre>static void <i>outconverterName</i>(<i>resultType</i> *result, const <i>accumType</i> *accum) { … }</pre>

<p><code>result</code> is a pointer to a result data item (allocated but not initialized
by the RenderScript runtime) for this function to initialize with the result of the
reduction. <i>resultType</i> is the type of that data item, which need not be the same
as <i>accumType</i>. <code>accum</code> is a pointer to the final accumulator data item
computed by the <a href="#combiner-function">combiner function</a>.</p>

<p>If you do not provide an outconverter function, RenderScript copies the final accumulator
data item to the result data item, behaving as if there were an outconverter function that
looks like this:</p>

<pre>static void <i>outconverterName</i>(<i>accumType</i> *result, const <i>accumType</i> *accum) {
  *result = *accum;
}</pre>

<p>If you want a different result type than the accumulator data type, then the outconverter function is mandatory.</p>
</li>

</ul>

<p>Note that a kernel has input types, an accumulator data item type, and a result type,
  none of which need to be the same. For example, in
  the <a href="#example-findMinAndMax">findMinAndMax</a> kernel, the input
  type <code>long</code>, accumulator data item type <code>MinAndMax</code>, and result
  type <code>int2</code> are all different.</p>

<h4 id="assume">What can't you assume?</h4>

<p>You must not rely on the number of accumulator data items created by RenderScript for a
  given kernel launch.  There is no guarantee that two launches of the same kernel with the
  same input(s) will create the same number of accumulator data items.</p>

<p>You must not rely on the order in which RenderScript calls the initializer, accumulator, and
  combiner functions; it may even call some of them in parallel.  There is no guarantee that
  two launches of the same kernel with the same input will follow the same order.  The only
  guarantee is that only the initializer function will ever see an uninitialized accumulator
  data item. For example:</p>
<ul>
<li>There is no guarantee that all accumulator data items will be initialized before the
  accumulator function is called, although it will only be called on an initialized accumulator
  data item.</li>
<li>There is no guarantee on the order in which input Elements are passed to the accumulator
  function.</li>
<li>There is no guarantee that the accumulator function has been called for all input Elements
  before the combiner function is called.</li>
</ul>

<p>One consequence of this is that the <a href="#example-findMinAndMax">findMinAndMax</a>
  kernel is not deterministic: If the input contains more than one occurrence of the same
  minimum or maximum value, you have no way of knowing which occurrence the kernel will
  find.</p>

<h4 id="guarantee">What must you guarantee?</h4>

<p>Because the RenderScript system can choose to execute a kernel <a href="#assume">in many
    different ways</a>, you must follow certain rules to ensure that your kernel behaves the
    way you want. If you do not follow these rules, you may get incorrect results,
    nondeterministic behavior, or runtime errors.</p>

<p>The rules below often say that two accumulator data items must have
  "<a id="the-same">the same value</a>".  What does this mean?  That depends on what you want the kernel to do.  For
  a mathematical reduction such as <a href="#example-addint">addint</a>, it usually makes sense
  for "the same" to mean mathematical equality.  For a "pick any" search such
  as <a href="#example-findMinAndMax">findMinAndMax</a> ("find the location of minimum and
  maximum input values") where there might be more than one occurrence of identical input
  values, all locations of a given input value must be considered "the same".  You could write
  a similar kernel to "find the location of <em>leftmost</em> minimum and maximum input values"
  where (say) a minimum value at location 100 is preferred over an identical minimum value at location
  200; for this kernel, "the same" would mean identical <em>location</em>, not merely
  identical <em>value</em>, and the accumulator and combiner functions would have to be
  different than those for <a href="#example-findMinAndMax">findMinAndMax</a>.</p>

<p><strong>The initializer function must create an <i>identity value</i>.</strong>  That is,
  if <code><i>I</i></code> and <code><i>A</i></code> are accumulator data items initialized
  by the initializer function, and <code><i>I</i></code> has never been passed to the
  accumulator function (but <code><i>A</i></code> may have been), then</p>
<ul>
<li><code><i>combinerName</i>(&<i>A</i>, &<i>I</i>)</code> must
  leave <code><i>A</i></code> <a href="#the-same">the same</a></li>
<li><code><i>combinerName</i>(&<i>I</i>, &<i>A</i>)</code> must
  leave <code><i>I</i></code> <a href="#the-same">the same</a> as <code><i>A</i></code></li>
</ul>
<p class="note"><strong>Example:</strong> In the <a href="#example-addint">addint</a>
  kernel, an accumulator data item is initialized to zero. The combiner function for this
  kernel performs addition; zero is the identity value for addition.</p>
<div class="note">
<p><strong>Example:</strong> In the <a href="#example-findMinAndMax">findMinAndMax</a>
  kernel, an accumulator data item is initialized
  to <a href="#INITVAL"><code>INITVAL</code></a>.
<ul>
<li><code>fMMCombiner(&<i>A</i>, &<i>I</i>)</code> leaves <code><i>A</i></code> the same,
  because <code><i>I</i></code> is <code>INITVAL</code>.</li>
<li><code>fMMCombiner(&<i>I</i>, &<i>A</i>)</code> sets <code><i>I</i></code>
  to <code><i>A</i></code>, because <code><i>I</i></code> is <code>INITVAL</code>.</li>
</ul>
Therefore, <code>INITVAL</code> is indeed an identity value.
</p></div>

<p><strong>The combiner function must be <i>commutative</i>.</strong>  That is,
  if <code><i>A</i></code> and <code><i>B</i></code> are accumulator data items initialized
  by the initializer function, and that may have been passed to the accumulator function zero
  or more times, then <code><i>combinerName</i>(&<i>A</i>, &<i>B</i>)</code> must
  set <code><i>A</i></code> to <a href="#the-same">the same value</a>
  that <code><i>combinerName</i>(&<i>B</i>, &<i>A</i>)</code>
  sets <code><i>B</i></code>.</p>
<p class="note"><strong>Example:</strong> In the <a href="#example-addint">addint</a>
  kernel, the combiner function adds the two accumulator data item values; addition is
  commutative.</p>
<div class="note">
<p><strong>Example:</strong> In the <a href="#example-findMinAndMax">findMinAndMax</a> kernel,
<pre>
fMMCombiner(&<i>A</i>, &<i>B</i>)
</pre>
is the same as
<pre>
<i>A</i> = minmax(<i>A</i>, <i>B</i>)
</pre>
and <code>minmax</code> is commutative, so <code>fMMCombiner</code> is also.
</p>
</div>

<p><strong>The combiner function must be <i>associative</i>.</strong>  That is,
  if <code><i>A</i></code>, <code><i>B</i></code>, and <code><i>C</i></code> are
  accumulator data items initialized by the initializer function, and that may have been passed
  to the accumulator function zero or more times, then the following two code sequences must
  set <code><i>A</i></code> to <a href="#the-same">the same value</a>:</p>
<ul>
<li><pre>
<i>combinerName</i>(&<i>A</i>, &<i>B</i>);
<i>combinerName</i>(&<i>A</i>, &<i>C</i>);
</pre></li>
<li><pre>
<i>combinerName</i>(&<i>B</i>, &<i>C</i>);
<i>combinerName</i>(&<i>A</i>, &<i>B</i>);
</pre></li>
</ul>
<div class="note">
<p><strong>Example:</strong> In the <a href="#example-addint">addint</a> kernel, the
  combiner function adds the two accumulator data item values:
<ul>
<li><pre>
<i>A</i> = <i>A</i> + <i>B</i>
<i>A</i> = <i>A</i> + <i>C</i>
// Same as
//   <i>A</i> = (<i>A</i> + <i>B</i>) + <i>C</i>
</pre></li>
<li><pre>
<i>B</i> = <i>B</i> + <i>C</i>
<i>A</i> = <i>A</i> + <i>B</i>
// Same as
//   <i>A</i> = <i>A</i> + (<i>B</i> + <i>C</i>)
//   <i>B</i> = <i>B</i> + <i>C</i>
</pre></li>
</ul>
Addition is associative, and so the combiner function is also.
</p>
</div>
<div class="note">
<p><strong>Example:</strong> In the <a href="#example-findMinAndMax">findMinAndMax</a> kernel,
<pre>
fMMCombiner(&<i>A</i>, &<i>B</i>)
</pre>
is the same as
<pre>
<i>A</i> = minmax(<i>A</i>, <i>B</i>)
</pre>
So the two sequences are
<ul>
<li><pre>
<i>A</i> = minmax(<i>A</i>, <i>B</i>)
<i>A</i> = minmax(<i>A</i>, <i>C</i>)
// Same as
//   <i>A</i> = minmax(minmax(<i>A</i>, <i>B</i>), <i>C</i>)
</pre></li>
<li><pre>
<i>B</i> = minmax(<i>B</i>, <i>C</i>)
<i>A</i> = minmax(<i>A</i>, <i>B</i>)
// Same as
//   <i>A</i> = minmax(<i>A</i>, minmax(<i>B</i>, <i>C</i>))
//   <i>B</i> = minmax(<i>B</i>, <i>C</i>)
</pre></li>
</ul>
<code>minmax</code> is associative, and so <code>fMMCombiner</code> is also.
</p>
</div>

<p><strong>The accumulator function and combiner function together must obey the <i>basic
  folding rule</i>.</strong>  That is, if <code><i>A</i></code>
  and <code><i>B</i></code> are accumulator data items, <code><i>A</i></code> has been
  initialized by the initializer function and may have been passed to the accumulator function
  zero or more times, <code><i>B</i></code> has not been initialized, and <i>args</i> is
  the list of input arguments and special arguments for a particular call to the accumulator
  function, then the following two code sequences must set <code><i>A</i></code>
  to <a href="#the-same">the same value</a>:</p>
<ul>
<li><pre>
<i>accumulatorName</i>(&<i>A</i>, <i>args</i>);  // statement 1
</pre></li>
<li><pre>
<i>initializerName</i>(&<i>B</i>);        // statement 2
<i>accumulatorName</i>(&<i>B</i>, <i>args</i>);  // statement 3
<i>combinerName</i>(&<i>A</i>, &<i>B</i>);       // statement 4
</pre></li>
</ul>
<div class="note">
<p><strong>Example:</strong> In the <a href="#example-addint">addint</a> kernel, for an input value <i>V</i>:
<ul>
<li>Statement 1 is the same as <code>A += <i>V</i></code></li>
<li>Statement 2 is the same as <code>B = 0</code></li>
<li>Statement 3 is the same as <code>B += <i>V</i></code>, which is the same as <code>B = <i>V</i></code></li>
<li>Statement 4 is the same as <code>A += B</code>, which is the same as <code>A += <i>V</i></code></li>
</ul>
Statements 1 and 4 set <code><i>A</i></code> to the same value, and so this kernel obeys the
basic folding rule.
</p>
</div>
<div class="note">
<p><strong>Example:</strong> In the <a href="#example-findMinAndMax">findMinAndMax</a> kernel, for an input
  value <i>V</i> at coordinate <i>X</i>:
<ul>
<li>Statement 1 is the same as <code>A = minmax(A, IndexedVal(<i>V</i>, <i>X</i>))</code></li>
<li>Statement 2 is the same as <code>B = <a href="#INITVAL">INITVAL</a></code></li>
<li>Statement 3 is the same as
<pre>
B = minmax(B, IndexedVal(<i>V</i>, <i>X</i>))
</pre>
which, because <i>B</i> is the initial value, is the same as
<pre>
B = IndexedVal(<i>V</i>, <i>X</i>)
</pre>
</li>
<li>Statement 4 is the same as
<pre>
A = minmax(A, B)
</pre>
which is the same as
<pre>
A = minmax(A, IndexedVal(<i>V</i>, <i>X</i>))
</pre>
</li>
</ul>
Statements 1 and 4 set <code><i>A</i></code> to the same value, and so this kernel obeys the
basic folding rule.
</p>
</div>

<h3 id="calling-reduction-kernel">Calling a reduction kernel from Java code</h3>

<p>For a reduction kernel named <i>kernelName</i> defined in the
file <code><i>filename</i>.rs</code>, there are three methods reflected in the
class <code>ScriptC_<i>filename</i></code>:</p>

<pre>
// Method 1
public <i>javaFutureType</i> reduce_<i>kernelName</i>(Allocation ain1, <i>&hellip;,</i>
                                        Allocation ain<i>N</i>);

// Method 2
public <i>javaFutureType</i> reduce_<i>kernelName</i>(Allocation ain1, <i>&hellip;,</i>
                                        Allocation ain<i>N</i>,
                                        Script.LaunchOptions sc);

// Method 3
public <i>javaFutureType</i> reduce_<i>kernelName</i>(<i><a href="#devec">devecSiIn1Type</a></i>[] in1, &hellip;,
                                        <i><a href="#devec">devecSiInNType</a></i>[] in<i>N</i>);
</pre>

<p>Here are some examples of calling the <a href="#example-addint">addint</a> kernel:</p>
<pre>
ScriptC_example script = new ScriptC_example(mRenderScript);

// 1D array
//   and obtain answer immediately
int input1[] = <i>&hellip;</i>;
int sum1 = script.reduce_addint(input1).get();  // Method 3

// 2D allocation
//   and do some additional work before obtaining answer
Type.Builder typeBuilder =
  new Type.Builder(mRenderScript, Element.I32(mRenderScript));
typeBuilder.setX(<i>&hellip;</i>);
typeBuilder.setY(<i>&hellip;</i>);
// createTyped() is a static factory method of Allocation
Allocation input2 = Allocation.createTyped(mRenderScript, typeBuilder.create());
<i>populateSomehow</i>(input2);  // fill in input Allocation with data
// result_int is a class nested in the reflected script class
ScriptC_example.result_int result2 = script.reduce_addint(input2);  // Method 1
<i>doSomeAdditionalWork</i>(); // might run at same time as reduction
int sum2 = result2.get();
</pre>

<p><strong>Method 1</strong> has one input {@link android.renderscript.Allocation} argument for
  every input argument in the kernel's <a href="#accumulator-function">accumulator
    function</a>. The RenderScript runtime checks to ensure that all of the input Allocations
  have the same dimensions and that the {@link android.renderscript.Element} type of each of
  the input Allocations matches that of the corresponding input argument of the accumulator
  function's prototype. If any of these checks fail, RenderScript throws an exception. The
  kernel executes over every coordinate in those dimensions.</p>

<p><strong>Method 2</strong> is the same as Method 1 except that Method 2 takes an additional
  argument <code>sc</code> that can be used to limit the kernel execution to a subset of the
  coordinates.</p>

<p><strong><a id="reduce-method-3">Method 3</a></strong> is the same as Method 1 except that
  instead of taking Allocation inputs it takes Java array inputs. This is a convenience that
  saves you from having to write code to explicitly create an Allocation and copy data to it
  from a Java array. <em>However, using Method 3 instead of Method 1 does not increase the
  performance of the code</em>. For each input array, Method 3 creates a temporary
  1-dimensional Allocation with the appropriate {@link android.renderscript.Element} type and
  {@link android.renderscript.Allocation#setAutoPadding} enabled, and copies the array to the
  Allocation as if by the appropriate <code>copyFrom()</code> method of {@link
  android.renderscript.Allocation}. It then calls Method 1, passing those temporary
  Allocations.</p>
<p class="note"><strong>NOTE:</strong> If your application will make multiple kernel calls with
  the same array, or with different arrays of the same dimensions and Element type, you may improve
  performance by explicitly creating, populating, and reusing Allocations yourself, instead of
  by using Method 3.</p>
<p><strong><i><a id="javaFutureType">javaFutureType</a></i></strong>,
  the return type of the reflected reduction methods, is a reflected
  static nested class within the <code>ScriptC_<i>filename</i></code>
  class. It represents the future result of a reduction
  kernel run. To obtain the actual result of the run, call
  the <code>get()</code> method of that class, which returns a value
  of type <i>javaResultType</i>. <code>get()</code> is <a href="#asynchronous-model">synchronous</a>.</p>

<pre>
public class ScriptC_<i>filename</i> extends ScriptC {
  public static class <i>javaFutureType</i> {
    public <i>javaResultType</i> get() { &hellip; }
  }
}
</pre>

<p><strong><i>javaResultType</i></strong> is determined from the <i>resultType</i> of the
  <a href="#outconverter-function">outconverter function</a>. Unless <i>resultType</i> is an
  unsigned type (scalar, vector, or array), <i>javaResultType</i> is the directly corresponding
  Java type. If <i>resultType</i> is an unsigned type and there is a larger Java signed type,
  then <i>javaResultType</i> is that larger Java signed type; otherwise, it is the directly
  corresponding Java type. For example:</p>
<ul>
<li>If <i>resultType</i> is <code>int</code>, <code>int2</code>, or <code>int[15]</code>,
  then <i>javaResultType</i> is <code>int</code>, <code>Int2</code>,
  or <code>int[]</code>, respectively. All values of <i>resultType</i> can be represented
  by <i>javaResultType</i>.</li>
<li>If <i>resultType</i> is <code>uint</code>, <code>uint2</code>, or <code>uint[15]</code>,
  then <i>javaResultType</i> is <code>long</code>, <code>Long2</code>,
  or <code>long[]</code>, respectively. All values of <i>resultType</i> can be represented
  by <i>javaResultType</i>.</li>
<li>If <i>resultType</i> is <code>ulong</code>, <code>ulong2</code>,
  or <code>ulong[15]</code>, then <i>javaResultType</i>
  is <code>long</code>, <code>Long2</code>, or <code>long[]</code>, respectively. There are
  certain values of <i>resultType</i> that cannot be represented by <i>javaResultType</i>.</li>
</ul>

<p><strong><i>javaFutureType</i></strong> is the future result type corresponding
  to the <i>resultType</i> of the <a href="#outconverter-function">outconverter
  function</a>.</p>
<ul>
<li>If <i>resultType</i> is not an array type, then <i>javaFutureType</i>
  is <code>result_<i>resultType</i></code>.</li>
<li>If <i>resultType</i> is an array of length <i>Count</i> with members of type <i>memberType</i>,
  then <i>javaFutureType</i> is <code>resultArray<i>Count</i>_<i>memberType</i></code>.</li>
</ul>

<p>For example:</p>

<pre>
public class ScriptC_<i>filename</i> extends ScriptC {
  // for kernels with int result
  public static class result_int {
    public int get() { &hellip; }
  }

  // for kernels with int[10] result
  public static class resultArray10_int {
    public int[] get() { &hellip; }
  }

  // for kernels with int2 result
  //   note that the Java type name "Int2" is not the same as the script type name "int2"
  public static class result_int2 {
    public Int2 get() { &hellip; }
  }

  // for kernels with int2[10] result
  //   note that the Java type name "Int2" is not the same as the script type name "int2"
  public static class resultArray10_int2 {
    public Int2[] get() { &hellip; }
  }

  // for kernels with uint result
  //   note that the Java type "long" is a wider signed type than the unsigned script type "uint"
  public static class result_uint {
    public long get() { &hellip; }
  }

  // for kernels with uint[10] result
  //   note that the Java type "long" is a wider signed type than the unsigned script type "uint"
  public static class resultArray10_uint {
    public long[] get() { &hellip; }
  }

  // for kernels with uint2 result
  //   note that the Java type "Long2" is a wider signed type than the unsigned script type "uint2"
  public static class result_uint2 {
    public Long2 get() { &hellip; }
  }

  // for kernels with uint2[10] result
  //   note that the Java type "Long2" is a wider signed type than the unsigned script type "uint2"
  public static class resultArray10_uint2 {
    public Long2[] get() { &hellip; }
  }
}
</pre>

<p>If <i>javaResultType</i> is an object type (including an array type), each call
  to <code><i>javaFutureType</i>.get()</code> on the same instance will return the same
  object.</p>

<p>If <i>javaResultType</i> cannot represent all values of type <i>resultType</i>, and a
  reduction kernel produces an unrepresentable value,
  then <code><i>javaFutureType</i>.get()</code> throws an exception.</p>

<h4 id="devec">Method 3 and <i>devecSiInXType</i></h4>

<p><strong><i>devecSiInXType</i></strong> is the Java type corresponding to
  the <i>inXType</i> of the corresponding argument of
  the <a href="#accumulator-function">accumulator function</a>. Unless <i>inXType</i> is an
  unsigned type or a vector type, <i>devecSiInXType</i> is the directly corresponding Java
  type. If <i>inXType</i> is an unsigned scalar type, then <i>devecSiInXType</i> is the
  Java type directly corresponding to the signed scalar type of the same
  size. If <i>inXType</i> is a signed vector type, then <i>devecSiInXType</i> is the Java
  type directly corresponding to the vector component type. If <i>inXType</i> is an unsigned
  vector type, then <i>devecSiInXType</i> is the Java type directly corresponding to the
  signed scalar type of the same size as the vector component type. For example:</p>
<ul>
<li>If <i>inXType</i> is <code>int</code>, then <i>devecSiInXType</i>
  is <code>int</code>.</li>
<li>If <i>inXType</i> is <code>int2</code>, then <i>devecSiInXType</i>
  is <code>int</code>. The array is a <em>flattened</em> representation: It has twice as
  many <em>scalar</em> Elements as the Allocation has 2-component <em>vector</em>
  Elements. This is the same way that the <code>copyFrom()</code> methods of {@link
  android.renderscript.Allocation} work.</li>
<li>If <i>inXType</i> is <code>uint</code>, then <i>devecSiInXType</i>
  is <code>int</code>. A signed value in the Java array is interpreted as an unsigned value of
  the same bitpattern in the Allocation. This is the same way that the <code>copyFrom()</code>
  methods of {@link android.renderscript.Allocation} work.</li>
<li>If <i>inXType</i> is <code>uint2</code>, then <i>devecSiInXType</i>
  is <code>int</code>. This is a combination of the way <code>int2</code> and <code>uint</code>
  are handled: The array is a flattened representation, and Java array signed values are
  interpreted as RenderScript unsigned Element values.</li>
</ul>

<p>Note that for <a href="#reduce-method-3">Method 3</a>, input types are handled differently
than result types:</p>

<ul>
<li>A script's vector input is flattened on the Java side, whereas a script's vector result is not.</li>
<li>A script's unsigned input is represented as a signed input of the same size on the Java
  side, whereas a script's unsigned result is represented as a widened signed type on the Java
  side (except in the case of <code>ulong</code>).</li>
</ul>

<h3 id="more-example">More example reduction kernels</h3>

<pre id="dot-product">
#pragma rs reduce(dotProduct) \
  accumulator(dotProductAccum) combiner(dotProductSum)

// Note: No initializer function -- therefore,
// each accumulator data item is implicitly initialized to 0.0f.

static void dotProductAccum(float *accum, float in1, float in2) {
  *accum += in1*in2;
}

// combiner function
static void dotProductSum(float *accum, const float *val) {
  *accum += *val;
}
</pre>

<pre>
// Find a zero Element in a 2D allocation; return (-1, -1) if none
#pragma rs reduce(fz2) \
  initializer(fz2Init) \
  accumulator(fz2Accum) combiner(fz2Combine)

static void fz2Init(int2 *accum) { accum->x = accum->y = -1; }

static void fz2Accum(int2 *accum,
                     int inVal,
                     int x /* special arg */,
                     int y /* special arg */) {
  if (inVal==0) {
    accum->x = x;
    accum->y = y;
  }
}

static void fz2Combine(int2 *accum, const int2 *accum2) {
  if (accum2->x >= 0) *accum = *accum2;
}
</pre>

<pre>
// Note that this kernel returns an array to Java
#pragma rs reduce(histogram) \
  accumulator(hsgAccum) combiner(hsgCombine)

#define BUCKETS 256
typedef uint32_t Histogram[BUCKETS];

// Note: No initializer function --
// therefore, each bucket is implicitly initialized to 0.

static void hsgAccum(Histogram *h, uchar in) { ++(*h)[in]; }

static void hsgCombine(Histogram *accum,
                       const Histogram *addend) {
  for (int i = 0; i < BUCKETS; ++i)
    (*accum)[i] += (*addend)[i];
}

// Determines the mode (most frequently occurring value), and returns
// the value and the frequency.
//
// If multiple values have the same highest frequency, returns the lowest
// of those values.
//
// Shares functions with the histogram reduction kernel.
#pragma rs reduce(mode) \
  accumulator(hsgAccum) combiner(hsgCombine) \
  outconverter(modeOutConvert)

static void modeOutConvert(int2 *result, const Histogram *h) {
  uint32_t mode = 0;
  for (int i = 1; i < BUCKETS; ++i)
    if ((*h)[i] > (*h)[mode]) mode = i;
  result->x = mode;
  result->y = (*h)[mode];
}
</pre>