-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathl3pdftools.dtx
482 lines (469 loc) · 16.9 KB
/
l3pdftools.dtx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
% \iffalse meta-comment
%
%% File: l3pdftools.dtx
%
% Copyright (C) 2018-2024 The LaTeX Project
%
% It may be distributed and/or modified under the conditions of the
% LaTeX Project Public License (LPPL), either version 1.3c of this
% license or (at your option) any later version. The latest version
% of this license is in the file
%
% http://www.latex-project.org/lppl.txt
%
% This file is part of the "LaTeX PDF management testphase bundle" (The Work in LPPL)
% and all files in that bundle must be distributed together.
%
% -----------------------------------------------------------------------
%
% The development version of the bundle can be found at
%
% https://github.com/latex3/pdfresources
%
% for those people who are interested.
%
%<*driver>
\DocumentMetadata{pdfstandard=A-2b}
\documentclass[full]{l3doc}
\usepackage{array,booktabs}
\hypersetup{pdfauthor=The LaTeX Project,pdftitle=l3pdftools (LaTeX PDF management testphase bundle)}
\providecommand\potentialclash{\noindent\llap{\dbend\ }}
\begin{document}
\DocInput{\jobname.dtx}
\end{document}
%</driver>
% \fi
%
% \title{^^A
% The \pkg{l3pdftools} module\\ temporary collection of pdf related commands ^^A
% \\ LaTeX PDF management testphase bundle
% }
%
% \author{^^A
% The \LaTeX{} Project\thanks
% {^^A
% E-mail:
% \href{mailto:latex-team@latex-project.org}
% {latex-team@latex-project.org}^^A
% }^^A
% }
%
% \date{Version 0.96o, released 2024-12-20}
%
% \maketitle
% \begin{documentation}
%
% \section{\pkg{l3pdftools} documentation}
%
% This module collects a number of candidate commands for the l3pdf module
%
% \begin{function}[EXP,added=2021-02-14]
% {\pdf_name_from_unicode_e:n}
% \begin{syntax}
% \cs{pdf_name_from_unicode_e:n} \Arg{content}
% \end{syntax}
% This converts \meta{content} to a format suitable for a PDF Name.
% The output depends on the backend: For almost all backends
% it will first expand the content with \cs{text_expand:n},
% then escape it in the way needed in a PDF Name with
% \cs{str_convert_pdfname:e}, and finally prepend a slash.
% Typically such names use only ascii;
% non-ascii input is supported too, but it should be utf8 encoded. For example\\
% |\pdf_name_from_unicode_e:n {A~B\c_percent_str C\c_hash_str D€}|\\
% will output |/A#20B#25C#23D#E2#82#AC|.
%
% With dvips it will expand the content with \cs{text_expand:n} and then wrap it
% in a |cvn| operation (\enquote{convert to name}).
% So the example above will output |(A B%C#D€) cvn| to
% the postscript. The content should not contain unbalanced parentheses with dvips.
%
% \end{function}
% \begin{function}[added=2020-07-04]
% {\pdf_string_from_unicode:nnN}
% \begin{syntax}
% \cs{pdf_string_from_unicode:nnN} \Arg{format} \Arg{content} \Arg{tlvar}
% \end{syntax}
% This converts \meta{content} following the rules defined by \meta{format} and stores
% the result in \meta{tlvar}. The assignment is done locally.
% Non-ascii input should be utf8 encoded.
% Currently the following formats exist:
% \begin{description}
% \item[utf8/string-raw]
% this converts with \cs{str_set_convert:Nnnn} into utf8/string.
% \item[utf8/string]
% this converts into utf8/string and adds parentheses around the result.%
% \item[utf8/URI-raw]
% this converts with \cs{str_set_convert:Nnnn} into utf8/url and
% then converts the reserved characters and the digits back from the
% percent encoding. Parentheses are escaped.
% \item[utf8/URI]
% this converts into utf8/URI and adds parentheses around the result.%
% \item[utf16/string-raw]
% this converts with \cs{str_set_convert:Nnnn} into utf16/string.
% \item[utf16/string]
% this converts into utf16/string and adds parentheses around the result.
% \item[utf16/hex-raw]
% this converts into utf16/hex
% \item[utf16/hex]
% this converts into utf16/hex and adds angle brackets around the result.
% \end{description}
% \end{function}
%
% \subsection{BDC operator / Properties resource}
% \begin{NOTE}{UF}
% we need a switch for the case that the resource should be added to
% xform resource instead of a page resources, see pdfbase.sty
% - xdvipdfmx: looks fine, the resource is added to the xform resource automatically
% - pdftex should now work okay too
% \end{NOTE}
% Entries to the /Properties dictionary in the page resources can
% be added with dvips only through side-effects: if a BDC-mark is created
% dvips/ghostscript will automatically create the necessary objects and names.
% To get a sensible abstraction, some of the following commands do
% the same with the other backends if the
% core management code has been activated. This means that the behaviour
% of these commands differs depending on whether the management is active.
% The \cs{pdf_bdcobject:..} commands should only be used
% if the management is active.
%
%
% \begin{function}[updated = 2024-10-23]
% {
% \pdf_bdc:nn, \pdf_bdc:ee
% }
% \begin{syntax}
% \cs{pdf_bdc:nn} \Arg{tag} \Arg{dictionary content}
% \end{syntax}
% This command adds a BDC marked content operator to the current page stream.
% \meta{tag} is the tag of this operator (without the leading slash),
% \meta{dictionary content} is the content of the second argument.
% With the exception of the dvips backend,
% the dictionary content is added inline in the stream. Such an inline BDC
% is typically better for ActualText additions as some PDF readers ignore
% entries given in properties.
% \end{function}
%
% \begin{function}[added = 2023-08-18]
% {
% \pdf_bdc_shipout:ee
% }
% \begin{syntax}
% \cs{pdf_bdc_shipout:ee} \Arg{tag} \Arg{dictionary content}
% \end{syntax}
% This command adds a BDC marked content operator to the current page stream.
% \meta{tag} is the tag of this operator (without the leading slash),
% \meta{dictionary content} is the content of the second argument.
%
% Unlike with \cs{pdf_bdc:ee}, the arguments are not expanded when the
% command is \emph{used}, but only at \emph{shipout}.
% This requires new engines which
% allow the keyword \texttt{shipout} to be used with the primitives
% \cs{special} and \cs{pdfliteral}. As with \cs{pdf_bdc:ee},
% the content of \meta{dictionary content} is added inline in the stream
% with most engines (not on the dvips + ps2pdf route).
% This means that this command can also be used if such an inline dictionary is preferred.
%
% The command requires current engines: if a too old engine is detected,
% the first use raises an error and the command is disabled afterwards!
%
% \end{function}
%
% \begin{function}[added = 2020-07-03]
% {
% \pdf_bdcobject:nn
% }
% \begin{syntax}
% \cs{pdf_bdcobject:nn} \Arg{tag} \Arg{object name}
% \end{syntax}
% This command adds a BDC marked content operator to the current page stream.
% \meta{tag} is the tag of this operator (without the leading slash),
% \meta{object name} is the name of a dictionary object reserved with
% \cs{pdf_object_new:n} and filled with \cs{pdf_object_write:nnn} with
% the properties of the BDC. Reusing a predefined object can save space
% but the command works correctly
% only if the resources management has been activated and should be used only
% if this can be ensured.
% \end{function}
% \begin{function}[updated = 2020-07-03]
% {
% \pdf_bdcobject:n
% }
% \begin{syntax}
% \cs{pdf_bdcobject:n} \Arg{tag}
% \end{syntax}
% This command adds a BDC marked content operator to the current page stream.
% \meta{tag} is the tag of this operator (without the leading slash).
% As object this command uses the last anonymous dictionary object created with
% \cs{pdf_object_unnamed_write:nn}. It lies in the responsibility of the user that the last
% object is the wanted one. Like with \cs{pdf_bdcobject:nn} the command works correctly
% only if the resources management has been activated and should be used only
% if this can be ensured.
% \end{function}
% \begin{function}[added = 2019-10-17]
% {
% \pdf_bmc:n
% }
% \begin{syntax}
% \cs{pdf_bmc:n} \Arg{tag}
% \end{syntax}
% This command creates a BMC marked content operator. The argument is the
% tag without the leading slash. It can be used e.g. for simple artifact
% markers.
% \end{function}
% \begin{function}[added = 2019-06-30]
% {
% \pdf_emc:
% }
% \begin{syntax}
% \cs{pdf_emc:}
% \end{syntax}
% This command closes a marked content sequence opened with
% \cs{pdf_bdc:nn}, \cs{pdf_bdcobject:nn} or \cs{pdf_bmc:n}.
% It should be on the same page as the opening command.
%
% \begin{verbatim}
% \pdf_object_new:n {module/objA}
% \pdf_object_write:nnn {module/objA}{dict}{/Type/Artifact}
% \pdf_bdcobject:nn {Span}{module/objA}
% text
% \pdf_emc:
% \end{verbatim}
% \end{function}
%
%
% \end{documentation}
%
% \begin{implementation}
%
% \section{\pkg{l3pdftools} implementation}
%
% \begin{macrocode}
%<*header>
\ProvidesExplPackage{l3pdftools}{2024-12-20}{0.96o}
{candidate commands for l3pdf---LaTeX PDF management testphase bundle}
%</header>
% \end{macrocode}
%
% \begin{macrocode}
%<@@=pdf>
%<*package>
% \end{macrocode}
% \subsection{Conversions and export functions}
% \begin{macro}{\pdf_name_from_unicode_e:n,\pdf_name_from_unicode_e:V}
% \begin{macrocode}
% Make the e-type variant of the pdfname converter available.
\cs_generate_variant:Nn \str_convert_pdfname:n { e }
% Expandable public entry point: the argument is handed on unchanged to
% \__kernel_pdf_name_from_unicode_e:n, which is defined outside this file.
\cs_new:Nn \pdf_name_from_unicode_e:n
  { \__kernel_pdf_name_from_unicode_e:n {#1} }
% Variant taking the content from a variable.
\cs_generate_variant:Nn \pdf_name_from_unicode_e:n { V }
% \end{macrocode}
% \end{macro}
%
% The convert command must use a different value for the source encoding
% depending on the engine. Until the PR in str-convert is active we add the alias here
% too.
% \begin{macrocode}
% Register the source encoding alias "default" used by the converters:
% empty (native input) for LuaTeX and XeTeX, utf8 for every other engine.
\bool_lazy_or:nnTF
  { \sys_if_engine_luatex_p: }
  { \sys_if_engine_xetex_p: }
  { \prop_gput:Nnn \g__str_alias_prop { default } { } }
  { \prop_gput:Nnn \g__str_alias_prop { default } { utf8 } }
% \end{macrocode}
% \begin{macro}{\pdf_string_from_unicode:nnN}
% \begin{macrocode}
% Error shown when no converter exists for the requested format (#1).
\msg_new:nnn { pdfmanagement } { unknown-convert }
  {
    Unknown~string~conversion~method~'#1'!
  }
% \pdf_string_from_unicode:nnN {format} {content} <str var>
% Dispatches to the converter \@@_string_from_unicode_<format>:nN, which
% stores the converted {content} locally in <str var>.
% The command performs assignments and may raise an error, so it is not
% expandable and is therefore defined as protected.
\cs_new_protected:Npn \pdf_string_from_unicode:nnN #1 #2 #3
  {
    \cs_if_exist_use:cF { @@_string_from_unicode_#1:nN }
      {
        % The message is declared in the module pdfmanagement, so it has
        % to be raised from that module too; with any other module name
        % the message would not be found.
        \msg_error:nnn { pdfmanagement } { unknown-convert } {#1}
        % Unknown format: discard the pending {content} and <str var>.
        \use_none:nn
      }
    {#2} #3
  }
\cs_generate_variant:Nn \pdf_string_from_unicode:nnN { nVN }
% \end{macrocode}
% \end{macro}
% Most converters are simply wrappers around the str-convert commands and so
% use the same names, with the addition raw if no delimiters are added.
% The exception is the one for urls: it reverts most of the percent encodings
% and escapes the parentheses.
% That's why its name is URI instead of url. The current code is probably quite
% slow and will need a replacement.
% \begin{macro}{ @@_string_from_unicode_utf8/string-raw:nN }
% \begin{macro}{ @@_string_from_unicode_utf8/string:nN }
% \begin{macro}{ @@_string_from_unicode_utf8/URI-raw:nN }
% \begin{macro}{ @@_string_from_unicode_utf8/URI:nN }
% \begin{macro}{ @@_string_from_unicode_utf16/string-raw:nN }
% \begin{macro}{ @@_string_from_unicode_utf16/string:nN }
% \begin{macro}{ @@_string_from_unicode_utf16/hex-raw:nN }
% \begin{macro}{ @@_string_from_unicode_utf16/hex:nN }
% \begin{macrocode}
%% TODO Names need a review when it is clear which converters
%% are actually needed
%% string conversions and printing
%% we assume here that the text purify step has been done. The input is
%% a list of (utf8) chars.
%% str convert, not expandable.
% filespec (attachment view) tests:
% utf8: gr\303\274\303\237e.txt
% %doesn't work, umlaut wrong,
% utf8 with BOM \357\273\277gr\303\274\303\237e.txt
% %doesn't work, umlaut wrong, bom visible
% utf16 with BE: (FEFF)
% \376\377\000g\000r\000\374\000\337\000e\000.\000t\000x\000t %works
% xetex converts to <feff0067007200fc00df0065002e007400780074>
% utf16 with BE / HEX: <FEFF0067007200FC00DF0065002E007400780074> works
% bookmarks: as pdfoutline uses () currently only utf16 with BE is usable.
% check if one can use HEX too when directly writing the object
% ==========
% uri: utf16BE/string seems not to work, hex neither
% utf8/string works but not on macos,
% so a specific utf8/url variant is needed
% ==========
% "input" is utf8 for pdftex, empty (native) for unicode engine
% commands to output literal strings (...)
% Format utf8/string-raw: #1 is the content, #2 the str variable that
% receives the result of the conversion default -> utf8/string.
\cs_new_protected:cpn { @@_string_from_unicode_utf8/string-raw:nN } #1 #2
  { \str_set_convert:Nnnn #2 {#1} { default } { utf8/string } }
% Format utf8/string: run the utf8/string-raw converter on #1, then
% enclose the content of the str variable #2 in parentheses.
\cs_new_protected:cpn { @@_string_from_unicode_utf8/string:nN } #1 #2
  {
    \use:c { @@_string_from_unicode_utf8/string-raw:nN } {#1} #2
    \str_put_right:Nn #2 { ) }
    \str_put_left:Nn  #2 { ( }
  }
% special url command:
% Format utf8/URI-raw: #1 is the content, #2 the str variable that
% receives the result.
% The definition is x-type (cpx): the replacement text is expanded once at
% definition time, so \c_percent_str, \c_hash_str etc. are stored as their
% characters, while the commands protected by \exp_not:N are kept as they are.
\cs_new_protected:cpx { @@_string_from_unicode_utf8/URI-raw:nN } #1 #2
{
% Step 1: convert the content with the utf8/url converter.
\exp_not:N \str_set_convert:Nnnn #2
{ #1 }
{ default }
{utf8/url}
% Step 2: turn the percent codes of the following characters back into
% the characters themselves: : / # [ ] @ ! $ & ' * + , ; = ?
\exp_not:N \str_replace_all:Nnn #2 {\c_percent_str 3A} {:}
\exp_not:N \str_replace_all:Nnn #2 {\c_percent_str 2F} {/}
\exp_not:N \str_replace_all:Nnn #2 {\c_percent_str 23} {\c_hash_str}
\exp_not:N \str_replace_all:Nnn #2 {\c_percent_str 5B} {[}
\exp_not:N \str_replace_all:Nnn #2 {\c_percent_str 5D} {]}
\exp_not:N \str_replace_all:Nnn #2 {\c_percent_str 40} {\c_atsign_str}
\exp_not:N \str_replace_all:Nnn #2 {\c_percent_str 21} {!}
\exp_not:N \str_replace_all:Nnn #2 {\c_percent_str 24} {\c_dollar_str}
\exp_not:N \str_replace_all:Nnn #2 {\c_percent_str 26} {\c_ampersand_str}
\exp_not:N \str_replace_all:Nnn #2 {\c_percent_str 27} {'}
\exp_not:N \str_replace_all:Nnn #2 {\c_percent_str 2A} {*}
\exp_not:N \str_replace_all:Nnn #2 {\c_percent_str 2B} {+}
\exp_not:N \str_replace_all:Nnn #2 {\c_percent_str 2C} {,}
\exp_not:N \str_replace_all:Nnn #2 {\c_percent_str 3B} {;}
\exp_not:N \str_replace_all:Nnn #2 {\c_percent_str 3D} {=}
\exp_not:N \str_replace_all:Nnn #2 {\c_percent_str 3F} {?}
% Step 3: the same for the digits 0-9 (codes 30-39).
\exp_not:N \str_replace_all:Nnn #2 {\c_percent_str 30} {0}
\exp_not:N \str_replace_all:Nnn #2 {\c_percent_str 31} {1}
\exp_not:N \str_replace_all:Nnn #2 {\c_percent_str 32} {2}
\exp_not:N \str_replace_all:Nnn #2 {\c_percent_str 33} {3}
\exp_not:N \str_replace_all:Nnn #2 {\c_percent_str 34} {4}
\exp_not:N \str_replace_all:Nnn #2 {\c_percent_str 35} {5}
\exp_not:N \str_replace_all:Nnn #2 {\c_percent_str 36} {6}
\exp_not:N \str_replace_all:Nnn #2 {\c_percent_str 37} {7}
\exp_not:N \str_replace_all:Nnn #2 {\c_percent_str 38} {8}
\exp_not:N \str_replace_all:Nnn #2 {\c_percent_str 39} {9}
% Step 4: parentheses (codes 28 and 29) are restored with a backslash
% in front of them.
\exp_not:N \str_replace_all:Nnn #2 {\c_percent_str 28} {\c_backslash_str(}
\exp_not:N \str_replace_all:Nnn #2 {\c_percent_str 29} {\c_backslash_str)}
}
% Format utf8/URI: run the utf8/URI-raw converter on #1, then enclose the
% content of the str variable #2 in parentheses.
\cs_new_protected:cpn { @@_string_from_unicode_utf8/URI:nN } #1 #2
  {
    \use:c { @@_string_from_unicode_utf8/URI-raw:nN } {#1} #2
    \str_put_right:Nn #2 { ) }
    \str_put_left:Nn  #2 { ( }
  }
% with utf16 with BE marker
% Format utf16/string-raw: #1 is the content, #2 the str variable that
% receives the result of the conversion default -> utf16/string.
\cs_new_protected:cpn { @@_string_from_unicode_utf16/string-raw:nN } #1 #2
  { \str_set_convert:Nnnn #2 {#1} { default } { utf16/string } }
% Format utf16/string: run the utf16/string-raw converter on #1, then
% enclose the content of the str variable #2 in parentheses.
\cs_new_protected:cpn { @@_string_from_unicode_utf16/string:nN } #1 #2
  {
    \use:c { @@_string_from_unicode_utf16/string-raw:nN } {#1} #2
    \str_put_right:Nn #2 { ) }
    \str_put_left:Nn  #2 { ( }
  }
% Format utf16/hex-raw: #1 is the content, #2 the str variable that
% receives the result of the conversion default -> utf16/hex.
\cs_new_protected:cpn { @@_string_from_unicode_utf16/hex-raw:nN } #1 #2
  { \str_set_convert:Nnnn #2 {#1} { default } { utf16/hex } }
% Format utf16/hex: run the utf16/hex-raw converter on #1, then enclose
% the content of the str variable #2 in angle brackets.
\cs_new_protected:cpn { @@_string_from_unicode_utf16/hex:nN } #1 #2
  {
    \use:c { @@_string_from_unicode_utf16/hex-raw:nN } {#1} #2
    \str_put_right:Nn #2 { > }
    \str_put_left:Nn  #2 { < }
  }
% \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \subsubsection{BDC operator commands}
% \begin{macro}{\pdf_bdc:nn,\pdf_bdc:ee}
% \begin{macro}{\pdf_bdc_property:nn}
% \begin{macro}{\pdf_bdc_shipout:ee}
% \begin{macro}{\pdf_bdcobject:nn}
% \begin{macro}{\pdf_bdcobject:n}
% \begin{macro}{\pdf_bmc:n}
% \begin{macro}{\pdf_emc:}
% \begin{macrocode}
% BDC operator: #1 is the tag, #2 the dictionary content; both are passed
% on unchanged to the backend function.
\cs_new_protected:Nn \pdf_bdc:nn
  { \@@_backend_bdc:nn {#1} {#2} }
\cs_generate_variant:Nn \pdf_bdc:nn { ee }
% Wrapper handing both arguments unchanged to the backend function
% \@@_backend_bdc_contobj:nn (defined outside this file).
\cs_new_protected:Nn \pdf_bdc_property:nn
  { \@@_backend_bdc_contobj:nn {#1} {#2} }
% BDC operator whose arguments (#1 tag, #2 dictionary content) are handled
% by the backend function \@@_backend_bdc_shipout:ee.
% This definition is only a bootstrap for the first use: it tests
% \l__pdfmanagement_delayed_shipout_bool once and then globally replaces
% itself, so later uses skip the test.
\cs_new_protected:Npn \pdf_bdc_shipout:ee #1 #2
  {
    \bool_if:NTF \l__pdfmanagement_delayed_shipout_bool
      {
        % Supported: do the work now, then become the backend function.
        \@@_backend_bdc_shipout:ee {#1} {#2}
        \cs_gset_eq:NN \pdf_bdc_shipout:ee \@@_backend_bdc_shipout:ee
      }
      {
        % Not supported: report it once, then silently drop the two
        % arguments on every later use.
        \msg_error:nn { pdfmanagement } { delayed-shipout }
        \cs_gset_eq:NN \pdf_bdc_shipout:ee \use_none:nn
      }
  }
% BDC operator referring to a named object: #1 is the tag, #2 the object
% name; both are passed on unchanged to the backend function.
\cs_new_protected:Nn \pdf_bdcobject:nn
  { \@@_backend_bdcobject:nn {#1} {#2} }
% BDC operator with the tag #1 only; the choice of the object is left to
% the backend function.
\cs_new_protected:Nn \pdf_bdcobject:n
  { \@@_backend_bdcobject:n {#1} }
% BMC operator: #1 is the tag, passed on unchanged to the backend function.
\cs_new_protected:Nn \pdf_bmc:n
  { \@@_backend_bmc:n {#1} }
% EMC operator: takes no argument and calls the backend function.
\cs_new_protected:Nn \pdf_emc:
  { \@@_backend_emc: }
% \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \begin{macrocode}
%</package>
% \end{macrocode}
%
% \end{implementation}
%
% \PrintIndex