Imported Upstream version 1.1.1
This commit is contained in:
commit
dfb12f36e6
84 changed files with 15184 additions and 0 deletions
340
COPYING
Normal file
340
COPYING
Normal file
|
@ -0,0 +1,340 @@
|
||||||
|
GNU GENERAL PUBLIC LICENSE
|
||||||
|
Version 2, June 1991
|
||||||
|
|
||||||
|
Copyright (C) 1989, 1991 Free Software Foundation, Inc.
|
||||||
|
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||||
|
Everyone is permitted to copy and distribute verbatim copies
|
||||||
|
of this license document, but changing it is not allowed.
|
||||||
|
|
||||||
|
Preamble
|
||||||
|
|
||||||
|
The licenses for most software are designed to take away your
|
||||||
|
freedom to share and change it. By contrast, the GNU General Public
|
||||||
|
License is intended to guarantee your freedom to share and change free
|
||||||
|
software--to make sure the software is free for all its users. This
|
||||||
|
General Public License applies to most of the Free Software
|
||||||
|
Foundation's software and to any other program whose authors commit to
|
||||||
|
using it. (Some other Free Software Foundation software is covered by
|
||||||
|
the GNU Library General Public License instead.) You can apply it to
|
||||||
|
your programs, too.
|
||||||
|
|
||||||
|
When we speak of free software, we are referring to freedom, not
|
||||||
|
price. Our General Public Licenses are designed to make sure that you
|
||||||
|
have the freedom to distribute copies of free software (and charge for
|
||||||
|
this service if you wish), that you receive source code or can get it
|
||||||
|
if you want it, that you can change the software or use pieces of it
|
||||||
|
in new free programs; and that you know you can do these things.
|
||||||
|
|
||||||
|
To protect your rights, we need to make restrictions that forbid
|
||||||
|
anyone to deny you these rights or to ask you to surrender the rights.
|
||||||
|
These restrictions translate to certain responsibilities for you if you
|
||||||
|
distribute copies of the software, or if you modify it.
|
||||||
|
|
||||||
|
For example, if you distribute copies of such a program, whether
|
||||||
|
gratis or for a fee, you must give the recipients all the rights that
|
||||||
|
you have. You must make sure that they, too, receive or can get the
|
||||||
|
source code. And you must show them these terms so they know their
|
||||||
|
rights.
|
||||||
|
|
||||||
|
We protect your rights with two steps: (1) copyright the software, and
|
||||||
|
(2) offer you this license which gives you legal permission to copy,
|
||||||
|
distribute and/or modify the software.
|
||||||
|
|
||||||
|
Also, for each author's protection and ours, we want to make certain
|
||||||
|
that everyone understands that there is no warranty for this free
|
||||||
|
software. If the software is modified by someone else and passed on, we
|
||||||
|
want its recipients to know that what they have is not the original, so
|
||||||
|
that any problems introduced by others will not reflect on the original
|
||||||
|
authors' reputations.
|
||||||
|
|
||||||
|
Finally, any free program is threatened constantly by software
|
||||||
|
patents. We wish to avoid the danger that redistributors of a free
|
||||||
|
program will individually obtain patent licenses, in effect making the
|
||||||
|
program proprietary. To prevent this, we have made it clear that any
|
||||||
|
patent must be licensed for everyone's free use or not licensed at all.
|
||||||
|
|
||||||
|
The precise terms and conditions for copying, distribution and
|
||||||
|
modification follow.
|
||||||
|
|
||||||
|
GNU GENERAL PUBLIC LICENSE
|
||||||
|
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||||
|
|
||||||
|
0. This License applies to any program or other work which contains
|
||||||
|
a notice placed by the copyright holder saying it may be distributed
|
||||||
|
under the terms of this General Public License. The "Program", below,
|
||||||
|
refers to any such program or work, and a "work based on the Program"
|
||||||
|
means either the Program or any derivative work under copyright law:
|
||||||
|
that is to say, a work containing the Program or a portion of it,
|
||||||
|
either verbatim or with modifications and/or translated into another
|
||||||
|
language. (Hereinafter, translation is included without limitation in
|
||||||
|
the term "modification".) Each licensee is addressed as "you".
|
||||||
|
|
||||||
|
Activities other than copying, distribution and modification are not
|
||||||
|
covered by this License; they are outside its scope. The act of
|
||||||
|
running the Program is not restricted, and the output from the Program
|
||||||
|
is covered only if its contents constitute a work based on the
|
||||||
|
Program (independent of having been made by running the Program).
|
||||||
|
Whether that is true depends on what the Program does.
|
||||||
|
|
||||||
|
1. You may copy and distribute verbatim copies of the Program's
|
||||||
|
source code as you receive it, in any medium, provided that you
|
||||||
|
conspicuously and appropriately publish on each copy an appropriate
|
||||||
|
copyright notice and disclaimer of warranty; keep intact all the
|
||||||
|
notices that refer to this License and to the absence of any warranty;
|
||||||
|
and give any other recipients of the Program a copy of this License
|
||||||
|
along with the Program.
|
||||||
|
|
||||||
|
You may charge a fee for the physical act of transferring a copy, and
|
||||||
|
you may at your option offer warranty protection in exchange for a fee.
|
||||||
|
|
||||||
|
2. You may modify your copy or copies of the Program or any portion
|
||||||
|
of it, thus forming a work based on the Program, and copy and
|
||||||
|
distribute such modifications or work under the terms of Section 1
|
||||||
|
above, provided that you also meet all of these conditions:
|
||||||
|
|
||||||
|
a) You must cause the modified files to carry prominent notices
|
||||||
|
stating that you changed the files and the date of any change.
|
||||||
|
|
||||||
|
b) You must cause any work that you distribute or publish, that in
|
||||||
|
whole or in part contains or is derived from the Program or any
|
||||||
|
part thereof, to be licensed as a whole at no charge to all third
|
||||||
|
parties under the terms of this License.
|
||||||
|
|
||||||
|
c) If the modified program normally reads commands interactively
|
||||||
|
when run, you must cause it, when started running for such
|
||||||
|
interactive use in the most ordinary way, to print or display an
|
||||||
|
announcement including an appropriate copyright notice and a
|
||||||
|
notice that there is no warranty (or else, saying that you provide
|
||||||
|
a warranty) and that users may redistribute the program under
|
||||||
|
these conditions, and telling the user how to view a copy of this
|
||||||
|
License. (Exception: if the Program itself is interactive but
|
||||||
|
does not normally print such an announcement, your work based on
|
||||||
|
the Program is not required to print an announcement.)
|
||||||
|
|
||||||
|
These requirements apply to the modified work as a whole. If
|
||||||
|
identifiable sections of that work are not derived from the Program,
|
||||||
|
and can be reasonably considered independent and separate works in
|
||||||
|
themselves, then this License, and its terms, do not apply to those
|
||||||
|
sections when you distribute them as separate works. But when you
|
||||||
|
distribute the same sections as part of a whole which is a work based
|
||||||
|
on the Program, the distribution of the whole must be on the terms of
|
||||||
|
this License, whose permissions for other licensees extend to the
|
||||||
|
entire whole, and thus to each and every part regardless of who wrote it.
|
||||||
|
|
||||||
|
Thus, it is not the intent of this section to claim rights or contest
|
||||||
|
your rights to work written entirely by you; rather, the intent is to
|
||||||
|
exercise the right to control the distribution of derivative or
|
||||||
|
collective works based on the Program.
|
||||||
|
|
||||||
|
In addition, mere aggregation of another work not based on the Program
|
||||||
|
with the Program (or with a work based on the Program) on a volume of
|
||||||
|
a storage or distribution medium does not bring the other work under
|
||||||
|
the scope of this License.
|
||||||
|
|
||||||
|
3. You may copy and distribute the Program (or a work based on it,
|
||||||
|
under Section 2) in object code or executable form under the terms of
|
||||||
|
Sections 1 and 2 above provided that you also do one of the following:
|
||||||
|
|
||||||
|
a) Accompany it with the complete corresponding machine-readable
|
||||||
|
source code, which must be distributed under the terms of Sections
|
||||||
|
1 and 2 above on a medium customarily used for software interchange; or,
|
||||||
|
|
||||||
|
b) Accompany it with a written offer, valid for at least three
|
||||||
|
years, to give any third party, for a charge no more than your
|
||||||
|
cost of physically performing source distribution, a complete
|
||||||
|
machine-readable copy of the corresponding source code, to be
|
||||||
|
distributed under the terms of Sections 1 and 2 above on a medium
|
||||||
|
customarily used for software interchange; or,
|
||||||
|
|
||||||
|
c) Accompany it with the information you received as to the offer
|
||||||
|
to distribute corresponding source code. (This alternative is
|
||||||
|
allowed only for noncommercial distribution and only if you
|
||||||
|
received the program in object code or executable form with such
|
||||||
|
an offer, in accord with Subsection b above.)
|
||||||
|
|
||||||
|
The source code for a work means the preferred form of the work for
|
||||||
|
making modifications to it. For an executable work, complete source
|
||||||
|
code means all the source code for all modules it contains, plus any
|
||||||
|
associated interface definition files, plus the scripts used to
|
||||||
|
control compilation and installation of the executable. However, as a
|
||||||
|
special exception, the source code distributed need not include
|
||||||
|
anything that is normally distributed (in either source or binary
|
||||||
|
form) with the major components (compiler, kernel, and so on) of the
|
||||||
|
operating system on which the executable runs, unless that component
|
||||||
|
itself accompanies the executable.
|
||||||
|
|
||||||
|
If distribution of executable or object code is made by offering
|
||||||
|
access to copy from a designated place, then offering equivalent
|
||||||
|
access to copy the source code from the same place counts as
|
||||||
|
distribution of the source code, even though third parties are not
|
||||||
|
compelled to copy the source along with the object code.
|
||||||
|
|
||||||
|
4. You may not copy, modify, sublicense, or distribute the Program
|
||||||
|
except as expressly provided under this License. Any attempt
|
||||||
|
otherwise to copy, modify, sublicense or distribute the Program is
|
||||||
|
void, and will automatically terminate your rights under this License.
|
||||||
|
However, parties who have received copies, or rights, from you under
|
||||||
|
this License will not have their licenses terminated so long as such
|
||||||
|
parties remain in full compliance.
|
||||||
|
|
||||||
|
5. You are not required to accept this License, since you have not
|
||||||
|
signed it. However, nothing else grants you permission to modify or
|
||||||
|
distribute the Program or its derivative works. These actions are
|
||||||
|
prohibited by law if you do not accept this License. Therefore, by
|
||||||
|
modifying or distributing the Program (or any work based on the
|
||||||
|
Program), you indicate your acceptance of this License to do so, and
|
||||||
|
all its terms and conditions for copying, distributing or modifying
|
||||||
|
the Program or works based on it.
|
||||||
|
|
||||||
|
6. Each time you redistribute the Program (or any work based on the
|
||||||
|
Program), the recipient automatically receives a license from the
|
||||||
|
original licensor to copy, distribute or modify the Program subject to
|
||||||
|
these terms and conditions. You may not impose any further
|
||||||
|
restrictions on the recipients' exercise of the rights granted herein.
|
||||||
|
You are not responsible for enforcing compliance by third parties to
|
||||||
|
this License.
|
||||||
|
|
||||||
|
7. If, as a consequence of a court judgment or allegation of patent
|
||||||
|
infringement or for any other reason (not limited to patent issues),
|
||||||
|
conditions are imposed on you (whether by court order, agreement or
|
||||||
|
otherwise) that contradict the conditions of this License, they do not
|
||||||
|
excuse you from the conditions of this License. If you cannot
|
||||||
|
distribute so as to satisfy simultaneously your obligations under this
|
||||||
|
License and any other pertinent obligations, then as a consequence you
|
||||||
|
may not distribute the Program at all. For example, if a patent
|
||||||
|
license would not permit royalty-free redistribution of the Program by
|
||||||
|
all those who receive copies directly or indirectly through you, then
|
||||||
|
the only way you could satisfy both it and this License would be to
|
||||||
|
refrain entirely from distribution of the Program.
|
||||||
|
|
||||||
|
If any portion of this section is held invalid or unenforceable under
|
||||||
|
any particular circumstance, the balance of the section is intended to
|
||||||
|
apply and the section as a whole is intended to apply in other
|
||||||
|
circumstances.
|
||||||
|
|
||||||
|
It is not the purpose of this section to induce you to infringe any
|
||||||
|
patents or other property right claims or to contest validity of any
|
||||||
|
such claims; this section has the sole purpose of protecting the
|
||||||
|
integrity of the free software distribution system, which is
|
||||||
|
implemented by public license practices. Many people have made
|
||||||
|
generous contributions to the wide range of software distributed
|
||||||
|
through that system in reliance on consistent application of that
|
||||||
|
system; it is up to the author/donor to decide if he or she is willing
|
||||||
|
to distribute software through any other system and a licensee cannot
|
||||||
|
impose that choice.
|
||||||
|
|
||||||
|
This section is intended to make thoroughly clear what is believed to
|
||||||
|
be a consequence of the rest of this License.
|
||||||
|
|
||||||
|
8. If the distribution and/or use of the Program is restricted in
|
||||||
|
certain countries either by patents or by copyrighted interfaces, the
|
||||||
|
original copyright holder who places the Program under this License
|
||||||
|
may add an explicit geographical distribution limitation excluding
|
||||||
|
those countries, so that distribution is permitted only in or among
|
||||||
|
countries not thus excluded. In such case, this License incorporates
|
||||||
|
the limitation as if written in the body of this License.
|
||||||
|
|
||||||
|
9. The Free Software Foundation may publish revised and/or new versions
|
||||||
|
of the General Public License from time to time. Such new versions will
|
||||||
|
be similar in spirit to the present version, but may differ in detail to
|
||||||
|
address new problems or concerns.
|
||||||
|
|
||||||
|
Each version is given a distinguishing version number. If the Program
|
||||||
|
specifies a version number of this License which applies to it and "any
|
||||||
|
later version", you have the option of following the terms and conditions
|
||||||
|
either of that version or of any later version published by the Free
|
||||||
|
Software Foundation. If the Program does not specify a version number of
|
||||||
|
this License, you may choose any version ever published by the Free Software
|
||||||
|
Foundation.
|
||||||
|
|
||||||
|
10. If you wish to incorporate parts of the Program into other free
|
||||||
|
programs whose distribution conditions are different, write to the author
|
||||||
|
to ask for permission. For software which is copyrighted by the Free
|
||||||
|
Software Foundation, write to the Free Software Foundation; we sometimes
|
||||||
|
make exceptions for this. Our decision will be guided by the two goals
|
||||||
|
of preserving the free status of all derivatives of our free software and
|
||||||
|
of promoting the sharing and reuse of software generally.
|
||||||
|
|
||||||
|
NO WARRANTY
|
||||||
|
|
||||||
|
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
|
||||||
|
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
|
||||||
|
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
|
||||||
|
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
|
||||||
|
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||||
|
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
|
||||||
|
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
|
||||||
|
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
|
||||||
|
REPAIR OR CORRECTION.
|
||||||
|
|
||||||
|
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||||
|
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
|
||||||
|
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
|
||||||
|
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
|
||||||
|
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
|
||||||
|
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
|
||||||
|
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
|
||||||
|
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGES.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
How to Apply These Terms to Your New Programs
|
||||||
|
|
||||||
|
If you develop a new program, and you want it to be of the greatest
|
||||||
|
possible use to the public, the best way to achieve this is to make it
|
||||||
|
free software which everyone can redistribute and change under these terms.
|
||||||
|
|
||||||
|
To do so, attach the following notices to the program. It is safest
|
||||||
|
to attach them to the start of each source file to most effectively
|
||||||
|
convey the exclusion of warranty; and each file should have at least
|
||||||
|
the "copyright" line and a pointer to where the full notice is found.
|
||||||
|
|
||||||
|
<one line to give the program's name and a brief idea of what it does.>
|
||||||
|
Copyright (C) <year> <name of author>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||||
|
|
||||||
|
|
||||||
|
Also add information on how to contact you by electronic and paper mail.
|
||||||
|
|
||||||
|
If the program is interactive, make it output a short notice like this
|
||||||
|
when it starts in an interactive mode:
|
||||||
|
|
||||||
|
Gnomovision version 69, Copyright (C) year name of author
|
||||||
|
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||||
|
This is free software, and you are welcome to redistribute it
|
||||||
|
under certain conditions; type `show c' for details.
|
||||||
|
|
||||||
|
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||||
|
parts of the General Public License. Of course, the commands you use may
|
||||||
|
be called something other than `show w' and `show c'; they could even be
|
||||||
|
mouse-clicks or menu items--whatever suits your program.
|
||||||
|
|
||||||
|
You should also get your employer (if you work as a programmer) or your
|
||||||
|
school, if any, to sign a "copyright disclaimer" for the program, if
|
||||||
|
necessary. Here is a sample; alter the names:
|
||||||
|
|
||||||
|
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
|
||||||
|
`Gnomovision' (which makes passes at compilers) written by James Hacker.
|
||||||
|
|
||||||
|
<signature of Ty Coon>, 1 April 1989
|
||||||
|
Ty Coon, President of Vice
|
||||||
|
|
||||||
|
This General Public License does not permit incorporating your program into
|
||||||
|
proprietary programs. If your program is a subroutine library, you may
|
||||||
|
consider it more useful to permit linking proprietary applications with the
|
||||||
|
library. If this is what you want to do, use the GNU Library General
|
||||||
|
Public License instead of this License.
|
504
COPYING.LESSER
Normal file
504
COPYING.LESSER
Normal file
|
@ -0,0 +1,504 @@
|
||||||
|
GNU LESSER GENERAL PUBLIC LICENSE
|
||||||
|
Version 2.1, February 1999
|
||||||
|
|
||||||
|
Copyright (C) 1991, 1999 Free Software Foundation, Inc.
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
Everyone is permitted to copy and distribute verbatim copies
|
||||||
|
of this license document, but changing it is not allowed.
|
||||||
|
|
||||||
|
[This is the first released version of the Lesser GPL. It also counts
|
||||||
|
as the successor of the GNU Library Public License, version 2, hence
|
||||||
|
the version number 2.1.]
|
||||||
|
|
||||||
|
Preamble
|
||||||
|
|
||||||
|
The licenses for most software are designed to take away your
|
||||||
|
freedom to share and change it. By contrast, the GNU General Public
|
||||||
|
Licenses are intended to guarantee your freedom to share and change
|
||||||
|
free software--to make sure the software is free for all its users.
|
||||||
|
|
||||||
|
This license, the Lesser General Public License, applies to some
|
||||||
|
specially designated software packages--typically libraries--of the
|
||||||
|
Free Software Foundation and other authors who decide to use it. You
|
||||||
|
can use it too, but we suggest you first think carefully about whether
|
||||||
|
this license or the ordinary General Public License is the better
|
||||||
|
strategy to use in any particular case, based on the explanations below.
|
||||||
|
|
||||||
|
When we speak of free software, we are referring to freedom of use,
|
||||||
|
not price. Our General Public Licenses are designed to make sure that
|
||||||
|
you have the freedom to distribute copies of free software (and charge
|
||||||
|
for this service if you wish); that you receive source code or can get
|
||||||
|
it if you want it; that you can change the software and use pieces of
|
||||||
|
it in new free programs; and that you are informed that you can do
|
||||||
|
these things.
|
||||||
|
|
||||||
|
To protect your rights, we need to make restrictions that forbid
|
||||||
|
distributors to deny you these rights or to ask you to surrender these
|
||||||
|
rights. These restrictions translate to certain responsibilities for
|
||||||
|
you if you distribute copies of the library or if you modify it.
|
||||||
|
|
||||||
|
For example, if you distribute copies of the library, whether gratis
|
||||||
|
or for a fee, you must give the recipients all the rights that we gave
|
||||||
|
you. You must make sure that they, too, receive or can get the source
|
||||||
|
code. If you link other code with the library, you must provide
|
||||||
|
complete object files to the recipients, so that they can relink them
|
||||||
|
with the library after making changes to the library and recompiling
|
||||||
|
it. And you must show them these terms so they know their rights.
|
||||||
|
|
||||||
|
We protect your rights with a two-step method: (1) we copyright the
|
||||||
|
library, and (2) we offer you this license, which gives you legal
|
||||||
|
permission to copy, distribute and/or modify the library.
|
||||||
|
|
||||||
|
To protect each distributor, we want to make it very clear that
|
||||||
|
there is no warranty for the free library. Also, if the library is
|
||||||
|
modified by someone else and passed on, the recipients should know
|
||||||
|
that what they have is not the original version, so that the original
|
||||||
|
author's reputation will not be affected by problems that might be
|
||||||
|
introduced by others.
|
||||||
|
|
||||||
|
Finally, software patents pose a constant threat to the existence of
|
||||||
|
any free program. We wish to make sure that a company cannot
|
||||||
|
effectively restrict the users of a free program by obtaining a
|
||||||
|
restrictive license from a patent holder. Therefore, we insist that
|
||||||
|
any patent license obtained for a version of the library must be
|
||||||
|
consistent with the full freedom of use specified in this license.
|
||||||
|
|
||||||
|
Most GNU software, including some libraries, is covered by the
|
||||||
|
ordinary GNU General Public License. This license, the GNU Lesser
|
||||||
|
General Public License, applies to certain designated libraries, and
|
||||||
|
is quite different from the ordinary General Public License. We use
|
||||||
|
this license for certain libraries in order to permit linking those
|
||||||
|
libraries into non-free programs.
|
||||||
|
|
||||||
|
When a program is linked with a library, whether statically or using
|
||||||
|
a shared library, the combination of the two is legally speaking a
|
||||||
|
combined work, a derivative of the original library. The ordinary
|
||||||
|
General Public License therefore permits such linking only if the
|
||||||
|
entire combination fits its criteria of freedom. The Lesser General
|
||||||
|
Public License permits more lax criteria for linking other code with
|
||||||
|
the library.
|
||||||
|
|
||||||
|
We call this license the "Lesser" General Public License because it
|
||||||
|
does Less to protect the user's freedom than the ordinary General
|
||||||
|
Public License. It also provides other free software developers Less
|
||||||
|
of an advantage over competing non-free programs. These disadvantages
|
||||||
|
are the reason we use the ordinary General Public License for many
|
||||||
|
libraries. However, the Lesser license provides advantages in certain
|
||||||
|
special circumstances.
|
||||||
|
|
||||||
|
For example, on rare occasions, there may be a special need to
|
||||||
|
encourage the widest possible use of a certain library, so that it becomes
|
||||||
|
a de-facto standard. To achieve this, non-free programs must be
|
||||||
|
allowed to use the library. A more frequent case is that a free
|
||||||
|
library does the same job as widely used non-free libraries. In this
|
||||||
|
case, there is little to gain by limiting the free library to free
|
||||||
|
software only, so we use the Lesser General Public License.
|
||||||
|
|
||||||
|
In other cases, permission to use a particular library in non-free
|
||||||
|
programs enables a greater number of people to use a large body of
|
||||||
|
free software. For example, permission to use the GNU C Library in
|
||||||
|
non-free programs enables many more people to use the whole GNU
|
||||||
|
operating system, as well as its variant, the GNU/Linux operating
|
||||||
|
system.
|
||||||
|
|
||||||
|
Although the Lesser General Public License is Less protective of the
|
||||||
|
users' freedom, it does ensure that the user of a program that is
|
||||||
|
linked with the Library has the freedom and the wherewithal to run
|
||||||
|
that program using a modified version of the Library.
|
||||||
|
|
||||||
|
The precise terms and conditions for copying, distribution and
|
||||||
|
modification follow. Pay close attention to the difference between a
|
||||||
|
"work based on the library" and a "work that uses the library". The
|
||||||
|
former contains code derived from the library, whereas the latter must
|
||||||
|
be combined with the library in order to run.
|
||||||
|
|
||||||
|
GNU LESSER GENERAL PUBLIC LICENSE
|
||||||
|
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||||
|
|
||||||
|
0. This License Agreement applies to any software library or other
|
||||||
|
program which contains a notice placed by the copyright holder or
|
||||||
|
other authorized party saying it may be distributed under the terms of
|
||||||
|
this Lesser General Public License (also called "this License").
|
||||||
|
Each licensee is addressed as "you".
|
||||||
|
|
||||||
|
A "library" means a collection of software functions and/or data
|
||||||
|
prepared so as to be conveniently linked with application programs
|
||||||
|
(which use some of those functions and data) to form executables.
|
||||||
|
|
||||||
|
The "Library", below, refers to any such software library or work
|
||||||
|
which has been distributed under these terms. A "work based on the
|
||||||
|
Library" means either the Library or any derivative work under
|
||||||
|
copyright law: that is to say, a work containing the Library or a
|
||||||
|
portion of it, either verbatim or with modifications and/or translated
|
||||||
|
straightforwardly into another language. (Hereinafter, translation is
|
||||||
|
included without limitation in the term "modification".)
|
||||||
|
|
||||||
|
"Source code" for a work means the preferred form of the work for
|
||||||
|
making modifications to it. For a library, complete source code means
|
||||||
|
all the source code for all modules it contains, plus any associated
|
||||||
|
interface definition files, plus the scripts used to control compilation
|
||||||
|
and installation of the library.
|
||||||
|
|
||||||
|
Activities other than copying, distribution and modification are not
|
||||||
|
covered by this License; they are outside its scope. The act of
|
||||||
|
running a program using the Library is not restricted, and output from
|
||||||
|
such a program is covered only if its contents constitute a work based
|
||||||
|
on the Library (independent of the use of the Library in a tool for
|
||||||
|
writing it). Whether that is true depends on what the Library does
|
||||||
|
and what the program that uses the Library does.
|
||||||
|
|
||||||
|
1. You may copy and distribute verbatim copies of the Library's
|
||||||
|
complete source code as you receive it, in any medium, provided that
|
||||||
|
you conspicuously and appropriately publish on each copy an
|
||||||
|
appropriate copyright notice and disclaimer of warranty; keep intact
|
||||||
|
all the notices that refer to this License and to the absence of any
|
||||||
|
warranty; and distribute a copy of this License along with the
|
||||||
|
Library.
|
||||||
|
|
||||||
|
You may charge a fee for the physical act of transferring a copy,
|
||||||
|
and you may at your option offer warranty protection in exchange for a
|
||||||
|
fee.
|
||||||
|
|
||||||
|
2. You may modify your copy or copies of the Library or any portion
|
||||||
|
of it, thus forming a work based on the Library, and copy and
|
||||||
|
distribute such modifications or work under the terms of Section 1
|
||||||
|
above, provided that you also meet all of these conditions:
|
||||||
|
|
||||||
|
a) The modified work must itself be a software library.
|
||||||
|
|
||||||
|
b) You must cause the files modified to carry prominent notices
|
||||||
|
stating that you changed the files and the date of any change.
|
||||||
|
|
||||||
|
c) You must cause the whole of the work to be licensed at no
|
||||||
|
charge to all third parties under the terms of this License.
|
||||||
|
|
||||||
|
d) If a facility in the modified Library refers to a function or a
|
||||||
|
table of data to be supplied by an application program that uses
|
||||||
|
the facility, other than as an argument passed when the facility
|
||||||
|
is invoked, then you must make a good faith effort to ensure that,
|
||||||
|
in the event an application does not supply such function or
|
||||||
|
table, the facility still operates, and performs whatever part of
|
||||||
|
its purpose remains meaningful.
|
||||||
|
|
||||||
|
(For example, a function in a library to compute square roots has
|
||||||
|
a purpose that is entirely well-defined independent of the
|
||||||
|
application. Therefore, Subsection 2d requires that any
|
||||||
|
application-supplied function or table used by this function must
|
||||||
|
be optional: if the application does not supply it, the square
|
||||||
|
root function must still compute square roots.)
|
||||||
|
|
||||||
|
These requirements apply to the modified work as a whole. If
|
||||||
|
identifiable sections of that work are not derived from the Library,
|
||||||
|
and can be reasonably considered independent and separate works in
|
||||||
|
themselves, then this License, and its terms, do not apply to those
|
||||||
|
sections when you distribute them as separate works. But when you
|
||||||
|
distribute the same sections as part of a whole which is a work based
|
||||||
|
on the Library, the distribution of the whole must be on the terms of
|
||||||
|
this License, whose permissions for other licensees extend to the
|
||||||
|
entire whole, and thus to each and every part regardless of who wrote
|
||||||
|
it.
|
||||||
|
|
||||||
|
Thus, it is not the intent of this section to claim rights or contest
|
||||||
|
your rights to work written entirely by you; rather, the intent is to
|
||||||
|
exercise the right to control the distribution of derivative or
|
||||||
|
collective works based on the Library.
|
||||||
|
|
||||||
|
In addition, mere aggregation of another work not based on the Library
|
||||||
|
with the Library (or with a work based on the Library) on a volume of
|
||||||
|
a storage or distribution medium does not bring the other work under
|
||||||
|
the scope of this License.
|
||||||
|
|
||||||
|
3. You may opt to apply the terms of the ordinary GNU General Public
|
||||||
|
License instead of this License to a given copy of the Library. To do
|
||||||
|
this, you must alter all the notices that refer to this License, so
|
||||||
|
that they refer to the ordinary GNU General Public License, version 2,
|
||||||
|
instead of to this License. (If a newer version than version 2 of the
|
||||||
|
ordinary GNU General Public License has appeared, then you can specify
|
||||||
|
that version instead if you wish.) Do not make any other change in
|
||||||
|
these notices.
|
||||||
|
|
||||||
|
Once this change is made in a given copy, it is irreversible for
|
||||||
|
that copy, so the ordinary GNU General Public License applies to all
|
||||||
|
subsequent copies and derivative works made from that copy.
|
||||||
|
|
||||||
|
This option is useful when you wish to copy part of the code of
|
||||||
|
the Library into a program that is not a library.
|
||||||
|
|
||||||
|
4. You may copy and distribute the Library (or a portion or
|
||||||
|
derivative of it, under Section 2) in object code or executable form
|
||||||
|
under the terms of Sections 1 and 2 above provided that you accompany
|
||||||
|
it with the complete corresponding machine-readable source code, which
|
||||||
|
must be distributed under the terms of Sections 1 and 2 above on a
|
||||||
|
medium customarily used for software interchange.
|
||||||
|
|
||||||
|
If distribution of object code is made by offering access to copy
|
||||||
|
from a designated place, then offering equivalent access to copy the
|
||||||
|
source code from the same place satisfies the requirement to
|
||||||
|
distribute the source code, even though third parties are not
|
||||||
|
compelled to copy the source along with the object code.
|
||||||
|
|
||||||
|
5. A program that contains no derivative of any portion of the
|
||||||
|
Library, but is designed to work with the Library by being compiled or
|
||||||
|
linked with it, is called a "work that uses the Library". Such a
|
||||||
|
work, in isolation, is not a derivative work of the Library, and
|
||||||
|
therefore falls outside the scope of this License.
|
||||||
|
|
||||||
|
However, linking a "work that uses the Library" with the Library
|
||||||
|
creates an executable that is a derivative of the Library (because it
|
||||||
|
contains portions of the Library), rather than a "work that uses the
|
||||||
|
library". The executable is therefore covered by this License.
|
||||||
|
Section 6 states terms for distribution of such executables.
|
||||||
|
|
||||||
|
When a "work that uses the Library" uses material from a header file
|
||||||
|
that is part of the Library, the object code for the work may be a
|
||||||
|
derivative work of the Library even though the source code is not.
|
||||||
|
Whether this is true is especially significant if the work can be
|
||||||
|
linked without the Library, or if the work is itself a library. The
|
||||||
|
threshold for this to be true is not precisely defined by law.
|
||||||
|
|
||||||
|
If such an object file uses only numerical parameters, data
|
||||||
|
structure layouts and accessors, and small macros and small inline
|
||||||
|
functions (ten lines or less in length), then the use of the object
|
||||||
|
file is unrestricted, regardless of whether it is legally a derivative
|
||||||
|
work. (Executables containing this object code plus portions of the
|
||||||
|
Library will still fall under Section 6.)
|
||||||
|
|
||||||
|
Otherwise, if the work is a derivative of the Library, you may
|
||||||
|
distribute the object code for the work under the terms of Section 6.
|
||||||
|
Any executables containing that work also fall under Section 6,
|
||||||
|
whether or not they are linked directly with the Library itself.
|
||||||
|
|
||||||
|
6. As an exception to the Sections above, you may also combine or
|
||||||
|
link a "work that uses the Library" with the Library to produce a
|
||||||
|
work containing portions of the Library, and distribute that work
|
||||||
|
under terms of your choice, provided that the terms permit
|
||||||
|
modification of the work for the customer's own use and reverse
|
||||||
|
engineering for debugging such modifications.
|
||||||
|
|
||||||
|
You must give prominent notice with each copy of the work that the
|
||||||
|
Library is used in it and that the Library and its use are covered by
|
||||||
|
this License. You must supply a copy of this License. If the work
|
||||||
|
during execution displays copyright notices, you must include the
|
||||||
|
copyright notice for the Library among them, as well as a reference
|
||||||
|
directing the user to the copy of this License. Also, you must do one
|
||||||
|
of these things:
|
||||||
|
|
||||||
|
a) Accompany the work with the complete corresponding
|
||||||
|
machine-readable source code for the Library including whatever
|
||||||
|
changes were used in the work (which must be distributed under
|
||||||
|
Sections 1 and 2 above); and, if the work is an executable linked
|
||||||
|
with the Library, with the complete machine-readable "work that
|
||||||
|
uses the Library", as object code and/or source code, so that the
|
||||||
|
user can modify the Library and then relink to produce a modified
|
||||||
|
executable containing the modified Library. (It is understood
|
||||||
|
that the user who changes the contents of definitions files in the
|
||||||
|
Library will not necessarily be able to recompile the application
|
||||||
|
to use the modified definitions.)
|
||||||
|
|
||||||
|
b) Use a suitable shared library mechanism for linking with the
|
||||||
|
Library. A suitable mechanism is one that (1) uses at run time a
|
||||||
|
copy of the library already present on the user's computer system,
|
||||||
|
rather than copying library functions into the executable, and (2)
|
||||||
|
will operate properly with a modified version of the library, if
|
||||||
|
the user installs one, as long as the modified version is
|
||||||
|
interface-compatible with the version that the work was made with.
|
||||||
|
|
||||||
|
c) Accompany the work with a written offer, valid for at
|
||||||
|
least three years, to give the same user the materials
|
||||||
|
specified in Subsection 6a, above, for a charge no more
|
||||||
|
than the cost of performing this distribution.
|
||||||
|
|
||||||
|
d) If distribution of the work is made by offering access to copy
|
||||||
|
from a designated place, offer equivalent access to copy the above
|
||||||
|
specified materials from the same place.
|
||||||
|
|
||||||
|
e) Verify that the user has already received a copy of these
|
||||||
|
materials or that you have already sent this user a copy.
|
||||||
|
|
||||||
|
For an executable, the required form of the "work that uses the
|
||||||
|
Library" must include any data and utility programs needed for
|
||||||
|
reproducing the executable from it. However, as a special exception,
|
||||||
|
the materials to be distributed need not include anything that is
|
||||||
|
normally distributed (in either source or binary form) with the major
|
||||||
|
components (compiler, kernel, and so on) of the operating system on
|
||||||
|
which the executable runs, unless that component itself accompanies
|
||||||
|
the executable.
|
||||||
|
|
||||||
|
It may happen that this requirement contradicts the license
|
||||||
|
restrictions of other proprietary libraries that do not normally
|
||||||
|
accompany the operating system. Such a contradiction means you cannot
|
||||||
|
use both them and the Library together in an executable that you
|
||||||
|
distribute.
|
||||||
|
|
||||||
|
7. You may place library facilities that are a work based on the
|
||||||
|
Library side-by-side in a single library together with other library
|
||||||
|
facilities not covered by this License, and distribute such a combined
|
||||||
|
library, provided that the separate distribution of the work based on
|
||||||
|
the Library and of the other library facilities is otherwise
|
||||||
|
permitted, and provided that you do these two things:
|
||||||
|
|
||||||
|
a) Accompany the combined library with a copy of the same work
|
||||||
|
based on the Library, uncombined with any other library
|
||||||
|
facilities. This must be distributed under the terms of the
|
||||||
|
Sections above.
|
||||||
|
|
||||||
|
b) Give prominent notice with the combined library of the fact
|
||||||
|
that part of it is a work based on the Library, and explaining
|
||||||
|
where to find the accompanying uncombined form of the same work.
|
||||||
|
|
||||||
|
8. You may not copy, modify, sublicense, link with, or distribute
|
||||||
|
the Library except as expressly provided under this License. Any
|
||||||
|
attempt otherwise to copy, modify, sublicense, link with, or
|
||||||
|
distribute the Library is void, and will automatically terminate your
|
||||||
|
rights under this License. However, parties who have received copies,
|
||||||
|
or rights, from you under this License will not have their licenses
|
||||||
|
terminated so long as such parties remain in full compliance.
|
||||||
|
|
||||||
|
9. You are not required to accept this License, since you have not
|
||||||
|
signed it. However, nothing else grants you permission to modify or
|
||||||
|
distribute the Library or its derivative works. These actions are
|
||||||
|
prohibited by law if you do not accept this License. Therefore, by
|
||||||
|
modifying or distributing the Library (or any work based on the
|
||||||
|
Library), you indicate your acceptance of this License to do so, and
|
||||||
|
all its terms and conditions for copying, distributing or modifying
|
||||||
|
the Library or works based on it.
|
||||||
|
|
||||||
|
10. Each time you redistribute the Library (or any work based on the
|
||||||
|
Library), the recipient automatically receives a license from the
|
||||||
|
original licensor to copy, distribute, link with or modify the Library
|
||||||
|
subject to these terms and conditions. You may not impose any further
|
||||||
|
restrictions on the recipients' exercise of the rights granted herein.
|
||||||
|
You are not responsible for enforcing compliance by third parties with
|
||||||
|
this License.
|
||||||
|
|
||||||
|
11. If, as a consequence of a court judgment or allegation of patent
|
||||||
|
infringement or for any other reason (not limited to patent issues),
|
||||||
|
conditions are imposed on you (whether by court order, agreement or
|
||||||
|
otherwise) that contradict the conditions of this License, they do not
|
||||||
|
excuse you from the conditions of this License. If you cannot
|
||||||
|
distribute so as to satisfy simultaneously your obligations under this
|
||||||
|
License and any other pertinent obligations, then as a consequence you
|
||||||
|
may not distribute the Library at all. For example, if a patent
|
||||||
|
license would not permit royalty-free redistribution of the Library by
|
||||||
|
all those who receive copies directly or indirectly through you, then
|
||||||
|
the only way you could satisfy both it and this License would be to
|
||||||
|
refrain entirely from distribution of the Library.
|
||||||
|
|
||||||
|
If any portion of this section is held invalid or unenforceable under any
|
||||||
|
particular circumstance, the balance of the section is intended to apply,
|
||||||
|
and the section as a whole is intended to apply in other circumstances.
|
||||||
|
|
||||||
|
It is not the purpose of this section to induce you to infringe any
|
||||||
|
patents or other property right claims or to contest validity of any
|
||||||
|
such claims; this section has the sole purpose of protecting the
|
||||||
|
integrity of the free software distribution system which is
|
||||||
|
implemented by public license practices. Many people have made
|
||||||
|
generous contributions to the wide range of software distributed
|
||||||
|
through that system in reliance on consistent application of that
|
||||||
|
system; it is up to the author/donor to decide if he or she is willing
|
||||||
|
to distribute software through any other system and a licensee cannot
|
||||||
|
impose that choice.
|
||||||
|
|
||||||
|
This section is intended to make thoroughly clear what is believed to
|
||||||
|
be a consequence of the rest of this License.
|
||||||
|
|
||||||
|
12. If the distribution and/or use of the Library is restricted in
|
||||||
|
certain countries either by patents or by copyrighted interfaces, the
|
||||||
|
original copyright holder who places the Library under this License may add
|
||||||
|
an explicit geographical distribution limitation excluding those countries,
|
||||||
|
so that distribution is permitted only in or among countries not thus
|
||||||
|
excluded. In such case, this License incorporates the limitation as if
|
||||||
|
written in the body of this License.
|
||||||
|
|
||||||
|
13. The Free Software Foundation may publish revised and/or new
|
||||||
|
versions of the Lesser General Public License from time to time.
|
||||||
|
Such new versions will be similar in spirit to the present version,
|
||||||
|
but may differ in detail to address new problems or concerns.
|
||||||
|
|
||||||
|
Each version is given a distinguishing version number. If the Library
|
||||||
|
specifies a version number of this License which applies to it and
|
||||||
|
"any later version", you have the option of following the terms and
|
||||||
|
conditions either of that version or of any later version published by
|
||||||
|
the Free Software Foundation. If the Library does not specify a
|
||||||
|
license version number, you may choose any version ever published by
|
||||||
|
the Free Software Foundation.
|
||||||
|
|
||||||
|
14. If you wish to incorporate parts of the Library into other free
|
||||||
|
programs whose distribution conditions are incompatible with these,
|
||||||
|
write to the author to ask for permission. For software which is
|
||||||
|
copyrighted by the Free Software Foundation, write to the Free
|
||||||
|
Software Foundation; we sometimes make exceptions for this. Our
|
||||||
|
decision will be guided by the two goals of preserving the free status
|
||||||
|
of all derivatives of our free software and of promoting the sharing
|
||||||
|
and reuse of software generally.
|
||||||
|
|
||||||
|
NO WARRANTY
|
||||||
|
|
||||||
|
15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
|
||||||
|
WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
|
||||||
|
EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
|
||||||
|
OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
|
||||||
|
KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
|
||||||
|
LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
|
||||||
|
THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
|
||||||
|
|
||||||
|
16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
|
||||||
|
WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
|
||||||
|
AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
|
||||||
|
FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
|
||||||
|
CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
|
||||||
|
LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
|
||||||
|
RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
|
||||||
|
FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
|
||||||
|
SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
||||||
|
DAMAGES.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
How to Apply These Terms to Your New Libraries
|
||||||
|
|
||||||
|
If you develop a new library, and you want it to be of the greatest
|
||||||
|
possible use to the public, we recommend making it free software that
|
||||||
|
everyone can redistribute and change. You can do so by permitting
|
||||||
|
redistribution under these terms (or, alternatively, under the terms of the
|
||||||
|
ordinary General Public License).
|
||||||
|
|
||||||
|
To apply these terms, attach the following notices to the library. It is
|
||||||
|
safest to attach them to the start of each source file to most effectively
|
||||||
|
convey the exclusion of warranty; and each file should have at least the
|
||||||
|
"copyright" line and a pointer to where the full notice is found.
|
||||||
|
|
||||||
|
<one line to give the library's name and a brief idea of what it does.>
|
||||||
|
Copyright (C) <year> <name of author>
|
||||||
|
|
||||||
|
This library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with this library; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
|
||||||
|
Also add information on how to contact you by electronic and paper mail.
|
||||||
|
|
||||||
|
You should also get your employer (if you work as a programmer) or your
|
||||||
|
school, if any, to sign a "copyright disclaimer" for the library, if
|
||||||
|
necessary. Here is a sample; alter the names:
|
||||||
|
|
||||||
|
Yoyodyne, Inc., hereby disclaims all copyright interest in the
|
||||||
|
library `Frob' (a library for tweaking knobs) written by James Random Hacker.
|
||||||
|
|
||||||
|
<signature of Ty Coon>, 1 April 1990
|
||||||
|
Ty Coon, President of Vice
|
||||||
|
|
||||||
|
That's all there is to it!
|
||||||
|
|
||||||
|
|
191
HACKING
Normal file
191
HACKING
Normal file
|
@ -0,0 +1,191 @@
|
||||||
|
================================
|
||||||
|
Some notes on hacking on kitchen
|
||||||
|
================================
|
||||||
|
|
||||||
|
:Author: Toshio Kuratomi
|
||||||
|
:Date: 2 Jan 2012
|
||||||
|
:Version: 1.1.x
|
||||||
|
|
||||||
|
For kitchen coding conventions, see the style guide in the documentation.
|
||||||
|
|
||||||
|
This file documents meta-information about kitchen such as where to get the
|
||||||
|
code and how to make a release.
|
||||||
|
|
||||||
|
.. contents::
|
||||||
|
|
||||||
|
-----------------------------------------
|
||||||
|
Extra software needed for making releases
|
||||||
|
-----------------------------------------
|
||||||
|
Although kitchen has very few requirements for running, there are a few more
|
||||||
|
that are required for making a release:
|
||||||
|
|
||||||
|
* python-2.4+ (tested on python-2.7)
|
||||||
|
* transifex-client (/usr/bin/tx)
|
||||||
|
* gettext (/usr/bin/msgfmt)
|
||||||
|
* python-babel (/usr/bin/pybabel)
|
||||||
|
* python-sphinx (/usr/bin/sphinx-build)
|
||||||
|
* python-nose (/usr/bin/nosetests)
|
||||||
|
* python-coverage (/usr/bin/coverage)
|
||||||
|
|
||||||
|
--------------
|
||||||
|
Get translated
|
||||||
|
--------------
|
||||||
|
|
||||||
|
We use the translation services at transifex.net to manage po files, coordinate
|
||||||
|
people translating strings, and merge new strings to the files. The following
|
||||||
|
instructions briefly tell how to use transifex to update the source languages'
|
||||||
|
files and pull new translations for release. Actually doing translations can
|
||||||
|
be found in the `transifex user's guide`_.
|
||||||
|
|
||||||
|
.. _`transifex user's guide`: http://help.transifex.net/user-guide/translating.html
|
||||||
|
|
||||||
|
To generate the POT file (located in the po/ subdirectory), use pybabel to
|
||||||
|
extract the messages. Run the following from the top level directory::
|
||||||
|
|
||||||
|
pybabel extract -o po/kitchen.pot kitchen -kb_ -kbN_
|
||||||
|
|
||||||
|
Then commit this pot file and upload to transifex::
|
||||||
|
|
||||||
|
tx push -s
|
||||||
|
bzr commit -m 'Extract new strings from the source files' po/kitchen.pot
|
||||||
|
bzr push
|
||||||
|
|
||||||
|
To pull messages from transifex prior to making a release, do::
|
||||||
|
|
||||||
|
tx pull -a
|
||||||
|
bzr commit -m 'Merge new translations from transifex' po/*.po
|
||||||
|
|
||||||
|
If you see a status message from transifex like this::
|
||||||
|
Pulling new translations for resource kitchen.kitchenpot (source: po/kitchen.pot)
|
||||||
|
-> fr: po/fr.po
|
||||||
|
|
||||||
|
it means that transifex has created a brand new po file for you. You need to
|
||||||
|
add the new file to source control and commit it like this::
|
||||||
|
|
||||||
|
bzr add po/fr.po
|
||||||
|
bzr commit -m 'New French translation' po/fr.po
|
||||||
|
|
||||||
|
|
||||||
|
TODO: Add information about announcing string freeze. Using transifex's add
|
||||||
|
release function to coordinate with translators. Mailing a translators list,
|
||||||
|
etc.
|
||||||
|
|
||||||
|
--------
|
||||||
|
Releases
|
||||||
|
--------
|
||||||
|
|
||||||
|
.. note:: If a release is not time critical, make an effort to get the
|
||||||
|
software translated first. See `Get translated`_ for details.
|
||||||
|
|
||||||
|
Testing
|
||||||
|
=======
|
||||||
|
|
||||||
|
Even though python is not a compiled language, there are several ways to test that
|
||||||
|
the software is correct.
|
||||||
|
|
||||||
|
Test that docs build
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
Documentation is written in ReStructuredText format and built via the
|
||||||
|
:mod:`sphinx` documentation system for python. There is a variety of
|
||||||
|
hand-written and formatted documentation in the :file:`docs` directory. Those
|
||||||
|
documents also pull some documentation out of the docstrings in the code.
|
||||||
|
|
||||||
|
Any of those places may have formatting that is not valid in the sphinx
|
||||||
|
system. Building the documentation into html will see if there's any spots
|
||||||
|
that need to be fixed::
|
||||||
|
|
||||||
|
python setup.py build_sphinx --fresh-env
|
||||||
|
|
||||||
|
The command will attempt to turn the documentation into html. Any errors or
|
||||||
|
warnings in the output mean that there's some piece of documentation that
|
||||||
|
sphinx doesn't know how to deal with. That should be fixed before publishing
|
||||||
|
the release.
|
||||||
|
|
||||||
|
|
||||||
|
Test that message catalogs compile
|
||||||
|
----------------------------------
|
||||||
|
|
||||||
|
One of the pieces of creating a new release is downloading new message
|
||||||
|
catalogs from transifex. Once in a great while, a translator will upload a
|
||||||
|
translation there that causes problems (for instance, adding or omitting
|
||||||
|
format strings from a translated string.) Luckily the commands to create the
|
||||||
|
message catalogs will detect things like this so just compiling the catalogs
|
||||||
|
will determine if any translations need to be adjusted::
|
||||||
|
|
||||||
|
./releaseutils.py
|
||||||
|
|
||||||
|
This will iterate through all the message catalogs that transifex downloaded
|
||||||
|
to the :file:`po` directory and compile them into the :file:`locale`
|
||||||
|
directory.
|
||||||
|
|
||||||
|
.. warning:: If :file:`/usr/bin/msgfmt` is not installed, this command will still
|
||||||
|
compile the message catalogs but it will use babel. Babel, unfortunately,
|
||||||
|
doesn't check for all the errors in message catalogs that msgfmt does so
|
||||||
|
it may say that the messages are fine when they really aren't. Make sure
|
||||||
|
you have msgfmt available by installing gettext.
|
||||||
|
|
||||||
|
Unittest
|
||||||
|
--------
|
||||||
|
|
||||||
|
Kitchen has a large set of unittests. All of them should pass before release.
|
||||||
|
You can run the unittests with the following command::
|
||||||
|
nosetests --with-coverage --cover-package kitchen
|
||||||
|
|
||||||
|
This will run all the unittests under the tests directory and also generate
|
||||||
|
some statistics about which lines of code were not accessed when kitchen ran.
|
||||||
|
|
||||||
|
.. warning:: Although 100% test coverage is a worthy goal, it doesn't mean
|
||||||
|
that the code is bug free. This is especially true of code, like
|
||||||
|
kitchen's, that deals with encoding issues. The same piece of code in
|
||||||
|
kitchen will do different things depending on whether unicode or byte str
|
||||||
|
(and the characters that are in the byte str) is passed as a parameter and
|
||||||
|
what encoding is specified in certain environment variables. You can take
|
||||||
|
a look at :file:`test_i18n.py` and :file:`test_converters.py` to see tests
|
||||||
|
that attempt to cover enough input values to detect problems.
|
||||||
|
|
||||||
|
Since kitchen is currently supported on python-2.3.1+, it is desirable to test
|
||||||
|
kitchen on at least one python major version from python-2.3 through
|
||||||
|
python-2.7. We currently have access to a buildbot that has access to
|
||||||
|
python-2.4, python-2.6, and python-2.7. You can view it at
|
||||||
|
http://ci.csh.rit.edu:8080/view/Kitchen/ . The buildbot checks the devel
|
||||||
|
repository hourly and if new checkins have occurred, it attempts to rebuild.
|
||||||
|
If you need access to invoke builds on the buildbot more regularly than that,
|
||||||
|
contact Toshio to get access.
|
||||||
|
|
||||||
|
We were unable to get python-2.3 working in the buildbot so I manually run the
|
||||||
|
unittests on a CentOS-4 virtual machine (with python-2.3). I currently don't
|
||||||
|
test on python-2.5 but I'd be happy to take bug reports or get a new committer
|
||||||
|
that was interested in that platform.
|
||||||
|
|
||||||
|
Creating the release
|
||||||
|
====================
|
||||||
|
|
||||||
|
1. Make sure that any feature branches you want have been merged.
|
||||||
|
2. Pull in new translations and verify they are valid::
|
||||||
|
tx pull -a
|
||||||
|
# If msgfmt is installed, this will check that the catalogs are valid
|
||||||
|
./releaseutils.py
|
||||||
|
bzr commit -m 'Merge new translations from transifex.net'
|
||||||
|
3. Update the version in kitchen/__init__.py and NEWS.
|
||||||
|
4. Make a fresh clone of the repository::
|
||||||
|
cd $PATH_TO_MY_SHARED_REPO
|
||||||
|
bzr branch bzr://bzr.fedorahosted.org/bzr/kitchen/devel release
|
||||||
|
5. Make the source tarball in that directory::
|
||||||
|
cd release
|
||||||
|
python setup.py sdist
|
||||||
|
6. Make sure that the source tarball contains all of the files we want in the release::
|
||||||
|
cd ..
|
||||||
|
tar -xzvf release/dist/kitchen*tar.gz
|
||||||
|
diff -uNr devel kitchen-$RELEASE_VERSION
|
||||||
|
7. Upload the docs to pypi::
|
||||||
|
cd release
|
||||||
|
python setup.py upload_docs
|
||||||
|
8. Upload the tarball to pypi::
|
||||||
|
python setup.py sdist upload --sign
|
||||||
|
9. Upload the tarball to fedorahosted::
|
||||||
|
scp dist/kitchen*tar.gz fedorahosted.org:/srv/web/releases/k/i/kitchen/
|
||||||
|
10. Tag the release::
|
||||||
|
cd ../devel
|
||||||
|
bzr tag $RELEASE_VERSION
|
||||||
|
bzr push
|
170
NEWS
Normal file
170
NEWS
Normal file
|
@ -0,0 +1,170 @@
|
||||||
|
====
|
||||||
|
NEWS
|
||||||
|
====
|
||||||
|
|
||||||
|
:Authors: Toshio Kuratomi
|
||||||
|
:Date: 14 Feb 2012
|
||||||
|
:Version: 1.1.1
|
||||||
|
|
||||||
|
-----
|
||||||
|
1.1.1
|
||||||
|
-----
|
||||||
|
|
||||||
|
* Fix a bug with easy_gettext_setup() and get_translation_object() when using
|
||||||
|
the default value of localedirs.
|
||||||
|
|
||||||
|
-----
|
||||||
|
1.1.0
|
||||||
|
-----
|
||||||
|
|
||||||
|
* Add yum.i18n.exception2msg section to the porting docs
|
||||||
|
* Deprecate BYTE_EXCEPTION_CONVERTERS as simplification of code lets
|
||||||
|
us use EXCEPTION_CONVERTERS for both exception_to_unicode and
|
||||||
|
exception_to_bytes.
|
||||||
|
* kitchen.i18n.get_translation_object
|
||||||
|
- Add more parameters to :func:`~kitchen.i18n.get_translation_object` so it
|
||||||
|
can more easily be used as a replacement for :func:`gettext.translation`.
|
||||||
|
- Change the way we use localedirs. We cycle through them until we find a
|
||||||
|
suitable locale file rather than simply cycling through until we find a
|
||||||
|
directory that exists.
|
||||||
|
- When multiple message catalogs are found in localedirs (and via environment
|
||||||
|
variables), set up the extra ones as fallbacks if the message isn't found
|
||||||
|
in the first catalog.
|
||||||
|
* Change the return values from gettext and lgettext family of functions.
|
||||||
|
Instead of simply guaranteeing a byte str will be returned we now guarantee
|
||||||
|
the byte str will be valid in a certain encoding (the str may still be
|
||||||
|
mangled but it will be valid).
|
||||||
|
* Updated subprocess and base64 modules from latest python-2.7 branch.
|
||||||
|
* Fix i18n Translation objects to set input_charset and output_charset on any
|
||||||
|
fallback objects.
|
||||||
|
* Fix kitchen.i18n Translation objects' output_encoding() method on python-2.3.
|
||||||
|
It was accessing a different self object than we wanted it to. Defining it
|
||||||
|
in a different way makes it work on python-2.3.
|
||||||
|
|
||||||
|
-----
|
||||||
|
1.0.0
|
||||||
|
-----
|
||||||
|
|
||||||
|
* Add a pointer to ordereddict and iterutils in the docs
|
||||||
|
* Change a few pieces of code to not internally mix bytes and unicode
|
||||||
|
|
||||||
|
-----
|
||||||
|
0.2.4
|
||||||
|
-----
|
||||||
|
|
||||||
|
* Have easy_gettext_setup return lgettext functions instead of gettext
|
||||||
|
functions when use_unicode=False
|
||||||
|
* Correct docstring for kitchen.text.converters.exception_to_bytes() -- we're
|
||||||
|
transforming into a byte str, not into unicode.
|
||||||
|
* Correct some examples in the unicode frustrations documentation
|
||||||
|
* Correct some cross-references in the documentation
|
||||||
|
|
||||||
|
-----
|
||||||
|
0.2.3
|
||||||
|
-----
|
||||||
|
|
||||||
|
* Expose MAXFD, list2cmdline(), and mswindows in kitchen.pycompat27.subprocess.
|
||||||
|
These are undocumented, and not in upstream's __all__ but google (and bug
|
||||||
|
reports against kitchen) show that some people are using them. Note that
|
||||||
|
upstream is leaning towards these being private so they may be deprecated in
|
||||||
|
the python3 subprocess.
|
||||||
|
|
||||||
|
-----
|
||||||
|
0.2.2
|
||||||
|
-----
|
||||||
|
|
||||||
|
* Add kitchen.text.converters.exception_to_bytes() and
|
||||||
|
kitchen.text.converters.exception_to_unicode() that take an exception object
|
||||||
|
and convert it into a text representation.
|
||||||
|
* Add a documentation section on how API can be simplified if you can limit your encodings
|
||||||
|
|
||||||
|
If all goes well, we'll be making a 1.0 release shortly which is basically this release.
|
||||||
|
|
||||||
|
-------
|
||||||
|
0.2.2a1
|
||||||
|
-------
|
||||||
|
|
||||||
|
* Fix exception messages that contain unicode characters
|
||||||
|
* Speed up to_unicode for the common cases of utf-8 and latin-1.
|
||||||
|
* kitchen.i18n.NewGNUTranslations object that always returns unicode for
|
||||||
|
ugettext and ungettext, always returns str for the other gettext functions,
|
||||||
|
and doesn't throw UnicodeError.
|
||||||
|
* Change i18n functions to return either DummyTranslations or
|
||||||
|
NewGNUTranslations so all strings returned are known to be unicode or str.
|
||||||
|
* kitchen.pycompat24.base64 now synced from upstream python so it implements
|
||||||
|
all of the python-2.4 API
|
||||||
|
* unittest NewGNUTranslations
|
||||||
|
* unittest that easy_gettext_setup returns the correct objects
|
||||||
|
* Document kitchen.text.display
|
||||||
|
* Proofread all of the documentation. Cross reference to the stdlib.
|
||||||
|
* Write a porting guide for people porting from python-fedora and yum APIs.
|
||||||
|
|
||||||
|
-------
|
||||||
|
0.2.1a1
|
||||||
|
-------
|
||||||
|
|
||||||
|
* Fix failing unittest on python-2.7
|
||||||
|
* Add iterutils module
|
||||||
|
* Update table of combining utf8 characters from python-2.7
|
||||||
|
* Speed up kitchen.text.misc.str_eq().
|
||||||
|
* docs:
|
||||||
|
- api-i18n
|
||||||
|
- api-exceptions
|
||||||
|
- api-collections
|
||||||
|
- api-iterutils
|
||||||
|
- Add two tutorial sections for unicode
|
||||||
|
* unittests
|
||||||
|
- kitchen.text.converters.getwriter()
|
||||||
|
- kitchen.iterutils
|
||||||
|
- tests for more input variations to str_eq
|
||||||
|
|
||||||
|
-----
|
||||||
|
0.2a2
|
||||||
|
-----
|
||||||
|
* Add unittests for kitchen.text.display, update kitchen.text.utf8 and
|
||||||
|
kitchen.text.misc test coverage
|
||||||
|
* Bug fixes for python-2.3
|
||||||
|
* Some doc updates. More to come.
|
||||||
|
* New function kitchen.text.converters.getwriter()
|
||||||
|
|
||||||
|
-----
|
||||||
|
0.2a1
|
||||||
|
-----
|
||||||
|
* Relicense to LGPLv2+
|
||||||
|
* All API versions for subpackages moved to 1.0 to comply with new guidelines
|
||||||
|
on hacking subpackages.
|
||||||
|
* Documentation on hacking kitchen and addons
|
||||||
|
* Kitchen.text API changed (new API version 1.0)
|
||||||
|
* Move utils.* to misc.*
|
||||||
|
* Deprecate kitchen.text.utf8.utf8_valid in favor of
|
||||||
|
kitchen.text.misc.byte_string_valid_encoding
|
||||||
|
- byte_string_valid_encoding is significantly faster and a bit more generic
|
||||||
|
* Port utf8 functions to use unicode
|
||||||
|
* Put the unicode versions of the utf8 functions into kitchen.text.display
|
||||||
|
|
||||||
|
-----
|
||||||
|
0.1a3
|
||||||
|
-----
|
||||||
|
* Add a defaultdict implementation for pycompat25
|
||||||
|
* Add documentation
|
||||||
|
* Add a StrictDict class that never has str and unicode keys collide.
|
||||||
|
|
||||||
|
-----
|
||||||
|
0.1a2
|
||||||
|
-----
|
||||||
|
* Fixes for python-2.3
|
||||||
|
* versioning subpackage with version_tuple_to_string() function that creates
|
||||||
|
PEP-386 compatible version strings.
|
||||||
|
* Changed pycompat24.builtinset -- now you need to call the add_builtin_set()
|
||||||
|
function to add set and frozenset to the __builtin__ namespace.
|
||||||
|
* pycompat24.base64modern module that implements the modern interface to
|
||||||
|
encode and decode base64.  Note that it doesn't implement b32 or b16 at the
|
||||||
|
moment.
|
||||||
|
* pycompat27 with the 2.7 version of subprocess.
|
||||||
|
* The 2.7 version of subprocess is also available at
|
||||||
|
kitchen.pycompat24.subprocess since subprocess first appeared in python2.4
|
||||||
|
|
||||||
|
-----
|
||||||
|
0.1a1
|
||||||
|
-----
|
||||||
|
* Initial release of kitchen.core
|
39
PKG-INFO
Normal file
39
PKG-INFO
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
Metadata-Version: 1.0
|
||||||
|
Name: kitchen
|
||||||
|
Version: 1.1.1
|
||||||
|
Summary: Kitchen contains a cornucopia of useful code
|
||||||
|
Home-page: https://fedorahosted.org/kitchen
|
||||||
|
Author: Toshio Kuratomi
|
||||||
|
Author-email: toshio@fedoraproject.org
|
||||||
|
License: LGPLv2+
|
||||||
|
Download-URL: https://fedorahosted.org/releases/k/i/kitchen
|
||||||
|
Description:
|
||||||
|
We've all done it. In the process of writing a brand new application we've
|
||||||
|
discovered that we need a little bit of code that we've invented before.
|
||||||
|
Perhaps it's something to handle unicode text. Perhaps it's something to make
|
||||||
|
a bit of python-2.5 code run on python-2.3. Whatever it is, it ends up being
|
||||||
|
a tiny bit of code that seems too small to worry about pushing into its own
|
||||||
|
module so it sits there, a part of your current project, waiting to be cut and
|
||||||
|
pasted into your next project. And the next. And the next. And since that
|
||||||
|
little bittybit of code proved so useful to you, it's highly likely that it
|
||||||
|
proved useful to someone else as well. Useful enough that they've written it
|
||||||
|
and copy and pasted it over and over into each of their new projects.
|
||||||
|
|
||||||
|
Well, no longer! Kitchen aims to pull these small snippets of code into a few
|
||||||
|
python modules which you can import and use within your project. No more copy
|
||||||
|
and paste! Now you can let someone else maintain and release these small
|
||||||
|
snippets so that you can get on with your life.
|
||||||
|
|
||||||
|
Keywords: Useful Small Code Snippets
|
||||||
|
Platform: UNKNOWN
|
||||||
|
Classifier: Development Status :: 4 - Beta
|
||||||
|
Classifier: License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)
|
||||||
|
Classifier: Operating System :: OS Independent
|
||||||
|
Classifier: Programming Language :: Python :: 2.3
|
||||||
|
Classifier: Programming Language :: Python :: 2.4
|
||||||
|
Classifier: Programming Language :: Python :: 2.5
|
||||||
|
Classifier: Programming Language :: Python :: 2.6
|
||||||
|
Classifier: Programming Language :: Python :: 2.7
|
||||||
|
Classifier: Topic :: Software Development :: Internationalization
|
||||||
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
||||||
|
Classifier: Topic :: Text Processing :: General
|
81
README
Normal file
81
README
Normal file
|
@ -0,0 +1,81 @@
|
||||||
|
===================
|
||||||
|
Kitchen.core Module
|
||||||
|
===================
|
||||||
|
|
||||||
|
:Author: Toshio Kuratomi
|
||||||
|
:Date: 2 Jan 2012
|
||||||
|
:Version: 1.1.x
|
||||||
|
|
||||||
|
The Kitchen module provides a python API for all sorts of little useful
|
||||||
|
snippets of code that everybody ends up writing for their projects but never
|
||||||
|
seem big enough to build an independent release. Use kitchen and stop cutting
|
||||||
|
and pasting that code over and over.
|
||||||
|
|
||||||
|
.. contents::
|
||||||
|
|
||||||
|
-------
|
||||||
|
License
|
||||||
|
-------
|
||||||
|
|
||||||
|
Since version 0.2a1, this python module has been distributed under the terms of
|
||||||
|
the GNU Lesser General Public License Version 2 or later.
|
||||||
|
|
||||||
|
.. note:: Some parts of this module are licensed under terms less restrictive
|
||||||
|
than the LGPL. If you separate these files from the work as a whole you
|
||||||
|
are allowed to use them under the less restrictive licenses. The following
|
||||||
|
is a list of the files that are known:
|
||||||
|
|
||||||
|
:subprocess.py: licensed under the Python 2 license by the PSF
|
||||||
|
http://www.python.org/download/releases/2.4/license/
|
||||||
|
:test_subprocess.py: Python Software Foundation License Version 2
|
||||||
|
http://www.python.org/download/releases/2.7/license/
|
||||||
|
:kitchen/pycompat25/defaultdict.py: Python Software Foundation License Version 2
|
||||||
|
http://www.python.org/download/releases/2.6.2/license
|
||||||
|
|
||||||
|
------------
|
||||||
|
Requirements
|
||||||
|
------------
|
||||||
|
|
||||||
|
kitchen.core requires
|
||||||
|
|
||||||
|
:python: 2.3.1 or later
|
||||||
|
|
||||||
|
Soft Requirements
|
||||||
|
=================
|
||||||
|
|
||||||
|
If found, these libraries will be used to make the implementation of something
|
||||||
|
better in some way. If they are not present, the API that they enable will
|
||||||
|
still exist but may function in a different manner.
|
||||||
|
|
||||||
|
:chardet_: Used in kitchen.text.xml.guess_encoding_to_xml() to help guess encoding of
|
||||||
|
byte strings being converted. If not present, unknown encodings will be
|
||||||
|
converted as if they were latin1.
|
||||||
|
|
||||||
|
.. _chardet: http://chardet.feedparser.org/
|
||||||
|
|
||||||
|
---------------------------
|
||||||
|
Other Recommended Libraries
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
These libraries implement commonly used functionality that everyone seems to
|
||||||
|
invent. Rather than reinvent their wheel, I simply list the things that they
|
||||||
|
do well for now. Perhaps if people can't find them normally, I'll add them as
|
||||||
|
requirements in setup.py or link them into kitchen's namespace. For now, I
|
||||||
|
just mention them here:
|
||||||
|
|
||||||
|
:bunch_: Bunch is a dictionary that you can use attribute lookup as well as
|
||||||
|
bracket notation to access. Setting it apart from most homebrewed
|
||||||
|
implementations is the bunchify() function which will descend nested
|
||||||
|
structures of lists and dicts, transforming the dicts to Bunch's.
|
||||||
|
|
||||||
|
.. _bunch: http://pypi.python.org/pypi/bunch/
|
||||||
|
|
||||||
|
---------------------
|
||||||
|
Building and testing
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
Testing
|
||||||
|
=======
|
||||||
|
|
||||||
|
You can run the unittests with this command::
|
||||||
|
nosetests --with-coverage --cover-package kitchen
|
6
docs/api-collections.rst
Normal file
6
docs/api-collections.rst
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
===================
|
||||||
|
Kitchen.collections
|
||||||
|
===================
|
||||||
|
|
||||||
|
.. automodule:: kitchen.collections.strictdict
|
||||||
|
:members:
|
12
docs/api-exceptions.rst
Normal file
12
docs/api-exceptions.rst
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
==========
|
||||||
|
Exceptions
|
||||||
|
==========
|
||||||
|
|
||||||
|
Kitchen has a hierarchy of exceptions that should make it easy to catch many
|
||||||
|
errors emitted by kitchen itself.
|
||||||
|
|
||||||
|
.. automodule:: kitchen.exceptions
|
||||||
|
:members:
|
||||||
|
|
||||||
|
.. automodule:: kitchen.text.exceptions
|
||||||
|
:members:
|
38
docs/api-i18n.rst
Normal file
38
docs/api-i18n.rst
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
===================
|
||||||
|
Kitchen.i18n Module
|
||||||
|
===================
|
||||||
|
|
||||||
|
.. automodule:: kitchen.i18n
|
||||||
|
|
||||||
|
Functions
|
||||||
|
=========
|
||||||
|
|
||||||
|
:func:`easy_gettext_setup` should satisfy the needs of most users.
|
||||||
|
:func:`get_translation_object` is designed to ease the way for anyone that
|
||||||
|
needs more control.
|
||||||
|
|
||||||
|
.. autofunction:: easy_gettext_setup
|
||||||
|
|
||||||
|
.. autofunction:: get_translation_object
|
||||||
|
|
||||||
|
Translation Objects
|
||||||
|
===================
|
||||||
|
|
||||||
|
The standard translation objects from the :mod:`gettext` module suffer from
|
||||||
|
several problems:
|
||||||
|
|
||||||
|
* They can throw :exc:`UnicodeError`
|
||||||
|
* They can't find translations for non-:term:`ASCII` byte :class:`str`
|
||||||
|
messages
|
||||||
|
* They may return either :class:`unicode` string or byte :class:`str` from the
|
||||||
|
same function even though the functions say they will only return
|
||||||
|
:class:`unicode` or only return byte :class:`str`.
|
||||||
|
|
||||||
|
:class:`DummyTranslations` and :class:`NewGNUTranslations` were written to fix
|
||||||
|
these issues.
|
||||||
|
|
||||||
|
.. autoclass:: kitchen.i18n.DummyTranslations
|
||||||
|
:members:
|
||||||
|
|
||||||
|
.. autoclass:: kitchen.i18n.NewGNUTranslations
|
||||||
|
:members:
|
9
docs/api-iterutils.rst
Normal file
9
docs/api-iterutils.rst
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
|
||||||
|
========================
|
||||||
|
Kitchen.iterutils Module
|
||||||
|
========================
|
||||||
|
|
||||||
|
.. automodule:: kitchen.iterutils
|
||||||
|
|
||||||
|
.. autofunction:: kitchen.iterutils.isiterable
|
||||||
|
.. autofunction:: kitchen.iterutils.iterate
|
24
docs/api-overview.rst
Normal file
24
docs/api-overview.rst
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
.. _KitchenAPI:
|
||||||
|
|
||||||
|
===========
|
||||||
|
Kitchen API
|
||||||
|
===========
|
||||||
|
|
||||||
|
Kitchen is structured as a collection of modules. In its current
|
||||||
|
configuration, Kitchen ships with the following modules. Other addon modules
|
||||||
|
that may drag in more dependencies can be found on the `project webpage`_
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 2
|
||||||
|
|
||||||
|
api-i18n
|
||||||
|
api-text
|
||||||
|
api-collections
|
||||||
|
api-iterutils
|
||||||
|
api-versioning
|
||||||
|
api-pycompat24
|
||||||
|
api-pycompat25
|
||||||
|
api-pycompat27
|
||||||
|
api-exceptions
|
||||||
|
|
||||||
|
.. _`project webpage`: https://fedorahosted.org/kitchen
|
34
docs/api-pycompat24.rst
Normal file
34
docs/api-pycompat24.rst
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
========================
|
||||||
|
Python 2.4 Compatibility
|
||||||
|
========================
|
||||||
|
|
||||||
|
|
||||||
|
-------------------
|
||||||
|
Sets for python-2.3
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
.. automodule:: kitchen.pycompat24.sets
|
||||||
|
.. autofunction:: kitchen.pycompat24.sets.add_builtin_set
|
||||||
|
|
||||||
|
----------------------------------
|
||||||
|
Partial new style base64 interface
|
||||||
|
----------------------------------
|
||||||
|
|
||||||
|
.. automodule:: kitchen.pycompat24.base64
|
||||||
|
:members:
|
||||||
|
|
||||||
|
----------
|
||||||
|
Subprocess
|
||||||
|
----------
|
||||||
|
|
||||||
|
.. seealso::
|
||||||
|
|
||||||
|
:mod:`kitchen.pycompat27.subprocess`
|
||||||
|
Kitchen includes the python-2.7 version of subprocess which has a new
|
||||||
|
function, :func:`~kitchen.pycompat27.subprocess.check_output`. When
|
||||||
|
you import :mod:`pycompat24.subprocess` you will be getting the
|
||||||
|
python-2.7 version of subprocess rather than the 2.4 version (where
|
||||||
|
subprocess first appeared). This choice was made so that we can
|
||||||
|
concentrate our efforts on keeping the single version of subprocess up
|
||||||
|
to date rather than working on a 2.4 version that very few people
|
||||||
|
would need specifically.
|
8
docs/api-pycompat25.rst
Normal file
8
docs/api-pycompat25.rst
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
========================
|
||||||
|
Python 2.5 Compatibility
|
||||||
|
========================
|
||||||
|
|
||||||
|
.. automodule:: kitchen.pycompat25
|
||||||
|
|
||||||
|
.. automodule:: kitchen.pycompat25.collections._defaultdict
|
||||||
|
|
35
docs/api-pycompat27.rst
Normal file
35
docs/api-pycompat27.rst
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
========================
|
||||||
|
Python 2.7 Compatibility
|
||||||
|
========================
|
||||||
|
|
||||||
|
.. module:: kitchen.pycompat27.subprocess
|
||||||
|
|
||||||
|
--------------------------
|
||||||
|
Subprocess from Python 2.7
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
The :mod:`subprocess` module included here is a direct import from
|
||||||
|
python-2.7's |stdlib|_. You can access it via::
|
||||||
|
|
||||||
|
>>> from kitchen.pycompat27 import subprocess
|
||||||
|
|
||||||
|
The motivation for including this module is that various API changing
|
||||||
|
improvements have been made to subprocess over time. The following is a list
|
||||||
|
of the known changes to :mod:`subprocess` with the python version they were
|
||||||
|
introduced in:
|
||||||
|
|
||||||
|
==================================== ===
|
||||||
|
New API Feature Ver
|
||||||
|
==================================== ===
|
||||||
|
:exc:`subprocess.CalledProcessError` 2.5
|
||||||
|
:func:`subprocess.check_call` 2.5
|
||||||
|
:func:`subprocess.check_output` 2.7
|
||||||
|
:meth:`subprocess.Popen.send_signal` 2.6
|
||||||
|
:meth:`subprocess.Popen.terminate` 2.6
|
||||||
|
:meth:`subprocess.Popen.kill` 2.6
|
||||||
|
==================================== ===
|
||||||
|
|
||||||
|
.. seealso::
|
||||||
|
|
||||||
|
The stdlib :mod:`subprocess` documentation
|
||||||
|
For complete documentation on how to use subprocess
|
405
docs/api-text-converters.rst
Normal file
405
docs/api-text-converters.rst
Normal file
|
@ -0,0 +1,405 @@
|
||||||
|
-----------------------
|
||||||
|
Kitchen.text.converters
|
||||||
|
-----------------------
|
||||||
|
|
||||||
|
.. automodule:: kitchen.text.converters
|
||||||
|
|
||||||
|
Byte Strings and Unicode in Python2
|
||||||
|
===================================
|
||||||
|
|
||||||
|
Python2 has two string types, :class:`str` and :class:`unicode`.
|
||||||
|
:class:`unicode` represents an abstract sequence of text characters. It can
|
||||||
|
hold any character that is present in the unicode standard. :class:`str` can
|
||||||
|
hold any byte of data. The operating system and python work together to
|
||||||
|
display these bytes as characters in many cases but you should always keep in
|
||||||
|
mind that the information is really a sequence of bytes, not a sequence of
|
||||||
|
characters. In python2 these types are interchangeable a large amount of the
|
||||||
|
time. They are one of the few pairs of types that automatically convert when
|
||||||
|
used in equality::
|
||||||
|
|
||||||
|
>>> # string is converted to unicode and then compared
|
||||||
|
>>> "I am a string" == u"I am a string"
|
||||||
|
True
|
||||||
|
>>> # Other types, like int, don't have this special treatment
|
||||||
|
>>> 5 == "5"
|
||||||
|
False
|
||||||
|
|
||||||
|
However, this automatic conversion tends to lull people into a false sense of
|
||||||
|
security. As long as you're dealing with :term:`ASCII` characters the
|
||||||
|
automatic conversion will save you from seeing any differences. Once you
|
||||||
|
start using characters that are not in :term:`ASCII`, you will start getting
|
||||||
|
:exc:`UnicodeError` and :exc:`UnicodeWarning` as the automatic conversions
|
||||||
|
between the types fail::
|
||||||
|
|
||||||
|
>>> "I am an ñ" == u"I am an ñ"
|
||||||
|
__main__:1: UnicodeWarning: Unicode equal comparison failed to convert both arguments to Unicode - interpreting them as being unequal
|
||||||
|
False
|
||||||
|
|
||||||
|
Why do these conversions fail? The reason is that the python2
|
||||||
|
:class:`unicode` type represents an abstract sequence of unicode text known as
|
||||||
|
:term:`code points`. :class:`str`, on the other hand, really represents
|
||||||
|
a sequence of bytes. Those bytes are converted by your operating system to
|
||||||
|
appear as characters on your screen using a particular encoding (usually
|
||||||
|
with a default defined by the operating system and customizable by the
|
||||||
|
individual user.) Although :term:`ASCII` characters are fairly standard in
|
||||||
|
what bytes represent each character, the bytes outside of the :term:`ASCII`
|
||||||
|
range are not. In general, each encoding will map a different character to
|
||||||
|
a particular byte. Newer encodings map individual characters to multiple
|
||||||
|
bytes (which the older encodings will instead treat as multiple characters).
|
||||||
|
In the face of these differences, python refuses to guess at an encoding and
|
||||||
|
instead issues a warning or exception and refuses to convert.
|
||||||
|
|
||||||
|
.. seealso::
|
||||||
|
:ref:`overcoming-frustration`
|
||||||
|
For a longer introduction on this subject.
|
||||||
|
|
||||||
|
Strategy for Explicit Conversion
|
||||||
|
================================
|
||||||
|
|
||||||
|
So what is the best method of dealing with this weltering babble of incoherent
|
||||||
|
encodings? The basic strategy is to explicitly turn everything into
|
||||||
|
:class:`unicode` when it first enters your program. Then, when you send it to
|
||||||
|
output, you can transform the unicode back into bytes. Doing this allows you
|
||||||
|
to control the encodings that are used and avoid getting tracebacks due to
|
||||||
|
:exc:`UnicodeError`. Using the functions defined in this module, that looks
|
||||||
|
something like this:
|
||||||
|
|
||||||
|
.. code-block:: pycon
|
||||||
|
:linenos:
|
||||||
|
|
||||||
|
>>> from kitchen.text.converters import to_unicode, to_bytes
|
||||||
|
>>> name = raw_input('Enter your name: ')
|
||||||
|
Enter your name: Toshio くらとみ
|
||||||
|
>>> name
|
||||||
|
'Toshio \xe3\x81\x8f\xe3\x82\x89\xe3\x81\xa8\xe3\x81\xbf'
|
||||||
|
>>> type(name)
|
||||||
|
<type 'str'>
|
||||||
|
>>> unicode_name = to_unicode(name)
|
||||||
|
>>> type(unicode_name)
|
||||||
|
<type 'unicode'>
|
||||||
|
>>> unicode_name
|
||||||
|
u'Toshio \u304f\u3089\u3068\u307f'
|
||||||
|
>>> # Do a lot of other things before needing to save/output again:
|
||||||
|
>>> output = open('datafile', 'w')
|
||||||
|
>>> output.write(to_bytes(u'Name: %s\\n' % unicode_name))
|
||||||
|
|
||||||
|
A few notes:
|
||||||
|
|
||||||
|
Looking at line 6, you'll notice that the input we took from the user was
|
||||||
|
a byte :class:`str`. In general, anytime we're getting a value from outside
|
||||||
|
of python (The filesystem, reading data from the network, interacting with an
|
||||||
|
external command, reading values from the environment) we are interacting with
|
||||||
|
something that will want to give us a byte :class:`str`. Some |stdlib|_
|
||||||
|
modules and third party libraries will automatically attempt to convert a byte
|
||||||
|
:class:`str` to :class:`unicode` strings for you. This is both a boon and
|
||||||
|
a curse. If the library can guess correctly about the encoding that the data
|
||||||
|
is in, it will return :class:`unicode` objects to you without you having to
|
||||||
|
convert. However, if it can't guess correctly, you may end up with one of
|
||||||
|
several problems:
|
||||||
|
|
||||||
|
:exc:`UnicodeError`
|
||||||
|
The library attempted to decode a byte :class:`str` into
|
||||||
|
a :class:`unicode` string, failed, and raises an exception.
|
||||||
|
Garbled data
|
||||||
|
If the library returns the data after decoding it with the wrong encoding,
|
||||||
|
the characters you see in the :class:`unicode` string won't be the ones that
|
||||||
|
you expect.
|
||||||
|
A byte :class:`str` instead of :class:`unicode` string
|
||||||
|
Some libraries will return a :class:`unicode` string when they're able to
|
||||||
|
decode the data and a byte :class:`str` when they can't. This is
|
||||||
|
generally the hardest problem to debug when it occurs. Avoid it in your
|
||||||
|
own code and try to avoid or open bugs against upstreams that do this. See
|
||||||
|
:ref:`DesigningUnicodeAwareAPIs` for strategies to do this properly.
|
||||||
|
|
||||||
|
On line 8, we convert from a byte :class:`str` to a :class:`unicode` string.
|
||||||
|
:func:`~kitchen.text.converters.to_unicode` does this for us. It has some
|
||||||
|
error handling and sane defaults that make this a nicer function to use than
|
||||||
|
calling :meth:`str.decode` directly:
|
||||||
|
|
||||||
|
* Instead of defaulting to the :term:`ASCII` encoding which fails with all
|
||||||
|
but the simple American English characters, it defaults to :term:`UTF-8`.
|
||||||
|
* Instead of raising an error if it cannot decode a value, it will replace
|
||||||
|
the value with the unicode "Replacement character" symbol (``�``).
|
||||||
|
* If you happen to call this method with something that is not a :class:`str`
|
||||||
|
or :class:`unicode`, it will return an empty :class:`unicode` string.
|
||||||
|
|
||||||
|
All three of these can be overridden using different keyword arguments to the
|
||||||
|
function. See the :func:`to_unicode` documentation for more information.
|
||||||
|
|
||||||
|
On line 15 we push the data back out to a file. Two things you should note here:
|
||||||
|
|
||||||
|
1. We deal with the strings as :class:`unicode` until the last instant. The
|
||||||
|
string format that we're using is :class:`unicode` and the variable also
|
||||||
|
holds :class:`unicode`. People sometimes get into trouble when they mix
|
||||||
|
a byte :class:`str` format with a variable that holds a :class:`unicode`
|
||||||
|
string (or vice versa) at this stage.
|
||||||
|
2. :func:`~kitchen.text.converters.to_bytes`, does the reverse of
|
||||||
|
:func:`to_unicode`. In this case, we're using the default values which
|
||||||
|
turn :class:`unicode` into a byte :class:`str` using :term:`UTF-8`. Any
|
||||||
|
errors are replaced with a ``�`` and sending nonstring objects yields empty
|
||||||
|
:class:`unicode` strings. Just like :func:`to_unicode`, you can look at
|
||||||
|
the documentation for :func:`to_bytes` to find out how to override any of
|
||||||
|
these defaults.
|
||||||
|
|
||||||
|
When to use an alternate strategy
|
||||||
|
---------------------------------
|
||||||
|
|
||||||
|
The default strategy of decoding to :class:`unicode` strings when you take
|
||||||
|
data in and encoding to a byte :class:`str` when you send the data back out
|
||||||
|
works great for most problems but there are a few times when you shouldn't:
|
||||||
|
|
||||||
|
* The values aren't meant to be read as text
|
||||||
|
* The values need to be byte-for-byte when you send them back out -- for
|
||||||
|
instance if they are database keys or filenames.
|
||||||
|
* You are transferring the data between several libraries that all expect
|
||||||
|
byte :class:`str`.
|
||||||
|
|
||||||
|
In each of these instances, there is a reason to keep around the byte
|
||||||
|
:class:`str` version of a value. Here's a few hints to keep your sanity in
|
||||||
|
these situations:
|
||||||
|
|
||||||
|
1. Keep your :class:`unicode` and :class:`str` values separate. Just like the
|
||||||
|
pain caused when you have to use someone else's library that returns both
|
||||||
|
:class:`unicode` and :class:`str` you can cause yourself pain if you have
|
||||||
|
functions that can return both types or variables that could hold either
|
||||||
|
type of value.
|
||||||
|
2. Name your variables so that you can tell whether you're storing byte
|
||||||
|
:class:`str` or :class:`unicode` string. One of the first things you end
|
||||||
|
up having to do when debugging is determine what type of string you have in
|
||||||
|
a variable and what type of string you are expecting. Naming your
|
||||||
|
variables consistently so that you can tell which type they are supposed to
|
||||||
|
hold will save you from at least one of those steps.
|
||||||
|
3. When you get values initially, make sure that you're dealing with the type
|
||||||
|
of value that you expect as you save it. You can use :func:`isinstance`
|
||||||
|
or :func:`to_bytes` since :func:`to_bytes` doesn't do any modifications of
|
||||||
|
the string if it's already a :class:`str`. When using :func:`to_bytes`
|
||||||
|
for this purpose you might want to use::
|
||||||
|
|
||||||
|
try:
|
||||||
|
b_input = to_bytes(input_should_be_bytes_already, errors='strict', nonstring='strict')
|
||||||
|
except:
|
||||||
|
handle_errors_somehow()
|
||||||
|
|
||||||
|
The reason is that the default of :func:`to_bytes` will take characters
|
||||||
|
that are illegal in the chosen encoding and transform them to replacement
|
||||||
|
characters. Since the point of keeping this data as a byte :class:`str` is
|
||||||
|
to keep the exact same bytes when you send it outside of your code,
|
||||||
|
changing things to replacement characters should be raising red flags that
|
||||||
|
something is wrong. Setting :attr:`errors` to ``strict`` will raise an
|
||||||
|
exception which gives you an opportunity to fail gracefully.
|
||||||
|
4. Sometimes you will want to print out the values that you have in your byte
|
||||||
|
:class:`str`. When you do this you will need to make sure that you
|
||||||
|
transform :class:`unicode` to :class:`str` before combining them. Also be
|
||||||
|
sure that any other function calls (including :mod:`gettext`) are going to
|
||||||
|
give you strings that are the same type. For instance::
|
||||||
|
|
||||||
|
print to_bytes(_('Username: %(user)s'), 'utf-8') % {'user': b_username}
|
||||||
|
|
||||||
|
Gotchas and how to avoid them
|
||||||
|
=============================
|
||||||
|
|
||||||
|
Even when you have a good conceptual understanding of how python2 treats
|
||||||
|
:class:`unicode` and :class:`str` there are still some things that can
|
||||||
|
surprise you. In most cases this is because, as noted earlier, python or one
|
||||||
|
of the python libraries you depend on is trying to convert a value
|
||||||
|
automatically and failing. Explicit conversion at the appropriate place
|
||||||
|
usually solves that.
|
||||||
|
|
||||||
|
str(obj)
|
||||||
|
--------
|
||||||
|
|
||||||
|
One common idiom for getting a simple, string representation of an object is to use::
|
||||||
|
|
||||||
|
str(obj)
|
||||||
|
|
||||||
|
Unfortunately, this is not safe. Sometimes str(obj) will return
|
||||||
|
:class:`unicode`. Sometimes it will return a byte :class:`str`. Sometimes,
|
||||||
|
it will attempt to convert from a :class:`unicode` string to a byte
|
||||||
|
:class:`str`, fail, and throw a :exc:`UnicodeError`. To be safe from all of
|
||||||
|
these, first decide whether you need :class:`unicode` or :class:`str` to be
|
||||||
|
returned. Then use :func:`to_unicode` or :func:`to_bytes` to get the simple
|
||||||
|
representation like this::
|
||||||
|
|
||||||
|
u_representation = to_unicode(obj, nonstring='simplerepr')
|
||||||
|
b_representation = to_bytes(obj, nonstring='simplerepr')
|
||||||
|
|
||||||
|
print
|
||||||
|
-----
|
||||||
|
|
||||||
|
python has a builtin :func:`print` statement that outputs strings to the
|
||||||
|
terminal. This originated in a time when python only dealt with byte
|
||||||
|
:class:`str`. When :class:`unicode` strings came about, some enhancements
|
||||||
|
were made to the :func:`print` statement so that it could print those as well.
|
||||||
|
The enhancements make :func:`print` work most of the time. However, the times
|
||||||
|
when it doesn't work tend to make for cryptic debugging.
|
||||||
|
|
||||||
|
The basic issue is that :func:`print` has to figure out what encoding to use
|
||||||
|
when it prints a :class:`unicode` string to the terminal. When python is
|
||||||
|
attached to your terminal (ie, you're running the interpreter or running
|
||||||
|
a script that prints to the screen) python is able to take the encoding value
|
||||||
|
from your locale settings :envvar:`LC_ALL` or :envvar:`LC_CTYPE` and print the
|
||||||
|
characters allowed by that encoding. On most modern Unix systems, the
|
||||||
|
encoding is :term:`utf-8` which means that you can print any :class:`unicode`
|
||||||
|
character without problem.
|
||||||
|
|
||||||
|
There are two common cases of things going wrong:
|
||||||
|
|
||||||
|
1. Someone has a locale set that does not accept all valid unicode characters.
|
||||||
|
For instance::
|
||||||
|
|
||||||
|
$ LC_ALL=C python
|
||||||
|
>>> print u'\ufffd'
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "<stdin>", line 1, in <module>
|
||||||
|
UnicodeEncodeError: 'ascii' codec can't encode character u'\ufffd' in position 0: ordinal not in range(128)
|
||||||
|
|
||||||
|
This often happens when a script that you've written and debugged from the
|
||||||
|
terminal is run from an automated environment like :program:`cron`. It
|
||||||
|
also occurs when you have written a script using a :term:`utf-8` aware
|
||||||
|
locale and released it for consumption by people all over the internet.
|
||||||
|
Inevitably, someone is running with a locale that can't handle all unicode
|
||||||
|
characters and you get a traceback reported.
|
||||||
|
2. You redirect output to a file. Python isn't using the values in
|
||||||
|
:envvar:`LC_ALL` unconditionally to decide what encoding to use. Instead
|
||||||
|
it is using the encoding set for the terminal you are printing to which is
|
||||||
|
set to accept different encodings by :envvar:`LC_ALL`. If you redirect
|
||||||
|
to a file, you are no longer printing to the terminal so :envvar:`LC_ALL`
|
||||||
|
won't have any effect. At this point, python will decide it can't find an
|
||||||
|
encoding and fallback to :term:`ASCII` which will likely lead to
|
||||||
|
:exc:`UnicodeError` being raised. You can see this in a short script::
|
||||||
|
|
||||||
|
#! /usr/bin/python -tt
|
||||||
|
print u'\ufffd'
|
||||||
|
|
||||||
|
And then look at the difference between running it normally and redirecting to a file:
|
||||||
|
|
||||||
|
.. code-block:: console
|
||||||
|
|
||||||
|
$ ./test.py
|
||||||
|
<20>
|
||||||
|
$ ./test.py > t
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "test.py", line 3, in <module>
|
||||||
|
print u'\ufffd'
|
||||||
|
UnicodeEncodeError: 'ascii' codec can't encode character u'\ufffd' in position 0: ordinal not in range(128)
|
||||||
|
|
||||||
|
The short answer to dealing with this is to always use bytes when writing
|
||||||
|
output. You can do this by explicitly converting to bytes like this::
|
||||||
|
|
||||||
|
from kitchen.text.converters import to_bytes
|
||||||
|
u_string = u'\ufffd'
|
||||||
|
print to_bytes(u_string)
|
||||||
|
|
||||||
|
or you can wrap stdout and stderr with a :class:`~codecs.StreamWriter`.
|
||||||
|
A :class:`~codecs.StreamWriter` is convenient in that you can assign it to
|
||||||
|
encode for :data:`sys.stdout` or :data:`sys.stderr` and then have output
|
||||||
|
automatically converted but it has the drawback of still being able to throw
|
||||||
|
:exc:`UnicodeError` if the writer can't encode all possible unicode
|
||||||
|
codepoints. Kitchen provides an alternate version which can be retrieved with
|
||||||
|
:func:`kitchen.text.converters.getwriter` which will not traceback in its
|
||||||
|
standard configuration.
|
||||||
|
|
||||||
|
.. _unicode-and-dict-keys:
|
||||||
|
|
||||||
|
Unicode, str, and dict keys
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
The :func:`hash` of the :term:`ASCII` characters is the same for
|
||||||
|
:class:`unicode` and byte :class:`str`. When you use them in :class:`dict`
|
||||||
|
keys, they evaluate to the same dictionary slot::
|
||||||
|
|
||||||
|
>>> u_string = u'a'
|
||||||
|
>>> b_string = 'a'
|
||||||
|
>>> hash(u_string), hash(b_string)
|
||||||
|
(12416037344, 12416037344)
|
||||||
|
>>> d = {}
|
||||||
|
>>> d[u_string] = 'unicode'
|
||||||
|
>>> d[b_string] = 'bytes'
|
||||||
|
>>> d
|
||||||
|
{u'a': 'bytes'}
|
||||||
|
|
||||||
|
When you deal with key values outside of :term:`ASCII`, :class:`unicode` and
|
||||||
|
byte :class:`str` evaluate unequally no matter what their character content or
|
||||||
|
hash value::
|
||||||
|
|
||||||
|
>>> u_string = u'ñ'
|
||||||
|
>>> b_string = u_string.encode('utf-8')
|
||||||
|
>>> print u_string
|
||||||
|
ñ
|
||||||
|
>>> print b_string
|
||||||
|
ñ
|
||||||
|
>>> d = {}
|
||||||
|
>>> d[u_string] = 'unicode'
|
||||||
|
>>> d[b_string] = 'bytes'
|
||||||
|
>>> d
|
||||||
|
{u'\\xf1': 'unicode', '\\xc3\\xb1': 'bytes'}
|
||||||
|
>>> b_string2 = '\\xf1'
|
||||||
|
>>> hash(u_string), hash(b_string2)
|
||||||
|
(30848092528, 30848092528)
|
||||||
|
>>> d = {}
|
||||||
|
>>> d[u_string] = 'unicode'
|
||||||
|
>>> d[b_string2] = 'bytes'
|
||||||
|
>>> d
{u'\\xf1': 'unicode', '\\xf1': 'bytes'}
|
||||||
|
|
||||||
|
How do you work with this one? Remember rule #1: Keep your :class:`unicode`
|
||||||
|
and byte :class:`str` values separate. That goes for keys in a dictionary
|
||||||
|
just like anything else.
|
||||||
|
|
||||||
|
* For any given dictionary, make sure that all your keys are either
|
||||||
|
:class:`unicode` or :class:`str`. **Do not mix the two.** If you're being
|
||||||
|
given both :class:`unicode` and :class:`str` but you don't need to preserve
|
||||||
|
separate keys for each, I recommend using :func:`to_unicode` or
|
||||||
|
:func:`to_bytes` to convert all keys to one type or the other like this::
|
||||||
|
|
||||||
|
>>> from kitchen.text.converters import to_unicode
|
||||||
|
>>> u_string = u'one'
|
||||||
|
>>> b_string = 'two'
|
||||||
|
>>> d = {}
|
||||||
|
>>> d[to_unicode(u_string)] = 1
|
||||||
|
>>> d[to_unicode(b_string)] = 2
|
||||||
|
>>> d
|
||||||
|
{u'two': 2, u'one': 1}
|
||||||
|
|
||||||
|
* These issues also apply to using dicts with tuple keys that contain
|
||||||
|
a mixture of :class:`unicode` and :class:`str`. Once again the best fix
|
||||||
|
is to standardise on either :class:`str` or :class:`unicode`.
|
||||||
|
|
||||||
|
* If you absolutely need to store values in a dictionary where the keys could
|
||||||
|
be either :class:`unicode` or :class:`str` you can use
|
||||||
|
:class:`~kitchen.collections.strictdict.StrictDict` which has separate
|
||||||
|
entries for all :class:`unicode` and byte :class:`str` and deals correctly
|
||||||
|
with any :class:`tuple` containing mixed :class:`unicode` and byte
|
||||||
|
:class:`str`.
|
||||||
|
|
||||||
|
---------
|
||||||
|
Functions
|
||||||
|
---------
|
||||||
|
|
||||||
|
Unicode and byte str conversion
|
||||||
|
===============================
|
||||||
|
|
||||||
|
.. autofunction:: kitchen.text.converters.to_unicode
|
||||||
|
.. autofunction:: kitchen.text.converters.to_bytes
|
||||||
|
.. autofunction:: kitchen.text.converters.getwriter
|
||||||
|
.. autofunction:: kitchen.text.converters.to_str
|
||||||
|
.. autofunction:: kitchen.text.converters.to_utf8
|
||||||
|
|
||||||
|
Transformation to XML
|
||||||
|
=====================
|
||||||
|
|
||||||
|
.. autofunction:: kitchen.text.converters.unicode_to_xml
|
||||||
|
.. autofunction:: kitchen.text.converters.xml_to_unicode
|
||||||
|
.. autofunction:: kitchen.text.converters.byte_string_to_xml
|
||||||
|
.. autofunction:: kitchen.text.converters.xml_to_byte_string
|
||||||
|
.. autofunction:: kitchen.text.converters.bytes_to_xml
|
||||||
|
.. autofunction:: kitchen.text.converters.xml_to_bytes
|
||||||
|
.. autofunction:: kitchen.text.converters.guess_encoding_to_xml
|
||||||
|
.. autofunction:: kitchen.text.converters.to_xml
|
||||||
|
|
||||||
|
Working with exception messages
|
||||||
|
===============================
|
||||||
|
|
||||||
|
.. autodata:: kitchen.text.converters.EXCEPTION_CONVERTERS
|
||||||
|
.. autodata:: kitchen.text.converters.BYTE_EXCEPTION_CONVERTERS
|
||||||
|
.. autofunction:: kitchen.text.converters.exception_to_unicode
|
||||||
|
.. autofunction:: kitchen.text.converters.exception_to_bytes
|
33
docs/api-text-display.rst
Normal file
33
docs/api-text-display.rst
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
.. automodule:: kitchen.text.display
|
||||||
|
|
||||||
|
.. autofunction:: kitchen.text.display.textual_width
|
||||||
|
|
||||||
|
.. autofunction:: kitchen.text.display.textual_width_chop
|
||||||
|
|
||||||
|
.. autofunction:: kitchen.text.display.textual_width_fill
|
||||||
|
|
||||||
|
.. autofunction:: kitchen.text.display.wrap
|
||||||
|
|
||||||
|
.. autofunction:: kitchen.text.display.fill
|
||||||
|
|
||||||
|
.. autofunction:: kitchen.text.display.byte_string_textual_width_fill
|
||||||
|
|
||||||
|
Internal Data
|
||||||
|
=============
|
||||||
|
|
||||||
|
There are a few internal functions and variables in this module. Code outside
|
||||||
|
of kitchen shouldn't use them but people coding on kitchen itself may find
|
||||||
|
them useful.
|
||||||
|
|
||||||
|
.. autodata:: kitchen.text.display._COMBINING
|
||||||
|
|
||||||
|
.. autofunction:: kitchen.text.display._generate_combining_table
|
||||||
|
|
||||||
|
.. autofunction:: kitchen.text.display._print_combining_table
|
||||||
|
|
||||||
|
.. autofunction:: kitchen.text.display._interval_bisearch
|
||||||
|
|
||||||
|
.. autofunction:: kitchen.text.display._ucp_width
|
||||||
|
|
||||||
|
.. autofunction:: kitchen.text.display._textual_width_le
|
||||||
|
|
2
docs/api-text-misc.rst
Normal file
2
docs/api-text-misc.rst
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
.. automodule:: kitchen.text.misc
|
||||||
|
:members:
|
3
docs/api-text-utf8.rst
Normal file
3
docs/api-text-utf8.rst
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
.. automodule:: kitchen.text.utf8
|
||||||
|
:members:
|
||||||
|
:deprecated:
|
22
docs/api-text.rst
Normal file
22
docs/api-text.rst
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
=============================================
|
||||||
|
Kitchen.text: unicode and utf8 and xml oh my!
|
||||||
|
=============================================
|
||||||
|
|
||||||
|
The kitchen.text module contains functions that deal with text manipulation.
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
|
||||||
|
api-text-converters
|
||||||
|
api-text-display
|
||||||
|
api-text-misc
|
||||||
|
api-text-utf8
|
||||||
|
|
||||||
|
:mod:`~kitchen.text.converters`
|
||||||
|
deals with converting text for different encodings and to and from XML
|
||||||
|
:mod:`~kitchen.text.display`
|
||||||
|
deals with issues with printing text to a screen
|
||||||
|
:mod:`~kitchen.text.misc`
|
||||||
|
is a catchall for text manipulation functions that don't seem to fit
|
||||||
|
elsewhere
|
||||||
|
:mod:`~kitchen.text.utf8`
|
||||||
|
contains deprecated functions to manipulate utf8 byte strings
|
6
docs/api-versioning.rst
Normal file
6
docs/api-versioning.rst
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
===============================
|
||||||
|
Helpers for versioning software
|
||||||
|
===============================
|
||||||
|
|
||||||
|
.. automodule:: kitchen.versioning
|
||||||
|
:members:
|
220
docs/conf.py
Normal file
220
docs/conf.py
Normal file
|
@ -0,0 +1,220 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# Kitchen documentation build configuration file, created by
|
||||||
|
# sphinx-quickstart on Sat May 22 00:51:26 2010.
|
||||||
|
#
|
||||||
|
# This file is execfile()d with the current directory set to its containing dir.
|
||||||
|
#
|
||||||
|
# Note that not all possible configuration values are present in this
|
||||||
|
# autogenerated file.
|
||||||
|
#
|
||||||
|
# All configuration values have a default; values that are commented out
|
||||||
|
# serve to show the default.
|
||||||
|
|
||||||
|
import sys, os
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
||||||
|
import kitchen.release
|
||||||
|
|
||||||
|
# If extensions (or modules to document with autodoc) are in another directory,
|
||||||
|
# add these directories to sys.path here. If the directory is relative to the
|
||||||
|
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
||||||
|
#sys.path.append(os.path.abspath('.'))
|
||||||
|
|
||||||
|
# -- General configuration -----------------------------------------------------
|
||||||
|
|
||||||
|
# Add any Sphinx extension module names here, as strings. They can be extensions
|
||||||
|
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
|
||||||
|
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', 'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.pngmath', 'sphinx.ext.ifconfig']
|
||||||
|
|
||||||
|
# Add any paths that contain templates here, relative to this directory.
|
||||||
|
templates_path = ['_templates']
|
||||||
|
|
||||||
|
# The suffix of source filenames.
|
||||||
|
source_suffix = '.rst'
|
||||||
|
|
||||||
|
# The encoding of source files.
|
||||||
|
#source_encoding = 'utf-8'
|
||||||
|
|
||||||
|
# The master toctree document.
|
||||||
|
master_doc = 'index'
|
||||||
|
|
||||||
|
# General information about the project.
|
||||||
|
project = kitchen.release.NAME
|
||||||
|
copyright = kitchen.release.COPYRIGHT
|
||||||
|
|
||||||
|
# The version info for the project you're documenting, acts as replacement for
|
||||||
|
# |version| and |release|, also used in various other places throughout the
|
||||||
|
# built documents.
|
||||||
|
#
|
||||||
|
# The short X.Y version.
|
||||||
|
version = '0.2'
|
||||||
|
# The full version, including alpha/beta/rc tags.
|
||||||
|
release = kitchen.__version__
|
||||||
|
|
||||||
|
# The language for content autogenerated by Sphinx. Refer to documentation
|
||||||
|
# for a list of supported languages.
|
||||||
|
language = 'en'
|
||||||
|
|
||||||
|
# There are two options for replacing |today|: either, you set today to some
|
||||||
|
# non-false value, then it is used:
|
||||||
|
#today = ''
|
||||||
|
# Else, today_fmt is used as the format for a strftime call.
|
||||||
|
#today_fmt = '%B %d, %Y'
|
||||||
|
|
||||||
|
# List of documents that shouldn't be included in the build.
|
||||||
|
#unused_docs = []
|
||||||
|
|
||||||
|
# List of directories, relative to source directory, that shouldn't be searched
|
||||||
|
# for source files.
|
||||||
|
exclude_trees = []
|
||||||
|
|
||||||
|
# The reST default role (used for this markup: `text`) to use for all documents.
|
||||||
|
#default_role = None
|
||||||
|
|
||||||
|
# If true, '()' will be appended to :func: etc. cross-reference text.
|
||||||
|
add_function_parentheses = True
|
||||||
|
|
||||||
|
# If true, the current module name will be prepended to all description
|
||||||
|
# unit titles (such as .. function::).
|
||||||
|
#add_module_names = True
|
||||||
|
|
||||||
|
# If true, sectionauthor and moduleauthor directives will be shown in the
|
||||||
|
# output. They are ignored by default.
|
||||||
|
show_authors = True
|
||||||
|
|
||||||
|
# The name of the Pygments (syntax highlighting) style to use.
|
||||||
|
pygments_style = 'sphinx'
|
||||||
|
|
||||||
|
# A list of ignored prefixes for module index sorting.
|
||||||
|
#modindex_common_prefix = []
|
||||||
|
|
||||||
|
highlight_language = 'python'
|
||||||
|
|
||||||
|
# -- Options for HTML output ---------------------------------------------------
|
||||||
|
|
||||||
|
# The theme to use for HTML and HTML Help pages. Major themes that come with
|
||||||
|
# Sphinx are currently 'default' and 'sphinxdoc'.
|
||||||
|
html_theme = 'default'
|
||||||
|
|
||||||
|
# Theme options are theme-specific and customize the look and feel of a theme
|
||||||
|
# further. For a list of options available for each theme, see the
|
||||||
|
# documentation.
|
||||||
|
#html_theme_options = {}
|
||||||
|
|
||||||
|
# Add any paths that contain custom themes here, relative to this directory.
|
||||||
|
#html_theme_path = []
|
||||||
|
|
||||||
|
# The name for this set of Sphinx documents. If None, it defaults to
|
||||||
|
# "<project> v<release> documentation".
|
||||||
|
#html_title = None
|
||||||
|
|
||||||
|
# A shorter title for the navigation bar. Default is the same as html_title.
|
||||||
|
#html_short_title = None
|
||||||
|
|
||||||
|
# The name of an image file (relative to this directory) to place at the top
|
||||||
|
# of the sidebar.
|
||||||
|
#html_logo = None
|
||||||
|
|
||||||
|
# The name of an image file (within the static path) to use as favicon of the
|
||||||
|
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
|
||||||
|
# pixels large.
|
||||||
|
#html_favicon = None
|
||||||
|
|
||||||
|
# Add any paths that contain custom static files (such as style sheets) here,
|
||||||
|
# relative to this directory. They are copied after the builtin static files,
|
||||||
|
# so a file named "default.css" will overwrite the builtin "default.css".
|
||||||
|
html_static_path = ['_static']
|
||||||
|
|
||||||
|
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
|
||||||
|
# using the given strftime format.
|
||||||
|
#html_last_updated_fmt = '%b %d, %Y'
|
||||||
|
|
||||||
|
# If true, SmartyPants will be used to convert quotes and dashes to
|
||||||
|
# typographically correct entities.
|
||||||
|
#html_use_smartypants = True
|
||||||
|
|
||||||
|
# Content template for the index page.
|
||||||
|
html_index = 'index.html'
|
||||||
|
|
||||||
|
# Custom sidebar templates, maps document names to template names.
|
||||||
|
#html_sidebars = {}
|
||||||
|
|
||||||
|
# Additional templates that should be rendered to pages, maps page names to
|
||||||
|
# template names.
|
||||||
|
#html_additional_pages = {}
|
||||||
|
|
||||||
|
# If false, no module index is generated.
|
||||||
|
#html_use_modindex = True
|
||||||
|
|
||||||
|
# If false, no index is generated.
|
||||||
|
#html_use_index = True
|
||||||
|
|
||||||
|
# If true, the index is split into individual pages for each letter.
|
||||||
|
#html_split_index = False
|
||||||
|
|
||||||
|
# If true, links to the reST sources are added to the pages.
|
||||||
|
#html_show_sourcelink = True
|
||||||
|
|
||||||
|
# If true, an OpenSearch description file will be output, and all pages will
|
||||||
|
# contain a <link> tag referring to it. The value of this option must be the
|
||||||
|
# base URL from which the finished HTML is served.
|
||||||
|
html_use_opensearch = kitchen.release.DOWNLOAD_URL + 'docs/'
|
||||||
|
|
||||||
|
# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml").
|
||||||
|
#html_file_suffix = ''
|
||||||
|
|
||||||
|
# Output file base name for HTML help builder.
|
||||||
|
htmlhelp_basename = 'kitchendoc'
|
||||||
|
|
||||||
|
|
||||||
|
# -- Options for LaTeX output --------------------------------------------------
|
||||||
|
|
||||||
|
# The paper size ('letter' or 'a4').
|
||||||
|
#latex_paper_size = 'letter'
|
||||||
|
|
||||||
|
# The font size ('10pt', '11pt' or '12pt').
|
||||||
|
#latex_font_size = '10pt'
|
||||||
|
|
||||||
|
# Grouping the document tree into LaTeX files. List of tuples
|
||||||
|
# (source start file, target name, title, author, documentclass [howto/manual]).
|
||||||
|
latex_documents = [
|
||||||
|
('index', 'kitchen.tex', u'kitchen Documentation',
|
||||||
|
u'Toshio Kuratomi', 'manual'),
|
||||||
|
]
|
||||||
|
|
||||||
|
# The name of an image file (relative to this directory) to place at the top of
|
||||||
|
# the title page.
|
||||||
|
#latex_logo = None
|
||||||
|
|
||||||
|
# For "manual" documents, if this is true, then toplevel headings are parts,
|
||||||
|
# not chapters.
|
||||||
|
#latex_use_parts = False
|
||||||
|
|
||||||
|
# Additional stuff for the LaTeX preamble.
|
||||||
|
#latex_preamble = ''
|
||||||
|
|
||||||
|
# Documents to append as an appendix to all manuals.
|
||||||
|
#latex_appendices = []
|
||||||
|
|
||||||
|
# If false, no module index is generated.
|
||||||
|
#latex_use_modindex = True
|
||||||
|
|
||||||
|
automodule_skip_lines = 4
|
||||||
|
autoclass_content = "class"
|
||||||
|
|
||||||
|
# Example configuration for intersphinx: refer to the Python standard library.
|
||||||
|
intersphinx_mapping = {'http://docs.python.org/': None,
|
||||||
|
'https://fedorahosted.org/releases/p/y/python-fedora/doc/': None,
|
||||||
|
'https://fedorahosted.org/releases/p/a/packagedb/doc/': None}
|
||||||
|
|
||||||
|
rst_epilog = '''
|
||||||
|
.. |projpage| replace:: project webpage
|
||||||
|
.. _projpage: %(url)s
|
||||||
|
.. |docpage| replace:: documentation page
|
||||||
|
.. _docpage: %(download)s/docs
|
||||||
|
.. |downldpage| replace:: download page
|
||||||
|
.. _downldpage: %(download)s
|
||||||
|
.. |stdlib| replace:: python standard library
|
||||||
|
.. _stdlib: http://docs.python.org/library
|
||||||
|
''' % {'url': kitchen.release.URL, 'download': kitchen.release.DOWNLOAD_URL}
|
690
docs/designing-unicode-apis.rst
Normal file
690
docs/designing-unicode-apis.rst
Normal file
|
@ -0,0 +1,690 @@
|
||||||
|
.. _DesigningUnicodeAwareAPIs:
|
||||||
|
|
||||||
|
============================
|
||||||
|
Designing Unicode Aware APIs
|
||||||
|
============================
|
||||||
|
|
||||||
|
APIs that deal with byte :class:`str` and :class:`unicode` strings are
|
||||||
|
difficult to get right. Here are a few strategies with pros and cons of each.
|
||||||
|
|
||||||
|
.. contents::
|
||||||
|
|
||||||
|
-------------------------------------------------
|
||||||
|
Take either bytes or unicode, output only unicode
|
||||||
|
-------------------------------------------------
|
||||||
|
|
||||||
|
In this strategy, you allow the user to enter either :class:`unicode` strings
|
||||||
|
or byte :class:`str` but what you give back is always :class:`unicode`. This
|
||||||
|
strategy is easy for novice endusers to start using immediately as they will
|
||||||
|
be able to feed either type of string into the function and get back a string
|
||||||
|
that they can use in other places.
|
||||||
|
|
||||||
|
However, it does lead to the novice writing code that functions correctly when
|
||||||
|
testing it with :term:`ASCII`-only data but fails when given data that contains
|
||||||
|
non-:term:`ASCII` characters. Worse, if your API is not designed to be
|
||||||
|
flexible, the consumer of your code won't be able to easily correct those
|
||||||
|
problems once they find them.
|
||||||
|
|
||||||
|
Here's a good API that uses this strategy::
|
||||||
|
|
||||||
|
from kitchen.text.converters import to_unicode
|
||||||
|
|
||||||
|
def truncate(msg, max_length, encoding='utf8', errors='replace'):
|
||||||
|
msg = to_unicode(msg, encoding, errors)
|
||||||
|
return msg[:max_length]
|
||||||
|
|
||||||
|
The call to :func:`truncate` starts with the essential parameters for
|
||||||
|
performing the task. It ends with two optional keyword arguments that define
|
||||||
|
the encoding to use to transform from a byte :class:`str` to :class:`unicode`
|
||||||
|
and the strategy to use if undecodable bytes are encountered. The defaults
|
||||||
|
may vary depending on the use cases you have in mind. When the output is
|
||||||
|
generally going to be printed for the user to see, ``errors='replace'`` is
|
||||||
|
a good default. If you are constructing keys to a database, raising an
|
||||||
|
exception (with ``errors='strict'``) may be a better default. In either case,
|
||||||
|
having both parameters allows the person using your API to choose how they
|
||||||
|
want to handle any problems. Having the values is also a clue to them that
|
||||||
|
a conversion from byte :class:`str` to :class:`unicode` string is going to
|
||||||
|
occur.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
If you're targeting python-3.1 and above, ``errors='surrogateescape'`` may
|
||||||
|
be a better default than ``errors='strict'``. You need to be mindful of
|
||||||
|
a few things when using ``surrogateescape`` though:
|
||||||
|
|
||||||
|
* ``surrogateescape`` will cause issues if a non-:term:`ASCII` compatible
|
||||||
|
encoding is used (for instance, UTF-16 and UTF-32.) That makes it
|
||||||
|
unhelpful in situations where a true general purpose method of encoding
|
||||||
|
must be found. :pep:`383` mentions that ``surrogateescape`` was
|
||||||
|
specifically designed with the limitations of translating using system
|
||||||
|
locales (where :term:`ASCII` compatibility is generally seen as
|
||||||
|
inescapable) so you should keep that in mind.
|
||||||
|
* If you use ``surrogateescape`` to decode from :class:`bytes`
|
||||||
|
to :class:`unicode` you will need to use an error handler other than
|
||||||
|
``strict`` to encode as the lone surrogate that this error handler
|
||||||
|
creates makes for invalid unicode that must be handled when encoding.
|
||||||
|
In Python-3.1.2 or less, a bug in the encoder error handlers mean that
|
||||||
|
you can only use ``surrogateescape`` to encode; anything else will throw
|
||||||
|
an error.
|
||||||
|
|
||||||
|
Evaluate your usages of the variables in question to see what makes sense.
|
||||||
|
|
||||||
|
Here's a bad example of using this strategy::
|
||||||
|
|
||||||
|
from kitchen.text.converters import to_unicode
|
||||||
|
|
||||||
|
def truncate(msg, max_length):
|
||||||
|
msg = to_unicode(msg)
|
||||||
|
return msg[:max_length]
|
||||||
|
|
||||||
|
In this example, we don't have the optional keyword arguments for
|
||||||
|
:attr:`encoding` and :attr:`errors`. A user who uses this function is more
|
||||||
|
likely to miss the fact that a conversion from byte :class:`str` to
|
||||||
|
:class:`unicode` is going to occur. And once an error is reported, they will
|
||||||
|
have to look through their backtrace and think harder about where they want to
|
||||||
|
transform their data into :class:`unicode` strings instead of having the
|
||||||
|
opportunity to control how the conversion takes place in the function itself.
|
||||||
|
Note that the user does have the ability to make this work by making the
|
||||||
|
transformation to unicode themselves::
|
||||||
|
|
||||||
|
from kitchen.text.converters import to_unicode
|
||||||
|
|
||||||
|
msg = to_unicode(msg, encoding='euc_jp', errors='ignore')
|
||||||
|
new_msg = truncate(msg, 5)
|
||||||
|
|
||||||
|
--------------------------------------------------
|
||||||
|
Take either bytes or unicode, output the same type
|
||||||
|
--------------------------------------------------
|
||||||
|
|
||||||
|
This strategy is sometimes called polymorphic because the type of data that is
|
||||||
|
returned is dependent on the type of data that is received. The concept is
|
||||||
|
that when you are given a byte :class:`str` to process, you return a byte
|
||||||
|
:class:`str` in your output. When you are given :class:`unicode` strings to
|
||||||
|
process, you return :class:`unicode` strings in your output.
|
||||||
|
|
||||||
|
This can work well for end users as the ones that know about the difference
|
||||||
|
between the two string types will already have transformed the strings to
|
||||||
|
their desired type before giving it to this function. The ones that don't can
|
||||||
|
remain blissfully ignorant (at least, as far as your function is concerned) as
|
||||||
|
the function does not change the type.
|
||||||
|
|
||||||
|
In cases where the encoding of the byte :class:`str` is known or can be
|
||||||
|
discovered based on the input data this works well. If you can't figure out
|
||||||
|
the input encoding, however, this strategy can fail in any of the following
|
||||||
|
cases:
|
||||||
|
|
||||||
|
1. It needs to do an internal conversion between byte :class:`str` and
|
||||||
|
:class:`unicode` string.
|
||||||
|
2. It cannot return the same data as either a :class:`unicode` string or byte
|
||||||
|
:class:`str`.
|
||||||
|
3. You may need to deal with byte strings that are not byte-compatible with
|
||||||
|
:term:`ASCII`
|
||||||
|
|
||||||
|
First, a couple examples of using this strategy in a good way::
|
||||||
|
|
||||||
|
def translate(msg, table):
|
||||||
|
replacements = table.keys()
|
||||||
|
new_msg = []
|
||||||
|
for index, char in enumerate(msg):
|
||||||
|
if char in replacements:
|
||||||
|
new_msg.append(table[char])
|
||||||
|
else:
|
||||||
|
new_msg.append(char)
|
||||||
|
|
||||||
|
return ''.join(new_msg)
|
||||||
|
|
||||||
|
In this example, all of the strings that we use (except the empty string which
|
||||||
|
is okay because it doesn't have any characters to encode) come from outside of
|
||||||
|
the function. Due to that, the user is responsible for making sure that the
|
||||||
|
:attr:`msg`, and the keys and values in :attr:`table` all match in terms of
|
||||||
|
type (:class:`unicode` vs :class:`str`) and encoding (You can do some error
|
||||||
|
checking to make sure the user gave all the same type but you can't do the
|
||||||
|
same for the user giving different encodings). You do not need to make
|
||||||
|
changes to the string that require you to know the encoding or type of the
|
||||||
|
string; everything is a simple replacement of one element in the array of
|
||||||
|
characters in message with the character in table.
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
import json
|
||||||
|
from kitchen.text.converters import to_unicode, to_bytes
|
||||||
|
|
||||||
|
def first_field_from_json_data(json_string):
|
||||||
|
'''Return the first field in a json data structure.
|
||||||
|
|
||||||
|
The format of the json data is a simple list of strings.
|
||||||
|
'["one", "two", "three"]'
|
||||||
|
'''
|
||||||
|
if isinstance(json_string, unicode):
|
||||||
|
# On all python versions, json.loads() returns unicode if given
|
||||||
|
# a unicode string
|
||||||
|
return json.loads(json_string)[0]
|
||||||
|
|
||||||
|
# Byte str: figure out which encoding we're dealing with
|
||||||
|
if '\x00' not in json_string[:2]:
|
||||||
|
encoding = 'utf8'
|
||||||
|
elif '\x00\x00\x00' == json_string[:3]:
|
||||||
|
encoding = 'utf-32-be'
|
||||||
|
elif '\x00\x00\x00' == json_string[1:4]:
|
||||||
|
encoding = 'utf-32-le'
|
||||||
|
elif '\x00' == json_string[0] and '\x00' == json_string[2]:
|
||||||
|
encoding = 'utf-16-be'
|
||||||
|
else:
|
||||||
|
encoding = 'utf-16-le'
|
||||||
|
|
||||||
|
data = json.loads(unicode(json_string, encoding))
|
||||||
|
return data[0].encode(encoding)
|
||||||
|
|
||||||
|
In this example the function takes either a byte :class:`str` type or
|
||||||
|
a :class:`unicode` string that has a list in json format and returns the first
|
||||||
|
field from it as the type of the input string. The first section of code is
|
||||||
|
very straightforward; we receive a :class:`unicode` string, parse it with
|
||||||
|
a function, and then return the first field from our parsed data (which our
|
||||||
|
function returned to us as json data).
|
||||||
|
|
||||||
|
The second portion that deals with byte :class:`str` is not so
|
||||||
|
straightforward. Before we can parse the string we have to determine what
|
||||||
|
characters the bytes in the string map to. If we didn't do that, we wouldn't
|
||||||
|
be able to properly find which characters are present in the string. In order
|
||||||
|
to do that we have to figure out the encoding of the byte :class:`str`.
|
||||||
|
Luckily, the json specification states that all strings are unicode and
|
||||||
|
encoded with one of UTF32be, UTF32le, UTF16be, UTF16le, or :term:`UTF-8`. It further
|
||||||
|
defines the format such that the first two characters are always
|
||||||
|
:term:`ASCII`. Each of these has a different sequence of NULLs when they
|
||||||
|
encode an :term:`ASCII` character. We can use that to detect which encoding
|
||||||
|
was used to create the byte :class:`str`.
|
||||||
|
|
||||||
|
Finally, we return the byte :class:`str` by encoding the :class:`unicode` back
|
||||||
|
to a byte :class:`str`.
|
||||||
|
|
||||||
|
As you can see, in this example we have to convert from byte :class:`str` to
|
||||||
|
:class:`unicode` and back. But we know from the json specification that byte
|
||||||
|
:class:`str` has to be one of a limited number of encodings that we are able
|
||||||
|
to detect. That ability makes this strategy work.
|
||||||
|
|
||||||
|
Now for some examples of using this strategy in ways that fail::
|
||||||
|
|
||||||
|
import unicodedata
|
||||||
|
def first_char(msg):
|
||||||
|
'''Return the first character in a string'''
|
||||||
|
if not isinstance(msg, unicode):
|
||||||
|
try:
|
||||||
|
msg = unicode(msg, 'utf8')
|
||||||
|
except UnicodeError:
|
||||||
|
msg = unicode(msg, 'latin1')
|
||||||
|
msg = unicodedata.normalize('NFC', msg)
|
||||||
|
return msg[0]
|
||||||
|
|
||||||
|
If you look at that code and think that there's something fragile and prone to
|
||||||
|
breaking in the ``try: except:`` block you are correct in being suspicious.
|
||||||
|
This code will fail on multi-byte character sets that aren't :term:`UTF-8`. It
|
||||||
|
can also fail on data where the sequence of bytes is valid :term:`UTF-8` but
|
||||||
|
the bytes are actually of a different encoding. The reasons this code fails
|
||||||
|
is that we don't know what encoding the bytes are in and the code must convert
|
||||||
|
from a byte :class:`str` to a :class:`unicode` string in order to function.
|
||||||
|
|
||||||
|
In order to make this code robust we must know the encoding of :attr:`msg`.
|
||||||
|
The only way to know that is to ask the user so the API must do that::
|
||||||
|
|
||||||
|
import unicodedata
|
||||||
|
def number_of_chars(msg, encoding='utf8', errors='strict'):
|
||||||
|
if not isinstance(msg, unicode):
|
||||||
|
msg = unicode(msg, encoding, errors)
|
||||||
|
msg = unicodedata.normalize('NFC', msg)
|
||||||
|
return len(msg)
|
||||||
|
|
||||||
|
Another example of failure::
|
||||||
|
|
||||||
|
import os
|
||||||
|
def listdir(directory):
|
||||||
|
files = os.listdir(directory)
|
||||||
|
if isinstance(directory, str):
|
||||||
|
return files
|
||||||
|
# files could contain both bytes and unicode
|
||||||
|
new_files = []
|
||||||
|
for filename in files:
|
||||||
|
if not isinstance(filename, unicode):
|
||||||
|
# What to do here?
|
||||||
|
continue
|
||||||
|
new_files.append(filename)
|
||||||
|
return new_files
|
||||||
|
|
||||||
|
This function illustrates the second failure mode. Here, not all of the
|
||||||
|
possible values can be represented as :class:`unicode` without knowing more
|
||||||
|
about the encoding of each of the filenames involved. Since each filename
|
||||||
|
could have a different encoding there's a few different options to pursue. We
|
||||||
|
could make this function always return byte :class:`str` since that can
|
||||||
|
accurately represent anything that could be returned. If we want to return
|
||||||
|
:class:`unicode` we need to at least allow the user to specify what to do in
|
||||||
|
case of an error decoding the bytes to :class:`unicode`. We can also let the
|
||||||
|
user specify the encoding to use for doing the decoding but that won't help in
|
||||||
|
all cases since not all files will be in the same encoding (or even
|
||||||
|
necessarily in any encoding)::
|
||||||
|
|
||||||
|
import locale
|
||||||
|
import os
|
||||||
|
def listdir(directory, encoding=locale.getpreferredencoding(), errors='strict'):
|
||||||
|
# Note: In python-3.1+, surrogateescape may be a better default
|
||||||
|
files = os.listdir(directory)
|
||||||
|
if isinstance(directory, str):
|
||||||
|
return files
|
||||||
|
new_files = []
|
||||||
|
for filename in files:
|
||||||
|
if not isinstance(filename, unicode):
|
||||||
|
filename = unicode(filename, encoding=encoding, errors=errors)
|
||||||
|
new_files.append(filename)
|
||||||
|
return new_files
|
||||||
|
|
||||||
|
Note that although we use :attr:`errors` in this example as what to pass to
|
||||||
|
the codec that decodes to :class:`unicode` we could also have an
|
||||||
|
:attr:`errors` argument that decides other things to do like skip a filename
|
||||||
|
entirely, return a placeholder (``Nondisplayable filename``), or raise an
|
||||||
|
exception.
|
||||||
|
|
||||||
|
This leaves us with one last failure to describe::
|
||||||
|
|
||||||
|
def first_field(csv_string):
|
||||||
|
'''Return the first field in a comma separated values string.'''
|
||||||
|
try:
|
||||||
|
return csv_string[:csv_string.index(',')]
|
||||||
|
except ValueError:
|
||||||
|
return csv_string
|
||||||
|
|
||||||
|
This code looks simple enough. The hidden error here is that we are searching
|
||||||
|
for a comma character in a byte :class:`str` but not all encodings will use
|
||||||
|
the same sequence of bytes to represent the comma. If you use an encoding
|
||||||
|
that's not :term:`ASCII` compatible on the byte level, then the literal comma
|
||||||
|
``','`` in the above code will match inappropriate bytes. Some examples of
|
||||||
|
how it can fail:
|
||||||
|
|
||||||
|
* Will find the byte representing an :term:`ASCII` comma in another character
|
||||||
|
* Will find the comma but leave trailing garbage bytes on the end of the
|
||||||
|
string
|
||||||
|
* Will not match the character that represents the comma in this encoding
|
||||||
|
|
||||||
|
There are two ways to solve this. You can either take the encoding value from
|
||||||
|
the user or you can take the separator value from the user. Of the two,
|
||||||
|
taking the encoding is the better option for two reasons:
|
||||||
|
|
||||||
|
1. Taking a separator argument doesn't clearly document for the API user that
|
||||||
|
the reason they must give it is to properly match the encoding of the
|
||||||
|
:attr:`csv_string`. They're just as likely to think that it's simply a way
|
||||||
|
to specify an alternate character (like ":" or "|") for the separator.
|
||||||
|
2. It's possible for a variable width encoding to reuse the same byte sequence
|
||||||
|
for different characters in multiple sequences.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
:term:`UTF-8` is resistant to this as any character's sequence of
|
||||||
|
bytes will never be a subset of another character's sequence of bytes.
|
||||||
|
|
||||||
|
With that in mind, here's how to improve the API::
|
||||||
|
|
||||||
|
def first_field(csv_string, encoding='utf-8', errors='replace'):
|
||||||
|
if not isinstance(csv_string, unicode):
|
||||||
|
u_string = unicode(csv_string, encoding, errors)
|
||||||
|
is_unicode = False
|
||||||
|
else:
|
||||||
|
u_string = csv_string
is_unicode = True
|
||||||
|
|
||||||
|
try:
|
||||||
|
field = u_string[:u_string.index(u',')]
|
||||||
|
except ValueError:
|
||||||
|
return csv_string
|
||||||
|
|
||||||
|
if not is_unicode:
|
||||||
|
field = field.encode(encoding, errors)
|
||||||
|
return field
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
If you decide you'll never encounter a variable width encoding that reuses
|
||||||
|
byte sequences you can use this code instead::
|
||||||
|
|
||||||
|
def first_field(csv_string, encoding='utf-8'):
|
||||||
|
try:
|
||||||
|
return csv_string[:csv_string.index(','.encode(encoding))]
|
||||||
|
except ValueError:
|
||||||
|
return csv_string
|
||||||
|
|
||||||
|
------------------
|
||||||
|
Separate functions
|
||||||
|
------------------
|
||||||
|
|
||||||
|
Sometimes you want to be able to take either byte :class:`str` or
|
||||||
|
:class:`unicode` strings, perform similar operations on either one and then
|
||||||
|
return data in the same format as was given. Probably the easiest way to do
|
||||||
|
that is to have separate functions for each and adopt a naming convention to
|
||||||
|
show that one is for working with byte :class:`str` and the other is for
|
||||||
|
working with :class:`unicode` strings::
|
||||||
|
|
||||||
|
def translate_b(msg, table):
|
||||||
|
'''Replace values in str with other byte values like unicode.translate'''
|
||||||
|
if not isinstance(msg, str):
|
||||||
|
raise TypeError('msg must be of type str')
|
||||||
|
str_table = [chr(s) for s in xrange(0,256)]
|
||||||
|
delete_chars = []
|
||||||
|
for chr_val in (k for k in table.keys() if isinstance(k, int)):
|
||||||
|
if chr_val > 255:
|
||||||
|
raise ValueError('Keys in table must not exceed 255')
|
||||||
|
if table[chr_val] is None:
|
||||||
|
delete_chars.append(chr(chr_val))
|
||||||
|
elif isinstance(table[chr_val], int):
|
||||||
|
if table[chr_val] > 255:
|
||||||
|
raise TypeError('table values cannot be more than 255 or less than 0')
|
||||||
|
str_table[chr_val] = chr(table[chr_val])
|
||||||
|
else:
|
||||||
|
if not isinstance(table[chr_val], str):
|
||||||
|
raise TypeError('character mapping must return integer, None or str')
|
||||||
|
str_table[chr_val] = table[chr_val]
|
||||||
|
str_table = ''.join(str_table)
|
||||||
|
delete_chars = ''.join(delete_chars)
|
||||||
|
return msg.translate(str_table, delete_chars)
|
||||||
|
|
||||||
|
def translate(msg, table):
|
||||||
|
'''Replace values in a unicode string with other values'''
|
||||||
|
if not isinstance(msg, unicode):
|
||||||
|
raise TypeError('msg must be of type unicode')
|
||||||
|
return msg.translate(table)
|
||||||
|
|
||||||
|
There's several things that we have to do in this API:
|
||||||
|
|
||||||
|
* Because the function names might not be enough of a clue to the user of the
|
||||||
|
functions about the value types that are expected, we have to check that the
|
||||||
|
types are correct.
|
||||||
|
|
||||||
|
* We keep the behaviour of the two functions as close to the same as possible,
|
||||||
|
just with byte :class:`str` and :class:`unicode` strings substituted for
|
||||||
|
each other.
|
||||||
|
|
||||||
|
|
||||||
|
-----------------------------------------------------------------
|
||||||
|
Deciding whether to take str or unicode when no value is returned
|
||||||
|
-----------------------------------------------------------------
|
||||||
|
|
||||||
|
Not all functions have a return value. Sometimes a function is there to
|
||||||
|
interact with something external to python, for instance, writing a file out
|
||||||
|
to disk or a method exists to update the internal state of a data structure.
|
||||||
|
One of the main questions with these APIs is whether to take byte
|
||||||
|
:class:`str`, :class:`unicode` string, or both. The answer depends on your
|
||||||
|
use case but I'll give some examples here.
|
||||||
|
|
||||||
|
Writing to external data
|
||||||
|
========================
|
||||||
|
|
||||||
|
When your information is going to an external data source like writing to
|
||||||
|
a file you need to decide whether to take in :class:`unicode` strings or byte
|
||||||
|
:class:`str`. Remember that most external data sources are not going to be
|
||||||
|
dealing with unicode directly. Instead, they're going to be dealing with
|
||||||
|
a sequence of bytes that may be interpreted as unicode. With that in mind,
|
||||||
|
you either need to have the user give you a byte :class:`str` or convert to
|
||||||
|
a byte :class:`str` inside the function.
|
||||||
|
|
||||||
|
Next you need to think about the type of data that you're receiving. If it's
|
||||||
|
textual data, (for instance, this is a chat client and the user is typing
|
||||||
|
messages that they expect to be read by another person) it probably makes sense to
|
||||||
|
take in :class:`unicode` strings and do the conversion inside your function.
|
||||||
|
On the other hand, if this is a lower level function that's passing data into
|
||||||
|
a network socket, it probably should be taking byte :class:`str` instead.
|
||||||
|
|
||||||
|
Just as noted in the API notes above, you should specify an :attr:`encoding`
|
||||||
|
and :attr:`errors` argument if you need to transform from :class:`unicode`
|
||||||
|
string to byte :class:`str` and you are unable to guess the encoding from the
|
||||||
|
data itself.
|
||||||
|
|
||||||
|
Updating data structures
|
||||||
|
========================
|
||||||
|
|
||||||
|
Sometimes your API is just going to update a data structure and not
|
||||||
|
immediately output that data anywhere. Just as when writing external data,
|
||||||
|
you should think about both what your function is going to do with the data
|
||||||
|
eventually and what the caller of your function is thinking that they're
|
||||||
|
giving you. Most of the time, you'll want to take :class:`unicode` strings
|
||||||
|
and enter them into the data structure as :class:`unicode` when the data is
|
||||||
|
textual in nature. You'll want to take byte :class:`str` and enter them into
|
||||||
|
the data structure as byte :class:`str` when the data is not text. Use
|
||||||
|
a naming convention so the user knows what's expected.
|
||||||
|
|
||||||
|
-------------
|
||||||
|
APIs to Avoid
|
||||||
|
-------------
|
||||||
|
|
||||||
|
There are a few APIs that are just wrong. If you catch yourself making an API
|
||||||
|
that does one of these things, change it before anyone sees your code.
|
||||||
|
|
||||||
|
Returning unicode unless a conversion fails
|
||||||
|
===========================================
|
||||||
|
|
||||||
|
This type of API usually deals with byte :class:`str` at some point and
|
||||||
|
converts it to :class:`unicode` because it's usually thought to be text.
|
||||||
|
However, there are times when the bytes fail to convert to a :class:`unicode`
|
||||||
|
string. When that happens, this API returns the raw byte :class:`str` instead
|
||||||
|
of a :class:`unicode` string. One example of this is present in the |stdlib|_:
|
||||||
|
python2's :func:`os.listdir`::
|
||||||
|
|
||||||
|
>>> import os
|
||||||
|
>>> import locale
|
||||||
|
>>> locale.getpreferredencoding()
|
||||||
|
'UTF-8'
|
||||||
|
>>> os.mkdir('/tmp/mine')
|
||||||
|
>>> os.chdir('/tmp/mine')
|
||||||
|
>>> open('nonsense_char_\xff', 'w').close()
|
||||||
|
>>> open('all_ascii', 'w').close()
|
||||||
|
>>> os.listdir(u'.')
|
||||||
|
[u'all_ascii', 'nonsense_char_\xff']
|
||||||
|
|
||||||
|
The problem with APIs like this is that they cause failures that are hard to
|
||||||
|
debug because they don't happen where the variables are set. For instance,
|
||||||
|
let's say you take the filenames from :func:`os.listdir` and give it to this
|
||||||
|
function::
|
||||||
|
|
||||||
|
def normalize_filename(filename):
|
||||||
|
'''Change spaces and dashes into underscores'''
|
||||||
|
return filename.translate({ord(u' '):u'_', ord(u'-'):u'_'})
|
||||||
|
|
||||||
|
When you test this, you use filenames that all are decodable in your preferred
|
||||||
|
encoding and everything seems to work. But when this code is run on a machine
|
||||||
|
that has filenames in multiple encodings the filenames returned by
|
||||||
|
:func:`os.listdir` suddenly include byte :class:`str`. And byte :class:`str`
|
||||||
|
has a different :func:`string.translate` function that takes different values.
|
||||||
|
So the code raises an exception where it's not immediately obvious that
|
||||||
|
:func:`os.listdir` is at fault.
|
||||||
|
|
||||||
|
Ignoring values with no chance of recovery
|
||||||
|
==========================================
|
||||||
|
|
||||||
|
An early version of python3 attempted to fix the :func:`os.listdir` problem
|
||||||
|
pointed out in the last section by returning all values that were decodable to
|
||||||
|
:class:`unicode` and omitting the filenames that were not. This lead to the
|
||||||
|
following output::
|
||||||
|
|
||||||
|
>>> import os
|
||||||
|
>>> import locale
|
||||||
|
>>> locale.getpreferredencoding()
|
||||||
|
'UTF-8'
|
||||||
|
>>> os.mkdir('/tmp/mine')
|
||||||
|
>>> os.chdir('/tmp/mine')
|
||||||
|
>>> open(b'nonsense_char_\xff', 'w').close()
|
||||||
|
>>> open('all_ascii', 'w').close()
|
||||||
|
>>> os.listdir('.')
|
||||||
|
['all_ascii']
|
||||||
|
|
||||||
|
The issue with this type of code is that it is silently doing something
|
||||||
|
surprising. The caller expects to get a full list of files back from
|
||||||
|
:func:`os.listdir`. Instead, it silently ignores some of the files, returning
|
||||||
|
only a subset. This leads to code that doesn't do what is expected that may
|
||||||
|
go unnoticed until the code is in production and someone notices that
|
||||||
|
something important is being missed.
|
||||||
|
|
||||||
|
Raising a UnicodeException with no chance of recovery
|
||||||
|
=====================================================
|
||||||
|
|
||||||
|
Believe it or not, a few libraries exist that make it impossible to deal
|
||||||
|
with unicode text without raising a :exc:`UnicodeError`. What seems to occur
|
||||||
|
in these libraries is that the library has functions that expect to receive
|
||||||
|
a :class:`unicode` string. However, internally, those functions call other
|
||||||
|
functions that expect to receive a byte :class:`str`. The programmer of the
|
||||||
|
API was smart enough to convert from a :class:`unicode` string to a byte
|
||||||
|
:class:`str` but they did not give the user the chance to specify the
|
||||||
|
encodings to use or how to deal with errors. This results in exceptions when
|
||||||
|
the user passes in a byte :class:`str` because the initial function wants
|
||||||
|
a :class:`unicode` string and exceptions when the user passes in
|
||||||
|
a :class:`unicode` string because the function can't convert the string to
|
||||||
|
bytes in the encoding that it's selected.
|
||||||
|
|
||||||
|
Do not put the user in the position of not being able to use your API without
|
||||||
|
raising a :exc:`UnicodeError` with certain values. If you can only safely
|
||||||
|
take :class:`unicode` strings, document that byte :class:`str` is not allowed
|
||||||
|
and vice versa. If you have to convert internally, make sure to give the
|
||||||
|
caller of your function parameters to control the encoding and how to treat
|
||||||
|
errors that may occur during the encoding/decoding process. If your code will
|
||||||
|
raise a :exc:`UnicodeError` with non-:term:`ASCII` values no matter what, you
|
||||||
|
should probably rethink your API.
|
||||||
|
|
||||||
|
-----------------
|
||||||
|
Knowing your data
|
||||||
|
-----------------
|
||||||
|
|
||||||
|
If you've read all the way down to this section without skipping you've seen
|
||||||
|
several admonitions about the type of data you are processing affecting the
|
||||||
|
viability of the various API choices.
|
||||||
|
|
||||||
|
Here's a few things to consider in your data:
|
||||||
|
|
||||||
|
Do you need to operate on both bytes and unicode?
|
||||||
|
=================================================
|
||||||
|
|
||||||
|
Much of the data in libraries, programs, and the general environment outside
|
||||||
|
of python is written where strings are sequences of bytes. So when we
|
||||||
|
interact with data that comes from outside of python or data that is about to
|
||||||
|
leave python it may make sense to only operate on the data as a byte
|
||||||
|
:class:`str`. There's two times when this may make sense:
|
||||||
|
|
||||||
|
1. The user is intended to hand the data to the function and then the function
|
||||||
|
takes care of sending the data outside of python (to the filesystem, over
|
||||||
|
the network, etc).
|
||||||
|
2. The data is not representable as text. For instance, writing a binary
|
||||||
|
file format.
|
||||||
|
|
||||||
|
Even when your code is operating in this area you still need to think a little
|
||||||
|
more about your data. For instance, it might make sense for the person using
|
||||||
|
your API to pass in :class:`unicode` strings and let the function convert that
|
||||||
|
into the byte :class:`str` that it then sends over the wire.
|
||||||
|
|
||||||
|
There are also times when it might make sense to operate only on
|
||||||
|
:class:`unicode` strings. :class:`unicode` represents text so anytime that
|
||||||
|
you are working on textual data that isn't going to leave python it has the
|
||||||
|
potential to be a :class:`unicode`-only API. However, there's two things that
|
||||||
|
you should consider when designing a :class:`unicode`-only API:
|
||||||
|
|
||||||
|
1. As your API gains popularity, people are going to use your API in places
|
||||||
|
that you may not have thought of. Corner cases in these other places may
|
||||||
|
mean that processing bytes is desirable.
|
||||||
|
2. In python2, byte :class:`str` and :class:`unicode` are often used
|
||||||
|
interchangeably with each other. That means that people programming against
|
||||||
|
your API may have received :class:`str` from some other API and it would be
|
||||||
|
most convenient for their code if your API accepted it.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
In python3, the separation between the text type and the byte type
|
||||||
|
are more clear. So in python3, there's less need to have all APIs take
|
||||||
|
both unicode and bytes.
|
||||||
|
|
||||||
|
Can you restrict the encodings?
|
||||||
|
===============================
|
||||||
|
If you determine that you have to deal with byte :class:`str` you should
|
||||||
|
realize that not all encodings are created equal. Each has different
|
||||||
|
properties that may make it possible to provide a simpler API provided that
|
||||||
|
you can reasonably tell the users of your API that they cannot use certain
|
||||||
|
classes of encodings.
|
||||||
|
|
||||||
|
As one example, if you are required to find a comma (``,``) in a byte
|
||||||
|
:class:`str` you have different choices based on what encodings are allowed.
|
||||||
|
If you can reasonably restrict your API users to only giving :term:`ASCII
|
||||||
|
compatible` encodings you can do this simply by searching for the literal
|
||||||
|
comma character because that character will be represented by the same byte
|
||||||
|
sequence in all :term:`ASCII compatible` encodings.
|
||||||
|
|
||||||
|
The following are some classes of encodings to be aware of as you decide how
|
||||||
|
generic your code needs to be.
|
||||||
|
|
||||||
|
Single byte encodings
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
Single byte encodings can only represent 256 total characters. They encode
|
||||||
|
the :term:`code points` for a character to the equivalent number in a single
|
||||||
|
byte.
|
||||||
|
|
||||||
|
Most single byte encodings are :term:`ASCII compatible`. :term:`ASCII
|
||||||
|
compatible` encodings are the most likely to be usable without changes to code
|
||||||
|
so this is good news.  A notable exception to this is the `EBCDIC
|
||||||
|
<http://en.wikipedia.org/wiki/Extended_Binary_Coded_Decimal_Interchange_Code>`_
|
||||||
|
family of encodings.
|
||||||
|
|
||||||
|
Multibyte encodings
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
Multibyte encodings use more than one byte to encode some characters.
|
||||||
|
|
||||||
|
Fixed width
|
||||||
|
~~~~~~~~~~~
|
||||||
|
|
||||||
|
Fixed width encodings have a set number of bytes to represent all of the
|
||||||
|
characters in the character set. ``UTF-32`` is an example of a fixed width
|
||||||
|
encoding that uses four bytes per character and can express every unicode
|
||||||
|
characters. There are a number of problems with writing APIs that need to
|
||||||
|
operate on fixed width, multibyte characters. To go back to our earlier
|
||||||
|
example of finding a comma in a string, we have to realize that even in
|
||||||
|
``UTF-32`` where the :term:`code point` for :term:`ASCII` characters is the
|
||||||
|
same as in :term:`ASCII`, the byte sequence for them is different. So you
|
||||||
|
cannot search for the literal byte character as it may pick up false
|
||||||
|
positives and may break a byte sequence in an odd place.
|
||||||
|
|
||||||
|
Variable Width
|
||||||
|
~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
ASCII compatible
|
||||||
|
""""""""""""""""
|
||||||
|
|
||||||
|
:term:`UTF-8` and the `EUC <http://en.wikipedia.org/wiki/Extended_Unix_Code>`_
|
||||||
|
family of encodings are examples of :term:`ASCII compatible` multi-byte
|
||||||
|
encodings. They achieve this by adhering to two principles:
|
||||||
|
|
||||||
|
* All of the :term:`ASCII` characters are represented by the byte that they
|
||||||
|
are in the :term:`ASCII` encoding.
|
||||||
|
* None of the :term:`ASCII` byte sequences are reused in any other byte
|
||||||
|
sequence for a different character.
|
||||||
|
|
||||||
|
Escaped
|
||||||
|
"""""""
|
||||||
|
|
||||||
|
Some multibyte encodings work by using only bytes from the :term:`ASCII`
|
||||||
|
encoding but when a particular sequence of those bytes is found, they are
|
||||||
|
interpreted as meaning something other than their :term:`ASCII` values.
|
||||||
|
``UTF-7`` is one such encoding that can encode all of the unicode
|
||||||
|
:term:`code points`.  For instance, here are some Japanese characters encoded as
|
||||||
|
``UTF-7``::
|
||||||
|
|
||||||
|
>>> a = u'\u304f\u3089\u3068\u307f'
|
||||||
|
>>> print a
|
||||||
|
くらとみ
|
||||||
|
>>> print a.encode('utf-7')
|
||||||
|
+ME8wiTBoMH8-
|
||||||
|
|
||||||
|
These encodings can be used when you need to encode unicode data that may
|
||||||
|
contain non-:term:`ASCII` characters for inclusion in an :term:`ASCII` only
|
||||||
|
transport medium or file.
|
||||||
|
|
||||||
|
However, they are not :term:`ASCII compatible` in the sense that we used
|
||||||
|
earlier as the bytes that represent a :term:`ASCII` character are being reused
|
||||||
|
as part of other characters. If you were to search for a literal plus sign in
|
||||||
|
this encoded string, you would run across many false positives, for instance.
|
||||||
|
|
||||||
|
Other
|
||||||
|
"""""
|
||||||
|
|
||||||
|
There are many other popular variable width encodings, for instance ``UTF-16``
|
||||||
|
and ``shift-JIS``. Many of these are not :term:`ASCII compatible` so you
|
||||||
|
cannot search for a literal :term:`ASCII` character without danger of false
|
||||||
|
positives or false negatives.
|
107
docs/glossary.rst
Normal file
107
docs/glossary.rst
Normal file
|
@ -0,0 +1,107 @@
|
||||||
|
========
|
||||||
|
Glossary
|
||||||
|
========
|
||||||
|
|
||||||
|
.. glossary::
|
||||||
|
|
||||||
|
"Everything but the kitchen sink"
|
||||||
|
An English idiom meaning to include nearly everything that you can
|
||||||
|
think of.
|
||||||
|
|
||||||
|
API version
|
||||||
|
Version that is meant for computer consumption. This version is
|
||||||
|
parsable and comparable by computers. It contains information about
|
||||||
|
a library's API so that computer software can decide whether it works
|
||||||
|
with the software.
|
||||||
|
|
||||||
|
ASCII
|
||||||
|
A character encoding that maps numbers to characters essential to
|
||||||
|
American English. It maps 128 characters using 7 bits.
|
||||||
|
|
||||||
|
.. seealso:: http://en.wikipedia.org/wiki/ASCII
|
||||||
|
|
||||||
|
ASCII compatible
|
||||||
|
An encoding in which the particular byte that maps to a character in
|
||||||
|
the :term:`ASCII` character set is only used to map to that character.
|
||||||
|
This excludes EBCDIC based encodings and many multi-byte fixed and
|
||||||
|
variable width encodings since they reuse the bytes that make up the
|
||||||
|
:term:`ASCII` encoding for other purposes. :term:`UTF-8` is notable
|
||||||
|
as a variable width encoding that is :term:`ASCII` compatible.
|
||||||
|
|
||||||
|
.. seealso::
|
||||||
|
|
||||||
|
http://en.wikipedia.org/wiki/Variable-width_encoding
|
||||||
|
For another explanation of various ways bytes are mapped to
|
||||||
|
characters in a possibly incompatible manner.
|
||||||
|
|
||||||
|
code points
|
||||||
|
:term:`code point`
|
||||||
|
|
||||||
|
code point
|
||||||
|
A number that maps to a particular abstract character. Code points
|
||||||
|
make it so that we have a number pointing to a character without
|
||||||
|
worrying about implementation details of how those numbers are stored
|
||||||
|
for the computer to read. Encodings define how the code points map to
|
||||||
|
particular sequences of bytes on disk and in memory.
|
||||||
|
|
||||||
|
control characters
|
||||||
|
:term:`control character`
|
||||||
|
|
||||||
|
control character
|
||||||
|
The set of characters in unicode that are used, not to display glyphs
|
||||||
|
on the screen, but to tell the displaying program to do something.
|
||||||
|
|
||||||
|
.. seealso:: http://en.wikipedia.org/wiki/Control_character
|
||||||
|
|
||||||
|
grapheme
|
||||||
|
characters or pieces of characters that you might write on a page to
|
||||||
|
make words, sentences, or other pieces of text.
|
||||||
|
|
||||||
|
.. seealso:: http://en.wikipedia.org/wiki/Grapheme
|
||||||
|
|
||||||
|
I18N
|
||||||
|
I18N is an abbreviation for internationalization. It's often used to
|
||||||
|
signify the need to translate words, number and date formats, and
|
||||||
|
other pieces of data in a computer program so that it will work well
|
||||||
|
for people who speak another language than yourself.
|
||||||
|
|
||||||
|
message catalogs
|
||||||
|
:term:`message catalog`
|
||||||
|
|
||||||
|
message catalog
|
||||||
|
Message catalogs contain translations for user-visible strings that
|
||||||
|
are present in your code. Normally, you need to mark the strings to
|
||||||
|
be translated by wrapping them in one of several :mod:`gettext`
|
||||||
|
functions. The function serves two purposes:
|
||||||
|
|
||||||
|
1. It allows automated tools to find which strings are supposed to be
|
||||||
|
extracted for translation.
|
||||||
|
2. The functions perform the translation when the program is running.
|
||||||
|
|
||||||
|
.. seealso::
|
||||||
|
`babel's documentation
|
||||||
|
<http://babel.edgewall.org/wiki/Documentation/messages.html>`_
|
||||||
|
for one method of extracting message catalogs from source
|
||||||
|
code.
|
||||||
|
|
||||||
|
Murphy's Law
|
||||||
|
"Anything that can go wrong, will go wrong."
|
||||||
|
|
||||||
|
.. seealso:: http://en.wikipedia.org/wiki/Murphy%27s_Law
|
||||||
|
|
||||||
|
release version
|
||||||
|
Version that is meant for human consumption. This version is easy for
|
||||||
|
a human to look at to decide how a particular version relates to other
|
||||||
|
versions of the software.
|
||||||
|
|
||||||
|
textual width
|
||||||
|
The amount of horizontal space a character takes up on a monospaced
|
||||||
|
screen. The units are number of character cells or columns that it
|
||||||
|
takes the place of.
|
||||||
|
|
||||||
|
UTF-8
|
||||||
|
A character encoding that maps all unicode :term:`code points` to a sequence
|
||||||
|
of bytes. It is compatible with :term:`ASCII`. It uses a variable
|
||||||
|
number of bytes to encode all of unicode. ASCII characters take one
|
||||||
|
byte. Characters from other parts of unicode take two to four bytes.
|
||||||
|
It is widespread as an encoding on the internet and in Linux.
|
359
docs/hacking.rst
Normal file
359
docs/hacking.rst
Normal file
|
@ -0,0 +1,359 @@
|
||||||
|
=======================================
|
||||||
|
Conventions for contributing to kitchen
|
||||||
|
=======================================
|
||||||
|
|
||||||
|
-----
|
||||||
|
Style
|
||||||
|
-----
|
||||||
|
|
||||||
|
* Strive to be :pep:`8` compliant
|
||||||
|
* Run :command:`pylint` over the code and try to resolve most of its nitpicking
|
||||||
|
|
||||||
|
------------------------
|
||||||
|
Python 2.3 compatibility
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
At the moment, we're supporting python-2.3 and above. Understand that there's
|
||||||
|
a lot of python features that we cannot use because of this.
|
||||||
|
|
||||||
|
Sometimes modules in the |stdlib|_ can be added to kitchen so that they're
|
||||||
|
available. When we do that we need to be careful of several things:
|
||||||
|
|
||||||
|
1. Keep the module in sync with the version in the python-2.x trunk. Use
|
||||||
|
:file:`maintainers/sync-copied-files.py` for this.
|
||||||
|
2. Sync the unittests as well as the module.
|
||||||
|
3. Be aware that not all modules are written to remain compatible with
|
||||||
|
Python-2.3 and might use python language features that were not present
|
||||||
|
then (generator expressions, relative imports, decorators, with, try: with
|
||||||
|
both except: and finally:, etc) These are not good candidates for
|
||||||
|
importing into kitchen as they require more work to keep synced.
|
||||||
|
|
||||||
|
---------
|
||||||
|
Unittests
|
||||||
|
---------
|
||||||
|
|
||||||
|
* At least smoketest your code (make sure a function will return expected
|
||||||
|
values for one set of inputs).
|
||||||
|
* Note that even 100% coverage is not a guarantee of working code! Good tests
|
||||||
|
will realize that you need to also give multiple inputs that test the code
|
||||||
|
paths of called functions that are outside of your code. Example::
|
||||||
|
|
||||||
|
def to_unicode(msg, encoding='utf8', errors='replace'):
|
||||||
|
return unicode(msg, encoding, errors)
|
||||||
|
|
||||||
|
# Smoketest only. This will give 100% coverage for your code (it
|
||||||
|
# tests all of the code inside of to_unicode) but it leaves a lot of
|
||||||
|
# room for errors as it doesn't test all combinations of arguments
|
||||||
|
# that are then passed to the unicode() function.
|
||||||
|
|
||||||
|
tools.ok_(to_unicode('abc') == u'abc')
|
||||||
|
|
||||||
|
# Better -- tests now cover non-ascii characters and that error conditions
|
||||||
|
# occur properly. There's a lot of other permutations that can be
|
||||||
|
# added along these same lines.
|
||||||
|
tools.ok_(to_unicode(u'café', 'utf8', 'replace'))
|
||||||
|
tools.assert_raises(UnicodeError, to_unicode, [u'cafè ñunru'.encode('latin1')])
|
||||||
|
|
||||||
|
* We're using nose for unittesting. Rather than depend on unittest2
|
||||||
|
functionality, use the functions that nose provides.
|
||||||
|
* Remember to maintain python-2.3 compatibility even in unittests.
|
||||||
|
|
||||||
|
----------------------------
|
||||||
|
Docstrings and documentation
|
||||||
|
----------------------------
|
||||||
|
|
||||||
|
We use sphinx to build our documentation. We use the sphinx autodoc extension
|
||||||
|
to pull docstrings out of the modules for API documentation. This means that
|
||||||
|
docstrings for subpackages and modules should follow a certain pattern. The
|
||||||
|
general structure is:
|
||||||
|
|
||||||
|
* Introductory material about a module in the module's top level docstring.
|
||||||
|
|
||||||
|
* Introductory material should begin with a level two title: an overbar and
|
||||||
|
underbar of '-'.
|
||||||
|
|
||||||
|
* docstrings for every function.
|
||||||
|
|
||||||
|
* The first line is a short summary of what the function does
|
||||||
|
* This is followed by a blank line
|
||||||
|
* The next lines are a `field list
|
||||||
|
<http://sphinx.pocoo.org/markup/desc.html#info-field-lists>`_ giving
|
||||||
|
information about the function's signature. We use the keywords:
|
||||||
|
``arg``, ``kwarg``, ``raises``, ``returns``, and sometimes ``rtype``. Use
|
||||||
|
these to describe all arguments, key word arguments, exceptions raised,
|
||||||
|
and return values using these.
|
||||||
|
|
||||||
|
* Parameters that are ``kwarg`` should specify what their default
|
||||||
|
behaviour is.
|
||||||
|
|
||||||
|
.. _kitchen-versioning:
|
||||||
|
|
||||||
|
------------------
|
||||||
|
Kitchen versioning
|
||||||
|
------------------
|
||||||
|
|
||||||
|
Currently the kitchen library is in early stages of development. While we're
|
||||||
|
in this state, the main kitchen library uses the following pattern for version
|
||||||
|
information:
|
||||||
|
|
||||||
|
* Versions look like this::
|
||||||
|
__version_info__ = ((0, 1, 2),)
|
||||||
|
__version__ = '0.1.2'
|
||||||
|
|
||||||
|
* The Major version number remains at 0 until we decide to make the first 1.0
|
||||||
|
release of kitchen. At that point, we're declaring that we have some
|
||||||
|
confidence that we won't need to break backwards compatibility for a while.
|
||||||
|
* The Minor version increments for any backwards incompatible API changes.
|
||||||
|
When this is updated, we reset micro to zero.
|
||||||
|
* The Micro version increments for any other changes (backwards compatible API
|
||||||
|
changes, pure bugfixes, etc).
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
Versioning is only updated for releases that generate sdists and new
|
||||||
|
uploads to the download directory. Usually we update the version
|
||||||
|
information for the library just before release. By contrast, we update
|
||||||
|
kitchen :ref:`subpackage-versioning` when an API change is made. When in
|
||||||
|
doubt, look at the version information in the last release.
|
||||||
|
|
||||||
|
----
|
||||||
|
I18N
|
||||||
|
----
|
||||||
|
|
||||||
|
All strings that are used as feedback for users need to be translated.
|
||||||
|
:mod:`kitchen` sets up several functions for this. :func:`_` is used for
|
||||||
|
marking things that are shown to users via print, GUIs, or other "standard"
|
||||||
|
methods. Strings for exceptions are marked with :func:`b_`. This function
|
||||||
|
returns a byte :class:`str` which is needed for use with exceptions::
|
||||||
|
|
||||||
|
from kitchen import _, b_
|
||||||
|
|
||||||
|
def print_message(msg, username):
|
||||||
|
print _('%(user)s, your message of the day is: %(message)s') % {
|
||||||
|
'message': msg, 'user': username}
|
||||||
|
|
||||||
|
raise Exception(b_('Test message'))
|
||||||
|
|
||||||
|
This serves several purposes:
|
||||||
|
|
||||||
|
* It marks the strings to be extracted by an xgettext-like program.
|
||||||
|
* :func:`_` is a function that will substitute available translations at
|
||||||
|
runtime.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
By using the ``%()s with dict`` style of string formatting, we make this
|
||||||
|
string friendly to translators that may need to reorder the variables when
|
||||||
|
they're translating the string.
|
||||||
|
|
||||||
|
`paver <http://www.blueskyonmars.com/projects/paver/>`_ and `babel
|
||||||
|
<http://babel.edgewall.org/>`_ are used to extract the strings.
|
||||||
|
|
||||||
|
-----------
|
||||||
|
API updates
|
||||||
|
-----------
|
||||||
|
|
||||||
|
Kitchen strives to have a long deprecation cycle so that people have time to
|
||||||
|
switch away from any APIs that we decide to discard. Discarded APIs should
|
||||||
|
raise a :exc:`DeprecationWarning` and clearly state in the warning message and
|
||||||
|
the docstring how to convert old code to use the new interface. An example of
|
||||||
|
deprecating a function::
|
||||||
|
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
from kitchen import _
|
||||||
|
from kitchen.text.converters import to_bytes, to_unicode
|
||||||
|
from kitchen.text.new_module import new_function
|
||||||
|
|
||||||
|
def old_function(param):
|
||||||
|
'''**Deprecated**
|
||||||
|
|
||||||
|
This function is deprecated. Use
|
||||||
|
:func:`kitchen.text.new_module.new_function` instead. If you want
|
||||||
|
unicode strings as output, switch to::
|
||||||
|
|
||||||
|
>>> from kitchen.text.new_module import new_function
|
||||||
|
>>> output = new_function(param)
|
||||||
|
|
||||||
|
If you want byte strings, use::
|
||||||
|
|
||||||
|
>>> from kitchen.text.new_module import new_function
|
||||||
|
>>> from kitchen.text.converters import to_bytes
|
||||||
|
>>> output = to_bytes(new_function(param))
|
||||||
|
'''
|
||||||
|
warnings.warn(_('kitchen.text.old_function is deprecated. Use'
|
||||||
|
' kitchen.text.new_module.new_function instead'),
|
||||||
|
DeprecationWarning, stacklevel=2)
|
||||||
|
|
||||||
|
as_unicode = isinstance(param, unicode)
|
||||||
|
message = new_function(to_unicode(param))
|
||||||
|
if not as_unicode:
|
||||||
|
message = to_bytes(message)
|
||||||
|
return message
|
||||||
|
|
||||||
|
If a particular API change is very intrusive, it may be better to create a new
|
||||||
|
version of the subpackage and ship both the old version and the new version.
|
||||||
|
|
||||||
|
---------
|
||||||
|
NEWS file
|
||||||
|
---------
|
||||||
|
|
||||||
|
Update the :file:`NEWS` file when you make a change that will be visible to
|
||||||
|
the users. This is not a ChangeLog file so we don't need to list absolutely
|
||||||
|
everything but it should give the user an idea of how this version differs
|
||||||
|
from prior versions. API changes should be listed here explicitly. Bugfixes
|
||||||
|
can be more general::
|
||||||
|
|
||||||
|
-----
|
||||||
|
0.2.0
|
||||||
|
-----
|
||||||
|
* Relicense to LGPLv2+
|
||||||
|
* Add kitchen.text.format module with the following functions:
|
||||||
|
textual_width, textual_width_chop.
|
||||||
|
* Rename the kitchen.text.utils module to kitchen.text.misc. use of the
|
||||||
|
old names is deprecated but still available.
|
||||||
|
* bugfixes applied to kitchen.pycompat24.defaultdict that fixes some
|
||||||
|
tracebacks
|
||||||
|
|
||||||
|
-------------------
|
||||||
|
Kitchen subpackages
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
Kitchen itself is a namespace. The kitchen sdist (tarball) provides certain
|
||||||
|
useful subpackages.
|
||||||
|
|
||||||
|
.. seealso::
|
||||||
|
|
||||||
|
`Kitchen addon packages`_
|
||||||
|
For information about subpackages not distributed in the kitchen sdist
|
||||||
|
that install into the kitchen namespace.
|
||||||
|
|
||||||
|
.. _subpackage-versioning:
|
||||||
|
|
||||||
|
Versioning
|
||||||
|
==========
|
||||||
|
|
||||||
|
Each subpackage should have its own version information which is independent
|
||||||
|
of the other kitchen subpackages and the main kitchen library version. This is
|
||||||
|
used so that code that depends on kitchen APIs can check the version
|
||||||
|
information. The standard way to do this is to put something like this in the
|
||||||
|
subpackage's :file:`__init__.py`::
|
||||||
|
|
||||||
|
from kitchen.versioning import version_tuple_to_string
|
||||||
|
|
||||||
|
__version_info__ = ((1, 0, 0),)
|
||||||
|
__version__ = version_tuple_to_string(__version_info__)
|
||||||
|
|
||||||
|
:attr:`__version_info__` is documented in :mod:`kitchen.versioning`. The
|
||||||
|
values of the first tuple should describe API changes to the module. There
|
||||||
|
are at least three numbers present in the tuple: (Major, minor, micro). The
|
||||||
|
major version number is for backwards incompatible changes (For
|
||||||
|
instance, removing a function, or adding a new mandatory argument to
|
||||||
|
a function). Whenever one of these occurs, you should increment the major
|
||||||
|
number and reset minor and micro to zero. The second number is the minor
|
||||||
|
version. Anytime new but backwards compatible changes are introduced this
|
||||||
|
number should be incremented and the micro version number reset to zero. The
|
||||||
|
micro version should be incremented when a change is made that does not change
|
||||||
|
the API at all. This is a common case for bugfixes, for instance.
|
||||||
|
|
||||||
|
Version information beyond the first three parts of the first tuple may be
|
||||||
|
useful for versioning but semantically have similar meaning to the micro
|
||||||
|
version.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
We update the :attr:`__version_info__` tuple when the API is updated.
|
||||||
|
This way there's less chance of forgetting to update the API version when
|
||||||
|
a new release is made. However, we try to only increment the version
|
||||||
|
numbers a single step for any release. So if kitchen-0.1.0 has
|
||||||
|
kitchen.text.__version__ == '1.0.1', kitchen-0.1.1 should have
|
||||||
|
kitchen.text.__version__ == '1.0.2' or '1.1.0' or '2.0.0'.
|
||||||
|
|
||||||
|
Criteria for subpackages in kitchen
|
||||||
|
===================================
|
||||||
|
|
||||||
|
Subpackages within kitchen should meet these criteria:
|
||||||
|
|
||||||
|
* Generally useful or needed for other pieces of kitchen.
|
||||||
|
|
||||||
|
* No mandatory requirements outside of the |stdlib|_.
|
||||||
|
|
||||||
|
* Optional requirements from outside the |stdlib|_ are allowed. Things with
|
||||||
|
mandatory requirements are better placed in `kitchen addon packages`_
|
||||||
|
|
||||||
|
* Somewhat API stable -- this is not a hard requirement. We can change the
|
||||||
|
kitchen api. However, it is better not to as people may come to depend on
|
||||||
|
it.
|
||||||
|
|
||||||
|
.. seealso::
|
||||||
|
|
||||||
|
`API Updates`_
|
||||||
|
|
||||||
|
----------------------
|
||||||
|
Kitchen addon packages
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
Addon packages are very similar to subpackages integrated into the kitchen
|
||||||
|
sdist. This section just lists some of the differences to watch out for.
|
||||||
|
|
||||||
|
setup.py
|
||||||
|
========
|
||||||
|
|
||||||
|
Your :file:`setup.py` should contain entries like this::
|
||||||
|
|
||||||
|
# It's suggested to use a dotted name like this so the package is easily
|
||||||
|
# findable on pypi:
|
||||||
|
setup(name='kitchen.config',
|
||||||
|
# Include kitchen in the keywords, again, for searching on pypi
|
||||||
|
keywords=['kitchen', 'configuration'],
|
||||||
|
# This package lives in the directory kitchen/config
|
||||||
|
packages=['kitchen.config'],
|
||||||
|
# [...]
|
||||||
|
)
|
||||||
|
|
||||||
|
Package directory layout
|
||||||
|
========================
|
||||||
|
|
||||||
|
Create a :file:`kitchen` directory in the toplevel. Place the addon
|
||||||
|
subpackage in there. For example::
|
||||||
|
|
||||||
|
./ <== toplevel with README, setup.py, NEWS, etc
|
||||||
|
kitchen/
|
||||||
|
kitchen/__init__.py
|
||||||
|
kitchen/config/ <== subpackage directory
|
||||||
|
kitchen/config/__init__.py
|
||||||
|
|
||||||
|
Fake kitchen module
|
||||||
|
===================
|
||||||
|
|
||||||
|
The :file:`__init__.py` in the :file:`kitchen` directory is special. It
|
||||||
|
won't be installed. It just needs to pull in the kitchen from the system so
|
||||||
|
that you are able to test your module. You should be able to use this
|
||||||
|
boilerplate::
|
||||||
|
|
||||||
|
# Fake module. This is not installed; it's just made to import the real
|
||||||
|
# kitchen modules for testing this module
|
||||||
|
import pkgutil
|
||||||
|
|
||||||
|
# Extend the __path__ with everything in the real kitchen module
|
||||||
|
__path__ = pkgutil.extend_path(__path__, __name__)
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
:mod:`kitchen` needs to be findable by python for this to work. Installed
|
||||||
|
in the :file:`site-packages` directory or adding it to the
|
||||||
|
:envvar:`PYTHONPATH` will work.
|
||||||
|
|
||||||
|
Your unittests should now be able to find both your submodule and the main
|
||||||
|
kitchen module.
|
||||||
|
|
||||||
|
Versioning
|
||||||
|
==========
|
||||||
|
|
||||||
|
It is recommended that addon packages version similarly to
|
||||||
|
:ref:`subpackage-versioning`. The :data:`__version_info__` and
|
||||||
|
:data:`__version__` strings can be changed independently of the version
|
||||||
|
exposed by setup.py so that you have both an API version
|
||||||
|
(:data:`__version_info__`) and release version that's easier for people to
|
||||||
|
parse. However, you aren't required to do this and you could follow
|
||||||
|
a different methodology if you want (for instance, :ref:`kitchen-versioning`)
|
142
docs/index.rst
Normal file
142
docs/index.rst
Normal file
|
@ -0,0 +1,142 @@
|
||||||
|
================================
|
||||||
|
Kitchen, everything but the sink
|
||||||
|
================================
|
||||||
|
|
||||||
|
:Author: Toshio Kuratomi
|
||||||
|
:Date: 19 March 2011
|
||||||
|
:Version: 1.0.x
|
||||||
|
|
||||||
|
We've all done it. In the process of writing a brand new application we've
|
||||||
|
discovered that we need a little bit of code that we've invented before.
|
||||||
|
Perhaps it's something to handle unicode text. Perhaps it's something to make
|
||||||
|
a bit of python-2.5 code run on python-2.3. Whatever it is, it ends up being
|
||||||
|
a tiny bit of code that seems too small to worry about pushing into its own
|
||||||
|
module so it sits there, a part of your current project, waiting to be cut and
|
||||||
|
pasted into your next project. And the next. And the next. And since that
|
||||||
|
little bittybit of code proved so useful to you, it's highly likely that it
|
||||||
|
proved useful to someone else as well. Useful enough that they've written it
|
||||||
|
and copy and pasted it over and over into each of their new projects.
|
||||||
|
|
||||||
|
Well, no longer! Kitchen aims to pull these small snippets of code into a few
|
||||||
|
python modules which you can import and use within your project. No more copy
|
||||||
|
and paste! Now you can let someone else maintain and release these small
|
||||||
|
snippets so that you can get on with your life.
|
||||||
|
|
||||||
|
This package forms the core of Kitchen. It contains some useful modules for
|
||||||
|
using newer |stdlib|_ modules on older python versions, text manipulation,
|
||||||
|
:pep:`386` versioning, and initializing :mod:`gettext`. With this package we're
|
||||||
|
trying to provide a few useful features that don't have too many dependencies
|
||||||
|
outside of the |stdlib|_. We'll be releasing other modules that drop into the
|
||||||
|
kitchen namespace to add other features (possibly with larger deps) as time
|
||||||
|
goes on.
|
||||||
|
|
||||||
|
------------
|
||||||
|
Requirements
|
||||||
|
------------
|
||||||
|
|
||||||
|
We've tried to keep the core kitchen module's requirements lightweight. At the
|
||||||
|
moment kitchen only requires
|
||||||
|
|
||||||
|
:python: 2.3.1 or later
|
||||||
|
|
||||||
|
.. warning:: Kitchen-1.1.0 is likely to be the last release that supports
|
||||||
|
python-2.3.x. Future releases will target python-2.4 as the minimum
|
||||||
|
required version.
|
||||||
|
|
||||||
|
Soft Requirements
|
||||||
|
=================
|
||||||
|
|
||||||
|
If found, these libraries will be used to make the implementation of some part
|
||||||
|
of kitchen better in some way. If they are not present, the API that they
|
||||||
|
enable will still exist but may function in a different manner.
|
||||||
|
|
||||||
|
`chardet <http://pypi.python.org/pypi/chardet>`_
|
||||||
|
Used in :func:`~kitchen.text.misc.guess_encoding` and
|
||||||
|
:func:`~kitchen.text.converters.guess_encoding_to_xml` to help guess
|
||||||
|
encoding of byte strings being converted. If not present, unknown
|
||||||
|
encodings will be converted as if they were ``latin1``
|
||||||
|
|
||||||
|
---------------------------
|
||||||
|
Other Recommended Libraries
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
These libraries implement commonly used functionality that everyone seems to
|
||||||
|
invent. Rather than reinvent their wheel, I simply list the things that they
|
||||||
|
do well for now. Perhaps if people can't find them normally, I'll add them as
|
||||||
|
requirements in :file:`setup.py` or link them into kitchen's namespace. For
|
||||||
|
now, I just mention them here:
|
||||||
|
|
||||||
|
`bunch <http://pypi.python.org/pypi/bunch/>`_
|
||||||
|
Bunch is a dictionary that supports attribute lookup as well as bracket
|
||||||
|
notation to access. Setting it apart from most homebrewed implementations
|
||||||
|
is the :func:`bunchify` function which will descend nested structures of
|
||||||
|
lists and dicts, transforming the dicts to Bunch's.
|
||||||
|
`hashlib <http://code.krypto.org/python/hashlib/>`_
|
||||||
|
Python 2.5 and forward have a :mod:`hashlib` library that provides secure
|
||||||
|
hash functions to python. If you're developing for python2.3 or
|
||||||
|
python2.4, though, you can install the standalone hashlib library and have
|
||||||
|
access to the same functions.
|
||||||
|
`iterutils <http://pypi.python.org/pypi/iterutils/>`_
|
||||||
|
The python documentation for :mod:`itertools` has some examples
|
||||||
|
of other nice iterable functions that can be built from the
|
||||||
|
:mod:`itertools` functions. This third-party module creates those recipes
|
||||||
|
as a module.
|
||||||
|
`ordereddict <http://pypi.python.org/pypi/ordereddict/>`_
|
||||||
|
Python 2.7 and forward have a :mod:`~collections.OrderedDict` that
|
||||||
|
provides a :class:`dict` whose items are ordered (and indexable) as well
|
||||||
|
as named.
|
||||||
|
`unittest2 <http://pypi.python.org/pypi/unittest2>`_
|
||||||
|
Python 2.7 has an updated :mod:`unittest` library with new functions not
|
||||||
|
present in the |stdlib|_ for Python 2.6 or less. If you want to use those
|
||||||
|
new functions but need your testing framework to be compatible with older
|
||||||
|
Python the unittest2 library provides the update as an external module.
|
||||||
|
`nose <http://somethingaboutorange.com/mrl/projects/nose/>`_
|
||||||
|
If you want to use a test discovery tool instead of the unittest
|
||||||
|
framework, nosetests provides a simple to use way to do that.
|
||||||
|
|
||||||
|
-------
|
||||||
|
License
|
||||||
|
-------
|
||||||
|
|
||||||
|
This python module is distributed under the terms of the
|
||||||
|
`GNU Lesser General Public License Version 2 or later
|
||||||
|
<http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html>`_.
|
||||||
|
|
||||||
|
.. note:: Some parts of this module are licensed under terms less restrictive
|
||||||
|
than the LGPLv2+. If you separate these files from the work as a whole
|
||||||
|
you are allowed to use them under the less restrictive licenses. The
|
||||||
|
following is a list of the files that are known:
|
||||||
|
|
||||||
|
`Python 2 license <http://www.python.org/download/releases/2.4/license/>`_
|
||||||
|
:file:`_subprocess.py`, :file:`test_subprocess.py`,
|
||||||
|
:file:`defaultdict.py`, :file:`test_defaultdict.py`,
|
||||||
|
:file:`_base64.py`, and :file:`test_base64.py`
|
||||||
|
|
||||||
|
--------
|
||||||
|
Contents
|
||||||
|
--------
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 2
|
||||||
|
|
||||||
|
tutorial
|
||||||
|
api-overview
|
||||||
|
porting-guide-0.3
|
||||||
|
hacking
|
||||||
|
glossary
|
||||||
|
|
||||||
|
------------------
|
||||||
|
Indices and tables
|
||||||
|
------------------
|
||||||
|
|
||||||
|
* :ref:`genindex`
|
||||||
|
* :ref:`modindex`
|
||||||
|
* :ref:`search`
|
||||||
|
|
||||||
|
-------------
|
||||||
|
Project Pages
|
||||||
|
-------------
|
||||||
|
|
||||||
|
More information about the project can be found on the |projpage|_
|
||||||
|
|
||||||
|
The latest published version of this documentation can be found on the |docpage|_
|
209
docs/porting-guide-0.3.rst
Normal file
209
docs/porting-guide-0.3.rst
Normal file
|
@ -0,0 +1,209 @@
|
||||||
|
===================
|
||||||
|
1.0.0 Porting Guide
|
||||||
|
===================
|
||||||
|
|
||||||
|
The 0.1 through 1.0.0 releases focused on bringing in functions from yum and
|
||||||
|
python-fedora. This porting guide tells how to port from those APIs to their
|
||||||
|
kitchen replacements.
|
||||||
|
|
||||||
|
-------------
|
||||||
|
python-fedora
|
||||||
|
-------------
|
||||||
|
|
||||||
|
=================================== ===================
|
||||||
|
python-fedora kitchen replacement
|
||||||
|
----------------------------------- -------------------
|
||||||
|
:func:`fedora.iterutils.isiterable` :func:`kitchen.iterutils.isiterable` [#f1]_
|
||||||
|
:func:`fedora.textutils.to_unicode` :func:`kitchen.text.converters.to_unicode`
|
||||||
|
:func:`fedora.textutils.to_bytes` :func:`kitchen.text.converters.to_bytes`
|
||||||
|
=================================== ===================
|
||||||
|
|
||||||
|
.. [#f1] :func:`~kitchen.iterutils.isiterable` has changed slightly in
|
||||||
|
kitchen. The :attr:`include_string` attribute has switched its default value
|
||||||
|
from :data:`True` to :data:`False`. So you need to change code like::
|
||||||
|
|
||||||
|
>>> # Old code
|
||||||
|
>>> isiterable('abcdef')
|
||||||
|
True
|
||||||
|
>>> # New code
|
||||||
|
>>> isiterable('abcdef', include_string=True)
|
||||||
|
True
|
||||||
|
|
||||||
|
---
|
||||||
|
yum
|
||||||
|
---
|
||||||
|
|
||||||
|
================================= ===================
|
||||||
|
yum kitchen replacement
|
||||||
|
--------------------------------- -------------------
|
||||||
|
:func:`yum.i18n.dummy_wrapper` :meth:`kitchen.i18n.DummyTranslations.ugettext` [#y1]_
|
||||||
|
:func:`yum.i18n.dummyP_wrapper`   :meth:`kitchen.i18n.DummyTranslations.ungettext` [#y1]_
|
||||||
|
:func:`yum.i18n.utf8_width` :func:`kitchen.text.display.textual_width`
|
||||||
|
:func:`yum.i18n.utf8_width_chop` :func:`kitchen.text.display.textual_width_chop`
|
||||||
|
and :func:`kitchen.text.display.textual_width` [#y2]_ [#y4]_
|
||||||
|
:func:`yum.i18n.utf8_valid` :func:`kitchen.text.misc.byte_string_valid_encoding`
|
||||||
|
:func:`yum.i18n.utf8_text_wrap` :func:`kitchen.text.display.wrap` [#y3]_
|
||||||
|
:func:`yum.i18n.utf8_text_fill` :func:`kitchen.text.display.fill` [#y3]_
|
||||||
|
:func:`yum.i18n.to_unicode` :func:`kitchen.text.converters.to_unicode` [#y5]_
|
||||||
|
:func:`yum.i18n.to_unicode_maybe` :func:`kitchen.text.converters.to_unicode` [#y5]_
|
||||||
|
:func:`yum.i18n.to_utf8` :func:`kitchen.text.converters.to_bytes` [#y5]_
|
||||||
|
:func:`yum.i18n.to_str` :func:`kitchen.text.converters.to_unicode`
|
||||||
|
or :func:`kitchen.text.converters.to_bytes` [#y6]_
|
||||||
|
:func:`yum.i18n.str_eq` :func:`kitchen.text.misc.str_eq`
|
||||||
|
:func:`yum.misc.to_xml` :func:`kitchen.text.converters.unicode_to_xml`
|
||||||
|
or :func:`kitchen.text.converters.byte_string_to_xml` [#y7]_
|
||||||
|
:func:`yum.i18n._` See: :ref:`yum-i18n-init`
|
||||||
|
:func:`yum.i18n.P_` See: :ref:`yum-i18n-init`
|
||||||
|
:func:`yum.i18n.exception2msg` :func:`kitchen.text.converters.exception_to_unicode`
|
||||||
|
or :func:`kitchen.text.converter.exception_to_bytes` [#y8]_
|
||||||
|
================================= ===================
|
||||||
|
|
||||||
|
.. [#y1] These yum methods provided fallback support for :mod:`gettext`
|
||||||
|
functions in case either ``gaftonmode`` was set or :mod:`gettext` failed
|
||||||
|
to return an object. In kitchen, we can use the
|
||||||
|
:class:`kitchen.i18n.DummyTranslations` object to fulfill that role.
|
||||||
|
Please see :ref:`yum-i18n-init` for more suggestions on how to do this.
|
||||||
|
|
||||||
|
.. [#y2] The yum version of these functions returned a byte :class:`str`. The
|
||||||
|
kitchen version listed here returns a :class:`unicode` string. If you
|
||||||
|
need a byte :class:`str` simply call
|
||||||
|
:func:`kitchen.text.converters.to_bytes` on the result.
|
||||||
|
|
||||||
|
.. [#y3] The yum version of these functions would return either a byte
|
||||||
|
:class:`str` or a :class:`unicode` string depending on what the input
|
||||||
|
value was. The kitchen version always returns :class:`unicode` strings.
|
||||||
|
|
||||||
|
.. [#y4] :func:`yum.i18n.utf8_width_chop` performed two functions. It
|
||||||
|
returned the piece of the message that fit in a specified width and the
|
||||||
|
width of that message. In kitchen, you need to call two functions, one
|
||||||
|
for each action::
|
||||||
|
|
||||||
|
>>> # Old way
|
||||||
|
>>> utf8_width_chop(msg, 5)
|
||||||
|
(5, 'く ku')
|
||||||
|
>>> # New way
|
||||||
|
>>> from kitchen.text.display import textual_width, textual_width_chop
|
||||||
|
>>> (textual_width(msg), textual_width_chop(msg, 5))
|
||||||
|
(5, u'く ku')
|
||||||
|
|
||||||
|
.. [#y5] If the yum version of :func:`~yum.i18n.to_unicode` or
|
||||||
|
:func:`~yum.i18n.to_utf8` is given an object that is not a string, it
|
||||||
|
returns the object itself. :func:`kitchen.text.converters.to_unicode` and
|
||||||
|
:func:`kitchen.text.converters.to_bytes` default to returning the
|
||||||
|
``simplerepr`` of the object instead. If you want the yum behaviour, set
|
||||||
|
the :attr:`nonstring` parameter to ``passthru``::
|
||||||
|
|
||||||
|
>>> from kitchen.text.converters import to_unicode
|
||||||
|
>>> to_unicode(5)
|
||||||
|
u'5'
|
||||||
|
>>> to_unicode(5, nonstring='passthru')
|
||||||
|
5
|
||||||
|
|
||||||
|
.. [#y6] :func:`yum.i18n.to_str` could return either a byte :class:`str` or
|
||||||
|
a :class:`unicode` string. In kitchen you can get the same effect but you
|
||||||
|
get to choose whether you want a byte :class:`str` or a :class:`unicode`
|
||||||
|
string. Use :func:`~kitchen.text.converters.to_bytes` for :class:`str`
|
||||||
|
and :func:`~kitchen.text.converters.to_unicode` for :class:`unicode`.
|
||||||
|
|
||||||
|
.. [#y7] :func:`yum.misc.to_xml` was buggy as written. I think the intention
|
||||||
|
was for you to be able to pass a byte :class:`str` or :class:`unicode`
|
||||||
|
string in and get out a byte :class:`str` that was valid to use in an xml
|
||||||
|
file. The two kitchen functions
|
||||||
|
:func:`~kitchen.text.converters.byte_string_to_xml` and
|
||||||
|
:func:`~kitchen.text.converters.unicode_to_xml` do that for each string
|
||||||
|
type.
|
||||||
|
|
||||||
|
.. [#y8] When porting :func:`yum.i18n.exception2msg` to use kitchen, you
|
||||||
|
should setup two wrapper functions to aid in your port. They'll look like
|
||||||
|
this:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
from kitchen.text.converters import EXCEPTION_CONVERTERS, \
|
||||||
|
BYTE_EXCEPTION_CONVERTERS, exception_to_unicode, \
|
||||||
|
exception_to_bytes
|
||||||
|
def exception2umsg(e):
|
||||||
|
'''Return a unicode representation of an exception'''
|
||||||
|
c = [lambda e: e.value]
|
||||||
|
c.extend(EXCEPTION_CONVERTERS)
|
||||||
|
return exception_to_unicode(e, converters=c)
|
||||||
|
def exception2bmsg(e):
|
||||||
|
'''Return a utf8 encoded str representation of an exception'''
|
||||||
|
c = [lambda e: e.value]
|
||||||
|
c.extend(BYTE_EXCEPTION_CONVERTERS)
|
||||||
|
return exception_to_bytes(e, converters=c)
|
||||||
|
|
||||||
|
The reason to define this wrapper is that many of the exceptions in yum
|
||||||
|
put the message in the :attr:`value` attribute of the :exc:`Exception`
|
||||||
|
instead of adding it to the :attr:`args` attribute. So the default
|
||||||
|
:data:`~kitchen.text.converters.EXCEPTION_CONVERTERS` don't know where to
|
||||||
|
find the message. The wrapper tells kitchen to check the :attr:`value`
|
||||||
|
attribute for the message. The reason to define two wrappers may be less
|
||||||
|
obvious. :func:`yum.i18n.exception2msg` can return a :class:`unicode`
|
||||||
|
string or a byte :class:`str` depending on a combination of what
|
||||||
|
attributes are present on the :exc:`Exception` and what locale the
|
||||||
|
function is being run in. By contrast,
|
||||||
|
:func:`kitchen.text.converters.exception_to_unicode` only returns
|
||||||
|
:class:`unicode` strings and
|
||||||
|
:func:`kitchen.text.converters.exception_to_bytes` only returns byte
|
||||||
|
:class:`str`. This is much safer as it keeps code that can only handle
|
||||||
|
:class:`unicode` or only handle byte :class:`str` correctly from getting
|
||||||
|
the wrong type when an input changes but it means you need to examine the
|
||||||
|
calling code when porting from :func:`yum.i18n.exception2msg` and use the
|
||||||
|
appropriate wrapper.
|
||||||
|
|
||||||
|
.. _yum-i18n-init:
|
||||||
|
|
||||||
|
Initializing Yum i18n
|
||||||
|
=====================
|
||||||
|
|
||||||
|
Previously, yum had several pieces of code to initialize i18n. From the
|
||||||
|
toplevel of :file:`yum/i18n.py`::
|
||||||
|
|
||||||
|
try:
|
||||||
|
'''
|
||||||
|
Setup the yum translation domain and make _() and P_() translation wrappers
|
||||||
|
available.
|
||||||
|
using ugettext to make sure translated strings are in Unicode.
|
||||||
|
'''
|
||||||
|
import gettext
|
||||||
|
t = gettext.translation('yum', fallback=True)
|
||||||
|
_ = t.ugettext
|
||||||
|
P_ = t.ungettext
|
||||||
|
except:
|
||||||
|
'''
|
||||||
|
Something went wrong so we make a dummy _() wrapper there is just
|
||||||
|
returning the same text
|
||||||
|
'''
|
||||||
|
_ = dummy_wrapper
|
||||||
|
P_ = dummyP_wrapper
|
||||||
|
|
||||||
|
With kitchen, this can be changed to this::
|
||||||
|
|
||||||
|
from kitchen.i18n import easy_gettext_setup, DummyTranslations
|
||||||
|
try:
|
||||||
|
_, P_ = easy_gettext_setup('yum')
|
||||||
|
except:
|
||||||
|
translations = DummyTranslations()
|
||||||
|
_ = translations.ugettext
|
||||||
|
P_ = translations.ungettext
|
||||||
|
|
||||||
|
.. note:: In :ref:`overcoming-frustration`, it is mentioned that for some
|
||||||
|
things (like exception messages), using the byte :class:`str` oriented
|
||||||
|
functions is more appropriate. If this is desired, the setup portion is
|
||||||
|
only a second call to :func:`kitchen.i18n.easy_gettext_setup`::
|
||||||
|
|
||||||
|
b_, bP_ = easy_gettext_setup('yum', use_unicode=False)
|
||||||
|
|
||||||
|
The second place where i18n is setup is in :meth:`yum.YumBase._getConfig` in
|
||||||
|
:file:`yum/__init__.py` if ``gaftonmode`` is in effect::
|
||||||
|
|
||||||
|
if startupconf.gaftonmode:
|
||||||
|
global _
|
||||||
|
_ = yum.i18n.dummy_wrapper
|
||||||
|
|
||||||
|
This can be changed to::
|
||||||
|
|
||||||
|
if startupconf.gaftonmode:
|
||||||
|
global _
|
||||||
|
_ = DummyTranslations().ugettext
|
19
docs/tutorial.rst
Normal file
19
docs/tutorial.rst
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
================================
|
||||||
|
Using kitchen to write good code
|
||||||
|
================================
|
||||||
|
|
||||||
|
Kitchen's functions won't automatically make you a better programmer. You
|
||||||
|
have to learn when and how to use them as well. This section of the
|
||||||
|
documentation is intended to show you some of the ways that you can apply
|
||||||
|
kitchen's functions to problems that may have arisen in your life. The goal
|
||||||
|
of this section is to give you enough information to understand what the
|
||||||
|
kitchen API can do for you and where in the :ref:`KitchenAPI` docs to look
|
||||||
|
for something that can help you with your next issue. Along the way,
|
||||||
|
you might pick up the knack for identifying issues with your code before you
|
||||||
|
publish it. And that *will* make you a better coder.
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 2
|
||||||
|
|
||||||
|
unicode-frustrations
|
||||||
|
designing-unicode-apis
|
571
docs/unicode-frustrations.rst
Normal file
571
docs/unicode-frustrations.rst
Normal file
|
@ -0,0 +1,571 @@
|
||||||
|
.. _overcoming-frustration:
|
||||||
|
|
||||||
|
==========================================================
|
||||||
|
Overcoming frustration: Correctly using unicode in python2
|
||||||
|
==========================================================
|
||||||
|
|
||||||
|
In python-2.x, there's two types that deal with text.
|
||||||
|
|
||||||
|
1. :class:`str` is for strings of bytes. These are very similar in nature to
|
||||||
|
how strings are handled in C.
|
||||||
|
2. :class:`unicode` is for strings of unicode :term:`code points`.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
**Just what the dickens is "Unicode"?**
|
||||||
|
|
||||||
|
One mistake that people encountering this issue for the first time make is
|
||||||
|
confusing the :class:`unicode` type and the encodings of unicode stored in
|
||||||
|
the :class:`str` type. In python, the :class:`unicode` type stores an
|
||||||
|
abstract sequence of :term:`code points`. Each :term:`code point`
|
||||||
|
represents a :term:`grapheme`. By contrast, byte :class:`str` stores
|
||||||
|
a sequence of bytes which can then be mapped to a sequence of :term:`code
|
||||||
|
points`. Each unicode encoding (:term:`UTF-8`, UTF-7, UTF-16, UTF-32,
|
||||||
|
etc) maps different sequences of bytes to the unicode :term:`code points`.
|
||||||
|
|
||||||
|
What does that mean to you as a programmer? When you're dealing with text
|
||||||
|
manipulations (finding the number of characters in a string or cutting
|
||||||
|
a string on word boundaries) you should be dealing with :class:`unicode`
|
||||||
|
strings as they abstract characters in a manner that's appropriate for
|
||||||
|
thinking of them as a sequence of letters that you will see on a page.
|
||||||
|
When dealing with I/O, reading to and from the disk, printing to
|
||||||
|
a terminal, sending something over a network link, etc, you should be dealing
|
||||||
|
with byte :class:`str` as those devices are going to need to deal with
|
||||||
|
concrete implementations of what bytes represent your abstract characters.
|
||||||
|
|
||||||
|
In the python2 world many APIs use these two classes interchangably but there
|
||||||
|
are several important APIs where only one or the other will do the right
|
||||||
|
thing. When you give the wrong type of string to an API that wants the other
|
||||||
|
type, you may end up with an exception being raised (:exc:`UnicodeDecodeError`
|
||||||
|
or :exc:`UnicodeEncodeError`). However, these exceptions aren't always raised
|
||||||
|
because python implicitly converts between types... *sometimes*.
|
||||||
|
|
||||||
|
-----------------------------------
|
||||||
|
Frustration #1: Inconsistent Errors
|
||||||
|
-----------------------------------
|
||||||
|
|
||||||
|
Although converting when possible seems like the right thing to do, it's
|
||||||
|
actually the first source of frustration. A programmer can test out their
|
||||||
|
program with a string like: ``The quick brown fox jumped over the lazy dog``
|
||||||
|
and not encounter any issues. But when they release their software into the
|
||||||
|
wild, someone enters the string: ``I sat down for coffee at the café`` and
|
||||||
|
suddenly an exception is thrown. The reason? The mechanism that converts
|
||||||
|
between the two types is only able to deal with :term:`ASCII` characters.
|
||||||
|
Once you throw non-:term:`ASCII` characters into your strings, you have to
|
||||||
|
start dealing with the conversion manually.
|
||||||
|
|
||||||
|
So, if I manually convert everything to either byte :class:`str` or
|
||||||
|
:class:`unicode` strings, will I be okay? The answer is.... *sometimes*.
|
||||||
|
|
||||||
|
---------------------------------
|
||||||
|
Frustration #2: Inconsistent APIs
|
||||||
|
---------------------------------
|
||||||
|
|
||||||
|
The problem you run into when converting everything to byte :class:`str` or
|
||||||
|
:class:`unicode` strings is that you'll be using someone else's API quite
|
||||||
|
often (this includes the APIs in the |stdlib|_) and find that the API will only
|
||||||
|
accept byte :class:`str` or only accept :class:`unicode` strings. Or worse,
|
||||||
|
that the code will accept either when you're dealing with strings that consist
|
||||||
|
solely of :term:`ASCII` but throw an error when you give it a string that's
|
||||||
|
got non-:term:`ASCII` characters. When you encounter these APIs you first
|
||||||
|
need to identify which type will work better and then you have to convert your
|
||||||
|
values to the correct type for that code. Thus the programmer that wants to
|
||||||
|
proactively fix all unicode errors in their code needs to do two things:
|
||||||
|
|
||||||
|
1. You must keep track of what type your sequences of text are. Does
|
||||||
|
``my_sentence`` contain :class:`unicode` or :class:`str`? If you don't
|
||||||
|
know that then you're going to be in for a world of hurt.
|
||||||
|
2. Anytime you call a function you need to evaluate whether that function will
|
||||||
|
do the right thing with :class:`str` or :class:`unicode` values. Sending
|
||||||
|
the wrong value here will lead to a :exc:`UnicodeError` being thrown when
|
||||||
|
the string contains non-:term:`ASCII` characters.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
There is one mitigating factor here. The python community has been
|
||||||
|
standardizing on using :class:`unicode` in all its APIs. Although there
|
||||||
|
are some APIs that you need to send byte :class:`str` to in order to be
|
||||||
|
safe, (including things as ubiquitous as :func:`print` as we'll see in the
|
||||||
|
next section), it's getting easier and easier to use :class:`unicode`
|
||||||
|
strings with most APIs.
|
||||||
|
|
||||||
|
------------------------------------------------
|
||||||
|
Frustration #3: Inconsistent treatment of output
|
||||||
|
------------------------------------------------
|
||||||
|
|
||||||
|
Alright, since the python community is moving to using :class:`unicode`
|
||||||
|
strings everywhere, we might as well convert everything to :class:`unicode`
|
||||||
|
strings and use that by default, right? Sounds good most of the time but
|
||||||
|
there's at least one huge caveat to be aware of. Anytime you output text to
|
||||||
|
the terminal or to a file, the text has to be converted into a byte
|
||||||
|
:class:`str`. Python will try to implicitly convert from :class:`unicode` to
|
||||||
|
byte :class:`str`... but it will throw an exception if the bytes are
|
||||||
|
non-:term:`ASCII`::
|
||||||
|
|
||||||
|
>>> string = unicode(raw_input(), 'utf8')
|
||||||
|
café
|
||||||
|
>>> log = open('/var/tmp/debug.log', 'w')
|
||||||
|
>>> log.write(string)
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "<stdin>", line 1, in <module>
|
||||||
|
UnicodeEncodeError: 'ascii' codec can't encode character u'\xe9' in position 3: ordinal not in range(128)
|
||||||
|
|
||||||
|
Okay, this is simple enough to solve: Just convert to a byte :class:`str` and
|
||||||
|
we're all set::
|
||||||
|
|
||||||
|
>>> string = unicode(raw_input(), 'utf8')
|
||||||
|
café
|
||||||
|
>>> string_for_output = string.encode('utf8', 'replace')
|
||||||
|
>>> log = open('/var/tmp/debug.log', 'w')
|
||||||
|
>>> log.write(string_for_output)
|
||||||
|
>>>
|
||||||
|
|
||||||
|
So that was simple, right? Well... there's one gotcha that makes things a bit
|
||||||
|
harder to debug sometimes. When you attempt to write non-:term:`ASCII`
|
||||||
|
:class:`unicode` strings to a file-like object you get a traceback everytime.
|
||||||
|
But what happens when you use :func:`print`? The terminal is a file-like object
|
||||||
|
so it should raise an exception right? The answer to that is....
|
||||||
|
*sometimes*:
|
||||||
|
|
||||||
|
.. code-block:: pycon
|
||||||
|
|
||||||
|
$ python
|
||||||
|
>>> print u'café'
|
||||||
|
café
|
||||||
|
|
||||||
|
No exception. Okay, we're fine then?
|
||||||
|
|
||||||
|
We are until someone does one of the following:
|
||||||
|
|
||||||
|
* Runs the script in a different locale:
|
||||||
|
|
||||||
|
.. code-block:: pycon
|
||||||
|
|
||||||
|
$ LC_ALL=C python
|
||||||
|
>>> # Note: if you're using a good terminal program when running in the C locale
|
||||||
|
>>> # The terminal program will prevent you from entering non-ASCII characters
|
||||||
|
>>> # python will still recognize them if you use the codepoint instead:
|
||||||
|
>>> print u'caf\xe9'
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "<stdin>", line 1, in <module>
|
||||||
|
UnicodeEncodeError: 'ascii' codec can't encode character u'\xe9' in position 3: ordinal not in range(128)
|
||||||
|
|
||||||
|
* Redirects output to a file:
|
||||||
|
|
||||||
|
.. code-block:: pycon
|
||||||
|
|
||||||
|
$ cat test.py
|
||||||
|
#!/usr/bin/python -tt
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
print u'café'
|
||||||
|
$ ./test.py >t
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "./test.py", line 4, in <module>
|
||||||
|
print u'café'
|
||||||
|
UnicodeEncodeError: 'ascii' codec can't encode character u'\xe9' in position 3: ordinal not in range(128)
|
||||||
|
|
||||||
|
Okay, the locale thing is a pain but understandable: the C locale doesn't
|
||||||
|
understand any characters outside of :term:`ASCII` so naturally attempting to
|
||||||
|
display those won't work. Now why does redirecting to a file cause problems?
|
||||||
|
It's because :func:`print` in python2 is treated specially. Whereas the other
|
||||||
|
file-like objects in python always convert to :term:`ASCII` unless you set
|
||||||
|
them up differently, using :func:`print` to output to the terminal will use
|
||||||
|
the user's locale to convert before sending the output to the terminal. When
|
||||||
|
:func:`print` is not outputting to the terminal (being redirected to a file,
|
||||||
|
for instance), :func:`print` decides that it doesn't know what locale to use
|
||||||
|
for that file and so it tries to convert to :term:`ASCII` instead.
|
||||||
|
|
||||||
|
So what does this mean for you, as a programmer? Unless you have the luxury
|
||||||
|
of controlling how your users use your code, you should always, always, always
|
||||||
|
convert to a byte :class:`str` before outputting strings to the terminal or to
|
||||||
|
a file. Python even provides you with a facility to do just this. If you
|
||||||
|
know that every :class:`unicode` string you send to a particular file-like
|
||||||
|
object (for instance, :data:`~sys.stdout`) should be converted to a particular
|
||||||
|
encoding you can use a :class:`codecs.StreamWriter` object to convert from
|
||||||
|
a :class:`unicode` string into a byte :class:`str`. In particular,
|
||||||
|
:func:`codecs.getwriter` will return a :class:`~codecs.StreamWriter` class
|
||||||
|
that will help you to wrap a file-like object for output. Using our
|
||||||
|
:func:`print` example:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
$ cat test.py
|
||||||
|
#!/usr/bin/python -tt
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
import codecs
|
||||||
|
import sys
|
||||||
|
|
||||||
|
UTF8Writer = codecs.getwriter('utf8')
|
||||||
|
sys.stdout = UTF8Writer(sys.stdout)
|
||||||
|
print u'café'
|
||||||
|
$ ./test.py >t
|
||||||
|
$ cat t
|
||||||
|
café
|
||||||
|
|
||||||
|
-----------------------------------------
|
||||||
|
Frustrations #4 and #5 -- The other shoes
|
||||||
|
-----------------------------------------
|
||||||
|
|
||||||
|
In English, there's a saying "waiting for the other shoe to drop". It means
|
||||||
|
that when one event (usually bad) happens, you come to expect another event
|
||||||
|
(usually worse) to come after. In this case we have two other shoes.
|
||||||
|
|
||||||
|
|
||||||
|
Frustration #4: Now it doesn't take byte strings?!
|
||||||
|
==================================================
|
||||||
|
|
||||||
|
If you wrap :data:`sys.stdout` using :func:`codecs.getwriter` and think you
|
||||||
|
are now safe to print any variable without checking its type I am afraid
|
||||||
|
I must inform you that you're not paying enough attention to :term:`Murphy's
|
||||||
|
Law`. The :class:`~codecs.StreamWriter` that :func:`codecs.getwriter`
|
||||||
|
provides will take :class:`unicode` strings and transform them into byte
|
||||||
|
:class:`str` before they get to :data:`sys.stdout`. The problem is if you
|
||||||
|
give it something that's already a byte :class:`str` it tries to transform
|
||||||
|
that as well. To do that it tries to turn the byte :class:`str` you give it
|
||||||
|
into :class:`unicode` and then transform that back into a byte :class:`str`...
|
||||||
|
and since it uses the :term:`ASCII` codec to perform those conversions,
|
||||||
|
chances are that it'll blow up when making them::
|
||||||
|
|
||||||
|
>>> import codecs
|
||||||
|
>>> import sys
|
||||||
|
>>> UTF8Writer = codecs.getwriter('utf8')
|
||||||
|
>>> sys.stdout = UTF8Writer(sys.stdout)
|
||||||
|
>>> print 'café'
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "<stdin>", line 1, in <module>
|
||||||
|
File "/usr/lib64/python2.6/codecs.py", line 351, in write
|
||||||
|
data, consumed = self.encode(object, self.errors)
|
||||||
|
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 3: ordinal not in range(128)
|
||||||
|
|
||||||
|
To work around this, kitchen provides an alternate version of
|
||||||
|
:func:`codecs.getwriter` that can deal with both byte :class:`str` and
|
||||||
|
:class:`unicode` strings. Use :func:`kitchen.text.converters.getwriter` in
|
||||||
|
place of the :mod:`codecs` version like this::
|
||||||
|
|
||||||
|
>>> import sys
|
||||||
|
>>> from kitchen.text.converters import getwriter
|
||||||
|
>>> UTF8Writer = getwriter('utf8')
|
||||||
|
>>> sys.stdout = UTF8Writer(sys.stdout)
|
||||||
|
>>> print u'café'
|
||||||
|
café
|
||||||
|
>>> print 'café'
|
||||||
|
café
|
||||||
|
|
||||||
|
Frustration #5: Exceptions
|
||||||
|
==========================
|
||||||
|
|
||||||
|
Okay, so we've gotten ourselves this far. We convert everything to
|
||||||
|
:class:`unicode` strings. We're aware that we need to convert back into byte
|
||||||
|
:class:`str` before we write to the terminal. We've worked around the
|
||||||
|
inability of the standard :func:`~codecs.getwriter` to deal with both byte
|
||||||
|
:class:`str` and :class:`unicode` strings. Are we all set? Well, there's at
|
||||||
|
least one more gotcha: raising exceptions with a :class:`unicode` message.
|
||||||
|
Take a look:
|
||||||
|
|
||||||
|
.. code-block:: pycon
|
||||||
|
|
||||||
|
>>> class MyException(Exception):
|
||||||
|
>>> pass
|
||||||
|
>>>
|
||||||
|
>>> raise MyException(u'Cannot do this')
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "<stdin>", line 1, in <module>
|
||||||
|
__main__.MyException: Cannot do this
|
||||||
|
>>> raise MyException(u'Cannot do this while at a café')
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "<stdin>", line 1, in <module>
|
||||||
|
__main__.MyException:
|
||||||
|
>>>
|
||||||
|
|
||||||
|
No, I didn't truncate that last line; raising exceptions really cannot handle
|
||||||
|
non-:term:`ASCII` characters in a :class:`unicode` string and will output an
|
||||||
|
exception without the message if the message contains them. What happens if
|
||||||
|
we try to use the handy dandy :func:`~kitchen.text.converters.getwriter` trick
|
||||||
|
to work around this?
|
||||||
|
|
||||||
|
.. code-block:: pycon
|
||||||
|
|
||||||
|
>>> import sys
|
||||||
|
>>> from kitchen.text.converters import getwriter
|
||||||
|
>>> sys.stderr = getwriter('utf8')(sys.stderr)
|
||||||
|
>>> raise MyException(u'Cannot do this')
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "<stdin>", line 1, in <module>
|
||||||
|
__main__.MyException: Cannot do this
|
||||||
|
>>> raise MyException(u'Cannot do this while at a café')
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "<stdin>", line 1, in <module>
|
||||||
|
__main__.MyException>>>
|
||||||
|
|
||||||
|
Not only did this also fail, it even swallowed the trailing newline that's
|
||||||
|
normally there.... So how to make this work? Transform from :class:`unicode`
|
||||||
|
strings to byte :class:`str` manually before outputting::
|
||||||
|
|
||||||
|
>>> from kitchen.text.converters import to_bytes
|
||||||
|
>>> raise MyException(to_bytes(u'Cannot do this while at a café'))
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "<stdin>", line 1, in <module>
|
||||||
|
__main__.MyException: Cannot do this while at a café
|
||||||
|
>>>
|
||||||
|
|
||||||
|
.. warning::
|
||||||
|
|
||||||
|
If you use :func:`codecs.getwriter` on :data:`sys.stderr`, you'll find
|
||||||
|
that raising an exception with a byte :class:`str` is broken by the
|
||||||
|
default :class:`~codecs.StreamWriter` as well. Don't do that or you'll
|
||||||
|
have no way to output non-:term:`ASCII` characters. If you want to use
|
||||||
|
a :class:`~codecs.StreamWriter` to encode other things on stderr while
|
||||||
|
still having working exceptions, use
|
||||||
|
:func:`kitchen.text.converters.getwriter`.
|
||||||
|
|
||||||
|
-------------------------------------------
|
||||||
|
Frustration #6: Inconsistent APIs Part deux
|
||||||
|
-------------------------------------------
|
||||||
|
Sometimes you do everything right in your code but other people's code fails
|
||||||
|
you. With unicode issues this happens more often than we want. A glaring
|
||||||
|
example of this is when you get values back from a function that aren't
|
||||||
|
consistently :class:`unicode` string or byte :class:`str`.
|
||||||
|
|
||||||
|
An example from the |stdlib|_ is :mod:`gettext`. The :mod:`gettext` functions
|
||||||
|
are used to help translate messages that you display to users in the users'
|
||||||
|
native languages. Since most languages contain letters outside of the
|
||||||
|
:term:`ASCII` range, the values that are returned contain unicode characters.
|
||||||
|
:mod:`gettext` provides you with :meth:`~gettext.GNUTranslations.ugettext` and
|
||||||
|
:meth:`~gettext.GNUTranslations.ungettext` to return these translations as
|
||||||
|
:class:`unicode` strings and :meth:`~gettext.GNUTranslations.gettext`,
|
||||||
|
:meth:`~gettext.GNUTranslations.ngettext`,
|
||||||
|
:meth:`~gettext.GNUTranslations.lgettext`, and
|
||||||
|
:meth:`~gettext.GNUTranslations.lngettext` to return them as encoded byte
|
||||||
|
:class:`str`. Unfortunately, even though they're documented to return only
|
||||||
|
one type of string or the other, the implementation has corner cases where the
|
||||||
|
wrong type can be returned.
|
||||||
|
|
||||||
|
This means that even if you separate your :class:`unicode` string and byte
|
||||||
|
:class:`str` correctly before you pass your strings to a :mod:`gettext`
|
||||||
|
function, afterwards, you might have to check that you have the right sort of
|
||||||
|
string type again.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
:mod:`kitchen.i18n` provides alternate gettext translation objects that
|
||||||
|
return only byte :class:`str` or only :class:`unicode` string.
|
||||||
|
|
||||||
|
---------------
|
||||||
|
A few solutions
|
||||||
|
---------------
|
||||||
|
|
||||||
|
Now that we've identified the issues, can we define a comprehensive strategy
|
||||||
|
for dealing with them?
|
||||||
|
|
||||||
|
Convert text at the border
|
||||||
|
==========================
|
||||||
|
|
||||||
|
If you get some piece of text from a library, read from a file, etc, turn it
|
||||||
|
into a :class:`unicode` string immediately. Since python is moving in the
|
||||||
|
direction of :class:`unicode` strings everywhere it's going to be easier to
|
||||||
|
work with :class:`unicode` strings within your code.
|
||||||
|
|
||||||
|
If your code is heavily involved with using things that are bytes, you can do
|
||||||
|
the opposite and convert all text into byte :class:`str` at the border and
|
||||||
|
only convert to :class:`unicode` when you need it for passing to another
|
||||||
|
library or performing string operations on it.
|
||||||
|
|
||||||
|
In either case, the important thing is to pick a default type for strings and
|
||||||
|
stick with it throughout your code. When you mix the types it becomes much
|
||||||
|
easier to operate on a string with a function that can only use the other type
|
||||||
|
by mistake.
|
||||||
|
|
||||||
|
.. note:: In python3, the abstract unicode type becomes much more prominent.
|
||||||
|
The type named ``str`` is the equivalent of python2's :class:`unicode` and
|
||||||
|
python3's ``bytes`` type replaces python2's :class:`str`. Most APIs deal
|
||||||
|
in the unicode type of string with just some pieces that are low level
|
||||||
|
dealing with bytes. The implicit conversions between bytes and unicode
|
||||||
|
is removed and whenever you want to make the conversion you need to do so
|
||||||
|
explicitly.
|
||||||
|
|
||||||
|
When the data needs to be treated as bytes (or unicode) use a naming convention
|
||||||
|
===============================================================================
|
||||||
|
|
||||||
|
Sometimes you're converting nearly all of your data to :class:`unicode`
|
||||||
|
strings but you have one or two values where you have to keep byte
|
||||||
|
:class:`str` around. This is often the case when you need to use the value
|
||||||
|
verbatim with some external resource. For instance, filenames or key values
|
||||||
|
in a database. When you do this, use a naming convention for the data you're
|
||||||
|
working with so you (and others reading your code later) don't get confused
|
||||||
|
about what's being stored in the value.
|
||||||
|
|
||||||
|
If you need both a textual string to present to the user and a byte value for
|
||||||
|
an exact match, consider keeping both versions around. You can either use two
|
||||||
|
variables for this or a :class:`dict` whose key is the byte value.
|
||||||
|
|
||||||
|
.. note:: You can use the naming convention used in kitchen as a guide for
|
||||||
|
implementing your own naming convention. It prefixes byte :class:`str`
|
||||||
|
variables of unknown encoding with ``b_`` and byte :class:`str` of known
|
||||||
|
encoding with the encoding name like: ``utf8_``. If the default was to
|
||||||
|
handle :class:`str` and only keep a few :class:`unicode` values, those
|
||||||
|
variables would be prefixed with ``u_``.
|
||||||
|
|
||||||
|
When outputting data, convert back into bytes
|
||||||
|
=============================================
|
||||||
|
|
||||||
|
When you go to send your data back outside of your program (to the filesystem,
|
||||||
|
over the network, displaying to the user, etc) turn the data back into a byte
|
||||||
|
:class:`str`. How you do this will depend on the expected output format of
|
||||||
|
the data. For displaying to the user, you can use the user's default encoding
|
||||||
|
using :func:`locale.getpreferredencoding`. For entering into a file, your best
|
||||||
|
bet is to pick a single encoding and stick with it.
|
||||||
|
|
||||||
|
.. warning::
|
||||||
|
|
||||||
|
When using the encoding that the user has set (for instance, using
|
||||||
|
:func:`locale.getpreferredencoding`), remember that they may have their
|
||||||
|
encoding set to something that can't display every single unicode
|
||||||
|
character. That means when you convert from :class:`unicode` to a byte
|
||||||
|
:class:`str` you need to decide what should happen if the byte value is
|
||||||
|
not valid in the user's encoding. For purposes of displaying messages to
|
||||||
|
the user, it's usually okay to use the ``replace`` encoding error handler
|
||||||
|
to replace the invalid characters with a question mark or other symbol
|
||||||
|
meaning the character couldn't be displayed.
|
||||||
|
|
||||||
|
You can use :func:`kitchen.text.converters.getwriter` to do this automatically
|
||||||
|
for :data:`sys.stdout`. When creating exception messages be sure to convert
|
||||||
|
to bytes manually.
|
||||||
|
|
||||||
|
When writing unittests, include non-ASCII values and both unicode and str type
|
||||||
|
==============================================================================
|
||||||
|
|
||||||
|
Unless you know that a specific portion of your code will only deal with
|
||||||
|
:term:`ASCII`, be sure to include non-:term:`ASCII` values in your unittests.
|
||||||
|
Including a few characters from several different scripts is highly advised as
|
||||||
|
well because some code may have special cased accented roman characters but
|
||||||
|
not know how to handle characters used in Asian alphabets.
|
||||||
|
|
||||||
|
Similarly, unless you know that that portion of your code will only be given
|
||||||
|
:class:`unicode` strings or only byte :class:`str` be sure to try variables
|
||||||
|
of both types in your unittests. When doing this, make sure that the
|
||||||
|
variables are also non-:term:`ASCII` as python's implicit conversion will mask
|
||||||
|
problems with pure :term:`ASCII` data. In many cases, it makes sense to check
|
||||||
|
what happens if byte :class:`str` and :class:`unicode` strings that won't
|
||||||
|
decode in the present locale are given.
|
||||||
|
|
||||||
|
Be vigilant about spotting poor APIs
|
||||||
|
====================================
|
||||||
|
|
||||||
|
Make sure that the libraries you use return only :class:`unicode` strings or
|
||||||
|
byte :class:`str`. Unittests can help you spot issues here by running many
|
||||||
|
variations of data through your functions and checking that you're still
|
||||||
|
getting the types of string that you expect.
|
||||||
|
|
||||||
|
Example: Putting this all together with kitchen
|
||||||
|
===============================================
|
||||||
|
|
||||||
|
The kitchen library provides a wide array of functions to help you deal with
|
||||||
|
byte :class:`str` and :class:`unicode` strings in your program. Here's
|
||||||
|
a short example that uses many kitchen functions to do its work::
|
||||||
|
|
||||||
|
#!/usr/bin/python -tt
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
import locale
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import unicodedata
|
||||||
|
|
||||||
|
from kitchen.text.converters import getwriter, to_bytes, to_unicode
|
||||||
|
from kitchen.i18n import get_translation_object
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
# Setup gettext driven translations but use the kitchen functions so
|
||||||
|
# we don't have the mismatched bytes-unicode issues.
|
||||||
|
translations = get_translation_object('example')
|
||||||
|
# We use _() for marking strings that we operate on as unicode
|
||||||
|
# This is pretty much everything
|
||||||
|
_ = translations.ugettext
|
||||||
|
# And b_() for marking strings that we operate on as bytes.
|
||||||
|
# This is limited to exceptions
|
||||||
|
b_ = translations.lgettext
|
||||||
|
|
||||||
|
# Setup stdout
|
||||||
|
encoding = locale.getpreferredencoding()
|
||||||
|
Writer = getwriter(encoding)
|
||||||
|
sys.stdout = Writer(sys.stdout)
|
||||||
|
|
||||||
|
# Load data. Format is filename\0description
|
||||||
|
# description should be utf-8 but filename can be any legal filename
|
||||||
|
# on the filesystem
|
||||||
|
# Sample datafile.txt:
|
||||||
|
# /etc/shells\x00Shells available on caf\xc3\xa9.lan
|
||||||
|
# /var/tmp/file\xff\x00File with non-utf8 data in the filename
|
||||||
|
#
|
||||||
|
# And to create /var/tmp/file\xff (under bash or zsh) do:
|
||||||
|
# echo 'Some data' > /var/tmp/file$'\377'
|
||||||
|
datafile = open('datafile.txt', 'r')
|
||||||
|
data = {}
|
||||||
|
for line in datafile:
|
||||||
|
# We're going to keep filename as bytes because we will need the
|
||||||
|
# exact bytes to access files on a POSIX operating system.
|
||||||
|
# description, we'll immediately transform into unicode type.
|
||||||
|
b_filename, description = line.split('\0', 1)
|
||||||
|
|
||||||
|
# to_unicode defaults to decoding output from utf-8 and replacing
|
||||||
|
# any problematic bytes with the unicode replacement character
|
||||||
|
# We accept mangling of the description here knowing that our file
|
||||||
|
# format is supposed to use utf-8 in that field and that the
|
||||||
|
# description will only be displayed to the user, not used as
|
||||||
|
# a key value.
|
||||||
|
description = to_unicode(description, 'utf-8').strip()
|
||||||
|
data[b_filename] = description
|
||||||
|
datafile.close()
|
||||||
|
|
||||||
|
# We're going to add a pair of extra fields onto our data to show the
|
||||||
|
# length of the description and the filesize. We put those between
|
||||||
|
# the filename and description because we haven't checked that the
|
||||||
|
# description is free of NULLs.
|
||||||
|
datafile = open('newdatafile.txt', 'w')
|
||||||
|
|
||||||
|
# Name filename with a b_ prefix to denote byte string of unknown encoding
|
||||||
|
for b_filename in data:
|
||||||
|
# Since we have the byte representation of filename, we can read any
|
||||||
|
# filename
|
||||||
|
if os.access(b_filename, os.F_OK):
|
||||||
|
size = os.path.getsize(b_filename)
|
||||||
|
else:
|
||||||
|
size = 0
|
||||||
|
# Because the description is unicode type, we know the number of
|
||||||
|
# characters corresponds to the length of the normalized unicode
|
||||||
|
# string.
|
||||||
|
length = len(unicodedata.normalize('NFC', description))
|
||||||
|
|
||||||
|
# Print a summary to the screen
|
||||||
|
# Note that we do not let implicit type conversion from str to
|
||||||
|
# unicode transform b_filename into a unicode string. That might
|
||||||
|
# fail as python would use the ASCII filename. Instead we use
|
||||||
|
# to_unicode() to explicitly transform in a way that we know will
|
||||||
|
# not traceback.
|
||||||
|
print _(u'filename: %s') % to_unicode(b_filename)
|
||||||
|
print _(u'file size: %s') % size
|
||||||
|
print _(u'desc length: %s') % length
|
||||||
|
print _(u'description: %s') % data[b_filename]
|
||||||
|
|
||||||
|
# First combine the unicode portion
|
||||||
|
line = u'%s\0%s\0%s' % (size, length, data[b_filename])
|
||||||
|
# Since the filenames are bytes, turn everything else to bytes before combining
|
||||||
|
# Turning into unicode first would be wrong as the bytes in b_filename
|
||||||
|
# might not convert
|
||||||
|
b_line = '%s\0%s\n' % (b_filename, to_bytes(line))
|
||||||
|
|
||||||
|
# Just to demonstrate that getwriter will pass bytes through fine
|
||||||
|
print b_('Wrote: %s') % b_line
|
||||||
|
datafile.write(b_line)
|
||||||
|
datafile.close()
|
||||||
|
|
||||||
|
# And just to show how to properly deal with an exception.
|
||||||
|
# Note two things about this:
|
||||||
|
# 1) We use the b_() function to translate the string. This returns a
|
||||||
|
# byte string instead of a unicode string
|
||||||
|
# 2) We're using the b_() function returned by kitchen. If we had
|
||||||
|
# used the one from gettext we would need to convert the message to
|
||||||
|
# a byte str first
|
||||||
|
message = u'Demonstrate the proper way to raise exceptions. Sincerely, \u3068\u3057\u304a'
|
||||||
|
raise Exception(b_(message))
|
||||||
|
|
||||||
|
.. seealso:: :mod:`kitchen.text.converters`
|
41
kitchen/__init__.py
Normal file
41
kitchen/__init__.py
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# Copyright (c) 2011 Red Hat, Inc
|
||||||
|
#
|
||||||
|
# kitchen is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU Lesser General Public
|
||||||
|
# License as published by the Free Software Foundation; either
|
||||||
|
# version 2.1 of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# kitchen is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
# Lesser General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Lesser General Public
|
||||||
|
# License along with kitchen; if not, see <http://www.gnu.org/licenses/>
|
||||||
|
#
|
||||||
|
# Authors:
|
||||||
|
# Toshio Kuratomi <toshio@fedoraproject.org>
|
||||||
|
#
|
||||||
|
'''
|
||||||
|
Kitchen
|
||||||
|
|
||||||
|
Aggregate of a bunch of unrelated but helpful python modules.
|
||||||
|
'''
|
||||||
|
|
||||||
|
# Pylint disabled messages:
|
||||||
|
# :C0103: We need gettext aliases for both unicode strings and byte strings.
|
||||||
|
# The byte string one (b_) triggers this warning.
|
||||||
|
from kitchen import i18n
|
||||||
|
from kitchen import versioning
|
||||||
|
|
||||||
|
# Module-level translation functions: _() returns unicode strings,
# N_() is the plural-aware variant.
(_, N_) = i18n.easy_gettext_setup('kitchen.core')
#pylint: disable-msg=C0103
# b_()/bN_() are the byte str counterparts (use_unicode=False).  The short
# lowercase names trip pylint's naming check, hence the disable/enable pair.
(b_, bN_) = i18n.easy_gettext_setup('kitchen.core', use_unicode=False)
#pylint: enable-msg=C0103

# Version of the kitchen package, as a tuple consumed by kitchen.versioning.
__version_info__ = ((1, 1, 1),)
__version__ = versioning.version_tuple_to_string(__version_info__)

# NOTE(review): __all__ lists 'release' but this module does not import it
# here -- confirm a kitchen.release submodule exists in the package.
__all__ = ('exceptions', 'release',)
|
9
kitchen/collections/__init__.py
Normal file
9
kitchen/collections/__init__.py
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
from kitchen.versioning import version_tuple_to_string

# API version of the kitchen.collections subpackage (distinct from the
# version of the kitchen distribution as a whole).
__version_info__ = ((1, 1, 0),)
__version__ = version_tuple_to_string(__version_info__)

# Re-export the strictdict module and its main class for convenience.
from kitchen.collections import strictdict
from kitchen.collections.strictdict import StrictDict

# Public API of this subpackage.
__all__ = ('strictdict', 'StrictDict',)
|
87
kitchen/collections/strictdict.py
Normal file
87
kitchen/collections/strictdict.py
Normal file
|
@ -0,0 +1,87 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# Copyright (c) 2010 Red Hat, Inc
|
||||||
|
#
|
||||||
|
# kitchen is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU Lesser General Public
|
||||||
|
# License as published by the Free Software Foundation; either
|
||||||
|
# version 2.1 of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# kitchen is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
# Lesser General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Lesser General Public
|
||||||
|
# License along with kitchen; if not, see <http://www.gnu.org/licenses/>
|
||||||
|
#
|
||||||
|
# Authors:
|
||||||
|
# Toshio Kuratomi <toshio@fedoraproject.org>
|
||||||
|
'''
|
||||||
|
----------
|
||||||
|
StrictDict
|
||||||
|
----------
|
||||||
|
|
||||||
|
:class:`kitchen.collections.StrictDict` provides a dictionary that treats
|
||||||
|
:class:`str` and :class:`unicode` as distinct key values.
|
||||||
|
'''
|
||||||
|
|
||||||
|
# Pylint disabled messages:
|
||||||
|
# :C0111: We're implementing the dict interface so just reference the dict
|
||||||
|
# documentation rather than having our own docstrings
|
||||||
|
|
||||||
|
try:
|
||||||
|
# :E0611: Pylint false positive. We try to import from the stdlib but we
|
||||||
|
# have a fallback so this is okay.
|
||||||
|
#pylint:disable-msg=E0611
|
||||||
|
from collections import defaultdict
|
||||||
|
except ImportError:
|
||||||
|
from kitchen.pycompat25.collections import defaultdict
|
||||||
|
|
||||||
|
class StrictDict(defaultdict):
    '''
    Map class that considers :class:`unicode` and :class:`str` different keys

    Ordinarily when you are dealing with a :class:`dict` keyed on strings you
    want to have keys that have the same characters end up in the same bucket
    even if one key is :class:`unicode` and the other is a byte :class:`str`.
    The normal :class:`dict` type does this for :term:`ASCII` characters (but
    not for anything outside of the :term:`ASCII` range.)

    Sometimes, however, you want to keep the two string classes strictly
    separate, for instance, if you're creating a single table that can map
    from :class:`unicode` characters to :class:`str` characters and vice
    versa.  This class will help you do that by making all :class:`unicode`
    keys evaluate to a different key than all :class:`str` keys.

    Internally every key is stored as the tuple ``(repr(key), key)``;
    ``repr()`` differs between :class:`unicode` and :class:`str` values even
    when they contain the same characters, so the two types never collide.

    .. seealso::
        :class:`dict`
            for documentation on this class's methods.  This class implements
            all the standard :class:`dict` methods.  Its treatment of
            :class:`unicode` and :class:`str` keys as separate is the only
            difference.
    '''
    #pylint:disable-msg=C0111
    def __getitem__(self, key):
        return defaultdict.__getitem__(self, (repr(key), key))

    def __setitem__(self, key, value):
        defaultdict.__setitem__(self, (repr(key), key), value)

    def __delitem__(self, key):
        defaultdict.__delitem__(self, (repr(key), key))

    def __iter__(self):
        # Unwrap the stored (repr, key) tuples back into the caller's keys
        for i in defaultdict.__iter__(self):
            yield i[1]

    iterkeys = __iter__

    def keys(self):
        return list(self.__iter__())

    def __contains__(self, key):
        return defaultdict.__contains__(self, (repr(key), key))

    # The following methods are NOT routed through __getitem__/__setitem__
    # by the builtin dict machinery, so without explicit overrides they
    # would operate on the raw keys instead of the wrapped (repr, key)
    # tuples and silently miss entries stored via __setitem__ (for
    # instance, d['x'] = 1 followed by d.get('x') would return None).

    def get(self, key, default=None):
        # dict.get never triggers default_factory, matching dict semantics
        return defaultdict.get(self, (repr(key), key), default)

    def pop(self, key, *args):
        return defaultdict.pop(self, (repr(key), key), *args)

    def setdefault(self, key, default=None):
        return defaultdict.setdefault(self, (repr(key), key), default)

    def items(self):
        # Unwrap the stored keys just like keys()/__iter__ do
        return [(k[1], v) for k, v in defaultdict.items(self)]

    def update(self, *args, **kwargs):
        # Route every entry through __setitem__ so the keys get wrapped.
        # Accepts the same arguments as dict.update.
        if args:
            if len(args) > 1:
                raise TypeError(
                    'update expected at most 1 argument, got %d' % len(args))
            other = args[0]
            if hasattr(other, 'items'):
                for key, value in other.items():
                    self[key] = value
            else:
                for key, value in other:
                    self[key] = value
        for key, value in kwargs.items():
            self[key] = value
|
||||||
|
|
||||||
|
__all__ = ('StrictDict',)
|
35
kitchen/exceptions.py
Normal file
35
kitchen/exceptions.py
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# Copyright (c) 2010 Red Hat, Inc
|
||||||
|
#
|
||||||
|
# kitchen is free software; you can redistribute it and/or modify it under the
|
||||||
|
# terms of the GNU Lesser General Public License as published by the Free
|
||||||
|
# Software Foundation; either version 2.1 of the License, or (at your option)
|
||||||
|
# any later version.
|
||||||
|
#
|
||||||
|
# kitchen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
|
||||||
|
# more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Lesser General Public License
|
||||||
|
# along with kitchen; if not, see <http://www.gnu.org/licenses/>
|
||||||
|
#
|
||||||
|
# Authors:
|
||||||
|
# Toshio Kuratomi <toshio@fedoraproject.org>
|
||||||
|
#
|
||||||
|
'''
|
||||||
|
-----------------------
|
||||||
|
Base kitchen exceptions
|
||||||
|
-----------------------
|
||||||
|
|
||||||
|
Exception classes for kitchen and the root of the exception hierarchy for
|
||||||
|
all kitchen modules.
|
||||||
|
'''
|
||||||
|
|
||||||
|
class KitchenError(Exception):
    '''Root of the exception hierarchy for errors raised directly by kitchen.

    Catch this to handle any error that a kitchen module throws itself.
    '''
|
||||||
|
|
||||||
|
__all__ = ('KitchenError',)
|
827
kitchen/i18n/__init__.py
Normal file
827
kitchen/i18n/__init__.py
Normal file
|
@ -0,0 +1,827 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# Copyright (c) 2010-2011 Red Hat, Inc
|
||||||
|
# Copyright (c) 2009 Milos Komarcevic
|
||||||
|
# Copyright (c) 2008 Tim Lauridsen
|
||||||
|
#
|
||||||
|
# kitchen is free software; you can redistribute it and/or modify it under the
|
||||||
|
# terms of the GNU Lesser General Public License as published by the Free
|
||||||
|
# Software Foundation; either version 2.1 of the License, or (at your option)
|
||||||
|
# any later version.
|
||||||
|
#
|
||||||
|
# kitchen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
|
||||||
|
# more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Lesser General Public License
|
||||||
|
# along with kitchen; if not, see <http://www.gnu.org/licenses/>
|
||||||
|
#
|
||||||
|
# Authors: James Antill
|
||||||
|
# Milos Komarcevic
|
||||||
|
# Toshio Kuratomi <toshio@fedoraproject.org>
|
||||||
|
# Tim Lauridsen
|
||||||
|
# Luke Macken <lmacken@redhat.com>
|
||||||
|
# Seth Vidal <skvidal@fedoraproject.org>
|
||||||
|
#
|
||||||
|
# Portions of code taken from yum/i18n.py
|
||||||
|
# Portions of code adapted from |stdlib|_ gettext.py
|
||||||
|
'''
|
||||||
|
:term:`I18N` is an important piece of any modern program. Unfortunately,
|
||||||
|
setting up :term:`i18n` in your program is often a confusing process. The
|
||||||
|
functions provided here aim to make the programming side of that a little
|
||||||
|
easier.
|
||||||
|
|
||||||
|
Most projects will be able to do something like this when they startup::
|
||||||
|
|
||||||
|
# myprogram/__init__.py:
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from kitchen.i18n import easy_gettext_setup
|
||||||
|
|
||||||
|
_, N_ = easy_gettext_setup('myprogram', localedirs=(
|
||||||
|
os.path.join(os.path.realpath(os.path.dirname(__file__)), 'locale'),
|
||||||
|
os.path.join(sys.prefix, 'lib', 'locale')
|
||||||
|
))
|
||||||
|
|
||||||
|
Then, in other files that have strings that need translating::
|
||||||
|
|
||||||
|
# myprogram/commands.py:
|
||||||
|
|
||||||
|
from myprogram import _, N_
|
||||||
|
|
||||||
|
def print_usage():
|
||||||
|
print _(u"""available commands are:
|
||||||
|
--help Display help
|
||||||
|
--version Display version of this program
|
||||||
|
--bake-me-a-cake as fast as you can
|
||||||
|
""")
|
||||||
|
|
||||||
|
def print_invitations(age):
|
||||||
|
print _('Please come to my party.')
|
||||||
|
print N_('I will be turning %(age)s year old',
|
||||||
|
'I will be turning %(age)s years old', age) % {'age': age}
|
||||||
|
|
||||||
|
See the documentation of :func:`easy_gettext_setup` and
|
||||||
|
:func:`get_translation_object` for more details.
|
||||||
|
|
||||||
|
.. seealso::
|
||||||
|
|
||||||
|
:mod:`gettext`
|
||||||
|
for details of how the python gettext facilities work
|
||||||
|
`babel <http://babel.edgewall.org>`_
|
||||||
|
The babel module for in depth information on gettext, :term:`message
|
||||||
|
catalogs`, and translating your app. babel provides some nice
|
||||||
|
features for :term:`i18n` on top of :mod:`gettext`
|
||||||
|
'''
|
||||||
|
# Pylint disabled messages:
|
||||||
|
# :E1101: NewGNUTranslations is modeled as a replacement for GNUTranslations.
|
||||||
|
# That module invokes the _parse message to create some of its attributes.
|
||||||
|
# Pylint doesn't see those attributes being defined since it doesn't know
|
||||||
|
# when _parse() is called. We disable E1101 when accessing self._catalog
|
||||||
|
# and self.plural for this reason.
|
||||||
|
# :C0103: We're replicating the gettext API here so we need to use method and
|
||||||
|
# parameter names that mirror gettext.
|
||||||
|
# :C0111: We're replicating the gettext API here so for the gettext
|
||||||
|
# translation object methods we point people at the stdlib docs
|
||||||
|
|
||||||
|
from kitchen.versioning import version_tuple_to_string
|
||||||
|
|
||||||
|
__version_info__ = ((2, 1, 1),)
|
||||||
|
__version__ = version_tuple_to_string(__version_info__)
|
||||||
|
|
||||||
|
import copy
|
||||||
|
from errno import ENOENT
|
||||||
|
import gettext
|
||||||
|
import itertools
|
||||||
|
import locale
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# We use the _default_localedir definition in get_translation_object
try:
    # _default_localedir is a private attribute of the stdlib gettext
    # module, so guard the import and fall back to computing the same
    # conventional location ourselves.
    from gettext import _default_localedir as _DEFAULT_LOCALEDIR
except ImportError:
    _DEFAULT_LOCALEDIR = os.path.join(sys.prefix, 'share', 'locale')

from kitchen.text.converters import to_bytes, to_unicode
from kitchen.text.misc import byte_string_valid_encoding

# We cache parts of the translation objects just like stdlib's gettext so that
# we don't reparse the message files and keep them in memory separately if the
# same catalog is opened twice.
_translations = {}
|
||||||
|
|
||||||
|
class DummyTranslations(object, gettext.NullTranslations):
|
||||||
|
'''Safer version of :class:`gettext.NullTranslations`
|
||||||
|
|
||||||
|
This Translations class doesn't translate the strings and is intended to
|
||||||
|
be used as a fallback when there were errors setting up a real
|
||||||
|
Translations object. It's safer than :class:`gettext.NullTranslations` in
|
||||||
|
its handling of byte :class:`str` vs :class:`unicode` strings.
|
||||||
|
|
||||||
|
Unlike :class:`~gettext.NullTranslations`, this Translation class will
|
||||||
|
never throw a :exc:`~exceptions.UnicodeError`. The code that you have
|
||||||
|
around a call to :class:`DummyTranslations` might throw
|
||||||
|
a :exc:`~exceptions.UnicodeError` but at least that will be in code you
|
||||||
|
control and can fix. Also, unlike :class:`~gettext.NullTranslations` all
|
||||||
|
of this Translation object's methods guarantee to return byte :class:`str`
|
||||||
|
except for :meth:`ugettext` and :meth:`ungettext` which guarantee to
|
||||||
|
return :class:`unicode` strings.
|
||||||
|
|
||||||
|
When byte :class:`str` are returned, the strings will be encoded according
|
||||||
|
to this algorithm:
|
||||||
|
|
||||||
|
1) If a fallback has been added, the fallback will be called first.
|
||||||
|
You'll need to consult the fallback to see whether it performs any
|
||||||
|
encoding changes.
|
||||||
|
2) If a byte :class:`str` was given, the same byte :class:`str` will
|
||||||
|
be returned.
|
||||||
|
3) If a :class:`unicode` string was given and :meth:`set_output_charset`
|
||||||
|
has been called then we encode the string using the
|
||||||
|
:attr:`output_charset`
|
||||||
|
4) If a :class:`unicode` string was given and this is :meth:`gettext` or
|
||||||
|
:meth:`ngettext` and :attr:`_charset` was set output in that charset.
|
||||||
|
5) If a :class:`unicode` string was given and this is :meth:`gettext`
|
||||||
|
or :meth:`ngettext` we encode it using 'utf-8'.
|
||||||
|
6) If a :class:`unicode` string was given and this is :meth:`lgettext`
|
||||||
|
or :meth:`lngettext` we encode using the value of
|
||||||
|
:func:`locale.getpreferredencoding`
|
||||||
|
|
||||||
|
For :meth:`ugettext` and :meth:`ungettext`, we go through the same set of
|
||||||
|
steps with the following differences:
|
||||||
|
|
||||||
|
* We transform byte :class:`str` into :class:`unicode` strings for
|
||||||
|
these methods.
|
||||||
|
* The encoding used to decode the byte :class:`str` is taken from
|
||||||
|
:attr:`input_charset` if it's set, otherwise we decode using
|
||||||
|
:term:`UTF-8`.
|
||||||
|
|
||||||
|
.. attribute:: input_charset
|
||||||
|
|
||||||
|
is an extension to the |stdlib|_ :mod:`gettext` that specifies what
|
||||||
|
charset a message is encoded in when decoding a message to
|
||||||
|
:class:`unicode`. This is used for two purposes:
|
||||||
|
|
||||||
|
1) If the message string is a byte :class:`str`, this is used to decode
|
||||||
|
the string to a :class:`unicode` string before looking it up in the
|
||||||
|
:term:`message catalog`.
|
||||||
|
2) In :meth:`~kitchen.i18n.DummyTranslations.ugettext` and
|
||||||
|
:meth:`~kitchen.i18n.DummyTranslations.ungettext` methods, if a byte
|
||||||
|
:class:`str` is given as the message and is untranslated this is used
|
||||||
|
as the encoding when decoding to :class:`unicode`. This is different
|
||||||
|
from :attr:`_charset` which may be set when a :term:`message catalog`
|
||||||
|
is loaded because :attr:`input_charset` is used to describe an encoding
|
||||||
|
used in a python source file while :attr:`_charset` describes the
|
||||||
|
encoding used in the :term:`message catalog` file.
|
||||||
|
|
||||||
|
Any characters that aren't able to be transformed from a byte :class:`str`
|
||||||
|
to :class:`unicode` string or vice versa will be replaced with
|
||||||
|
a replacement character (i.e.: ``u'�'`` in unicode based encodings, ``'?'`` in other
|
||||||
|
:term:`ASCII` compatible encodings).
|
||||||
|
|
||||||
|
.. seealso::
|
||||||
|
|
||||||
|
:class:`gettext.NullTranslations`
|
||||||
|
For information about what methods are available and what they do.
|
||||||
|
|
||||||
|
.. versionchanged:: kitchen-1.1.0 ; API kitchen.i18n 2.1.0
|
||||||
|
* Although we had adapted :meth:`gettext`, :meth:`ngettext`,
|
||||||
|
:meth:`lgettext`, and :meth:`lngettext` to always return byte
|
||||||
|
:class:`str`, we hadn't forced those byte :class:`str` to always be
|
||||||
|
in a specified charset. We now make sure that :meth:`gettext` and
|
||||||
|
:meth:`ngettext` return byte :class:`str` encoded using
|
||||||
|
:attr:`output_charset` if set, otherwise :attr:`charset` and if
|
||||||
|
neither of those, :term:`UTF-8`. With :meth:`lgettext` and
|
||||||
|
:meth:`lngettext` :attr:`output_charset` if set, otherwise
|
||||||
|
:func:`locale.getpreferredencoding`.
|
||||||
|
* Make setting :attr:`input_charset` and :attr:`output_charset` also
|
||||||
|
set those attributes on any fallback translation objects.
|
||||||
|
'''
|
||||||
|
#pylint: disable-msg=C0103,C0111
|
||||||
|
    def __init__(self, fp=None):
        '''Set up the translation object.

        :kwarg fp: optional file-like object; passed straight through to
            :meth:`gettext.NullTranslations.__init__`.
        '''
        gettext.NullTranslations.__init__(self, fp)

        # Python 2.3 compat: NullTranslations there doesn't create
        # _output_charset, so make sure the attribute exists.
        if not hasattr(self, '_output_charset'):
            self._output_charset = None

        # Extension for making ugettext and ungettext more sane
        # 'utf-8' is only a default here.  Users can override via the
        # input_charset property.
        self._input_charset = 'utf-8'
|
||||||
|
|
||||||
|
    def _set_input_charset(self, charset):
        # Setter for the input_charset property: push the new charset to
        # the fallback translation object (if any) before recording it
        # locally, so this object and its fallback stay in sync.
        if self._fallback:
            try:
                self._fallback.input_charset = charset
            except AttributeError:
                # The fallback doesn't support input_charset (not a kitchen
                # translation object); nothing more to do for it.
                pass
        self._input_charset = charset
|
||||||
|
|
||||||
|
    def _get_input_charset(self):
        # Getter for the input_charset property.
        return self._input_charset

    # Encoding used to interpret byte str messages handed to the *gettext
    # methods.  Setting it also propagates to any fallback object.
    input_charset = property(_get_input_charset, _set_input_charset)
|
||||||
|
|
||||||
|
    def set_output_charset(self, charset):
        '''Set the output charset

        This serves two purposes.  The normal
        :meth:`gettext.NullTranslations.set_output_charset` does not set the
        output on fallback objects.  On python-2.3,
        :class:`gettext.NullTranslations` objects don't contain this method.
        '''
        if self._fallback:
            try:
                self._fallback.set_output_charset(charset)
            except AttributeError:
                # Fallback predates set_output_charset (python-2.3); skip it.
                pass
        try:
            gettext.NullTranslations.set_output_charset(self, charset)
        except AttributeError:
            # python-2.3: the base class lacks the method, so store the
            # charset on the attribute the method would have set.
            self._output_charset = charset
|
||||||
|
|
||||||
|
    # Only define output_charset ourselves when the stdlib class doesn't
    # already provide it (python-2.3).
    if not hasattr(gettext.NullTranslations, 'output_charset'):
        def output_charset(self):
            '''Compatibility for python2.3 which doesn't have output_charset'''
            return self._output_charset
|
||||||
|
|
||||||
|
    def _reencode_if_necessary(self, message, output_encoding):
        '''Return a byte string that's valid in a specific charset.

        :arg message: string to make valid; may be a byte str or a unicode
            string.
        :arg output_encoding: name of the encoding the returned byte str
            must be valid in.
        :returns: byte str valid in ``output_encoding``, or an empty byte
            str if ``message`` was not a string at all.

        .. warning:: This method may mangle the message if the input encoding
            is not known or the message isn't representable in the chosen
            output encoding.
        '''
        valid = False
        msg = None
        try:
            # Cheap path: if the bytes are already valid in the output
            # encoding there is nothing to re-encode.
            valid = byte_string_valid_encoding(message, output_encoding)
        except TypeError:
            # input was unicode, so it needs to be encoded
            pass

        if valid:
            return message
        try:
            # Decode to unicode so we can re-encode to desired encoding
            msg = to_unicode(message, encoding=self.input_charset,
                    nonstring='strict')
        except TypeError:
            # Not a string; return an empty byte string
            return ''

        # Make sure that we're returning a str of the desired encoding
        return to_bytes(msg, encoding=output_encoding)
|
||||||
|
|
||||||
|
    def gettext(self, message):
        '''Look up ``message``, always returning a byte str.

        See :meth:`gettext.NullTranslations.gettext` for the API.
        '''
        # First use any fallback gettext objects.  Since DummyTranslations
        # doesn't do any translation on its own, this is a good first step.
        if self._fallback:
            try:
                message = self._fallback.gettext(message)
            except (AttributeError, UnicodeError):
                # Ignore UnicodeErrors: We'll do our own encoding next
                pass

        # Next decide what encoding to use for the strings we return:
        # explicit output charset if set, else the catalog charset, else
        # the input charset (utf-8 unless overridden).
        output_encoding = (self._output_charset or self._charset or
                self.input_charset)

        return self._reencode_if_necessary(message, output_encoding)
|
||||||
|
|
||||||
|
    def ngettext(self, msgid1, msgid2, n):
        '''Look up the singular/plural form of a message, returning a byte str.

        See :meth:`gettext.NullTranslations.ngettext` for the API.
        '''
        # Default: no plural rules of our own, so pick on n == 1
        if n == 1:
            message = msgid1
        else:
            message = msgid2

        # The fallback method might return something different
        if self._fallback:
            try:
                message = self._fallback.ngettext(msgid1, msgid2, n)
            except (AttributeError, UnicodeError):
                # Ignore UnicodeErrors: We'll do our own encoding next
                pass

        # Next decide what encoding to use for the strings we return
        output_encoding = (self._output_charset or self._charset or
                self.input_charset)

        return self._reencode_if_necessary(message, output_encoding)
|
||||||
|
|
||||||
|
    def lgettext(self, message):
        '''Look up ``message``, returning a byte str encoded with
        output_charset if set, otherwise the locale's preferred encoding.
        '''
        if self._fallback:
            try:
                message = self._fallback.lgettext(message)
            except (AttributeError, UnicodeError):
                # Ignore UnicodeErrors: we'll do our own encoding next
                # AttributeErrors happen on py2.3 where lgettext is not
                # implemented
                pass

        # Next decide what encoding to use for the strings we return
        output_encoding = (self._output_charset or
                locale.getpreferredencoding())

        return self._reencode_if_necessary(message, output_encoding)
|
||||||
|
|
||||||
|
def lngettext(self, msgid1, msgid2, n):
|
||||||
|
# Default
|
||||||
|
if n == 1:
|
||||||
|
message = msgid1
|
||||||
|
else:
|
||||||
|
message = msgid2
|
||||||
|
# Fallback method might have something different
|
||||||
|
if self._fallback:
|
||||||
|
try:
|
||||||
|
message = self._fallback.lngettext(msgid1, msgid2, n)
|
||||||
|
except (AttributeError, UnicodeError):
|
||||||
|
# Ignore UnicodeErrors: we'll do our own encoding next
|
||||||
|
# AttributeError happens on py2.3 where lngettext is not
|
||||||
|
# implemented
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Next decide what encoding to use for the strings we return
|
||||||
|
output_encoding = (self._output_charset or
|
||||||
|
locale.getpreferredencoding())
|
||||||
|
|
||||||
|
return self._reencode_if_necessary(message, output_encoding)
|
||||||
|
|
||||||
|
def ugettext(self, message):
|
||||||
|
if not isinstance(message, basestring):
|
||||||
|
return u''
|
||||||
|
if self._fallback:
|
||||||
|
msg = to_unicode(message, encoding=self.input_charset)
|
||||||
|
try:
|
||||||
|
message = self._fallback.ugettext(msg)
|
||||||
|
except (AttributeError, UnicodeError):
|
||||||
|
# Ignore UnicodeErrors: We'll do our own decoding later
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Make sure we're returning unicode
|
||||||
|
return to_unicode(message, encoding=self.input_charset)
|
||||||
|
|
||||||
|
def ungettext(self, msgid1, msgid2, n):
|
||||||
|
# Default
|
||||||
|
if n == 1:
|
||||||
|
message = msgid1
|
||||||
|
else:
|
||||||
|
message = msgid2
|
||||||
|
# Fallback might override this
|
||||||
|
if self._fallback:
|
||||||
|
msgid1 = to_unicode(msgid1, encoding=self.input_charset)
|
||||||
|
msgid2 = to_unicode(msgid2, encoding=self.input_charset)
|
||||||
|
try:
|
||||||
|
message = self._fallback.ungettext(msgid1, msgid2, n)
|
||||||
|
except (AttributeError, UnicodeError):
|
||||||
|
# Ignore UnicodeErrors: We'll do our own decoding later
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Make sure we're returning unicode
|
||||||
|
return to_unicode(message, encoding=self.input_charset,
|
||||||
|
nonstring='empty')
|
||||||
|
|
||||||
|
|
||||||
|
class NewGNUTranslations(DummyTranslations, gettext.GNUTranslations):
    '''Safer version of :class:`gettext.GNUTranslations`

    :class:`gettext.GNUTranslations` suffers from two problems that this
    class fixes.

    1) :class:`gettext.GNUTranslations` can throw a
        :exc:`~exceptions.UnicodeError` in
        :meth:`gettext.GNUTranslations.ugettext` if the message being
        translated has non-:term:`ASCII` characters and there is no translation
        for it.
    2) :class:`gettext.GNUTranslations` can return byte :class:`str` from
        :meth:`gettext.GNUTranslations.ugettext` and :class:`unicode`
        strings from the other :meth:`~gettext.GNUTranslations.gettext`
        methods if the message being translated is the wrong type

    When byte :class:`str` are returned, the strings will be encoded
    according to this algorithm:

    1) If a fallback has been added, the fallback will be called first.
        You'll need to consult the fallback to see whether it performs any
        encoding changes.
    2) If a byte :class:`str` was given, the same byte :class:`str` will
        be returned.
    3) If a :class:`unicode` string was given and
        :meth:`set_output_charset` has been called then we encode the
        string using the :attr:`output_charset`
    4) If a :class:`unicode` string was given and this is :meth:`gettext`
        or :meth:`ngettext` and a charset was detected when parsing the
        :term:`message catalog`, output in that charset.
    5) If a :class:`unicode` string was given and this is :meth:`gettext`
        or :meth:`ngettext` we encode it using :term:`UTF-8`.
    6) If a :class:`unicode` string was given and this is :meth:`lgettext`
        or :meth:`lngettext` we encode using the value of
        :func:`locale.getpreferredencoding`

    For :meth:`ugettext` and :meth:`ungettext`, we go through the same set of
    steps with the following differences:

    * We transform byte :class:`str` into :class:`unicode` strings for these
        methods.
    * The encoding used to decode the byte :class:`str` is taken from
        :attr:`input_charset` if it's set, otherwise we decode using
        :term:`UTF-8`

    .. attribute:: input_charset

        an extension to the |stdlib|_ :mod:`gettext` that specifies what
        charset a message is encoded in when decoding a message to
        :class:`unicode`.  This is used for two purposes:

    1) If the message string is a byte :class:`str`, this is used to decode
        the string to a :class:`unicode` string before looking it up in the
        :term:`message catalog`.
    2) In :meth:`~kitchen.i18n.DummyTranslations.ugettext` and
        :meth:`~kitchen.i18n.DummyTranslations.ungettext` methods, if a byte
        :class:`str` is given as the message and is untranslated this is used
        as the encoding when decoding to :class:`unicode`.  This is different
        from the :attr:`_charset` parameter that may be set when a
        :term:`message catalog` is loaded because :attr:`input_charset` is
        used to describe an encoding used in a python source file while
        :attr:`_charset` describes the encoding used in the :term:`message
        catalog` file.

    Any characters that aren't able to be transformed from a byte
    :class:`str` to :class:`unicode` string or vice versa will be replaced
    with a replacement character (ie: ``u'�'`` in unicode based encodings,
    ``'?'`` in other :term:`ASCII` compatible encodings).

    .. seealso::

        :class:`gettext.GNUTranslations.gettext`
            For information about what methods this class has and what they do

    .. versionchanged:: kitchen-1.1.0 ; API kitchen.i18n 2.1.0
        Although we had adapted :meth:`gettext`, :meth:`ngettext`,
        :meth:`lgettext`, and :meth:`lngettext` to always return
        byte :class:`str`, we hadn't forced those byte :class:`str` to always
        be in a specified charset.  We now make sure that :meth:`gettext` and
        :meth:`ngettext` return byte :class:`str` encoded using
        :attr:`output_charset` if set, otherwise :attr:`charset` and if
        neither of those, :term:`UTF-8`.  With :meth:`lgettext` and
        :meth:`lngettext` :attr:`output_charset` if set, otherwise
        :func:`locale.getpreferredencoding`.
    '''
    #pylint: disable-msg=C0103,C0111
    def _parse(self, fp):
        # Delegate catalog parsing to the stdlib implementation; this sets
        # self._catalog, self.plural, and self._charset
        gettext.GNUTranslations._parse(self, fp)

    def gettext(self, message):
        '''Return the translation of message as a byte str'''
        if not isinstance(message, basestring):
            return ''
        tmsg = message
        # Catalog keys are unicode, so decode before looking up
        u_message = to_unicode(message, encoding=self.input_charset)
        try:
            tmsg = self._catalog[u_message] #pylint:disable-msg=E1101
        except KeyError:
            if self._fallback:
                try:
                    tmsg = self._fallback.gettext(message)
                except (AttributeError, UnicodeError):
                    # Ignore UnicodeErrors: We'll do our own encoding next
                    pass

        # Next decide what encoding to use for the strings we return
        output_encoding = (self._output_charset or self._charset or
                self.input_charset)

        return self._reencode_if_necessary(tmsg, output_encoding)

    def ngettext(self, msgid1, msgid2, n):
        '''Return the singular or plural translation as a byte str'''
        # Default in case there is no translation
        if n == 1:
            tmsg = msgid1
        else:
            tmsg = msgid2

        if not isinstance(msgid1, basestring):
            return ''
        # Catalog keys are (unicode msgid, plural index) tuples
        u_msgid1 = to_unicode(msgid1, encoding=self.input_charset)
        try:
            #pylint:disable-msg=E1101
            tmsg = self._catalog[(u_msgid1, self.plural(n))]
        except KeyError:
            if self._fallback:
                try:
                    tmsg = self._fallback.ngettext(msgid1, msgid2, n)
                except (AttributeError, UnicodeError):
                    # Ignore UnicodeErrors: We'll do our own encoding next
                    pass

        # Next decide what encoding to use for the strings we return
        output_encoding = (self._output_charset or self._charset or
                self.input_charset)

        return self._reencode_if_necessary(tmsg, output_encoding)

    def lgettext(self, message):
        '''Return the translation encoded for the current locale'''
        if not isinstance(message, basestring):
            return ''
        tmsg = message
        u_message = to_unicode(message, encoding=self.input_charset)
        try:
            tmsg = self._catalog[u_message] #pylint:disable-msg=E1101
        except KeyError:
            if self._fallback:
                try:
                    tmsg = self._fallback.lgettext(message)
                except (AttributeError, UnicodeError):
                    # Ignore UnicodeErrors: We'll do our own encoding next
                    pass

        # Next decide what encoding to use for the strings we return
        output_encoding = (self._output_charset or
                locale.getpreferredencoding())

        return self._reencode_if_necessary(tmsg, output_encoding)

    def lngettext(self, msgid1, msgid2, n):
        '''Return singular or plural translation encoded for the locale'''
        # Default in case there is no translation
        if n == 1:
            tmsg = msgid1
        else:
            tmsg = msgid2

        if not isinstance(msgid1, basestring):
            return ''
        u_msgid1 = to_unicode(msgid1, encoding=self.input_charset)
        try:
            #pylint:disable-msg=E1101
            tmsg = self._catalog[(u_msgid1, self.plural(n))]
        except KeyError:
            if self._fallback:
                try:
                    # Bugfix: previously called self._fallback.ngettext here,
                    # which encodes with the catalog charset instead of the
                    # locale's preferred encoding that lngettext promises.
                    # This matches lgettext above and
                    # DummyTranslations.lngettext.
                    tmsg = self._fallback.lngettext(msgid1, msgid2, n)
                except (AttributeError, UnicodeError):
                    # UnicodeError: ignored, we encode ourselves below.
                    # AttributeError: python-2.3 translation objects do not
                    # implement lngettext.
                    pass

        # Next decide what encoding to use for the strings we return
        output_encoding = (self._output_charset or
                locale.getpreferredencoding())

        return self._reencode_if_necessary(tmsg, output_encoding)

    def ugettext(self, message):
        '''Return the translation of message as a unicode string'''
        if not isinstance(message, basestring):
            return u''
        message = to_unicode(message, encoding=self.input_charset)
        try:
            message = self._catalog[message] #pylint:disable-msg=E1101
        except KeyError:
            if self._fallback:
                try:
                    message = self._fallback.ugettext(message)
                except (AttributeError, UnicodeError):
                    # Ignore UnicodeErrors: We'll do our own decoding next
                    pass

        # Make sure that we're returning unicode
        return to_unicode(message, encoding=self.input_charset)

    def ungettext(self, msgid1, msgid2, n):
        '''Return the singular or plural translation as a unicode string'''
        # Default in case there is no translation
        if n == 1:
            tmsg = msgid1
        else:
            tmsg = msgid2

        if not isinstance(msgid1, basestring):
            return u''
        u_msgid1 = to_unicode(msgid1, encoding=self.input_charset)
        try:
            #pylint:disable-msg=E1101
            tmsg = self._catalog[(u_msgid1, self.plural(n))]
        except KeyError:
            if self._fallback:
                try:
                    tmsg = self._fallback.ungettext(msgid1, msgid2, n)
                except (AttributeError, UnicodeError):
                    # Ignore UnicodeErrors: We'll do our own decoding next
                    pass

        # Make sure that we're returning unicode; non-strings become u''
        return to_unicode(tmsg, encoding=self.input_charset,
                nonstring='empty')
|
||||||
|
|
||||||
|
|
||||||
|
def get_translation_object(domain, localedirs=tuple(), languages=None,
        class_=None, fallback=True, codeset=None):
    '''Get a translation object bound to the :term:`message catalogs`

    :arg domain: Name of the message domain.  This should be a unique name
        that can be used to lookup the :term:`message catalog` for this app
        or library.
    :kwarg localedirs: Iterator of directories to search for
        :term:`message catalogs`, in order.  For each directory we check for
        catalogs in any language given in :attr:`languages`.  Every catalog
        found is consulted in order when translating a msgid, so the order
        of :attr:`localedirs` may be important.  The default :mod:`gettext`
        localedir (:file:`os.path.join(sys.prefix, 'share', 'locale')` on
        Unix) is implicitly appended as the last directory searched.  If no
        catalogs are found, either a :class:`DummyTranslations` object is
        returned or an :exc:`IOError` is raised, depending on
        :attr:`fallback`.
    :kwarg languages: Iterator of language codes to check for
        :term:`message catalogs`.  If unspecified, the user's locale
        settings are used.

        .. seealso:: :func:`gettext.find` for information on what
            environment variables are used.

    :kwarg class_: The class used to extract translations from the
        :term:`message catalogs`.  Defaults to :class:`NewGNUTranslations`.
    :kwarg fallback: If set to :data:`False`, raise an :exc:`IOError` when
        no :term:`message catalogs` are found.  If :data:`True`, the
        default, return a :class:`DummyTranslations` object.
    :kwarg codeset: Character encoding to use when returning byte
        :class:`str` objects.  Equivalent to calling
        :meth:`~gettext.GNUTranslations.output_charset` on the returned
        Translations object.
    :return: Translation object to get :mod:`gettext` methods from

    This is similar to the |stdlib|_ :func:`gettext.translation` function,
    but it returns the safer :class:`NewGNUTranslations` /
    :class:`DummyTranslations` objects by default and it accepts multiple
    directories to search for :term:`message catalogs`, which is needed for
    portable setup (e.g. a module-local :file:`locale` directory during
    development plus :file:`/usr/share/locale` when installed).

    .. note::

        All found catalogs are stacked as fallbacks of the first, so with
        several installed versions of a catalog make sure
        :attr:`localedirs` lists the newer ones first.

    .. versionchanged:: kitchen-1.1.0 ; API kitchen.i18n 2.1.0
        Add more parameters so this can more easily replace
        :func:`gettext.translation`, and cycle through localedirs until
        a suitable locale file is found rather than stopping at the first
        existing directory.
    '''
    if not class_:
        class_ = NewGNUTranslations

    # Gather every matching catalog from the caller's dirs plus the default
    mofiles = []
    for localedir in itertools.chain(localedirs, (_DEFAULT_LOCALEDIR,)):
        mofiles.extend(gettext.find(domain, localedir, languages, all=1))
    if not mofiles:
        if fallback:
            return DummyTranslations()
        raise IOError(ENOENT, 'No translation file found for domain', domain)

    # Accumulate a translation with fallbacks to all the other mofiles
    stacked = None
    for mofile in mofiles:
        catalog_path = os.path.abspath(mofile)
        cached = _translations.get(catalog_path)
        if not cached:
            # Parse the catalog once and cache it for later callers
            catalog_fh = open(catalog_path, 'rb')
            try:
                cached = _translations.setdefault(catalog_path,
                        class_(catalog_fh))
            finally:
                catalog_fh.close()

        # Shallow copy so each caller can have its own fallbacks and output
        # charset while the parsed catalog data stays shared
        translation = copy.copy(cached)
        if codeset:
            translation.set_output_charset(codeset)
        if not stacked:
            stacked = translation
        else:
            stacked.add_fallback(translation)

    return stacked
|
||||||
|
|
||||||
|
def easy_gettext_setup(domain, localedirs=tuple(), use_unicode=True):
    '''Setup translation functions for an application

    :arg domain: Name of the message domain.  This should be a unique name
        that can be used to lookup the :term:`message catalog` for this app.
    :kwarg localedirs: Iterator of directories to look for :term:`message
        catalogs` under.  The first directory to exist is used regardless of
        whether messages for this domain are present.  If none of the
        directories exist, fallback on ``sys.prefix`` +
        :file:`/share/locale`.  Default: No directories to search so we just
        use the fallback.
    :kwarg use_unicode: If :data:`True` return the :mod:`gettext` functions
        for :class:`unicode` strings else return the functions for byte
        :class:`str` for the translations.  Default is :data:`True`.
    :return: tuple of the :mod:`gettext` function and :mod:`gettext`
        function for plurals

    This sets up :mod:`gettext` via the Class-based API for you.  Typical
    use::

        _, N_ = easy_gettext_setup()

    :func:`_` marks strings with no plural form; :func:`N_` marks strings
    that need a different form when a variable in the string is plural.

    .. seealso::

        :doc:`api-i18n`
            This module's documentation has examples of using :func:`_` and
            :func:`N_`
        :func:`get_translation_object`
            for information on how to use :attr:`localedirs` to get the
            proper :term:`message catalogs` both when in development and
            when installed to FHS compliant directories on Linux.

    .. note::

        The gettext functions returned here should be superior to the ones
        returned from :mod:`gettext`; see the :class:`DummyTranslations`
        and :class:`NewGNUTranslations` documentation for why.

    .. versionchanged:: kitchen-0.2.4 ; API kitchen.i18n 2.0.0
        Changed :func:`~kitchen.i18n.easy_gettext_setup` to return the
        lgettext functions instead of gettext functions when
        use_unicode=False.
    '''
    # Build a translation object that searches localedirs for catalogs
    translation_obj = get_translation_object(domain, localedirs=localedirs)
    if use_unicode:
        # unicode-returning translation functions
        return (translation_obj.ugettext, translation_obj.ungettext)
    else:
        # byte str functions encoded per the locale's preferred encoding
        return (translation_obj.lgettext, translation_obj.lngettext)
|
||||||
|
|
||||||
|
# Explicit public API of this module (used by ``from kitchen.i18n import *``)
__all__ = ('DummyTranslations', 'NewGNUTranslations', 'easy_gettext_setup',
        'get_translation_object')
|
96
kitchen/iterutils/__init__.py
Normal file
96
kitchen/iterutils/__init__.py
Normal file
|
@ -0,0 +1,96 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# Copyright (c) 2010 Red Hat, Inc
|
||||||
|
#
|
||||||
|
# kitchen is free software; you can redistribute it and/or modify it under the
|
||||||
|
# terms of the GNU Lesser General Public License as published by the Free
|
||||||
|
# Software Foundation; either version 2.1 of the License, or (at your option)
|
||||||
|
# any later version.
|
||||||
|
#
|
||||||
|
# kitchen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
|
||||||
|
# more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Lesser General Public License
|
||||||
|
# along with kitchen; if not, see <http://www.gnu.org/licenses/>
|
||||||
|
#
|
||||||
|
# Authors:
|
||||||
|
# Toshio Kuratomi <toshio@fedoraproject.org>
|
||||||
|
# Luke Macken <lmacken@redhat.com>
|
||||||
|
#
|
||||||
|
# Portions of code taken from python-fedora fedora/iterutils.py
|
||||||
|
'''
|
||||||
|
Functions to manipulate iterables
|
||||||
|
|
||||||
|
.. versionadded:: Kitchen: 0.2.1a1
|
||||||
|
|
||||||
|
.. moduleauthor:: Toshio Kuratomi <toshio@fedoraproject.org>
|
||||||
|
.. moduleauthor:: Luke Macken <lmacken@redhat.com>
|
||||||
|
'''
|
||||||
|
|
||||||
|
from kitchen.versioning import version_tuple_to_string
|
||||||
|
|
||||||
|
__version_info__ = ((0, 0, 1),)
|
||||||
|
__version__ = version_tuple_to_string(__version_info__)
|
||||||
|
|
||||||
|
def isiterable(obj, include_string=False):
    '''Check whether an object is an iterable

    :arg obj: Object to test whether it is an iterable
    :kwarg include_string: If :data:`True` and :attr:`obj` is a byte
        :class:`str` or :class:`unicode` string this function will return
        :data:`True`.  If set to :data:`False`, byte :class:`str` and
        :class:`unicode` strings will cause this function to return
        :data:`False`.  Default :data:`False`.
    :returns: :data:`True` if :attr:`obj` is iterable, otherwise
        :data:`False`.
    '''
    # Strings are iterable but are usually meant as scalars; reject them
    # unless the caller opted in
    if isinstance(obj, basestring) and not include_string:
        return False
    # Anything iter() accepts is an iterable
    try:
        iter(obj)
    except TypeError:
        return False
    return True
|
||||||
|
|
||||||
|
def iterate(obj, include_string=False):
    '''Generator that can be used to iterate over anything

    :arg obj: The object to iterate over
    :kwarg include_string: if :data:`True`, treat strings as iterables.
        Otherwise treat them as a single scalar value.  Default
        :data:`False`

    Creates an iterator out of any scalar or iterable: iterables have their
    items yielded, scalars are yielded as a single value.  A string is
    treated as a scalar unless :attr:`include_string` is :data:`True`.
    Example usage::

        >>> list(iterate(None))
        [None]
        >>> list(iterate([None]))
        [None]
        >>> list(iterate([1, 2, 3]))
        [1, 2, 3]
        >>> list(iterate(set([1, 2, 3])))
        [1, 2, 3]
        >>> list(iterate(dict(a='1', b='2')))
        ['a', 'b']
        >>> list(iterate(1))
        [1]
        >>> list(iterate(iter([1, 2, 3])))
        [1, 2, 3]
        >>> list(iterate('abc'))
        ['abc']
        >>> list(iterate('abc', include_string=True))
        ['a', 'b', 'c']
    '''
    # Scalars (and strings, unless opted in) are yielded as-is
    if not isiterable(obj, include_string=include_string):
        yield obj
    else:
        for item in obj:
            yield item
|
||||||
|
|
||||||
|
__all__ = ('isiterable', 'iterate',)
|
10
kitchen/pycompat24/__init__.py
Normal file
10
kitchen/pycompat24/__init__.py
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
'''
|
||||||
|
The :mod:`kitchen.pycompat24` module contains implementations of functionality
|
||||||
|
introduced in python-2.4 for use on earlier versions of python.
|
||||||
|
'''
|
||||||
|
from kitchen.versioning import version_tuple_to_string
|
||||||
|
|
||||||
|
__version_info__ = ((1, 1, 0),)
|
||||||
|
__version__ = version_tuple_to_string(__version_info__)
|
||||||
|
|
||||||
|
__all__ = ('base64', 'sets', 'subprocess')
|
46
kitchen/pycompat24/base64/__init__.py
Normal file
46
kitchen/pycompat24/base64/__init__.py
Normal file
|
@ -0,0 +1,46 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# Copyright (c) 2010 Red Hat, Inc
|
||||||
|
#
|
||||||
|
# This file is part of kitchen
|
||||||
|
#
|
||||||
|
# kitchen is free software; you can redistribute it and/or modify it under the
|
||||||
|
# terms of the GNU Lesser General Public License as published by the Free
|
||||||
|
# Software Foundation; either version 2.1 of the License, or (at your option)
|
||||||
|
# any later version.
|
||||||
|
#
|
||||||
|
# kitchen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
|
||||||
|
# more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Lesser General Public License
|
||||||
|
# along with kitchen; if not, see <http://www.gnu.org/licenses/>
|
||||||
|
#
|
||||||
|
# Authors:
|
||||||
|
# Toshio Kuratomi <toshio@fedoraproject.org>
|
||||||
|
|
||||||
|
'''
|
||||||
|
Implement the modern base64 interface.
|
||||||
|
|
||||||
|
Python-2.4 and above have a new API for the base64 module. This is a backport
|
||||||
|
of that module for use on python-2.3.
|
||||||
|
|
||||||
|
.. seealso::
|
||||||
|
:mod:`base64`
|
||||||
|
for information about using the functions provided here.
|
||||||
|
'''
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# :W0401,W0614: The purpose of this module is to create a backport of base64
|
||||||
|
# so we ignore these pylint warnings
|
||||||
|
#pylint:disable-msg=W0401,W0614
|
||||||
|
if sys.version_info >= (2, 4):
|
||||||
|
from base64 import *
|
||||||
|
else:
|
||||||
|
from kitchen.pycompat24.base64._base64 import *
|
||||||
|
|
||||||
|
__all__ = ( 'b16decode', 'b16encode', 'b32decode', 'b32encode', 'b64decode',
|
||||||
|
'b64encode', 'decode', 'decodestring', 'encode', 'encodestring',
|
||||||
|
'standard_b64decode', 'standard_b64encode', 'urlsafe_b64decode',
|
||||||
|
'urlsafe_b64encode',)
|
363
kitchen/pycompat24/base64/_base64.py
Normal file
363
kitchen/pycompat24/base64/_base64.py
Normal file
|
@ -0,0 +1,363 @@
|
||||||
|
#! /usr/bin/env python
|
||||||
|
|
||||||
|
"""RFC 3548: Base16, Base32, Base64 Data Encodings"""
|
||||||
|
|
||||||
|
# Modified 04-Oct-1995 by Jack Jansen to use binascii module
|
||||||
|
# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support
|
||||||
|
|
||||||
|
import re
|
||||||
|
import struct
|
||||||
|
import binascii
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
# Legacy interface exports traditional RFC 1521 Base64 encodings
|
||||||
|
'encode', 'decode', 'encodestring', 'decodestring',
|
||||||
|
# Generalized interface for other encodings
|
||||||
|
'b64encode', 'b64decode', 'b32encode', 'b32decode',
|
||||||
|
'b16encode', 'b16decode',
|
||||||
|
# Standard Base64 encoding
|
||||||
|
'standard_b64encode', 'standard_b64decode',
|
||||||
|
# Some common Base64 alternatives. As referenced by RFC 3458, see thread
|
||||||
|
# starting at:
|
||||||
|
#
|
||||||
|
# http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html
|
||||||
|
'urlsafe_b64encode', 'urlsafe_b64decode',
|
||||||
|
]
|
||||||
|
|
||||||
|
_translation = [chr(_x) for _x in range(256)]
|
||||||
|
EMPTYSTRING = ''
|
||||||
|
|
||||||
|
|
||||||
|
def _translate(s, altchars):
|
||||||
|
translation = _translation[:]
|
||||||
|
for k, v in altchars.items():
|
||||||
|
translation[ord(k)] = v
|
||||||
|
return s.translate(''.join(translation))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Base64 encoding/decoding uses binascii
|
||||||
|
|
||||||
|
def b64encode(s, altchars=None):
    """Encode a string using Base64.

    s is the string to encode.  Optional altchars must be a string of at
    least length 2 (additional characters are ignored) which specifies an
    alternative alphabet for the '+' and '/' characters.  This allows an
    application to e.g. generate url or filesystem safe Base64 strings.

    The encoded string is returned.
    """
    # binascii.b2a_base64() always appends a newline; strip it off.
    encoded = binascii.b2a_base64(s)[:-1]
    if altchars is None:
        return encoded
    # Swap '+' and '/' for the caller-supplied alternative characters.
    return _translate(encoded, {'+': altchars[0], '/': altchars[1]})
|
||||||
|
|
||||||
|
|
||||||
|
def b64decode(s, altchars=None):
|
||||||
|
"""Decode a Base64 encoded string.
|
||||||
|
|
||||||
|
s is the string to decode. Optional altchars must be a string of at least
|
||||||
|
length 2 (additional characters are ignored) which specifies the
|
||||||
|
alternative alphabet used instead of the '+' and '/' characters.
|
||||||
|
|
||||||
|
The decoded string is returned. A TypeError is raised if s were
|
||||||
|
incorrectly padded or if there are non-alphabet characters present in the
|
||||||
|
string.
|
||||||
|
"""
|
||||||
|
if altchars is not None:
|
||||||
|
s = _translate(s, {altchars[0]: '+', altchars[1]: '/'})
|
||||||
|
try:
|
||||||
|
return binascii.a2b_base64(s)
|
||||||
|
except binascii.Error, msg:
|
||||||
|
# Transform this exception for consistency
|
||||||
|
raise TypeError(msg)
|
||||||
|
|
||||||
|
|
||||||
|
def standard_b64encode(s):
    """Encode a string using the standard Base64 alphabet.

    s is the string to encode.  The encoded string is returned.
    """
    # Plain delegation: b64encode's default alphabet is the standard one.
    return b64encode(s)
|
||||||
|
|
||||||
|
def standard_b64decode(s):
    """Decode a string encoded with the standard Base64 alphabet.

    s is the string to decode.  The decoded string is returned.  A TypeError
    is raised if the string is incorrectly padded or if there are
    non-alphabet characters present in the string.
    """
    # Plain delegation: b64decode's default alphabet is the standard one.
    return b64decode(s)
|
||||||
|
|
||||||
|
def urlsafe_b64encode(s):
    """Encode a string using a url-safe Base64 alphabet.

    s is the string to encode.  The encoded string is returned.  The alphabet
    uses '-' instead of '+' and '_' instead of '/'.
    """
    # Delegate with the url/filesystem-safe substitution characters.
    return b64encode(s, '-_')
|
||||||
|
|
||||||
|
def urlsafe_b64decode(s):
    """Decode a string encoded with the url-safe Base64 alphabet.

    s is the string to decode.  The decoded string is returned.  A TypeError
    is raised if the string is incorrectly padded or if there are
    non-alphabet characters present in the string.

    The alphabet uses '-' instead of '+' and '_' instead of '/'.
    """
    # Delegate, telling b64decode which characters replace '+' and '/'.
    return b64decode(s, '-_')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Base32 encoding/decoding must be done in Python
# RFC 3548 Base32 alphabet: 5-bit value -> character.
_b32alphabet = {
    0: 'A',  9: 'J', 18: 'S', 27: '3',
    1: 'B', 10: 'K', 19: 'T', 28: '4',
    2: 'C', 11: 'L', 20: 'U', 29: '5',
    3: 'D', 12: 'M', 21: 'V', 30: '6',
    4: 'E', 13: 'N', 22: 'W', 31: '7',
    5: 'F', 14: 'O', 23: 'X',
    6: 'G', 15: 'P', 24: 'Y',
    7: 'H', 16: 'Q', 25: 'Z',
    8: 'I', 17: 'R', 26: '2',
    }

# Forward table: list indexed by 5-bit value, in ascending key order.
_b32keys = _b32alphabet.keys()
_b32keys.sort()
_b32tab = [_b32alphabet[_k] for _k in _b32keys]
# Reverse table: character -> 5-bit value (stored as a long for the
# shift-and-accumulate arithmetic in b32decode).
_b32rev = {}
for _k, _v in _b32alphabet.items():
    _b32rev[_v] = long(_k)
|
||||||
|
|
||||||
|
|
||||||
|
def b32encode(s):
    """Encode a string using Base32.

    s is the string to encode.  The encoded string is returned.

    The input is processed in 5-byte (40-bit) quanta; each quantum yields
    eight 5-bit values which index into _b32tab.  A short final quantum is
    zero-padded before encoding and the corresponding trailing output
    characters are then replaced with '=' pad characters.
    """
    parts = []
    quanta, leftover = divmod(len(s), 5)
    # Pad the last quantum with zero bits if necessary
    if leftover:
        s += ('\0' * (5 - leftover))
        quanta += 1
    for i in range(quanta):
        # c1 and c2 are 16 bits wide, c3 is 8 bits wide.  The intent of this
        # code is to process the 40 bits in units of 5 bits.  So we take the 1
        # leftover bit of c1 and tack it onto c2.  Then we take the 2 leftover
        # bits of c2 and tack them onto c3.  The shifts and masks are intended
        # to give us values of exactly 5 bits in width.
        c1, c2, c3 = struct.unpack('!HHB', s[i*5:(i+1)*5])
        c2 += (c1 & 1) << 16 # 17 bits wide
        c3 += (c2 & 3) << 8  # 10 bits wide
        parts.extend([_b32tab[c1 >> 11],         # bits 1 - 5
                      _b32tab[(c1 >> 6) & 0x1f], # bits 6 - 10
                      _b32tab[(c1 >> 1) & 0x1f], # bits 11 - 15
                      _b32tab[c2 >> 12],         # bits 16 - 20 (1 - 5)
                      _b32tab[(c2 >> 7) & 0x1f], # bits 21 - 25 (6 - 10)
                      _b32tab[(c2 >> 2) & 0x1f], # bits 26 - 30 (11 - 15)
                      _b32tab[c3 >> 5],          # bits 31 - 35 (1 - 5)
                      _b32tab[c3 & 0x1f],        # bits 36 - 40 (1 - 5)
                      ])
    encoded = EMPTYSTRING.join(parts)
    # Adjust for any leftover partial quanta: replace the characters that
    # encode only padding zero-bits with '=' per RFC 3548 section 5.
    if leftover == 1:
        return encoded[:-6] + '======'
    elif leftover == 2:
        return encoded[:-4] + '===='
    elif leftover == 3:
        return encoded[:-3] + '==='
    elif leftover == 4:
        return encoded[:-1] + '='
    return encoded
|
||||||
|
|
||||||
|
|
||||||
|
def b32decode(s, casefold=False, map01=None):
    """Decode a Base32 encoded string.

    s is the string to decode.  Optional casefold is a flag specifying whether
    a lowercase alphabet is acceptable as input.  For security purposes, the
    default is False.

    RFC 3548 allows for optional mapping of the digit 0 (zero) to the letter O
    (oh), and for optional mapping of the digit 1 (one) to either the letter I
    (eye) or letter L (el).  The optional argument map01 when not None,
    specifies which letter the digit 1 should be mapped to (when map01 is not
    None, the digit 0 is always mapped to the letter O).  For security
    purposes the default is None, so that 0 and 1 are not allowed in the
    input.

    The decoded string is returned.  A TypeError is raised if s were
    incorrectly padded or if there are non-alphabet characters present in the
    string.
    """
    quanta, leftover = divmod(len(s), 8)
    if leftover:
        raise TypeError('Incorrect padding')
    # Handle section 2.4 zero and one mapping.  The flag map01 will be either
    # False, or the character to map the digit 1 (one) to.  It should be
    # either L (el) or I (eye).
    if map01:
        s = _translate(s, {'0': 'O', '1': map01})
    if casefold:
        s = s.upper()
    # Strip off pad characters from the right.  We need to count the pad
    # characters because this will tell us how many null bytes to remove from
    # the end of the decoded string.
    padchars = 0
    mo = re.search('(?P<pad>[=]*)$', s)
    if mo:
        padchars = len(mo.group('pad'))
        if padchars > 0:
            s = s[:-padchars]
    # Now decode the full quanta: accumulate eight 5-bit values into a 40-bit
    # integer, then emit it as 5 bytes via unhexlify.
    parts = []
    acc = 0
    shift = 35
    for c in s:
        val = _b32rev.get(c)
        if val is None:
            raise TypeError('Non-base32 digit found')
        # Use the value we already looked up instead of indexing _b32rev
        # a second time (the original performed a redundant second lookup).
        acc += val << shift
        shift -= 5
        if shift < 0:
            parts.append(binascii.unhexlify('%010x' % acc))
            acc = 0
            shift = 35
    # Process the last, partial quanta: the pad-character count determines
    # how many of the 5 decoded bytes are real data.
    last = binascii.unhexlify('%010x' % acc)
    if padchars == 0:
        last = ''          # No characters
    elif padchars == 1:
        last = last[:-1]
    elif padchars == 3:
        last = last[:-2]
    elif padchars == 4:
        last = last[:-3]
    elif padchars == 6:
        last = last[:-4]
    else:
        raise TypeError('Incorrect padding')
    parts.append(last)
    return EMPTYSTRING.join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
|
||||||
|
# lowercase. The RFC also recommends against accepting input case
|
||||||
|
# insensitively.
|
||||||
|
def b16encode(s):
    """Encode a string using Base16.

    s is the string to encode.  The encoded string is returned.
    """
    # hexlify() produces lowercase hex digits; RFC 3548 specifies the
    # uppercase Base16 alphabet, so upcase the result.
    return binascii.hexlify(s).upper()
|
||||||
|
|
||||||
|
|
||||||
|
def b16decode(s, casefold=False):
    """Decode a Base16 encoded string.

    s is the string to decode.  Optional casefold is a flag specifying whether
    a lowercase alphabet is acceptable as input.  For security purposes, the
    default is False.

    The decoded string is returned.  A TypeError is raised if s were
    incorrectly padded or if there are non-alphabet characters present in the
    string.
    """
    text = s
    if casefold:
        text = text.upper()
    # Reject anything outside the uppercase Base16 alphabet.
    if re.search('[^0-9A-F]', text) is not None:
        raise TypeError('Non-base16 digit found')
    return binascii.unhexlify(text)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Legacy interface. This code could be cleaned up since I don't believe
|
||||||
|
# binascii has any line length limitations. It just doesn't seem worth it
|
||||||
|
# though.
|
||||||
|
|
||||||
|
MAXLINESIZE = 76 # Excluding the CRLF
MAXBINSIZE = (MAXLINESIZE//4)*3


def encode(input, output):
    """Encode a file.

    Reads binary data from the file object input and writes base64-encoded
    lines (MAXLINESIZE characters plus newline) to the file object output.
    """
    while True:
        chunk = input.read(MAXBINSIZE)
        if not chunk:
            break
        # Top up the chunk so every output line but the last encodes a full
        # MAXBINSIZE bytes, even if input.read() returned a short read.
        while len(chunk) < MAXBINSIZE:
            more = input.read(MAXBINSIZE - len(chunk))
            if not more:
                break
            chunk += more
        output.write(binascii.b2a_base64(chunk))
|
||||||
|
|
||||||
|
|
||||||
|
def decode(input, output):
    """Decode a file.

    Reads base64-encoded lines from the file object input and writes the
    decoded binary data to the file object output.
    """
    line = input.readline()
    while line:
        output.write(binascii.a2b_base64(line))
        line = input.readline()
|
||||||
|
|
||||||
|
|
||||||
|
def encodestring(s):
    """Encode a string into multiple lines of base-64 data."""
    # Each MAXBINSIZE-byte slice encodes to one full output line.
    pieces = [binascii.b2a_base64(s[offset:offset + MAXBINSIZE])
              for offset in range(0, len(s), MAXBINSIZE)]
    return "".join(pieces)
|
||||||
|
|
||||||
|
|
||||||
|
def decodestring(s):
    """Decode a string."""
    # binascii handles embedded newlines, so no line splitting is needed.
    decoded = binascii.a2b_base64(s)
    return decoded
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Useable as a script...
|
||||||
|
def test():
    """Small test program

    Command-line driver: -e encodes (the default), -d/-u decode, -t runs the
    built-in round-trip check.  Reads from the named file (or stdin when the
    argument is '-' or absent) and writes to stdout.
    """
    import sys, getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'deut')
    except getopt.error, msg:
        # Unknown option: report usage on stderr and exit non-zero
        sys.stdout = sys.stderr
        print msg
        print """usage: %s [-d|-e|-u|-t] [file|-]
        -d, -u: decode
        -e: encode (default)
        -t: encode and decode string 'Aladdin:open sesame'"""%sys.argv[0]
        sys.exit(2)
    # Options are applied in order, so the last one given wins; both -d and
    # -u select decoding.  -t short-circuits into the self-test and returns.
    func = encode
    for o, a in opts:
        if o == '-e': func = encode
        if o == '-d': func = decode
        if o == '-u': func = decode
        if o == '-t': test1(); return
    if args and args[0] != '-':
        # Open in binary mode; encode/decode both operate on raw bytes
        fh = open(args[0], 'rb')
        try:
            func(fh, sys.stdout)
        finally:
            fh.close()
    else:
        func(sys.stdin, sys.stdout)
|
||||||
|
|
||||||
|
|
||||||
|
def test1():
    """Encode and decode a fixed sample string and print the results."""
    s0 = "Aladdin:open sesame"
    s1 = encodestring(s0)
    s2 = decodestring(s1)
    # Print original, encoded (repr shows the trailing newline), and decoded
    print s0, repr(s1), s2
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
test()
|
92
kitchen/pycompat24/sets/__init__.py
Normal file
92
kitchen/pycompat24/sets/__init__.py
Normal file
|
@ -0,0 +1,92 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# Copyright (c) 2010 Red Hat, Inc
|
||||||
|
#
|
||||||
|
# This file is part of kitchen
|
||||||
|
#
|
||||||
|
# kitchen is free software; you can redistribute it and/or modify it under the
|
||||||
|
# terms of the GNU Lesser General Public License as published by the Free
|
||||||
|
# Software Foundation; either version 2.1 of the License, or (at your option)
|
||||||
|
# any later version.
|
||||||
|
#
|
||||||
|
# kitchen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
|
||||||
|
# more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Lesser General Public License
|
||||||
|
# along with kitchen; if not, see <http://www.gnu.org/licenses/>
|
||||||
|
#
|
||||||
|
# Authors:
|
||||||
|
# Toshio Kuratomi <toshio@fedoraproject.org>
|
||||||
|
|
||||||
|
'''
|
||||||
|
In python-2.4, a builtin :class:`set` type was added to python. This module
|
||||||
|
provides a function to emulate that on python-2.3 by using the :mod:`sets`
|
||||||
|
module.
|
||||||
|
|
||||||
|
:func:`set`
|
||||||
|
Create a set. If running on python 2.4+ this is the :class:`set`
|
||||||
|
constructor. If using python-2.3, it's :class:`sets.Set`.
|
||||||
|
|
||||||
|
:func:`frozenset`
|
||||||
|
Create a frozenset. If running on python2.4+ this is the
|
||||||
|
:class:`frozenset` constructor. If using python-2.3, it's
|
||||||
|
:class:`sets.ImmutableSet`.
|
||||||
|
|
||||||
|
.. versionchanged:: 0.2.0 API: kitchen.pycompat24 1.0.0
|
||||||
|
Added set and frozenset
|
||||||
|
'''
|
||||||
|
import __builtin__

# Setup set and frozenset on this module.  On python-2.4+ these simply alias
# the builtins; on python-2.3 they come from the sets module.
# :W0622,C0103: The purpose of this module is to define set and frozenset if
# they aren't in builtins already so we disregard these pylint warnings
#pylint:disable-msg=W0622,C0103
if hasattr(__builtin__, 'set'):
    set = set
else:
    import sets
    set = sets.Set

if hasattr(__builtin__, 'frozenset'):
    frozenset = frozenset
else:
    import sets
    frozenset = sets.ImmutableSet
#pylint:enable-msg=W0622,C0103
|
||||||
|
|
||||||
|
def add_builtin_set():
    '''If there's no set builtin, use the :mod:`sets` module to make one

    This function makes sure that a :class:`set` and :class:`frozenset` type
    are available in the :mod:`__builtin__` namespace.  Since the function
    checks whether :class:`set` and :class:`frozenset` are already present in
    the :mod:`__builtin__` namespace and refuses to overwrite those if found,
    it's safe to call this in multiple places and in scripts run under
    python-2.4+, where a more efficient set implementation is already present
    in the :mod:`__builtin__` namespace.

    However, since this function modifies :mod:`__builtin__` there's no need
    to call it more than once so you likely want to do something like this
    when your program loads::

        myprogram/__init__.py:

        from kitchen.pycompat24 import sets
        sets.add_builtin_set()

    You can then use :func:`set` and :func:`frozenset` anywhere in your code::

        myprogram/compute.py:

        def math_students(algebra_student_list, geometry_student_list):
            return set(algebra_student_list).union(set(geometry_student_list))
    '''
    # Only install our versions when the real builtins are absent
    if not hasattr(__builtin__, 'set'):
        __builtin__.set = set

    if not hasattr(__builtin__, 'frozenset'):
        __builtin__.frozenset = frozenset
|
||||||
|
|
||||||
|
__all__ = ('add_builtin_set', 'set', 'frozenset')
|
5
kitchen/pycompat24/subprocess.py
Normal file
5
kitchen/pycompat24/subprocess.py
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
# :W0401, W0611, W0614: Rather than have two versions of subprocess, we import
|
||||||
|
# the python2.7 version here as well
|
||||||
|
#pylint:disable-msg=W0401,W0611,W0614
|
||||||
|
from kitchen.pycompat27.subprocess import *
|
||||||
|
from kitchen.pycompat27.subprocess import __all__
|
12
kitchen/pycompat25/__init__.py
Normal file
12
kitchen/pycompat25/__init__.py
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
'''
|
||||||
|
The :mod:`kitchen.pycompat25` module contains implementations of functionality
|
||||||
|
introduced in python-2.5.
|
||||||
|
'''
|
||||||
|
|
||||||
|
from kitchen.versioning import version_tuple_to_string
|
||||||
|
|
||||||
|
__version_info__ = ((1, 0, 0),)
|
||||||
|
__version__ = version_tuple_to_string(__version_info__)
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = ('collections',)
|
9
kitchen/pycompat25/collections/__init__.py
Normal file
9
kitchen/pycompat25/collections/__init__.py
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
try:
|
||||||
|
    #:E0611: defaultdict doesn't exist in python-2.4 or less but that's why we
|
||||||
|
# have it in a try: except:. So we can use our version if necessary.
|
||||||
|
#pylint:disable-msg=E0611
|
||||||
|
from collections import defaultdict
|
||||||
|
except ImportError:
|
||||||
|
from kitchen.pycompat25.collections._defaultdict import defaultdict
|
||||||
|
|
||||||
|
__all__ = ('defaultdict',)
|
137
kitchen/pycompat25/collections/_defaultdict.py
Normal file
137
kitchen/pycompat25/collections/_defaultdict.py
Normal file
|
@ -0,0 +1,137 @@
|
||||||
|
##
|
||||||
|
# Transcribed from http://code.activestate.com/recipes/523034/ on May 1, 2009
|
||||||
|
# by Jef Spaleta This code provides an emulation for the defaultdict
|
||||||
|
# functionality introduced in python 2.5's collection module
|
||||||
|
#
|
||||||
|
# Changes from the original:
|
||||||
|
# * Change the return value from __reduce__ to use iteritems() to prevent
|
||||||
|
# a segfault when pickling. (Jef Spaleta)
|
||||||
|
# * Change how we setup the module to use collections.defaultdict by default
|
||||||
|
# (Toshio Kuratomi)
|
||||||
|
#
|
||||||
|
# Copyright (c) 2007 Justin Kirtland
|
||||||
|
#
|
||||||
|
# PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
|
||||||
|
# --------------------------------------------
|
||||||
|
#
|
||||||
|
# 1. This LICENSE AGREEMENT is between the Python Software Foundation ("PSF"),
|
||||||
|
# and the Individual or Organization ("Licensee") accessing and otherwise
|
||||||
|
# using this software ("Python") in source or binary form and its
|
||||||
|
# associated documentation.
|
||||||
|
#
|
||||||
|
# 2. Subject to the terms and conditions of this License Agreement, PSF hereby
|
||||||
|
# grants Licensee a nonexclusive, royalty-free, world-wide license to
|
||||||
|
# reproduce, analyze, test, perform and/or display publicly, prepare
|
||||||
|
# derivative works, distribute, and otherwise use Python alone or in any
|
||||||
|
# derivative version, provided, however, that PSF's License Agreement and
|
||||||
|
# PSF's notice of copyright, i.e., "Copyright (c) 2001, 2002, 2003, 2004,
|
||||||
|
# 2005, 2006 Python Software Foundation; All Rights Reserved" are retained
|
||||||
|
# in Python alone or in any derivative version prepared by Licensee.
|
||||||
|
#
|
||||||
|
# 3. In the event Licensee prepares a derivative work that is based on or
|
||||||
|
# incorporates Python or any part thereof, and wants to make the derivative
|
||||||
|
# work available to others as provided herein, then Licensee hereby agrees
|
||||||
|
# to include in any such work a brief summary of the changes made to
|
||||||
|
# Python.
|
||||||
|
#
|
||||||
|
# 4. PSF is making Python available to Licensee on an "AS IS" basis. PSF
|
||||||
|
# MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF
|
||||||
|
# EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY
|
||||||
|
# REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY
|
||||||
|
# PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT INFRINGE ANY THIRD
|
||||||
|
# PARTY RIGHTS.
|
||||||
|
#
|
||||||
|
# 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON FOR ANY
|
||||||
|
# INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF
|
||||||
|
# MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, OR ANY DERIVATIVE
|
||||||
|
# THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
|
||||||
|
#
|
||||||
|
# 6. This License Agreement will automatically terminate upon a material
|
||||||
|
# breach of its terms and conditions.
|
||||||
|
#
|
||||||
|
# 7. Nothing in this License Agreement shall be deemed to create any
|
||||||
|
# relationship of agency, partnership, or joint venture between PSF and
|
||||||
|
# Licensee. This License Agreement does not grant permission to use PSF
|
||||||
|
# trademarks or trade name in a trademark sense to endorse or promote
|
||||||
|
# products or services of Licensee, or any third party.
|
||||||
|
#
|
||||||
|
# 8. By copying, installing or otherwise using Python, Licensee agrees to be
|
||||||
|
# bound by the terms and conditions of this License Agreement.
|
||||||
|
|
||||||
|
'''
|
||||||
|
-----------
|
||||||
|
defaultdict
|
||||||
|
-----------
|
||||||
|
|
||||||
|
This is a pure python implementation of defaultdict that is compatible with
|
||||||
|
the defaultdict class provided by python-2.5 and above.
|
||||||
|
|
||||||
|
.. seealso::
|
||||||
|
:class:`collections.defaultdict`
|
||||||
|
for documentation on this module
|
||||||
|
'''
|
||||||
|
|
||||||
|
# Pylint disabled messages
|
||||||
|
#
|
||||||
|
# :C0103: We're defnining a compatible class name therefore we need to match
|
||||||
|
# the format of that name.
|
||||||
|
|
||||||
|
import types
|
||||||
|
|
||||||
|
from kitchen import b_
|
||||||
|
|
||||||
|
# :C0103, W0613: We're implementing the python-2.5 defaultdict API so
|
||||||
|
# we have to use the same names as python.
|
||||||
|
# :C0111: We point people at the stdlib API docs for defaultdict rather than
|
||||||
|
# reproduce it here.
|
||||||
|
#pylint:disable-msg=C0103,W0613,C0111
|
||||||
|
|
||||||
|
class defaultdict(dict):
    # Pure-python emulation of python-2.5's collections.defaultdict; see the
    # stdlib documentation for the full API contract.
    def __init__(self, default_factory=None, *args, **kwargs):
        # default_factory must be callable (or None): __missing__ invokes it
        # with no arguments to produce the value for an absent key
        if (default_factory is not None and
            not hasattr(default_factory, '__call__')):
            raise TypeError(b_('First argument must be callable'))
        dict.__init__(self, *args, **kwargs)
        self.default_factory = default_factory

    def __getitem__(self, key):
        # Normal lookup first; fall back to __missing__ for absent keys
        try:
            return dict.__getitem__(self, key)
        except KeyError:
            return self.__missing__(key)

    def __missing__(self, key):
        # Without a factory this behaves like a plain dict
        if self.default_factory is None:
            raise KeyError(key)
        # Store the freshly-created value before returning it
        self[key] = value = self.default_factory()
        return value

    def __reduce__(self):
        # Pickle support: (callable, ctor args, state, list iter, dict iter).
        # iteritems() is used here (rather than items()) deliberately -- see
        # the transcription notes at the top of this file.
        if self.default_factory is None:
            args = tuple()
        else:
            args = self.default_factory,
        return type(self), args, None, None, self.iteritems()

    def copy(self):
        return self.__copy__()

    def __copy__(self):
        # Shallow copy: same factory, same (shared) values
        return type(self)(self.default_factory, self)

    def __deepcopy__(self, memo):
        # Deep copy the items; the factory itself is not copied
        import copy
        return type(self)(self.default_factory,
                copy.deepcopy(self.items()))

    def __repr__(self):
        # Note: Have to use "is not None" otherwise we get an infinite
        # recursion
        # A bound method of a defaultdict subclass as the factory would
        # recurse through repr(), so emit a fixed placeholder instead
        if isinstance(self.default_factory, types.MethodType) \
                and self.default_factory.im_self is not None \
                and issubclass(self.default_factory.im_class, defaultdict):
            defrepr = '<bound method sub._factory of defaultdict(...'
        else:
            defrepr = repr(self.default_factory)
        return 'defaultdict(%s, %s)' % (defrepr, dict.__repr__(self))
|
||||||
|
|
||||||
|
__all__ = ('defaultdict',)
|
13
kitchen/pycompat27/__init__.py
Normal file
13
kitchen/pycompat27/__init__.py
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
'''
|
||||||
|
The :mod:`kitchen.pycompat27` module contains implementations of functionality
|
||||||
|
introduced in python-2.7 for use on earlier versions of python.
|
||||||
|
|
||||||
|
.. versionchanged:: 0.2.3
|
||||||
|
Made mswindows, MAXFD, and list2cmdline available from the module
|
||||||
|
'''
|
||||||
|
from kitchen.versioning import version_tuple_to_string
|
||||||
|
|
||||||
|
__version_info__ = ((1, 1, 0),)
|
||||||
|
__version__ = version_tuple_to_string(__version_info__)
|
||||||
|
|
||||||
|
__all__ = ('subprocess',)
|
46
kitchen/pycompat27/subprocess/__init__.py
Normal file
46
kitchen/pycompat27/subprocess/__init__.py
Normal file
|
@ -0,0 +1,46 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# Copyright (c) 2011 Red Hat, Inc
|
||||||
|
#
|
||||||
|
# This file is part of kitchen
|
||||||
|
#
|
||||||
|
# kitchen is free software; you can redistribute it and/or modify it under the
|
||||||
|
# terms of the GNU Lesser General Public License as published by the Free
|
||||||
|
# Software Foundation; either version 2.1 of the License, or (at your option)
|
||||||
|
# any later version.
|
||||||
|
#
|
||||||
|
# kitchen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
|
||||||
|
# more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Lesser General Public License
|
||||||
|
# along with kitchen; if not, see <http://www.gnu.org/licenses/>
|
||||||
|
#
|
||||||
|
# Authors:
|
||||||
|
# Toshio Kuratomi <toshio@fedoraproject.org>
|
||||||
|
|
||||||
|
'''
|
||||||
|
Implement the modern subprocess interface
|
||||||
|
|
||||||
|
Python-2.5 and python-2.7 introduce new API features to subprocess. This is
|
||||||
|
a backport of that module for use on earlier python versions.
|
||||||
|
|
||||||
|
.. seealso::
|
||||||
|
:mod:`subprocess`
|
||||||
|
for information about using the functions provided here.
|
||||||
|
'''
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# :W0401,W0611,W0614: We're importing compatibility to the python-2.7 version
|
||||||
|
# of subprocess.
|
||||||
|
#pylint:disable-msg=W0401,W0611,W0614
|
||||||
|
if sys.version_info >= (2, 7):
|
||||||
|
from subprocess import *
|
||||||
|
from subprocess import MAXFD, list2cmdline, mswindows
|
||||||
|
from subprocess import __all__
|
||||||
|
else:
|
||||||
|
from kitchen.pycompat27.subprocess._subprocess import *
|
||||||
|
from kitchen.pycompat27.subprocess._subprocess import MAXFD, \
|
||||||
|
list2cmdline, mswindows
|
||||||
|
from kitchen.pycompat27.subprocess._subprocess import __all__
|
1538
kitchen/pycompat27/subprocess/_subprocess.py
Normal file
1538
kitchen/pycompat27/subprocess/_subprocess.py
Normal file
File diff suppressed because it is too large
Load diff
35
kitchen/release.py
Normal file
35
kitchen/release.py
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
'''
|
||||||
|
Information about this kitchen release.
|
||||||
|
'''
|
||||||
|
|
||||||
|
from kitchen import _, __version__
|
||||||
|
|
||||||
|
NAME = 'kitchen'
|
||||||
|
VERSION = __version__
|
||||||
|
DESCRIPTION = _('Kitchen contains a cornucopia of useful code')
|
||||||
|
LONG_DESCRIPTION = _('''
|
||||||
|
We've all done it. In the process of writing a brand new application we've
|
||||||
|
discovered that we need a little bit of code that we've invented before.
|
||||||
|
Perhaps it's something to handle unicode text. Perhaps it's something to make
|
||||||
|
a bit of python-2.5 code run on python-2.3. Whatever it is, it ends up being
|
||||||
|
a tiny bit of code that seems too small to worry about pushing into its own
|
||||||
|
module so it sits there, a part of your current project, waiting to be cut and
|
||||||
|
pasted into your next project. And the next. And the next. And since that
|
||||||
|
little bittybit of code proved so useful to you, it's highly likely that it
|
||||||
|
proved useful to someone else as well. Useful enough that they've written it
|
||||||
|
and copy and pasted it over and over into each of their new projects.
|
||||||
|
|
||||||
|
Well, no longer! Kitchen aims to pull these small snippets of code into a few
|
||||||
|
python modules which you can import and use within your project. No more copy
|
||||||
|
and paste! Now you can let someone else maintain and release these small
|
||||||
|
snippets so that you can get on with your life.
|
||||||
|
''')
|
||||||
|
AUTHOR = 'Toshio Kuratomi, Seth Vidal, others'
|
||||||
|
EMAIL = 'toshio@fedoraproject.org'
|
||||||
|
COPYRIGHT = '2011 Red Hat, Inc. and others'
|
||||||
|
URL = 'https://fedorahosted.org/kitchen'
|
||||||
|
DOWNLOAD_URL = 'https://fedorahosted.org/releases/k/i/kitchen'
|
||||||
|
LICENSE = 'LGPLv2+'
|
||||||
|
|
||||||
|
__all__ = ('NAME', 'VERSION', 'DESCRIPTION', 'LONG_DESCRIPTION', 'AUTHOR',
|
||||||
|
'EMAIL', 'COPYRIGHT', 'URL', 'DOWNLOAD_URL', 'LICENSE')
|
17
kitchen/text/__init__.py
Normal file
17
kitchen/text/__init__.py
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
'''
|
||||||
|
------------
|
||||||
|
Kitchen.text
|
||||||
|
------------
|
||||||
|
|
||||||
|
Kitchen.text contains functions for manipulating text in python.
|
||||||
|
|
||||||
|
This includes things like converting between byte strings and unicode,
|
||||||
|
and displaying text on the screen.
|
||||||
|
'''
|
||||||
|
|
||||||
|
from kitchen.versioning import version_tuple_to_string
|
||||||
|
|
||||||
|
__version_info__ = ((2, 1, 1),)
|
||||||
|
__version__ = version_tuple_to_string(__version_info__)
|
||||||
|
|
||||||
|
__all__ = ('converters', 'exceptions', 'misc',)
|
921
kitchen/text/converters.py
Normal file
921
kitchen/text/converters.py
Normal file
|
@ -0,0 +1,921 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# Copyright (c) 2011 Red Hat, Inc.
|
||||||
|
#
|
||||||
|
# kitchen is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU Lesser General Public
|
||||||
|
# License as published by the Free Software Foundation; either
|
||||||
|
# version 2.1 of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# kitchen is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
# Lesser General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Lesser General Public
|
||||||
|
# License along with kitchen; if not, see <http://www.gnu.org/licenses/>
|
||||||
|
#
|
||||||
|
# Authors:
|
||||||
|
# Toshio Kuratomi <toshio@fedoraproject.org>
|
||||||
|
# Seth Vidal
|
||||||
|
#
|
||||||
|
# Portions of code taken from yum/i18n.py and
|
||||||
|
# python-fedora: fedora/textutils.py
|
||||||
|
|
||||||
|
'''
|
||||||
|
Functions to handle conversion of byte :class:`str` and :class:`unicode`
|
||||||
|
strings.
|
||||||
|
|
||||||
|
.. versionchanged:: kitchen 0.2a2 ; API kitchen.text 2.0.0
|
||||||
|
Added :func:`~kitchen.text.converters.getwriter`
|
||||||
|
|
||||||
|
.. versionchanged:: kitchen 0.2.2 ; API kitchen.text 2.1.0
|
||||||
|
Added :func:`~kitchen.text.converters.exception_to_unicode`,
|
||||||
|
:func:`~kitchen.text.converters.exception_to_bytes`,
|
||||||
|
:data:`~kitchen.text.converters.EXCEPTION_CONVERTERS`,
|
||||||
|
and :data:`~kitchen.text.converters.BYTE_EXCEPTION_CONVERTERS`
|
||||||
|
|
||||||
|
.. versionchanged:: kitchen 1.0.1 ; API kitchen.text 2.1.1
|
||||||
|
Deprecated :data:`~kitchen.text.converters.BYTE_EXCEPTION_CONVERTERS` as
|
||||||
|
we've simplified :func:`~kitchen.text.converters.exception_to_unicode` and
|
||||||
|
:func:`~kitchen.text.converters.exception_to_bytes` to make it unnecessary
|
||||||
|
|
||||||
|
'''
|
||||||
|
try:
|
||||||
|
from base64 import b64encode, b64decode
|
||||||
|
except ImportError:
|
||||||
|
from kitchen.pycompat24.base64 import b64encode, b64decode
|
||||||
|
|
||||||
|
import codecs
|
||||||
|
import warnings
|
||||||
|
import xml.sax.saxutils
|
||||||
|
|
||||||
|
# We need to access b_() for localizing our strings but we'll end up with
|
||||||
|
# a circular import if we import it directly.
|
||||||
|
import kitchen as k
|
||||||
|
from kitchen.pycompat24 import sets
|
||||||
|
sets.add_builtin_set()
|
||||||
|
|
||||||
|
from kitchen.text.exceptions import ControlCharError, XmlEncodeError
|
||||||
|
from kitchen.text.misc import guess_encoding, html_entities_unescape, \
|
||||||
|
process_control_chars
|
||||||
|
|
||||||
|
#: Aliases for the utf-8 codec.  to_unicode() checks ``encoding in
#: _UTF8_ALIASES`` to short-circuit straight to a utf-8 decode without
#: going through the codec machinery for the common case.
_UTF8_ALIASES = frozenset(('utf-8', 'UTF-8', 'utf8', 'UTF8', 'utf_8', 'UTF_8',
    'utf', 'UTF', 'u8', 'U8'))
#: Aliases for the latin-1 codec.  Same fast-path purpose as
#: :data:`_UTF8_ALIASES` but for latin-1.
_LATIN1_ALIASES = frozenset(('latin-1', 'LATIN-1', 'latin1', 'LATIN1',
    'latin', 'LATIN', 'l1', 'L1', 'cp819', 'CP819', '8859', 'iso8859-1',
    'ISO8859-1', 'iso-8859-1', 'ISO-8859-1'))

# EXCEPTION_CONVERTERS is defined below due to using to_unicode
|
def to_unicode(obj, encoding='utf-8', errors='replace', nonstring=None,
        non_string=None):
    '''Convert an object into a :class:`unicode` string

    :arg obj: Object to convert to a :class:`unicode` string.  This should
        normally be a byte :class:`str`
    :kwarg encoding: Encoding to try when decoding a byte :class:`str`.
        Defaults to :term:`utf-8`
    :kwarg errors: Error handling scheme to use while decoding.  Defaults to
        ``replace`` which substitutes a marker character for undecodable
        bytes.  Any scheme accepted by the `codec base classes
        <http://docs.python.org/library/codecs.html#codec-base-classes>`_
        works here, for instance ``strict`` (raise) or ``ignore`` (drop).
    :kwarg nonstring: How to treat values that are not strings:

        :simplerepr: (default) Try :meth:`object.__unicode__` first, then
            fall back to :meth:`object.__str__` to get a simple textual
            representation.
        :empty: Return an empty :class:`unicode` string
        :strict: Raise a :exc:`TypeError`
        :passthru: Return the object unchanged
        :repr: Return a :class:`unicode` string of the repr of the object

    :kwarg non_string: *Deprecated* Use :attr:`nonstring` instead
    :raises TypeError: if :attr:`nonstring` is ``strict`` and a
        non-:class:`basestring` object is passed in, or if :attr:`nonstring`
        is set to an unknown value
    :raises UnicodeDecodeError: if :attr:`errors` is ``strict`` and
        :attr:`obj` is not decodable using the given encoding
    :returns: :class:`unicode` string or the original object depending on
        the value of :attr:`nonstring`.

    With the default arguments this never raises while decoding a byte
    :class:`str`; be sure that is what your data requires before relying on
    the lossy ``replace`` behaviour.

    .. versionchanged:: 0.2.1a2
        Deprecated :attr:`non_string` in favor of :attr:`nonstring`
        parameter and changed default value to ``simplerepr``
    '''
    if isinstance(obj, basestring):
        # Real strings: unicode passes through, byte str gets decoded.
        if isinstance(obj, unicode):
            return obj
        # Fast paths for the overwhelmingly common encodings avoid a full
        # codec lookup.
        if encoding in _UTF8_ALIASES:
            return unicode(obj, 'utf-8', errors)
        if encoding in _LATIN1_ALIASES:
            return unicode(obj, 'latin-1', errors)
        return obj.decode(encoding, errors)

    # Honor the deprecated spelling but warn about it.
    if non_string:
        warnings.warn(k.b_('non_string is a deprecated parameter of'
            ' to_unicode(). Use nonstring instead'), DeprecationWarning,
            stacklevel=2)
        nonstring = nonstring or non_string
    nonstring = nonstring or 'simplerepr'

    if nonstring == 'empty':
        return u''
    if nonstring == 'passthru':
        return obj
    if nonstring == 'simplerepr':
        # Prefer __unicode__; on failure (or an empty/falsy result) fall
        # back to str()/__str__.
        try:
            text = obj.__unicode__()
        except (AttributeError, UnicodeError):
            text = None
        if not text:
            try:
                text = str(obj)
            except UnicodeError:
                try:
                    text = obj.__str__()
                except (UnicodeError, AttributeError):
                    text = u''
        if isinstance(text, unicode):
            return text
        return unicode(text, encoding, errors)
    if nonstring in ('repr', 'strict'):
        rep = repr(obj)
        if not isinstance(rep, unicode):
            rep = unicode(rep, encoding, errors)
        if nonstring == 'repr':
            return rep
        # nonstring == 'strict': refuse non-string input
        raise TypeError(k.b_('to_unicode was given "%(obj)s" which is neither'
            ' a byte string (str) or a unicode string') %
            {'obj': rep.encode(encoding, 'replace')})

    raise TypeError(k.b_('nonstring value, %(param)s, is not set to a valid'
        ' action') % {'param': nonstring})
|
||||||
|
|
||||||
|
def to_bytes(obj, encoding='utf-8', errors='replace', nonstring=None,
        non_string=None):
    '''Convert an object into a byte :class:`str`

    :arg obj: Object to convert to a byte :class:`str`.  This should
        normally be a :class:`unicode` string.
    :kwarg encoding: Encoding to use when turning a :class:`unicode` string
        into a byte :class:`str`.  Defaults to :term:`utf-8`.
    :kwarg errors: Error handling scheme to use while encoding.  Defaults to
        ``replace`` which substitutes a marker character for unencodable
        characters.  Any scheme accepted by the `codec base classes
        <http://docs.python.org/library/codecs.html#codec-base-classes>`_
        works here, for instance ``strict`` (raise) or ``ignore`` (drop).
    :kwarg nonstring: How to treat values that are not strings:

        :simplerepr: (default) Try :func:`str` / :meth:`object.__str__`
            first, then fall back to :meth:`object.__unicode__` to get
            a simple textual representation.
        :empty: Return an empty byte :class:`str`
        :strict: Raise a :exc:`TypeError`
        :passthru: Return the object unchanged
        :repr: Return a byte :class:`str` of the :func:`repr` of the object

    :kwarg non_string: *Deprecated* Use :attr:`nonstring` instead.
    :raises TypeError: if :attr:`nonstring` is ``strict`` and a
        non-:class:`basestring` object is passed in, or if :attr:`nonstring`
        is set to an unknown value.
    :raises UnicodeEncodeError: if :attr:`errors` is ``strict`` and all of
        the bytes of :attr:`obj` are unable to be encoded using
        :attr:`encoding`.
    :returns: byte :class:`str` or the original object depending on the
        value of :attr:`nonstring`.

    .. warning::

        A byte :class:`str` passed in is returned unmodified; it is **not**
        re-encoded to :attr:`encoding`.  To force a particular encoding,
        round-trip through unicode::

            to_bytes(to_unicode(text), encoding='utf-8')

    .. versionchanged:: 0.2.1a2
        Deprecated :attr:`non_string` in favor of :attr:`nonstring`
        parameter and changed default value to ``simplerepr``
    '''
    if isinstance(obj, basestring):
        # Byte str passes through untouched; unicode gets encoded.
        if isinstance(obj, str):
            return obj
        return obj.encode(encoding, errors)

    # Honor the deprecated spelling but warn about it.
    if non_string:
        warnings.warn(k.b_('non_string is a deprecated parameter of'
            ' to_bytes(). Use nonstring instead'), DeprecationWarning,
            stacklevel=2)
        nonstring = nonstring or non_string
    nonstring = nonstring or 'simplerepr'

    if nonstring == 'empty':
        return ''
    if nonstring == 'passthru':
        return obj
    if nonstring == 'simplerepr':
        # Prefer str(); on failure (or an empty/falsy result) fall back to
        # __str__ and finally __unicode__.
        try:
            text = str(obj)
        except UnicodeError:
            try:
                text = obj.__str__()
            except (AttributeError, UnicodeError):
                text = None
        if not text:
            try:
                text = obj.__unicode__()
            except (AttributeError, UnicodeError):
                text = ''
        if isinstance(text, unicode):
            text = text.encode(encoding, 'replace')
        return text
    if nonstring in ('repr', 'strict'):
        try:
            rep = obj.__repr__()
        except (AttributeError, UnicodeError):
            rep = ''
        if isinstance(rep, unicode):
            rep = rep.encode(encoding, errors)
        else:
            # repr() may legally return any object; coerce to byte str
            rep = str(rep)
        if nonstring == 'repr':
            return rep
        # nonstring == 'strict': refuse non-string input
        raise TypeError(k.b_('to_bytes was given "%(obj)s" which is neither'
            ' a unicode string or a byte string (str)') % {'obj': rep})

    raise TypeError(k.b_('nonstring value, %(param)s, is not set to a valid'
        ' action') % {'param': nonstring})
|
||||||
|
|
||||||
|
def getwriter(encoding):
    '''Return a :class:`codecs.StreamWriter` that resists tracing back.

    :arg encoding: Encoding to use for transforming :class:`unicode` strings
        into byte :class:`str`.
    :rtype: :class:`codecs.StreamWriter`
    :returns: :class:`~codecs.StreamWriter` that you can instantiate to wrap
        output streams so they automatically translate :class:`unicode`
        strings into :attr:`encoding`.

    This reimplements :func:`codecs.getwriter` with two departures, both
    aimed at never issuing a traceback from inside the writer:

    1) The returned :class:`~codecs.StreamWriter` accepts byte :class:`str`
       as well as :class:`unicode` strings.  Byte :class:`str` is passed
       through unmodified.
    2) The default error handler is ``replace`` (unknown characters become
       ``?`` in most ascii-based encodings, or the unicode replacement
       character in the utf encodings) whereas :func:`codecs.getwriter`
       defaults to ``strict``.  As with :class:`codecs.StreamWriter`, the
       handler can still be changed afterwards via
       ``stream.errors = 'new_handler_name'``.

    .. seealso::

        API docs for :class:`codecs.StreamWriter` and
        :func:`codecs.getwriter` and `Print Fails
        <http://wiki.python.org/moin/PrintFails>`_ on the python wiki.

    .. versionadded:: kitchen 0.2a2, API: kitchen.text 1.1.0
    '''
    class _TracebackFreeWriter(codecs.StreamWriter):
        # :W0223: We deliberately do not implement every StreamWriter
        #   method; this class is only a stand-in for the real codec class.
        # :C0111: The API is documented by the stdlib codecs module; no
        #   docstrings needed here.
        #pylint:disable-msg=W0223,C0111
        def __init__(self, stream, errors='replace'):
            # Same construction as the stdlib writer; only the default for
            # errors differs ('replace' instead of 'strict').
            codecs.StreamWriter.__init__(self, stream, errors)

        def encode(self, msg, errors='replace'):
            # Delegate to to_bytes(): unicode is encoded with this class's
            # encoding, byte str passes through untouched.
            encoded = to_bytes(msg, encoding=self.encoding, errors=errors)
            return (encoded, len(msg))

    # Bind the requested encoding onto the class, mirroring what
    # codecs.getwriter() does for real codec writers.
    _TracebackFreeWriter.encoding = encoding
    return _TracebackFreeWriter
|
||||||
|
|
||||||
|
def to_utf8(obj, errors='replace', non_string='passthru'):
    '''*Deprecated*

    Convert :class:`unicode` to an encoded :term:`utf-8` byte :class:`str`.
    Use :func:`to_bytes` instead::

        to_bytes(obj, encoding='utf-8', non_string='passthru')
    '''
    # Warn every caller; this wrapper only exists for backwards
    # compatibility.
    warnings.warn(k.b_('kitchen.text.converters.to_utf8 is deprecated. Use'
        ' kitchen.text.converters.to_bytes(obj, encoding="utf-8",'
        ' nonstring="passthru" instead.'), DeprecationWarning, stacklevel=2)
    return to_bytes(obj, errors=errors, encoding='utf-8',
            nonstring=non_string)
|
||||||
|
|
||||||
|
### str is also the type name for byte strings so it's not a good name for
|
||||||
|
### something that can return unicode strings
|
||||||
|
def to_str(obj):
    '''*Deprecated*

    Convert an object into a byte :class:`str` by calling :func:`str` or
    :func:`unicode` on it without danger of a :exc:`UnicodeError`.  Use
    :func:`to_unicode` or :func:`to_bytes` explicitly instead.

    For :class:`unicode` strings::

        to_unicode(obj, nonstring='simplerepr')

    For byte :class:`str`::

        to_bytes(obj, nonstring='simplerepr')
    '''
    # Warn every caller; this wrapper only exists for backwards
    # compatibility.
    warnings.warn(k.b_('to_str is deprecated. Use to_unicode or to_bytes'
        ' instead. See the to_str docstring for'
        ' porting information.'),
        DeprecationWarning, stacklevel=2)
    return to_bytes(obj, nonstring='simplerepr')
|
||||||
|
|
||||||
|
# Exception message extraction functions
EXCEPTION_CONVERTERS = (lambda e: e.args[0], lambda e: e)
''' Tuple of functions to try to use to convert an exception into a string
    representation.  Its main use is to extract a string (:class:`unicode` or
    :class:`str`) from an exception object in :func:`exception_to_unicode` and
    :func:`exception_to_bytes`.  The functions here will try the exception's
    ``args[0]`` and the exception itself (roughly equivalent to
    `str(exception)`) to extract the message.  This is only a default and can
    be easily overridden when calling those functions.  There are several
    reasons you might wish to do that.  If you have exceptions where the best
    string representing the exception is not returned by the default
    functions, you can add another function to extract from a different
    field::

        from kitchen.text.converters import (EXCEPTION_CONVERTERS,
                exception_to_unicode)

        class MyError(Exception):
            def __init__(self, message):
                self.value = message

        c = [lambda e: e.value]
        c.extend(EXCEPTION_CONVERTERS)
        try:
            raise MyError('An Exception message')
        except MyError, e:
            print exception_to_unicode(e, converters=c)

    Another reason would be if you're converting to a byte :class:`str` and
    you know the :class:`str` needs to be a non-:term:`utf-8` encoding.
    :func:`exception_to_bytes` defaults to :term:`utf-8` but if you convert
    into a byte :class:`str` explicitly using a converter then you can choose
    a different encoding::

        from kitchen.text.converters import (EXCEPTION_CONVERTERS,
                exception_to_bytes, to_bytes)
        c = [lambda e: to_bytes(e.args[0], encoding='euc_jp'),
                lambda e: to_bytes(e, encoding='euc_jp')]
        c.extend(EXCEPTION_CONVERTERS)
        try:
            do_something()
        except Exception, e:
            log = open('logfile.euc_jp', 'a')
            log.write('%s\\n' % exception_to_bytes(e, converters=c))
            log.close()

    Each function in this list should take the exception as its sole argument
    and return a string containing the message representing the exception.
    The functions may return the message as a byte :class:`str`,
    a :class:`unicode` string, or even an object if you trust the object to
    return a decent string representation.  The :func:`exception_to_unicode`
    and :func:`exception_to_bytes` functions will make sure to convert the
    string to the proper type before returning.

    .. versionadded:: 0.2.2
'''
|
||||||
|
|
||||||
|
# Deprecated: exception_to_bytes() now runs whatever the converters return
# through to_bytes() itself, so the plain EXCEPTION_CONVERTERS do the same
# job.  Kept only for backwards compatibility.
BYTE_EXCEPTION_CONVERTERS = (lambda e: to_bytes(e.args[0]), to_bytes)
'''*Deprecated*: Use :data:`EXCEPTION_CONVERTERS` instead.

Tuple of functions to try to use to convert an exception into a string
representation. This tuple is similar to the one in
:data:`EXCEPTION_CONVERTERS` but it's used with :func:`exception_to_bytes`
instead. Ideally, these functions should do their best to return the data
as a byte :class:`str` but the results will be run through
:func:`to_bytes` before being returned.

.. versionadded:: 0.2.2
.. versionchanged:: 1.0.1
Deprecated as simplifications allow :data:`EXCEPTION_CONVERTERS` to
perform the same function.
'''
|
||||||
|
|
||||||
|
def exception_to_unicode(exc, converters=EXCEPTION_CONVERTERS):
    '''Convert an exception object into a unicode representation

    :arg exc: Exception object to convert
    :kwarg converters: List of functions tried in order to extract a string
        from the exception; the first one that does not raise wins.  See
        :data:`EXCEPTION_CONVERTERS` for the default value and an example of
        adding other converters to the defaults.
    :returns: :class:`unicode` string representation of the exception.  The
        extracted value is converted to :class:`unicode` via
        :func:`to_unicode` (:term:`utf-8`) before being returned; to use
        another encoding, add a converter that performs it to
        :attr:`converters`.

    .. versionadded:: 0.2.2
    '''
    for converter in converters:
        # A converter may raise anything at all; treat any failure as "this
        # converter cannot handle exc" and move on to the next one.
        try:
            extracted = converter(exc)
        except:
            continue
        return to_unicode(extracted)
    # Every converter failed
    return to_unicode(u'<exception failed to convert to text>')
|
||||||
|
|
||||||
|
def exception_to_bytes(exc, converters=EXCEPTION_CONVERTERS):
    '''Convert an exception object into a str representation

    :arg exc: Exception object to convert
    :kwarg converters: List of functions tried in order to extract a string
        from the exception; the first one that does not raise wins.  See
        :data:`EXCEPTION_CONVERTERS` for the default value and an example of
        adding other converters to the defaults.
    :returns: byte :class:`str` representation of the exception.  The
        extracted value is converted to :class:`str` via :func:`to_bytes`
        (:term:`utf-8`) before being returned; to use another encoding, add
        a converter that performs it to :attr:`converters`.

    .. versionadded:: 0.2.2
    .. versionchanged:: 1.0.1
        Code simplification allowed us to switch to using
        :data:`EXCEPTION_CONVERTERS` as the default value of
        :attr:`converters`.
    '''
    for converter in converters:
        # A converter may raise anything at all; treat any failure as "this
        # converter cannot handle exc" and move on to the next one.
        try:
            extracted = converter(exc)
        except:
            continue
        return to_bytes(extracted)
    # Every converter failed
    return to_bytes('<exception failed to convert to text>')
|
||||||
|
|
||||||
|
#
|
||||||
|
# XML Related Functions
|
||||||
|
#
|
||||||
|
|
||||||
|
def unicode_to_xml(string, encoding='utf-8', attrib=False,
|
||||||
|
control_chars='replace'):
|
||||||
|
'''Take a :class:`unicode` string and turn it into a byte :class:`str`
|
||||||
|
suitable for xml
|
||||||
|
|
||||||
|
:arg string: :class:`unicode` string to encode into an XML compatible byte
|
||||||
|
:class:`str`
|
||||||
|
:kwarg encoding: encoding to use for the returned byte :class:`str`.
|
||||||
|
Default is to encode to :term:`UTF-8`. If some of the characters in
|
||||||
|
:attr:`string` are not encodable in this encoding, the unknown
|
||||||
|
characters will be entered into the output string using xml character
|
||||||
|
references.
|
||||||
|
:kwarg attrib: If :data:`True`, quote the string for use in an xml
|
||||||
|
attribute. If :data:`False` (default), quote for use in an xml text
|
||||||
|
field.
|
||||||
|
:kwarg control_chars: :term:`control characters` are not allowed in XML
|
||||||
|
documents. When we encounter those we need to know what to do. Valid
|
||||||
|
options are:
|
||||||
|
|
||||||
|
:replace: (default) Replace the control characters with ``?``
|
||||||
|
:ignore: Remove the characters altogether from the output
|
||||||
|
:strict: Raise an :exc:`~kitchen.text.exceptions.XmlEncodeError` when
|
||||||
|
we encounter a :term:`control character`
|
||||||
|
|
||||||
|
:raises kitchen.text.exceptions.XmlEncodeError: If :attr:`control_chars`
|
||||||
|
is set to ``strict`` and the string to be made suitable for output to
|
||||||
|
xml contains :term:`control characters` or if :attr:`string` is not
|
||||||
|
a :class:`unicode` string then we raise this exception.
|
||||||
|
:raises ValueError: If :attr:`control_chars` is set to something other than
|
||||||
|
``replace``, ``ignore``, or ``strict``.
|
||||||
|
:rtype: byte :class:`str`
|
||||||
|
:returns: representation of the :class:`unicode` string as a valid XML
|
||||||
|
byte :class:`str`
|
||||||
|
|
||||||
|
XML files consist mainly of text encoded using a particular charset. XML
|
||||||
|
also denies the use of certain bytes in the encoded text (example: ``ASCII
|
||||||
|
Null``). There are also special characters that must be escaped if they
|
||||||
|
are present in the input (example: ``<``). This function takes care of
|
||||||
|
all of those issues for you.
|
||||||
|
|
||||||
|
There are a few different ways to use this function depending on your
|
||||||
|
needs. The simplest invocation is like this::
|
||||||
|
|
||||||
|
unicode_to_xml(u'String with non-ASCII characters: <"á と">')
|
||||||
|
|
||||||
|
This will return the following to you, encoded in :term:`utf-8`::
|
||||||
|
|
||||||
|
'String with non-ASCII characters: <"á と">'
|
||||||
|
|
||||||
|
Pretty straightforward. Now, what if you need to encode your document in
|
||||||
|
something other than :term:`utf-8`? For instance, ``latin-1``? Let's
|
||||||
|
see::
|
||||||
|
|
||||||
|
unicode_to_xml(u'String with non-ASCII characters: <"á と">', encoding='latin-1')
|
||||||
|
'String with non-ASCII characters: <"á と">'
|
||||||
|
|
||||||
|
Because the ``と`` character is not available in the ``latin-1`` charset,
|
||||||
|
it is replaced with ``と`` in our output. This is an xml character
|
||||||
|
reference which represents the character at unicode codepoint ``12392``, the
|
||||||
|
``と`` character.
|
||||||
|
|
||||||
|
When you want to reverse this, use :func:`xml_to_unicode` which will turn
|
||||||
|
a byte :class:`str` into a :class:`unicode` string and replace the xml
|
||||||
|
character references with the unicode characters.
|
||||||
|
|
||||||
|
XML also has the quirk of not allowing :term:`control characters` in its
|
||||||
|
output. The :attr:`control_chars` parameter allows us to specify what to
|
||||||
|
do with those. For use cases that don't need absolute character by
|
||||||
|
character fidelity (example: holding strings that will just be used for
|
||||||
|
display in a GUI app later), the default value of ``replace`` works well::
|
||||||
|
|
||||||
|
unicode_to_xml(u'String with disallowed control chars: \u0000\u0007')
|
||||||
|
'String with disallowed control chars: ??'
|
||||||
|
|
||||||
|
If you do need to be able to reproduce all of the characters at a later
|
||||||
|
date (examples: if the string is a key value in a database or a path on a
|
||||||
|
filesystem) you have many choices. Here are a few that rely on ``utf-7``,
|
||||||
|
a verbose encoding that encodes :term:`control characters` (as well as
|
||||||
|
non-:term:`ASCII` unicode values) to characters from within the
|
||||||
|
:term:`ASCII` printable characters. The good thing about doing this is
|
||||||
|
that the code is pretty simple. You just need to use ``utf-7`` both when
|
||||||
|
encoding the field for xml and when decoding it for use in your python
|
||||||
|
program::
|
||||||
|
|
||||||
|
unicode_to_xml(u'String with unicode: と and control char: \u0007', encoding='utf7')
|
||||||
|
'String with unicode: +MGg and control char: +AAc-'
|
||||||
|
# [...]
|
||||||
|
xml_to_unicode('String with unicode: +MGg and control char: +AAc-', encoding='utf7')
|
||||||
|
u'String with unicode: と and control char: \u0007'
|
||||||
|
|
||||||
|
As you can see, the ``utf-7`` encoding will transform even characters that
|
||||||
|
would be representable in :term:`utf-8`. This can be a drawback if you
|
||||||
|
want unicode characters in the file to be readable without being decoded
|
||||||
|
first. You can work around this with increased complexity in your
|
||||||
|
application code::
|
||||||
|
|
||||||
|
encoding = 'utf-8'
|
||||||
|
u_string = u'String with unicode: と and control char: \u0007'
|
||||||
|
try:
|
||||||
|
# First attempt to encode to utf8
|
||||||
|
data = unicode_to_xml(u_string, encoding=encoding, errors='strict')
|
||||||
|
except XmlEncodeError:
|
||||||
|
# Fallback to utf-7
|
||||||
|
encoding = 'utf-7'
|
||||||
|
data = unicode_to_xml(u_string, encoding=encoding, errors='strict')
|
||||||
|
write_tag('<mytag encoding=%s>%s</mytag>' % (encoding, data))
|
||||||
|
# [...]
|
||||||
|
encoding = tag.attributes.encoding
|
||||||
|
u_string = xml_to_unicode(u_string, encoding=encoding)
|
||||||
|
|
||||||
|
Using code similar to that, you can have some fields encoded using your
|
||||||
|
default encoding and fallback to ``utf-7`` if there are :term:`control
|
||||||
|
characters` present.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
If your goal is to preserve the :term:`control characters` you cannot
|
||||||
|
save the entire file as ``utf-7`` and set the xml encoding parameter
|
||||||
|
to ``utf-7`` if your goal is to preserve the :term:`control
|
||||||
|
characters`. Because XML doesn't allow :term:`control characters`,
|
||||||
|
you have to encode those separate from any encoding work that the XML
|
||||||
|
parser itself knows about.
|
||||||
|
|
||||||
|
.. seealso::
|
||||||
|
|
||||||
|
:func:`bytes_to_xml`
|
||||||
|
if you're dealing with bytes that are non-text or of an unknown
|
||||||
|
encoding that you must preserve on a byte for byte level.
|
||||||
|
:func:`guess_encoding_to_xml`
|
||||||
|
if you're dealing with strings in unknown encodings that you don't
|
||||||
|
need to save with char-for-char fidelity.
|
||||||
|
'''
|
||||||
|
if not string:
|
||||||
|
# Small optimization
|
||||||
|
return ''
|
||||||
|
try:
|
||||||
|
process_control_chars(string, strategy=control_chars)
|
||||||
|
except TypeError:
|
||||||
|
raise XmlEncodeError(k.b_('unicode_to_xml must have a unicode type as'
|
||||||
|
' the first argument. Use bytes_string_to_xml for byte'
|
||||||
|
' strings.'))
|
||||||
|
except ValueError:
|
||||||
|
raise ValueError(k.b_('The control_chars argument to unicode_to_xml'
|
||||||
|
' must be one of ignore, replace, or strict'))
|
||||||
|
except ControlCharError, exc:
|
||||||
|
raise XmlEncodeError(exc.args[0])
|
||||||
|
|
||||||
|
string = string.encode(encoding, 'xmlcharrefreplace')
|
||||||
|
|
||||||
|
# Escape characters that have special meaning in xml
|
||||||
|
if attrib:
|
||||||
|
string = xml.sax.saxutils.escape(string, entities={'"':"""})
|
||||||
|
else:
|
||||||
|
string = xml.sax.saxutils.escape(string)
|
||||||
|
return string
|
||||||
|
|
||||||
|
def xml_to_unicode(byte_string, encoding='utf-8', errors='replace'):
    '''Turn a byte :class:`str` read from an xml file into a :class:`unicode`
    string

    :arg byte_string: byte :class:`str` to decode
    :kwarg encoding: encoding the byte :class:`str` is written in
    :kwarg errors: what to do with characters that are invalid in
        :attr:`encoding`.  Legal values are listed in the :func:`to_unicode`
        documentation.
    :rtype: :class:`unicode` string
    :returns: string decoded from :attr:`byte_string`

    This is the inverse of :func:`unicode_to_xml`: the byte :class:`str`
    (typically read from an xml file) is decoded into :class:`unicode` and
    any html entities are expanded back into their unicode characters.  Note
    that :term:`control characters` which were stripped or replaced before
    the data entered the file cannot be recovered here; if you need
    byte-for-byte fidelity use :func:`xml_to_bytes`/:func:`bytes_to_xml` or
    one of the strategies documented in :func:`unicode_to_xml` instead.
    '''
    # Decode first, then expand entities on the resulting unicode string
    decoded = to_unicode(byte_string, encoding=encoding, errors=errors)
    return html_entities_unescape(decoded)
|
||||||
|
|
||||||
|
def byte_string_to_xml(byte_string, input_encoding='utf-8', errors='replace',
        output_encoding='utf-8', attrib=False, control_chars='replace'):
    '''Make sure a byte :class:`str` is validly encoded for xml output

    :arg byte_string: byte :class:`str` to turn into valid xml output
    :kwarg input_encoding: encoding of :attr:`byte_string`.  Default
        ``utf-8``
    :kwarg errors: how to handle bytes in :attr:`byte_string` that do not
        decode in :attr:`input_encoding`:

        :replace: (default) replace the invalid bytes with ``?``
        :ignore: drop the invalid bytes from the output
        :strict: raise :exc:`UnicodeDecodeError`

    :kwarg output_encoding: encoding of the xml file this string will be
        placed in.  Default ``utf-8``.  Characters that cannot be encoded
        are emitted as xml character references.
    :kwarg attrib: if :data:`True`, quote for use in an xml attribute;
        if :data:`False` (default), quote for use in an xml text field.
    :kwarg control_chars: what to do with :term:`control characters` (which
        xml forbids): ``replace`` (default, become ``?``), ``ignore``
        (dropped), or ``strict`` (raise an error).
    :raises XmlEncodeError: if :attr:`control_chars` is ``strict`` and the
        string contains :term:`control characters`
    :raises UnicodeDecodeError: if :attr:`errors` is ``strict`` and
        :attr:`byte_string` has bytes undecodable in :attr:`input_encoding`
    :rtype: byte :class:`str`
    :returns: the string re-encoded in :attr:`output_encoding` with anything
        xml cannot hold taken care of

    Typical uses include transcoding text for xml output::

        utf8_string = byte_string_to_xml(latin1_string, input_encoding='latin-1')

    or simply scrubbing :term:`control characters` from already correctly
    encoded text::

        cleaned_string = byte_string_to_xml(string, input_encoding='utf-8', output_encoding='utf-8')

    .. seealso::

        :func:`unicode_to_xml`
            for other ideas on using this function
    '''
    # Reject unicode input up front; unicode_to_xml is the right tool there
    if not isinstance(byte_string, str):
        raise XmlEncodeError(k.b_('byte_string_to_xml can only take a byte'
                ' string as its first argument. Use unicode_to_xml for'
                ' unicode strings'))

    # Decode to unicode, then delegate all the xml-safety work
    as_unicode = unicode(byte_string, input_encoding, errors)
    return unicode_to_xml(as_unicode, encoding=output_encoding,
            attrib=attrib, control_chars=control_chars)
|
||||||
|
|
||||||
|
def xml_to_byte_string(byte_string, input_encoding='utf-8', errors='replace',
        output_encoding='utf-8'):
    '''Transform a byte :class:`str` from an xml file into a byte
    :class:`str` in a chosen encoding

    :arg byte_string: byte :class:`str` to decode
    :kwarg input_encoding: encoding that :attr:`byte_string` is written in
    :kwarg errors: what to do with characters invalid in the encoding; see
        the :func:`to_unicode` docstring for legal values
    :kwarg output_encoding: encoding for the returned byte :class:`str`
    :returns: byte :class:`str` re-encoded from :attr:`byte_string`

    This reverses :func:`unicode_to_xml` and then re-encodes: html entities
    are expanded and the text is transcoded into :attr:`output_encoding`.
    :term:`Control characters` removed before the data went into the file
    cannot be restored; use :func:`xml_to_bytes`/:func:`bytes_to_xml` or one
    of the strategies documented in :func:`unicode_to_xml` if you need that.
    '''
    # Decode + unescape, then immediately encode to the requested encoding
    return to_bytes(xml_to_unicode(byte_string, input_encoding, errors),
            output_encoding, errors)
|
||||||
|
|
||||||
|
def bytes_to_xml(byte_string, *args, **kwargs):
    '''Return a byte :class:`str` encoded so it is valid inside of any xml
    file

    :arg byte_string: byte :class:`str` to transform
    :arg \*args, \*\*kwargs: extra arguments are handed straight to the
        function that actually performs the encoding.  These can tweak the
        output but, as a general rule, should be avoided because the
        underlying encoding function is not guaranteed to remain the same.
    :rtype: byte :class:`str` consisting of all :term:`ASCII` characters
    :returns: base64 representation of the input

    Use this to embed binary information in xml documents: the input is
    preserved byte-for-byte.  If the bytes are really text and exact byte
    preservation doesn't matter, :func:`byte_string_to_xml` or
    :func:`guess_encoding_to_xml` are probably what you want instead.

    .. note::

        The current implementation is :func:`base64.b64encode` and there are
        no plans to change it, but that isn't guaranteed.  Pair this with
        :func:`xml_to_bytes` to be sure a round-trip always works.
    '''
    # Thin wrapper so callers don't depend on the encoding choice directly
    encoded = b64encode(byte_string, *args, **kwargs)
    return encoded
|
||||||
|
|
||||||
|
def xml_to_bytes(byte_string, *args, **kwargs):
    '''Decode a string encoded using :func:`bytes_to_xml`

    :arg byte_string: byte :class:`str` to transform.  This should be
        a base64 encoded sequence of bytes originally generated by
        :func:`bytes_to_xml`.
    :arg \*args, \*\*kwargs: extra arguments are handed straight to the
        function that actually performs the decoding.  These can tweak the
        output but, as a general rule, should be avoided because the
        underlying decoding function is not guaranteed to remain the same.
    :rtype: byte :class:`str`
    :returns: the decoded bytes

    Fields that were placed into an xml document with :func:`bytes_to_xml`
    are recovered with this function: the base64 text turns back into the
    original byte :class:`str`.

    .. note::

        The current implementation is :func:`base64.b64decode` and there are
        no plans to change it, but that isn't guaranteed.  Pair this with
        :func:`bytes_to_xml` to be sure a round-trip always works.
    '''
    # Thin wrapper so callers don't depend on the encoding choice directly
    decoded = b64decode(byte_string, *args, **kwargs)
    return decoded
|
||||||
|
|
||||||
|
def guess_encoding_to_xml(string, output_encoding='utf-8', attrib=False,
        control_chars='replace'):
    '''Return a byte :class:`str` suitable for inclusion in xml

    :arg string: :class:`unicode` or byte :class:`str` to transform into
        a byte :class:`str` suitable for inclusion in xml.  For a byte
        :class:`str` the encoding is guessed, falling back to ``latin-1``
        when the guess fails.
    :kwarg output_encoding: encoding for the output byte :class:`str`.
        This should match the encoding of your xml file.
    :kwarg attrib: if :data:`True`, escape the item for use in an xml
        attribute; if :data:`False` (default), escape for use in a text
        node.
    :returns: :term:`utf-8` encoded byte :class:`str`
    '''
    # unicode input needs no encoding guess; hand it straight off
    if isinstance(string, unicode):
        return unicode_to_xml(string, encoding=output_encoding,
                attrib=attrib, control_chars=control_chars)

    # byte strings: guess the input encoding, then convert
    detected_encoding = guess_encoding(string)
    return byte_string_to_xml(string, input_encoding=detected_encoding,
            errors='replace', output_encoding=output_encoding,
            attrib=attrib, control_chars=control_chars)
|
||||||
|
|
||||||
|
def to_xml(string, encoding='utf-8', attrib=False, control_chars='ignore'):
    '''*Deprecated*: Use :func:`guess_encoding_to_xml` instead
    '''
    # Warn at the caller's frame (stacklevel=2), then delegate
    message = k.b_('kitchen.text.converters.to_xml is deprecated. Use'
            ' kitchen.text.converters.guess_encoding_to_xml instead.')
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return guess_encoding_to_xml(string, output_encoding=encoding,
            attrib=attrib, control_chars=control_chars)
|
||||||
|
|
||||||
|
# Public API of this module, as exported by ``from ... import *`` and used
# by the documentation build.
__all__ = ('BYTE_EXCEPTION_CONVERTERS', 'EXCEPTION_CONVERTERS',
        'byte_string_to_xml', 'bytes_to_xml', 'exception_to_bytes',
        'exception_to_unicode', 'getwriter', 'guess_encoding_to_xml',
        'to_bytes', 'to_str', 'to_unicode', 'to_utf8', 'to_xml',
        'unicode_to_xml', 'xml_to_byte_string', 'xml_to_bytes',
        'xml_to_unicode')
|
901
kitchen/text/display.py
Normal file
901
kitchen/text/display.py
Normal file
|
@ -0,0 +1,901 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# Copyright (c) 2010 Red Hat, Inc.
|
||||||
|
# Copyright (c) 2010 Ville Skyttä
|
||||||
|
# Copyright (c) 2009 Tim Lauridsen
|
||||||
|
# Copyright (c) 2007 Marcus Kuhn
|
||||||
|
#
|
||||||
|
# kitchen is free software; you can redistribute it and/or modify it under the
|
||||||
|
# terms of the GNU Lesser General Public License as published by the Free
|
||||||
|
# Software Foundation; either version 2.1 of the License, or (at your option)
|
||||||
|
# any later version.
|
||||||
|
#
|
||||||
|
# kitchen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
|
||||||
|
# more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Lesser General Public License
|
||||||
|
# along with kitchen; if not, see <http://www.gnu.org/licenses/>
|
||||||
|
#
|
||||||
|
# Authors:
|
||||||
|
# James Antill <james@fedoraproject.org>
|
||||||
|
# Marcus Kuhn
|
||||||
|
# Toshio Kuratomi <toshio@fedoraproject.org>
|
||||||
|
# Tim Lauridsen
|
||||||
|
# Ville Skyttä
|
||||||
|
#
|
||||||
|
# Portions of this are from yum/i18n.py
|
||||||
|
'''
|
||||||
|
-----------------------
|
||||||
|
Format Text for Display
|
||||||
|
-----------------------
|
||||||
|
|
||||||
|
Functions related to displaying unicode text. Unicode characters don't all
|
||||||
|
have the same width so we need helper functions for displaying them.
|
||||||
|
|
||||||
|
.. versionadded:: 0.2 kitchen.display API 1.0.0
|
||||||
|
'''
|
||||||
|
import itertools
|
||||||
|
import unicodedata
|
||||||
|
|
||||||
|
from kitchen import b_
|
||||||
|
from kitchen.text.converters import to_unicode, to_bytes
|
||||||
|
from kitchen.text.exceptions import ControlCharError
|
||||||
|
|
||||||
|
# This is ported from ustr_utf8_* which I got from:
|
||||||
|
# http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
|
||||||
|
# I've tried to leave it close to the original C (same names etc.) so that
|
||||||
|
# it is easy to read/compare both versions... James Antill
|
||||||
|
|
||||||
|
#
|
||||||
|
# Reimplemented quite a bit of this for speed. Use the bzr log or annotate
|
||||||
|
# commands to see what I've changed since importing this file.-Toshio Kuratomi
|
||||||
|
|
||||||
|
# ----------------------------- BEG utf8 ---------------------------------
|
||||||
|
# This is an implementation of wcwidth() and wcswidth() (defined in
|
||||||
|
# IEEE Std 1002.1-2001) for Unicode.
|
||||||
|
#
|
||||||
|
# http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html
|
||||||
|
# http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html
|
||||||
|
#
|
||||||
|
# In fixed-width output devices, Latin characters all occupy a single
|
||||||
|
# "cell" position of equal width, whereas ideographic CJK characters
|
||||||
|
# occupy two such cells. Interoperability between terminal-line
|
||||||
|
# applications and (teletype-style) character terminals using the
|
||||||
|
# UTF-8 encoding requires agreement on which character should advance
|
||||||
|
# the cursor by how many cell positions. No established formal
|
||||||
|
# standards exist at present on which Unicode character shall occupy
|
||||||
|
# how many cell positions on character terminals. These routines are
|
||||||
|
# a first attempt of defining such behavior based on simple rules
|
||||||
|
# applied to data provided by the Unicode Consortium.
|
||||||
|
#
|
||||||
|
# [...]
|
||||||
|
#
|
||||||
|
# Markus Kuhn -- 2007-05-26 (Unicode 5.0)
|
||||||
|
#
|
||||||
|
# Permission to use, copy, modify, and distribute this software
|
||||||
|
# for any purpose and without fee is hereby granted. The author
|
||||||
|
# disclaims all warranties with regard to this software.
|
||||||
|
#
|
||||||
|
# Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
|
||||||
|
|
||||||
|
# Renamed but still pretty much JA's port of MK's code
|
||||||
|
def _interval_bisearch(value, table):
|
||||||
|
'''Binary search in an interval table.
|
||||||
|
|
||||||
|
:arg value: numeric value to search for
|
||||||
|
:arg table: Ordered list of intervals. This is a list of two-tuples. The
|
||||||
|
elements of the two-tuple define an interval's start and end points.
|
||||||
|
:returns: If :attr:`value` is found within an interval in the :attr:`table`
|
||||||
|
return :data:`True`. Otherwise, :data:`False`
|
||||||
|
|
||||||
|
This function checks whether a numeric value is present within a table
|
||||||
|
of intervals. It checks using a binary search algorithm, dividing the
|
||||||
|
list of values in half and checking against the values until it determines
|
||||||
|
whether the value is in the table.
|
||||||
|
'''
|
||||||
|
minimum = 0
|
||||||
|
maximum = len(table) - 1
|
||||||
|
if value < table[minimum][0] or value > table[maximum][1]:
|
||||||
|
return False
|
||||||
|
|
||||||
|
while maximum >= minimum:
|
||||||
|
mid = (minimum + maximum) / 2
|
||||||
|
if value > table[mid][1]:
|
||||||
|
minimum = mid + 1
|
||||||
|
elif value < table[mid][0]:
|
||||||
|
maximum = mid - 1
|
||||||
|
else:
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Static, pre-generated copy of the combining-character interval table; see
# _generate_combining_table() below for how it is produced.
_COMBINING = (
    (0x300, 0x36f), (0x483, 0x489), (0x591, 0x5bd),
    (0x5bf, 0x5bf), (0x5c1, 0x5c2), (0x5c4, 0x5c5),
    (0x5c7, 0x5c7), (0x600, 0x603), (0x610, 0x61a),
    (0x64b, 0x65e), (0x670, 0x670), (0x6d6, 0x6e4),
    (0x6e7, 0x6e8), (0x6ea, 0x6ed), (0x70f, 0x70f),
    (0x711, 0x711), (0x730, 0x74a), (0x7a6, 0x7b0),
    (0x7eb, 0x7f3), (0x816, 0x819), (0x81b, 0x823),
    (0x825, 0x827), (0x829, 0x82d), (0x901, 0x902),
    (0x93c, 0x93c), (0x941, 0x948), (0x94d, 0x94d),
    (0x951, 0x954), (0x962, 0x963), (0x981, 0x981),
    (0x9bc, 0x9bc), (0x9c1, 0x9c4), (0x9cd, 0x9cd),
    (0x9e2, 0x9e3), (0xa01, 0xa02), (0xa3c, 0xa3c),
    (0xa41, 0xa42), (0xa47, 0xa48), (0xa4b, 0xa4d),
    (0xa70, 0xa71), (0xa81, 0xa82), (0xabc, 0xabc),
    (0xac1, 0xac5), (0xac7, 0xac8), (0xacd, 0xacd),
    (0xae2, 0xae3), (0xb01, 0xb01), (0xb3c, 0xb3c),
    (0xb3f, 0xb3f), (0xb41, 0xb43), (0xb4d, 0xb4d),
    (0xb56, 0xb56), (0xb82, 0xb82), (0xbc0, 0xbc0),
    (0xbcd, 0xbcd), (0xc3e, 0xc40), (0xc46, 0xc48),
    (0xc4a, 0xc4d), (0xc55, 0xc56), (0xcbc, 0xcbc),
    (0xcbf, 0xcbf), (0xcc6, 0xcc6), (0xccc, 0xccd),
    (0xce2, 0xce3), (0xd41, 0xd43), (0xd4d, 0xd4d),
    (0xdca, 0xdca), (0xdd2, 0xdd4), (0xdd6, 0xdd6),
    (0xe31, 0xe31), (0xe34, 0xe3a), (0xe47, 0xe4e),
    (0xeb1, 0xeb1), (0xeb4, 0xeb9), (0xebb, 0xebc),
    (0xec8, 0xecd), (0xf18, 0xf19), (0xf35, 0xf35),
    (0xf37, 0xf37), (0xf39, 0xf39), (0xf71, 0xf7e),
    (0xf80, 0xf84), (0xf86, 0xf87), (0xf90, 0xf97),
    (0xf99, 0xfbc), (0xfc6, 0xfc6), (0x102d, 0x1030),
    (0x1032, 0x1032), (0x1036, 0x1037), (0x1039, 0x103a),
    (0x1058, 0x1059), (0x108d, 0x108d), (0x1160, 0x11ff),
    (0x135f, 0x135f), (0x1712, 0x1714), (0x1732, 0x1734),
    (0x1752, 0x1753), (0x1772, 0x1773), (0x17b4, 0x17b5),
    (0x17b7, 0x17bd), (0x17c6, 0x17c6), (0x17c9, 0x17d3),
    (0x17dd, 0x17dd), (0x180b, 0x180d), (0x18a9, 0x18a9),
    (0x1920, 0x1922), (0x1927, 0x1928), (0x1932, 0x1932),
    (0x1939, 0x193b), (0x1a17, 0x1a18), (0x1a60, 0x1a60),
    (0x1a75, 0x1a7c), (0x1a7f, 0x1a7f), (0x1b00, 0x1b03),
    (0x1b34, 0x1b34), (0x1b36, 0x1b3a), (0x1b3c, 0x1b3c),
    (0x1b42, 0x1b42), (0x1b44, 0x1b44), (0x1b6b, 0x1b73),
    (0x1baa, 0x1baa), (0x1c37, 0x1c37), (0x1cd0, 0x1cd2),
    (0x1cd4, 0x1ce0), (0x1ce2, 0x1ce8), (0x1ced, 0x1ced),
    (0x1dc0, 0x1de6), (0x1dfd, 0x1dff), (0x200b, 0x200f),
    (0x202a, 0x202e), (0x2060, 0x2063), (0x206a, 0x206f),
    (0x20d0, 0x20f0), (0x2cef, 0x2cf1), (0x2de0, 0x2dff),
    (0x302a, 0x302f), (0x3099, 0x309a), (0xa66f, 0xa66f),
    (0xa67c, 0xa67d), (0xa6f0, 0xa6f1), (0xa806, 0xa806),
    (0xa80b, 0xa80b), (0xa825, 0xa826), (0xa8c4, 0xa8c4),
    (0xa8e0, 0xa8f1), (0xa92b, 0xa92d), (0xa953, 0xa953),
    (0xa9b3, 0xa9b3), (0xa9c0, 0xa9c0), (0xaab0, 0xaab0),
    (0xaab2, 0xaab4), (0xaab7, 0xaab8), (0xaabe, 0xaabf),
    (0xaac1, 0xaac1), (0xabed, 0xabed), (0xfb1e, 0xfb1e),
    (0xfe00, 0xfe0f), (0xfe20, 0xfe26), (0xfeff, 0xfeff),
    (0xfff9, 0xfffb), (0x101fd, 0x101fd), (0x10a01, 0x10a03),
    (0x10a05, 0x10a06), (0x10a0c, 0x10a0f), (0x10a38, 0x10a3a),
    (0x10a3f, 0x10a3f), (0x110b9, 0x110ba), (0x1d165, 0x1d169),
    (0x1d16d, 0x1d182), (0x1d185, 0x1d18b), (0x1d1aa, 0x1d1ad),
    (0x1d242, 0x1d244), (0xe0001, 0xe0001), (0xe0020, 0xe007f),
    (0xe0100, 0xe01ef), )
'''
Internal table, provided by this module to list :term:`code points` which
combine with other characters and therefore should have no :term:`textual
width`.  This is a sorted :class:`tuple` of non-overlapping intervals.  Each
interval is a :class:`tuple` listing a starting :term:`code point` and ending
:term:`code point`.  Every :term:`code point` between the two end points is
a combining character.

.. seealso::

    :func:`~kitchen.text.display._generate_combining_table`
        for how this table is generated

This table was last regenerated on python-2.7.0 with
:data:`unicodedata.unidata_version` 5.1.0
'''
|
||||||
|
|
||||||
|
# New function from Toshio Kuratomi (LGPLv2+)
|
||||||
|
# New function from Toshio Kuratomi (LGPLv2+)
def _generate_combining_table():
    '''Combine Markus Kuhn's data with :mod:`unicodedata` to make combining
    char list

    :rtype: :class:`tuple` of tuples
    :returns: :class:`tuple` of intervals of :term:`code points` that are
        combining character.  Each interval is a 2-:class:`tuple` of the
        starting :term:`code point` and the ending :term:`code point` for
        the combining characters.

    In normal use, this function serves to tell how we're generating the
    combining char list.  For speed reasons, we use this to generate a
    static list and just use that later.

    Markus Kuhn's list of combining characters is more complete than what's
    in the python :mod:`unicodedata` library but the python
    :mod:`unicodedata` is synced against later versions of the unicode
    database

    This is used to generate the :data:`~kitchen.text.display._COMBINING`
    table.
    '''
    # Marcus Kuhn's sorted list of non-overlapping intervals of non-spacing
    # characters generated from Unicode 5.0 data by:
    # "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c"
    markus_kuhn_combining_5_0 = (
        ( 0x0300, 0x036F ), ( 0x0483, 0x0486 ), ( 0x0488, 0x0489 ),
        ( 0x0591, 0x05BD ), ( 0x05BF, 0x05BF ), ( 0x05C1, 0x05C2 ),
        ( 0x05C4, 0x05C5 ), ( 0x05C7, 0x05C7 ), ( 0x0600, 0x0603 ),
        ( 0x0610, 0x0615 ), ( 0x064B, 0x065E ), ( 0x0670, 0x0670 ),
        ( 0x06D6, 0x06E4 ), ( 0x06E7, 0x06E8 ), ( 0x06EA, 0x06ED ),
        ( 0x070F, 0x070F ), ( 0x0711, 0x0711 ), ( 0x0730, 0x074A ),
        ( 0x07A6, 0x07B0 ), ( 0x07EB, 0x07F3 ), ( 0x0901, 0x0902 ),
        ( 0x093C, 0x093C ), ( 0x0941, 0x0948 ), ( 0x094D, 0x094D ),
        ( 0x0951, 0x0954 ), ( 0x0962, 0x0963 ), ( 0x0981, 0x0981 ),
        ( 0x09BC, 0x09BC ), ( 0x09C1, 0x09C4 ), ( 0x09CD, 0x09CD ),
        ( 0x09E2, 0x09E3 ), ( 0x0A01, 0x0A02 ), ( 0x0A3C, 0x0A3C ),
        ( 0x0A41, 0x0A42 ), ( 0x0A47, 0x0A48 ), ( 0x0A4B, 0x0A4D ),
        ( 0x0A70, 0x0A71 ), ( 0x0A81, 0x0A82 ), ( 0x0ABC, 0x0ABC ),
        ( 0x0AC1, 0x0AC5 ), ( 0x0AC7, 0x0AC8 ), ( 0x0ACD, 0x0ACD ),
        ( 0x0AE2, 0x0AE3 ), ( 0x0B01, 0x0B01 ), ( 0x0B3C, 0x0B3C ),
        ( 0x0B3F, 0x0B3F ), ( 0x0B41, 0x0B43 ), ( 0x0B4D, 0x0B4D ),
        ( 0x0B56, 0x0B56 ), ( 0x0B82, 0x0B82 ), ( 0x0BC0, 0x0BC0 ),
        ( 0x0BCD, 0x0BCD ), ( 0x0C3E, 0x0C40 ), ( 0x0C46, 0x0C48 ),
        ( 0x0C4A, 0x0C4D ), ( 0x0C55, 0x0C56 ), ( 0x0CBC, 0x0CBC ),
        ( 0x0CBF, 0x0CBF ), ( 0x0CC6, 0x0CC6 ), ( 0x0CCC, 0x0CCD ),
        ( 0x0CE2, 0x0CE3 ), ( 0x0D41, 0x0D43 ), ( 0x0D4D, 0x0D4D ),
        ( 0x0DCA, 0x0DCA ), ( 0x0DD2, 0x0DD4 ), ( 0x0DD6, 0x0DD6 ),
        ( 0x0E31, 0x0E31 ), ( 0x0E34, 0x0E3A ), ( 0x0E47, 0x0E4E ),
        ( 0x0EB1, 0x0EB1 ), ( 0x0EB4, 0x0EB9 ), ( 0x0EBB, 0x0EBC ),
        ( 0x0EC8, 0x0ECD ), ( 0x0F18, 0x0F19 ), ( 0x0F35, 0x0F35 ),
        ( 0x0F37, 0x0F37 ), ( 0x0F39, 0x0F39 ), ( 0x0F71, 0x0F7E ),
        ( 0x0F80, 0x0F84 ), ( 0x0F86, 0x0F87 ), ( 0x0F90, 0x0F97 ),
        ( 0x0F99, 0x0FBC ), ( 0x0FC6, 0x0FC6 ), ( 0x102D, 0x1030 ),
        ( 0x1032, 0x1032 ), ( 0x1036, 0x1037 ), ( 0x1039, 0x1039 ),
        ( 0x1058, 0x1059 ), ( 0x1160, 0x11FF ), ( 0x135F, 0x135F ),
        ( 0x1712, 0x1714 ), ( 0x1732, 0x1734 ), ( 0x1752, 0x1753 ),
        ( 0x1772, 0x1773 ), ( 0x17B4, 0x17B5 ), ( 0x17B7, 0x17BD ),
        ( 0x17C6, 0x17C6 ), ( 0x17C9, 0x17D3 ), ( 0x17DD, 0x17DD ),
        ( 0x180B, 0x180D ), ( 0x18A9, 0x18A9 ), ( 0x1920, 0x1922 ),
        ( 0x1927, 0x1928 ), ( 0x1932, 0x1932 ), ( 0x1939, 0x193B ),
        ( 0x1A17, 0x1A18 ), ( 0x1B00, 0x1B03 ), ( 0x1B34, 0x1B34 ),
        ( 0x1B36, 0x1B3A ), ( 0x1B3C, 0x1B3C ), ( 0x1B42, 0x1B42 ),
        ( 0x1B6B, 0x1B73 ), ( 0x1DC0, 0x1DCA ), ( 0x1DFE, 0x1DFF ),
        ( 0x200B, 0x200F ), ( 0x202A, 0x202E ), ( 0x2060, 0x2063 ),
        ( 0x206A, 0x206F ), ( 0x20D0, 0x20EF ), ( 0x302A, 0x302F ),
        ( 0x3099, 0x309A ), ( 0xA806, 0xA806 ), ( 0xA80B, 0xA80B ),
        ( 0xA825, 0xA826 ), ( 0xFB1E, 0xFB1E ), ( 0xFE00, 0xFE0F ),
        ( 0xFE20, 0xFE23 ), ( 0xFEFF, 0xFEFF ), ( 0xFFF9, 0xFFFB ),
        ( 0x10A01, 0x10A03 ), ( 0x10A05, 0x10A06 ), ( 0x10A0C, 0x10A0F ),
        ( 0x10A38, 0x10A3A ), ( 0x10A3F, 0x10A3F ), ( 0x1D167, 0x1D169 ),
        ( 0x1D173, 0x1D182 ), ( 0x1D185, 0x1D18B ), ( 0x1D1AA, 0x1D1AD ),
        ( 0x1D242, 0x1D244 ), ( 0xE0001, 0xE0001 ), ( 0xE0020, 0xE007F ),
        ( 0xE0100, 0xE01EF ))
    # Sweep the whole BMP + supplementary range, merging Kuhn's data with
    # unicodedata's combining() into maximal contiguous intervals.
    combining = []
    in_interval = False
    interval = []
    for codepoint in xrange(0, 0xFFFFF + 1):
        if _interval_bisearch(codepoint, markus_kuhn_combining_5_0) or \
                unicodedata.combining(unichr(codepoint)):
            if not in_interval:
                # Found first part of an interval
                interval = [codepoint]
                in_interval = True
        else:
            if in_interval:
                # First non-combining codepoint after a run: close the
                # interval at the previous codepoint
                in_interval = False
                interval.append(codepoint - 1)
                combining.append(interval)

    if in_interval:
        # If we're at the end and the interval is open, close it.
        # :W0631: We looped through a static range so we know codepoint is
        # defined here
        #pylint:disable-msg=W0631
        interval.append(codepoint)
        combining.append(interval)

    # Freeze the result: tuple of 2-tuples, matching _COMBINING's shape
    return tuple(itertools.imap(tuple, combining))
|
||||||
|
|
||||||
|
# New function from Toshio Kuratomi (LGPLv2+)
|
||||||
|
# New function from Toshio Kuratomi (LGPLv2+)
def _print_combining_table():
    '''Print out a new :data:`_COMBINING` table

    This will print a new :data:`_COMBINING` table in the format used in
    :file:`kitchen/text/display.py`.  It's useful for updating the
    :data:`_COMBINING` table with updated data from a new python as the
    format won't change from what's already in the file.
    '''
    table = _generate_combining_table()
    # entries counts pairs printed on the current line; wrap after three
    entries = 0
    print '_COMBINING = ('
    for pair in table:
        if entries >= 3:
            entries = 0
            print
        if entries == 0:
            # Trailing comma on the print statement keeps the cursor on the
            # same line so the pairs follow the leading indent
            print ' ',
        entries += 1
        entry = '(0x%x, 0x%x),' % pair
        print entry,
    print ')'
||||||
|
|
||||||
|
# Handling of control chars rewritten. Rest is JA's port of MK's C code.
|
||||||
|
# -Toshio Kuratomi
|
||||||
|
# Handling of control chars rewritten.  Rest is JA's port of MK's C code.
#   -Toshio Kuratomi
def _ucp_width(ucs, control_chars='guess'):
    '''Get the :term:`textual width` of a ucs character

    :arg ucs: integer representing a single unicode :term:`code point`
    :kwarg control_chars: specify how to deal with :term:`control
        characters`.  Possible values are:

        :guess: (default) will take a guess for :term:`control character`
            widths.  Most codes will return zero width.  ``backspace``,
            ``delete``, and ``clear delete`` return -1.  ``escape``
            currently returns -1 as well but this is not guaranteed as it's
            not always correct
        :strict: will raise :exc:`~kitchen.text.exceptions.ControlCharError`
            if a :term:`control character` is encountered

    :raises ControlCharError: if the :term:`code point` is a unicode
        :term:`control character` and :attr:`control_chars` is set to
        'strict'
    :returns: :term:`textual width` of the character.

    .. note::

        It's important to remember this is :term:`textual width` and not
        the number of characters or bytes.
    '''
    # test for 8-bit control characters (C0: < 0x20, DEL-C1: 0x7f-0x9f)
    if ucs < 32 or (ucs < 0xa0 and ucs >= 0x7f):
        # Control character detected
        if control_chars == 'strict':
            raise ControlCharError(b_('_ucp_width does not understand how to'
                    ' assign a width value to control characters.'))
        if ucs in (0x08, 0x07F, 0x94):
            # Backspace, delete, and clear delete remove a single character
            return -1
        if ucs == 0x1b:
            # Escape is tricky.  It removes some number of characters that
            # come after it but the amount is dependent on what is
            # interpreting the code.
            # So this is going to often be wrong but other values will be
            # wrong as well.
            return -1
        # All other control characters get 0 width
        return 0

    if _interval_bisearch(ucs, _COMBINING):
        # Combining characters return 0 width as they will be combined with
        # the width from other characters
        return 0

    # if we arrive here, ucs is not a combining or C0/C1 control character

    # Wide (2-cell) ranges below follow Markus Kuhn's wcwidth(); everything
    # else is a single cell.
    return (1 +
        (ucs >= 0x1100 and
            (ucs <= 0x115f or # Hangul Jamo init. consonants
                ucs == 0x2329 or ucs == 0x232a or
                (ucs >= 0x2e80 and ucs <= 0xa4cf and
                    ucs != 0x303f) or # CJK ... Yi
                (ucs >= 0xac00 and ucs <= 0xd7a3) or # Hangul Syllables
                (ucs >= 0xf900 and ucs <= 0xfaff) or # CJK Compatibility Ideographs
                (ucs >= 0xfe10 and ucs <= 0xfe19) or # Vertical forms
                (ucs >= 0xfe30 and ucs <= 0xfe6f) or # CJK Compatibility Forms
                (ucs >= 0xff00 and ucs <= 0xff60) or # Fullwidth Forms
                (ucs >= 0xffe0 and ucs <= 0xffe6) or
                (ucs >= 0x20000 and ucs <= 0x2fffd) or
                (ucs >= 0x30000 and ucs <= 0x3fffd))))
|
||||||
|
|
||||||
|
# Wholly rewritten by me (LGPLv2+) -Toshio Kuratomi
|
||||||
|
def textual_width(msg, control_chars='guess', encoding='utf-8',
        errors='replace'):
    '''Get the :term:`textual width` of a string

    :arg msg: :class:`unicode` string or byte :class:`str` to get the width of
    :kwarg control_chars: specify how to deal with :term:`control characters`.
        Possible values are:

        :guess: (default) will take a guess for :term:`control character`
            widths.  Most codes will return zero width.  ``backspace``,
            ``delete``, and ``clear delete`` return -1.  ``escape`` currently
            returns -1 as well but this is not guaranteed as it's not always
            correct
        :strict: will raise :exc:`kitchen.text.exceptions.ControlCharError`
            if a :term:`control character` is encountered

    :kwarg encoding: If we are given a byte :class:`str` this is used to
        decode it into :class:`unicode` string.  Any characters that are not
        decodable in this encoding will get a value dependent on the
        :attr:`errors` parameter.
    :kwarg errors: How to treat errors encoding the byte :class:`str` to
        :class:`unicode` string.  Legal values are the same as for
        :func:`kitchen.text.converters.to_unicode`.  The default value of
        ``replace`` will cause undecodable byte sequences to have a width of
        one.  ``ignore`` will have a width of zero.
    :raises ControlCharError: if :attr:`msg` contains a :term:`control
        character` and :attr:`control_chars` is ``strict``.
    :returns: :term:`Textual width` of the :attr:`msg`.  This is the amount of
        space that the string will consume on a monospace display.  It's
        measured in the number of cell positions or columns it will take up on
        a monospace display.  This is **not** the number of glyphs that are in
        the string.

    .. note::

        This function can be wrong sometimes because Unicode does not specify
        a strict width value for all of the :term:`code points`.  In
        particular, we've found that some Tamil characters take up to four
        character cells but we return a lesser amount.
    '''
    # On python 2.6.4, x86_64, I've benchmarked a few alternate
    # implementations::
    #
    #   timeit.repeat('display.textual_width(data)',
    #       'from __main__ import display, data', number=100)
    # I varied data by size and content (1MB of ascii, a few words, 43K utf8,
    # unicode type
    #
    # :this implementation: fastest across the board
    #
    # :list comprehension: 6-16% slower
    #   return sum([_ucp_width(ord(c), control_chars=control_chars)
    #       for c in msg])
    #
    # :generator expression: 9-18% slower
    #   return sum((_ucp_width(ord(c), control_chars=control_chars) for c in
    #       msg))
    #
    # :lambda: 10-19% slower
    #   return sum(itertools.imap(lambda x: _ucp_width(ord(x), control_chars),
    #       msg))
    #
    # :partial application: 13-22% slower
    #   func = functools.partial(_ucp_width, control_chars=control_chars)
    #   return sum(itertools.imap(func, itertools.imap(ord, msg)))
    #
    # :the original code: 4-38% slower
    #   The 4% was for the short, ascii only string.  All the other pieces of
    #   data yielded over 30% slower times.
    #
    # NOTE: keep the itertools pipeline form below -- per the benchmarks
    # above every more "readable" alternative measured slower.

    # Non decodable data is just assigned a single cell width
    msg = to_unicode(msg, encoding=encoding, errors=errors)
    # Add the width of each char
    return sum(
            # calculate width of each char
            itertools.starmap(_ucp_width,
                # Setup the arguments to _ucp_width
                itertools.izip(
                    # int value of each char
                    itertools.imap(ord, msg),
                    # control_chars arg in a form that izip will deal with
                    itertools.repeat(control_chars))))
|
||||||
|
|
||||||
|
# Wholly rewritten by me -Toshio Kuratomi
|
||||||
|
def textual_width_chop(msg, chop, encoding='utf-8', errors='replace'):
    '''Given a string, return it chopped to a given :term:`textual width`

    :arg msg: :class:`unicode` string or byte :class:`str` to chop
    :arg chop: Chop :attr:`msg` if it exceeds this :term:`textual width`
    :kwarg encoding: If we are given a byte :class:`str`, this is used to
        decode it into a :class:`unicode` string.  Any characters that are not
        decodable in this encoding will be assigned a width of one.
    :kwarg errors: How to treat errors encoding the byte :class:`str` to
        :class:`unicode`.  Legal values are the same as for
        :func:`kitchen.text.converters.to_unicode`
    :rtype: :class:`unicode` string
    :returns: :class:`unicode` string of the :attr:`msg` chopped at the given
        :term:`textual width`

    This is what you want to use instead of ``%.*s``, as it does the "right"
    thing with regard to :term:`UTF-8` sequences, :term:`control characters`,
    and characters that take more than one cell position.  Eg::

        >>> # Wrong: only displays 8 characters because it is operating on bytes
        >>> print "%.*s" % (10, 'café ñunru!')
        café ñun
        >>> # Properly operates on graphemes
        >>> '%s' % (textual_width_chop('café ñunru!', 10))
        café ñunru
        >>> # takes too many columns because the kanji need two cell positions
        >>> print '1234567890\\n%.*s' % (10, u'一二三四五六七八九十')
        1234567890
        一二三四五六七八九十
        >>> # Properly chops at 10 columns
        >>> print '1234567890\\n%s' % (textual_width_chop(u'一二三四五六七八九十', 10))
        1234567890
        一二三四五

    '''
    msg = to_unicode(msg, encoding=encoding, errors=errors)

    width = textual_width(msg)
    if width <= chop:
        # Fast path: the whole string already fits
        return msg
    maximum = len(msg)
    if maximum > chop * 2:
        # A character can take at most 2 cell positions so this is the actual
        # maximum
        maximum = chop * 2
    minimum = 0
    eos = maximum
    if eos > chop:
        eos = chop
    width = textual_width(msg[:eos])

    # Binary search on the character index ``eos``: narrow in on the largest
    # prefix msg[:eos] whose textual width does not exceed ``chop``.  Once we
    # are close enough, fix up one character at a time instead of bisecting.
    while True:
        # if current width is high,
        if width > chop:
            # calculate new midpoint
            # NOTE: python2 integer division -- both operands are ints
            mid = minimum + (eos - minimum) / 2
            if mid == eos:
                break
            if (eos - chop) < (eos - mid):
                # Closer to the target than to the midpoint: walk back one
                # character at a time, subtracting each char's width
                while width > chop:
                    width = width - _ucp_width(ord(msg[eos-1]))
                    eos -= 1
                return msg[:eos]
            # subtract distance between eos and mid from width
            width = width - textual_width(msg[mid:eos])
            maximum = eos
            eos = mid
        # if current width is low,
        elif width < chop:
            # Note: at present, the if (eos - chop) < (eos - mid):
            # short-circuit above means that we never use this branch.

            # calculate new midpoint
            # NOTE: python2 integer division -- both operands are ints
            mid = eos + (maximum - eos) / 2
            if mid == eos:
                break
            if (chop - eos) < (mid - eos):
                # Close enough: walk forward one character at a time, adding
                # each char's width until we reach the target
                while width < chop:
                    new_width = _ucp_width(ord(msg[eos]))
                    width = width + new_width
                    eos += 1
                return msg[:eos]

            # add distance between eos and new mid to width
            width = width + textual_width(msg[eos:mid])
            minimum = eos
            eos = mid
            if eos > maximum:
                eos = maximum
                break
        # if current is just right
        else:
            return msg[:eos]
    return msg[:eos]
|
||||||
|
|
||||||
|
# I made some adjustments for using unicode but largely unchanged from JA's
|
||||||
|
# port of MK's code -Toshio
|
||||||
|
def textual_width_fill(msg, fill, chop=None, left=True, prefix='', suffix=''):
    '''Pad (and optionally first chop) a string to a given :term:`textual
    width`

    :arg msg: :class:`unicode` string to format
    :arg fill: pad the string until its :term:`textual width` reaches this
        length
    :kwarg chop: before doing anything else, chop the string to this
        :term:`textual width`.  Default: do not chop the string at all
    :kwarg left: If :data:`True` (default) left justify the string and put
        the padding on the right.  If :data:`False`, pad on the left side.
    :kwarg prefix: Attach this string before the field we're filling
    :kwarg suffix: Append this string to the end of the field we're filling
    :rtype: :class:`unicode` string
    :returns: :attr:`msg` formatted to fill the specified width.  If no
        :attr:`chop` is specified, the string could exceed the fill length
        when completed.  If :attr:`prefix` or :attr:`suffix` are printable
        characters, the string could be longer than the fill width.

    .. note::

        :attr:`prefix` and :attr:`suffix` are meant for "invisible"
        characters like highlighting or color changing escape codes.  The
        fill characters are appended outside of any :attr:`prefix` or
        :attr:`suffix` elements, which lets you highlight only :attr:`msg`
        inside of the field you're filling.

    .. warning::

        :attr:`msg`, :attr:`prefix`, and :attr:`suffix` must all be
        representable as :class:`unicode` characters.  If you need to work
        with arbitrary byte sequences instead, use
        :func:`~kitchen.text.display.byte_string_textual_width_fill`.

    Use this function instead of ``%*.*s`` when you need correct handling of
    :term:`UTF-8` sequences, :term:`control characters`, and characters that
    take up more than one cell position on a monospace display.
    '''
    msg = to_unicode(msg)
    if chop is not None:
        msg = textual_width_chop(msg, chop)
    current_width = textual_width(msg)
    if current_width >= fill:
        # Field is already full -- no padding, just the decorations (if any)
        if not (prefix or suffix):
            return msg
        return u''.join([prefix, msg, suffix])
    padding = u' ' * (fill - current_width)
    if left:
        return u''.join([prefix, msg, suffix, padding])
    return u''.join([padding, prefix, msg, suffix])
|
||||||
|
|
||||||
|
def _textual_width_le(width, *args):
    '''Cheap test of whether some strings fit within a :term:`textual width`

    :arg width: :term:`textual width` to compare against.
    :arg \*args: :class:`unicode` strings whose combined :term:`textual
        width` we are testing
    :returns: :data:`True` if the total width of :attr:`args` is less than or
        equal to :attr:`width`.  Otherwise :data:`False`.

    Asking "does X fit in Y" via :func:`textual_width` is comparatively slow,
    so we try several cheap bounds first.  After canonical (NFC) composition,
    each character is assumed to occupy one or two cells, which gives us:

    1) If there are more composed characters than ``width``, the real
       :term:`textual width` cannot fit.
    2) If twice the composed character count still fits within ``width``,
       the real width must fit.
    3) utf8 encodes each character in at least as many bytes as its
       :term:`textual width`, so a byte length within ``width`` also means
       the string fits.

    Only when every shortcut is inconclusive do we fall back to the full
    :func:`textual_width` computation.
    '''
    candidate = unicodedata.normalize('NFC', ''.join(args))
    nchars = len(candidate)
    if nchars > width:
        # Shortcut 1: more characters than cells available
        return False
    if nchars * 2 <= width:
        # Shortcut 2: even all-double-width characters would fit
        return True
    if len(to_bytes(candidate)) <= width:
        # Shortcut 3: utf8 byte count bounds the textual width from above
        return True
    # Inconclusive -- do the full computation
    return textual_width(candidate) <= width
|
||||||
|
|
||||||
|
def wrap(text, width=70, initial_indent=u'', subsequent_indent=u'',
        encoding='utf-8', errors='replace'):
    '''Works like we want :func:`textwrap.wrap` to work,

    :arg text: :class:`unicode` string or byte :class:`str` to wrap
    :kwarg width: :term:`textual width` at which to wrap.  Default: 70
    :kwarg initial_indent: string to use to indent the first line.  Default:
        do not indent.
    :kwarg subsequent_indent: string to use to wrap subsequent lines.
        Default: do not indent
    :kwarg encoding: Encoding to use if :attr:`text` is a byte :class:`str`
    :kwarg errors: error handler to use if :attr:`text` is a byte :class:`str`
        and contains some undecodable characters.
    :rtype: :class:`list` of :class:`unicode` strings
    :returns: list of lines that have been text wrapped and indented.

    :func:`textwrap.wrap` from the |stdlib|_ has two drawbacks that this
    attempts to fix:

    1. It does not handle :term:`textual width`.  It only operates on bytes or
       characters which are both inadequate (due to multi-byte and double
       width characters).
    2. It malforms lists and blocks.
    '''
    # Tested with:
    # yum info robodoc gpicview php-pear-Net-Socket wmctrl ustr moreutils
    #   mediawiki-HNP ocspd insight yum mousepad
    # ...at 120, 80 and 40 chars.
    # Also, notable among lots of others, searching for "\n  ":
    #   exim-clamav, jpackage-utils, tcldom, synaptics, "quake3",
    #   perl-Class-Container, ez-ipupdate, perl-Net-XMPP, "kipi-plugins",
    #   perl-Apache-DBI, netcdf, python-configobj, "translate-toolkit", alpine,
    #   "udunits", "conntrack-tools"
    #
    # Note that, we "fail" on:
    #   alsa-plugins-jack, setools*, dblatex, uisp, "perl-Getopt-GUI-Long",
    #   suitesparse, "synce-serial", writer2latex, xenwatch, ltsp-utils

    def _indent_at_beg(line):
        '''Return the indent to use for this and (possibly) subsequent lines

        :arg line: :class:`unicode` line of text to process
        :rtype: tuple
        :returns: tuple of count of whitespace before getting to the start of
            this line followed by a count to the following indent if this
            block of text is an entry in a list.
        '''
        # Find the first non-whitespace character
        try:
            char = line.strip()[0]
        except IndexError:
            # All whitespace
            return 0, 0
        else:
            count = line.find(char)

        # if we have a bullet character, check for list
        if char not in u'-*.o\u2022\u2023\u2218':
            # No bullet; not a list
            return count, 0

        # List: Keep searching until we hit the innermost list
        # (recurses past the bullet to find nested list markers)
        nxt = _indent_at_beg(line[count+1:])
        nxt = nxt[1] or nxt[0]
        if nxt:
            return count, count + 1 + nxt
        return count, 0

    initial_indent = to_unicode(initial_indent, encoding=encoding,
            errors=errors)
    subsequent_indent = to_unicode(subsequent_indent, encoding=encoding,
            errors=errors)
    subsequent_indent_width = textual_width(subsequent_indent)

    text = to_unicode(text, encoding=encoding, errors=errors).rstrip(u'\n')
    lines = text.expandtabs().split(u'\n')

    ret = []
    indent = initial_indent
    wrap_last = False
    # cur_sab: leading-whitespace count of the current line (presumably
    # "spaces at beginning" -- TODO confirm naming); cur_spc_indent: indent to
    # use for continuation lines when the line starts a list entry.
    cur_sab = 0
    cur_spc_indent = 0
    for line in lines:
        line = line.rstrip(u' ')
        (last_sab, last_spc_indent) = (cur_sab, cur_spc_indent)
        (cur_sab, cur_spc_indent) = _indent_at_beg(line)
        force_nl = False # We want to stop wrapping under "certain" conditions:
        if wrap_last and cur_spc_indent:      # if line starts a list or
            force_nl = True
        if wrap_last and cur_sab == len(line):# is empty line
            force_nl = True
        if wrap_last and not last_spc_indent: # if we don't continue a list
            if cur_sab >= 4 and cur_sab != last_sab: # and is "block indented"
                force_nl = True
        if force_nl:
            # Flush the partially-accumulated wrapped line before starting
            # a new paragraph/list entry/block
            ret.append(indent.rstrip(u' '))
            indent = subsequent_indent
            wrap_last = False
        if cur_sab == len(line): # empty line, remove spaces to make it easier.
            line = u''
        if wrap_last:
            # Continuing a wrapped paragraph: strip the leading indent and
            # keep the list-continuation indent from the previous line
            line = line.lstrip(u' ')
            cur_spc_indent = last_spc_indent

        if _textual_width_le(width, indent, line):
            # Whole line fits as-is; no word-by-word wrapping needed
            wrap_last = False
            ret.append(indent + line)
            indent = subsequent_indent
            continue

        # Line is too wide: re-fill it word by word
        wrap_last = True
        words = line.split(u' ')
        line = indent
        spcs = cur_spc_indent
        if not spcs and cur_sab >= 4:
            spcs = cur_sab
        for word in words:
            if (not _textual_width_le(width, line, word) and
                    textual_width(line) > subsequent_indent_width):
                # Word doesn't fit; emit the accumulated line and start a new
                # one at the continuation indent
                ret.append(line.rstrip(u' '))
                line = subsequent_indent + u' ' * spcs
            line += word
            line += u' '
        # Carry the unfinished line into the next iteration via ``indent``
        indent = line.rstrip(u' ') + u' '
    if wrap_last:
        # Flush any wrapped text still pending at end of input
        ret.append(indent.rstrip(u' '))

    return ret
|
||||||
|
|
||||||
|
def fill(text, *args, **kwargs):
    '''Works like we want :func:`textwrap.fill` to work

    :arg text: :class:`unicode` string or byte :class:`str` to process
    :returns: :class:`unicode` string with each line separated by a newline

    .. seealso::

        :func:`kitchen.text.display.wrap`
            for other parameters that you can give this command.

    This is a thin convenience wrapper around
    :func:`kitchen.text.display.wrap`: where that function returns
    a :class:`list` of lines, this one joins them into a single string with
    newline separators.
    '''
    wrapped_lines = wrap(text, *args, **kwargs)
    return u'\n'.join(wrapped_lines)
|
||||||
|
|
||||||
|
#
|
||||||
|
# Byte strings
|
||||||
|
#
|
||||||
|
|
||||||
|
def byte_string_textual_width_fill(msg, fill, chop=None, left=True, prefix='',
        suffix='', encoding='utf-8', errors='replace'):
    '''Expand a byte :class:`str` to a specified :term:`textual width` or chop
    to same

    :arg msg: byte :class:`str` encoded in :term:`UTF-8` that we want formatted
    :arg fill: pad :attr:`msg` until the :term:`textual width` is this long
    :kwarg chop: before doing anything else, chop the string to this length.
        Default: Don't chop the string at all
    :kwarg left: If :data:`True` (default) left justify the string and put the
        padding on the right.  If :data:`False`, pad on the left side.
    :kwarg prefix: Attach this byte :class:`str` before the field we're
        filling
    :kwarg suffix: Append this byte :class:`str` to the end of the field we're
        filling
    :kwarg encoding: encoding used to decode/encode :attr:`msg`,
        :attr:`prefix`, and :attr:`suffix`.  Default: ``utf-8``
    :kwarg errors: error handler used for the conversions.  Legal values are
        the same as for :func:`kitchen.text.converters.to_unicode`.
        Default: ``replace``
    :rtype: byte :class:`str`
    :returns: :attr:`msg` formatted to fill the specified :term:`textual
        width`.  If no :attr:`chop` is specified, the string could exceed the
        fill length when completed.  If :attr:`prefix` or :attr:`suffix` are
        printable characters, the string could be longer than fill width.

    .. note::

        :attr:`prefix` and :attr:`suffix` should be used for "invisible"
        characters like highlighting, color changing escape codes, etc.  The
        fill characters are appended outside of any :attr:`prefix` or
        :attr:`suffix` elements.  This allows you to only highlight
        :attr:`msg` inside of the field you're filling.

    .. seealso::

        :func:`~kitchen.text.display.textual_width_fill`
            For example usage.  This function has only two differences.

            1. it takes byte :class:`str` for :attr:`prefix` and
               :attr:`suffix` so you can pass in arbitrary sequences of
               bytes, not just unicode characters.
            2. it returns a byte :class:`str` instead of a :class:`unicode`
               string.
    '''
    prefix = to_bytes(prefix, encoding=encoding, errors=errors)
    suffix = to_bytes(suffix, encoding=encoding, errors=errors)

    if chop is not None:
        msg = textual_width_chop(msg, chop, encoding=encoding, errors=errors)
    # Bugfix: pass the caller's encoding/errors through to textual_width()
    # and to_bytes().  Previously these calls silently used the utf-8
    # defaults, giving wrong widths/output when another encoding was
    # requested (the textual_width_chop() call above already passed them).
    width = textual_width(msg, encoding=encoding, errors=errors)
    msg = to_bytes(msg, encoding=encoding, errors=errors)

    if width >= fill:
        # Field already full; only the decorations (if any) are added
        if prefix or suffix:
            msg = ''.join([prefix, msg, suffix])
    else:
        extra = ' ' * (fill - width)
        if left:
            msg = ''.join([prefix, msg, suffix, extra])
        else:
            msg = ''.join([extra, prefix, msg, suffix])

    return msg
|
||||||
|
|
||||||
|
# Explicit public API of this module; leading-underscore helpers are
# intentionally excluded.
__all__ = ('byte_string_textual_width_fill', 'fill', 'textual_width',
        'textual_width_chop', 'textual_width_fill', 'wrap')
|
40
kitchen/text/exceptions.py
Normal file
40
kitchen/text/exceptions.py
Normal file
|
@ -0,0 +1,40 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# Copyright (c) 2010 Red Hat, Inc
|
||||||
|
#
|
||||||
|
# kitchen is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU Lesser General Public
|
||||||
|
# License as published by the Free Software Foundation; either
|
||||||
|
# version 2.1 of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# kitchen is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
# Lesser General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Lesser General Public
|
||||||
|
# License along with kitchen; if not, see <http://www.gnu.org/licenses/>
|
||||||
|
#
|
||||||
|
# Authors:
|
||||||
|
# Toshio Kuratomi <toshio@fedoraproject.org>
|
||||||
|
#
|
||||||
|
'''
|
||||||
|
-----------------------
|
||||||
|
Kitchen.text exceptions
|
||||||
|
-----------------------
|
||||||
|
|
||||||
|
Exception classes thrown by kitchen's text processing routines.
|
||||||
|
'''
|
||||||
|
from kitchen import exceptions
|
||||||
|
|
||||||
|
class XmlEncodeError(exceptions.KitchenError):
    '''Exception thrown by error conditions when encoding an xml string.

    Carries no extra state; the subclass exists so callers can catch xml
    encoding problems separately from other kitchen errors.
    '''
    pass
|
||||||
|
|
||||||
|
class ControlCharError(exceptions.KitchenError):
    '''Exception thrown when an ascii control character is encountered.

    Carries no extra state; the subclass exists so callers can catch
    control-character problems separately from other kitchen errors.
    '''
    pass
|
||||||
|
|
||||||
|
# Explicit public API for ``from kitchen.text.exceptions import *``
__all__ = ('XmlEncodeError', 'ControlCharError')
|
313
kitchen/text/misc.py
Normal file
313
kitchen/text/misc.py
Normal file
|
@ -0,0 +1,313 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Copyright (c) 2011 Red Hat, Inc
|
||||||
|
# Copyright (c) 2010 Seth Vidal
|
||||||
|
#
|
||||||
|
# kitchen is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU Lesser General Public
|
||||||
|
# License as published by the Free Software Foundation; either
|
||||||
|
# version 2.1 of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# kitchen is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
# Lesser General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Lesser General Public
|
||||||
|
# License along with kitchen; if not, see <http://www.gnu.org/licenses/>
|
||||||
|
#
|
||||||
|
# Authors:
|
||||||
|
# James Antill
|
||||||
|
# Toshio Kuratomi <toshio@fedoraproject.org>
|
||||||
|
# Seth Vidal
|
||||||
|
#
|
||||||
|
# Portions of this code taken from yum/misc.py and yum/i18n.py
|
||||||
|
'''
|
||||||
|
---------------------------------------------
|
||||||
|
Miscellaneous functions for manipulating text
|
||||||
|
---------------------------------------------
|
||||||
|
|
||||||
|
Collection of text functions that don't fit in another category.
|
||||||
|
'''
|
||||||
|
import htmlentitydefs
|
||||||
|
import itertools
|
||||||
|
import re
|
||||||
|
|
||||||
|
try:
|
||||||
|
import chardet
|
||||||
|
except ImportError:
|
||||||
|
chardet = None
|
||||||
|
|
||||||
|
# We need to access b_() for localizing our strings but we'll end up with
|
||||||
|
# a circular import if we import it directly.
|
||||||
|
import kitchen as k
|
||||||
|
from kitchen.pycompat24 import sets
|
||||||
|
from kitchen.text.exceptions import ControlCharError
|
||||||
|
|
||||||
|
sets.add_builtin_set()
|
||||||
|
|
||||||
|
# Define a threshold for chardet confidence. If we fall below this we decode
|
||||||
|
# byte strings we're guessing about as latin1
|
||||||
|
_CHARDET_THRESHHOLD = 0.6
|
||||||
|
|
||||||
|
# ASCII control codes that are illegal in xml 1.0
|
||||||
|
_CONTROL_CODES = frozenset(range(0, 8) + [11, 12] + range(14, 32))
|
||||||
|
_CONTROL_CHARS = frozenset(itertools.imap(unichr, _CONTROL_CODES))
|
||||||
|
|
||||||
|
# _ENTITY_RE
|
||||||
|
_ENTITY_RE = re.compile(r'(?s)<[^>]*>|&#?\w+;')
|
||||||
|
|
||||||
|
def guess_encoding(byte_string, disable_chardet=False):
    '''Try to guess the encoding of a byte :class:`str`

    :arg byte_string: byte :class:`str` whose encoding we want to determine
    :kwarg disable_chardet: if :data:`True`, never consult :mod:`chardet` to
        guess the encoding.  Useful when you need reproducible results
        whether or not :mod:`chardet` is installed.  Default: :data:`False`.
    :raises TypeError: if :attr:`byte_string` is not a byte :class:`str` type
    :returns: string naming a guessed encoding for :attr:`byte_string`,
        suitable to pass as the encoding argument when encoding and decoding
        unicode strings.

    Strategy: first attempt a strict :term:`UTF-8` decode; on success we
    report ``utf-8``.  Otherwise, if :mod:`chardet` is installed and
    :attr:`disable_chardet` is False, let it take a guess, accepting the
    answer only above a confidence threshold.  As a last resort we rather
    arbitrarily claim ``latin-1`` -- every byte decodes in ``latin-1``, so no
    :exc:`UnicodeErrors` will arise even though the output might be mangled.
    '''
    if not isinstance(byte_string, str):
        raise TypeError(k.b_('byte_string must be a byte string (str)'))
    # Strict utf-8 round-trip test
    try:
        unicode(byte_string, 'utf-8', 'strict')
    except UnicodeDecodeError:
        pass
    else:
        return 'utf-8'
    # Not utf-8: ask chardet, if allowed and available
    if chardet and not disable_chardet:
        detection_info = chardet.detect(byte_string)
        guessed = detection_info['encoding']
        if detection_info['confidence'] >= _CHARDET_THRESHHOLD and guessed:
            return guessed
    # Fallback: latin-1 decodes anything
    return 'latin-1'
|
||||||
|
|
||||||
|
def str_eq(str1, str2, encoding='utf-8', errors='replace'):
    '''Compare two strings, converting to byte :class:`str` if one is
    :class:`unicode`

    :arg str1: First string to compare
    :arg str2: Second string to compare
    :kwarg encoding: encoding used if one string needs converting to a byte
        :class:`str` for the comparison.  Default is :term:`utf-8`.
    :kwarg errors: error handler used when encoding.  See the
        :func:`kitchen.text.converters.to_bytes` documentation for possible
        values.  The default is ``replace``.
    :returns: :data:`True` when the strings compare equal (encoding the
        :class:`unicode` operand if the types differ), otherwise
        :data:`False`.

    Comparing a :class:`unicode` string to a byte :class:`str` normally
    raises :exc:`UnicodeError` (python-2.4 or less) or emits
    :exc:`UnicodeWarning` (python 2.5 and higher) because the implicit
    conversion goes through :term:`ASCII`.  This function sidesteps that by
    encoding via :term:`utf-8` (or another encoding of your choosing)
    instead.

    .. note::

        When the types differ we compare byte :class:`str` values, so results
        can vary with the chosen encoding.

    Plain ``str1 == str2`` is faster if you can tolerate the limitations
    above (python-2.5+ only, and possible :exc:`UnicodeWarning` for
    non-:term:`ASCII` byte strings).
    '''
    # First try the plain comparison; for same-typed operands this is all
    # we need.
    try:
        if not (str1 < str2) and not (str1 > str2):
            return True
        return False
    except UnicodeError:
        pass
    # Mixed types with non-ASCII content: encode the unicode side, then
    # compare as byte strings.
    if isinstance(str1, unicode):
        str1 = str1.encode(encoding, errors)
    else:
        str2 = str2.encode(encoding, errors)
    return str1 == str2
|
||||||
|
|
||||||
|
def process_control_chars(string, strategy='replace'):
    '''Look for and transform :term:`control characters` in a string

    :arg string: string to search for and transform :term:`control characters`
        within
    :kwarg strategy: XML does not allow :term:`ASCII` :term:`control
        characters`.  When we encounter those we need to know what to do.
        Valid options are:

        :replace: (default) Replace the :term:`control characters`
            with ``"?"``
        :ignore: Remove the characters altogether from the output
        :strict: Raise a :exc:`~kitchen.text.exceptions.ControlCharError` when
            we encounter a control character
    :raises TypeError: if :attr:`string` is not a unicode string.
    :raises ValueError: if the strategy is not one of replace, ignore, or
        strict.
    :raises kitchen.text.exceptions.ControlCharError: if the strategy is
        ``strict`` and a :term:`control character` is present in the
        :attr:`string`
    :returns: :class:`unicode` string with no :term:`control characters` in
        it.
    '''
    if not isinstance(string, unicode):
        raise TypeError(k.b_('process_control_char must have a unicode type as'
            ' the first argument.'))
    if strategy == 'ignore':
        # unicode.translate deletes a character when its ordinal maps to None;
        # dict.fromkeys defaults every value to None.
        # (assumes _CONTROL_CODES is a sequence of ordinals -- see its
        # definition earlier in the module)
        control_table = dict.fromkeys(_CONTROL_CODES)
    elif strategy == 'replace':
        control_table = dict.fromkeys(_CONTROL_CODES, u'?')
    elif strategy == 'strict':
        control_table = None
        # Test that there are no control codes present.  Set intersection is
        # consistent with byte_string_valid_xml() below and avoids building
        # an intermediate list just for its truth value.
        data = frozenset(string)
        if data.intersection(_CONTROL_CHARS):
            raise ControlCharError(k.b_('ASCII control code present in string'
                ' input'))
    else:
        raise ValueError(k.b_('The strategy argument to process_control_chars'
            ' must be one of ignore, replace, or strict'))

    # control_table is None for the strict strategy (nothing to translate)
    if control_table:
        string = string.translate(control_table)

    return string
|
||||||
|
|
||||||
|
# Originally written by Fredrik Lundh (January 15, 2003) and placed in the
|
||||||
|
# public domain::
|
||||||
|
#
|
||||||
|
# Unless otherwise noted, source code can be be used freely. Examples, test
|
||||||
|
# scripts and other short code fragments can be considered as being in the
|
||||||
|
# public domain.
|
||||||
|
#
|
||||||
|
# http://effbot.org/zone/re-sub.htm#unescape-html
|
||||||
|
# http://effbot.org/zone/copyright.htm
|
||||||
|
#
|
||||||
|
def html_entities_unescape(string):
    '''Substitute unicode characters for HTML entities

    :arg string: :class:`unicode` string to substitute out html entities
    :raises TypeError: if something other than a :class:`unicode` string is
        given
    :rtype: :class:`unicode` string
    :returns: The plain text without html entities
    '''
    def _decode_match(match):
        # Translate a single regex match: a tag, a numeric character
        # reference, or a named entity.  Anything unrecognized is returned
        # unchanged.
        matched = match.group(0)
        if matched.startswith(u"<"):
            return "" # ignore tags
        if matched.startswith(u"&#"):
            # Numeric character reference: hex (&#xNN;) or decimal (&#NN;)
            if matched.startswith(u"&#x"):
                base, digits = 16, matched[3:-1]
            else:
                base, digits = 10, matched[2:-1]
            try:
                return unichr(int(digits, base))
            except ValueError:
                # Value is outside the unicode codepoint range; fall through
                # and leave it in the output as is
                pass
        elif matched.startswith(u"&"):
            entity = htmlentitydefs.entitydefs.get(matched[1:-1].encode('utf-8'))
            if entity:
                if entity.startswith("&#"):
                    # entitydefs maps some names to numeric references
                    try:
                        return unichr(int(entity[2:-1]))
                    except ValueError:
                        # Outside the unicode codepoint range; leave as is
                        pass
                else:
                    # entitydefs values are latin-1 encoded byte strings
                    return unicode(entity, "iso-8859-1")
        return matched # leave as is

    if not isinstance(string, unicode):
        raise TypeError(k.b_('html_entities_unescape must have a unicode type'
            ' for its first argument'))
    return re.sub(_ENTITY_RE, _decode_match, string)
|
||||||
|
|
||||||
|
def byte_string_valid_xml(byte_string, encoding='utf-8'):
    '''Check that a byte :class:`str` would be valid in xml

    :arg byte_string: Byte :class:`str` to check
    :arg encoding: Encoding of the xml file.  Default: :term:`UTF-8`
    :returns: :data:`True` if the string is valid.  :data:`False` if it would
        be invalid in the xml file

    In some cases you'll have a whole bunch of byte strings and rather than
    transforming them to :class:`unicode` and back to byte :class:`str` for
    output to xml, you will just want to make sure they work with the xml file
    you're constructing.  This function will help you do that.  Example::

        ARRAY_OF_MOSTLY_UTF8_STRINGS = [...]
        processed_array = []
        for string in ARRAY_OF_MOSTLY_UTF8_STRINGS:
            if byte_string_valid_xml(string, 'utf-8'):
                processed_array.append(string)
            else:
                processed_array.append(guess_bytes_to_xml(string, encoding='utf-8'))
        output_xml(processed_array)
    '''
    if not isinstance(byte_string, str):
        # Only byte strings can be valid here
        return False

    try:
        decoded = unicode(byte_string, encoding)
    except UnicodeError:
        # Not decodable with the xml file's encoding
        return False

    # Valid for xml only if no ASCII control characters are present
    return not frozenset(decoded).intersection(_CONTROL_CHARS)
|
||||||
|
|
||||||
|
def byte_string_valid_encoding(byte_string, encoding='utf-8'):
    '''Detect if a byte :class:`str` is valid in a specific encoding

    :arg byte_string: Byte :class:`str` to test for bytes not valid in this
        encoding
    :kwarg encoding: encoding to test against.  Defaults to :term:`UTF-8`.
    :returns: :data:`True` if there are no invalid :term:`UTF-8` characters.
        :data:`False` if an invalid character is detected.

    .. note::

        This function checks whether the byte :class:`str` is valid in the
        specified encoding.  It **does not** detect whether the byte
        :class:`str` actually was encoded in that encoding.  If you want that
        sort of functionality, you probably want to use
        :func:`~kitchen.text.misc.guess_encoding` instead.
    '''
    try:
        # We only care whether decoding succeeds, not about the result
        unicode(byte_string, encoding)
    except UnicodeError:
        # At least one byte sequence is invalid in this encoding
        return False
    else:
        return True
|
||||||
|
|
||||||
|
# Public API of this module; guess_encoding and str_eq are defined earlier
# in the module than this excerpt shows.
__all__ = ('byte_string_valid_encoding', 'byte_string_valid_xml',
    'guess_encoding', 'html_entities_unescape', 'process_control_chars',
    'str_eq')
|
170
kitchen/text/utf8.py
Normal file
170
kitchen/text/utf8.py
Normal file
|
@ -0,0 +1,170 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# Copyright (c) 2011 Red Hat, Inc.
|
||||||
|
# Copyright (c) 2010 Ville Skyttä
|
||||||
|
# Copyright (c) 2009 Tim Lauridsen
|
||||||
|
# Copyright (c) 2007 Marcus Kuhn
|
||||||
|
#
|
||||||
|
# kitchen is free software; you can redistribute it and/or modify it under the
|
||||||
|
# terms of the GNU Lesser General Public License as published by the Free
|
||||||
|
# Software Foundation; either version 2.1 of the License, or (at your option)
|
||||||
|
# any later version.
|
||||||
|
#
|
||||||
|
# kitchen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
|
||||||
|
# more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Lesser General Public License
|
||||||
|
# along with kitchen; if not, see <http://www.gnu.org/licenses/>
|
||||||
|
#
|
||||||
|
# Authors:
|
||||||
|
# James Antill <james@fedoraproject.org>
|
||||||
|
# Marcus Kuhn
|
||||||
|
# Toshio Kuratomi <toshio@fedoraproject.org>
|
||||||
|
# Tim Lauridsen
|
||||||
|
# Ville Skyttä
|
||||||
|
#
|
||||||
|
# Portions of this are from yum/i18n.py
|
||||||
|
'''
|
||||||
|
-----
|
||||||
|
UTF-8
|
||||||
|
-----
|
||||||
|
|
||||||
|
Functions for operating on byte :class:`str` encoded as :term:`UTF-8`
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
In many cases, it is better to convert to :class:`unicode`, operate on the
|
||||||
|
strings, then convert back to :term:`UTF-8`. :class:`unicode` type can
|
||||||
|
handle many of these functions itself. For those that it doesn't
|
||||||
|
(removing control characters from length calculations, for instance) the
|
||||||
|
code to do so with a :class:`unicode` type is often simpler.
|
||||||
|
|
||||||
|
.. warning::
|
||||||
|
|
||||||
|
All of the functions in this module are deprecated. Most of them have
|
||||||
|
been replaced with functions that operate on unicode values in
|
||||||
|
:mod:`kitchen.text.display`. :func:`kitchen.text.utf8.utf8_valid` has
|
||||||
|
been replaced with a function in :mod:`kitchen.text.misc`.
|
||||||
|
'''
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
from kitchen import b_
|
||||||
|
from kitchen.text.converters import to_unicode, to_bytes
|
||||||
|
from kitchen.text.misc import byte_string_valid_encoding
|
||||||
|
from kitchen.text.display import _textual_width_le, \
|
||||||
|
byte_string_textual_width_fill, fill, textual_width, \
|
||||||
|
textual_width_chop, wrap
|
||||||
|
|
||||||
|
#
|
||||||
|
# Deprecated functions
|
||||||
|
#
|
||||||
|
|
||||||
|
def utf8_valid(msg):
    '''**Deprecated** Detect if a string is valid :term:`utf-8`

    Use :func:`kitchen.text.misc.byte_string_valid_encoding` instead.
    '''
    # Warn on every call; stacklevel=2 points the warning at the caller
    deprecation_msg = b_('kitchen.text.utf8.utf8_valid is deprecated.  Use'
        ' kitchen.text.misc.byte_string_valid_encoding(msg) instead')
    warnings.warn(deprecation_msg, DeprecationWarning, stacklevel=2)
    return byte_string_valid_encoding(msg)
|
||||||
|
|
||||||
|
def utf8_width(msg):
    '''**Deprecated** Get the :term:`textual width` of a :term:`utf-8` string

    Use :func:`kitchen.text.display.textual_width` instead.
    '''
    # Warn on every call; stacklevel=2 points the warning at the caller
    deprecation_msg = b_('kitchen.text.utf8.utf8_width is deprecated.  Use'
        ' kitchen.text.display.textual_width(msg) instead')
    warnings.warn(deprecation_msg, DeprecationWarning, stacklevel=2)
    return textual_width(msg)
|
||||||
|
|
||||||
|
|
||||||
|
def utf8_width_chop(msg, chop=None):
    '''**Deprecated** Return a string chopped to a given :term:`textual width`

    Use :func:`~kitchen.text.display.textual_width_chop` and
    :func:`~kitchen.text.display.textual_width` instead::

        >>> msg = 'く ku ら ra と to み mi'
        >>> # Old way:
        >>> utf8_width_chop(msg, 5)
        (5, 'く ku')
        >>> # New way
        >>> from kitchen.text.converters import to_bytes
        >>> from kitchen.text.display import textual_width, textual_width_chop
        >>> (textual_width(msg), to_bytes(textual_width_chop(msg, 5)))
        (5, 'く ku')
    '''
    warnings.warn(b_('kitchen.text.utf8.utf8_width_chop is deprecated.  Use'
        ' kitchen.text.display.textual_width_chop instead'), DeprecationWarning,
        stacklevel=2)

    # `is None` rather than `== None`: identity is the correct idiom for a
    # None test and cannot be fooled by a custom __eq__ on chop
    if chop is None:
        return textual_width(msg), msg

    # Remember the input type so the chopped string is returned as the same
    # type (byte str in, byte str out)
    as_bytes = not isinstance(msg, unicode)

    chopped_msg = textual_width_chop(msg, chop)
    if as_bytes:
        chopped_msg = to_bytes(chopped_msg)
    return textual_width(chopped_msg), chopped_msg
|
||||||
|
|
||||||
|
def utf8_width_fill(msg, fill, chop=None, left=True, prefix='', suffix=''):
    '''**Deprecated** Pad a :term:`utf-8` string to fill a specified width

    Use :func:`~kitchen.text.display.byte_string_textual_width_fill` instead
    '''
    # NOTE: the `fill` parameter shadows the module-level fill() imported
    # from kitchen.text.display; the name is kept for backwards compatibility.
    deprecation_msg = b_('kitchen.text.utf8.utf8_width_fill is deprecated.  Use'
        ' kitchen.text.display.byte_string_textual_width_fill instead')
    warnings.warn(deprecation_msg, DeprecationWarning, stacklevel=2)

    return byte_string_textual_width_fill(msg, fill, chop=chop, left=left,
        prefix=prefix, suffix=suffix)
|
||||||
|
|
||||||
|
def utf8_text_wrap(text, width=70, initial_indent='', subsequent_indent=''):
    '''**Deprecated** Similar to :func:`textwrap.wrap` but understands
    :term:`utf-8` data and doesn't screw up lists/blocks/etc

    Use :func:`kitchen.text.display.wrap` instead
    '''
    warnings.warn(b_('kitchen.text.utf8.utf8_text_wrap is deprecated.  Use'
        ' kitchen.text.display.wrap instead'),
        DeprecationWarning, stacklevel=2)

    # Remember whether the caller handed us a byte str so the wrapped lines
    # can be handed back in the same type
    want_bytes = not isinstance(text, unicode)

    wrapped_lines = wrap(to_unicode(text), width=width,
        initial_indent=initial_indent, subsequent_indent=subsequent_indent)
    if want_bytes:
        return [to_bytes(line) for line in wrapped_lines]

    return wrapped_lines
|
||||||
|
|
||||||
|
def utf8_text_fill(text, *args, **kwargs):
    '''**Deprecated** Similar to :func:`textwrap.fill` but understands
    :term:`utf-8` strings and doesn't screw up lists/blocks/etc.

    Use :func:`kitchen.text.display.fill` instead.
    '''
    deprecation_msg = b_('kitchen.text.utf8.utf8_text_fill is deprecated.  Use'
        ' kitchen.text.display.fill instead')
    warnings.warn(deprecation_msg, DeprecationWarning, stacklevel=2)
    # This assumes that all args are utf8; fill() handles the conversion
    return fill(text, *args, **kwargs)
|
||||||
|
|
||||||
|
def _utf8_width_le(width, *args):
    '''**Deprecated** Convert the arguments to unicode and use
    :func:`kitchen.text.display._textual_width_le` instead.
    '''
    warnings.warn(b_('kitchen.text.utf8._utf8_width_le is deprecated.  Use'
        ' kitchen.text.display._textual_width_le instead'),
        DeprecationWarning, stacklevel=2)
    # This assumes that all args are utf8: join them first, then convert to
    # unicode a single time
    joined = ''.join(args)
    return _textual_width_le(width, to_unicode(joined))
|
||||||
|
|
||||||
|
# Deprecated public API of kitchen.text.utf8; every entry warns and then
# delegates to its kitchen.text.display / kitchen.text.misc replacement.
__all__ = ('utf8_text_fill', 'utf8_text_wrap', 'utf8_valid', 'utf8_width',
    'utf8_width_chop', 'utf8_width_fill')
|
107
kitchen/versioning/__init__.py
Normal file
107
kitchen/versioning/__init__.py
Normal file
|
@ -0,0 +1,107 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# Copyright (c) 2011 Red Hat, Inc
|
||||||
|
#
|
||||||
|
# kitchen is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU Lesser General Public
|
||||||
|
# License as published by the Free Software Foundation; either
|
||||||
|
# version 2.1 of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# kitchen is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
# Lesser General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Lesser General Public
|
||||||
|
# License along with kitchen; if not, see <http://www.gnu.org/licenses/>
|
||||||
|
#
|
||||||
|
# Authors:
|
||||||
|
# Toshio Kuratomi <toshio@fedoraproject.org>
|
||||||
|
'''
|
||||||
|
----------------------------
|
||||||
|
PEP-386 compliant versioning
|
||||||
|
----------------------------
|
||||||
|
|
||||||
|
:pep:`386` defines a standard format for version strings. This module
|
||||||
|
contains a function for creating strings in that format.
|
||||||
|
'''
|
||||||
|
# Version of kitchen.versioning itself, in the nested-tuple form that
# version_tuple_to_string() below consumes.
__version_info__ = ((1, 0, 0),)
|
||||||
|
|
||||||
|
import itertools
|
||||||
|
|
||||||
|
def version_tuple_to_string(version_info):
    '''Return a :pep:`386` version string from a :pep:`386` style version tuple

    :arg version_info: Nested set of tuples that describes the version.  See
        below for an example.
    :returns: a version string

    This function implements just enough of :pep:`386` to satisfy our needs.
    :pep:`386` defines a standard format for version strings and refers to
    a function that will be merged into the |stdlib|_ that transforms a tuple
    of version information into a standard version string.  This function is
    an implementation of that function.  Once that function becomes available
    in the |stdlib|_ we will start using it and deprecate this function.

    :attr:`version_info` takes the form that :pep:`386`'s
    :func:`NormalizedVersion.from_parts` uses::

        ((Major, Minor, [Micros]), [(Alpha/Beta/rc marker, version)],
         [(post/dev marker, version)])

        Ex: ((1, 0, 0), ('a', 2), ('dev', 3456))

    It generates a :pep:`386` compliant version string::

        N.N[.N]+[{a|b|c|rc}N[.N]+][.postN][.devN]

        Ex: 1.0.0a2.dev3456

    .. warning:: This function does next to no error checking.  It's up to the
        person defining the version tuple to make sure that the values make
        sense.  If the :pep:`386` compliant version parser doesn't get
        released soon we'll look at making this function check that the
        version tuple makes sense before transforming it into a string.

    It's recommended that you use this function to keep
    a :data:`__version_info__` tuple and :data:`__version__` string in your
    modules.  Why do we need both a tuple and a string?  The string is often
    useful for putting into human readable locations like release
    announcements, version strings in tarballs, etc.  Meanwhile the tuple is
    very easy for a computer to compare.  For example, kitchen sets up its
    version information like this::

        from kitchen.versioning import version_tuple_to_string
        __version_info__ = ((0, 2, 1),)
        __version__ = version_tuple_to_string(__version_info__)

    Other programs that depend on a kitchen version between 0.2.1 and 0.3.0
    can find whether the present version is okay with code like this::

        from kitchen import __version_info__, __version__
        if __version_info__ < ((0, 2, 1),) or __version_info__ >= ((0, 3, 0),):
            print 'kitchen is present but not at the right version.'
            print 'We need at least version 0.2.1 and less than 0.3.0'
            print 'Currently found: kitchen-%s' % __version__
    '''
    parts = []
    for segment in version_info:
        lead = segment[0]
        if isinstance(lead, int):
            # Release segment: dotted run of integers (e.g. 1.0.0)
            parts.append('.'.join(itertools.imap(str, segment)))
            continue
        # Marker segment: normalize the marker to a byte str
        if isinstance(lead, unicode):
            marker = lead.encode('ascii')
        else:
            marker = lead
        if marker in ('a', 'b', 'c', 'rc'):
            # Pre-release markers attach directly; default the number to 0
            # when no version components follow the marker
            numbers = '.'.join(itertools.imap(str, segment[1:])) or '0'
            parts.append('%s%s' % (marker, numbers))
        else:
            # post/dev markers are dot-separated from what precedes them
            parts.append('.%s%s' % (marker, str(segment[1])))
    return unicode(''.join(parts), 'ascii')
|
||||||
|
|
||||||
|
|
||||||
|
# Human-readable PEP 386 version string derived from __version_info__ above
__version__ = version_tuple_to_string(__version_info__)


__all__ = ('version_tuple_to_string',)
|
BIN
locale/de/LC_MESSAGES/kitchen.mo
Normal file
BIN
locale/de/LC_MESSAGES/kitchen.mo
Normal file
Binary file not shown.
BIN
locale/en_US/LC_MESSAGES/kitchen.mo
Normal file
BIN
locale/en_US/LC_MESSAGES/kitchen.mo
Normal file
Binary file not shown.
184
po/de.po
Normal file
184
po/de.po
Normal file
|
@ -0,0 +1,184 @@
|
||||||
|
# Translations template for PROJECT.
|
||||||
|
# Copyright (C) 2012 ORGANIZATION
|
||||||
|
# This file is distributed under the same license as the PROJECT project.
|
||||||
|
#
|
||||||
|
# Translators:
|
||||||
|
# Christoph Scheid <c@shri.de>, 2012.
|
||||||
|
msgid ""
|
||||||
|
msgstr ""
|
||||||
|
"Project-Id-Version: Kitchen: Miscellaneous, useful python code\n"
|
||||||
|
"Report-Msgid-Bugs-To: https://fedorahosted.org/kitchen/\n"
|
||||||
|
"POT-Creation-Date: 2012-01-03 18:23-0800\n"
|
||||||
|
"PO-Revision-Date: 2012-01-13 20:39+0000\n"
|
||||||
|
"Last-Translator: Christoph Scheid <c@shri.de>\n"
|
||||||
|
"Language-Team: LANGUAGE <LL@li.org>\n"
|
||||||
|
"MIME-Version: 1.0\n"
|
||||||
|
"Content-Type: text/plain; charset=UTF-8\n"
|
||||||
|
"Content-Transfer-Encoding: 8bit\n"
|
||||||
|
"Generated-By: Babel 0.9.6\n"
|
||||||
|
"Language: de\n"
|
||||||
|
"Plural-Forms: nplurals=2; plural=(n != 1)\n"
|
||||||
|
|
||||||
|
#: kitchen/release.py:9
|
||||||
|
msgid "Kitchen contains a cornucopia of useful code"
|
||||||
|
msgstr "Kitchen ist ein Füllhorn voller nützlichem Code."
|
||||||
|
|
||||||
|
#: kitchen/release.py:10
|
||||||
|
msgid ""
|
||||||
|
"\n"
|
||||||
|
"We've all done it. In the process of writing a brand new application we've\n"
|
||||||
|
"discovered that we need a little bit of code that we've invented before.\n"
|
||||||
|
"Perhaps it's something to handle unicode text. Perhaps it's something to make\n"
|
||||||
|
"a bit of python-2.5 code run on python-2.3. Whatever it is, it ends up being\n"
|
||||||
|
"a tiny bit of code that seems too small to worry about pushing into its own\n"
|
||||||
|
"module so it sits there, a part of your current project, waiting to be cut and\n"
|
||||||
|
"pasted into your next project. And the next. And the next. And since that\n"
|
||||||
|
"little bittybit of code proved so useful to you, it's highly likely that it\n"
|
||||||
|
"proved useful to someone else as well. Useful enough that they've written it\n"
|
||||||
|
"and copy and pasted it over and over into each of their new projects.\n"
|
||||||
|
"\n"
|
||||||
|
"Well, no longer! Kitchen aims to pull these small snippets of code into a few\n"
|
||||||
|
"python modules which you can import and use within your project. No more copy\n"
|
||||||
|
"and paste! Now you can let someone else maintain and release these small\n"
|
||||||
|
"snippets so that you can get on with your life.\n"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/pycompat25/collections/_defaultdict.py:93
|
||||||
|
msgid "First argument must be callable"
|
||||||
|
msgstr "Das erste Argument muss ausführbar (callable) sein."
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:140
|
||||||
|
msgid ""
|
||||||
|
"non_string is a deprecated parameter of to_unicode(). Use nonstring instead"
|
||||||
|
msgstr "non_string ist ein veralteter Parameter von to_unicode(). Stattdessen nonstring verwenden."
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:174
|
||||||
|
#, python-format
|
||||||
|
msgid ""
|
||||||
|
"to_unicode was given \"%(obj)s\" which is neither a byte string (str) or a "
|
||||||
|
"unicode string"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:178 kitchen/text/converters.py:297
|
||||||
|
#, python-format
|
||||||
|
msgid "nonstring value, %(param)s, is not set to a valid action"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:255
|
||||||
|
msgid ""
|
||||||
|
"non_string is a deprecated parameter of to_bytes(). Use nonstring instead"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:294
|
||||||
|
#, python-format
|
||||||
|
msgid ""
|
||||||
|
"to_bytes was given \"%(obj)s\" which is neither a unicode string or a byte "
|
||||||
|
"string (str)"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:378
|
||||||
|
msgid ""
|
||||||
|
"kitchen.text.converters.to_utf8 is deprecated. Use "
|
||||||
|
"kitchen.text.converters.to_bytes(obj, encoding=\"utf-8\", "
|
||||||
|
"nonstring=\"passthru\" instead."
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:403
|
||||||
|
msgid ""
|
||||||
|
"to_str is deprecated. Use to_unicode or to_bytes instead. See the to_str "
|
||||||
|
"docstring for porting information."
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:685
|
||||||
|
msgid ""
|
||||||
|
"unicode_to_xml must have a unicode type as the first argument. Use "
|
||||||
|
"bytes_string_to_xml for byte strings."
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:689
|
||||||
|
msgid ""
|
||||||
|
"The control_chars argument to unicode_to_xml must be one of ignore, replace,"
|
||||||
|
" or strict"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:786
|
||||||
|
msgid ""
|
||||||
|
"byte_string_to_xml can only take a byte string as its first argument. Use "
|
||||||
|
"unicode_to_xml for unicode strings"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:910
|
||||||
|
msgid ""
|
||||||
|
"kitchen.text.converters.to_xml is deprecated. Use "
|
||||||
|
"kitchen.text.converters.guess_encoding_to_xml instead."
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/display.py:344
|
||||||
|
msgid ""
|
||||||
|
"_ucp_width does not understand how to assign a width value to control "
|
||||||
|
"characters."
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/misc.py:83
|
||||||
|
msgid "byte_string must be a byte string (str)"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/misc.py:171
|
||||||
|
msgid "process_control_char must have a unicode type as the first argument."
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/misc.py:182
|
||||||
|
msgid "ASCII control code present in string input"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/misc.py:185
|
||||||
|
msgid ""
|
||||||
|
"The strategy argument to process_control_chars must be one of ignore, "
|
||||||
|
"replace, or strict"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/misc.py:241
|
||||||
|
msgid "html_entities_unescape must have a unicode type for its first argument"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/utf8.py:69
|
||||||
|
msgid ""
|
||||||
|
"kitchen.text.utf8.utf8_valid is deprecated. Use "
|
||||||
|
"kitchen.text.misc.byte_string_valid_encoding(msg) instead"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/utf8.py:79
|
||||||
|
msgid ""
|
||||||
|
"kitchen.text.utf8.utf8_width is deprecated. Use "
|
||||||
|
"kitchen.text.display.textual_width(msg) instead"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/utf8.py:101
|
||||||
|
msgid ""
|
||||||
|
"kitchen.text.utf8.utf8_width_chop is deprecated. Use "
|
||||||
|
"kitchen.text.display.textual_width_chop instead"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/utf8.py:120
|
||||||
|
msgid ""
|
||||||
|
"kitchen.text.utf8.utf8_width_fill is deprecated. Use "
|
||||||
|
"kitchen.text.display.byte_string_textual_width_fill instead"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/utf8.py:133
|
||||||
|
msgid ""
|
||||||
|
"kitchen.text.utf8.utf8_text_wrap is deprecated. Use "
|
||||||
|
"kitchen.text.display.wrap instead"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/utf8.py:153
|
||||||
|
msgid ""
|
||||||
|
"kitchen.text.utf8.utf8_text_fill is deprecated. Use "
|
||||||
|
"kitchen.text.display.fill instead"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/utf8.py:163
|
||||||
|
msgid ""
|
||||||
|
"kitchen.text.utf8._utf8_width_le is deprecated. Use "
|
||||||
|
"kitchen.text.display._textual_width_le instead"
|
||||||
|
msgstr ""
|
239
po/en_US.po
Normal file
239
po/en_US.po
Normal file
|
@ -0,0 +1,239 @@
|
||||||
|
# Translations template for PROJECT.
|
||||||
|
# Copyright (C) 2012 ORGANIZATION
|
||||||
|
# This file is distributed under the same license as the PROJECT project.
|
||||||
|
#
|
||||||
|
# Translators:
|
||||||
|
msgid ""
|
||||||
|
msgstr ""
|
||||||
|
"Project-Id-Version: Kitchen: Miscellaneous, useful python code\n"
|
||||||
|
"Report-Msgid-Bugs-To: https://fedorahosted.org/kitchen/\n"
|
||||||
|
"POT-Creation-Date: 2012-01-03 18:23-0800\n"
|
||||||
|
"PO-Revision-Date: 2012-01-03 07:48+0000\n"
|
||||||
|
"Last-Translator: Toshio Kuratomi <a.badger@gmail.com>\n"
|
||||||
|
"Language-Team: LANGUAGE <LL@li.org>\n"
|
||||||
|
"MIME-Version: 1.0\n"
|
||||||
|
"Content-Type: text/plain; charset=UTF-8\n"
|
||||||
|
"Content-Transfer-Encoding: 8bit\n"
|
||||||
|
"Generated-By: Babel 0.9.6\n"
|
||||||
|
"Language: en_US\n"
|
||||||
|
"Plural-Forms: nplurals=2; plural=(n != 1)\n"
|
||||||
|
|
||||||
|
#: kitchen/release.py:9
|
||||||
|
msgid "Kitchen contains a cornucopia of useful code"
|
||||||
|
msgstr "Kitchen contains a cornucopia of useful code"
|
||||||
|
|
||||||
|
#: kitchen/release.py:10
|
||||||
|
msgid ""
|
||||||
|
"\n"
|
||||||
|
"We've all done it. In the process of writing a brand new application we've\n"
|
||||||
|
"discovered that we need a little bit of code that we've invented before.\n"
|
||||||
|
"Perhaps it's something to handle unicode text. Perhaps it's something to make\n"
|
||||||
|
"a bit of python-2.5 code run on python-2.3. Whatever it is, it ends up being\n"
|
||||||
|
"a tiny bit of code that seems too small to worry about pushing into its own\n"
|
||||||
|
"module so it sits there, a part of your current project, waiting to be cut and\n"
|
||||||
|
"pasted into your next project. And the next. And the next. And since that\n"
|
||||||
|
"little bittybit of code proved so useful to you, it's highly likely that it\n"
|
||||||
|
"proved useful to someone else as well. Useful enough that they've written it\n"
|
||||||
|
"and copy and pasted it over and over into each of their new projects.\n"
|
||||||
|
"\n"
|
||||||
|
"Well, no longer! Kitchen aims to pull these small snippets of code into a few\n"
|
||||||
|
"python modules which you can import and use within your project. No more copy\n"
|
||||||
|
"and paste! Now you can let someone else maintain and release these small\n"
|
||||||
|
"snippets so that you can get on with your life.\n"
|
||||||
|
msgstr ""
|
||||||
|
"\n"
|
||||||
|
"We've all done it. In the process of writing a brand new application we've\n"
|
||||||
|
"discovered that we need a little bit of code that we've invented before.\n"
|
||||||
|
"Perhaps it's something to handle unicode text. Perhaps it's something to make\n"
|
||||||
|
"a bit of python-2.5 code run on python-2.3. Whatever it is, it ends up being\n"
|
||||||
|
"a tiny bit of code that seems too small to worry about pushing into its own\n"
|
||||||
|
"module so it sits there, a part of your current project, waiting to be cut and\n"
|
||||||
|
"pasted into your next project. And the next. And the next. And since that\n"
|
||||||
|
"little bittybit of code proved so useful to you, it's highly likely that it\n"
|
||||||
|
"proved useful to someone else as well. Useful enough that they've written it\n"
|
||||||
|
"and copy and pasted it over and over into each of their new projects.\n"
|
||||||
|
"\n"
|
||||||
|
"Well, no longer! Kitchen aims to pull these small snippets of code into a few\n"
|
||||||
|
"python modules which you can import and use within your project. No more copy\n"
|
||||||
|
"and paste! Now you can let someone else maintain and release these small\n"
|
||||||
|
"snippets so that you can get on with your life.\n"
|
||||||
|
|
||||||
|
#: kitchen/pycompat25/collections/_defaultdict.py:93
|
||||||
|
msgid "First argument must be callable"
|
||||||
|
msgstr "First argument must be callable"
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:140
|
||||||
|
msgid ""
|
||||||
|
"non_string is a deprecated parameter of to_unicode(). Use nonstring instead"
|
||||||
|
msgstr ""
|
||||||
|
"non_string is a deprecated parameter of to_unicode(). Use nonstring instead"
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:174
|
||||||
|
#, python-format
|
||||||
|
msgid ""
|
||||||
|
"to_unicode was given \"%(obj)s\" which is neither a byte string (str) or a "
|
||||||
|
"unicode string"
|
||||||
|
msgstr ""
|
||||||
|
"to_unicode was given \"%(obj)s\" which is neither a byte string (str) or a "
|
||||||
|
"unicode string"
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:178 kitchen/text/converters.py:297
|
||||||
|
#, python-format
|
||||||
|
msgid "nonstring value, %(param)s, is not set to a valid action"
|
||||||
|
msgstr "nonstring value, %(param)s, is not set to a valid action"
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:255
|
||||||
|
msgid ""
|
||||||
|
"non_string is a deprecated parameter of to_bytes(). Use nonstring instead"
|
||||||
|
msgstr ""
|
||||||
|
"non_string is a deprecated parameter of to_bytes(). Use nonstring instead"
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:294
|
||||||
|
#, python-format
|
||||||
|
msgid ""
|
||||||
|
"to_bytes was given \"%(obj)s\" which is neither a unicode string or a byte "
|
||||||
|
"string (str)"
|
||||||
|
msgstr ""
|
||||||
|
"to_bytes was given \"%(obj)s\" which is neither a unicode string or a byte "
|
||||||
|
"string (str)"
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:378
|
||||||
|
msgid ""
|
||||||
|
"kitchen.text.converters.to_utf8 is deprecated. Use "
|
||||||
|
"kitchen.text.converters.to_bytes(obj, encoding=\"utf-8\", "
|
||||||
|
"nonstring=\"passthru\" instead."
|
||||||
|
msgstr ""
|
||||||
|
"kitchen.text.converters.to_utf8 is deprecated. Use "
|
||||||
|
"kitchen.text.converters.to_bytes(obj, encoding=\"utf-8\", "
|
||||||
|
"nonstring=\"passthru\" instead."
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:403
|
||||||
|
msgid ""
|
||||||
|
"to_str is deprecated. Use to_unicode or to_bytes instead. See the to_str "
|
||||||
|
"docstring for porting information."
|
||||||
|
msgstr ""
|
||||||
|
"to_str is deprecated. Use to_unicode or to_bytes instead. See the to_str "
|
||||||
|
"docstring for porting information."
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:685
|
||||||
|
msgid ""
|
||||||
|
"unicode_to_xml must have a unicode type as the first argument. Use "
|
||||||
|
"bytes_string_to_xml for byte strings."
|
||||||
|
msgstr ""
|
||||||
|
"unicode_to_xml must have a unicode type as the first argument. Use "
|
||||||
|
"bytes_string_to_xml for byte strings."
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:689
|
||||||
|
msgid ""
|
||||||
|
"The control_chars argument to unicode_to_xml must be one of ignore, replace,"
|
||||||
|
" or strict"
|
||||||
|
msgstr ""
|
||||||
|
"The control_chars argument to unicode_to_xml must be one of ignore, replace,"
|
||||||
|
" or strict"
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:786
|
||||||
|
msgid ""
|
||||||
|
"byte_string_to_xml can only take a byte string as its first argument. Use "
|
||||||
|
"unicode_to_xml for unicode strings"
|
||||||
|
msgstr ""
|
||||||
|
"byte_string_to_xml can only take a byte string as its first argument. Use "
|
||||||
|
"unicode_to_xml for unicode strings"
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:910
|
||||||
|
msgid ""
|
||||||
|
"kitchen.text.converters.to_xml is deprecated. Use "
|
||||||
|
"kitchen.text.converters.guess_encoding_to_xml instead."
|
||||||
|
msgstr ""
|
||||||
|
"kitchen.text.converters.to_xml is deprecated. Use "
|
||||||
|
"kitchen.text.converters.guess_encoding_to_xml instead."
|
||||||
|
|
||||||
|
#: kitchen/text/display.py:344
|
||||||
|
msgid ""
|
||||||
|
"_ucp_width does not understand how to assign a width value to control "
|
||||||
|
"characters."
|
||||||
|
msgstr ""
|
||||||
|
"_ucp_width does not understand how to assign a width value to control "
|
||||||
|
"characters."
|
||||||
|
|
||||||
|
#: kitchen/text/misc.py:83
|
||||||
|
msgid "byte_string must be a byte string (str)"
|
||||||
|
msgstr "byte_string must be a byte string (str)"
|
||||||
|
|
||||||
|
#: kitchen/text/misc.py:171
|
||||||
|
msgid "process_control_char must have a unicode type as the first argument."
|
||||||
|
msgstr "process_control_char must have a unicode type as the first argument."
|
||||||
|
|
||||||
|
#: kitchen/text/misc.py:182
|
||||||
|
msgid "ASCII control code present in string input"
|
||||||
|
msgstr "ASCII control code present in string input"
|
||||||
|
|
||||||
|
#: kitchen/text/misc.py:185
|
||||||
|
msgid ""
|
||||||
|
"The strategy argument to process_control_chars must be one of ignore, "
|
||||||
|
"replace, or strict"
|
||||||
|
msgstr ""
|
||||||
|
"The strategy argument to process_control_chars must be one of ignore, "
|
||||||
|
"replace, or strict"
|
||||||
|
|
||||||
|
#: kitchen/text/misc.py:241
|
||||||
|
msgid "html_entities_unescape must have a unicode type for its first argument"
|
||||||
|
msgstr ""
|
||||||
|
"html_entities_unescape must have a unicode type for its first argument"
|
||||||
|
|
||||||
|
#: kitchen/text/utf8.py:69
|
||||||
|
msgid ""
|
||||||
|
"kitchen.text.utf8.utf8_valid is deprecated. Use "
|
||||||
|
"kitchen.text.misc.byte_string_valid_encoding(msg) instead"
|
||||||
|
msgstr ""
|
||||||
|
"kitchen.text.utf8.utf8_valid is deprecated. Use "
|
||||||
|
"kitchen.text.misc.byte_string_valid_encoding(msg) instead"
|
||||||
|
|
||||||
|
#: kitchen/text/utf8.py:79
|
||||||
|
msgid ""
|
||||||
|
"kitchen.text.utf8.utf8_width is deprecated. Use "
|
||||||
|
"kitchen.text.display.textual_width(msg) instead"
|
||||||
|
msgstr ""
|
||||||
|
"kitchen.text.utf8.utf8_width is deprecated. Use "
|
||||||
|
"kitchen.text.display.textual_width(msg) instead"
|
||||||
|
|
||||||
|
#: kitchen/text/utf8.py:101
|
||||||
|
msgid ""
|
||||||
|
"kitchen.text.utf8.utf8_width_chop is deprecated. Use "
|
||||||
|
"kitchen.text.display.textual_width_chop instead"
|
||||||
|
msgstr ""
|
||||||
|
"kitchen.text.utf8.utf8_width_chop is deprecated. Use "
|
||||||
|
"kitchen.text.display.textual_width_chop instead"
|
||||||
|
|
||||||
|
#: kitchen/text/utf8.py:120
|
||||||
|
msgid ""
|
||||||
|
"kitchen.text.utf8.utf8_width_fill is deprecated. Use "
|
||||||
|
"kitchen.text.display.byte_string_textual_width_fill instead"
|
||||||
|
msgstr ""
|
||||||
|
"kitchen.text.utf8.utf8_width_fill is deprecated. Use "
|
||||||
|
"kitchen.text.display.byte_string_textual_width_fill instead"
|
||||||
|
|
||||||
|
#: kitchen/text/utf8.py:133
|
||||||
|
msgid ""
|
||||||
|
"kitchen.text.utf8.utf8_text_wrap is deprecated. Use "
|
||||||
|
"kitchen.text.display.wrap instead"
|
||||||
|
msgstr ""
|
||||||
|
"kitchen.text.utf8.utf8_text_wrap is deprecated. Use "
|
||||||
|
"kitchen.text.display.wrap instead"
|
||||||
|
|
||||||
|
#: kitchen/text/utf8.py:153
|
||||||
|
msgid ""
|
||||||
|
"kitchen.text.utf8.utf8_text_fill is deprecated. Use "
|
||||||
|
"kitchen.text.display.fill instead"
|
||||||
|
msgstr ""
|
||||||
|
"kitchen.text.utf8.utf8_text_fill is deprecated. Use "
|
||||||
|
"kitchen.text.display.fill instead"
|
||||||
|
|
||||||
|
#: kitchen/text/utf8.py:163
|
||||||
|
msgid ""
|
||||||
|
"kitchen.text.utf8._utf8_width_le is deprecated. Use "
|
||||||
|
"kitchen.text.display._textual_width_le instead"
|
||||||
|
msgstr ""
|
||||||
|
"kitchen.text.utf8._utf8_width_le is deprecated. Use "
|
||||||
|
"kitchen.text.display._textual_width_le instead"
|
||||||
|
|
||||||
|
|
194
po/kitchen.pot
Normal file
194
po/kitchen.pot
Normal file
|
@ -0,0 +1,194 @@
|
||||||
|
# Translations template for PROJECT.
|
||||||
|
# Copyright (C) 2012 ORGANIZATION
|
||||||
|
# This file is distributed under the same license as the PROJECT project.
|
||||||
|
# FIRST AUTHOR <EMAIL@ADDRESS>, 2012.
|
||||||
|
#
|
||||||
|
#, fuzzy
|
||||||
|
msgid ""
|
||||||
|
msgstr ""
|
||||||
|
"Project-Id-Version: PROJECT VERSION\n"
|
||||||
|
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
|
||||||
|
"POT-Creation-Date: 2012-01-03 18:23-0800\n"
|
||||||
|
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
|
||||||
|
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
||||||
|
"Language-Team: LANGUAGE <LL@li.org>\n"
|
||||||
|
"MIME-Version: 1.0\n"
|
||||||
|
"Content-Type: text/plain; charset=utf-8\n"
|
||||||
|
"Content-Transfer-Encoding: 8bit\n"
|
||||||
|
"Generated-By: Babel 0.9.6\n"
|
||||||
|
|
||||||
|
#: kitchen/release.py:9
|
||||||
|
msgid "Kitchen contains a cornucopia of useful code"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/release.py:10
|
||||||
|
msgid ""
|
||||||
|
"\n"
|
||||||
|
"We've all done it. In the process of writing a brand new application "
|
||||||
|
"we've\n"
|
||||||
|
"discovered that we need a little bit of code that we've invented before.\n"
|
||||||
|
"Perhaps it's something to handle unicode text. Perhaps it's something to"
|
||||||
|
" make\n"
|
||||||
|
"a bit of python-2.5 code run on python-2.3. Whatever it is, it ends up "
|
||||||
|
"being\n"
|
||||||
|
"a tiny bit of code that seems too small to worry about pushing into its "
|
||||||
|
"own\n"
|
||||||
|
"module so it sits there, a part of your current project, waiting to be "
|
||||||
|
"cut and\n"
|
||||||
|
"pasted into your next project. And the next. And the next. And since "
|
||||||
|
"that\n"
|
||||||
|
"little bittybit of code proved so useful to you, it's highly likely that "
|
||||||
|
"it\n"
|
||||||
|
"proved useful to someone else as well. Useful enough that they've "
|
||||||
|
"written it\n"
|
||||||
|
"and copy and pasted it over and over into each of their new projects.\n"
|
||||||
|
"\n"
|
||||||
|
"Well, no longer! Kitchen aims to pull these small snippets of code into "
|
||||||
|
"a few\n"
|
||||||
|
"python modules which you can import and use within your project. No more"
|
||||||
|
" copy\n"
|
||||||
|
"and paste! Now you can let someone else maintain and release these small"
|
||||||
|
"\n"
|
||||||
|
"snippets so that you can get on with your life.\n"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/pycompat25/collections/_defaultdict.py:93
|
||||||
|
msgid "First argument must be callable"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:140
|
||||||
|
msgid ""
|
||||||
|
"non_string is a deprecated parameter of to_unicode(). Use nonstring "
|
||||||
|
"instead"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:174
|
||||||
|
#, python-format
|
||||||
|
msgid ""
|
||||||
|
"to_unicode was given \"%(obj)s\" which is neither a byte string (str) or "
|
||||||
|
"a unicode string"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:178 kitchen/text/converters.py:297
|
||||||
|
#, python-format
|
||||||
|
msgid "nonstring value, %(param)s, is not set to a valid action"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:255
|
||||||
|
msgid "non_string is a deprecated parameter of to_bytes(). Use nonstring instead"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:294
|
||||||
|
#, python-format
|
||||||
|
msgid ""
|
||||||
|
"to_bytes was given \"%(obj)s\" which is neither a unicode string or a "
|
||||||
|
"byte string (str)"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:378
|
||||||
|
msgid ""
|
||||||
|
"kitchen.text.converters.to_utf8 is deprecated. Use "
|
||||||
|
"kitchen.text.converters.to_bytes(obj, encoding=\"utf-8\", "
|
||||||
|
"nonstring=\"passthru\" instead."
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:403
|
||||||
|
msgid ""
|
||||||
|
"to_str is deprecated. Use to_unicode or to_bytes instead. See the "
|
||||||
|
"to_str docstring for porting information."
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:685
|
||||||
|
msgid ""
|
||||||
|
"unicode_to_xml must have a unicode type as the first argument. Use "
|
||||||
|
"bytes_string_to_xml for byte strings."
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:689
|
||||||
|
msgid ""
|
||||||
|
"The control_chars argument to unicode_to_xml must be one of ignore, "
|
||||||
|
"replace, or strict"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:786
|
||||||
|
msgid ""
|
||||||
|
"byte_string_to_xml can only take a byte string as its first argument. "
|
||||||
|
"Use unicode_to_xml for unicode strings"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/converters.py:910
|
||||||
|
msgid ""
|
||||||
|
"kitchen.text.converters.to_xml is deprecated. Use "
|
||||||
|
"kitchen.text.converters.guess_encoding_to_xml instead."
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/display.py:344
|
||||||
|
msgid ""
|
||||||
|
"_ucp_width does not understand how to assign a width value to control "
|
||||||
|
"characters."
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/misc.py:83
|
||||||
|
msgid "byte_string must be a byte string (str)"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/misc.py:171
|
||||||
|
msgid "process_control_char must have a unicode type as the first argument."
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/misc.py:182
|
||||||
|
msgid "ASCII control code present in string input"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/misc.py:185
|
||||||
|
msgid ""
|
||||||
|
"The strategy argument to process_control_chars must be one of ignore, "
|
||||||
|
"replace, or strict"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/misc.py:241
|
||||||
|
msgid "html_entities_unescape must have a unicode type for its first argument"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/utf8.py:69
|
||||||
|
msgid ""
|
||||||
|
"kitchen.text.utf8.utf8_valid is deprecated. Use "
|
||||||
|
"kitchen.text.misc.byte_string_valid_encoding(msg) instead"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/utf8.py:79
|
||||||
|
msgid ""
|
||||||
|
"kitchen.text.utf8.utf8_width is deprecated. Use "
|
||||||
|
"kitchen.text.display.textual_width(msg) instead"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/utf8.py:101
|
||||||
|
msgid ""
|
||||||
|
"kitchen.text.utf8.utf8_width_chop is deprecated. Use "
|
||||||
|
"kitchen.text.display.textual_width_chop instead"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/utf8.py:120
|
||||||
|
msgid ""
|
||||||
|
"kitchen.text.utf8.utf8_width_fill is deprecated. Use "
|
||||||
|
"kitchen.text.display.byte_string_textual_width_fill instead"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/utf8.py:133
|
||||||
|
msgid ""
|
||||||
|
"kitchen.text.utf8.utf8_text_wrap is deprecated. Use "
|
||||||
|
"kitchen.text.display.wrap instead"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/utf8.py:153
|
||||||
|
msgid ""
|
||||||
|
"kitchen.text.utf8.utf8_text_fill is deprecated. Use "
|
||||||
|
"kitchen.text.display.fill instead"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen/text/utf8.py:163
|
||||||
|
msgid ""
|
||||||
|
"kitchen.text.utf8._utf8_width_le is deprecated. Use "
|
||||||
|
"kitchen.text.display._textual_width_le instead"
|
||||||
|
msgstr ""
|
||||||
|
|
64
releaseutils.py
Executable file
64
releaseutils.py
Executable file
|
@ -0,0 +1,64 @@
|
||||||
|
#!/usr/bin/python -tt
|
||||||
|
|
||||||
|
import ConfigParser
|
||||||
|
import glob
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
from kitchen.pycompat27 import subprocess
|
||||||
|
|
||||||
|
class MsgFmt(object):
    """Message compiler backend that shells out to the ``msgfmt`` binary.

    Exposes the same ``run(args)`` entry point as babel's command line
    frontend so callers can use either backend interchangeably.
    """

    def run(self, args):
        # args is already an argv list, so no shell is needed.  Block until
        # the compiler exits so its output files exist before we continue.
        process = subprocess.Popen(args, shell=False)
        process.wait()
|
||||||
|
|
||||||
|
def setup_message_compiler():
    """Choose a message catalog compiler.

    :returns: a ``(compiler, command_template)`` tuple.  ``compiler`` is an
        object with a ``run(argv_list)`` method; ``command_template`` is a
        %-format string expecting ``domain``, ``pofile`` and ``lang`` keys.

    Prefers the system ``msgfmt`` binary and falls back to babel's
    ``pybabel`` if msgfmt is not installed.
    """
    # Look for msgfmt.  Popen raises OSError when the binary is absent.
    try:
        probe = subprocess.Popen(['msgfmt', '-h'], stdout=subprocess.PIPE)
    except OSError:
        # msgfmt is unavailable; import babel lazily so it is only a
        # requirement when we actually need the fallback.
        import babel.messages.frontend

        return (babel.messages.frontend.CommandLineInterface(),
            'pybabel compile -D %(domain)s -d locale -i %(pofile)s -l %(lang)s'
            )
    else:
        # Drain and reap the probe so we do not leak its stdout pipe or
        # leave a zombie process behind (the original never waited on it).
        probe.communicate()
        return (MsgFmt(), 'msgfmt -c -o locale/%(lang)s/LC_MESSAGES/%(domain)s.mo %(pofile)s')
|
||||||
|
|
||||||
|
def main():
|
||||||
|
# Get the directory with message catalogs
|
||||||
|
# Reuse transifex's config file first as it will know this
|
||||||
|
cfg = ConfigParser.SafeConfigParser()
|
||||||
|
cfg.read('.tx/config')
|
||||||
|
cmd, args = setup_message_compiler()
|
||||||
|
|
||||||
|
try:
|
||||||
|
shutil.rmtree('locale')
|
||||||
|
except OSError, e:
|
||||||
|
# If the error is that locale does not exist, we're okay. We're
|
||||||
|
# deleting it here, afterall
|
||||||
|
if e.errno != 2:
|
||||||
|
raise
|
||||||
|
|
||||||
|
for section in [s for s in cfg.sections() if s != 'main']:
|
||||||
|
try:
|
||||||
|
file_filter = cfg.get(section, 'file_filter')
|
||||||
|
source_file = cfg.get(section, 'source_file')
|
||||||
|
except ConfigParser.NoOptionError:
|
||||||
|
continue
|
||||||
|
glob_pattern = file_filter.replace('<lang>', '*')
|
||||||
|
pot = os.path.basename(source_file)
|
||||||
|
if pot.endswith('.pot'):
|
||||||
|
pot = pot[:-4]
|
||||||
|
arg_values = {'domain': pot}
|
||||||
|
for po_file in glob.glob(glob_pattern):
|
||||||
|
file_pattern = os.path.basename(po_file)
|
||||||
|
lang = file_pattern.replace('.po','')
|
||||||
|
os.makedirs(os.path.join('locale', lang, 'LC_MESSAGES'))
|
||||||
|
arg_values['pofile'] = po_file
|
||||||
|
arg_values['lang'] = lang
|
||||||
|
compile_args = args % arg_values
|
||||||
|
compile_args = compile_args.split(' ')
|
||||||
|
cmd.run(compile_args)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
2
setup.cfg
Normal file
2
setup.cfg
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
[upload_docs]
|
||||||
|
upload_dir=build/sphinx/html
|
57
setup.py
Executable file
57
setup.py
Executable file
|
@ -0,0 +1,57 @@
|
||||||
|
#!/usr/bin/python -tt
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from distutils.command.sdist import sdist as _sdist
|
||||||
|
import glob
|
||||||
|
import os
|
||||||
|
|
||||||
|
from setuptools import find_packages, setup
|
||||||
|
import kitchen.release
|
||||||
|
|
||||||
|
import releaseutils
|
||||||
|
|
||||||
|
# Override sdist command to compile the message catalogs as well
|
||||||
|
class Sdist(_sdist, object):
    """sdist command that compiles message catalogs before packaging.

    The compiled ``.mo`` files are registered as ``data_files`` so they are
    included in the generated source tarball.
    """

    def run(self):
        # Build the catalogs first so the glob below can find them.
        releaseutils.main()
        catalogs = [(os.path.dirname(mo_file), [mo_file])
            for mo_file in glob.glob('locale/*/*/*.mo')
            if os.path.isfile(mo_file)]
        existing = self.distribution.data_files
        if existing and hasattr(existing, 'extend'):
            existing.extend(catalogs)
        else:
            self.distribution.data_files = catalogs
        super(Sdist, self).run()
|
||||||
|
|
||||||
|
|
||||||
|
# Trove classifiers, hoisted out of the call for readability.
_CLASSIFIERS = [
    'Development Status :: 4 - Beta',
    'License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)',
    'Operating System :: OS Independent',
    'Programming Language :: Python :: 2.3',
    'Programming Language :: Python :: 2.4',
    'Programming Language :: Python :: 2.5',
    'Programming Language :: Python :: 2.6',
    'Programming Language :: Python :: 2.7',
    'Topic :: Software Development :: Internationalization',
    'Topic :: Software Development :: Libraries :: Python Modules',
    'Topic :: Text Processing :: General',
]

# Version and description strings come from kitchen.release so that this
# metadata lives in exactly one place.
setup(
    name='kitchen',
    version=str(kitchen.release.__version__),
    description=kitchen.release.DESCRIPTION,
    long_description=kitchen.release.LONG_DESCRIPTION,
    author=kitchen.release.AUTHOR,
    author_email=kitchen.release.EMAIL,
    maintainer='Toshio Kuratomi',
    maintainer_email='toshio@fedoraproject.org',
    license=kitchen.release.LICENSE,
    url=kitchen.release.URL,
    download_url=kitchen.release.DOWNLOAD_URL,
    # Compile message catalogs whenever a source distribution is built.
    cmdclass={'sdist': Sdist},
    keywords='Useful Small Code Snippets',
    classifiers=_CLASSIFIERS,
    packages=find_packages(),
    # Populated at build time by the Sdist command with compiled catalogs.
    data_files=[],
)
|
129
tests/base_classes.py
Normal file
129
tests/base_classes.py
Normal file
|
@ -0,0 +1,129 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# Base class for testing unicode and utf8 functions. This holds data that's
|
||||||
|
# useful for making tests
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from kitchen.text.converters import to_bytes
|
||||||
|
from kitchen.text import misc
|
||||||
|
|
||||||
|
class UnicodeTestData(object):
    """Shared fixture data for the unicode/utf8 test suites.

    Naming convention: a ``u_`` prefix marks unicode objects; ``utf8_``,
    ``latin1_``, ``euc_jp_`` and ``ascii_`` prefixes mark byte strings in
    that encoding; ``mangled`` marks data deliberately decoded or encoded
    with the wrong codec to exercise error-handling paths.

    NOTE(review): this block was recovered from a whitespace-mangled dump;
    leading whitespace inside some string literals (notably the wrapped
    paragraph fixtures) and the pre-escaped entity fixtures may differ from
    the pristine original -- verify against upstream kitchen.
    """

    # This should encode fine -- sanity check
    u_ascii = u'the quick brown fox jumped over the lazy dog'
    b_ascii = 'the quick brown fox jumped over the lazy dog'

    # First challenge -- what happens with latin-1 characters
    u_spanish = u'El veloz murciélago saltó sobre el perro perezoso.'
    # utf8 and latin1 both support these chars so no mangling
    utf8_spanish = u_spanish.encode('utf8')
    latin1_spanish = u_spanish.encode('latin1')

    # ASCII does not have the accented characters so it mangles
    ascii_mangled_spanish_as_ascii = u_spanish.encode('ascii', 'replace')
    # Attempting to decode using the wrong charset will mangle.
    # Note: as a general principle, we do not want to have code that mangles
    # input of one charset and output of the same charset.  We want to avoid
    # things like::
    #   input latin-1, transform to unicode with utf-8, output latin-1.
    u_mangled_spanish_utf8_as_latin1 = unicode(utf8_spanish, encoding='latin1', errors='replace')
    u_mangled_spanish_utf8_as_ascii = unicode(utf8_spanish, encoding='ascii', errors='replace')
    u_mangled_spanish_latin1_as_ascii = unicode(latin1_spanish, encoding='ascii', errors='replace')
    u_mangled_spanish_latin1_as_utf8 = unicode(latin1_spanish, encoding='utf-8', errors='replace')
    ascii_twice_mangled_spanish_latin1_as_utf8_as_ascii = u_mangled_spanish_latin1_as_utf8.encode('ascii', 'replace')
    utf8_mangled_spanish_latin1_as_utf8 = u_mangled_spanish_latin1_as_utf8.encode('utf-8')
    u_spanish_ignore = unicode(latin1_spanish, encoding='utf8', errors='ignore')

    # Japanese sample; the embedded ASCII apostrophe is deliberate so that
    # mangled round-trips keep one recognizable character.
    u_japanese = u"速い茶色のキツネが怠惰な犬に'増"
    utf8_japanese = u_japanese.encode('utf8')
    euc_jp_japanese = u_japanese.encode('euc_jp')
    u_mangled_euc_jp_as_latin1 = unicode(euc_jp_japanese, 'latin1')
    u_mangled_euc_jp_as_utf8 = unicode(euc_jp_japanese, 'utf-8', 'replace')
    utf8_mangled_euc_jp_as_latin1 = u_mangled_euc_jp_as_latin1.encode('utf8')
    u_mangled_japanese_utf8_as_latin1 = unicode(utf8_japanese, 'latin1')
    # NOTE(review): the <EFBFBD> runs below are how this dump rendered U+FFFD
    # replacement characters -- confirm against upstream before editing.
    u_mangled_japanese_utf8_as_ascii = u"<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>'<EFBFBD><EFBFBD><EFBFBD>"
    ascii_mangled_japanese_replace_as_latin1 = "??????????????'?"
    latin1_mangled_japanese_replace_as_latin1 = "??????????????'?"

    # Interleaved Japanese and ASCII, used for per-character slicing tests.
    u_mixed = u'く ku ら ra と to み mi'
    utf8_mixed = u_mixed.encode('utf8')
    utf8_ku = u_mixed[0].encode('utf8')
    utf8_ra = u_mixed[2].encode('utf8')
    utf8_to = u_mixed[4].encode('utf8')
    utf8_mi = u_mixed[6].encode('utf8')

    u_mixed_replace = u'\ufffd ku \ufffd ra \ufffd to \ufffd mi'
    u_mixed_ignore = u' ku ra to mi'
    latin1_mixed_replace = '? ku ? ra ? to ? mi'
    latin1_mixed_ignore = ' ku ra to mi'

    # XML/HTML escaping fixtures.
    # NOTE(review): the dump appears to have unescaped entity references
    # (e.g. &amp;) in the *_escape fixtures below -- verify upstream.
    u_entity = u'Test: <"&"> – ' + u_japanese + u'é'
    utf8_entity = u_entity.encode('utf8')
    u_entity_escape = u'Test: <"&"> – ' + unicode(u_japanese.encode('ascii', 'xmlcharrefreplace'), 'ascii') + u'é'
    utf8_entity_escape = 'Test: <"&"> – 速い茶色のキツネが怠惰な犬に\'増é'
    utf8_attrib_escape = 'Test: <"&"> – 速い茶色のキツネが怠惰な犬に\'増é'
    ascii_entity_escape = (u'Test: <"&"> – ' + u_japanese + u'é').encode('ascii', 'xmlcharrefreplace').replace('&', '&',1).replace('<', '<').replace('>', '>')

    # Every byte value 0-255, space separated, plus its base64 encoding.
    b_byte_chars = ' '.join(map(chr, range(0, 256)))
    b_byte_encoded = 'ACABIAIgAyAEIAUgBiAHIAggCSAKIAsgDCANIA4gDyAQIBEgEiATIBQgFSAWIBcgGCAZIBogGyAcIB0gHiAfICAgISAiICMgJCAlICYgJyAoICkgKiArICwgLSAuIC8gMCAxIDIgMyA0IDUgNiA3IDggOSA6IDsgPCA9ID4gPyBAIEEgQiBDIEQgRSBGIEcgSCBJIEogSyBMIE0gTiBPIFAgUSBSIFMgVCBVIFYgVyBYIFkgWiBbIFwgXSBeIF8gYCBhIGIgYyBkIGUgZiBnIGggaSBqIGsgbCBtIG4gbyBwIHEgciBzIHQgdSB2IHcgeCB5IHogeyB8IH0gfiB/IIAggSCCIIMghCCFIIYghyCIIIkgiiCLIIwgjSCOII8gkCCRIJIgkyCUIJUgliCXIJggmSCaIJsgnCCdIJ4gnyCgIKEgoiCjIKQgpSCmIKcgqCCpIKogqyCsIK0griCvILAgsSCyILMgtCC1ILYgtyC4ILkguiC7ILwgvSC+IL8gwCDBIMIgwyDEIMUgxiDHIMggySDKIMsgzCDNIM4gzyDQINEg0iDTINQg1SDWINcg2CDZINog2yDcIN0g3iDfIOAg4SDiIOMg5CDlIOYg5yDoIOkg6iDrIOwg7SDuIO8g8CDxIPIg8yD0IPUg9iD3IPgg+SD6IPsg/CD9IP4g/w=='

    # Matches the default repr of an object; group 1 is the class name.
    repr_re = re.compile('^<[^ ]*\.([^.]+) object at .*>$')

    # Source paragraph for the text-wrapping tests.
    u_paragraph = u'''ConfigObj is a simple but powerful config file reader and writer: an ini file
round tripper. Its main feature is that it is very easy to use, with a
straightforward programmer's interface and a simple syntax for config files.
It has lots of other features though:



* Nested sections (subsections), to any level
* List values
* Multiple line values
* String interpolation (substitution)
* Integrated with a powerful validation system
o including automatic type checking/conversion
o repeated sections
o and allowing default values
* All comments in the file are preserved
* The order of keys/sections is preserved
* No external dependencies
* Full Unicode support
* A powerful unrepr mode for storing basic datatypes
'''
    utf8_paragraph = u_paragraph.encode('utf-8', 'replace')
    # Expected line-by-line output of wrapping u_paragraph.
    u_paragraph_out = [u'ConfigObj is a simple but powerful config file reader and writer: an',
        u'ini file round tripper. Its main feature is that it is very easy to',
        u"use, with a straightforward programmer's interface and a simple syntax",
        u'for config files. It has lots of other features though:',
        u'',
        u'',
        u'',
        u' * Nested sections (subsections), to any level',
        u' * List values',
        u' * Multiple line values',
        u' * String interpolation (substitution)',
        u' * Integrated with a powerful validation system',
        u' o including automatic type checking/conversion',
        u' o repeated sections',
        u' o and allowing default values',
        u' * All comments in the file are preserved',
        u' * The order of keys/sections is preserved',
        u' * No external dependencies',
        u' * Full Unicode support',
        u' * A powerful unrepr mode for storing basic datatypes']

    utf8_paragraph_out = [line.encode('utf-8', 'replace') for line in u_paragraph_out]

    # Mixed-width paragraph and its expected wrapped forms.
    u_mixed_para = u'くらとみ kuratomi ' * 5
    utf8_mixed_para = u_mixed_para.encode('utf8')
    u_mixed_para_out = [u'くらとみ kuratomi くらとみ kuratomi くらとみ kuratomi くらとみ',
        u'kuratomi くらとみ kuratomi']
    u_mixed_para_57_initial_subsequent_out = [u' くらとみ kuratomi くらとみ kuratomi くらとみ kuratomi',
        u'----くらとみ kuratomi くらとみ kuratomi']
    utf8_mixed_para_out = map(to_bytes, u_mixed_para_out)
    utf8_mixed_para_57_initial_subsequent_out = map(to_bytes, u_mixed_para_57_initial_subsequent_out)

    # Code points 0-255 as a unicode string, plus control-code variants.
    u_ascii_chars = u' '.join(map(unichr, range(0, 256)))
    u_ascii_no_ctrl = u''.join([c for c in u_ascii_chars if ord(c) not in misc._CONTROL_CODES])
    u_ascii_ctrl_replace = u_ascii_chars.translate(dict([(c, u'?') for c in misc._CONTROL_CODES]))
    utf8_ascii_chars = u_ascii_chars.encode('utf8')
|
46
tests/data/locale-old/pt_BR.po
Normal file
46
tests/data/locale-old/pt_BR.po
Normal file
|
@ -0,0 +1,46 @@
|
||||||
|
# Portuguese (Brazil) translations for kitchen.
|
||||||
|
# Copyright (C) 2010 ORGANIZATION
|
||||||
|
# This file is distributed under the same license as the kitchen project.
|
||||||
|
# FIRST AUTHOR <EMAIL@ADDRESS>, 2010.
|
||||||
|
#
|
||||||
|
msgid ""
|
||||||
|
msgstr ""
|
||||||
|
"Project-Id-Version: kitchen 0.2.1a1\n"
|
||||||
|
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
|
||||||
|
"POT-Creation-Date: 2010-09-03 00:49+0400\n"
|
||||||
|
"PO-Revision-Date: 2010-09-08 00:45-0400\n"
|
||||||
|
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
||||||
|
"Language-Team: pt_BR <LL@li.org>\n"
|
||||||
|
"Plural-Forms: nplurals=2; plural=(n > 1)\n"
|
||||||
|
"MIME-Version: 1.0\n"
|
||||||
|
"Content-Type: text/plain; charset=utf-8\n"
|
||||||
|
"Content-Transfer-Encoding: 8bit\n"
|
||||||
|
"Generated-By: Babel 0.9.5\n"
|
||||||
|
|
||||||
|
#: kitchen.py:1
|
||||||
|
msgid "kitchen sink"
|
||||||
|
msgstr "placeholder"
|
||||||
|
|
||||||
|
#: kitchen.py:2
|
||||||
|
msgid "くらとみ"
|
||||||
|
msgstr "placeholder"
|
||||||
|
|
||||||
|
#: kitchen.py:3
|
||||||
|
msgid "Kuratomi"
|
||||||
|
msgstr "placeholder"
|
||||||
|
|
||||||
|
#: kitchen.py:4
|
||||||
|
msgid "1 lemon"
|
||||||
|
msgid_plural "4 lemons"
|
||||||
|
msgstr[0] "1 placeholder"
|
||||||
|
msgstr[1] "4 placeholders"
|
||||||
|
|
||||||
|
#: kitchen.py:5
|
||||||
|
msgid "一 limão"
|
||||||
|
msgid_plural "四 limões"
|
||||||
|
msgstr[0] "1 placeholder"
|
||||||
|
msgstr[1] "4 placeholders"
|
||||||
|
|
||||||
|
#: kitchen.py:6
|
||||||
|
msgid "Only café in fallback"
|
||||||
|
msgstr "Yes, only café in fallback"
|
BIN
tests/data/locale-old/pt_BR/LC_MESSAGES/test.mo
Normal file
BIN
tests/data/locale-old/pt_BR/LC_MESSAGES/test.mo
Normal file
Binary file not shown.
46
tests/data/locale-old/test.pot
Normal file
46
tests/data/locale-old/test.pot
Normal file
|
@ -0,0 +1,46 @@
|
||||||
|
# Translations template for kitchen.
|
||||||
|
# Copyright (C) 2010 ORGANIZATION
|
||||||
|
# This file is distributed under the same license as the kitchen project.
|
||||||
|
# FIRST AUTHOR <EMAIL@ADDRESS>, 2010.
|
||||||
|
#
|
||||||
|
#, fuzzy
|
||||||
|
msgid ""
|
||||||
|
msgstr ""
|
||||||
|
"Project-Id-Version: kitchen 0.2.1a1\n"
|
||||||
|
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
|
||||||
|
"POT-Creation-Date: 2010-09-03 00:49-0400\n"
|
||||||
|
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
|
||||||
|
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
||||||
|
"Language-Team: LANGUAGE <LL@li.org>\n"
|
||||||
|
"MIME-Version: 1.0\n"
|
||||||
|
"Content-Type: text/plain; charset=utf-8\n"
|
||||||
|
"Content-Transfer-Encoding: 8bit\n"
|
||||||
|
"Generated-By: Babel 0.9.5\n"
|
||||||
|
|
||||||
|
#: kitchen.py:1
|
||||||
|
msgid "kitchen sink"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen.py:2
|
||||||
|
msgid "くらとみ"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen.py:3
|
||||||
|
msgid "Kuratomi"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen.py:4
|
||||||
|
msgid "1 lemon"
|
||||||
|
msgid_plural "4 lemons"
|
||||||
|
msgstr[0] ""
|
||||||
|
msgstr[1] ""
|
||||||
|
|
||||||
|
#: kitchen.py:5
|
||||||
|
msgid "一 limão"
|
||||||
|
msgid_plural "四 limões"
|
||||||
|
msgstr[0] ""
|
||||||
|
msgstr[1] ""
|
||||||
|
|
||||||
|
#: kitchen.py:6
|
||||||
|
msgid "Only café in fallback"
|
||||||
|
msgstr ""
|
46
tests/data/locale/pt_BR.po
Normal file
46
tests/data/locale/pt_BR.po
Normal file
|
@ -0,0 +1,46 @@
|
||||||
|
# Portuguese (Brazil) translations for kitchen.
|
||||||
|
# Copyright (C) 2010 ORGANIZATION
|
||||||
|
# This file is distributed under the same license as the kitchen project.
|
||||||
|
# FIRST AUTHOR <EMAIL@ADDRESS>, 2010.
|
||||||
|
#
|
||||||
|
#, fuzzy
|
||||||
|
msgid ""
|
||||||
|
msgstr ""
|
||||||
|
"Project-Id-Version: kitchen 0.2.1a1\n"
|
||||||
|
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
|
||||||
|
"POT-Creation-Date: 2010-09-03 00:49+0400\n"
|
||||||
|
"PO-Revision-Date: 2010-09-08 00:45-0400\n"
|
||||||
|
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
||||||
|
"Language-Team: pt_BR <LL@li.org>\n"
|
||||||
|
"Plural-Forms: nplurals=2; plural=(n > 1)\n"
|
||||||
|
"MIME-Version: 1.0\n"
|
||||||
|
"Content-Type: text/plain; charset=utf-8\n"
|
||||||
|
"Content-Transfer-Encoding: 8bit\n"
|
||||||
|
"Generated-By: Babel 0.9.5\n"
|
||||||
|
|
||||||
|
#: kitchen.py:1
|
||||||
|
msgid "kitchen sink"
|
||||||
|
msgstr "pia da cozinha"
|
||||||
|
|
||||||
|
#: kitchen.py:2
|
||||||
|
#, fuzzy
|
||||||
|
msgid "くらとみ"
|
||||||
|
msgstr "Kuratomi"
|
||||||
|
|
||||||
|
#: kitchen.py:3
|
||||||
|
#, fuzzy
|
||||||
|
msgid "Kuratomi"
|
||||||
|
msgstr "くらとみ"
|
||||||
|
|
||||||
|
#: kitchen.py:4
|
||||||
|
msgid "1 lemon"
|
||||||
|
msgid_plural "4 lemons"
|
||||||
|
msgstr[0] "一 limão"
|
||||||
|
msgstr[1] "四 limões"
|
||||||
|
|
||||||
|
#: kitchen.py:5
|
||||||
|
msgid "一 limão"
|
||||||
|
msgid_plural "四 limões"
|
||||||
|
msgstr[0] "1 lemon"
|
||||||
|
msgstr[1] "4 lemons"
|
||||||
|
|
BIN
tests/data/locale/pt_BR/LC_MESSAGES/test.mo
Normal file
BIN
tests/data/locale/pt_BR/LC_MESSAGES/test.mo
Normal file
Binary file not shown.
42
tests/data/locale/test.pot
Normal file
42
tests/data/locale/test.pot
Normal file
|
@ -0,0 +1,42 @@
|
||||||
|
# Translations template for kitchen.
|
||||||
|
# Copyright (C) 2010 ORGANIZATION
|
||||||
|
# This file is distributed under the same license as the kitchen project.
|
||||||
|
# FIRST AUTHOR <EMAIL@ADDRESS>, 2010.
|
||||||
|
#
|
||||||
|
#, fuzzy
|
||||||
|
msgid ""
|
||||||
|
msgstr ""
|
||||||
|
"Project-Id-Version: kitchen 0.2.1a1\n"
|
||||||
|
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
|
||||||
|
"POT-Creation-Date: 2010-09-03 00:49-0400\n"
|
||||||
|
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
|
||||||
|
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
||||||
|
"Language-Team: LANGUAGE <LL@li.org>\n"
|
||||||
|
"MIME-Version: 1.0\n"
|
||||||
|
"Content-Type: text/plain; charset=utf-8\n"
|
||||||
|
"Content-Transfer-Encoding: 8bit\n"
|
||||||
|
"Generated-By: Babel 0.9.5\n"
|
||||||
|
|
||||||
|
#: kitchen.py:1
|
||||||
|
msgid "kitchen sink"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen.py:2
|
||||||
|
msgid "くらとみ"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen.py:3
|
||||||
|
msgid "Kuratomi"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
#: kitchen.py:4
|
||||||
|
msgid "1 lemon"
|
||||||
|
msgid_plural "4 lemons"
|
||||||
|
msgstr[0] ""
|
||||||
|
msgstr[1] ""
|
||||||
|
|
||||||
|
#: kitchen.py:5
|
||||||
|
msgid "一 limão"
|
||||||
|
msgid_plural "四 limões"
|
||||||
|
msgstr[0] ""
|
||||||
|
msgstr[1] ""
|
11
tests/subprocessdata/sigchild_ignore.py
Normal file
11
tests/subprocessdata/sigchild_ignore.py
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
import os
|
||||||
|
import signal, sys
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))
|
||||||
|
|
||||||
|
from kitchen.pycompat27.subprocess import _subprocess as subprocess
|
||||||
|
|
||||||
|
# On Linux this causes os.waitpid to fail with OSError as the OS has already
|
||||||
|
# reaped our child process. The wait() passing the OSError on to the caller
|
||||||
|
# and causing us to exit with an error is what we are testing against.
|
||||||
|
signal.signal(signal.SIGCHLD, signal.SIG_IGN)
|
||||||
|
subprocess.Popen([sys.executable, '-c', 'print("albatross")']).wait()
|
193
tests/test__all__.py
Normal file
193
tests/test__all__.py
Normal file
|
@ -0,0 +1,193 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from nose import tools
|
||||||
|
|
||||||
|
import os
|
||||||
|
import types
|
||||||
|
import warnings
|
||||||
|
from kitchen.pycompat24.sets import add_builtin_set
|
||||||
|
add_builtin_set()
|
||||||
|
|
||||||
|
def logit(msg):
|
||||||
|
log = open('/var/tmp/test.log', 'a')
|
||||||
|
log.write('%s\n' % msg)
|
||||||
|
log.close()
|
||||||
|
|
||||||
|
class NoAll(RuntimeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
class FailedImport(RuntimeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
class Test__all__(object):
|
||||||
|
'''Test that every function in __all__ exists and that no public methods
|
||||||
|
are missing from __all__
|
||||||
|
'''
|
||||||
|
known_private = set([('kitchen', 'collections', 'version_tuple_to_string'),
|
||||||
|
('kitchen.collections', 'strictdict', 'defaultdict'),
|
||||||
|
('kitchen', 'i18n', 'version_tuple_to_string'),
|
||||||
|
('kitchen', 'i18n', 'to_bytes'),
|
||||||
|
('kitchen', 'i18n', 'to_unicode'),
|
||||||
|
('kitchen', 'i18n', 'ENOENT'),
|
||||||
|
('kitchen', 'i18n', 'byte_string_valid_encoding'),
|
||||||
|
('kitchen', 'iterutils', 'version_tuple_to_string'),
|
||||||
|
('kitchen', 'pycompat24', 'version_tuple_to_string'),
|
||||||
|
('kitchen', 'pycompat25', 'version_tuple_to_string'),
|
||||||
|
('kitchen.pycompat25.collections', '_defaultdict', 'b_'),
|
||||||
|
('kitchen', 'pycompat27', 'version_tuple_to_string'),
|
||||||
|
('kitchen.pycompat27', 'subprocess', 'MAXFD'),
|
||||||
|
('kitchen.pycompat27', 'subprocess', 'list2cmdline'),
|
||||||
|
('kitchen.pycompat27', 'subprocess', 'mswindows'),
|
||||||
|
('kitchen', 'text', 'version_tuple_to_string'),
|
||||||
|
('kitchen.text', 'converters', 'b_'),
|
||||||
|
('kitchen.text', 'converters', 'b64decode'),
|
||||||
|
('kitchen.text', 'converters', 'b64encode'),
|
||||||
|
('kitchen.text', 'converters', 'ControlCharError'),
|
||||||
|
('kitchen.text', 'converters', 'guess_encoding'),
|
||||||
|
('kitchen.text', 'converters', 'html_entities_unescape'),
|
||||||
|
('kitchen.text', 'converters', 'process_control_chars'),
|
||||||
|
('kitchen.text', 'converters', 'XmlEncodeError'),
|
||||||
|
('kitchen.text', 'misc', 'b_'),
|
||||||
|
('kitchen.text', 'misc', 'chardet'),
|
||||||
|
('kitchen.text', 'misc', 'ControlCharError'),
|
||||||
|
('kitchen.text', 'display', 'b_'),
|
||||||
|
('kitchen.text', 'display', 'ControlCharError'),
|
||||||
|
('kitchen.text', 'display', 'to_bytes'),
|
||||||
|
('kitchen.text', 'display', 'to_unicode'),
|
||||||
|
('kitchen.text', 'utf8', 'b_'),
|
||||||
|
('kitchen.text', 'utf8', 'byte_string_textual_width_fill'),
|
||||||
|
('kitchen.text', 'utf8', 'byte_string_valid_encoding'),
|
||||||
|
('kitchen.text', 'utf8', 'fill'),
|
||||||
|
('kitchen.text', 'utf8', 'textual_width'),
|
||||||
|
('kitchen.text', 'utf8', 'textual_width_chop'),
|
||||||
|
('kitchen.text', 'utf8', 'to_bytes'),
|
||||||
|
('kitchen.text', 'utf8', 'to_unicode'),
|
||||||
|
('kitchen.text', 'utf8', 'wrap'),
|
||||||
|
])
|
||||||
|
lib_dir = os.path.join(os.path.dirname(__file__), '..', 'kitchen')
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
# Silence deprecation warnings
|
||||||
|
warnings.simplefilter('ignore', DeprecationWarning)
|
||||||
|
def tearDown(self):
|
||||||
|
warnings.simplefilter('default', DeprecationWarning)
|
||||||
|
|
||||||
|
def walk_modules(self, basedir, modpath):
|
||||||
|
files = os.listdir(basedir)
|
||||||
|
files.sort()
|
||||||
|
for fn in files:
|
||||||
|
path = os.path.join(basedir, fn)
|
||||||
|
if os.path.isdir(path):
|
||||||
|
pkg_init = os.path.join(path, '__init__.py')
|
||||||
|
if os.path.exists(pkg_init):
|
||||||
|
yield pkg_init, modpath + fn
|
||||||
|
for p, m in self.walk_modules(path, modpath + fn + '.'):
|
||||||
|
yield p, m
|
||||||
|
continue
|
||||||
|
if not fn.endswith('.py') or fn == '__init__.py':
|
||||||
|
continue
|
||||||
|
yield path, modpath + fn[:-3]
|
||||||
|
|
||||||
|
def check_has__all__(self, modpath):
|
||||||
|
# This heuristic speeds up the process by removing, de facto,
|
||||||
|
# most test modules (and avoiding the auto-executing ones).
|
||||||
|
f = None
|
||||||
|
try:
|
||||||
|
try:
|
||||||
|
f = open(modpath, 'rb')
|
||||||
|
tools.ok_('__all__' in f.read(), '%s does not contain __all__' % modpath)
|
||||||
|
except IOError, e:
|
||||||
|
tools.ok_(False, '%s' % e)
|
||||||
|
finally:
|
||||||
|
if f:
|
||||||
|
f.close()
|
||||||
|
|
||||||
|
def test_has__all__(self):
|
||||||
|
'''
|
||||||
|
For each module, check that it has an __all__
|
||||||
|
'''
|
||||||
|
# Blacklisted modules and packages
|
||||||
|
blacklist = set([ ])
|
||||||
|
|
||||||
|
for path, modname in [m for m in self.walk_modules(self.lib_dir, '')
|
||||||
|
if m[1] not in blacklist]:
|
||||||
|
# Check that it has an __all__
|
||||||
|
yield self.check_has__all__, path
|
||||||
|
|
||||||
|
def check_everything_in__all__exists(self, modname, modpath):
|
||||||
|
names = {}
|
||||||
|
exec 'from %s import %s' % (modpath, modname) in names
|
||||||
|
if not hasattr(names[modname], '__all__'):
|
||||||
|
# This should have been reported by test_has__all__
|
||||||
|
return
|
||||||
|
|
||||||
|
interior_names = {}
|
||||||
|
try:
|
||||||
|
exec 'from %s.%s import *' % (modpath, modname) in interior_names
|
||||||
|
except Exception, e:
|
||||||
|
# Include the module name in the exception string
|
||||||
|
tools.ok_(False, '__all__ failure in %s: %s: %s' % (
|
||||||
|
modname, e.__class__.__name__, e))
|
||||||
|
if '__builtins__' in interior_names:
|
||||||
|
del interior_names['__builtins__']
|
||||||
|
keys = set(interior_names)
|
||||||
|
all = set(names[modname].__all__)
|
||||||
|
tools.ok_(keys == all)
|
||||||
|
|
||||||
|
def test_everything_in__all__exists(self):
|
||||||
|
'''
|
||||||
|
For each name in module's __all__, check that it exists
|
||||||
|
'''
|
||||||
|
# Blacklisted modules and packages
|
||||||
|
blacklist = set([ ])
|
||||||
|
|
||||||
|
for path, modname in [m for m in self.walk_modules(self.lib_dir, '')
|
||||||
|
if m[1] not in blacklist]:
|
||||||
|
# From path, deduce the module name
|
||||||
|
from_name = path[path.find('../kitchen') + 3:]
|
||||||
|
if from_name.endswith('__init__.py'):
|
||||||
|
# Remove __init__.py as well as the filename
|
||||||
|
from_name = os.path.dirname(from_name)
|
||||||
|
from_name = os.path.dirname(from_name)
|
||||||
|
from_name = unicode(from_name, 'utf-8')
|
||||||
|
from_name = from_name.translate({ord(u'/'): u'.'})
|
||||||
|
from_name = from_name.encode('utf-8')
|
||||||
|
yield self.check_everything_in__all__exists, modname.split('.')[-1], from_name
|
||||||
|
|
||||||
|
|
||||||
|
def check__all__is_complete(self, modname, modpath):
|
||||||
|
names = {}
|
||||||
|
exec 'from %s import %s' % (modpath, modname) in names
|
||||||
|
if not hasattr(names[modname], '__all__'):
|
||||||
|
# This should have been reported by test_has__all__
|
||||||
|
return
|
||||||
|
|
||||||
|
mod = names[modname]
|
||||||
|
expected_public = [k for k in mod.__dict__ if (modpath, modname, k)
|
||||||
|
not in self.known_private and not k.startswith("_") and not
|
||||||
|
isinstance(mod.__dict__[k], types.ModuleType)]
|
||||||
|
|
||||||
|
all = set(mod.__all__)
|
||||||
|
public = set(expected_public)
|
||||||
|
tools.ok_(all.issuperset(public), 'These public names are not in %s.__all__: %s'
|
||||||
|
% (modname, ', '.join(public.difference(all))))
|
||||||
|
|
||||||
|
def test__all__is_complete(self):
|
||||||
|
'''
|
||||||
|
For each module, check that every public name is in __all__
|
||||||
|
'''
|
||||||
|
# Blacklisted modules and packages
|
||||||
|
blacklist = set(['pycompat27.subprocess._subprocess',
|
||||||
|
'pycompat24.base64._base64'])
|
||||||
|
|
||||||
|
for path, modname in [m for m in self.walk_modules(self.lib_dir, '')
|
||||||
|
if m[1] not in blacklist]:
|
||||||
|
# From path, deduce the module name
|
||||||
|
from_name = path[path.find('../kitchen') + 3:]
|
||||||
|
if from_name.endswith('__init__.py'):
|
||||||
|
# Remove __init__.py as well as the filename
|
||||||
|
from_name = os.path.dirname(from_name)
|
||||||
|
from_name = os.path.dirname(from_name)
|
||||||
|
from_name = unicode(from_name, 'utf-8')
|
||||||
|
from_name = from_name.translate({ord(u'/'): u'.'})
|
||||||
|
from_name = from_name.encode('utf-8')
|
||||||
|
yield self.check__all__is_complete, modname.split('.')[-1], from_name
|
190
tests/test_base64.py
Normal file
190
tests/test_base64.py
Normal file
|
@ -0,0 +1,190 @@
|
||||||
|
import unittest
|
||||||
|
from test import test_support
|
||||||
|
from kitchen.pycompat24.base64 import _base64 as base64
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class LegacyBase64TestCase(unittest.TestCase):
|
||||||
|
def test_encodestring(self):
|
||||||
|
eq = self.assertEqual
|
||||||
|
eq(base64.encodestring("www.python.org"), "d3d3LnB5dGhvbi5vcmc=\n")
|
||||||
|
eq(base64.encodestring("a"), "YQ==\n")
|
||||||
|
eq(base64.encodestring("ab"), "YWI=\n")
|
||||||
|
eq(base64.encodestring("abc"), "YWJj\n")
|
||||||
|
eq(base64.encodestring(""), "")
|
||||||
|
eq(base64.encodestring("abcdefghijklmnopqrstuvwxyz"
|
||||||
|
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||||
|
"0123456789!@#0^&*();:<>,. []{}"),
|
||||||
|
"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE"
|
||||||
|
"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0\nNT"
|
||||||
|
"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ==\n")
|
||||||
|
|
||||||
|
def test_decodestring(self):
|
||||||
|
eq = self.assertEqual
|
||||||
|
eq(base64.decodestring("d3d3LnB5dGhvbi5vcmc=\n"), "www.python.org")
|
||||||
|
eq(base64.decodestring("YQ==\n"), "a")
|
||||||
|
eq(base64.decodestring("YWI=\n"), "ab")
|
||||||
|
eq(base64.decodestring("YWJj\n"), "abc")
|
||||||
|
eq(base64.decodestring("YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE"
|
||||||
|
"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0\nNT"
|
||||||
|
"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ==\n"),
|
||||||
|
"abcdefghijklmnopqrstuvwxyz"
|
||||||
|
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||||
|
"0123456789!@#0^&*();:<>,. []{}")
|
||||||
|
eq(base64.decodestring(''), '')
|
||||||
|
|
||||||
|
def test_encode(self):
|
||||||
|
eq = self.assertEqual
|
||||||
|
from cStringIO import StringIO
|
||||||
|
infp = StringIO('abcdefghijklmnopqrstuvwxyz'
|
||||||
|
'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
|
||||||
|
'0123456789!@#0^&*();:<>,. []{}')
|
||||||
|
outfp = StringIO()
|
||||||
|
base64.encode(infp, outfp)
|
||||||
|
eq(outfp.getvalue(),
|
||||||
|
'YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE'
|
||||||
|
'RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0\nNT'
|
||||||
|
'Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ==\n')
|
||||||
|
|
||||||
|
def test_decode(self):
|
||||||
|
from cStringIO import StringIO
|
||||||
|
infp = StringIO('d3d3LnB5dGhvbi5vcmc=')
|
||||||
|
outfp = StringIO()
|
||||||
|
base64.decode(infp, outfp)
|
||||||
|
self.assertEqual(outfp.getvalue(), 'www.python.org')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class BaseXYTestCase(unittest.TestCase):
|
||||||
|
def test_b64encode(self):
|
||||||
|
eq = self.assertEqual
|
||||||
|
# Test default alphabet
|
||||||
|
eq(base64.b64encode("www.python.org"), "d3d3LnB5dGhvbi5vcmc=")
|
||||||
|
eq(base64.b64encode('\x00'), 'AA==')
|
||||||
|
eq(base64.b64encode("a"), "YQ==")
|
||||||
|
eq(base64.b64encode("ab"), "YWI=")
|
||||||
|
eq(base64.b64encode("abc"), "YWJj")
|
||||||
|
eq(base64.b64encode(""), "")
|
||||||
|
eq(base64.b64encode("abcdefghijklmnopqrstuvwxyz"
|
||||||
|
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||||
|
"0123456789!@#0^&*();:<>,. []{}"),
|
||||||
|
"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE"
|
||||||
|
"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NT"
|
||||||
|
"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ==")
|
||||||
|
# Test with arbitrary alternative characters
|
||||||
|
eq(base64.b64encode('\xd3V\xbeo\xf7\x1d', altchars='*$'), '01a*b$cd')
|
||||||
|
# Test standard alphabet
|
||||||
|
eq(base64.standard_b64encode("www.python.org"), "d3d3LnB5dGhvbi5vcmc=")
|
||||||
|
eq(base64.standard_b64encode("a"), "YQ==")
|
||||||
|
eq(base64.standard_b64encode("ab"), "YWI=")
|
||||||
|
eq(base64.standard_b64encode("abc"), "YWJj")
|
||||||
|
eq(base64.standard_b64encode(""), "")
|
||||||
|
eq(base64.standard_b64encode("abcdefghijklmnopqrstuvwxyz"
|
||||||
|
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||||
|
"0123456789!@#0^&*();:<>,. []{}"),
|
||||||
|
"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE"
|
||||||
|
"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NT"
|
||||||
|
"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ==")
|
||||||
|
# Test with 'URL safe' alternative characters
|
||||||
|
eq(base64.urlsafe_b64encode('\xd3V\xbeo\xf7\x1d'), '01a-b_cd')
|
||||||
|
|
||||||
|
def test_b64decode(self):
|
||||||
|
eq = self.assertEqual
|
||||||
|
eq(base64.b64decode("d3d3LnB5dGhvbi5vcmc="), "www.python.org")
|
||||||
|
eq(base64.b64decode('AA=='), '\x00')
|
||||||
|
eq(base64.b64decode("YQ=="), "a")
|
||||||
|
eq(base64.b64decode("YWI="), "ab")
|
||||||
|
eq(base64.b64decode("YWJj"), "abc")
|
||||||
|
eq(base64.b64decode("YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE"
|
||||||
|
"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0\nNT"
|
||||||
|
"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ=="),
|
||||||
|
"abcdefghijklmnopqrstuvwxyz"
|
||||||
|
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||||
|
"0123456789!@#0^&*();:<>,. []{}")
|
||||||
|
eq(base64.b64decode(''), '')
|
||||||
|
# Test with arbitrary alternative characters
|
||||||
|
eq(base64.b64decode('01a*b$cd', altchars='*$'), '\xd3V\xbeo\xf7\x1d')
|
||||||
|
# Test standard alphabet
|
||||||
|
eq(base64.standard_b64decode("d3d3LnB5dGhvbi5vcmc="), "www.python.org")
|
||||||
|
eq(base64.standard_b64decode("YQ=="), "a")
|
||||||
|
eq(base64.standard_b64decode("YWI="), "ab")
|
||||||
|
eq(base64.standard_b64decode("YWJj"), "abc")
|
||||||
|
eq(base64.standard_b64decode(""), "")
|
||||||
|
eq(base64.standard_b64decode("YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE"
|
||||||
|
"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NT"
|
||||||
|
"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ=="),
|
||||||
|
"abcdefghijklmnopqrstuvwxyz"
|
||||||
|
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||||
|
"0123456789!@#0^&*();:<>,. []{}")
|
||||||
|
# Test with 'URL safe' alternative characters
|
||||||
|
eq(base64.urlsafe_b64decode('01a-b_cd'), '\xd3V\xbeo\xf7\x1d')
|
||||||
|
|
||||||
|
def test_b64decode_error(self):
|
||||||
|
self.assertRaises(TypeError, base64.b64decode, 'abc')
|
||||||
|
|
||||||
|
def test_b32encode(self):
|
||||||
|
eq = self.assertEqual
|
||||||
|
eq(base64.b32encode(''), '')
|
||||||
|
eq(base64.b32encode('\x00'), 'AA======')
|
||||||
|
eq(base64.b32encode('a'), 'ME======')
|
||||||
|
eq(base64.b32encode('ab'), 'MFRA====')
|
||||||
|
eq(base64.b32encode('abc'), 'MFRGG===')
|
||||||
|
eq(base64.b32encode('abcd'), 'MFRGGZA=')
|
||||||
|
eq(base64.b32encode('abcde'), 'MFRGGZDF')
|
||||||
|
|
||||||
|
def test_b32decode(self):
|
||||||
|
eq = self.assertEqual
|
||||||
|
eq(base64.b32decode(''), '')
|
||||||
|
eq(base64.b32decode('AA======'), '\x00')
|
||||||
|
eq(base64.b32decode('ME======'), 'a')
|
||||||
|
eq(base64.b32decode('MFRA===='), 'ab')
|
||||||
|
eq(base64.b32decode('MFRGG==='), 'abc')
|
||||||
|
eq(base64.b32decode('MFRGGZA='), 'abcd')
|
||||||
|
eq(base64.b32decode('MFRGGZDF'), 'abcde')
|
||||||
|
|
||||||
|
def test_b32decode_casefold(self):
|
||||||
|
eq = self.assertEqual
|
||||||
|
eq(base64.b32decode('', True), '')
|
||||||
|
eq(base64.b32decode('ME======', True), 'a')
|
||||||
|
eq(base64.b32decode('MFRA====', True), 'ab')
|
||||||
|
eq(base64.b32decode('MFRGG===', True), 'abc')
|
||||||
|
eq(base64.b32decode('MFRGGZA=', True), 'abcd')
|
||||||
|
eq(base64.b32decode('MFRGGZDF', True), 'abcde')
|
||||||
|
# Lower cases
|
||||||
|
eq(base64.b32decode('me======', True), 'a')
|
||||||
|
eq(base64.b32decode('mfra====', True), 'ab')
|
||||||
|
eq(base64.b32decode('mfrgg===', True), 'abc')
|
||||||
|
eq(base64.b32decode('mfrggza=', True), 'abcd')
|
||||||
|
eq(base64.b32decode('mfrggzdf', True), 'abcde')
|
||||||
|
# Expected exceptions
|
||||||
|
self.assertRaises(TypeError, base64.b32decode, 'me======')
|
||||||
|
# Mapping zero and one
|
||||||
|
eq(base64.b32decode('MLO23456'), 'b\xdd\xad\xf3\xbe')
|
||||||
|
eq(base64.b32decode('M1023456', map01='L'), 'b\xdd\xad\xf3\xbe')
|
||||||
|
eq(base64.b32decode('M1023456', map01='I'), 'b\x1d\xad\xf3\xbe')
|
||||||
|
|
||||||
|
def test_b32decode_error(self):
|
||||||
|
self.assertRaises(TypeError, base64.b32decode, 'abc')
|
||||||
|
self.assertRaises(TypeError, base64.b32decode, 'ABCDEF==')
|
||||||
|
|
||||||
|
def test_b16encode(self):
|
||||||
|
eq = self.assertEqual
|
||||||
|
eq(base64.b16encode('\x01\x02\xab\xcd\xef'), '0102ABCDEF')
|
||||||
|
eq(base64.b16encode('\x00'), '00')
|
||||||
|
|
||||||
|
def test_b16decode(self):
|
||||||
|
eq = self.assertEqual
|
||||||
|
eq(base64.b16decode('0102ABCDEF'), '\x01\x02\xab\xcd\xef')
|
||||||
|
eq(base64.b16decode('00'), '\x00')
|
||||||
|
# Lower case is not allowed without a flag
|
||||||
|
self.assertRaises(TypeError, base64.b16decode, '0102abcdef')
|
||||||
|
# Case fold
|
||||||
|
eq(base64.b16decode('0102abcdef', True), '\x01\x02\xab\xcd\xef')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#def test_main():
|
||||||
|
# test_support.run_unittest(__name__)
|
||||||
|
#
|
||||||
|
#if __name__ == '__main__':
|
||||||
|
# test_main()
|
156
tests/test_collections.py
Normal file
156
tests/test_collections.py
Normal file
|
@ -0,0 +1,156 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
import unittest
|
||||||
|
from nose import tools
|
||||||
|
|
||||||
|
from kitchen.pycompat24.sets import add_builtin_set
|
||||||
|
add_builtin_set()
|
||||||
|
|
||||||
|
from kitchen import collections
|
||||||
|
|
||||||
|
def test_strict_dict_get_set():
|
||||||
|
'''Test getting and setting items in StrictDict'''
|
||||||
|
d = collections.StrictDict()
|
||||||
|
d[u'a'] = 1
|
||||||
|
d['a'] = 2
|
||||||
|
tools.ok_(d[u'a'] != d['a'])
|
||||||
|
tools.ok_(len(d) == 2)
|
||||||
|
|
||||||
|
d[u'\xf1'] = 1
|
||||||
|
d['\xf1'] = 2
|
||||||
|
d[u'\xf1'.encode('utf8')] = 3
|
||||||
|
tools.ok_(d[u'\xf1'] == 1)
|
||||||
|
tools.ok_(d['\xf1'] == 2)
|
||||||
|
tools.ok_(d[u'\xf1'.encode('utf8')] == 3)
|
||||||
|
tools.ok_(len(d) == 5)
|
||||||
|
|
||||||
|
class TestStrictDict(unittest.TestCase):
|
||||||
|
def setUp(self):
|
||||||
|
self.d = collections.StrictDict()
|
||||||
|
self.d[u'a'] = 1
|
||||||
|
self.d['a'] = 2
|
||||||
|
self.d[u'\xf1'] = 1
|
||||||
|
self.d['\xf1'] = 2
|
||||||
|
self.d[u'\xf1'.encode('utf8')] = 3
|
||||||
|
self.keys = [u'a', 'a', u'\xf1', '\xf1', u'\xf1'.encode('utf8')]
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
del(self.d)
|
||||||
|
|
||||||
|
def _compare_lists(self, list1, list2, debug=False):
|
||||||
|
'''We have a mixture of bytes and unicode and need python2.3 compat
|
||||||
|
|
||||||
|
So we have to compare these lists manually and inefficiently
|
||||||
|
'''
|
||||||
|
def _compare_lists_helper(compare_to, dupes, idx, length):
|
||||||
|
if i not in compare_to:
|
||||||
|
return False
|
||||||
|
for n in range(1, length + 1):
|
||||||
|
if i not in dupes[n][idx]:
|
||||||
|
dupes[n][idx].add(i)
|
||||||
|
return True
|
||||||
|
if len(list1) != len(list2):
|
||||||
|
return False
|
||||||
|
|
||||||
|
list1_dupes = dict([(i, (set(), set(), set())) for i in range(1, len(list1)+1)])
|
||||||
|
list2_dupes = dict([(i, (set(), set(), set())) for i in range(1, len(list1)+1)])
|
||||||
|
|
||||||
|
list1_u = [l for l in list1 if isinstance(l, unicode)]
|
||||||
|
list1_b = [l for l in list1 if isinstance(l, str)]
|
||||||
|
list1_o = [l for l in list1 if not (isinstance(l, unicode) or isinstance(l, str))]
|
||||||
|
|
||||||
|
list2_u = [l for l in list2 if isinstance(l, unicode)]
|
||||||
|
list2_b = [l for l in list2 if isinstance(l, str)]
|
||||||
|
list2_o = [l for l in list2 if not (isinstance(l, unicode) or isinstance(l, str))]
|
||||||
|
|
||||||
|
for i in list1:
|
||||||
|
if isinstance(i, unicode):
|
||||||
|
if not _compare_lists_helper(list2_u, list1_dupes, 0, len(list1)):
|
||||||
|
return False
|
||||||
|
elif isinstance(i, str):
|
||||||
|
if not _compare_lists_helper(list2_b, list1_dupes, 1, len(list1)):
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
if not _compare_lists_helper(list2_o, list1_dupes, 2, len(list1)):
|
||||||
|
return False
|
||||||
|
|
||||||
|
if list1_dupes[2][0] or list1_dupes[2][1] or list1_dupes[2][2]:
|
||||||
|
for i in list2:
|
||||||
|
if isinstance(i, unicode):
|
||||||
|
if not _compare_lists_helper(list1_u, list2_dupes, 0, len(list1)):
|
||||||
|
return False
|
||||||
|
elif isinstance(i, str):
|
||||||
|
if not _compare_lists_helper(list1_b, list2_dupes, 1, len(list1)):
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
if not _compare_lists_helper(list1_o, list2_dupes, 2, len(list1)):
|
||||||
|
return False
|
||||||
|
|
||||||
|
for i in range(2, len(list1)+1):
|
||||||
|
for n in list1_dupes[i]:
|
||||||
|
if n not in list2_dupes[i]:
|
||||||
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
def test__compare_list(self):
|
||||||
|
'''*sigh* this test support function is so complex we need to test it'''
|
||||||
|
tools.ok_(self._compare_lists(['a', 'b', 'c'], ['c', 'a', 'b']))
|
||||||
|
tools.ok_(not self._compare_lists(['b', 'c'], ['c', 'a', 'b']))
|
||||||
|
tools.ok_(not self._compare_lists([u'a', 'b'], ['a', 'b']))
|
||||||
|
tools.ok_(not self._compare_lists(['a', u'b'], [u'a', 'b']))
|
||||||
|
tools.ok_(self._compare_lists(['a', 'b', 1], ['a', 1, 'b']))
|
||||||
|
tools.ok_(self._compare_lists([u'a', u'b'], [u'a', u'b']))
|
||||||
|
tools.ok_(self._compare_lists([u'a', 'b'], [u'a', 'b']))
|
||||||
|
tools.ok_(not self._compare_lists([u'a', 'b'], [u'a', u'b']))
|
||||||
|
tools.ok_(self._compare_lists([u'a', 'b', 'b', 'c', u'a'], [u'a', u'a', 'b', 'c', 'b']))
|
||||||
|
tools.ok_(not self._compare_lists([u'a', 'b', 'b', 'c', 'a'], [u'a', u'a', 'b', 'c', 'b']))
|
||||||
|
tools.ok_(not self._compare_lists([u'a', 'b', 'b', 'c', u'a'], [u'a', 'b', 'b', 'c', 'b']))
|
||||||
|
|
||||||
|
def test_strict_dict_len(self):
|
||||||
|
'''StrictDict len'''
|
||||||
|
tools.ok_(len(self.d) == 5)
|
||||||
|
|
||||||
|
def test_strict_dict_del(self):
|
||||||
|
'''StrictDict del'''
|
||||||
|
tools.ok_(len(self.d) == 5)
|
||||||
|
del(self.d[u'\xf1'])
|
||||||
|
tools.assert_raises(KeyError, self.d.__getitem__, u'\xf1')
|
||||||
|
tools.ok_(len(self.d) == 4)
|
||||||
|
|
||||||
|
def test_strict_dict_iter(self):
|
||||||
|
'''StrictDict iteration'''
|
||||||
|
keys = []
|
||||||
|
for k in self.d:
|
||||||
|
keys.append(k)
|
||||||
|
tools.ok_(self._compare_lists(keys, self.keys))
|
||||||
|
|
||||||
|
keys = []
|
||||||
|
for k in self.d.iterkeys():
|
||||||
|
keys.append(k)
|
||||||
|
tools.ok_(self._compare_lists(keys, self.keys))
|
||||||
|
|
||||||
|
keys = [k for k in self.d]
|
||||||
|
tools.ok_(self._compare_lists(keys, self.keys))
|
||||||
|
|
||||||
|
keys = []
|
||||||
|
for k in self.d.keys():
|
||||||
|
keys.append(k)
|
||||||
|
tools.ok_(self._compare_lists(keys, self.keys))
|
||||||
|
|
||||||
|
def test_strict_dict_contains(self):
|
||||||
|
'''StrictDict contains function'''
|
||||||
|
tools.ok_('b' not in self.d)
|
||||||
|
tools.ok_(u'b' not in self.d)
|
||||||
|
tools.ok_('\xf1' in self.d)
|
||||||
|
tools.ok_(u'\xf1' in self.d)
|
||||||
|
tools.ok_('a' in self.d)
|
||||||
|
tools.ok_(u'a' in self.d)
|
||||||
|
|
||||||
|
del(self.d[u'\xf1'])
|
||||||
|
tools.ok_(u'\xf1' not in self.d)
|
||||||
|
tools.ok_('\xf1' in self.d)
|
||||||
|
|
||||||
|
del(self.d['a'])
|
||||||
|
tools.ok_(u'a' in self.d)
|
||||||
|
tools.ok_('a' not in self.d)
|
387
tests/test_converters.py
Normal file
387
tests/test_converters.py
Normal file
|
@ -0,0 +1,387 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
|
||||||
|
import unittest
|
||||||
|
from nose import tools
|
||||||
|
from nose.plugins.skip import SkipTest
|
||||||
|
|
||||||
|
import StringIO
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
try:
|
||||||
|
import chardet
|
||||||
|
except:
|
||||||
|
chardet = None
|
||||||
|
|
||||||
|
from kitchen.text import converters
|
||||||
|
from kitchen.text.exceptions import XmlEncodeError
|
||||||
|
|
||||||
|
import base_classes
|
||||||
|
|
||||||
|
class UnicodeNoStr(object):
|
||||||
|
def __unicode__(self):
|
||||||
|
return u'El veloz murciélago saltó sobre el perro perezoso.'
|
||||||
|
|
||||||
|
class StrNoUnicode(object):
|
||||||
|
def __str__(self):
|
||||||
|
return u'El veloz murciélago saltó sobre el perro perezoso.'.encode('utf8')
|
||||||
|
|
||||||
|
class StrReturnsUnicode(object):
    '''Fixture: __str__ (mis)returns a unicode string instead of bytes.'''

    def __str__(self):
        '''Return the Spanish test sentence as unicode (deliberately the wrong type).'''
        sentence = u'El veloz murciélago saltó sobre el perro perezoso.'
        return sentence
|
||||||
|
|
||||||
|
class UnicodeReturnsStr(object):
    '''Fixture: __unicode__ (mis)returns utf8-encoded bytes instead of unicode.'''

    def __unicode__(self):
        '''Return the Spanish test sentence as utf8 bytes (deliberately the wrong type).'''
        sentence = u'El veloz murciélago saltó sobre el perro perezoso.'
        return sentence.encode('utf8')
|
||||||
|
|
||||||
|
class UnicodeStrCrossed(object):
    '''Fixture with "crossed" return types: __unicode__ yields utf8 bytes
    while __str__ yields unicode.
    '''

    def __unicode__(self):
        '''Return the Spanish test sentence as utf8 bytes (deliberately swapped).'''
        sentence = u'El veloz murciélago saltó sobre el perro perezoso.'
        return sentence.encode('utf8')

    def __str__(self):
        '''Return the Spanish test sentence as unicode (deliberately swapped).'''
        sentence = u'El veloz murciélago saltó sobre el perro perezoso.'
        return sentence
|
||||||
|
|
||||||
|
class ReprUnicode(object):
    '''Fixture: __repr__ returns a unicode string rather than a byte str.'''

    def __repr__(self):
        '''Return a repr-style unicode string wrapping the test sentence.'''
        text = u'ReprUnicode(El veloz murciélago saltó sobre el perro perezoso.)'
        return text
|
||||||
|
|
||||||
|
class TestConverters(unittest.TestCase, base_classes.UnicodeTestData):
    '''Tests for the conversion functions in kitchen.text.converters.

    The sample strings (self.u_japanese, self.utf8_spanish, ...) are supplied
    by base_classes.UnicodeTestData.
    '''

    def test_to_unicode(self):
        '''Test to_unicode when the user gives good values'''
        # unicode input is returned unchanged, even with an (ignored) encoding
        tools.ok_(converters.to_unicode(self.u_japanese, encoding='latin1') == self.u_japanese)

        # byte strings decode with the default (utf8) encoding
        tools.ok_(converters.to_unicode(self.utf8_spanish) == self.u_spanish)
        tools.ok_(converters.to_unicode(self.utf8_japanese) == self.u_japanese)

        # ...or with an explicitly named encoding
        tools.ok_(converters.to_unicode(self.latin1_spanish, encoding='latin1') == self.u_spanish)
        tools.ok_(converters.to_unicode(self.euc_jp_japanese, encoding='euc_jp') == self.u_japanese)

        # an invalid nonstring strategy raises TypeError
        tools.assert_raises(TypeError, converters.to_unicode, *[5], **{'nonstring': 'foo'})

    def test_to_unicode_errors(self):
        '''Test the errors parameter: default mangles, ignore drops, strict raises.'''
        tools.ok_(converters.to_unicode(self.latin1_spanish) == self.u_mangled_spanish_latin1_as_utf8)
        tools.ok_(converters.to_unicode(self.latin1_spanish, errors='ignore') == self.u_spanish_ignore)
        tools.assert_raises(UnicodeDecodeError, converters.to_unicode,
                *[self.latin1_spanish], **{'errors': 'strict'})

    def test_to_unicode_nonstring(self):
        '''Test each nonstring strategy of to_unicode on non-string inputs.'''
        tools.ok_(converters.to_unicode(5) == u'5')
        tools.ok_(converters.to_unicode(5, nonstring='empty') == u'')
        tools.ok_(converters.to_unicode(5, nonstring='passthru') == 5)
        tools.ok_(converters.to_unicode(5, nonstring='simplerepr') == u'5')
        tools.ok_(converters.to_unicode(5, nonstring='repr') == u'5')
        tools.assert_raises(TypeError, converters.to_unicode, *[5], **{'nonstring': 'strict'})

        # simplerepr recovers the sentence via __unicode__ or __str__,
        # whichever the fixture provides (see the fixture classes above)
        tools.ok_(converters.to_unicode(UnicodeNoStr(), nonstring='simplerepr') == self.u_spanish)
        tools.ok_(converters.to_unicode(StrNoUnicode(), nonstring='simplerepr') == self.u_spanish)
        tools.ok_(converters.to_unicode(StrReturnsUnicode(), nonstring='simplerepr') == self.u_spanish)
        tools.ok_(converters.to_unicode(UnicodeReturnsStr(), nonstring='simplerepr') == self.u_spanish)
        tools.ok_(converters.to_unicode(UnicodeStrCrossed(), nonstring='simplerepr') == self.u_spanish)

        obj_repr = converters.to_unicode(object, nonstring='simplerepr')
        tools.ok_(obj_repr == u"<type 'object'>" and isinstance(obj_repr, unicode))

    def test_to_bytes(self):
        '''Test to_bytes when the user gives good values'''
        # byte input is returned unchanged, even with an (ignored) encoding
        tools.ok_(converters.to_bytes(self.utf8_japanese, encoding='latin1') == self.utf8_japanese)

        # unicode encodes with the default (utf8) encoding
        tools.ok_(converters.to_bytes(self.u_spanish) == self.utf8_spanish)
        tools.ok_(converters.to_bytes(self.u_japanese) == self.utf8_japanese)

        # ...or with an explicitly named encoding
        tools.ok_(converters.to_bytes(self.u_spanish, encoding='latin1') == self.latin1_spanish)
        tools.ok_(converters.to_bytes(self.u_japanese, encoding='euc_jp') == self.euc_jp_japanese)

    def test_to_bytes_errors(self):
        '''Test the errors parameter: default replaces, ignore drops, strict raises.'''
        tools.ok_(converters.to_bytes(self.u_mixed, encoding='latin1') ==
                self.latin1_mixed_replace)
        tools.ok_(converters.to_bytes(self.u_mixed, encoding='latin',
                errors='ignore') == self.latin1_mixed_ignore)
        tools.assert_raises(UnicodeEncodeError, converters.to_bytes,
                *[self.u_mixed], **{'errors': 'strict', 'encoding': 'latin1'})

    def _check_repr_bytes(self, repr_string, obj_name):
        '''Assert repr_string is a byte str matching the default-repr pattern
        (self.repr_re from UnicodeTestData) for an instance of obj_name.
        '''
        tools.ok_(isinstance(repr_string, str))
        match = self.repr_re.match(repr_string)
        tools.ok_(match != None)
        tools.ok_(match.groups()[0] == obj_name)

    def test_to_bytes_nonstring(self):
        '''Test each nonstring strategy of to_bytes on non-string inputs.'''
        tools.ok_(converters.to_bytes(5) == '5')
        tools.ok_(converters.to_bytes(5, nonstring='empty') == '')
        tools.ok_(converters.to_bytes(5, nonstring='passthru') == 5)
        tools.ok_(converters.to_bytes(5, nonstring='simplerepr') == '5')
        tools.ok_(converters.to_bytes(5, nonstring='repr') == '5')

        # Raise a TypeError if the msg is nonstring and we're set to strict
        tools.assert_raises(TypeError, converters.to_bytes, *[5], **{'nonstring': 'strict'})
        # Raise a TypeError if given an invalid nonstring arg
        tools.assert_raises(TypeError, converters.to_bytes, *[5], **{'nonstring': 'INVALID'})

        # No __str__ method so this returns repr
        string = converters.to_bytes(UnicodeNoStr(), nonstring='simplerepr')
        self._check_repr_bytes(string, 'UnicodeNoStr')

        # This object's __str__ returns a utf8 encoded object
        tools.ok_(converters.to_bytes(StrNoUnicode(), nonstring='simplerepr') == self.utf8_spanish)

        # This object's __str__ returns unicode which to_bytes converts to utf8
        tools.ok_(converters.to_bytes(StrReturnsUnicode(), nonstring='simplerepr') == self.utf8_spanish)
        # Unless we explicitly ask for something different
        tools.ok_(converters.to_bytes(StrReturnsUnicode(),
                nonstring='simplerepr', encoding='latin1') == self.latin1_spanish)

        # This object has no __str__ so it returns repr
        string = converters.to_bytes(UnicodeReturnsStr(), nonstring='simplerepr')
        self._check_repr_bytes(string, 'UnicodeReturnsStr')

        # This object's __str__ returns unicode which to_bytes converts to utf8
        tools.ok_(converters.to_bytes(UnicodeStrCrossed(), nonstring='simplerepr') == self.utf8_spanish)

        # This object's __repr__ returns unicode which to_bytes converts to utf8
        tools.ok_(converters.to_bytes(ReprUnicode(), nonstring='simplerepr')
                == u'ReprUnicode(El veloz murciélago saltó sobre el perro perezoso.)'.encode('utf8'))
        tools.ok_(converters.to_bytes(ReprUnicode(), nonstring='repr') ==
                u'ReprUnicode(El veloz murciélago saltó sobre el perro perezoso.)'.encode('utf8'))

        obj_repr = converters.to_bytes(object, nonstring='simplerepr')
        tools.ok_(obj_repr == "<type 'object'>" and isinstance(obj_repr, str))

    def test_unicode_to_xml(self):
        '''Test escaping and control-character handling of unicode_to_xml.'''
        tools.ok_(converters.unicode_to_xml(None) == '')
        # byte string input is rejected
        tools.assert_raises(XmlEncodeError, converters.unicode_to_xml, *['byte string'])
        # invalid control_chars strategy is rejected
        tools.assert_raises(ValueError, converters.unicode_to_xml, *[u'string'], **{'control_chars': 'foo'})
        # strict raises on an embedded control character
        tools.assert_raises(XmlEncodeError, converters.unicode_to_xml,
                *[u'string\u0002'], **{'control_chars': 'strict'})
        tools.ok_(converters.unicode_to_xml(self.u_entity) == self.utf8_entity_escape)
        tools.ok_(converters.unicode_to_xml(self.u_entity, attrib=True) == self.utf8_attrib_escape)

    def test_xml_to_unicode(self):
        '''Round-trip: escaped xml content back to the unicode original.'''
        tools.ok_(converters.xml_to_unicode(self.utf8_entity_escape, 'utf8', 'replace') == self.u_entity)
        tools.ok_(converters.xml_to_unicode(self.utf8_attrib_escape, 'utf8', 'replace') == self.u_entity)

    def test_xml_to_byte_string(self):
        '''Round-trip: escaped xml content to bytes in a chosen output encoding.'''
        tools.ok_(converters.xml_to_byte_string(self.utf8_entity_escape, 'utf8', 'replace') == self.u_entity.encode('utf8'))
        tools.ok_(converters.xml_to_byte_string(self.utf8_attrib_escape, 'utf8', 'replace') == self.u_entity.encode('utf8'))

        tools.ok_(converters.xml_to_byte_string(self.utf8_attrib_escape,
                output_encoding='euc_jp', errors='replace') ==
                self.u_entity.encode('euc_jp', 'replace'))
        tools.ok_(converters.xml_to_byte_string(self.utf8_attrib_escape,
                output_encoding='latin1', errors='replace') ==
                self.u_entity.encode('latin1', 'replace'))

    def test_byte_string_to_xml(self):
        '''byte_string_to_xml escapes byte input; unicode input is rejected.'''
        tools.assert_raises(XmlEncodeError, converters.byte_string_to_xml, *[u'test'])
        tools.ok_(converters.byte_string_to_xml(self.utf8_entity) == self.utf8_entity_escape)
        tools.ok_(converters.byte_string_to_xml(self.utf8_entity, attrib=True) == self.utf8_attrib_escape)

    def test_bytes_to_xml(self):
        '''Arbitrary bytes are encoded to an xml-safe representation.'''
        tools.ok_(converters.bytes_to_xml(self.b_byte_chars) == self.b_byte_encoded)

    def test_xml_to_bytes(self):
        '''Inverse of bytes_to_xml.'''
        tools.ok_(converters.xml_to_bytes(self.b_byte_encoded) == self.b_byte_chars)

    def test_guess_encoding_to_xml(self):
        '''guess_encoding_to_xml escapes after auto-detecting the input encoding.'''
        tools.ok_(converters.guess_encoding_to_xml(self.u_entity) == self.utf8_entity_escape)
        tools.ok_(converters.guess_encoding_to_xml(self.utf8_spanish) == self.utf8_spanish)
        tools.ok_(converters.guess_encoding_to_xml(self.latin1_spanish) == self.utf8_spanish)
        tools.ok_(converters.guess_encoding_to_xml(self.utf8_japanese) == self.utf8_japanese)

    def test_guess_encoding_to_xml_euc_japanese(self):
        '''euc_jp detection works only when the optional chardet module is present.'''
        if chardet:
            tools.ok_(converters.guess_encoding_to_xml(self.euc_jp_japanese)
                    == self.utf8_japanese)
        else:
            raise SkipTest('chardet not installed, euc_japanese won\'t be detected')

    def test_guess_encoding_to_xml_euc_japanese_mangled(self):
        '''Without chardet, euc_jp input falls back to a mangled latin1 decode.'''
        if chardet:
            raise SkipTest('chardet installed, euc_japanese won\'t be mangled')
        else:
            tools.ok_(converters.guess_encoding_to_xml(self.euc_jp_japanese)
                    == self.utf8_mangled_euc_jp_as_latin1)
|
||||||
|
|
||||||
|
class TestGetWriter(unittest.TestCase, base_classes.UnicodeTestData):
    '''Tests for converters.getwriter, a codecs-style stream writer factory.'''

    def setUp(self):
        # Fresh in-memory stream for each test.
        self.io = StringIO.StringIO()

    def test_utf8_writer(self):
        '''A utf-8 writer encodes unicode and passes byte strings through.'''
        writer = converters.getwriter('utf-8')
        io = writer(self.io)
        # unicode input is encoded to utf8 on write
        io.write(u'%s\n' % self.u_japanese)
        io.seek(0)
        result = io.read().strip()
        tools.ok_(result == self.utf8_japanese)

        # byte input is written unchanged -- even bytes in another encoding
        io.seek(0)
        io.truncate(0)
        io.write('%s\n' % self.euc_jp_japanese)
        io.seek(0)
        result = io.read().strip()
        tools.ok_(result == self.euc_jp_japanese)

        io.seek(0)
        io.truncate(0)
        io.write('%s\n' % self.utf8_japanese)
        io.seek(0)
        result = io.read().strip()
        tools.ok_(result == self.utf8_japanese)

    def test_error_handlers(self):
        '''Test setting alternate error handlers'''
        # latin1 cannot represent Japanese text, so strict must raise.
        writer = converters.getwriter('latin1')
        io = writer(self.io, errors='strict')
        tools.assert_raises(UnicodeEncodeError, io.write, self.u_japanese)
|
||||||
|
|
||||||
|
|
||||||
|
class TestExceptionConverters(unittest.TestCase, base_classes.UnicodeTestData):
    '''Tests for exception_to_unicode/exception_to_bytes.'''

    def setUp(self):
        # Build self.exceptions: name -> a live Exception instance whose
        # message is one of the sample strings.  The Python 2
        # ``except Exception, target:`` form binds the caught exception
        # directly into the dict entry.
        self.exceptions = {}
        tests = {'u_jpn': self.u_japanese,
                'u_spanish': self.u_spanish,
                'utf8_jpn': self.utf8_japanese,
                'utf8_spanish': self.utf8_spanish,
                'euc_jpn': self.euc_jp_japanese,
                'latin1_spanish': self.latin1_spanish}
        for test in tests.iteritems():
            try:
                raise Exception(test[1])
            except Exception, self.exceptions[test[0]]:
                pass

    def test_exception_to_unicode_with_unicode(self):
        '''Unicode messages come back unchanged.'''
        tools.ok_(converters.exception_to_unicode(self.exceptions['u_jpn']) == self.u_japanese)
        tools.ok_(converters.exception_to_unicode(self.exceptions['u_spanish']) == self.u_spanish)

    def test_exception_to_unicode_with_bytes(self):
        '''Byte messages decode as utf8; other encodings mangle but never raise.'''
        tools.ok_(converters.exception_to_unicode(self.exceptions['utf8_jpn']) == self.u_japanese)
        tools.ok_(converters.exception_to_unicode(self.exceptions['utf8_spanish']) == self.u_spanish)
        # Mangled latin1/utf8 conversion but no tracebacks
        tools.ok_(converters.exception_to_unicode(self.exceptions['latin1_spanish']) == self.u_mangled_spanish_latin1_as_utf8)
        # Mangled euc_jp/utf8 conversion but no tracebacks
        tools.ok_(converters.exception_to_unicode(self.exceptions['euc_jpn']) == self.u_mangled_euc_jp_as_utf8)

    def test_exception_to_unicode_custom(self):
        '''A user-supplied converter list takes precedence over the defaults.'''
        # If given custom functions, then we should not mangle
        c = [lambda e: converters.to_unicode(e, encoding='euc_jp')]
        tools.ok_(converters.exception_to_unicode(self.exceptions['euc_jpn'],
                converters=c) == self.u_japanese)
        # ...and appending the stock converters does not change the result
        c.extend(converters.EXCEPTION_CONVERTERS)
        tools.ok_(converters.exception_to_unicode(self.exceptions['euc_jpn'],
                converters=c) == self.u_japanese)

        c = [lambda e: converters.to_unicode(e, encoding='latin1')]
        tools.ok_(converters.exception_to_unicode(self.exceptions['latin1_spanish'],
                converters=c) == self.u_spanish)
        c.extend(converters.EXCEPTION_CONVERTERS)
        tools.ok_(converters.exception_to_unicode(self.exceptions['latin1_spanish'],
                converters=c) == self.u_spanish)

    def test_exception_to_bytes_with_unicode(self):
        '''Unicode messages are encoded to utf8 bytes.'''
        tools.ok_(converters.exception_to_bytes(self.exceptions['u_jpn']) == self.utf8_japanese)
        tools.ok_(converters.exception_to_bytes(self.exceptions['u_spanish']) == self.utf8_spanish)

    def test_exception_to_bytes_with_bytes(self):
        '''Byte messages are passed through unchanged regardless of encoding.'''
        tools.ok_(converters.exception_to_bytes(self.exceptions['utf8_jpn']) == self.utf8_japanese)
        tools.ok_(converters.exception_to_bytes(self.exceptions['utf8_spanish']) == self.utf8_spanish)
        tools.ok_(converters.exception_to_bytes(self.exceptions['latin1_spanish']) == self.latin1_spanish)
        tools.ok_(converters.exception_to_bytes(self.exceptions['euc_jpn']) == self.euc_jp_japanese)

    def test_exception_to_bytes_custom(self):
        '''A user-supplied converter list controls the output encoding.'''
        # If given custom functions, then we should not mangle
        c = [lambda e: converters.to_bytes(e, encoding='euc_jp')]
        tools.ok_(converters.exception_to_bytes(self.exceptions['euc_jpn'],
                converters=c) == self.euc_jp_japanese)
        c.extend(converters.EXCEPTION_CONVERTERS)
        tools.ok_(converters.exception_to_bytes(self.exceptions['euc_jpn'],
                converters=c) == self.euc_jp_japanese)

        c = [lambda e: converters.to_bytes(e, encoding='latin1')]
        tools.ok_(converters.exception_to_bytes(self.exceptions['latin1_spanish'],
                converters=c) == self.latin1_spanish)
        c.extend(converters.EXCEPTION_CONVERTERS)
        tools.ok_(converters.exception_to_bytes(self.exceptions['latin1_spanish'],
                converters=c) == self.latin1_spanish)
|
||||||
|
|
||||||
|
|
||||||
|
class TestDeprecatedConverters(TestConverters):
    '''Re-run the TestConverters suite (via inheritance) with
    DeprecationWarnings silenced, plus tests for the deprecated APIs
    themselves (to_xml, to_utf8, to_str, and the non_string parameter).
    '''

    def setUp(self):
        # The deprecated entry points warn on every call; ignore so the
        # inherited tests run cleanly.
        warnings.simplefilter('ignore', DeprecationWarning)

    def tearDown(self):
        warnings.simplefilter('default', DeprecationWarning)

    def test_to_xml(self):
        '''Deprecated alias for guess_encoding_to_xml.'''
        tools.ok_(converters.to_xml(self.u_entity) == self.utf8_entity_escape)
        tools.ok_(converters.to_xml(self.utf8_spanish) == self.utf8_spanish)
        tools.ok_(converters.to_xml(self.latin1_spanish) == self.utf8_spanish)
        tools.ok_(converters.to_xml(self.utf8_japanese) == self.utf8_japanese)

    def test_to_utf8(self):
        '''Deprecated: encode unicode to utf8, pass bytes through.'''
        tools.ok_(converters.to_utf8(self.u_japanese) == self.utf8_japanese)
        tools.ok_(converters.to_utf8(self.utf8_spanish) == self.utf8_spanish)

    def test_to_str(self):
        '''Deprecated alias behaving like to_bytes with simplerepr fallback.'''
        tools.ok_(converters.to_str(self.u_japanese) == self.utf8_japanese)
        tools.ok_(converters.to_str(self.utf8_spanish) == self.utf8_spanish)
        tools.ok_(converters.to_str(object) == "<type 'object'>")

    def test_non_string(self):
        '''Test deprecated non_string parameter'''
        # unicode
        tools.assert_raises(TypeError, converters.to_unicode, *[5], **{'non_string': 'foo'})
        tools.ok_(converters.to_unicode(5, non_string='empty') == u'')
        tools.ok_(converters.to_unicode(5, non_string='passthru') == 5)
        tools.ok_(converters.to_unicode(5, non_string='simplerepr') == u'5')
        tools.ok_(converters.to_unicode(5, non_string='repr') == u'5')
        tools.assert_raises(TypeError, converters.to_unicode, *[5], **{'non_string': 'strict'})

        tools.ok_(converters.to_unicode(UnicodeNoStr(), non_string='simplerepr') == self.u_spanish)
        tools.ok_(converters.to_unicode(StrNoUnicode(), non_string='simplerepr') == self.u_spanish)
        tools.ok_(converters.to_unicode(StrReturnsUnicode(), non_string='simplerepr') == self.u_spanish)
        tools.ok_(converters.to_unicode(UnicodeReturnsStr(), non_string='simplerepr') == self.u_spanish)
        tools.ok_(converters.to_unicode(UnicodeStrCrossed(), non_string='simplerepr') == self.u_spanish)

        obj_repr = converters.to_unicode(object, non_string='simplerepr')
        tools.ok_(obj_repr == u"<type 'object'>" and isinstance(obj_repr, unicode))

        # Bytes
        tools.ok_(converters.to_bytes(5) == '5')
        tools.ok_(converters.to_bytes(5, non_string='empty') == '')
        tools.ok_(converters.to_bytes(5, non_string='passthru') == 5)
        tools.ok_(converters.to_bytes(5, non_string='simplerepr') == '5')
        tools.ok_(converters.to_bytes(5, non_string='repr') == '5')

        # Raise a TypeError if the msg is non_string and we're set to strict
        tools.assert_raises(TypeError, converters.to_bytes, *[5], **{'non_string': 'strict'})
        # Raise a TypeError if given an invalid non_string arg
        tools.assert_raises(TypeError, converters.to_bytes, *[5], **{'non_string': 'INVALID'})

        # No __str__ method so this returns repr
        string = converters.to_bytes(UnicodeNoStr(), non_string='simplerepr')
        self._check_repr_bytes(string, 'UnicodeNoStr')

        # This object's __str__ returns a utf8 encoded object
        tools.ok_(converters.to_bytes(StrNoUnicode(), non_string='simplerepr') == self.utf8_spanish)

        # This object's __str__ returns unicode which to_bytes converts to utf8
        tools.ok_(converters.to_bytes(StrReturnsUnicode(), non_string='simplerepr') == self.utf8_spanish)
        # Unless we explicitly ask for something different
        tools.ok_(converters.to_bytes(StrReturnsUnicode(),
                non_string='simplerepr', encoding='latin1') == self.latin1_spanish)

        # This object has no __str__ so it returns repr
        string = converters.to_bytes(UnicodeReturnsStr(), non_string='simplerepr')
        self._check_repr_bytes(string, 'UnicodeReturnsStr')

        # This object's __str__ returns unicode which to_bytes converts to utf8
        tools.ok_(converters.to_bytes(UnicodeStrCrossed(), non_string='simplerepr') == self.utf8_spanish)

        # This object's __repr__ returns unicode which to_bytes converts to utf8
        tools.ok_(converters.to_bytes(ReprUnicode(), non_string='simplerepr')
                == u'ReprUnicode(El veloz murciélago saltó sobre el perro perezoso.)'.encode('utf8'))
        tools.ok_(converters.to_bytes(ReprUnicode(), non_string='repr') ==
                u'ReprUnicode(El veloz murciélago saltó sobre el perro perezoso.)'.encode('utf8'))

        obj_repr = converters.to_bytes(object, non_string='simplerepr')
        tools.ok_(obj_repr == "<type 'object'>" and isinstance(obj_repr, str))
|
180
tests/test_defaultdict.py
Normal file
180
tests/test_defaultdict.py
Normal file
|
@ -0,0 +1,180 @@
|
||||||
|
"""Unit tests for collections.defaultdict."""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import copy
|
||||||
|
import tempfile
|
||||||
|
import unittest
|
||||||
|
from test import test_support
|
||||||
|
|
||||||
|
from kitchen.pycompat25.collections._defaultdict import defaultdict
|
||||||
|
|
||||||
|
def foobar():
    '''Module-level default factory used by the copy/deepcopy tests;
    returns the ``list`` type object itself.
    '''
    factory = list
    return factory
|
||||||
|
|
||||||
|
class TestDefaultDict(unittest.TestCase):
    '''Unit tests for kitchen's pycompat25 backport of collections.defaultdict
    (adapted from the CPython test suite).
    '''

    def test_basic(self):
        '''Core behaviour: default_factory, auto-vivification, kwargs init.'''
        d1 = defaultdict()
        # factory defaults to None and can be assigned after construction
        self.assertEqual(d1.default_factory, None)
        d1.default_factory = list
        d1[12].append(42)
        self.assertEqual(d1, {12: [42]})
        d1[12].append(24)
        self.assertEqual(d1, {12: [42, 24]})
        # bare lookups create fresh default values
        d1[13]
        d1[14]
        self.assertEqual(d1, {12: [42, 24], 13: [], 14: []})
        # each default is a distinct list object
        self.assert_(d1[12] is not d1[13] is not d1[14])
        d2 = defaultdict(list, foo=1, bar=2)
        self.assertEqual(d2.default_factory, list)
        self.assertEqual(d2, {"foo": 1, "bar": 2})
        self.assertEqual(d2["foo"], 1)
        self.assertEqual(d2["bar"], 2)
        self.assertEqual(d2[42], [])
        self.assert_("foo" in d2)
        self.assert_("foo" in d2.keys())
        self.assert_("bar" in d2)
        self.assert_("bar" in d2.keys())
        self.assert_(42 in d2)
        self.assert_(42 in d2.keys())
        self.assert_(12 not in d2)
        self.assert_(12 not in d2.keys())
        # clearing the factory restores plain-dict KeyError behaviour
        d2.default_factory = None
        self.assertEqual(d2.default_factory, None)
        try:
            d2[15]
        except KeyError, err:
            self.assertEqual(err.args, (15,))
        else:
            self.fail("d2[15] didn't raise KeyError")
        # the factory argument must be callable
        self.assertRaises(TypeError, defaultdict, 1)

    def test_missing(self):
        '''__missing__ raises KeyError without a factory, else returns a default.'''
        d1 = defaultdict()
        self.assertRaises(KeyError, d1.__missing__, 42)
        d1.default_factory = list
        self.assertEqual(d1.__missing__(42), [])

    def test_repr(self):
        '''repr shows the factory and the mapping, and round-trips via eval.'''
        d1 = defaultdict()
        self.assertEqual(d1.default_factory, None)
        self.assertEqual(repr(d1), "defaultdict(None, {})")
        self.assertEqual(eval(repr(d1)), d1)
        d1[11] = 41
        self.assertEqual(repr(d1), "defaultdict(None, {11: 41})")
        d2 = defaultdict(int)
        self.assertEqual(d2.default_factory, int)
        d2[12] = 42
        self.assertEqual(repr(d2), "defaultdict(<type 'int'>, {12: 42})")
        def foo(): return 43
        d3 = defaultdict(foo)

        self.assert_(d3.default_factory is foo)
        d3[13]
        self.assertEqual(repr(d3), "defaultdict(%s, {13: 43})" % repr(foo))

    def test_print(self):
        '''Printing to a real file matches repr (exercises the py2 tp_print path).'''
        d1 = defaultdict()
        def foo(): return 42
        d2 = defaultdict(foo, {1: 2})
        # NOTE: We can't use tempfile.[Named]TemporaryFile since this
        # code must exercise the tp_print C code, which only gets
        # invoked for *real* files.
        tfn = tempfile.mktemp()
        try:
            f = open(tfn, "w+")
            try:
                print >>f, d1
                print >>f, d2
                f.seek(0)
                self.assertEqual(f.readline(), repr(d1) + "\n")
                self.assertEqual(f.readline(), repr(d2) + "\n")
            finally:
                f.close()
        finally:
            os.remove(tfn)

    def test_copy(self):
        '''copy() preserves type, factory, and contents.'''
        d1 = defaultdict()
        d2 = d1.copy()
        self.assertEqual(type(d2), defaultdict)
        self.assertEqual(d2.default_factory, None)
        self.assertEqual(d2, {})
        d1.default_factory = list
        d3 = d1.copy()
        self.assertEqual(type(d3), defaultdict)
        self.assertEqual(d3.default_factory, list)
        self.assertEqual(d3, {})
        d1[42]
        d4 = d1.copy()
        self.assertEqual(type(d4), defaultdict)
        self.assertEqual(d4.default_factory, list)
        self.assertEqual(d4, {42: []})
        d4[12]
        self.assertEqual(d4, {42: [], 12: []})

        # Issue 6637: Copy fails for empty default dict
        d = defaultdict()
        d['a'] = 42
        e = d.copy()
        self.assertEqual(e['a'], 42)

    def test_shallow_copy(self):
        '''copy.copy keeps the factory and compares equal.'''
        d1 = defaultdict(foobar, {1: 1})
        d2 = copy.copy(d1)
        self.assertEqual(d2.default_factory, foobar)
        self.assertEqual(d2, d1)
        d1.default_factory = list
        d2 = copy.copy(d1)
        self.assertEqual(d2.default_factory, list)
        self.assertEqual(d2, d1)

    def test_deep_copy(self):
        '''copy.deepcopy keeps the factory and duplicates contained values.'''
        d1 = defaultdict(foobar, {1: [1]})
        d2 = copy.deepcopy(d1)
        self.assertEqual(d2.default_factory, foobar)
        self.assertEqual(d2, d1)
        # deep copy means the contained list is a new object
        self.assert_(d1[1] is not d2[1])
        d1.default_factory = list
        d2 = copy.deepcopy(d1)
        self.assertEqual(d2.default_factory, list)
        self.assertEqual(d2, d1)

    def test_keyerror_without_factory(self):
        '''Missing key without a factory raises KeyError carrying the key.'''
        d1 = defaultdict()
        try:
            d1[(1,)]
        except KeyError, err:
            self.assertEqual(err.args[0], (1,))
        else:
            self.fail("expected KeyError")

    def test_recursive_repr(self):
        '''repr must not recurse forever when the factory is a bound method.'''
        # Issue2045: stack overflow when default_factory is a bound method
        class sub(defaultdict):
            def __init__(self):
                self.default_factory = self._factory
            def _factory(self):
                return []
        d = sub()
        self.assert_(repr(d).startswith(
            "defaultdict(<bound method sub._factory of defaultdict(..."))

        # NOTE: printing a subclass of a builtin type does not call its
        # tp_print slot. So this part is essentially the same test as above.
        tfn = tempfile.mktemp()
        try:
            f = open(tfn, "w+")
            try:
                print >>f, d
            finally:
                f.close()
        finally:
            os.remove(tfn)
|
||||||
|
|
||||||
|
|
||||||
|
#def test_main():
|
||||||
|
# test_support.run_unittest(TestDefaultDict)
|
||||||
|
#
|
||||||
|
#if __name__ == "__main__":
|
||||||
|
# test_main()
|
47
tests/test_deprecation.py
Normal file
47
tests/test_deprecation.py
Normal file
|
@ -0,0 +1,47 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
import unittest
|
||||||
|
from nose import tools
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import warnings
|
||||||
|
from kitchen.text import converters
|
||||||
|
from kitchen.text import utf8
|
||||||
|
|
||||||
|
class TestDeprecated(unittest.TestCase):
    '''Verify that deprecated kitchen APIs emit DeprecationWarning.'''

    def setUp(self):
        # Clear any cached warning registries in the calling frames so a
        # previously-emitted warning does not suppress the one we expect,
        # then promote DeprecationWarning to an error so assert_raises
        # can catch it.
        registry = sys._getframe(2).f_globals.get('__warningregistry__')
        if registry:
            registry.clear()
        registry = sys._getframe(1).f_globals.get('__warningregistry__')
        if registry:
            registry.clear()
        warnings.simplefilter('error', DeprecationWarning)

    def tearDown(self):
        warnings.simplefilter('default', DeprecationWarning)

    def test_deprecated_functions(self):
        '''Test that all deprecated functions raise DeprecationWarning'''
        tools.assert_raises(DeprecationWarning, converters.to_utf8, u'café')
        tools.assert_raises(DeprecationWarning, converters.to_str, 5)
        tools.assert_raises(DeprecationWarning, converters.to_xml, 'test')

        tools.assert_raises(DeprecationWarning, utf8.utf8_valid, 'test')
        tools.assert_raises(DeprecationWarning, utf8.utf8_width, 'test')
        tools.assert_raises(DeprecationWarning, utf8.utf8_width_chop, 'test')
        tools.assert_raises(DeprecationWarning, utf8.utf8_width_fill, 'test', 'asd')
        tools.assert_raises(DeprecationWarning, utf8.utf8_text_wrap, 'test')
        tools.assert_raises(DeprecationWarning, utf8.utf8_text_fill, 'test')
        tools.assert_raises(DeprecationWarning, utf8._utf8_width_le, 'test')

    def test_deprecated_parameters(self):
        '''The old non_string parameter warns, alone or alongside nonstring.'''
        tools.assert_raises(DeprecationWarning, converters.to_unicode, *[5],
                **{'non_string': 'simplerepr'})
        tools.assert_raises(DeprecationWarning, converters.to_unicode, *[5],
                **{'nonstring': 'simplerepr', 'non_string': 'simplerepr'})

        tools.assert_raises(DeprecationWarning, converters.to_bytes, *[5],
                **{'non_string': 'simplerepr'})
        tools.assert_raises(DeprecationWarning, converters.to_bytes, *[5],
                **{'nonstring': 'simplerepr', 'non_string': 'simplerepr'})
|
749
tests/test_i18n.py
Normal file
749
tests/test_i18n.py
Normal file
|
@ -0,0 +1,749 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
import unittest
|
||||||
|
from nose import tools
|
||||||
|
|
||||||
|
import os
|
||||||
|
import types
|
||||||
|
|
||||||
|
from kitchen import i18n
|
||||||
|
|
||||||
|
import base_classes
|
||||||
|
|
||||||
|
class TestI18N_UTF8(unittest.TestCase):
    '''Tests for the i18n setup functions under a UTF-8 locale.'''

    def setUp(self):
        # Remember the caller's LC_ALL so tearDown can restore it, then
        # force a known UTF-8 locale for the duration of each test.
        self.old_LC_ALL = os.environ.get('LC_ALL', None)
        os.environ['LC_ALL'] = 'pt_BR.UTF8'

    def tearDown(self):
        # Restore the locale that was active before setUp ran.
        if self.old_LC_ALL:
            os.environ['LC_ALL'] = self.old_LC_ALL
        else:
            del os.environ['LC_ALL']

    def test_easy_gettext_setup(self):
        '''Test that the easy_gettext_setup function works
        '''
        _, N_ = i18n.easy_gettext_setup('foo', localedirs=
                ['%s/data/locale/' % os.path.dirname(__file__)])
        # The returned callables are bound methods of a translation object;
        # by default the unicode variants are handed back.
        tools.ok_(isinstance(_, types.MethodType))
        tools.ok_(isinstance(N_, types.MethodType))
        tools.ok_(_.im_func.func_name == 'ugettext')
        tools.ok_(N_.im_func.func_name == 'ungettext')

        # No 'foo' catalog exists, so input comes back as unicode unchanged.
        tools.ok_(_('café') == u'café')
        tools.ok_(_(u'café') == u'café')
        tools.ok_(N_('café', 'cafés', 1) == u'café')
        tools.ok_(N_('café', 'cafés', 2) == u'cafés')
        tools.ok_(N_(u'café', u'cafés', 1) == u'café')
        tools.ok_(N_(u'café', u'cafés', 2) == u'cafés')

    def test_easy_gettext_setup_non_unicode(self):
        '''Test that the easy_gettext_setup function works
        '''
        b_, bN_ = i18n.easy_gettext_setup('foo', localedirs=
                ['%s/data/locale/' % os.path.dirname(__file__)],
                use_unicode=False)
        # With use_unicode=False the byte-string (locale) variants are used.
        tools.ok_(isinstance(b_, types.MethodType))
        tools.ok_(isinstance(bN_, types.MethodType))
        tools.ok_(b_.im_func.func_name == 'lgettext')
        tools.ok_(bN_.im_func.func_name == 'lngettext')

        tools.ok_(b_('café') == 'café')
        tools.ok_(b_(u'café') == 'café')
        tools.ok_(bN_('café', 'cafés', 1) == 'café')
        tools.ok_(bN_('café', 'cafés', 2) == 'cafés')
        tools.ok_(bN_(u'café', u'cafés', 1) == 'café')
        tools.ok_(bN_(u'café', u'cafés', 2) == 'cafés')

    def test_get_translation_object(self):
        '''Test that the get_translation_object function works
        '''
        # No 'foo' catalog exists: a DummyTranslations fallback is returned
        # unless fallback=False, in which case IOError propagates.
        translations = i18n.get_translation_object('foo', ['%s/data/locale/' % os.path.dirname(__file__)])
        tools.ok_(translations.__class__ == i18n.DummyTranslations)
        tools.assert_raises(IOError, i18n.get_translation_object, 'foo', ['%s/data/locale/' % os.path.dirname(__file__)], fallback=False)

        # The 'test' catalog does exist, so a real translation object is built.
        translations = i18n.get_translation_object('test', ['%s/data/locale/' % os.path.dirname(__file__)])
        tools.ok_(translations.__class__ == i18n.NewGNUTranslations)

    def test_get_translation_object_create_fallback(self):
        '''Test get_translation_object creates fallbacks for additional catalogs'''
        translations = i18n.get_translation_object('test',
                ['%s/data/locale' % os.path.dirname(__file__),
                 '%s/data/locale-old' % os.path.dirname(__file__)])
        # Each extra localedir becomes a chained fallback translation object.
        tools.ok_(translations.__class__ == i18n.NewGNUTranslations)
        tools.ok_(translations._fallback.__class__ == i18n.NewGNUTranslations)

    def test_get_translation_object_copy(self):
        '''Test get_translation_object shallow copies the message catalog'''
        translations = i18n.get_translation_object('test',
                ['%s/data/locale' % os.path.dirname(__file__),
                 '%s/data/locale-old' % os.path.dirname(__file__)], codeset='utf-8')
        translations.input_charset = 'utf-8'
        translations2 = i18n.get_translation_object('test',
                ['%s/data/locale' % os.path.dirname(__file__),
                 '%s/data/locale-old' % os.path.dirname(__file__)], codeset='latin-1')
        translations2.input_charset = 'latin-1'

        # Test that portions of the translation objects are the same and other
        # portions are different (which is a space optimization so that the
        # translation data isn't in memory multiple times)
        tools.ok_(id(translations._fallback) != id(translations2._fallback))
        tools.ok_(id(translations.output_charset()) != id(translations2.output_charset()))
        # NOTE(review): the original asserted the input_charset comparison
        # twice in a row; the duplicate assertion was removed.
        tools.ok_(id(translations.input_charset) != id(translations2.input_charset))
        tools.eq_(id(translations._catalog), id(translations2._catalog))

    def test_get_translation_object_optional_params(self):
        '''Smoketest leaving out optional parameters'''
        # Which class comes back depends on whether a system catalog exists.
        translations = i18n.get_translation_object('test')
        tools.ok_(translations.__class__ in (i18n.NewGNUTranslations, i18n.DummyTranslations))

    def test_dummy_translation(self):
        '''Test that we can create a DummyTranslation object
        '''
        tools.ok_(isinstance(i18n.DummyTranslations(), i18n.DummyTranslations))


# Note: Using nose's generator tests for this so we can't subclass
# unittest.TestCase
class TestDummyTranslations(base_classes.UnicodeTestData):
    '''Generator tests for DummyTranslations' gettext method family.'''

    def __init__(self):
        # For each family ('bytes' for gettext/ngettext/lgettext/lngettext,
        # 'unicode' for ugettext/ungettext) there are three sets of
        # (input, expected-output) pairs, one per charset configuration.
        self.test_data = {'bytes': (( # First set is with default charset (utf8)
            (self.u_ascii, self.b_ascii),
            (self.u_spanish, self.utf8_spanish),
            (self.u_japanese, self.utf8_japanese),
            (self.b_ascii, self.b_ascii),
            (self.utf8_spanish, self.utf8_spanish),
            (self.latin1_spanish, self.utf8_mangled_spanish_latin1_as_utf8),
            (self.utf8_japanese, self.utf8_japanese),
            ),
            ( # Second set is with output_charset of latin1 (ISO-8859-1)
            (self.u_ascii, self.b_ascii),
            (self.u_spanish, self.latin1_spanish),
            (self.u_japanese, self.latin1_mangled_japanese_replace_as_latin1),
            (self.b_ascii, self.b_ascii),
            (self.utf8_spanish, self.utf8_spanish),
            (self.latin1_spanish, self.latin1_spanish),
            (self.utf8_japanese, self.utf8_japanese),
            ),
            ( # Third set is with output_charset of C
            (self.u_ascii, self.b_ascii),
            (self.u_spanish, self.ascii_mangled_spanish_as_ascii),
            (self.u_japanese, self.ascii_mangled_japanese_replace_as_latin1),
            (self.b_ascii, self.b_ascii),
            (self.utf8_spanish, self.ascii_mangled_spanish_as_ascii),
            (self.latin1_spanish, self.ascii_twice_mangled_spanish_latin1_as_utf8_as_ascii),
            (self.utf8_japanese, self.ascii_mangled_japanese_replace_as_latin1),
            ),
            ),
            'unicode': (( # First set is with the default charset (utf8)
            (self.u_ascii, self.u_ascii),
            (self.u_spanish, self.u_spanish),
            (self.u_japanese, self.u_japanese),
            (self.b_ascii, self.u_ascii),
            (self.utf8_spanish, self.u_spanish),
            (self.latin1_spanish, self.u_mangled_spanish_latin1_as_utf8), # String is mangled but no exception
            (self.utf8_japanese, self.u_japanese),
            ),
            ( # Second set is with _charset of latin1 (ISO-8859-1)
            (self.u_ascii, self.u_ascii),
            (self.u_spanish, self.u_spanish),
            (self.u_japanese, self.u_japanese),
            (self.b_ascii, self.u_ascii),
            (self.utf8_spanish, self.u_mangled_spanish_utf8_as_latin1), # String mangled but no exception
            (self.latin1_spanish, self.u_spanish),
            (self.utf8_japanese, self.u_mangled_japanese_utf8_as_latin1), # String mangled but no exception
            ),
            ( # Third set is with _charset of C
            (self.u_ascii, self.u_ascii),
            (self.u_spanish, self.u_spanish),
            (self.u_japanese, self.u_japanese),
            (self.b_ascii, self.u_ascii),
            (self.utf8_spanish, self.u_mangled_spanish_utf8_as_ascii), # String mangled but no exception
            (self.latin1_spanish, self.u_mangled_spanish_latin1_as_ascii), # String mangled but no exception
            (self.utf8_japanese, self.u_mangled_japanese_utf8_as_ascii), # String mangled but no exception
            ),
            )
        }

    def setUp(self):
        self.translations = i18n.DummyTranslations()

    def check_gettext(self, message, value, charset=None):
        self.translations.set_output_charset(charset)
        tools.eq_(self.translations.gettext(message), value,
                msg='gettext(%s): trans: %s != val: %s (charset=%s)'
                % (repr(message), repr(self.translations.gettext(message)),
                    repr(value), charset))

    def check_lgettext(self, message, value, charset=None,
            locale='en_US.UTF-8'):
        os.environ['LC_ALL'] = locale
        self.translations.set_output_charset(charset)
        tools.eq_(self.translations.lgettext(message), value,
                msg='lgettext(%s): trans: %s != val: %s (charset=%s, locale=%s)'
                % (repr(message), repr(self.translations.lgettext(message)),
                    repr(value), charset, locale))

    # Note: charset has a default value because nose isn't invoking setUp and
    # tearDown each time check_* is run.
    def check_ugettext(self, message, value, charset='utf-8'):
        '''ugettext method with default values'''
        self.translations.input_charset = charset
        tools.eq_(self.translations.ugettext(message), value,
                msg='ugettext(%s): trans: %s != val: %s (charset=%s)'
                % (repr(message), repr(self.translations.ugettext(message)),
                    repr(value), charset))

    def check_ngettext(self, message, value, charset=None):
        # The value must come back for the matching plurality and only for it.
        self.translations.set_output_charset(charset)
        tools.eq_(self.translations.ngettext(message, 'blank', 1), value)
        tools.eq_(self.translations.ngettext('blank', message, 2), value)
        tools.ok_(self.translations.ngettext(message, 'blank', 2) != value)
        tools.ok_(self.translations.ngettext('blank', message, 1) != value)

    def check_lngettext(self, message, value, charset=None, locale='en_US.UTF-8'):
        os.environ['LC_ALL'] = locale
        self.translations.set_output_charset(charset)
        tools.eq_(self.translations.lngettext(message, 'blank', 1), value,
                msg='lngettext(%s, "blank", 1): trans: %s != val: %s (charset=%s, locale=%s)'
                % (repr(message), repr(self.translations.lngettext(message,
                    'blank', 1)), repr(value), charset, locale))
        tools.eq_(self.translations.lngettext('blank', message, 2), value,
                msg='lngettext("blank", %s, 2): trans: %s != val: %s (charset=%s, locale=%s)'
                % (repr(message), repr(self.translations.lngettext('blank',
                    message, 2)), repr(value), charset, locale))
        tools.ok_(self.translations.lngettext(message, 'blank', 2) != value,
                msg='lngettext(%s, "blank", 2): trans: %s != val: %s (charset=%s, locale=%s)'
                % (repr(message), repr(self.translations.lngettext(message,
                    'blank', 2)), repr(value), charset, locale))
        tools.ok_(self.translations.lngettext('blank', message, 1) != value,
                msg='lngettext("blank", %s, 1): trans: %s != val: %s (charset=%s, locale=%s)'
                % (repr(message), repr(self.translations.lngettext('blank',
                    message, 1)), repr(value), charset, locale))

    # Note: charset has a default value because nose isn't invoking setUp and
    # tearDown each time check_* is run.
    def check_ungettext(self, message, value, charset='utf-8'):
        self.translations.input_charset = charset
        tools.eq_(self.translations.ungettext(message, 'blank', 1), value)
        tools.eq_(self.translations.ungettext('blank', message, 2), value)
        tools.ok_(self.translations.ungettext(message, 'blank', 2) != value)
        tools.ok_(self.translations.ungettext('blank', message, 1) != value)

    def test_gettext(self):
        '''gettext method with default values'''
        for message, value in self.test_data['bytes'][0]:
            yield self.check_gettext, message, value

    def test_gettext_output_charset(self):
        '''gettext method after output_charset is set'''
        for message, value in self.test_data['bytes'][1]:
            yield self.check_gettext, message, value, 'latin1'

    def test_ngettext(self):
        for message, value in self.test_data['bytes'][0]:
            yield self.check_ngettext, message, value

    def test_ngettext_output_charset(self):
        for message, value in self.test_data['bytes'][1]:
            yield self.check_ngettext, message, value, 'latin1'

    def test_lgettext(self):
        '''lgettext method with default values on a utf8 locale'''
        for message, value in self.test_data['bytes'][0]:
            yield self.check_lgettext, message, value

    def test_lgettext_output_charset(self):
        '''lgettext method after output_charset is set'''
        for message, value in self.test_data['bytes'][1]:
            yield self.check_lgettext, message, value, 'latin1'

    def test_lgettext_output_charset_and_locale(self):
        '''lgettext method after output_charset is set in C locale

        output_charset should take precedence
        '''
        for message, value in self.test_data['bytes'][1]:
            yield self.check_lgettext, message, value, 'latin1', 'C'

    def test_lgettext_locale_C(self):
        '''lgettext method in a C locale'''
        for message, value in self.test_data['bytes'][2]:
            yield self.check_lgettext, message, value, None, 'C'

    def test_lngettext(self):
        '''lngettext method with default values on a utf8 locale'''
        for message, value in self.test_data['bytes'][0]:
            yield self.check_lngettext, message, value

    def test_lngettext_output_charset(self):
        '''lngettext method after output_charset is set'''
        for message, value in self.test_data['bytes'][1]:
            yield self.check_lngettext, message, value, 'latin1'

    def test_lngettext_output_charset_and_locale(self):
        '''lngettext method after output_charset is set in C locale

        output_charset should take precedence
        '''
        for message, value in self.test_data['bytes'][1]:
            yield self.check_lngettext, message, value, 'latin1', 'C'

    def test_lngettext_locale_C(self):
        '''lngettext method in a C locale'''
        for message, value in self.test_data['bytes'][2]:
            yield self.check_lngettext, message, value, None, 'C'

    def test_ugettext(self):
        for message, value in self.test_data['unicode'][0]:
            yield self.check_ugettext, message, value

    def test_ugettext_charset_latin1(self):
        for message, value in self.test_data['unicode'][1]:
            yield self.check_ugettext, message, value, 'latin1'

    def test_ugettext_charset_ascii(self):
        for message, value in self.test_data['unicode'][2]:
            yield self.check_ugettext, message, value, 'ascii'

    def test_ungettext(self):
        for message, value in self.test_data['unicode'][0]:
            yield self.check_ungettext, message, value

    def test_ungettext_charset_latin1(self):
        for message, value in self.test_data['unicode'][1]:
            yield self.check_ungettext, message, value, 'latin1'

    def test_ungettext_charset_ascii(self):
        for message, value in self.test_data['unicode'][2]:
            yield self.check_ungettext, message, value, 'ascii'

    def test_nonbasestring(self):
        # Non-string input yields an empty (byte or unicode) string rather
        # than raising.
        tools.eq_(self.translations.gettext(dict(hi='there')), '')
        tools.eq_(self.translations.ngettext(dict(hi='there'), dict(hi='two'), 1), '')
        tools.eq_(self.translations.lgettext(dict(hi='there')), '')
        tools.eq_(self.translations.lngettext(dict(hi='there'), dict(hi='two'), 1), '')
        tools.eq_(self.translations.ugettext(dict(hi='there')), u'')
        tools.eq_(self.translations.ungettext(dict(hi='there'), dict(hi='two'), 1), u'')


class TestI18N_Latin1(unittest.TestCase):
    '''i18n setup tests run under a latin-1 locale.'''

    def setUp(self):
        # Save the current LC_ALL and switch to a latin-1 locale.
        self.old_LC_ALL = os.environ.get('LC_ALL', None)
        os.environ['LC_ALL'] = 'pt_BR.ISO8859-1'

    def tearDown(self):
        # Put the original LC_ALL back (or remove it if it was unset).
        if self.old_LC_ALL:
            os.environ['LC_ALL'] = self.old_LC_ALL
        else:
            del os.environ['LC_ALL']

    def test_easy_gettext_setup_non_unicode(self):
        '''Test that the easy_gettext_setup function works
        '''
        localedir = '%s/data/locale/' % os.path.dirname(__file__)
        b_, bN_ = i18n.easy_gettext_setup('foo', localedirs=[localedir],
                use_unicode=False)

        # Byte input passes through; unicode input is encoded to latin-1.
        tools.ok_(b_('café') == 'café')
        tools.ok_(b_(u'café') == 'caf\xe9')
        tools.ok_(bN_('café', 'cafés', 1) == 'café')
        tools.ok_(bN_('café', 'cafés', 2) == 'cafés')
        tools.ok_(bN_(u'café', u'cafés', 1) == 'caf\xe9')
        tools.ok_(bN_(u'café', u'cafés', 2) == 'caf\xe9s')


class TestNewGNUTranslationsNoMatch(TestDummyTranslations):
    '''Rerun the DummyTranslations checks against a real translation object.

    None of the DummyTranslations test messages exist in the 'test'
    catalog, so the real object must behave exactly like the dummy.
    '''

    def setUp(self):
        self.old_LC_ALL = os.environ.get('LC_ALL', None)
        os.environ['LC_ALL'] = 'pt_BR.utf8'
        localedir = '%s/data/locale/' % os.path.dirname(__file__)
        self.translations = i18n.get_translation_object('test', [localedir])

    def tearDown(self):
        if self.old_LC_ALL:
            os.environ['LC_ALL'] = self.old_LC_ALL
        else:
            del os.environ['LC_ALL']


class TestNewGNURealTranslations_UTF8(unittest.TestCase):
    '''Exercise a real catalog ('test') through every gettext variant.'''

    def setUp(self):
        self.old_LC_ALL = os.environ.get('LC_ALL', None)
        os.environ['LC_ALL'] = 'pt_BR.UTF8'
        localedir = '%s/data/locale/' % os.path.dirname(__file__)
        self.translations = i18n.get_translation_object('test', [localedir])

    def tearDown(self):
        if self.old_LC_ALL:
            os.environ['LC_ALL'] = self.old_LC_ALL
        else:
            del os.environ['LC_ALL']

    def test_gettext(self):
        _ = self.translations.gettext
        # Byte-string message ids
        tools.ok_(_('kitchen sink')=='pia da cozinha')
        tools.ok_(_('Kuratomi')=='くらとみ')
        tools.ok_(_('くらとみ')=='Kuratomi')
        tools.ok_(_('Only café in fallback')=='Only café in fallback')

        # Unicode message ids
        tools.ok_(_(u'kitchen sink')=='pia da cozinha')
        tools.ok_(_(u'くらとみ')=='Kuratomi')
        tools.ok_(_(u'Kuratomi')=='くらとみ')
        tools.ok_(_(u'Only café in fallback')=='Only café in fallback')

    def test_ngettext(self):
        _ = self.translations.ngettext
        # Singular
        tools.ok_(_('1 lemon', '4 lemons', 1)=='一 limão')
        tools.ok_(_('一 limão', '四 limões', 1)=='1 lemon')
        tools.ok_(_(u'1 lemon', u'4 lemons', 1)=='一 limão')
        tools.ok_(_(u'一 limão', u'四 limões', 1)=='1 lemon')

        # Plural
        tools.ok_(_('1 lemon', '4 lemons', 2)=='四 limões')
        tools.ok_(_('一 limão', '四 limões', 2)=='4 lemons')
        tools.ok_(_(u'1 lemon', u'4 lemons', 2)=='四 limões')
        tools.ok_(_(u'一 limão', u'四 limões', 2)=='4 lemons')

    def test_lgettext(self):
        _ = self.translations.lgettext
        # In a UTF-8 locale lgettext matches gettext for this catalog.
        tools.ok_(_('kitchen sink')=='pia da cozinha')
        tools.ok_(_('Kuratomi')=='くらとみ')
        tools.ok_(_('くらとみ')=='Kuratomi')
        tools.ok_(_('Only café in fallback')=='Only café in fallback')

        tools.ok_(_(u'kitchen sink')=='pia da cozinha')
        tools.ok_(_(u'くらとみ')=='Kuratomi')
        tools.ok_(_(u'Kuratomi')=='くらとみ')
        tools.ok_(_(u'Only café in fallback')=='Only café in fallback')

    def test_lngettext(self):
        _ = self.translations.lngettext
        tools.ok_(_('1 lemon', '4 lemons', 1)=='一 limão')
        tools.ok_(_('一 limão', '四 limões', 1)=='1 lemon')
        tools.ok_(_(u'1 lemon', u'4 lemons', 1)=='一 limão')
        tools.ok_(_(u'一 limão', u'四 limões', 1)=='1 lemon')

        tools.ok_(_('1 lemon', '4 lemons', 2)=='四 limões')
        tools.ok_(_('一 limão', '四 limões', 2)=='4 lemons')
        tools.ok_(_(u'1 lemon', u'4 lemons', 2)=='四 limões')
        tools.ok_(_(u'一 limão', u'四 limões', 2)=='4 lemons')

    def test_ugettext(self):
        _ = self.translations.ugettext
        # Results come back as unicode regardless of input type.
        tools.ok_(_('kitchen sink')==u'pia da cozinha')
        tools.ok_(_('Kuratomi')==u'くらとみ')
        tools.ok_(_('くらとみ')==u'Kuratomi')
        tools.ok_(_('Only café in fallback')==u'Only café in fallback')

        tools.ok_(_(u'kitchen sink')==u'pia da cozinha')
        tools.ok_(_(u'くらとみ')==u'Kuratomi')
        tools.ok_(_(u'Kuratomi')==u'くらとみ')
        tools.ok_(_(u'Only café in fallback')==u'Only café in fallback')

    def test_ungettext(self):
        _ = self.translations.ungettext
        tools.ok_(_('1 lemon', '4 lemons', 1)==u'一 limão')
        tools.ok_(_('一 limão', '四 limões', 1)==u'1 lemon')
        tools.ok_(_(u'1 lemon', u'4 lemons', 1)==u'一 limão')
        tools.ok_(_(u'一 limão', u'四 limões', 1)==u'1 lemon')

        tools.ok_(_('1 lemon', '4 lemons', 2)==u'四 limões')
        tools.ok_(_('一 limão', '四 limões', 2)==u'4 lemons')
        tools.ok_(_(u'1 lemon', u'4 lemons', 2)==u'四 limões')
        tools.ok_(_(u'一 limão', u'四 limões', 2)==u'4 lemons')


class TestNewGNURealTranslations_Latin1(TestNewGNURealTranslations_UTF8):
    '''Same catalog as the UTF-8 variant but run in a latin-1 locale.

    Only the locale-dependent methods (lgettext/lngettext) are overridden.
    '''

    def setUp(self):
        self.old_LC_ALL = os.environ.get('LC_ALL', None)
        os.environ['LC_ALL'] = 'pt_BR.ISO8859-1'
        localedir = '%s/data/locale/' % os.path.dirname(__file__)
        self.translations = i18n.get_translation_object('test', [localedir])

    def tearDown(self):
        if self.old_LC_ALL:
            os.environ['LC_ALL'] = self.old_LC_ALL
        else:
            del os.environ['LC_ALL']

    def test_lgettext(self):
        _ = self.translations.lgettext
        tools.eq_(_('kitchen sink'), 'pia da cozinha')
        # Kana cannot be represented in latin-1, so it is replaced with '?'.
        tools.eq_(_('Kuratomi'), '????')
        tools.eq_(_('くらとみ'), 'Kuratomi')
        # The following returns utf-8 because latin-1 can hold all of the
        # bytes that are present in utf-8 encodings.  Therefore, we cannot
        # tell that we should reencode the string.  This will be displayed as
        # mangled text if used in a program
        tools.eq_(_('Only café in fallback'), 'Only caf\xc3\xa9 in fallback')

        tools.eq_(_(u'kitchen sink'), 'pia da cozinha')
        tools.eq_(_(u'くらとみ'), 'Kuratomi')
        tools.eq_(_(u'Kuratomi'), '????')
        tools.eq_(_(u'Only café in fallback'), 'Only caf\xe9 in fallback')

    def test_lngettext(self):
        _ = self.translations.lngettext
        # Singular: the kanji numeral is unencodable in latin-1 -> '?'.
        tools.ok_(_('1 lemon', '4 lemons', 1)=='? lim\xe3o')
        tools.ok_(_('一 limão', '四 limões', 1)=='1 lemon')
        tools.ok_(_(u'1 lemon', u'4 lemons', 1)=='? lim\xe3o')
        tools.ok_(_(u'一 limão', u'四 limões', 1)=='1 lemon')

        # Plural
        tools.ok_(_('1 lemon', '4 lemons', 2)=='? lim\xf5es')
        tools.ok_(_('一 limão', '四 limões', 2)=='4 lemons')
        tools.ok_(_(u'1 lemon', u'4 lemons', 2)=='? lim\xf5es')
        tools.ok_(_(u'一 limão', u'四 limões', 2)=='4 lemons')


class TestFallbackNewGNUTranslationsNoMatch(TestDummyTranslations):
    '''DummyTranslations checks against a translation object with a fallback.

    The DummyTranslations messages are in neither catalog, so behaviour
    must still match the dummy object.
    '''

    def setUp(self):
        self.old_LC_ALL = os.environ.get('LC_ALL', None)
        os.environ['LC_ALL'] = 'pt_BR.utf8'
        basedir = os.path.dirname(__file__)
        self.translations = i18n.get_translation_object('test',
                ['%s/data/locale/' % basedir,
                 '%s/data/locale-old' % basedir])

    def tearDown(self):
        if self.old_LC_ALL:
            os.environ['LC_ALL'] = self.old_LC_ALL
        else:
            del os.environ['LC_ALL']


class TestFallbackNewGNURealTranslations_UTF8(unittest.TestCase):
    '''Exercise a catalog chain (test + test-old fallback) in a UTF-8 locale.

    'Only café in fallback' exists only in the fallback catalog.
    '''

    def setUp(self):
        self.old_LC_ALL = os.environ.get('LC_ALL', None)
        os.environ['LC_ALL'] = 'pt_BR.UTF8'
        basedir = os.path.dirname(__file__)
        self.translations = i18n.get_translation_object('test',
                ['%s/data/locale/' % basedir,
                 '%s/data/locale-old' % basedir])

    def tearDown(self):
        if self.old_LC_ALL:
            os.environ['LC_ALL'] = self.old_LC_ALL
        else:
            del os.environ['LC_ALL']

    def test_gettext(self):
        _ = self.translations.gettext
        tools.ok_(_('kitchen sink')=='pia da cozinha')
        tools.ok_(_('Kuratomi')=='くらとみ')
        tools.ok_(_('くらとみ')=='Kuratomi')
        # Resolved from the fallback catalog.
        tools.ok_(_(u'Only café in fallback')=='Yes, only caf\xc3\xa9 in fallback')

        tools.ok_(_(u'kitchen sink')=='pia da cozinha')
        tools.ok_(_(u'くらとみ')=='Kuratomi')
        tools.ok_(_(u'Kuratomi')=='くらとみ')
        tools.ok_(_(u'Only café in fallback')=='Yes, only caf\xc3\xa9 in fallback')

    def test_ngettext(self):
        _ = self.translations.ngettext
        tools.ok_(_('1 lemon', '4 lemons', 1)=='一 limão')
        tools.ok_(_('一 limão', '四 limões', 1)=='1 lemon')
        tools.ok_(_(u'1 lemon', u'4 lemons', 1)=='一 limão')
        tools.ok_(_(u'一 limão', u'四 limões', 1)=='1 lemon')

        tools.ok_(_('1 lemon', '4 lemons', 2)=='四 limões')
        tools.ok_(_('一 limão', '四 limões', 2)=='4 lemons')
        tools.ok_(_(u'1 lemon', u'4 lemons', 2)=='四 limões')
        tools.ok_(_(u'一 limão', u'四 limões', 2)=='4 lemons')

    def test_lgettext(self):
        _ = self.translations.lgettext
        tools.eq_(_('kitchen sink'), 'pia da cozinha')
        tools.ok_(_('Kuratomi')=='くらとみ')
        tools.ok_(_('くらとみ')=='Kuratomi')
        tools.ok_(_('Only café in fallback')=='Yes, only caf\xc3\xa9 in fallback')

        tools.ok_(_(u'kitchen sink')=='pia da cozinha')
        tools.ok_(_(u'くらとみ')=='Kuratomi')
        tools.ok_(_(u'Kuratomi')=='くらとみ')
        tools.ok_(_(u'Only café in fallback')=='Yes, only caf\xc3\xa9 in fallback')

    def test_lngettext(self):
        _ = self.translations.lngettext
        tools.ok_(_('1 lemon', '4 lemons', 1)=='一 limão')
        tools.ok_(_('一 limão', '四 limões', 1)=='1 lemon')
        tools.ok_(_(u'1 lemon', u'4 lemons', 1)=='一 limão')
        tools.ok_(_(u'一 limão', u'四 limões', 1)=='1 lemon')

        tools.ok_(_('1 lemon', '4 lemons', 2)=='四 limões')
        tools.ok_(_('一 limão', '四 limões', 2)=='4 lemons')
        tools.ok_(_(u'1 lemon', u'4 lemons', 2)=='四 limões')
        tools.ok_(_(u'一 limão', u'四 limões', 2)=='4 lemons')

    def test_ugettext(self):
        _ = self.translations.ugettext
        tools.ok_(_('kitchen sink')==u'pia da cozinha')
        tools.ok_(_('Kuratomi')==u'くらとみ')
        tools.ok_(_('くらとみ')==u'Kuratomi')
        tools.ok_(_('Only café in fallback')==u'Yes, only caf\xe9 in fallback')

        tools.ok_(_(u'kitchen sink')==u'pia da cozinha')
        tools.ok_(_(u'くらとみ')==u'Kuratomi')
        tools.ok_(_(u'Kuratomi')==u'くらとみ')
        tools.ok_(_(u'Only café in fallback')==u'Yes, only caf\xe9 in fallback')

    def test_ungettext(self):
        _ = self.translations.ungettext
        tools.ok_(_('1 lemon', '4 lemons', 1)==u'一 limão')
        tools.ok_(_('一 limão', '四 limões', 1)==u'1 lemon')
        tools.ok_(_(u'1 lemon', u'4 lemons', 1)==u'一 limão')
        tools.ok_(_(u'一 limão', u'四 limões', 1)==u'1 lemon')

        tools.ok_(_('1 lemon', '4 lemons', 2)==u'四 limões')
        tools.ok_(_('一 limão', '四 limões', 2)==u'4 lemons')
        tools.ok_(_(u'1 lemon', u'4 lemons', 2)==u'四 limões')
        tools.ok_(_(u'一 limão', u'四 limões', 2)==u'4 lemons')


class TestFallbackNewGNURealTranslations_Latin1(unittest.TestCase):
    '''Catalog chain (test + test-old fallback) under a latin-1 locale.'''

    def setUp(self):
        self.old_LC_ALL = os.environ.get('LC_ALL', None)
        os.environ['LC_ALL'] = 'pt_BR.ISO8859-1'
        basedir = os.path.dirname(__file__)
        self.translations = i18n.get_translation_object('test',
                ['%s/data/locale/' % basedir,
                 '%s/data/locale-old' % basedir])

    def tearDown(self):
        if self.old_LC_ALL:
            os.environ['LC_ALL'] = self.old_LC_ALL
        else:
            del os.environ['LC_ALL']

    def test_gettext(self):
        _ = self.translations.gettext
        tools.ok_(_('kitchen sink')=='pia da cozinha')
        tools.ok_(_('Kuratomi')=='くらとみ')
        tools.ok_(_('くらとみ')=='Kuratomi')
        tools.ok_(_(u'Only café in fallback')=='Yes, only caf\xc3\xa9 in fallback')

        tools.ok_(_(u'kitchen sink')=='pia da cozinha')
        tools.ok_(_(u'くらとみ')=='Kuratomi')
        tools.ok_(_(u'Kuratomi')=='くらとみ')
        tools.ok_(_(u'Only café in fallback')=='Yes, only caf\xc3\xa9 in fallback')

    def test_ngettext(self):
        _ = self.translations.ngettext
        tools.ok_(_('1 lemon', '4 lemons', 1)=='一 limão')
        tools.ok_(_('一 limão', '四 limões', 1)=='1 lemon')
        tools.ok_(_(u'1 lemon', u'4 lemons', 1)=='一 limão')
        tools.ok_(_(u'一 limão', u'四 limões', 1)=='1 lemon')

        tools.ok_(_('1 lemon', '4 lemons', 2)=='四 limões')
        tools.ok_(_('一 limão', '四 limões', 2)=='4 lemons')
        tools.ok_(_(u'1 lemon', u'4 lemons', 2)=='四 limões')
        tools.ok_(_(u'一 limão', u'四 limões', 2)=='4 lemons')

    def test_lgettext(self):
        _ = self.translations.lgettext
        tools.eq_(_('kitchen sink'), 'pia da cozinha')
        # Kana is unencodable in latin-1 and is replaced with '?'.
        tools.eq_(_('Kuratomi'), '????')
        tools.eq_(_('くらとみ'), 'Kuratomi')
        tools.eq_(_('Only café in fallback'), 'Yes, only caf\xe9 in fallback')

        tools.eq_(_(u'kitchen sink'), 'pia da cozinha')
        tools.eq_(_(u'くらとみ'), 'Kuratomi')
        tools.eq_(_(u'Kuratomi'), '????')
        tools.eq_(_(u'Only café in fallback'), 'Yes, only caf\xe9 in fallback')

    def test_lngettext(self):
        _ = self.translations.lngettext
        # Expected values are the translations re-encoded to latin-1 with
        # unencodable characters replaced.
        tools.eq_(_('1 lemon', '4 lemons', 1), u'一 limão'.encode('latin1', 'replace'))
        tools.eq_(_('一 limão', '四 limões', 1), '1 lemon')
        tools.eq_(_(u'1 lemon', u'4 lemons', 1), u'一 limão'.encode('latin1', 'replace'))
        tools.eq_(_(u'一 limão', u'四 limões', 1), '1 lemon')

        tools.eq_(_('1 lemon', '4 lemons', 2), u'四 limões'.encode('latin1', 'replace'))
        tools.eq_(_('一 limão', '四 limões', 2), '4 lemons')
        tools.eq_(_(u'1 lemon', u'4 lemons', 2), u'四 limões'.encode('latin1', 'replace'))
        tools.eq_(_(u'一 limão', u'四 limões', 2), '4 lemons')

    def test_ugettext(self):
        _ = self.translations.ugettext
        tools.ok_(_('kitchen sink')==u'pia da cozinha')
        tools.ok_(_('Kuratomi')==u'くらとみ')
        tools.ok_(_('くらとみ')==u'Kuratomi')
        tools.ok_(_('Only café in fallback')==u'Yes, only caf\xe9 in fallback')

        tools.ok_(_(u'kitchen sink')==u'pia da cozinha')
        tools.ok_(_(u'くらとみ')==u'Kuratomi')
        tools.ok_(_(u'Kuratomi')==u'くらとみ')
        tools.ok_(_(u'Only café in fallback')==u'Yes, only caf\xe9 in fallback')

    def test_ungettext(self):
        _ = self.translations.ungettext
        tools.ok_(_('1 lemon', '4 lemons', 1)==u'一 limão')
        tools.ok_(_('一 limão', '四 limões', 1)==u'1 lemon')
        tools.ok_(_(u'1 lemon', u'4 lemons', 1)==u'一 limão')
        tools.ok_(_(u'一 limão', u'四 limões', 1)==u'1 lemon')

        tools.ok_(_('1 lemon', '4 lemons', 2)==u'四 limões')
        tools.ok_(_('一 limão', '四 limões', 2)==u'4 lemons')
        tools.ok_(_(u'1 lemon', u'4 lemons', 2)==u'四 limões')
        tools.ok_(_(u'一 limão', u'四 limões', 2)==u'4 lemons')


class TestFallback(unittest.TestCase):
    '''Check that an invalid fallback object does not break translation.'''

    def setUp(self):
        self.old_LC_ALL = os.environ.get('LC_ALL', None)
        os.environ['LC_ALL'] = 'pt_BR.ISO8859-1'
        basedir = os.path.dirname(__file__)
        localedirs = ['%s/data/locale/' % basedir,
                '%s/data/locale-old' % basedir]
        # One real (GNU) translation object and one dummy, each with a
        # bogus fallback (a plain object()) attached.
        self.gtranslations = i18n.get_translation_object('test', localedirs)
        self.gtranslations.add_fallback(object())
        self.dtranslations = i18n.get_translation_object('nonexistent',
                localedirs)
        self.dtranslations.add_fallback(object())

    def tearDown(self):
        if self.old_LC_ALL:
            os.environ['LC_ALL'] = self.old_LC_ALL
        else:
            del os.environ['LC_ALL']

    def test_invalid_fallback_no_raise(self):
        '''Test when we have an invalid fallback that it does not raise.'''
        tools.eq_(self.gtranslations.gettext('abc'), 'abc')
        tools.eq_(self.gtranslations.ugettext('abc'), 'abc')
        tools.eq_(self.gtranslations.lgettext('abc'), 'abc')
        tools.eq_(self.dtranslations.gettext('abc'), 'abc')
        tools.eq_(self.dtranslations.ugettext('abc'), 'abc')
        tools.eq_(self.dtranslations.lgettext('abc'), 'abc')

        tools.eq_(self.dtranslations.ngettext('abc', 'cde', 1), 'abc')
        tools.eq_(self.dtranslations.ungettext('abc', 'cde', 1), 'abc')
        tools.eq_(self.dtranslations.lngettext('abc', 'cde', 1), 'abc')
        tools.eq_(self.gtranslations.ngettext('abc', 'cde', 1), 'abc')
        tools.eq_(self.gtranslations.ungettext('abc', 'cde', 1), 'abc')
        tools.eq_(self.gtranslations.lngettext('abc', 'cde', 1), 'abc')

class TestDefaultLocaleDir(unittest.TestCase):
    '''Check translations found via the module-level default locale dir.'''

    def setUp(self):
        # Save environment and module state so tearDown can restore them.
        self.old_LC_ALL = os.environ.get('LC_ALL', None)
        os.environ['LC_ALL'] = 'pt_BR.UTF8'
        self.old_DEFAULT_LOCALEDIRS = i18n._DEFAULT_LOCALEDIR
        i18n._DEFAULT_LOCALEDIR = '%s/data/locale/' % os.path.dirname(__file__)
        self.translations = i18n.get_translation_object('test')

    def tearDown(self):
        # Compare against None explicitly: a truthiness test would delete
        # LC_ALL instead of restoring it when the saved value was ''.
        if self.old_LC_ALL is not None:
            os.environ['LC_ALL'] = self.old_LC_ALL
        else:
            del os.environ['LC_ALL']
        if self.old_DEFAULT_LOCALEDIRS is not None:
            i18n._DEFAULT_LOCALEDIR = self.old_DEFAULT_LOCALEDIRS

    def test_gettext(self):
        '''Byte str and unicode msgids both resolve through the default dir.'''
        _ = self.translations.gettext
        tools.eq_(_('kitchen sink'), 'pia da cozinha')
        tools.eq_(_('Kuratomi'), 'くらとみ')
        tools.eq_(_('くらとみ'), 'Kuratomi')
        # Not present in this catalog; gettext returns the msgid unchanged.
        tools.eq_(_('Only café in fallback'), 'Only café in fallback')

        tools.eq_(_(u'kitchen sink'), 'pia da cozinha')
        tools.eq_(_(u'くらとみ'), 'Kuratomi')
        tools.eq_(_(u'Kuratomi'), 'くらとみ')
        tools.eq_(_(u'Only café in fallback'), 'Only café in fallback')
|
||||||
|
|
||||||
|
|
57
tests/test_iterutils.py
Normal file
57
tests/test_iterutils.py
Normal file
|
@ -0,0 +1,57 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
import unittest
|
||||||
|
from nose import tools
|
||||||
|
|
||||||
|
from kitchen import iterutils
|
||||||
|
|
||||||
|
class TestStrictDict(unittest.TestCase):
    '''Exercise kitchen.iterutils.isiterable/iterate.

    NOTE(review): the class name does not match what it tests; it is kept
    unchanged so test discovery and any external references still work.
    '''
    # Values isiterable()/iterate() should treat as iterables.
    iterable_data = (
            [0, 1, 2],
            [],
            (0, 1, 2),
            tuple(),
            set([0, 1, 2]),
            set(),
            dict(a=1, b=2),
            dict(),
            [None],
            [False],
            [0],
            xrange(0, 3),
            iter([1, 2, 3]),
            )
    # Scalars that must not be treated as iterables.
    non_iterable_data = (
            None,
            False,
            True,
            0,
            1.1,
            )

    def test_isiterable(self):
        '''isiterable() classifies iterables, scalars, and strings correctly.'''
        # eq_ replaces ok_(x == True): identical comparison, but a failure
        # reports the actual value.
        for item in self.iterable_data:
            tools.eq_(iterutils.isiterable(item), True)

        for item in self.non_iterable_data:
            tools.eq_(iterutils.isiterable(item), False)

        # strings count as iterable only when explicitly requested
        tools.eq_(iterutils.isiterable('a', include_string=True), True)
        tools.eq_(iterutils.isiterable('a', include_string=False), False)
        tools.eq_(iterutils.isiterable('a'), False)

    def test_iterate(self):
        '''iterate() wraps scalars in a one-item iterator, passes iterables through.'''
        iterutils.iterate(None)
        for item in self.non_iterable_data:
            tools.eq_(list(iterutils.iterate(item)), [item])

        # Exclude the final iter() entry -- see below.
        for item in self.iterable_data[:-1]:
            tools.eq_(list(iterutils.iterate(item)), list(item))

        # iter() is exhausted after use so we have to test separately
        tools.eq_(list(iterutils.iterate(iter([1, 2, 3]))), [1, 2, 3])

        # strings
        tools.eq_(list(iterutils.iterate('abc')), ['abc'])
        tools.eq_(list(iterutils.iterate('abc', include_string=True)), ['a', 'b', 'c'])
|
25
tests/test_pycompat.py
Normal file
25
tests/test_pycompat.py
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
import unittest
|
||||||
|
from nose import tools
|
||||||
|
|
||||||
|
class TestUsableModules(unittest.TestCase):
    # Import-availability checks for the pycompat* packages.  A failed
    # import is reported as a test failure (ok_(False, msg)) rather than
    # letting the ImportError surface as a test error.

    def test_subprocess(self):
        '''Test that importing subprocess as a module works
        '''
        try:
            from kitchen.pycompat24.subprocess import Popen
        except ImportError:
            tools.ok_(False, 'Unable to import pycompat24.subprocess as a module')
        try:
            from kitchen.pycompat27.subprocess import Popen
        except ImportError:
            tools.ok_(False, 'Unable to import pycompat27.subprocess as a module')

    def test_base64(self):
        '''Test that importing base64 as a module works
        '''
        try:
            from kitchen.pycompat24.base64 import b64encode
        except ImportError:
            tools.ok_(False, 'Unable to import pycompat24.base64 as a module')
|
109
tests/test_pycompat24.py
Normal file
109
tests/test_pycompat24.py
Normal file
|
@ -0,0 +1,109 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
import unittest
|
||||||
|
from nose import tools
|
||||||
|
from nose.plugins.skip import SkipTest
|
||||||
|
|
||||||
|
import __builtin__
|
||||||
|
import base64 as py_b64
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
from kitchen.pycompat24 import sets
|
||||||
|
from kitchen.pycompat24.base64 import _base64 as base64
|
||||||
|
|
||||||
|
class TestSetsNoOverwrite(unittest.TestCase):
    '''Verify sets.add_builtin_set() leaves pre-existing builtins alone.'''

    def setUp(self):
        # On pythons without builtin set/frozenset, install None as a
        # placeholder; the saved *_val stays None so tearDown knows the
        # attribute must be removed again afterwards.
        self.set_val = None
        self.frozenset_val = None
        if not hasattr(__builtin__, 'set'):
            __builtin__.set = self.set_val
        else:
            self.set_val = __builtin__.set
        if not hasattr(__builtin__, 'frozenset'):
            __builtin__.frozenset = self.frozenset_val
        else:
            self.frozenset_val = __builtin__.frozenset

    def tearDown(self):
        # 'is None' replaces '== None' (PEP 8); behavior is unchanged since
        # the sentinel is exactly None.
        if self.frozenset_val is None:
            del __builtin__.frozenset
        if self.set_val is None:
            del __builtin__.set

    def test_sets_dont_overwrite(self):
        '''Test that importing sets when there's already a set and frozenset defined does not overwrite
        '''
        sets.add_builtin_set()
        tools.ok_(__builtin__.set == self.set_val)
        tools.ok_(__builtin__.frozenset == self.frozenset_val)
|
||||||
|
|
||||||
|
class TestDefineSets(unittest.TestCase):
    '''Verify sets.add_builtin_set() installs set/frozenset when missing.'''

    def setUp(self):
        # The compat sets module emits DeprecationWarnings on modern pythons.
        warnings.simplefilter('ignore', DeprecationWarning)
        self.set_val = None
        self.frozenset_val = None
        # Remove any real builtins, remembering them for tearDown.
        if hasattr(__builtin__, 'set'):
            self.set_val = __builtin__.set
            del __builtin__.set
        if hasattr(__builtin__, 'frozenset'):
            self.frozenset_val = __builtin__.frozenset
            del __builtin__.frozenset

    def tearDown(self):
        warnings.simplefilter('default', DeprecationWarning)
        # Explicit None checks: the saved values are either a type object
        # or None, so 'is not None' states the intent precisely.
        if self.set_val is not None:
            __builtin__.set = self.set_val
        else:
            del __builtin__.set
        if self.frozenset_val is not None:
            __builtin__.frozenset = self.frozenset_val
        else:
            del __builtin__.frozenset

    def test_pycompat_defines_set(self):
        '''Test that calling pycompat24.add_builtin_set() adds set and frozenset to __builtin__
        '''
        import sets as py_sets
        sets.add_builtin_set()
        if self.set_val is not None:
            # Real builtins existed before setUp removed them; they must be
            # reinstated as-is.
            tools.ok_(__builtin__.set == self.set_val)
            tools.ok_(__builtin__.frozenset == self.frozenset_val)
        else:
            # No builtins originally: the compat sets module's types are used.
            tools.ok_(__builtin__.set == py_sets.Set)
            tools.ok_(__builtin__.frozenset == py_sets.ImmutableSet)
|
||||||
|
|
||||||
|
class TestSubprocess(unittest.TestCase):
    # Placeholder: subprocess compatibility is exercised separately in
    # tests/test_subprocess.py; no cases are defined here yet.
    pass
|
||||||
|
|
||||||
|
class TestBase64(unittest.TestCase):
    '''Exercise the pycompat24 base64 implementation against known values.'''
    # Every byte value 0-255, space separated.
    b_byte_chars = ' '.join(map(chr, range(0, 256)))
    # Known-good encodings of b_byte_chars: standard and urlsafe alphabets.
    b_byte_encoded = 'ACABIAIgAyAEIAUgBiAHIAggCSAKIAsgDCANIA4gDyAQIBEgEiATIBQgFSAWIBcgGCAZIBogGyAcIB0gHiAfICAgISAiICMgJCAlICYgJyAoICkgKiArICwgLSAuIC8gMCAxIDIgMyA0IDUgNiA3IDggOSA6IDsgPCA9ID4gPyBAIEEgQiBDIEQgRSBGIEcgSCBJIEogSyBMIE0gTiBPIFAgUSBSIFMgVCBVIFYgVyBYIFkgWiBbIFwgXSBeIF8gYCBhIGIgYyBkIGUgZiBnIGggaSBqIGsgbCBtIG4gbyBwIHEgciBzIHQgdSB2IHcgeCB5IHogeyB8IH0gfiB/IIAggSCCIIMghCCFIIYghyCIIIkgiiCLIIwgjSCOII8gkCCRIJIgkyCUIJUgliCXIJggmSCaIJsgnCCdIJ4gnyCgIKEgoiCjIKQgpSCmIKcgqCCpIKogqyCsIK0griCvILAgsSCyILMgtCC1ILYgtyC4ILkguiC7ILwgvSC+IL8gwCDBIMIgwyDEIMUgxiDHIMggySDKIMsgzCDNIM4gzyDQINEg0iDTINQg1SDWINcg2CDZINog2yDcIN0g3iDfIOAg4SDiIOMg5CDlIOYg5yDoIOkg6iDrIOwg7SDuIO8g8CDxIPIg8yD0IPUg9iD3IPgg+SD6IPsg/CD9IP4g/w=='
    b_byte_encoded_urlsafe = 'ACABIAIgAyAEIAUgBiAHIAggCSAKIAsgDCANIA4gDyAQIBEgEiATIBQgFSAWIBcgGCAZIBogGyAcIB0gHiAfICAgISAiICMgJCAlICYgJyAoICkgKiArICwgLSAuIC8gMCAxIDIgMyA0IDUgNiA3IDggOSA6IDsgPCA9ID4gPyBAIEEgQiBDIEQgRSBGIEcgSCBJIEogSyBMIE0gTiBPIFAgUSBSIFMgVCBVIFYgVyBYIFkgWiBbIFwgXSBeIF8gYCBhIGIgYyBkIGUgZiBnIGggaSBqIGsgbCBtIG4gbyBwIHEgciBzIHQgdSB2IHcgeCB5IHogeyB8IH0gfiB_IIAggSCCIIMghCCFIIYghyCIIIkgiiCLIIwgjSCOII8gkCCRIJIgkyCUIJUgliCXIJggmSCaIJsgnCCdIJ4gnyCgIKEgoiCjIKQgpSCmIKcgqCCpIKogqyCsIK0griCvILAgsSCyILMgtCC1ILYgtyC4ILkguiC7ILwgvSC-IL8gwCDBIMIgwyDEIMUgxiDHIMggySDKIMsgzCDNIM4gzyDQINEg0iDTINQg1SDWINcg2CDZINog2yDcIN0g3iDfIOAg4SDiIOMg5CDlIOYg5yDoIOkg6iDrIOwg7SDuIO8g8CDxIPIg8yD0IPUg9iD3IPgg-SD6IPsg_CD9IP4g_w=='

    def test_base64_encode(self):
        '''Encoding matches the known-good values for both alphabets.

        The original ran these four assertions twice verbatim (copy/paste);
        the functions are pure so once is sufficient.
        '''
        tools.ok_(base64.b64encode(self.b_byte_chars) == self.b_byte_encoded)
        tools.ok_(base64.b64encode(self.b_byte_chars, altchars='-_') == self.b_byte_encoded_urlsafe)
        tools.ok_(base64.standard_b64encode(self.b_byte_chars) == self.b_byte_encoded)
        tools.ok_(base64.urlsafe_b64encode(self.b_byte_chars) == self.b_byte_encoded_urlsafe)

    def test_base64_decode(self):
        '''Decoding the known-good values round-trips back to the raw bytes.

        Duplicate assertion run removed (see test_base64_encode).
        '''
        tools.ok_(base64.b64decode(self.b_byte_encoded) == self.b_byte_chars)
        tools.ok_(base64.b64decode(self.b_byte_encoded_urlsafe, altchars='-_') == self.b_byte_chars)
        tools.ok_(base64.standard_b64decode(self.b_byte_encoded) == self.b_byte_chars)
        tools.ok_(base64.urlsafe_b64decode(self.b_byte_encoded_urlsafe) == self.b_byte_chars)

    def test_base64_stdlib_compat(self):
        '''Our implementation agrees with the stdlib base64 module.'''
        if not hasattr(py_b64, 'b64encode'):
            raise SkipTest('Python-2.3 doesn\'t have b64encode to compare against')
        tools.ok_(base64.b64encode(self.b_byte_chars) == py_b64.b64encode(self.b_byte_chars))
        # NOTE(review): this decodes b_byte_chars (raw bytes, not a base64
        # string); both implementations get the same input so the comparison
        # holds, but confirm decoding raw data was intended here.
        tools.ok_(base64.b64decode(self.b_byte_chars) == py_b64.b64decode(self.b_byte_chars))
|
1457
tests/test_subprocess.py
Normal file
1457
tests/test_subprocess.py
Normal file
File diff suppressed because it is too large
Load diff
161
tests/test_text_display.py
Normal file
161
tests/test_text_display.py
Normal file
|
@ -0,0 +1,161 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
import unittest
|
||||||
|
from nose import tools
|
||||||
|
|
||||||
|
from kitchen.text.exceptions import ControlCharError
|
||||||
|
|
||||||
|
from kitchen.text import display
|
||||||
|
|
||||||
|
import base_classes
|
||||||
|
|
||||||
|
class TestDisplay(base_classes.UnicodeTestData, unittest.TestCase):

    def test_internal_interval_bisearch(self):
        '''Test that we can find things in an interval table'''
        table = ((0, 3), (5, 7), (9, 10))
        # Every value inside one of the closed intervals is found...
        for member in (0, 1, 2, 3, 5, 6, 7, 9, 10):
            tools.ok_(display._interval_bisearch(member, table) == True)
        # ...and every value between or outside the intervals is not.
        for outsider in (-1, 4, 8, 11):
            tools.ok_(display._interval_bisearch(outsider, table) == False)
|
||||||
|
|
||||||
|
def test_internal_generate_combining_table(self):
|
||||||
|
'''Test that the combining table we generate is equal to or a subseet of what's in the current table
|
||||||
|
|
||||||
|
If we assert it can mean one of two things:
|
||||||
|
|
||||||
|
1. The code is broken
|
||||||
|
2. The table we have is out of date.
|
||||||
|
'''
|
||||||
|
old_table = display._COMBINING
|
||||||
|
new_table = display._generate_combining_table()
|
||||||
|
for interval in new_table:
|
||||||
|
if interval[0] == interval[1]:
|
||||||
|
tools.ok_(display._interval_bisearch(interval[0], old_table) == True)
|
||||||
|
else:
|
||||||
|
for codepoint in xrange(interval[0], interval[1] + 1):
|
||||||
|
tools.ok_(display._interval_bisearch(interval[0], old_table) == True)
|
||||||
|
|
||||||
|
    def test_internal_ucp_width(self):
        '''Test that ucp_width returns proper width for characters'''
        # Exhaustively check every codepoint through the end of Plane 3.
        for codepoint in xrange(0, 0xFFFFF + 1):
            if codepoint < 32 or (codepoint < 0xa0 and codepoint >= 0x7f):
                # C0/C1 control characters.
                # With strict on, we should raise an error
                tools.assert_raises(ControlCharError, display._ucp_width, codepoint, 'strict')

                if codepoint in (0x08, 0x1b, 0x7f, 0x94):
                    # Backspace, delete, clear delete remove one char
                    # (width -1).  NOTE(review): 0x1b is ESC and 0x94 is
                    # CCH -- confirm these match _ucp_width's intent.
                    tools.ok_(display._ucp_width(codepoint) == -1)
                else:
                    # Everything else returns 0
                    tools.ok_(display._ucp_width(codepoint) == 0)
            elif display._interval_bisearch(codepoint, display._COMBINING):
                # Combining character
                tools.ok_(display._ucp_width(codepoint) == 0)
            elif (codepoint >= 0x1100 and
                    (codepoint <= 0x115f or # Hangul Jamo init. consonants
                    codepoint == 0x2329 or codepoint == 0x232a or
                    (codepoint >= 0x2e80 and codepoint <= 0xa4cf and
                        codepoint != 0x303f) or # CJK ... Yi
                    (codepoint >= 0xac00 and codepoint <= 0xd7a3) or # Hangul Syllables
                    (codepoint >= 0xf900 and codepoint <= 0xfaff) or # CJK Compatibility Ideographs
                    (codepoint >= 0xfe10 and codepoint <= 0xfe19) or # Vertical forms
                    (codepoint >= 0xfe30 and codepoint <= 0xfe6f) or # CJK Compatibility Forms
                    (codepoint >= 0xff00 and codepoint <= 0xff60) or # Fullwidth Forms
                    (codepoint >= 0xffe0 and codepoint <= 0xffe6) or
                    (codepoint >= 0x20000 and codepoint <= 0x2fffd) or
                    (codepoint >= 0x30000 and codepoint <= 0x3fffd))):
                # Wide (double-cell) East Asian codepoints.
                tools.ok_(display._ucp_width(codepoint) == 2)
            else:
                # Everything remaining occupies a single cell.
                tools.ok_(display._ucp_width(codepoint) == 1)
|
||||||
|
|
||||||
|
def test_textual_width(self):
|
||||||
|
'''Test that we find the proper number of spaces that a utf8 string will consume'''
|
||||||
|
tools.ok_(display.textual_width(self.u_japanese) == 31)
|
||||||
|
tools.ok_(display.textual_width(self.u_spanish) == 50)
|
||||||
|
tools.ok_(display.textual_width(self.u_mixed) == 23)
|
||||||
|
|
||||||
|
def test_textual_width_chop(self):
|
||||||
|
'''utf8_width_chop with byte strings'''
|
||||||
|
tools.ok_(display.textual_width_chop(self.u_mixed, 1000) == self.u_mixed)
|
||||||
|
tools.ok_(display.textual_width_chop(self.u_mixed, 23) == self.u_mixed)
|
||||||
|
tools.ok_(display.textual_width_chop(self.u_mixed, 22) == self.u_mixed[:-1])
|
||||||
|
tools.ok_(display.textual_width_chop(self.u_mixed, 19) == self.u_mixed[:-4])
|
||||||
|
tools.ok_(display.textual_width_chop(self.u_mixed, 1) == u'')
|
||||||
|
tools.ok_(display.textual_width_chop(self.u_mixed, 2) == self.u_mixed[0])
|
||||||
|
tools.ok_(display.textual_width_chop(self.u_mixed, 3) == self.u_mixed[:2])
|
||||||
|
tools.ok_(display.textual_width_chop(self.u_mixed, 4) == self.u_mixed[:3])
|
||||||
|
tools.ok_(display.textual_width_chop(self.u_mixed, 5) == self.u_mixed[:4])
|
||||||
|
tools.ok_(display.textual_width_chop(self.u_mixed, 6) == self.u_mixed[:5])
|
||||||
|
tools.ok_(display.textual_width_chop(self.u_mixed, 7) == self.u_mixed[:5])
|
||||||
|
tools.ok_(display.textual_width_chop(self.u_mixed, 8) == self.u_mixed[:6])
|
||||||
|
tools.ok_(display.textual_width_chop(self.u_mixed, 9) == self.u_mixed[:7])
|
||||||
|
tools.ok_(display.textual_width_chop(self.u_mixed, 10) == self.u_mixed[:8])
|
||||||
|
tools.ok_(display.textual_width_chop(self.u_mixed, 11) == self.u_mixed[:9])
|
||||||
|
tools.ok_(display.textual_width_chop(self.u_mixed, 12) == self.u_mixed[:10])
|
||||||
|
tools.ok_(display.textual_width_chop(self.u_mixed, 13) == self.u_mixed[:10])
|
||||||
|
tools.ok_(display.textual_width_chop(self.u_mixed, 14) == self.u_mixed[:11])
|
||||||
|
tools.ok_(display.textual_width_chop(self.u_mixed, 15) == self.u_mixed[:12])
|
||||||
|
tools.ok_(display.textual_width_chop(self.u_mixed, 16) == self.u_mixed[:13])
|
||||||
|
tools.ok_(display.textual_width_chop(self.u_mixed, 17) == self.u_mixed[:14])
|
||||||
|
tools.ok_(display.textual_width_chop(self.u_mixed, 18) == self.u_mixed[:15])
|
||||||
|
tools.ok_(display.textual_width_chop(self.u_mixed, 19) == self.u_mixed[:15])
|
||||||
|
tools.ok_(display.textual_width_chop(self.u_mixed, 20) == self.u_mixed[:16])
|
||||||
|
tools.ok_(display.textual_width_chop(self.u_mixed, 21) == self.u_mixed[:17])
|
||||||
|
|
||||||
|
    def test_textual_width_fill(self):
        '''Pad a utf8 string'''
        # NOTE(review): the padding literals below may have had runs of
        # spaces collapsed by formatting tools -- verify the expected pad
        # widths against the pristine source before editing.
        tools.ok_(display.textual_width_fill(self.u_mixed, 1) == self.u_mixed)
        tools.ok_(display.textual_width_fill(self.u_mixed, 25) == self.u_mixed + u' ')
        tools.ok_(display.textual_width_fill(self.u_mixed, 25, left=False) == u' ' + self.u_mixed)
        tools.ok_(display.textual_width_fill(self.u_mixed, 25, chop=18) == self.u_mixed[:-4] + u' ')
        tools.ok_(display.textual_width_fill(self.u_mixed, 25, chop=18, prefix=self.u_spanish, suffix=self.u_spanish) == self.u_spanish + self.u_mixed[:-4] + self.u_spanish + u' ')
        # The two assertions below repeat the chop=18 cases above verbatim.
        tools.ok_(display.textual_width_fill(self.u_mixed, 25, chop=18) == self.u_mixed[:-4] + u' ')
        tools.ok_(display.textual_width_fill(self.u_mixed, 25, chop=18, prefix=self.u_spanish, suffix=self.u_spanish) == self.u_spanish + self.u_mixed[:-4] + self.u_spanish + u' ')
|
||||||
|
|
||||||
|
def test_internal_textual_width_le(self):
|
||||||
|
test_data = ''.join([self.u_mixed, self.u_spanish])
|
||||||
|
tw = display.textual_width(test_data)
|
||||||
|
tools.ok_(display._textual_width_le(68, self.u_mixed, self.u_spanish) == (tw <= 68))
|
||||||
|
tools.ok_(display._textual_width_le(69, self.u_mixed, self.u_spanish) == (tw <= 69))
|
||||||
|
tools.ok_(display._textual_width_le(137, self.u_mixed, self.u_spanish) == (tw <= 137))
|
||||||
|
tools.ok_(display._textual_width_le(138, self.u_mixed, self.u_spanish) == (tw <= 138))
|
||||||
|
tools.ok_(display._textual_width_le(78, self.u_mixed, self.u_spanish) == (tw <= 78))
|
||||||
|
tools.ok_(display._textual_width_le(79, self.u_mixed, self.u_spanish) == (tw <= 79))
|
||||||
|
|
||||||
|
    def test_wrap(self):
        '''Test that text wrapping works'''
        # A short string wraps to a single-element list.
        tools.ok_(display.wrap(self.u_mixed) == [self.u_mixed])
        tools.ok_(display.wrap(self.u_paragraph) == self.u_paragraph_out)
        # utf8 byte string input yields the same output as the unicode input.
        tools.ok_(display.wrap(self.utf8_paragraph) == self.u_paragraph_out)
        tools.ok_(display.wrap(self.u_mixed_para) == self.u_mixed_para_out)
        # NOTE(review): initial_indent may originally have been several
        # spaces (formatting can collapse space runs) -- confirm against
        # the expected-output constant.
        tools.ok_(display.wrap(self.u_mixed_para, width=57,
            initial_indent=' ', subsequent_indent='----') ==
            self.u_mixed_para_57_initial_subsequent_out)
|
||||||
|
|
||||||
|
    def test_fill(self):
        '''fill() is wrap() joined with newlines.'''
        tools.ok_(display.fill(self.u_paragraph) == u'\n'.join(self.u_paragraph_out))
        # utf8 byte string input yields the same output as the unicode input.
        tools.ok_(display.fill(self.utf8_paragraph) == u'\n'.join(self.u_paragraph_out))
        tools.ok_(display.fill(self.u_mixed_para) == u'\n'.join(self.u_mixed_para_out))
        # NOTE(review): initial_indent may originally have been several
        # spaces (formatting can collapse space runs) -- confirm against
        # the expected-output constant.
        tools.ok_(display.fill(self.u_mixed_para, width=57,
            initial_indent=' ', subsequent_indent='----') ==
            u'\n'.join(self.u_mixed_para_57_initial_subsequent_out))
|
||||||
|
|
||||||
|
    def test_byte_string_textual_width_fill(self):
        '''Byte-string variant of textual_width_fill.'''
        # NOTE(review): padding literals may have had space runs collapsed
        # by formatting -- verify widths against the pristine source.
        tools.ok_(display.byte_string_textual_width_fill(self.utf8_mixed, 1) == self.utf8_mixed)
        tools.ok_(display.byte_string_textual_width_fill(self.utf8_mixed, 25) == self.utf8_mixed + ' ')
        tools.ok_(display.byte_string_textual_width_fill(self.utf8_mixed, 25, left=False) == ' ' + self.utf8_mixed)
        tools.ok_(display.byte_string_textual_width_fill(self.utf8_mixed, 25, chop=18) == self.u_mixed[:-4].encode('utf8') + ' ')
        tools.ok_(display.byte_string_textual_width_fill(self.utf8_mixed, 25, chop=18, prefix=self.utf8_spanish, suffix=self.utf8_spanish) == self.utf8_spanish + self.u_mixed[:-4].encode('utf8') + self.utf8_spanish + ' ')
        # The two assertions below repeat the chop=18 cases above verbatim.
        tools.ok_(display.byte_string_textual_width_fill(self.utf8_mixed, 25, chop=18) == self.u_mixed[:-4].encode('utf8') + ' ')
        tools.ok_(display.byte_string_textual_width_fill(self.utf8_mixed, 25, chop=18, prefix=self.utf8_spanish, suffix=self.utf8_spanish) == self.utf8_spanish + self.u_mixed[:-4].encode('utf8') + self.utf8_spanish + ' ')
|
||||||
|
|
137
tests/test_text_misc.py
Normal file
137
tests/test_text_misc.py
Normal file
|
@ -0,0 +1,137 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
import unittest
|
||||||
|
from nose import tools
|
||||||
|
from nose.plugins.skip import SkipTest
|
||||||
|
|
||||||
|
try:
|
||||||
|
import chardet
|
||||||
|
except ImportError:
|
||||||
|
chardet = None
|
||||||
|
|
||||||
|
from kitchen.text import misc
|
||||||
|
from kitchen.text.exceptions import ControlCharError
|
||||||
|
from kitchen.text.converters import to_unicode
|
||||||
|
|
||||||
|
import base_classes
|
||||||
|
|
||||||
|
class TestTextMisc(unittest.TestCase, base_classes.UnicodeTestData):
    def test_guess_encoding_no_chardet(self):
        '''guess_encoding heuristics with chardet explicitly disabled.'''
        # Test that unicode strings are not allowed
        tools.assert_raises(TypeError, misc.guess_encoding, self.u_spanish)

        # Without chardet, anything that decodes as utf-8 is reported as
        # utf-8 and everything else falls back to latin-1.
        expectations = (
            (self.utf8_spanish, 'utf-8'),
            (self.latin1_spanish, 'latin-1'),
            (self.utf8_japanese, 'utf-8'),
            (self.euc_jp_japanese, 'latin-1'),
        )
        for data, encoding in expectations:
            tools.ok_(misc.guess_encoding(data, disable_chardet=True) == encoding)
|
||||||
|
|
||||||
|
def test_guess_encoding_with_chardet(self):
|
||||||
|
# We go this slightly roundabout way because multiple encodings can
|
||||||
|
# output the same byte sequence. What we're really interested in is
|
||||||
|
# if we can get the original unicode string without knowing the
|
||||||
|
# converters beforehand
|
||||||
|
tools.ok_(to_unicode(self.utf8_spanish,
|
||||||
|
misc.guess_encoding(self.utf8_spanish)) == self.u_spanish)
|
||||||
|
tools.ok_(to_unicode(self.latin1_spanish,
|
||||||
|
misc.guess_encoding(self.latin1_spanish)) == self.u_spanish)
|
||||||
|
tools.ok_(to_unicode(self.utf8_japanese,
|
||||||
|
misc.guess_encoding(self.utf8_japanese)) == self.u_japanese)
|
||||||
|
|
||||||
|
def test_guess_encoding_with_chardet_installed(self):
|
||||||
|
if chardet:
|
||||||
|
tools.ok_(to_unicode(self.euc_jp_japanese,
|
||||||
|
misc.guess_encoding(self.euc_jp_japanese)) == self.u_japanese)
|
||||||
|
else:
|
||||||
|
raise SkipTest('chardet not installed, euc_jp will not be guessed correctly')
|
||||||
|
|
||||||
|
def test_guess_encoding_with_chardet_uninstalled(self):
|
||||||
|
if chardet:
|
||||||
|
raise SkipTest('chardet installed, euc_jp will not be mangled')
|
||||||
|
else:
|
||||||
|
tools.ok_(to_unicode(self.euc_jp_japanese,
|
||||||
|
misc.guess_encoding(self.euc_jp_japanese)) ==
|
||||||
|
self.u_mangled_euc_jp_as_latin1)
|
||||||
|
|
||||||
|
def test_str_eq(self):
|
||||||
|
# str vs str:
|
||||||
|
tools.ok_(misc.str_eq(self.euc_jp_japanese, self.euc_jp_japanese) == True)
|
||||||
|
tools.ok_(misc.str_eq(self.utf8_japanese, self.utf8_japanese) == True)
|
||||||
|
tools.ok_(misc.str_eq(self.b_ascii, self.b_ascii) == True)
|
||||||
|
tools.ok_(misc.str_eq(self.euc_jp_japanese, self.latin1_spanish) == False)
|
||||||
|
tools.ok_(misc.str_eq(self.utf8_japanese, self.euc_jp_japanese) == False)
|
||||||
|
tools.ok_(misc.str_eq(self.b_ascii, self.b_ascii[:-2]) == False)
|
||||||
|
|
||||||
|
# unicode vs unicode:
|
||||||
|
tools.ok_(misc.str_eq(self.u_japanese, self.u_japanese) == True)
|
||||||
|
tools.ok_(misc.str_eq(self.u_ascii, self.u_ascii) == True)
|
||||||
|
tools.ok_(misc.str_eq(self.u_japanese, self.u_spanish) == False)
|
||||||
|
tools.ok_(misc.str_eq(self.u_ascii, self.u_ascii[:-2]) == False)
|
||||||
|
|
||||||
|
# unicode vs str with default utf-8 conversion:
|
||||||
|
tools.ok_(misc.str_eq(self.u_japanese, self.utf8_japanese) == True)
|
||||||
|
tools.ok_(misc.str_eq(self.u_ascii, self.b_ascii) == True)
|
||||||
|
tools.ok_(misc.str_eq(self.u_japanese, self.euc_jp_japanese) == False)
|
||||||
|
tools.ok_(misc.str_eq(self.u_ascii, self.b_ascii[:-2]) == False)
|
||||||
|
|
||||||
|
# unicode vs str with explicit encodings:
|
||||||
|
tools.ok_(misc.str_eq(self.u_japanese, self.euc_jp_japanese, encoding='euc_jp') == True)
|
||||||
|
tools.ok_(misc.str_eq(self.u_japanese, self.utf8_japanese, encoding='utf8') == True)
|
||||||
|
tools.ok_(misc.str_eq(self.u_ascii, self.b_ascii, encoding='latin1') == True)
|
||||||
|
tools.ok_(misc.str_eq(self.u_japanese, self.euc_jp_japanese, encoding='latin1') == False)
|
||||||
|
tools.ok_(misc.str_eq(self.u_japanese, self.utf8_japanese, encoding='euc_jp') == False)
|
||||||
|
tools.ok_(misc.str_eq(self.u_japanese, self.utf8_japanese, encoding='euc_jp') == False)
|
||||||
|
tools.ok_(misc.str_eq(self.u_ascii, self.b_ascii[:-2], encoding='latin1') == False)
|
||||||
|
|
||||||
|
# str vs unicode (reverse parameter order of unicode vs str)
|
||||||
|
tools.ok_(misc.str_eq(self.utf8_japanese, self.u_japanese) == True)
|
||||||
|
tools.ok_(misc.str_eq(self.b_ascii, self.u_ascii) == True)
|
||||||
|
tools.ok_(misc.str_eq(self.euc_jp_japanese, self.u_japanese) == False)
|
||||||
|
tools.ok_(misc.str_eq(self.b_ascii, self.u_ascii[:-2]) == False)
|
||||||
|
|
||||||
|
tools.ok_(misc.str_eq(self.euc_jp_japanese, self.u_japanese, encoding='euc_jp') == True)
|
||||||
|
tools.ok_(misc.str_eq(self.utf8_japanese, self.u_japanese, encoding='utf8') == True)
|
||||||
|
tools.ok_(misc.str_eq(self.b_ascii, self.u_ascii, encoding='latin1') == True)
|
||||||
|
tools.ok_(misc.str_eq(self.euc_jp_japanese, self.u_japanese, encoding='latin1') == False)
|
||||||
|
tools.ok_(misc.str_eq(self.utf8_japanese, self.u_japanese, encoding='euc_jp') == False)
|
||||||
|
tools.ok_(misc.str_eq(self.utf8_japanese, self.u_japanese, encoding='euc_jp') == False)
|
||||||
|
tools.ok_(misc.str_eq(self.b_ascii, self.u_ascii[:-2], encoding='latin1') == False)
|
||||||
|
|
||||||
|
|
||||||
|
def test_process_control_chars(self):
|
||||||
|
tools.assert_raises(TypeError, misc.process_control_chars, 'byte string')
|
||||||
|
tools.assert_raises(ControlCharError, misc.process_control_chars,
|
||||||
|
*[self.u_ascii_chars], **{'strategy':'strict'})
|
||||||
|
tools.ok_(misc.process_control_chars(self.u_ascii_chars,
|
||||||
|
strategy='ignore') == self.u_ascii_no_ctrl)
|
||||||
|
tools.ok_(misc.process_control_chars(self.u_ascii_chars,
|
||||||
|
strategy='replace') == self.u_ascii_ctrl_replace)
|
||||||
|
|
||||||
|
    def test_html_entities_unescape(self):
        # Only unicode input is accepted.
        tools.assert_raises(TypeError, misc.html_entities_unescape, 'byte string')
        tools.ok_(misc.html_entities_unescape(self.u_entity_escape) == self.u_entity)
        # Surrounding markup is stripped along with the entities.
        tools.ok_(misc.html_entities_unescape(u'<tag>%s</tag>'
            % self.u_entity_escape) == self.u_entity)
        # NOTE(review): the literals below contain characters garbled in
        # transit (shown as U+FFFD replacement characters); restore the
        # original entity strings from pristine source before relying on
        # these assertions.
        tools.ok_(misc.html_entities_unescape(u'a�b') == u'a�b')
        tools.ok_(misc.html_entities_unescape(u'a�b') == u'a\ufffdb')
        tools.ok_(misc.html_entities_unescape(u'a�b') == u'a\ufffdb')
|
||||||
|
|
||||||
|
    def test_byte_string_valid_xml(self):
        # Unicode input is never considered a valid xml byte string.
        tools.ok_(misc.byte_string_valid_xml(u'unicode string') == False)

        # Correctly encoded byte strings validate under their own encoding
        # (utf-8 is the default)...
        tools.ok_(misc.byte_string_valid_xml(self.utf8_japanese))
        tools.ok_(misc.byte_string_valid_xml(self.euc_jp_japanese, 'euc_jp'))

        # ...but not under a mismatched encoding.
        tools.ok_(misc.byte_string_valid_xml(self.utf8_japanese, 'euc_jp') == False)
        tools.ok_(misc.byte_string_valid_xml(self.euc_jp_japanese, 'utf8') == False)

        # Control characters make the string invalid for xml.
        tools.ok_(misc.byte_string_valid_xml(self.utf8_ascii_chars) == False)
|
||||||
|
|
||||||
|
    def test_byte_string_valid_encoding(self):
        '''Test that a byte sequence is validated'''
        # utf-8 is the default; other encodings must be named explicitly.
        tools.ok_(misc.byte_string_valid_encoding(self.utf8_japanese) == True)
        tools.ok_(misc.byte_string_valid_encoding(self.euc_jp_japanese, encoding='euc_jp') == True)
|
||||||
|
|
||||||
|
    def test_byte_string_invalid_encoding(self):
        '''Test that we return False with non-encoded chars'''
        # '\xff' is never a valid utf-8 byte sequence.
        tools.ok_(misc.byte_string_valid_encoding('\xff') == False)
        # euc_jp bytes do not validate under the default utf-8 check.
        tools.ok_(misc.byte_string_valid_encoding(self.euc_jp_japanese) == False)
|
92
tests/test_text_utf8.py
Normal file
92
tests/test_text_utf8.py
Normal file
|
@ -0,0 +1,92 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
import unittest
|
||||||
|
from nose import tools
|
||||||
|
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
from kitchen.text import utf8
|
||||||
|
|
||||||
|
import base_classes
|
||||||
|
|
||||||
|
class TestUTF8(base_classes.UnicodeTestData, unittest.TestCase):
    def setUp(self):
        # All of the utf8* functions are deprecated; silence the warnings
        # for the duration of each test.
        warnings.simplefilter('ignore', DeprecationWarning)

    def tearDown(self):
        # Restore the default DeprecationWarning behavior.
        warnings.simplefilter('default', DeprecationWarning)
|
||||||
|
|
||||||
|
def test_utf8_width(self):
|
||||||
|
'''Test that we find the proper number of spaces that a utf8 string will consume'''
|
||||||
|
tools.ok_(utf8.utf8_width(self.utf8_japanese) == 31)
|
||||||
|
tools.ok_(utf8.utf8_width(self.utf8_spanish) == 50)
|
||||||
|
tools.ok_(utf8.utf8_width(self.utf8_mixed) == 23)
|
||||||
|
|
||||||
|
def test_utf8_width_non_utf8(self):
|
||||||
|
'''Test that we handle non-utf8 bytes in utf8_width without backtracing'''
|
||||||
|
# utf8_width() treats non-utf8 byte sequences as undecodable so you
|
||||||
|
# end up with less characters than normal. In this string:
|
||||||
|
# Python-2.7+ replaces problematic characters in a different manner
|
||||||
|
# than older pythons.
|
||||||
|
# Python >= 2.7:
|
||||||
|
# El veloz murci<63>lago salt<6C> sobre el perro perezoso.
|
||||||
|
# Python < 2.7:
|
||||||
|
# El veloz murci<63>go salt<6C>bre el perro perezoso.
|
||||||
|
if len(unicode(u'\xe9la'.encode('latin1'), 'utf8', 'replace')) == 1:
|
||||||
|
# Python < 2.7
|
||||||
|
tools.ok_(utf8.utf8_width(self.latin1_spanish) == 45)
|
||||||
|
else:
|
||||||
|
# Python >= 2.7
|
||||||
|
tools.ok_(utf8.utf8_width(self.latin1_spanish) == 50)
|
||||||
|
|
||||||
|
def test_utf8_width_chop(self):
|
||||||
|
'''utf8_width_chop with byte strings'''
|
||||||
|
tools.ok_(utf8.utf8_width_chop(self.utf8_mixed) == (23, self.utf8_mixed))
|
||||||
|
tools.ok_(utf8.utf8_width_chop(self.utf8_mixed, 23) == (23, self.utf8_mixed))
|
||||||
|
tools.ok_(utf8.utf8_width_chop(self.utf8_mixed, 22) == (22, self.utf8_mixed[:-1]))
|
||||||
|
tools.ok_(utf8.utf8_width_chop(self.utf8_mixed, 19) == (18, self.u_mixed[:-4].encode('utf8')))
|
||||||
|
tools.ok_(utf8.utf8_width_chop(self.utf8_mixed, 2) == (2, self.u_mixed[0].encode('utf8')))
|
||||||
|
tools.ok_(utf8.utf8_width_chop(self.utf8_mixed, 1) == (0, ''))
|
||||||
|
|
||||||
|
def test_utf8_width_chop_unicode(self):
|
||||||
|
'''utf8_width_chop with unicode input'''
|
||||||
|
tools.ok_(utf8.utf8_width_chop(self.u_mixed) == (23, self.u_mixed))
|
||||||
|
tools.ok_(utf8.utf8_width_chop(self.u_mixed, 23) == (23, self.u_mixed))
|
||||||
|
tools.ok_(utf8.utf8_width_chop(self.u_mixed, 22) == (22, self.u_mixed[:-1]))
|
||||||
|
tools.ok_(utf8.utf8_width_chop(self.u_mixed, 19) == (18, self.u_mixed[:-4]))
|
||||||
|
tools.ok_(utf8.utf8_width_chop(self.u_mixed, 2) == (2, self.u_mixed[0]))
|
||||||
|
tools.ok_(utf8.utf8_width_chop(self.u_mixed, 1) == (0, ''))
|
||||||
|
|
||||||
|
def test_utf8_width_fill(self):
|
||||||
|
'''Pad a utf8 string'''
|
||||||
|
tools.ok_(utf8.utf8_width_fill(self.utf8_mixed, 1) == self.utf8_mixed)
|
||||||
|
tools.ok_(utf8.utf8_width_fill(self.utf8_mixed, 25) == self.utf8_mixed + ' ')
|
||||||
|
tools.ok_(utf8.utf8_width_fill(self.utf8_mixed, 25, left=False) == ' ' + self.utf8_mixed)
|
||||||
|
tools.ok_(utf8.utf8_width_fill(self.utf8_mixed, 25, chop=18) == self.u_mixed[:-4].encode('utf8') + ' ')
|
||||||
|
tools.ok_(utf8.utf8_width_fill(self.utf8_mixed, 25, chop=18, prefix=self.utf8_spanish, suffix=self.utf8_spanish) == self.utf8_spanish + self.u_mixed[:-4].encode('utf8') + self.utf8_spanish + ' ')
|
||||||
|
tools.ok_(utf8.utf8_width_fill(self.utf8_mixed, 25, chop=18) == self.u_mixed[:-4].encode('utf8') + ' ')
|
||||||
|
tools.ok_(utf8.utf8_width_fill(self.u_mixed, 25, chop=18, prefix=self.u_spanish, suffix=self.utf8_spanish) == self.u_spanish.encode('utf8') + self.u_mixed[:-4].encode('utf8') + self.u_spanish.encode('utf8') + ' ')
|
||||||
|
pass
|
||||||
|
|
||||||
|
def test_utf8_valid(self):
|
||||||
|
'''Test that a utf8 byte sequence is validated'''
|
||||||
|
warnings.simplefilter('ignore', DeprecationWarning)
|
||||||
|
tools.ok_(utf8.utf8_valid(self.utf8_japanese) == True)
|
||||||
|
tools.ok_(utf8.utf8_valid(self.utf8_spanish) == True)
|
||||||
|
warnings.simplefilter('default', DeprecationWarning)
|
||||||
|
|
||||||
|
def test_utf8_invalid(self):
|
||||||
|
'''Test that we return False with non-utf8 chars'''
|
||||||
|
warnings.simplefilter('ignore', DeprecationWarning)
|
||||||
|
tools.ok_(utf8.utf8_valid('\xff') == False)
|
||||||
|
tools.ok_(utf8.utf8_valid(self.latin1_spanish) == False)
|
||||||
|
warnings.simplefilter('default', DeprecationWarning)
|
||||||
|
|
||||||
|
def test_utf8_text_wrap(self):
|
||||||
|
tools.ok_(utf8.utf8_text_wrap(self.utf8_mixed) == [self.utf8_mixed])
|
||||||
|
tools.ok_(utf8.utf8_text_wrap(self.utf8_paragraph) == self.utf8_paragraph_out)
|
||||||
|
tools.ok_(utf8.utf8_text_wrap(self.utf8_mixed_para) == self.utf8_mixed_para_out)
|
||||||
|
tools.ok_(utf8.utf8_text_wrap(self.utf8_mixed_para, width=57,
|
||||||
|
initial_indent=' ', subsequent_indent='----') ==
|
||||||
|
self.utf8_mixed_para_57_initial_subsequent_out)
|
35
tests/test_versioning.py
Normal file
35
tests/test_versioning.py
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
import unittest
|
||||||
|
from nose import tools
|
||||||
|
|
||||||
|
from kitchen.versioning import version_tuple_to_string
|
||||||
|
|
||||||
|
# Note: Using nose's generator tests for this so we can't subclass
|
||||||
|
# unittest.TestCase
|
||||||
|
# Note: Using nose's generator tests for this so we can't subclass
# unittest.TestCase
class TestVersionTuple(object):
    '''Generator tests mapping version tuples to their PEP-386 strings.'''

    # version string => version tuple expected to render back to it.
    # The original literal listed each a/rc/dev entry twice (once with
    # byte-string tags, once with unicode tags) under the same key; the
    # duplicates silently overwrote each other, so only the unicode
    # variants -- the values the dict actually held -- are kept.
    ver_to_tuple = {u'1': ((1,),),
                    u'1.0': ((1, 0),),
                    u'1.0.0': ((1, 0, 0),),
                    u'1.0a1': ((1, 0), (u'a', 1)),
                    u'1.0rc1': ((1, 0), (u'rc', 1)),
                    u'1.0rc1.2': ((1, 0), (u'rc', 1, 2)),
                    u'1.0.dev345': ((1, 0), (u'dev', 345)),
                    u'1.0a1.dev345': ((1, 0), (u'a', 1), (u'dev', 345)),
                    u'1.0a1.2.dev345': ((1, 0), (u'a', 1, 2), (u'dev', 345)),
                    }

    def check_ver_tuple_to_str(self, v_tuple, v_str):
        # Single assertion body shared by the generated test cases
        tools.ok_(version_tuple_to_string(v_tuple) == v_str)

    def test_version_tuple_to_string(self):
        '''Test that version_tuple_to_string outputs PEP-386 compliant strings
        '''
        for v_str, v_tuple in self.ver_to_tuple.items():
            yield self.check_ver_tuple_to_str, v_tuple, v_str
Loading…
Reference in a new issue