Imported Upstream version 1.1.1
This commit is contained in:
commit
dfb12f36e6
84 changed files with 15184 additions and 0 deletions
340
COPYING
Normal file
340
COPYING
Normal file
|
@ -0,0 +1,340 @@
|
|||
GNU GENERAL PUBLIC LICENSE
|
||||
Version 2, June 1991
|
||||
|
||||
Copyright (C) 1989, 1991 Free Software Foundation, Inc.
|
||||
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The licenses for most software are designed to take away your
|
||||
freedom to share and change it. By contrast, the GNU General Public
|
||||
License is intended to guarantee your freedom to share and change free
|
||||
software--to make sure the software is free for all its users. This
|
||||
General Public License applies to most of the Free Software
|
||||
Foundation's software and to any other program whose authors commit to
|
||||
using it. (Some other Free Software Foundation software is covered by
|
||||
the GNU Library General Public License instead.) You can apply it to
|
||||
your programs, too.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
have the freedom to distribute copies of free software (and charge for
|
||||
this service if you wish), that you receive source code or can get it
|
||||
if you want it, that you can change the software or use pieces of it
|
||||
in new free programs; and that you know you can do these things.
|
||||
|
||||
To protect your rights, we need to make restrictions that forbid
|
||||
anyone to deny you these rights or to ask you to surrender the rights.
|
||||
These restrictions translate to certain responsibilities for you if you
|
||||
distribute copies of the software, or if you modify it.
|
||||
|
||||
For example, if you distribute copies of such a program, whether
|
||||
gratis or for a fee, you must give the recipients all the rights that
|
||||
you have. You must make sure that they, too, receive or can get the
|
||||
source code. And you must show them these terms so they know their
|
||||
rights.
|
||||
|
||||
We protect your rights with two steps: (1) copyright the software, and
|
||||
(2) offer you this license which gives you legal permission to copy,
|
||||
distribute and/or modify the software.
|
||||
|
||||
Also, for each author's protection and ours, we want to make certain
|
||||
that everyone understands that there is no warranty for this free
|
||||
software. If the software is modified by someone else and passed on, we
|
||||
want its recipients to know that what they have is not the original, so
|
||||
that any problems introduced by others will not reflect on the original
|
||||
authors' reputations.
|
||||
|
||||
Finally, any free program is threatened constantly by software
|
||||
patents. We wish to avoid the danger that redistributors of a free
|
||||
program will individually obtain patent licenses, in effect making the
|
||||
program proprietary. To prevent this, we have made it clear that any
|
||||
patent must be licensed for everyone's free use or not licensed at all.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||
|
||||
0. This License applies to any program or other work which contains
|
||||
a notice placed by the copyright holder saying it may be distributed
|
||||
under the terms of this General Public License. The "Program", below,
|
||||
refers to any such program or work, and a "work based on the Program"
|
||||
means either the Program or any derivative work under copyright law:
|
||||
that is to say, a work containing the Program or a portion of it,
|
||||
either verbatim or with modifications and/or translated into another
|
||||
language. (Hereinafter, translation is included without limitation in
|
||||
the term "modification".) Each licensee is addressed as "you".
|
||||
|
||||
Activities other than copying, distribution and modification are not
|
||||
covered by this License; they are outside its scope. The act of
|
||||
running the Program is not restricted, and the output from the Program
|
||||
is covered only if its contents constitute a work based on the
|
||||
Program (independent of having been made by running the Program).
|
||||
Whether that is true depends on what the Program does.
|
||||
|
||||
1. You may copy and distribute verbatim copies of the Program's
|
||||
source code as you receive it, in any medium, provided that you
|
||||
conspicuously and appropriately publish on each copy an appropriate
|
||||
copyright notice and disclaimer of warranty; keep intact all the
|
||||
notices that refer to this License and to the absence of any warranty;
|
||||
and give any other recipients of the Program a copy of this License
|
||||
along with the Program.
|
||||
|
||||
You may charge a fee for the physical act of transferring a copy, and
|
||||
you may at your option offer warranty protection in exchange for a fee.
|
||||
|
||||
2. You may modify your copy or copies of the Program or any portion
|
||||
of it, thus forming a work based on the Program, and copy and
|
||||
distribute such modifications or work under the terms of Section 1
|
||||
above, provided that you also meet all of these conditions:
|
||||
|
||||
a) You must cause the modified files to carry prominent notices
|
||||
stating that you changed the files and the date of any change.
|
||||
|
||||
b) You must cause any work that you distribute or publish, that in
|
||||
whole or in part contains or is derived from the Program or any
|
||||
part thereof, to be licensed as a whole at no charge to all third
|
||||
parties under the terms of this License.
|
||||
|
||||
c) If the modified program normally reads commands interactively
|
||||
when run, you must cause it, when started running for such
|
||||
interactive use in the most ordinary way, to print or display an
|
||||
announcement including an appropriate copyright notice and a
|
||||
notice that there is no warranty (or else, saying that you provide
|
||||
a warranty) and that users may redistribute the program under
|
||||
these conditions, and telling the user how to view a copy of this
|
||||
License. (Exception: if the Program itself is interactive but
|
||||
does not normally print such an announcement, your work based on
|
||||
the Program is not required to print an announcement.)
|
||||
|
||||
These requirements apply to the modified work as a whole. If
|
||||
identifiable sections of that work are not derived from the Program,
|
||||
and can be reasonably considered independent and separate works in
|
||||
themselves, then this License, and its terms, do not apply to those
|
||||
sections when you distribute them as separate works. But when you
|
||||
distribute the same sections as part of a whole which is a work based
|
||||
on the Program, the distribution of the whole must be on the terms of
|
||||
this License, whose permissions for other licensees extend to the
|
||||
entire whole, and thus to each and every part regardless of who wrote it.
|
||||
|
||||
Thus, it is not the intent of this section to claim rights or contest
|
||||
your rights to work written entirely by you; rather, the intent is to
|
||||
exercise the right to control the distribution of derivative or
|
||||
collective works based on the Program.
|
||||
|
||||
In addition, mere aggregation of another work not based on the Program
|
||||
with the Program (or with a work based on the Program) on a volume of
|
||||
a storage or distribution medium does not bring the other work under
|
||||
the scope of this License.
|
||||
|
||||
3. You may copy and distribute the Program (or a work based on it,
|
||||
under Section 2) in object code or executable form under the terms of
|
||||
Sections 1 and 2 above provided that you also do one of the following:
|
||||
|
||||
a) Accompany it with the complete corresponding machine-readable
|
||||
source code, which must be distributed under the terms of Sections
|
||||
1 and 2 above on a medium customarily used for software interchange; or,
|
||||
|
||||
b) Accompany it with a written offer, valid for at least three
|
||||
years, to give any third party, for a charge no more than your
|
||||
cost of physically performing source distribution, a complete
|
||||
machine-readable copy of the corresponding source code, to be
|
||||
distributed under the terms of Sections 1 and 2 above on a medium
|
||||
customarily used for software interchange; or,
|
||||
|
||||
c) Accompany it with the information you received as to the offer
|
||||
to distribute corresponding source code. (This alternative is
|
||||
allowed only for noncommercial distribution and only if you
|
||||
received the program in object code or executable form with such
|
||||
an offer, in accord with Subsection b above.)
|
||||
|
||||
The source code for a work means the preferred form of the work for
|
||||
making modifications to it. For an executable work, complete source
|
||||
code means all the source code for all modules it contains, plus any
|
||||
associated interface definition files, plus the scripts used to
|
||||
control compilation and installation of the executable. However, as a
|
||||
special exception, the source code distributed need not include
|
||||
anything that is normally distributed (in either source or binary
|
||||
form) with the major components (compiler, kernel, and so on) of the
|
||||
operating system on which the executable runs, unless that component
|
||||
itself accompanies the executable.
|
||||
|
||||
If distribution of executable or object code is made by offering
|
||||
access to copy from a designated place, then offering equivalent
|
||||
access to copy the source code from the same place counts as
|
||||
distribution of the source code, even though third parties are not
|
||||
compelled to copy the source along with the object code.
|
||||
|
||||
4. You may not copy, modify, sublicense, or distribute the Program
|
||||
except as expressly provided under this License. Any attempt
|
||||
otherwise to copy, modify, sublicense or distribute the Program is
|
||||
void, and will automatically terminate your rights under this License.
|
||||
However, parties who have received copies, or rights, from you under
|
||||
this License will not have their licenses terminated so long as such
|
||||
parties remain in full compliance.
|
||||
|
||||
5. You are not required to accept this License, since you have not
|
||||
signed it. However, nothing else grants you permission to modify or
|
||||
distribute the Program or its derivative works. These actions are
|
||||
prohibited by law if you do not accept this License. Therefore, by
|
||||
modifying or distributing the Program (or any work based on the
|
||||
Program), you indicate your acceptance of this License to do so, and
|
||||
all its terms and conditions for copying, distributing or modifying
|
||||
the Program or works based on it.
|
||||
|
||||
6. Each time you redistribute the Program (or any work based on the
|
||||
Program), the recipient automatically receives a license from the
|
||||
original licensor to copy, distribute or modify the Program subject to
|
||||
these terms and conditions. You may not impose any further
|
||||
restrictions on the recipients' exercise of the rights granted herein.
|
||||
You are not responsible for enforcing compliance by third parties to
|
||||
this License.
|
||||
|
||||
7. If, as a consequence of a court judgment or allegation of patent
|
||||
infringement or for any other reason (not limited to patent issues),
|
||||
conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot
|
||||
distribute so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you
|
||||
may not distribute the Program at all. For example, if a patent
|
||||
license would not permit royalty-free redistribution of the Program by
|
||||
all those who receive copies directly or indirectly through you, then
|
||||
the only way you could satisfy both it and this License would be to
|
||||
refrain entirely from distribution of the Program.
|
||||
|
||||
If any portion of this section is held invalid or unenforceable under
|
||||
any particular circumstance, the balance of the section is intended to
|
||||
apply and the section as a whole is intended to apply in other
|
||||
circumstances.
|
||||
|
||||
It is not the purpose of this section to induce you to infringe any
|
||||
patents or other property right claims or to contest validity of any
|
||||
such claims; this section has the sole purpose of protecting the
|
||||
integrity of the free software distribution system, which is
|
||||
implemented by public license practices. Many people have made
|
||||
generous contributions to the wide range of software distributed
|
||||
through that system in reliance on consistent application of that
|
||||
system; it is up to the author/donor to decide if he or she is willing
|
||||
to distribute software through any other system and a licensee cannot
|
||||
impose that choice.
|
||||
|
||||
This section is intended to make thoroughly clear what is believed to
|
||||
be a consequence of the rest of this License.
|
||||
|
||||
8. If the distribution and/or use of the Program is restricted in
|
||||
certain countries either by patents or by copyrighted interfaces, the
|
||||
original copyright holder who places the Program under this License
|
||||
may add an explicit geographical distribution limitation excluding
|
||||
those countries, so that distribution is permitted only in or among
|
||||
countries not thus excluded. In such case, this License incorporates
|
||||
the limitation as if written in the body of this License.
|
||||
|
||||
9. The Free Software Foundation may publish revised and/or new versions
|
||||
of the General Public License from time to time. Such new versions will
|
||||
be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the Program
|
||||
specifies a version number of this License which applies to it and "any
|
||||
later version", you have the option of following the terms and conditions
|
||||
either of that version or of any later version published by the Free
|
||||
Software Foundation. If the Program does not specify a version number of
|
||||
this License, you may choose any version ever published by the Free Software
|
||||
Foundation.
|
||||
|
||||
10. If you wish to incorporate parts of the Program into other free
|
||||
programs whose distribution conditions are different, write to the author
|
||||
to ask for permission. For software which is copyrighted by the Free
|
||||
Software Foundation, write to the Free Software Foundation; we sometimes
|
||||
make exceptions for this. Our decision will be guided by the two goals
|
||||
of preserving the free status of all derivatives of our free software and
|
||||
of promoting the sharing and reuse of software generally.
|
||||
|
||||
NO WARRANTY
|
||||
|
||||
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
|
||||
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
|
||||
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
|
||||
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
|
||||
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
|
||||
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
|
||||
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
|
||||
REPAIR OR CORRECTION.
|
||||
|
||||
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
|
||||
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
|
||||
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
|
||||
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
|
||||
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
|
||||
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
|
||||
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGES.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest
|
||||
to attach them to the start of each source file to most effectively
|
||||
convey the exclusion of warranty; and each file should have at least
|
||||
the "copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the program's name and a brief idea of what it does.>
|
||||
Copyright (C) <year> <name of author>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If the program is interactive, make it output a short notice like this
|
||||
when it starts in an interactive mode:
|
||||
|
||||
Gnomovision version 69, Copyright (C) year name of author
|
||||
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions; type `show c' for details.
|
||||
|
||||
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||
parts of the General Public License. Of course, the commands you use may
|
||||
be called something other than `show w' and `show c'; they could even be
|
||||
mouse-clicks or menu items--whatever suits your program.
|
||||
|
||||
You should also get your employer (if you work as a programmer) or your
|
||||
school, if any, to sign a "copyright disclaimer" for the program, if
|
||||
necessary. Here is a sample; alter the names:
|
||||
|
||||
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
|
||||
`Gnomovision' (which makes passes at compilers) written by James Hacker.
|
||||
|
||||
<signature of Ty Coon>, 1 April 1989
|
||||
Ty Coon, President of Vice
|
||||
|
||||
This General Public License does not permit incorporating your program into
|
||||
proprietary programs. If your program is a subroutine library, you may
|
||||
consider it more useful to permit linking proprietary applications with the
|
||||
library. If this is what you want to do, use the GNU Library General
|
||||
Public License instead of this License.
|
504
COPYING.LESSER
Normal file
504
COPYING.LESSER
Normal file
|
@ -0,0 +1,504 @@
|
|||
GNU LESSER GENERAL PUBLIC LICENSE
|
||||
Version 2.1, February 1999
|
||||
|
||||
Copyright (C) 1991, 1999 Free Software Foundation, Inc.
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
[This is the first released version of the Lesser GPL. It also counts
|
||||
as the successor of the GNU Library Public License, version 2, hence
|
||||
the version number 2.1.]
|
||||
|
||||
Preamble
|
||||
|
||||
The licenses for most software are designed to take away your
|
||||
freedom to share and change it. By contrast, the GNU General Public
|
||||
Licenses are intended to guarantee your freedom to share and change
|
||||
free software--to make sure the software is free for all its users.
|
||||
|
||||
This license, the Lesser General Public License, applies to some
|
||||
specially designated software packages--typically libraries--of the
|
||||
Free Software Foundation and other authors who decide to use it. You
|
||||
can use it too, but we suggest you first think carefully about whether
|
||||
this license or the ordinary General Public License is the better
|
||||
strategy to use in any particular case, based on the explanations below.
|
||||
|
||||
When we speak of free software, we are referring to freedom of use,
|
||||
not price. Our General Public Licenses are designed to make sure that
|
||||
you have the freedom to distribute copies of free software (and charge
|
||||
for this service if you wish); that you receive source code or can get
|
||||
it if you want it; that you can change the software and use pieces of
|
||||
it in new free programs; and that you are informed that you can do
|
||||
these things.
|
||||
|
||||
To protect your rights, we need to make restrictions that forbid
|
||||
distributors to deny you these rights or to ask you to surrender these
|
||||
rights. These restrictions translate to certain responsibilities for
|
||||
you if you distribute copies of the library or if you modify it.
|
||||
|
||||
For example, if you distribute copies of the library, whether gratis
|
||||
or for a fee, you must give the recipients all the rights that we gave
|
||||
you. You must make sure that they, too, receive or can get the source
|
||||
code. If you link other code with the library, you must provide
|
||||
complete object files to the recipients, so that they can relink them
|
||||
with the library after making changes to the library and recompiling
|
||||
it. And you must show them these terms so they know their rights.
|
||||
|
||||
We protect your rights with a two-step method: (1) we copyright the
|
||||
library, and (2) we offer you this license, which gives you legal
|
||||
permission to copy, distribute and/or modify the library.
|
||||
|
||||
To protect each distributor, we want to make it very clear that
|
||||
there is no warranty for the free library. Also, if the library is
|
||||
modified by someone else and passed on, the recipients should know
|
||||
that what they have is not the original version, so that the original
|
||||
author's reputation will not be affected by problems that might be
|
||||
introduced by others.
|
||||
|
||||
Finally, software patents pose a constant threat to the existence of
|
||||
any free program. We wish to make sure that a company cannot
|
||||
effectively restrict the users of a free program by obtaining a
|
||||
restrictive license from a patent holder. Therefore, we insist that
|
||||
any patent license obtained for a version of the library must be
|
||||
consistent with the full freedom of use specified in this license.
|
||||
|
||||
Most GNU software, including some libraries, is covered by the
|
||||
ordinary GNU General Public License. This license, the GNU Lesser
|
||||
General Public License, applies to certain designated libraries, and
|
||||
is quite different from the ordinary General Public License. We use
|
||||
this license for certain libraries in order to permit linking those
|
||||
libraries into non-free programs.
|
||||
|
||||
When a program is linked with a library, whether statically or using
|
||||
a shared library, the combination of the two is legally speaking a
|
||||
combined work, a derivative of the original library. The ordinary
|
||||
General Public License therefore permits such linking only if the
|
||||
entire combination fits its criteria of freedom. The Lesser General
|
||||
Public License permits more lax criteria for linking other code with
|
||||
the library.
|
||||
|
||||
We call this license the "Lesser" General Public License because it
|
||||
does Less to protect the user's freedom than the ordinary General
|
||||
Public License. It also provides other free software developers Less
|
||||
of an advantage over competing non-free programs. These disadvantages
|
||||
are the reason we use the ordinary General Public License for many
|
||||
libraries. However, the Lesser license provides advantages in certain
|
||||
special circumstances.
|
||||
|
||||
For example, on rare occasions, there may be a special need to
|
||||
encourage the widest possible use of a certain library, so that it becomes
|
||||
a de-facto standard. To achieve this, non-free programs must be
|
||||
allowed to use the library. A more frequent case is that a free
|
||||
library does the same job as widely used non-free libraries. In this
|
||||
case, there is little to gain by limiting the free library to free
|
||||
software only, so we use the Lesser General Public License.
|
||||
|
||||
In other cases, permission to use a particular library in non-free
|
||||
programs enables a greater number of people to use a large body of
|
||||
free software. For example, permission to use the GNU C Library in
|
||||
non-free programs enables many more people to use the whole GNU
|
||||
operating system, as well as its variant, the GNU/Linux operating
|
||||
system.
|
||||
|
||||
Although the Lesser General Public License is Less protective of the
|
||||
users' freedom, it does ensure that the user of a program that is
|
||||
linked with the Library has the freedom and the wherewithal to run
|
||||
that program using a modified version of the Library.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow. Pay close attention to the difference between a
|
||||
"work based on the library" and a "work that uses the library". The
|
||||
former contains code derived from the library, whereas the latter must
|
||||
be combined with the library in order to run.
|
||||
|
||||
GNU LESSER GENERAL PUBLIC LICENSE
|
||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||
|
||||
0. This License Agreement applies to any software library or other
|
||||
program which contains a notice placed by the copyright holder or
|
||||
other authorized party saying it may be distributed under the terms of
|
||||
this Lesser General Public License (also called "this License").
|
||||
Each licensee is addressed as "you".
|
||||
|
||||
A "library" means a collection of software functions and/or data
|
||||
prepared so as to be conveniently linked with application programs
|
||||
(which use some of those functions and data) to form executables.
|
||||
|
||||
The "Library", below, refers to any such software library or work
|
||||
which has been distributed under these terms. A "work based on the
|
||||
Library" means either the Library or any derivative work under
|
||||
copyright law: that is to say, a work containing the Library or a
|
||||
portion of it, either verbatim or with modifications and/or translated
|
||||
straightforwardly into another language. (Hereinafter, translation is
|
||||
included without limitation in the term "modification".)
|
||||
|
||||
"Source code" for a work means the preferred form of the work for
|
||||
making modifications to it. For a library, complete source code means
|
||||
all the source code for all modules it contains, plus any associated
|
||||
interface definition files, plus the scripts used to control compilation
|
||||
and installation of the library.
|
||||
|
||||
Activities other than copying, distribution and modification are not
|
||||
covered by this License; they are outside its scope. The act of
|
||||
running a program using the Library is not restricted, and output from
|
||||
such a program is covered only if its contents constitute a work based
|
||||
on the Library (independent of the use of the Library in a tool for
|
||||
writing it). Whether that is true depends on what the Library does
|
||||
and what the program that uses the Library does.
|
||||
|
||||
1. You may copy and distribute verbatim copies of the Library's
|
||||
complete source code as you receive it, in any medium, provided that
|
||||
you conspicuously and appropriately publish on each copy an
|
||||
appropriate copyright notice and disclaimer of warranty; keep intact
|
||||
all the notices that refer to this License and to the absence of any
|
||||
warranty; and distribute a copy of this License along with the
|
||||
Library.
|
||||
|
||||
You may charge a fee for the physical act of transferring a copy,
|
||||
and you may at your option offer warranty protection in exchange for a
|
||||
fee.
|
||||
|
||||
2. You may modify your copy or copies of the Library or any portion
|
||||
of it, thus forming a work based on the Library, and copy and
|
||||
distribute such modifications or work under the terms of Section 1
|
||||
above, provided that you also meet all of these conditions:
|
||||
|
||||
a) The modified work must itself be a software library.
|
||||
|
||||
b) You must cause the files modified to carry prominent notices
|
||||
stating that you changed the files and the date of any change.
|
||||
|
||||
c) You must cause the whole of the work to be licensed at no
|
||||
charge to all third parties under the terms of this License.
|
||||
|
||||
d) If a facility in the modified Library refers to a function or a
|
||||
table of data to be supplied by an application program that uses
|
||||
the facility, other than as an argument passed when the facility
|
||||
is invoked, then you must make a good faith effort to ensure that,
|
||||
in the event an application does not supply such function or
|
||||
table, the facility still operates, and performs whatever part of
|
||||
its purpose remains meaningful.
|
||||
|
||||
(For example, a function in a library to compute square roots has
|
||||
a purpose that is entirely well-defined independent of the
|
||||
application. Therefore, Subsection 2d requires that any
|
||||
application-supplied function or table used by this function must
|
||||
be optional: if the application does not supply it, the square
|
||||
root function must still compute square roots.)
|
||||
|
||||
These requirements apply to the modified work as a whole. If
|
||||
identifiable sections of that work are not derived from the Library,
|
||||
and can be reasonably considered independent and separate works in
|
||||
themselves, then this License, and its terms, do not apply to those
|
||||
sections when you distribute them as separate works. But when you
|
||||
distribute the same sections as part of a whole which is a work based
|
||||
on the Library, the distribution of the whole must be on the terms of
|
||||
this License, whose permissions for other licensees extend to the
|
||||
entire whole, and thus to each and every part regardless of who wrote
|
||||
it.
|
||||
|
||||
Thus, it is not the intent of this section to claim rights or contest
|
||||
your rights to work written entirely by you; rather, the intent is to
|
||||
exercise the right to control the distribution of derivative or
|
||||
collective works based on the Library.
|
||||
|
||||
In addition, mere aggregation of another work not based on the Library
|
||||
with the Library (or with a work based on the Library) on a volume of
|
||||
a storage or distribution medium does not bring the other work under
|
||||
the scope of this License.
|
||||
|
||||
3. You may opt to apply the terms of the ordinary GNU General Public
|
||||
License instead of this License to a given copy of the Library. To do
|
||||
this, you must alter all the notices that refer to this License, so
|
||||
that they refer to the ordinary GNU General Public License, version 2,
|
||||
instead of to this License. (If a newer version than version 2 of the
|
||||
ordinary GNU General Public License has appeared, then you can specify
|
||||
that version instead if you wish.) Do not make any other change in
|
||||
these notices.
|
||||
|
||||
Once this change is made in a given copy, it is irreversible for
|
||||
that copy, so the ordinary GNU General Public License applies to all
|
||||
subsequent copies and derivative works made from that copy.
|
||||
|
||||
This option is useful when you wish to copy part of the code of
|
||||
the Library into a program that is not a library.
|
||||
|
||||
4. You may copy and distribute the Library (or a portion or
|
||||
derivative of it, under Section 2) in object code or executable form
|
||||
under the terms of Sections 1 and 2 above provided that you accompany
|
||||
it with the complete corresponding machine-readable source code, which
|
||||
must be distributed under the terms of Sections 1 and 2 above on a
|
||||
medium customarily used for software interchange.
|
||||
|
||||
If distribution of object code is made by offering access to copy
|
||||
from a designated place, then offering equivalent access to copy the
|
||||
source code from the same place satisfies the requirement to
|
||||
distribute the source code, even though third parties are not
|
||||
compelled to copy the source along with the object code.
|
||||
|
||||
5. A program that contains no derivative of any portion of the
|
||||
Library, but is designed to work with the Library by being compiled or
|
||||
linked with it, is called a "work that uses the Library". Such a
|
||||
work, in isolation, is not a derivative work of the Library, and
|
||||
therefore falls outside the scope of this License.
|
||||
|
||||
However, linking a "work that uses the Library" with the Library
|
||||
creates an executable that is a derivative of the Library (because it
|
||||
contains portions of the Library), rather than a "work that uses the
|
||||
library". The executable is therefore covered by this License.
|
||||
Section 6 states terms for distribution of such executables.
|
||||
|
||||
When a "work that uses the Library" uses material from a header file
|
||||
that is part of the Library, the object code for the work may be a
|
||||
derivative work of the Library even though the source code is not.
|
||||
Whether this is true is especially significant if the work can be
|
||||
linked without the Library, or if the work is itself a library. The
|
||||
threshold for this to be true is not precisely defined by law.
|
||||
|
||||
If such an object file uses only numerical parameters, data
|
||||
structure layouts and accessors, and small macros and small inline
|
||||
functions (ten lines or less in length), then the use of the object
|
||||
file is unrestricted, regardless of whether it is legally a derivative
|
||||
work. (Executables containing this object code plus portions of the
|
||||
Library will still fall under Section 6.)
|
||||
|
||||
Otherwise, if the work is a derivative of the Library, you may
|
||||
distribute the object code for the work under the terms of Section 6.
|
||||
Any executables containing that work also fall under Section 6,
|
||||
whether or not they are linked directly with the Library itself.
|
||||
|
||||
6. As an exception to the Sections above, you may also combine or
|
||||
link a "work that uses the Library" with the Library to produce a
|
||||
work containing portions of the Library, and distribute that work
|
||||
under terms of your choice, provided that the terms permit
|
||||
modification of the work for the customer's own use and reverse
|
||||
engineering for debugging such modifications.
|
||||
|
||||
You must give prominent notice with each copy of the work that the
|
||||
Library is used in it and that the Library and its use are covered by
|
||||
this License. You must supply a copy of this License. If the work
|
||||
during execution displays copyright notices, you must include the
|
||||
copyright notice for the Library among them, as well as a reference
|
||||
directing the user to the copy of this License. Also, you must do one
|
||||
of these things:
|
||||
|
||||
a) Accompany the work with the complete corresponding
|
||||
machine-readable source code for the Library including whatever
|
||||
changes were used in the work (which must be distributed under
|
||||
Sections 1 and 2 above); and, if the work is an executable linked
|
||||
with the Library, with the complete machine-readable "work that
|
||||
uses the Library", as object code and/or source code, so that the
|
||||
user can modify the Library and then relink to produce a modified
|
||||
executable containing the modified Library. (It is understood
|
||||
that the user who changes the contents of definitions files in the
|
||||
Library will not necessarily be able to recompile the application
|
||||
to use the modified definitions.)
|
||||
|
||||
b) Use a suitable shared library mechanism for linking with the
|
||||
Library. A suitable mechanism is one that (1) uses at run time a
|
||||
copy of the library already present on the user's computer system,
|
||||
rather than copying library functions into the executable, and (2)
|
||||
will operate properly with a modified version of the library, if
|
||||
the user installs one, as long as the modified version is
|
||||
interface-compatible with the version that the work was made with.
|
||||
|
||||
c) Accompany the work with a written offer, valid for at
|
||||
least three years, to give the same user the materials
|
||||
specified in Subsection 6a, above, for a charge no more
|
||||
than the cost of performing this distribution.
|
||||
|
||||
d) If distribution of the work is made by offering access to copy
|
||||
from a designated place, offer equivalent access to copy the above
|
||||
specified materials from the same place.
|
||||
|
||||
e) Verify that the user has already received a copy of these
|
||||
materials or that you have already sent this user a copy.
|
||||
|
||||
For an executable, the required form of the "work that uses the
|
||||
Library" must include any data and utility programs needed for
|
||||
reproducing the executable from it. However, as a special exception,
|
||||
the materials to be distributed need not include anything that is
|
||||
normally distributed (in either source or binary form) with the major
|
||||
components (compiler, kernel, and so on) of the operating system on
|
||||
which the executable runs, unless that component itself accompanies
|
||||
the executable.
|
||||
|
||||
It may happen that this requirement contradicts the license
|
||||
restrictions of other proprietary libraries that do not normally
|
||||
accompany the operating system. Such a contradiction means you cannot
|
||||
use both them and the Library together in an executable that you
|
||||
distribute.
|
||||
|
||||
7. You may place library facilities that are a work based on the
|
||||
Library side-by-side in a single library together with other library
|
||||
facilities not covered by this License, and distribute such a combined
|
||||
library, provided that the separate distribution of the work based on
|
||||
the Library and of the other library facilities is otherwise
|
||||
permitted, and provided that you do these two things:
|
||||
|
||||
a) Accompany the combined library with a copy of the same work
|
||||
based on the Library, uncombined with any other library
|
||||
facilities. This must be distributed under the terms of the
|
||||
Sections above.
|
||||
|
||||
b) Give prominent notice with the combined library of the fact
|
||||
that part of it is a work based on the Library, and explaining
|
||||
where to find the accompanying uncombined form of the same work.
|
||||
|
||||
8. You may not copy, modify, sublicense, link with, or distribute
|
||||
the Library except as expressly provided under this License. Any
|
||||
attempt otherwise to copy, modify, sublicense, link with, or
|
||||
distribute the Library is void, and will automatically terminate your
|
||||
rights under this License. However, parties who have received copies,
|
||||
or rights, from you under this License will not have their licenses
|
||||
terminated so long as such parties remain in full compliance.
|
||||
|
||||
9. You are not required to accept this License, since you have not
|
||||
signed it. However, nothing else grants you permission to modify or
|
||||
distribute the Library or its derivative works. These actions are
|
||||
prohibited by law if you do not accept this License. Therefore, by
|
||||
modifying or distributing the Library (or any work based on the
|
||||
Library), you indicate your acceptance of this License to do so, and
|
||||
all its terms and conditions for copying, distributing or modifying
|
||||
the Library or works based on it.
|
||||
|
||||
10. Each time you redistribute the Library (or any work based on the
|
||||
Library), the recipient automatically receives a license from the
|
||||
original licensor to copy, distribute, link with or modify the Library
|
||||
subject to these terms and conditions. You may not impose any further
|
||||
restrictions on the recipients' exercise of the rights granted herein.
|
||||
You are not responsible for enforcing compliance by third parties with
|
||||
this License.
|
||||
|
||||
11. If, as a consequence of a court judgment or allegation of patent
|
||||
infringement or for any other reason (not limited to patent issues),
|
||||
conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot
|
||||
distribute so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you
|
||||
may not distribute the Library at all. For example, if a patent
|
||||
license would not permit royalty-free redistribution of the Library by
|
||||
all those who receive copies directly or indirectly through you, then
|
||||
the only way you could satisfy both it and this License would be to
|
||||
refrain entirely from distribution of the Library.
|
||||
|
||||
If any portion of this section is held invalid or unenforceable under any
|
||||
particular circumstance, the balance of the section is intended to apply,
|
||||
and the section as a whole is intended to apply in other circumstances.
|
||||
|
||||
It is not the purpose of this section to induce you to infringe any
|
||||
patents or other property right claims or to contest validity of any
|
||||
such claims; this section has the sole purpose of protecting the
|
||||
integrity of the free software distribution system which is
|
||||
implemented by public license practices. Many people have made
|
||||
generous contributions to the wide range of software distributed
|
||||
through that system in reliance on consistent application of that
|
||||
system; it is up to the author/donor to decide if he or she is willing
|
||||
to distribute software through any other system and a licensee cannot
|
||||
impose that choice.
|
||||
|
||||
This section is intended to make thoroughly clear what is believed to
|
||||
be a consequence of the rest of this License.
|
||||
|
||||
12. If the distribution and/or use of the Library is restricted in
|
||||
certain countries either by patents or by copyrighted interfaces, the
|
||||
original copyright holder who places the Library under this License may add
|
||||
an explicit geographical distribution limitation excluding those countries,
|
||||
so that distribution is permitted only in or among countries not thus
|
||||
excluded. In such case, this License incorporates the limitation as if
|
||||
written in the body of this License.
|
||||
|
||||
13. The Free Software Foundation may publish revised and/or new
|
||||
versions of the Lesser General Public License from time to time.
|
||||
Such new versions will be similar in spirit to the present version,
|
||||
but may differ in detail to address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the Library
|
||||
specifies a version number of this License which applies to it and
|
||||
"any later version", you have the option of following the terms and
|
||||
conditions either of that version or of any later version published by
|
||||
the Free Software Foundation. If the Library does not specify a
|
||||
license version number, you may choose any version ever published by
|
||||
the Free Software Foundation.
|
||||
|
||||
14. If you wish to incorporate parts of the Library into other free
|
||||
programs whose distribution conditions are incompatible with these,
|
||||
write to the author to ask for permission. For software which is
|
||||
copyrighted by the Free Software Foundation, write to the Free
|
||||
Software Foundation; we sometimes make exceptions for this. Our
|
||||
decision will be guided by the two goals of preserving the free status
|
||||
of all derivatives of our free software and of promoting the sharing
|
||||
and reuse of software generally.
|
||||
|
||||
NO WARRANTY
|
||||
|
||||
15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
|
||||
WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
|
||||
EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
|
||||
OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
|
||||
KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
|
||||
LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
|
||||
THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
|
||||
|
||||
16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
|
||||
WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
|
||||
AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
|
||||
FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
|
||||
CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
|
||||
LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
|
||||
RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
|
||||
FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
|
||||
SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
||||
DAMAGES.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Libraries
|
||||
|
||||
If you develop a new library, and you want it to be of the greatest
|
||||
possible use to the public, we recommend making it free software that
|
||||
everyone can redistribute and change. You can do so by permitting
|
||||
redistribution under these terms (or, alternatively, under the terms of the
|
||||
ordinary General Public License).
|
||||
|
||||
To apply these terms, attach the following notices to the library. It is
|
||||
safest to attach them to the start of each source file to most effectively
|
||||
convey the exclusion of warranty; and each file should have at least the
|
||||
"copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the library's name and a brief idea of what it does.>
|
||||
Copyright (C) <year> <name of author>
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
You should also get your employer (if you work as a programmer) or your
|
||||
school, if any, to sign a "copyright disclaimer" for the library, if
|
||||
necessary. Here is a sample; alter the names:
|
||||
|
||||
Yoyodyne, Inc., hereby disclaims all copyright interest in the
|
||||
library `Frob' (a library for tweaking knobs) written by James Random Hacker.
|
||||
|
||||
<signature of Ty Coon>, 1 April 1990
|
||||
Ty Coon, President of Vice
|
||||
|
||||
That's all there is to it!
|
||||
|
||||
|
191
HACKING
Normal file
191
HACKING
Normal file
|
@ -0,0 +1,191 @@
|
|||
================================
|
||||
Some notes on hacking on kitchen
|
||||
================================
|
||||
|
||||
:Author: Toshio Kuratomi
|
||||
:Date: 2 Jan 2012
|
||||
:Version: 1.1.x
|
||||
|
||||
For coding and kitchen, see the style guide in the documentation.
|
||||
|
||||
This file documents meta-information about kitchen such as where to get the
|
||||
code and how to make a release.
|
||||
|
||||
.. contents::
|
||||
|
||||
-----------------------------------------
|
||||
Extra software needed for making releases
|
||||
-----------------------------------------
|
||||
Although kitchen has very few requirements for running, there are a few more
|
||||
that are required for making a release:
|
||||
|
||||
* python-2.4+ (tested on python-2.7)
|
||||
* transifex-client (/usr/bin/tx)
|
||||
* gettext (/usr/bin/msgfmt)
|
||||
* python-babel (/usr/bin/pybabel)
|
||||
* python-sphinx (/usr/bin/sphinx-build)
|
||||
* python-nose (/usr/bin/nosetests)
|
||||
* python-coverage (/usr/bin/coverage)
|
||||
|
||||
--------------
|
||||
Get translated
|
||||
--------------
|
||||
|
||||
We use the translation services at transifex.net to manage po files, coordinate
|
||||
people translating strings, and merge new strings to the files. The following
|
||||
instructions briefly tell how to use transifex to update the source languages'
|
||||
files and pull new translations for release. Actually doing translations can
|
||||
be found in the `transifex user's guide`_.
|
||||
|
||||
.. _`transifex user's guide`: http://help.transifex.net/user-guide/translating.html
|
||||
|
||||
To generate the POT file (located in the po/ subdirectory), use pybabel to
|
||||
extract the messages. Run the following from the top level directory::
|
||||
|
||||
pybabel extract -o po/kitchen.pot kitchen -kb_ -kbN_
|
||||
|
||||
Then commit this pot file and upload to transifex::
|
||||
|
||||
tx push -s
|
||||
bzr commit -m 'Extract new strings from the source files' po/kitchen.pot
|
||||
bzr push
|
||||
|
||||
To pull messages from transifex prior to making a release, do::
|
||||
|
||||
tx pull -a
|
||||
bzr commit -m 'Merge new translations from transifex' po/*.po
|
||||
|
||||
If you see a status message from transifex like this::
|
||||
Pulling new translations for resource kitchen.kitchenpot (source: po/kitchen.pot)
|
||||
-> fr: po/fr.po
|
||||
|
||||
it means that transifex has created a brand new po file for you. You need to
|
||||
add the new file to source control and commit it like this::
|
||||
|
||||
bzr add po/fr.po
|
||||
bzr commit -m 'New French translation' po/fr.po
|
||||
|
||||
|
||||
TODO: Add information about announcing string freeze. Using transifex's add
|
||||
release function to coordinate with translators. Mailing a translators list,
|
||||
etc.
|
||||
|
||||
--------
|
||||
Releases
|
||||
--------
|
||||
|
||||
.. note:: If a release is not time critical, make an effort to get the
|
||||
software translated first. See :ref:`Get translated` for details.
|
||||
|
||||
Testing
|
||||
=======
|
||||
|
||||
Even though python is not a compiled language, there are several ways to test that
|
||||
the software is correct.
|
||||
|
||||
Test that docs build
|
||||
--------------------
|
||||
|
||||
Documentation is written in ReStructuredText format and built via the
|
||||
:mod:`sphinx` documentation system for python. There is a variety of
|
||||
hand-written and formatted documentation in the :file:`docs` directory. Those
|
||||
documents also pull some documentation out of the docstrings in the code.
|
||||
|
||||
Any of those places may have formatting that is not valid in the sphinx
|
||||
system. Building the documentation into html will see if there's any spots
|
||||
that need to be fixed::
|
||||
|
||||
python setup.py build_sphinx --fresh-env
|
||||
|
||||
The command will attempt to turn the documentation into html. Any errors or
|
||||
warnings in the output mean that there's some piece of documentation that
|
||||
sphinx doesn't know how to deal with. That should be fixed before publishing
|
||||
the release.
|
||||
|
||||
|
||||
Test that message catalogs compile
|
||||
----------------------------------
|
||||
|
||||
One of the pieces of creating a new release is downloading new message
|
||||
catalogs from transifex. Once in a great while, a translator will upload a
|
||||
translation there that causes problems (for instance, adding or omitting
|
||||
format strings from a translated string.) Luckily the commands to create the
|
||||
message catalogs will detect things like this so just compiling the catalogs
|
||||
will determine if any translations need to be adjusted::
|
||||
|
||||
./releaseutils.py
|
||||
|
||||
This will iterate through all the message catalogs that transifex downloaded
|
||||
to the :file:`po` directory and compile them into the :file:`locale`
|
||||
directory.
|
||||
|
||||
.. warning:: If :file:`/usr/bin/msgfmt` is not installed, this command will still
|
||||
compile the message catalogs but it will use babel. Babel, unfortunately,
|
||||
doesn't check for all the errors in message catalogs that msgfmt does so
|
||||
it may say that the messages are fine when they really aren't. Make sure
|
||||
you have msgfmt available by installing gettext.
|
||||
|
||||
Unittest
|
||||
--------
|
||||
|
||||
Kitchen has a large set of unittests. All of them should pass before release.
|
||||
You can run the unittests with the following command::
|
||||
nosetests --with-coverage --cover-package kitchen
|
||||
|
||||
This will run all the unittests under the tests directory and also generate
|
||||
some statistics about which lines of code were not accessed when kitchen ran.
|
||||
|
||||
.. warning:: Although 100% test coverage is a worthy goal, it doesn't mean
|
||||
that the code is bug free. This is especially true of code, like
|
||||
kitchen's, that deals with encoding issues. The same piece of code in
|
||||
kitchen will do different things depending on whether unicode or byte str
|
||||
(and the characters that are in the byte str) is passed as a parameter and
|
||||
what encoding is specified in certain environment variables. You can take
|
||||
a look at :file:`test_i18n.py` and :file:`test_converters.py` to see tests
|
||||
that attempt to cover enough input values to detect problems.
|
||||
|
||||
Since kitchen is currently supported on python-2.3.1+, it is desirable to test
|
||||
kitchen on at least one python major version from python-2.3 through
|
||||
python-2.7. We currently have access to a buildbot that has access to
|
||||
python-2.4, python-2.6, and python-2.7. You can view it at
|
||||
http://ci.csh.rit.edu:8080/view/Kitchen/ . The buildbot checks the devel
|
||||
repository hourly and if new checkins have occurred, it attempts to rebuild.
|
||||
If you need access to invoke builds on the buildbot more regularly than that,
|
||||
contact Toshio to get access.
|
||||
|
||||
We were unable to get python-2.3 working in the buildbot so I manually run the
|
||||
unittests on a CentOS-4 virtual machine (with python-2.3). I currently don't
|
||||
test on python-2.5 but I'd be happy to take bug reports or get a new committer
|
||||
that was interested in that platform.
|
||||
|
||||
Creating the release
|
||||
====================
|
||||
|
||||
1. Make sure that any feature branches you want have been merged.
|
||||
2. Pull in new translations and verify they are valid::
|
||||
tx pull -a
|
||||
# If msgfmt is installed, this will check that the catalogs are valid
|
||||
./releaseutils.py
|
||||
bzr commit -m 'Merge new translations from transifex.net'
|
||||
3. Update the version in kitchen/__init__.py and NEWS.
|
||||
4. Make a fresh clone of the repository::
|
||||
cd $PATH_TO_MY_SHARED_REPO
|
||||
bzr branch bzr://bzr.fedorahosted.org/bzr/kitchen/devel release
|
||||
5. Make the source tarball in that directory::
|
||||
cd release
|
||||
python setup.py sdist
|
||||
6. Make sure that the source tarball contains all of the files we want in the release::
|
||||
cd ..
|
||||
tar -xzvf release/dist/kitchen*tar.gz
|
||||
diff -uNr devel kitchen-$RELEASE_VERSION
|
||||
7. Upload the docs to pypi::
|
||||
cd release
|
||||
python setup.py upload_docs
|
||||
8. Upload the tarball to pypi::
|
||||
python setup.py sdist upload --sign
|
||||
9. Upload the tarball to fedorahosted::
|
||||
scp dist/kitchen*tar.gz fedorahosted.org:/srv/web/releases/k/i/kitchen/
|
||||
10. Tag the release::
|
||||
cd ../devel
|
||||
bzr tag $RELEASE_VERSION
|
||||
bzr push
|
170
NEWS
Normal file
170
NEWS
Normal file
|
@ -0,0 +1,170 @@
|
|||
====
|
||||
NEWS
|
||||
====
|
||||
|
||||
:Authors: Toshio Kuratomi
|
||||
:Date: 14 Feb 2012
|
||||
:Version: 1.1.1
|
||||
|
||||
-----
|
||||
1.1.1
|
||||
-----
|
||||
|
||||
* Fix a bug with easy_gettext_setup() and get_translation_object() when using
|
||||
the default value of localedirs.
|
||||
|
||||
-----
|
||||
1.1.0
|
||||
-----
|
||||
|
||||
* Add yum.i18n.exception2msg section to the porting docs
|
||||
* Deprecate BYTE_EXCEPTION_CONVERTERS as simplification of code lets
|
||||
us use EXCEPTION_CONVERTERS for both exception_to_unicode and
|
||||
exception_to_bytes.
|
||||
* kitchen.i18n.get_translation_object
|
||||
- Add more parameters to :func:`~kitchen.i18n.get_translation_object` so it
|
||||
can more easily be used as a replacement for :func:`gettext.translation`.
|
||||
- Change the way we use localedirs. We cycle through them until we find a
|
||||
suitable locale file rather than simply cycling through until we find a
|
||||
directory that exists.
|
||||
- When multiple message catalogs are found in localedirs (and via environment
|
||||
variables), set up the extra ones as fallbacks if the message isn't found
|
||||
in the first catalog.
|
||||
* Change the return values from gettext and lgettext family of functions.
|
||||
Instead of simply guaranteeing a byte str will be returned we now guarantee
|
||||
the byte str will be valid in a certain encoding (the str may still be
|
||||
mangled but it will be valid).
|
||||
* Updated subprocess and base64 modules from latest python-2.7 branch.
|
||||
* Fix i18n Translation objects to set input_charset and output_charset on any
|
||||
fallback objects.
|
||||
* Fix kitchen.i18n Translation objects' output_encoding() method on python-2.3.
|
||||
It was accessing a different self object than we wanted it to. Defining it
|
||||
in a different way makes it work on python-2.3.
|
||||
|
||||
-----
|
||||
1.0.0
|
||||
-----
|
||||
|
||||
* Add a pointer to ordereddict and iterutils in the docs
|
||||
* Change a few pieces of code to not internally mix bytes and unicode
|
||||
|
||||
-----
|
||||
0.2.4
|
||||
-----
|
||||
|
||||
* Have easy_gettext_setup return lgettext functions instead of gettext
|
||||
functions when use_unicode=False
|
||||
* Correct docstring for kitchen.text.converters.exception_to_bytes() -- we're
|
||||
transforming into a byte str, not into unicode.
|
||||
* Correct some examples in the unicode frustrations documentation
|
||||
* Correct some cross-references in the documentation
|
||||
|
||||
-----
|
||||
0.2.3
|
||||
-----
|
||||
|
||||
* Expose MAXFD, list2cmdline(), and mswindows in kitchen.pycompat27.subprocess.
|
||||
These are undocumented, and not in upstream's __all__ but google (and bug
|
||||
reports against kitchen) show that some people are using them. Note that
|
||||
upstream is leaning towards these being private so they may be deprecated in
|
||||
the python3 subprocess.
|
||||
|
||||
-----
|
||||
0.2.2
|
||||
-----
|
||||
|
||||
* Add kitchen.text.converters.exception_to_bytes() and
|
||||
kitchen.text.converters.exception_to_unicode() that take an exception object
|
||||
and convert it into a text representation.
|
||||
* Add a documentation section on how API can be simplified if you can limit your encodings
|
||||
|
||||
If all goes well, we'll be making a 1.0 release shortly which is basically this release.
|
||||
|
||||
-------
|
||||
0.2.2a1
|
||||
-------
|
||||
|
||||
* Fix exception messages that contain unicode characters
|
||||
* Speed up to_unicode for the common cases of utf-8 and latin-1.
|
||||
* kitchen.i18n.NewGNUTranslations object that always returns unicode for
|
||||
ugettext and ungettext, always returns str for the other gettext functions,
|
||||
and doesn't throw UnicodeError.
|
||||
* Change i18n functions to return either DummyTranslations or
|
||||
NewGNUTranslations so all strings returned are known to be unicode or str.
|
||||
* kitchen.pycompat24.base64 now synced from upstream python so it implements
|
||||
all of the python-2.4 API
|
||||
* unittest NewGNUTranslations
|
||||
* unittest that easy_gettext_setup returns the correct objects
|
||||
* Document kitchen.text.display
|
||||
* Proofread all of the documentation. Cross reference to the stdlib.
|
||||
* Write a porting guide for people porting from python-fedora and yum APIs.
|
||||
|
||||
-------
|
||||
0.2.1a1
|
||||
-------
|
||||
|
||||
* Fix failing unittest on python-2.7
|
||||
* Add iterutils module
|
||||
* Update table of combining utf8 characters from python-2.7
|
||||
* Speed up kitchen.text.misc.str_eq().
|
||||
* docs:
|
||||
- api-i18n
|
||||
- api-exceptions
|
||||
- api-collections
|
||||
- api-iterutils
|
||||
- Add two tutorial sections for unicode
|
||||
* unittests
|
||||
- kitchen.text.converters.getwriter()
|
||||
- kitchen.iterutils
|
||||
- tests for more input variations to str_eq
|
||||
|
||||
-----
|
||||
0.2a2
|
||||
-----
|
||||
* Add unittests for kitchen.text.display, update kitchen.text.utf8 and
|
||||
kitchen.text.misc test coverage
|
||||
* Bug fixes for python-2.3
|
||||
* Some doc updates. More to come.
|
||||
* New function kitchen.text.converters.getwriter()
|
||||
|
||||
-----
|
||||
0.2a1
|
||||
-----
|
||||
* Relicense to LGPLv2+
|
||||
* All API versions for subpackages moved to 1.0 to comply with new guidelines
|
||||
on hacking subpackages.
|
||||
* Documentation on hacking kitchen and addons
|
||||
* Kitchen.text API changed (new API version 1.0)
|
||||
* Move utils.* to misc.*
|
||||
* Deprecate kitchen.text.utf8.utf8_valid in favor of
|
||||
kitchen.text.misc.byte_string_valid_encoding
|
||||
- byte_string_valid_encoding is significantly faster and a bit more generic
|
||||
* Port utf8 functions to use unicode
|
||||
* Put the unicode versions of the utf8 functions into kitchen.text.display
|
||||
|
||||
-----
|
||||
0.1a3
|
||||
-----
|
||||
* Add a defaultdict implementation for pycompat25
|
||||
* Add documentation
|
||||
* Add a StrictDict class that never has str and unicode keys collide.
|
||||
|
||||
-----
|
||||
0.1a2
|
||||
-----
|
||||
* Fixes for python-2.3
|
||||
* versioning subpackage with version_tuple_to_string() function that creates
|
||||
PEP-386 compatible version strings.
|
||||
* Changed pycompat24.builtinset -- now you need to call the add_builtin_set()
|
||||
function to add set and frozenset to the __builtin__ namespace.
|
||||
* pycompat24.base64modern module that implements the modern interface to
|
||||
encode and decode base64. Note that it doesn't implement b32 or b16 at the
|
||||
moment.
|
||||
* pycompat27 with the 2.7 version of subprocess.
|
||||
* The 2.7 version of subprocess is also available at
|
||||
kitchen.pycompat24.subprocess since subprocess first appeared in python2.4
|
||||
|
||||
-----
|
||||
0.1a1
|
||||
-----
|
||||
* Initial release of kitchen.core
|
39
PKG-INFO
Normal file
39
PKG-INFO
Normal file
|
@ -0,0 +1,39 @@
|
|||
Metadata-Version: 1.0
|
||||
Name: kitchen
|
||||
Version: 1.1.1
|
||||
Summary: Kitchen contains a cornucopia of useful code
|
||||
Home-page: https://fedorahosted.org/kitchen
|
||||
Author: Toshio Kuratomi
|
||||
Author-email: toshio@fedoraproject.org
|
||||
License: LGPLv2+
|
||||
Download-URL: https://fedorahosted.org/releases/k/i/kitchen
|
||||
Description:
|
||||
We've all done it. In the process of writing a brand new application we've
|
||||
discovered that we need a little bit of code that we've invented before.
|
||||
Perhaps it's something to handle unicode text. Perhaps it's something to make
|
||||
a bit of python-2.5 code run on python-2.3. Whatever it is, it ends up being
|
||||
a tiny bit of code that seems too small to worry about pushing into its own
|
||||
module so it sits there, a part of your current project, waiting to be cut and
|
||||
pasted into your next project. And the next. And the next. And since that
|
||||
little bittybit of code proved so useful to you, it's highly likely that it
|
||||
proved useful to someone else as well. Useful enough that they've written it
|
||||
and copy and pasted it over and over into each of their new projects.
|
||||
|
||||
Well, no longer! Kitchen aims to pull these small snippets of code into a few
|
||||
python modules which you can import and use within your project. No more copy
|
||||
and paste! Now you can let someone else maintain and release these small
|
||||
snippets so that you can get on with your life.
|
||||
|
||||
Keywords: Useful Small Code Snippets
|
||||
Platform: UNKNOWN
|
||||
Classifier: Development Status :: 4 - Beta
|
||||
Classifier: License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)
|
||||
Classifier: Operating System :: OS Independent
|
||||
Classifier: Programming Language :: Python :: 2.3
|
||||
Classifier: Programming Language :: Python :: 2.4
|
||||
Classifier: Programming Language :: Python :: 2.5
|
||||
Classifier: Programming Language :: Python :: 2.6
|
||||
Classifier: Programming Language :: Python :: 2.7
|
||||
Classifier: Topic :: Software Development :: Internationalization
|
||||
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
||||
Classifier: Topic :: Text Processing :: General
|
81
README
Normal file
81
README
Normal file
|
@ -0,0 +1,81 @@
|
|||
===================
|
||||
Kitchen.core Module
|
||||
===================
|
||||
|
||||
:Author: Toshio Kuratomi
|
||||
:Date: 2 Jan 2012
|
||||
:Version: 1.1.x
|
||||
|
||||
The Kitchen module provides a python API for all sorts of little useful
|
||||
snippets of code that everybody ends up writing for their projects but never
|
||||
seem big enough to build an independent release. Use kitchen and stop cutting
|
||||
and pasting that code over and over.
|
||||
|
||||
.. contents::
|
||||
|
||||
-------
|
||||
License
|
||||
-------
|
||||
|
||||
Since version 0.2a1, this python module has been distributed under the terms of
|
||||
the GNU Lesser General Public License Version 2 or later.
|
||||
|
||||
.. note:: Some parts of this module are licensed under terms less restrictive
|
||||
than the LGPL. If you separate these files from the work as a whole you
|
||||
are allowed to use them under the less restrictive licenses. The following
|
||||
is a list of the files that are known:
|
||||
|
||||
:subprocess.py: licensed under the Python 2 license by the PSF
|
||||
http://www.python.org/download/releases/2.4/license/
|
||||
:test_subprocess.py: Python Software Foundation License Version 2
|
||||
http://www.python.org/download/releases/2.7/license/
|
||||
:kitchen/pycompat25/defaultdict.py: Python Software Foundation License Version 2
|
||||
http://www.python.org/download/releases/2.6.2/license
|
||||
|
||||
------------
|
||||
Requirements
|
||||
------------
|
||||
|
||||
kitchen.core requires
|
||||
|
||||
:python: 2.3.1 or later
|
||||
|
||||
Soft Requirements
|
||||
=================
|
||||
|
||||
If found, these libraries will be used to make the implementation of something
|
||||
better in some way. If they are not present, the API that they enable will
|
||||
still exist but may function in a different manner.
|
||||
|
||||
:chardet_: Used in kitchen.text.xml.guess_encoding_to_xml() to help guess encoding of
|
||||
byte strings being converted. If not present, unknown encodings will be
|
||||
converted as if they were latin1.
|
||||
|
||||
.. _chardet: http://chardet.feedparser.org/
|
||||
|
||||
---------------------------
|
||||
Other Recommended Libraries
|
||||
---------------------------
|
||||
|
||||
These libraries implement commonly used functionality that everyone seems to
|
||||
invent. Rather than reinvent their wheel, I simply list the things that they
|
||||
do well for now. Perhaps if people can't find them normally, I'll add them as
|
||||
requirements in setup.py or link them into kitchen's namespace. For now, I
|
||||
just mention them here:
|
||||
|
||||
:bunch_: Bunch is a dictionary that you can use attribute lookup as well as
|
||||
bracket notation to access. Setting it apart from most homebrewed
|
||||
implementations is the bunchify() function which will descend nested
|
||||
structures of lists and dicts, transforming the dicts to Bunch's.
|
||||
|
||||
.. _bunch: http://pypi.python.org/pypi/bunch/
|
||||
|
||||
---------------------
|
||||
Building, and testing
|
||||
---------------------
|
||||
|
||||
Testing
|
||||
=======
|
||||
|
||||
You can run the unittests with this command::
|
||||
nosetests --with-coverage --cover-package kitchen
|
6
docs/api-collections.rst
Normal file
6
docs/api-collections.rst
Normal file
|
@ -0,0 +1,6 @@
|
|||
===================
|
||||
Kitchen.collections
|
||||
===================
|
||||
|
||||
.. automodule:: kitchen.collections.strictdict
|
||||
:members:
|
12
docs/api-exceptions.rst
Normal file
12
docs/api-exceptions.rst
Normal file
|
@ -0,0 +1,12 @@
|
|||
==========
|
||||
Exceptions
|
||||
==========
|
||||
|
||||
Kitchen has a hierarchy of exceptions that should make it easy to catch many
|
||||
errors emitted by kitchen itself.
|
||||
|
||||
.. automodule:: kitchen.exceptions
|
||||
:members:
|
||||
|
||||
.. automodule:: kitchen.text.exceptions
|
||||
:members:
|
38
docs/api-i18n.rst
Normal file
38
docs/api-i18n.rst
Normal file
|
@ -0,0 +1,38 @@
|
|||
===================
|
||||
Kitchen.i18n Module
|
||||
===================
|
||||
|
||||
.. automodule:: kitchen.i18n
|
||||
|
||||
Functions
|
||||
=========
|
||||
|
||||
:func:`easy_gettext_setup` should satisfy the needs of most users.
|
||||
:func:`get_translation_object` is designed to ease the way for anyone that
|
||||
needs more control.
|
||||
|
||||
.. autofunction:: easy_gettext_setup
|
||||
|
||||
.. autofunction:: get_translation_object
|
||||
|
||||
Translation Objects
|
||||
===================
|
||||
|
||||
The standard translation objects from the :mod:`gettext` module suffer from
|
||||
several problems:
|
||||
|
||||
* They can throw :exc:`UnicodeError`
|
||||
* They can't find translations for non-:term:`ASCII` byte :class:`str`
|
||||
messages
|
||||
* They may return either :class:`unicode` string or byte :class:`str` from the
|
||||
same function even though the functions say they will only return
|
||||
:class:`unicode` or only return byte :class:`str`.
|
||||
|
||||
:class:`DummyTranslations` and :class:`NewGNUTranslations` were written to fix
|
||||
these issues.
|
||||
|
||||
.. autoclass:: kitchen.i18n.DummyTranslations
|
||||
:members:
|
||||
|
||||
.. autoclass:: kitchen.i18n.NewGNUTranslations
|
||||
:members:
|
9
docs/api-iterutils.rst
Normal file
9
docs/api-iterutils.rst
Normal file
|
@ -0,0 +1,9 @@
|
|||
|
||||
========================
|
||||
Kitchen.iterutils Module
|
||||
========================
|
||||
|
||||
.. automodule:: kitchen.iterutils
|
||||
|
||||
.. autofunction:: kitchen.iterutils.isiterable
|
||||
.. autofunction:: kitchen.iterutils.iterate
|
24
docs/api-overview.rst
Normal file
24
docs/api-overview.rst
Normal file
|
@ -0,0 +1,24 @@
|
|||
.. _KitchenAPI:
|
||||
|
||||
===========
|
||||
Kitchen API
|
||||
===========
|
||||
|
||||
Kitchen is structured as a collection of modules. In its current
|
||||
configuration, Kitchen ships with the following modules. Other addon modules
|
||||
that may drag in more dependencies can be found on the `project webpage`_
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
api-i18n
|
||||
api-text
|
||||
api-collections
|
||||
api-iterutils
|
||||
api-versioning
|
||||
api-pycompat24
|
||||
api-pycompat25
|
||||
api-pycompat27
|
||||
api-exceptions
|
||||
|
||||
.. _`project webpage`: https://fedorahosted.org/kitchen
|
34
docs/api-pycompat24.rst
Normal file
34
docs/api-pycompat24.rst
Normal file
|
@ -0,0 +1,34 @@
|
|||
=======================
|
||||
Python 2.4 Compatibility
|
||||
=======================
|
||||
|
||||
|
||||
-------------------
|
||||
Sets for python-2.3
|
||||
-------------------
|
||||
|
||||
.. automodule:: kitchen.pycompat24.sets
|
||||
.. autofunction:: kitchen.pycompat24.sets.add_builtin_set
|
||||
|
||||
----------------------------------
|
||||
Partial new style base64 interface
|
||||
----------------------------------
|
||||
|
||||
.. automodule:: kitchen.pycompat24.base64
|
||||
:members:
|
||||
|
||||
----------
|
||||
Subprocess
|
||||
----------
|
||||
|
||||
.. seealso::
|
||||
|
||||
:mod:`kitchen.pycompat27.subprocess`
|
||||
Kitchen includes the python-2.7 version of subprocess which has a new
|
||||
function, :func:`~kitchen.pycompat27.subprocess.check_output`. When
|
||||
you import :mod:`pycompat24.subprocess` you will be getting the
|
||||
python-2.7 version of subprocess rather than the 2.4 version (where
|
||||
subprocess first appeared). This choice was made so that we can
|
||||
concentrate our efforts on keeping the single version of subprocess up
|
||||
to date rather than working on a 2.4 version that very few people
|
||||
would need specifically.
|
8
docs/api-pycompat25.rst
Normal file
8
docs/api-pycompat25.rst
Normal file
|
@ -0,0 +1,8 @@
|
|||
========================
|
||||
Python 2.5 Compatibility
|
||||
========================
|
||||
|
||||
.. automodule:: kitchen.pycompat25
|
||||
|
||||
.. automodule:: kitchen.pycompat25.collections._defaultdict
|
||||
|
35
docs/api-pycompat27.rst
Normal file
35
docs/api-pycompat27.rst
Normal file
|
@ -0,0 +1,35 @@
|
|||
========================
|
||||
Python 2.7 Compatibility
|
||||
========================
|
||||
|
||||
.. module:: kitchen.pycompat27.subprocess
|
||||
|
||||
--------------------------
|
||||
Subprocess from Python 2.7
|
||||
--------------------------
|
||||
|
||||
The :mod:`subprocess` module included here is a direct import from
|
||||
python-2.7's |stdlib|_. You can access it via::
|
||||
|
||||
>>> from kitchen.pycompat27 import subprocess
|
||||
|
||||
The motivation for including this module is that various API changing
|
||||
improvements have been made to subprocess over time. The following is a list
|
||||
of the known changes to :mod:`subprocess` with the python version they were
|
||||
introduced in:
|
||||
|
||||
==================================== ===
|
||||
New API Feature Ver
|
||||
==================================== ===
|
||||
:exc:`subprocess.CalledProcessError` 2.5
|
||||
:func:`subprocess.check_call` 2.5
|
||||
:func:`subprocess.check_output` 2.7
|
||||
:meth:`subprocess.Popen.send_signal` 2.6
|
||||
:meth:`subprocess.Popen.terminate` 2.6
|
||||
:meth:`subprocess.Popen.kill` 2.6
|
||||
==================================== ===
|
||||
|
||||
.. seealso::
|
||||
|
||||
The stdlib :mod:`subprocess` documentation
|
||||
For complete documentation on how to use subprocess
|
405
docs/api-text-converters.rst
Normal file
405
docs/api-text-converters.rst
Normal file
|
@ -0,0 +1,405 @@
|
|||
-----------------------
|
||||
Kitchen.text.converters
|
||||
-----------------------
|
||||
|
||||
.. automodule:: kitchen.text.converters
|
||||
|
||||
Byte Strings and Unicode in Python2
|
||||
===================================
|
||||
|
||||
Python2 has two string types, :class:`str` and :class:`unicode`.
|
||||
:class:`unicode` represents an abstract sequence of text characters. It can
|
||||
hold any character that is present in the unicode standard. :class:`str` can
|
||||
hold any byte of data. The operating system and python work together to
|
||||
display these bytes as characters in many cases but you should always keep in
|
||||
mind that the information is really a sequence of bytes, not a sequence of
|
||||
characters. In python2 these types are interchangeable a large amount of the
|
||||
time. They are one of the few pairs of types that automatically convert when
|
||||
used in equality::
|
||||
|
||||
>>> # string is converted to unicode and then compared
|
||||
>>> "I am a string" == u"I am a string"
|
||||
True
|
||||
>>> # Other types, like int, don't have this special treatment
|
||||
>>> 5 == "5"
|
||||
False
|
||||
|
||||
However, this automatic conversion tends to lull people into a false sense of
|
||||
security. As long as you're dealing with :term:`ASCII` characters the
|
||||
automatic conversion will save you from seeing any differences. Once you
|
||||
start using characters that are not in :term:`ASCII`, you will start getting
|
||||
:exc:`UnicodeError` and :exc:`UnicodeWarning` as the automatic conversions
|
||||
between the types fail::
|
||||
|
||||
>>> "I am an ñ" == u"I am an ñ"
|
||||
__main__:1: UnicodeWarning: Unicode equal comparison failed to convert both arguments to Unicode - interpreting them as being unequal
|
||||
False
|
||||
|
||||
Why do these conversions fail? The reason is that the python2
|
||||
:class:`unicode` type represents an abstract sequence of unicode text known as
|
||||
:term:`code points`. :class:`str`, on the other hand, really represents
|
||||
a sequence of bytes. Those bytes are converted by your operating system to
|
||||
appear as characters on your screen using a particular encoding (usually
|
||||
with a default defined by the operating system and customizable by the
|
||||
individual user.) Although :term:`ASCII` characters are fairly standard in
|
||||
what bytes represent each character, the bytes outside of the :term:`ASCII`
|
||||
range are not. In general, each encoding will map a different character to
|
||||
a particular byte. Newer encodings map individual characters to multiple
|
||||
bytes (which the older encodings will instead treat as multiple characters).
|
||||
In the face of these differences, python refuses to guess at an encoding and
|
||||
instead issues a warning or exception and refuses to convert.
|
||||
|
||||
.. seealso::
|
||||
:ref:`overcoming-frustration`
|
||||
For a longer introduction on this subject.
|
||||
|
||||
Strategy for Explicit Conversion
|
||||
================================
|
||||
|
||||
So what is the best method of dealing with this weltering babble of incoherent
|
||||
encodings? The basic strategy is to explicitly turn everything into
|
||||
:class:`unicode` when it first enters your program. Then, when you send it to
|
||||
output, you can transform the unicode back into bytes. Doing this allows you
|
||||
to control the encodings that are used and avoid getting tracebacks due to
|
||||
:exc:`UnicodeError`. Using the functions defined in this module, that looks
|
||||
something like this:
|
||||
|
||||
.. code-block:: pycon
|
||||
:linenos:
|
||||
|
||||
>>> from kitchen.text.converters import to_unicode, to_bytes
|
||||
>>> name = raw_input('Enter your name: ')
|
||||
Enter your name: Toshio くらとみ
|
||||
>>> name
|
||||
'Toshio \xe3\x81\x8f\xe3\x82\x89\xe3\x81\xa8\xe3\x81\xbf'
|
||||
>>> type(name)
|
||||
<type 'str'>
|
||||
>>> unicode_name = to_unicode(name)
|
||||
>>> type(unicode_name)
|
||||
<type 'unicode'>
|
||||
>>> unicode_name
|
||||
u'Toshio \u304f\u3089\u3068\u307f'
|
||||
>>> # Do a lot of other things before needing to save/output again:
|
||||
>>> output = open('datafile', 'w')
|
||||
>>> output.write(to_bytes(u'Name: %s\\n' % unicode_name))
|
||||
|
||||
A few notes:
|
||||
|
||||
Looking at line 6, you'll notice that the input we took from the user was
|
||||
a byte :class:`str`. In general, anytime we're getting a value from outside
|
||||
of python (The filesystem, reading data from the network, interacting with an
|
||||
external command, reading values from the environment) we are interacting with
|
||||
something that will want to give us a byte :class:`str`. Some |stdlib|_
|
||||
modules and third party libraries will automatically attempt to convert a byte
|
||||
:class:`str` to :class:`unicode` strings for you. This is both a boon and
|
||||
a curse. If the library can guess correctly about the encoding that the data
|
||||
is in, it will return :class:`unicode` objects to you without you having to
|
||||
convert. However, if it can't guess correctly, you may end up with one of
|
||||
several problems:
|
||||
|
||||
:exc:`UnicodeError`
|
||||
The library attempted to decode a byte :class:`str` into
|
||||
a :class:`unicode` string, failed, and raises an exception.
|
||||
Garbled data
|
||||
If the library returns the data after decoding it with the wrong encoding,
|
||||
the characters you see in the :exc:`unicode` string won't be the ones that
|
||||
you expect.
|
||||
A byte :class:`str` instead of :class:`unicode` string
|
||||
Some libraries will return a :class:`unicode` string when they're able to
|
||||
decode the data and a byte :class:`str` when they can't. This is
|
||||
generally the hardest problem to debug when it occurs. Avoid it in your
|
||||
own code and try to avoid or open bugs against upstreams that do this. See
|
||||
:ref:`DesigningUnicodeAwareAPIs` for strategies to do this properly.
|
||||
|
||||
On line 8, we convert from a byte :class:`str` to a :class:`unicode` string.
|
||||
:func:`~kitchen.text.converters.to_unicode` does this for us. It has some
|
||||
error handling and sane defaults that make this a nicer function to use than
|
||||
calling :meth:`str.decode` directly:
|
||||
|
||||
* Instead of defaulting to the :term:`ASCII` encoding which fails with all
|
||||
but the simple American English characters, it defaults to :term:`UTF-8`.
|
||||
* Instead of raising an error if it cannot decode a value, it will replace
|
||||
the value with the unicode "Replacement character" symbol (``<EFBFBD>``).
|
||||
* If you happen to call this method with something that is not a :class:`str`
|
||||
or :class:`unicode`, it will return an empty :class:`unicode` string.
|
||||
|
||||
All three of these can be overridden using different keyword arguments to the
|
||||
function. See the :func:`to_unicode` documentation for more information.
|
||||
|
||||
On line 15 we push the data back out to a file. Two things you should note here:
|
||||
|
||||
1. We deal with the strings as :class:`unicode` until the last instant. The
|
||||
string format that we're using is :class:`unicode` and the variable also
|
||||
holds :class:`unicode`. People sometimes get into trouble when they mix
|
||||
a byte :class:`str` format with a variable that holds a :class:`unicode`
|
||||
string (or vice versa) at this stage.
|
||||
2. :func:`~kitchen.text.converters.to_bytes`, does the reverse of
|
||||
:func:`to_unicode`. In this case, we're using the default values which
|
||||
turn :class:`unicode` into a byte :class:`str` using :term:`UTF-8`. Any
|
||||
errors are replaced with a ``<EFBFBD>`` and sending nonstring objects yield empty
|
||||
:class:`unicode` strings. Just like :func:`to_unicode`, you can look at
|
||||
the documentation for :func:`to_bytes` to find out how to override any of
|
||||
these defaults.
|
||||
|
||||
When to use an alternate strategy
|
||||
---------------------------------
|
||||
|
||||
The default strategy of decoding to :class:`unicode` strings when you take
|
||||
data in and encoding to a byte :class:`str` when you send the data back out
|
||||
works great for most problems but there are a few times when you shouldn't:
|
||||
|
||||
* The values aren't meant to be read as text
|
||||
* The values need to be byte-for-byte when you send them back out -- for
|
||||
instance if they are database keys or filenames.
|
||||
* You are transferring the data between several libraries that all expect
|
||||
byte :class:`str`.
|
||||
|
||||
In each of these instances, there is a reason to keep around the byte
|
||||
:class:`str` version of a value. Here's a few hints to keep your sanity in
|
||||
these situations:
|
||||
|
||||
1. Keep your :class:`unicode` and :class:`str` values separate. Just like the
|
||||
pain caused when you have to use someone else's library that returns both
|
||||
:class:`unicode` and :class:`str` you can cause yourself pain if you have
|
||||
functions that can return both types or variables that could hold either
|
||||
type of value.
|
||||
2. Name your variables so that you can tell whether you're storing byte
|
||||
:class:`str` or :class:`unicode` string. One of the first things you end
|
||||
up having to do when debugging is determine what type of string you have in
|
||||
a variable and what type of string you are expecting. Naming your
|
||||
variables consistently so that you can tell which type they are supposed to
|
||||
hold will save you from at least one of those steps.
|
||||
3. When you get values initially, make sure that you're dealing with the type
|
||||
of value that you expect as you save it. You can use :func:`isinstance`
|
||||
or :func:`to_bytes` since :func:`to_bytes` doesn't do any modifications of
|
||||
the string if it's already a :class:`str`. When using :func:`to_bytes`
|
||||
for this purpose you might want to use::
|
||||
|
||||
try:
|
||||
b_input = to_bytes(input_should_be_bytes_already, errors='strict', nonstring='strict')
|
||||
except:
|
||||
handle_errors_somehow()
|
||||
|
||||
The reason is that the default of :func:`to_bytes` will take characters
|
||||
that are illegal in the chosen encoding and transform them to replacement
|
||||
characters. Since the point of keeping this data as a byte :class:`str` is
|
||||
to keep the exact same bytes when you send it outside of your code,
|
||||
changing things to replacement characters should be raising red flags that
|
||||
something is wrong. Setting :attr:`errors` to ``strict`` will raise an
|
||||
exception which gives you an opportunity to fail gracefully.
|
||||
4. Sometimes you will want to print out the values that you have in your byte
|
||||
:class:`str`. When you do this you will need to make sure that you
|
||||
transform :class:`unicode` to :class:`str` before combining them. Also be
|
||||
sure that any other function calls (including :mod:`gettext`) are going to
|
||||
give you strings that are the same type. For instance::
|
||||
|
||||
print to_bytes(_('Username: %(user)s'), 'utf-8') % {'user': b_username}
|
||||
|
||||
Gotchas and how to avoid them
|
||||
=============================
|
||||
|
||||
Even when you have a good conceptual understanding of how python2 treats
|
||||
:class:`unicode` and :class:`str` there are still some things that can
|
||||
surprise you. In most cases this is because, as noted earlier, python or one
|
||||
of the python libraries you depend on is trying to convert a value
|
||||
automatically and failing. Explicit conversion at the appropriate place
|
||||
usually solves that.
|
||||
|
||||
str(obj)
|
||||
--------
|
||||
|
||||
One common idiom for getting a simple, string representation of an object is to use::
|
||||
|
||||
str(obj)
|
||||
|
||||
Unfortunately, this is not safe. Sometimes str(obj) will return
|
||||
:class:`unicode`. Sometimes it will return a byte :class:`str`. Sometimes,
|
||||
it will attempt to convert from a :class:`unicode` string to a byte
|
||||
:class:`str`, fail, and throw a :exc:`UnicodeError`. To be safe from all of
|
||||
these, first decide whether you need :class:`unicode` or :class:`str` to be
|
||||
returned. Then use :func:`to_unicode` or :func:`to_bytes` to get the simple
|
||||
representation like this::
|
||||
|
||||
u_representation = to_unicode(obj, nonstring='simplerepr')
|
||||
b_representation = to_bytes(obj, nonstring='simplerepr')
|
||||
|
||||
print
|
||||
-----
|
||||
|
||||
python has a builtin :func:`print` statement that outputs strings to the
|
||||
terminal. This originated in a time when python only dealt with byte
|
||||
:class:`str`. When :class:`unicode` strings came about, some enhancements
|
||||
were made to the :func:`print` statement so that it could print those as well.
|
||||
The enhancements make :func:`print` work most of the time. However, the times
|
||||
when it doesn't work tend to make for cryptic debugging.
|
||||
|
||||
The basic issue is that :func:`print` has to figure out what encoding to use
|
||||
when it prints a :class:`unicode` string to the terminal. When python is
|
||||
attached to your terminal (ie, you're running the interpreter or running
|
||||
a script that prints to the screen) python is able to take the encoding value
|
||||
from your locale settings :envvar:`LC_ALL` or :envvar:`LC_CTYPE` and print the
|
||||
characters allowed by that encoding. On most modern Unix systems, the
|
||||
encoding is :term:`utf-8` which means that you can print any :class:`unicode`
|
||||
character without problem.
|
||||
|
||||
There are two common cases of things going wrong:
|
||||
|
||||
1. Someone has a locale set that does not accept all valid unicode characters.
|
||||
For instance::
|
||||
|
||||
$ LC_ALL=C python
|
||||
>>> print u'\ufffd'
|
||||
Traceback (most recent call last):
|
||||
File "<stdin>", line 1, in <module>
|
||||
UnicodeEncodeError: 'ascii' codec can't encode character u'\ufffd' in position 0: ordinal not in range(128)
|
||||
|
||||
This often happens when a script that you've written and debugged from the
|
||||
terminal is run from an automated environment like :program:`cron`. It
|
||||
also occurs when you have written a script using a :term:`utf-8` aware
|
||||
locale and released it for consumption by people all over the internet.
|
||||
Inevitably, someone is running with a locale that can't handle all unicode
|
||||
characters and you get a traceback reported.
|
||||
2. You redirect output to a file. Python isn't using the values in
|
||||
:envvar:`LC_ALL` unconditionally to decide what encoding to use. Instead
|
||||
it is using the encoding set for the terminal you are printing to which is
|
||||
set to accept different encodings by :envvar:`LC_ALL`. If you redirect
|
||||
to a file, you are no longer printing to the terminal so :envvar:`LC_ALL`
|
||||
won't have any effect. At this point, python will decide it can't find an
|
||||
encoding and fallback to :term:`ASCII` which will likely lead to
|
||||
:exc:`UnicodeError` being raised. You can see this in a short script::
|
||||
|
||||
#! /usr/bin/python -tt
|
||||
print u'\ufffd'
|
||||
|
||||
And then look at the difference between running it normally and redirecting to a file:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ ./test.py
|
||||
<20>
|
||||
$ ./test.py > t
|
||||
Traceback (most recent call last):
|
||||
File "test.py", line 3, in <module>
|
||||
print u'\ufffd'
|
||||
UnicodeEncodeError: 'ascii' codec can't encode character u'\ufffd' in position 0: ordinal not in range(128)
|
||||
|
||||
The short answer to dealing with this is to always use bytes when writing
|
||||
output. You can do this by explicitly converting to bytes like this::
|
||||
|
||||
from kitchen.text.converters import to_bytes
|
||||
u_string = u'\ufffd'
|
||||
print to_bytes(u_string)
|
||||
|
||||
or you can wrap stdout and stderr with a :class:`~codecs.StreamWriter`.
|
||||
A :class:`~codecs.StreamWriter` is convenient in that you can assign it to
|
||||
encode for :data:`sys.stdout` or :data:`sys.stderr` and then have output
|
||||
automatically converted but it has the drawback of still being able to throw
|
||||
:exc:`UnicodeError` if the writer can't encode all possible unicode
|
||||
codepoints. Kitchen provides an alternate version which can be retrieved with
|
||||
:func:`kitchen.text.converters.getwriter` which will not traceback in its
|
||||
standard configuration.
|
||||
|
||||
.. _unicode-and-dict-keys:
|
||||
|
||||
Unicode, str, and dict keys
|
||||
---------------------------
|
||||
|
||||
The :func:`hash` of the :term:`ASCII` characters is the same for
|
||||
:class:`unicode` and byte :class:`str`. When you use them in :class:`dict`
|
||||
keys, they evaluate to the same dictionary slot::
|
||||
|
||||
>>> u_string = u'a'
|
||||
>>> b_string = 'a'
|
||||
>>> hash(u_string), hash(b_string)
|
||||
(12416037344, 12416037344)
|
||||
>>> d = {}
|
||||
>>> d[u_string] = 'unicode'
|
||||
>>> d[b_string] = 'bytes'
|
||||
>>> d
|
||||
{u'a': 'bytes'}
|
||||
|
||||
When you deal with key values outside of :term:`ASCII`, :class:`unicode` and
|
||||
byte :class:`str` evaluate unequally no matter what their character content or
|
||||
hash value::
|
||||
|
||||
>>> u_string = u'ñ'
|
||||
>>> b_string = u_string.encode('utf-8')
|
||||
>>> print u_string
|
||||
ñ
|
||||
>>> print b_string
|
||||
ñ
|
||||
>>> d = {}
|
||||
>>> d[u_string] = 'unicode'
|
||||
>>> d[b_string] = 'bytes'
|
||||
>>> d
|
||||
{u'\\xf1': 'unicode', '\\xc3\\xb1': 'bytes'}
|
||||
>>> b_string2 = '\\xf1'
|
||||
>>> hash(u_string), hash(b_string2)
|
||||
(30848092528, 30848092528)
|
||||
>>> d = {}
|
||||
>>> d[u_string] = 'unicode'
|
||||
>>> d[b_string2] = 'bytes'
|
||||
{u'\\xf1': 'unicode', '\\xf1': 'bytes'}
|
||||
|
||||
How do you work with this one? Remember rule #1: Keep your :class:`unicode`
|
||||
and byte :class:`str` values separate. That goes for keys in a dictionary
|
||||
just like anything else.
|
||||
|
||||
* For any given dictionary, make sure that all your keys are either
|
||||
:class:`unicode` or :class:`str`. **Do not mix the two.** If you're being
|
||||
given both :class:`unicode` and :class:`str` but you don't need to preserve
|
||||
separate keys for each, I recommend using :func:`to_unicode` or
|
||||
:func:`to_bytes` to convert all keys to one type or the other like this::
|
||||
|
||||
>>> from kitchen.text.converters import to_unicode
|
||||
>>> u_string = u'one'
|
||||
>>> b_string = 'two'
|
||||
>>> d = {}
|
||||
>>> d[to_unicode(u_string)] = 1
|
||||
>>> d[to_unicode(b_string)] = 2
|
||||
>>> d
|
||||
{u'two': 2, u'one': 1}
|
||||
|
||||
* These issues also apply to using dicts with tuple keys that contain
|
||||
a mixture of :class:`unicode` and :class:`str`. Once again the best fix
|
||||
is to standardise on either :class:`str` or :class:`unicode`.
|
||||
|
||||
* If you absolutely need to store values in a dictionary where the keys could
|
||||
be either :class:`unicode` or :class:`str` you can use
|
||||
:class:`~kitchen.collections.strictdict.StrictDict` which has separate
|
||||
entries for all :class:`unicode` and byte :class:`str` and deals correctly
|
||||
with any :class:`tuple` containing mixed :class:`unicode` and byte
|
||||
:class:`str`.
|
||||
|
||||
---------
|
||||
Functions
|
||||
---------
|
||||
|
||||
Unicode and byte str conversion
|
||||
===============================
|
||||
|
||||
.. autofunction:: kitchen.text.converters.to_unicode
|
||||
.. autofunction:: kitchen.text.converters.to_bytes
|
||||
.. autofunction:: kitchen.text.converters.getwriter
|
||||
.. autofunction:: kitchen.text.converters.to_str
|
||||
.. autofunction:: kitchen.text.converters.to_utf8
|
||||
|
||||
Transformation to XML
|
||||
=====================
|
||||
|
||||
.. autofunction:: kitchen.text.converters.unicode_to_xml
|
||||
.. autofunction:: kitchen.text.converters.xml_to_unicode
|
||||
.. autofunction:: kitchen.text.converters.byte_string_to_xml
|
||||
.. autofunction:: kitchen.text.converters.xml_to_byte_string
|
||||
.. autofunction:: kitchen.text.converters.bytes_to_xml
|
||||
.. autofunction:: kitchen.text.converters.xml_to_bytes
|
||||
.. autofunction:: kitchen.text.converters.guess_encoding_to_xml
|
||||
.. autofunction:: kitchen.text.converters.to_xml
|
||||
|
||||
Working with exception messages
|
||||
===============================
|
||||
|
||||
.. autodata:: kitchen.text.converters.EXCEPTION_CONVERTERS
|
||||
.. autodata:: kitchen.text.converters.BYTE_EXCEPTION_CONVERTERS
|
||||
.. autofunction:: kitchen.text.converters.exception_to_unicode
|
||||
.. autofunction:: kitchen.text.converters.exception_to_bytes
|
33
docs/api-text-display.rst
Normal file
33
docs/api-text-display.rst
Normal file
|
@ -0,0 +1,33 @@
|
|||
.. automodule:: kitchen.text.display
|
||||
|
||||
.. autofunction:: kitchen.text.display.textual_width
|
||||
|
||||
.. autofunction:: kitchen.text.display.textual_width_chop
|
||||
|
||||
.. autofunction:: kitchen.text.display.textual_width_fill
|
||||
|
||||
.. autofunction:: kitchen.text.display.wrap
|
||||
|
||||
.. autofunction:: kitchen.text.display.fill
|
||||
|
||||
.. autofunction:: kitchen.text.display.byte_string_textual_width_fill
|
||||
|
||||
Internal Data
|
||||
=============
|
||||
|
||||
There are a few internal functions and variables in this module. Code outside
|
||||
of kitchen shouldn't use them but people coding on kitchen itself may find
|
||||
them useful.
|
||||
|
||||
.. autodata:: kitchen.text.display._COMBINING
|
||||
|
||||
.. autofunction:: kitchen.text.display._generate_combining_table
|
||||
|
||||
.. autofunction:: kitchen.text.display._print_combining_table
|
||||
|
||||
.. autofunction:: kitchen.text.display._interval_bisearch
|
||||
|
||||
.. autofunction:: kitchen.text.display._ucp_width
|
||||
|
||||
.. autofunction:: kitchen.text.display._textual_width_le
|
||||
|
2
docs/api-text-misc.rst
Normal file
2
docs/api-text-misc.rst
Normal file
|
@ -0,0 +1,2 @@
|
|||
.. automodule:: kitchen.text.misc
|
||||
:members:
|
3
docs/api-text-utf8.rst
Normal file
3
docs/api-text-utf8.rst
Normal file
|
@ -0,0 +1,3 @@
|
|||
.. automodule:: kitchen.text.utf8
|
||||
:members:
|
||||
:deprecated:
|
22
docs/api-text.rst
Normal file
22
docs/api-text.rst
Normal file
|
@ -0,0 +1,22 @@
|
|||
=============================================
|
||||
Kitchen.text: unicode and utf8 and xml oh my!
|
||||
=============================================
|
||||
|
||||
The kitchen.text module contains functions that deal with text manipulation.
|
||||
|
||||
.. toctree::
|
||||
|
||||
api-text-converters
|
||||
api-text-display
|
||||
api-text-misc
|
||||
api-text-utf8
|
||||
|
||||
:mod:`~kitchen.text.converters`
|
||||
deals with converting text for different encodings and to and from XML
|
||||
:mod:`~kitchen.text.display`
|
||||
deals with issues with printing text to a screen
|
||||
:mod:`~kitchen.text.misc`
|
||||
is a catchall for text manipulation functions that don't seem to fit
|
||||
elsewhere
|
||||
:mod:`~kitchen.text.utf8`
|
||||
contains deprecated functions to manipulate utf8 byte strings
|
6
docs/api-versioning.rst
Normal file
6
docs/api-versioning.rst
Normal file
|
@ -0,0 +1,6 @@
|
|||
===============================
|
||||
Helpers for versioning software
|
||||
===============================
|
||||
|
||||
.. automodule:: kitchen.versioning
|
||||
:members:
|
220
docs/conf.py
Normal file
220
docs/conf.py
Normal file
|
@ -0,0 +1,220 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Kitchen documentation build configuration file, created by
|
||||
# sphinx-quickstart on Sat May 22 00:51:26 2010.
|
||||
#
|
||||
# This file is execfile()d with the current directory set to its containing dir.
|
||||
#
|
||||
# Note that not all possible configuration values are present in this
|
||||
# autogenerated file.
|
||||
#
|
||||
# All configuration values have a default; values that are commented out
|
||||
# serve to show the default.
|
||||
|
||||
import sys, os
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
||||
import kitchen.release
|
||||
|
||||
# If extensions (or modules to document with autodoc) are in another directory,
|
||||
# add these directories to sys.path here. If the directory is relative to the
|
||||
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
||||
#sys.path.append(os.path.abspath('.'))
|
||||
|
||||
# -- General configuration -----------------------------------------------------
|
||||
|
||||
# Add any Sphinx extension module names here, as strings. They can be extensions
|
||||
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
|
||||
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', 'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.pngmath', 'sphinx.ext.ifconfig']
|
||||
|
||||
# Add any paths that contain templates here, relative to this directory.
|
||||
templates_path = ['_templates']
|
||||
|
||||
# The suffix of source filenames.
|
||||
source_suffix = '.rst'
|
||||
|
||||
# The encoding of source files.
|
||||
#source_encoding = 'utf-8'
|
||||
|
||||
# The master toctree document.
|
||||
master_doc = 'index'
|
||||
|
||||
# General information about the project.
|
||||
project = kitchen.release.NAME
|
||||
copyright = kitchen.release.COPYRIGHT
|
||||
|
||||
# The version info for the project you're documenting, acts as replacement for
|
||||
# |version| and |release|, also used in various other places throughout the
|
||||
# built documents.
|
||||
#
|
||||
# The short X.Y version.
|
||||
version = '0.2'
|
||||
# The full version, including alpha/beta/rc tags.
|
||||
release = kitchen.__version__
|
||||
|
||||
# The language for content autogenerated by Sphinx. Refer to documentation
|
||||
# for a list of supported languages.
|
||||
language = 'en'
|
||||
|
||||
# There are two options for replacing |today|: either, you set today to some
|
||||
# non-false value, then it is used:
|
||||
#today = ''
|
||||
# Else, today_fmt is used as the format for a strftime call.
|
||||
#today_fmt = '%B %d, %Y'
|
||||
|
||||
# List of documents that shouldn't be included in the build.
|
||||
#unused_docs = []
|
||||
|
||||
# List of directories, relative to source directory, that shouldn't be searched
|
||||
# for source files.
|
||||
exclude_trees = []
|
||||
|
||||
# The reST default role (used for this markup: `text`) to use for all documents.
|
||||
#default_role = None
|
||||
|
||||
# If true, '()' will be appended to :func: etc. cross-reference text.
|
||||
add_function_parentheses = True
|
||||
|
||||
# If true, the current module name will be prepended to all description
|
||||
# unit titles (such as .. function::).
|
||||
#add_module_names = True
|
||||
|
||||
# If true, sectionauthor and moduleauthor directives will be shown in the
|
||||
# output. They are ignored by default.
|
||||
show_authors = True
|
||||
|
||||
# The name of the Pygments (syntax highlighting) style to use.
|
||||
pygments_style = 'sphinx'
|
||||
|
||||
# A list of ignored prefixes for module index sorting.
|
||||
#modindex_common_prefix = []
|
||||
|
||||
highlight_language = 'python'
|
||||
|
||||
# -- Options for HTML output ---------------------------------------------------
|
||||
|
||||
# The theme to use for HTML and HTML Help pages. Major themes that come with
|
||||
# Sphinx are currently 'default' and 'sphinxdoc'.
|
||||
html_theme = 'default'
|
||||
|
||||
# Theme options are theme-specific and customize the look and feel of a theme
|
||||
# further. For a list of options available for each theme, see the
|
||||
# documentation.
|
||||
#html_theme_options = {}
|
||||
|
||||
# Add any paths that contain custom themes here, relative to this directory.
|
||||
#html_theme_path = []
|
||||
|
||||
# The name for this set of Sphinx documents. If None, it defaults to
|
||||
# "<project> v<release> documentation".
|
||||
#html_title = None
|
||||
|
||||
# A shorter title for the navigation bar. Default is the same as html_title.
|
||||
#html_short_title = None
|
||||
|
||||
# The name of an image file (relative to this directory) to place at the top
|
||||
# of the sidebar.
|
||||
#html_logo = None
|
||||
|
||||
# The name of an image file (within the static path) to use as favicon of the
|
||||
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
|
||||
# pixels large.
|
||||
#html_favicon = None
|
||||
|
||||
# Add any paths that contain custom static files (such as style sheets) here,
|
||||
# relative to this directory. They are copied after the builtin static files,
|
||||
# so a file named "default.css" will overwrite the builtin "default.css".
|
||||
html_static_path = ['_static']
|
||||
|
||||
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
|
||||
# using the given strftime format.
|
||||
#html_last_updated_fmt = '%b %d, %Y'
|
||||
|
||||
# If true, SmartyPants will be used to convert quotes and dashes to
|
||||
# typographically correct entities.
|
||||
#html_use_smartypants = True
|
||||
|
||||
# Content template for the index page.
|
||||
html_index = 'index.html'
|
||||
|
||||
# Custom sidebar templates, maps document names to template names.
|
||||
#html_sidebars = {}
|
||||
|
||||
# Additional templates that should be rendered to pages, maps page names to
|
||||
# template names.
|
||||
#html_additional_pages = {}
|
||||
|
||||
# If false, no module index is generated.
|
||||
#html_use_modindex = True
|
||||
|
||||
# If false, no index is generated.
|
||||
#html_use_index = True
|
||||
|
||||
# If true, the index is split into individual pages for each letter.
|
||||
#html_split_index = False
|
||||
|
||||
# If true, links to the reST sources are added to the pages.
|
||||
#html_show_sourcelink = True
|
||||
|
||||
# If true, an OpenSearch description file will be output, and all pages will
|
||||
# contain a <link> tag referring to it. The value of this option must be the
|
||||
# base URL from which the finished HTML is served.
|
||||
html_use_opensearch = kitchen.release.DOWNLOAD_URL + 'docs/'
|
||||
|
||||
# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml").
|
||||
#html_file_suffix = ''
|
||||
|
||||
# Output file base name for HTML help builder.
|
||||
htmlhelp_basename = 'kitchendoc'
|
||||
|
||||
|
||||
# -- Options for LaTeX output --------------------------------------------------
|
||||
|
||||
# The paper size ('letter' or 'a4').
|
||||
#latex_paper_size = 'letter'
|
||||
|
||||
# The font size ('10pt', '11pt' or '12pt').
|
||||
#latex_font_size = '10pt'
|
||||
|
||||
# Grouping the document tree into LaTeX files. List of tuples
|
||||
# (source start file, target name, title, author, documentclass [howto/manual]).
|
||||
latex_documents = [
|
||||
('index', 'kitchen.tex', u'kitchen Documentation',
|
||||
u'Toshio Kuratomi', 'manual'),
|
||||
]
|
||||
|
||||
# The name of an image file (relative to this directory) to place at the top of
|
||||
# the title page.
|
||||
#latex_logo = None
|
||||
|
||||
# For "manual" documents, if this is true, then toplevel headings are parts,
|
||||
# not chapters.
|
||||
#latex_use_parts = False
|
||||
|
||||
# Additional stuff for the LaTeX preamble.
|
||||
#latex_preamble = ''
|
||||
|
||||
# Documents to append as an appendix to all manuals.
|
||||
#latex_appendices = []
|
||||
|
||||
# If false, no module index is generated.
|
||||
#latex_use_modindex = True
|
||||
|
||||
automodule_skip_lines = 4
|
||||
autoclass_content = "class"
|
||||
|
||||
# Example configuration for intersphinx: refer to the Python standard library.
|
||||
intersphinx_mapping = {'http://docs.python.org/': None,
|
||||
'https://fedorahosted.org/releases/p/y/python-fedora/doc/': None,
|
||||
'https://fedorahosted.org/releases/p/a/packagedb/doc/': None}
|
||||
|
||||
rst_epilog = '''
|
||||
.. |projpage| replace:: project webpage
|
||||
.. _projpage: %(url)s
|
||||
.. |docpage| replace:: documentation page
|
||||
.. _docpage: %(download)s/docs
|
||||
.. |downldpage| replace:: download page
|
||||
.. _downldpage: %(download)s
|
||||
.. |stdlib| replace:: python standard library
|
||||
.. _stdlib: http://docs.python.org/library
|
||||
''' % {'url': kitchen.release.URL, 'download': kitchen.release.DOWNLOAD_URL}
|
690
docs/designing-unicode-apis.rst
Normal file
690
docs/designing-unicode-apis.rst
Normal file
|
@ -0,0 +1,690 @@
|
|||
.. _DesigningUnicodeAwareAPIs:
|
||||
|
||||
============================
|
||||
Designing Unicode Aware APIs
|
||||
============================
|
||||
|
||||
APIs that deal with byte :class:`str` and :class:`unicode` strings are
|
||||
difficult to get right. Here are a few strategies with pros and cons of each.
|
||||
|
||||
.. contents::
|
||||
|
||||
-------------------------------------------------
|
||||
Take either bytes or unicode, output only unicode
|
||||
-------------------------------------------------
|
||||
|
||||
In this strategy, you allow the user to enter either :class:`unicode` strings
|
||||
or byte :class:`str` but what you give back is always :class:`unicode`. This
|
||||
strategy is easy for novice endusers to start using immediately as they will
|
||||
be able to feed either type of string into the function and get back a string
|
||||
that they can use in other places.
|
||||
|
||||
However, it does lead to the novice writing code that functions correctly when
|
||||
testing it with :term:`ASCII`-only data but fails when given data that contains
|
||||
non-:term:`ASCII` characters. Worse, if your API is not designed to be
|
||||
flexible, the consumer of your code won't be able to easily correct those
|
||||
problems once they find them.
|
||||
|
||||
Here's a good API that uses this strategy::
|
||||
|
||||
from kitchen.text.converters import to_unicode
|
||||
|
||||
def truncate(msg, max_length, encoding='utf8', errors='replace'):
|
||||
msg = to_unicode(msg, encoding, errors)
|
||||
return msg[:max_length]
|
||||
|
||||
The call to :func:`truncate` starts with the essential parameters for
|
||||
performing the task. It ends with two optional keyword arguments that define
|
||||
the encoding to use to transform from a byte :class:`str` to :class:`unicode`
|
||||
and the strategy to use if undecodable bytes are encountered. The defaults
|
||||
may vary depending on the use cases you have in mind. When the output is
|
||||
generally going to be printed for the user to see, ``errors='replace'`` is
|
||||
a good default. If you are constructing keys to a database, raising an
|
||||
exception (with ``errors='strict'``) may be a better default. In either case,
|
||||
having both parameters allows the person using your API to choose how they
|
||||
want to handle any problems. Having the values is also a clue to them that
|
||||
a conversion from byte :class:`str` to :class:`unicode` string is going to
|
||||
occur.
|
||||
|
||||
.. note::
|
||||
|
||||
If you're targeting python-3.1 and above, ``errors='surrogateescape'`` may
|
||||
be a better default than ``errors='strict'``. You need to be mindful of
|
||||
a few things when using ``surrogateescape`` though:
|
||||
|
||||
* ``surrogateescape`` will cause issues if a non-:term:`ASCII` compatible
|
||||
encoding is used (for instance, UTF-16 and UTF-32.) That makes it
|
||||
unhelpful in situations where a true general purpose method of encoding
|
||||
must be found. :pep:`383` mentions that ``surrogateescape`` was
|
||||
specifically designed with the limitations of translating using system
|
||||
locales (where :term:`ASCII` compatibility is generally seen as
|
||||
inescapable) so you should keep that in mind.
|
||||
* If you use ``surrogateescape`` to decode from :class:`bytes`
|
||||
to :class:`unicode` you will need to use an error handler other than
|
||||
``strict`` to encode as the lone surrogate that this error handler
|
||||
creates makes for invalid unicode that must be handled when encoding.
|
||||
In Python-3.1.2 or less, a bug in the encoder error handlers means that
|
||||
you can only use ``surrogateescape`` to encode; anything else will throw
|
||||
an error.
|
||||
|
||||
Evaluate your usages of the variables in question to see what makes sense.
|
||||
|
||||
Here's a bad example of using this strategy::
|
||||
|
||||
from kitchen.text.converters import to_unicode
|
||||
|
||||
def truncate(msg, max_length):
|
||||
msg = to_unicode(msg)
|
||||
return msg[:max_length]
|
||||
|
||||
In this example, we don't have the optional keyword arguments for
|
||||
:attr:`encoding` and :attr:`errors`. A user who uses this function is more
|
||||
likely to miss the fact that a conversion from byte :class:`str` to
|
||||
:class:`unicode` is going to occur. And once an error is reported, they will
|
||||
have to look through their backtrace and think harder about where they want to
|
||||
transform their data into :class:`unicode` strings instead of having the
|
||||
opportunity to control how the conversion takes place in the function itself.
|
||||
Note that the user does have the ability to make this work by making the
|
||||
transformation to unicode themselves::
|
||||
|
||||
from kitchen.text.converters import to_unicode
|
||||
|
||||
msg = to_unicode(msg, encoding='euc_jp', errors='ignore')
|
||||
new_msg = truncate(msg, 5)
|
||||
|
||||
--------------------------------------------------
|
||||
Take either bytes or unicode, output the same type
|
||||
--------------------------------------------------
|
||||
|
||||
This strategy is sometimes called polymorphic because the type of data that is
|
||||
returned is dependent on the type of data that is received. The concept is
|
||||
that when you are given a byte :class:`str` to process, you return a byte
|
||||
:class:`str` in your output. When you are given :class:`unicode` strings to
|
||||
process, you return :class:`unicode` strings in your output.
|
||||
|
||||
This can work well for end users as the ones that know about the difference
|
||||
between the two string types will already have transformed the strings to
|
||||
their desired type before giving it to this function. The ones that don't can
|
||||
remain blissfully ignorant (at least, as far as your function is concerned) as
|
||||
the function does not change the type.
|
||||
|
||||
In cases where the encoding of the byte :class:`str` is known or can be
|
||||
discovered based on the input data this works well. If you can't figure out
|
||||
the input encoding, however, this strategy can fail in any of the following
|
||||
cases:
|
||||
|
||||
1. It needs to do an internal conversion between byte :class:`str` and
|
||||
:class:`unicode` string.
|
||||
2. It cannot return the same data as either a :class:`unicode` string or byte
|
||||
:class:`str`.
|
||||
3. You may need to deal with byte strings that are not byte-compatible with
|
||||
:term:`ASCII`
|
||||
|
||||
First, a couple examples of using this strategy in a good way::
|
||||
|
||||
def translate(msg, table):
|
||||
replacements = table.keys()
|
||||
new_msg = []
|
||||
for index, char in enumerate(msg):
|
||||
if char in replacements:
|
||||
new_msg.append(table[char])
|
||||
else:
|
||||
new_msg.append(char)
|
||||
|
||||
return ''.join(new_msg)
|
||||
|
||||
In this example, all of the strings that we use (except the empty string which
|
||||
is okay because it doesn't have any characters to encode) come from outside of
|
||||
the function. Due to that, the user is responsible for making sure that the
|
||||
:attr:`msg`, and the keys and values in :attr:`table` all match in terms of
|
||||
type (:class:`unicode` vs :class:`str`) and encoding (You can do some error
|
||||
checking to make sure the user gave all the same type but you can't do the
|
||||
same for the user giving different encodings). You do not need to make
|
||||
changes to the string that require you to know the encoding or type of the
|
||||
string; everything is a simple replacement of one element in the array of
|
||||
characters in message with the character in table.
|
||||
|
||||
::
|
||||
|
||||
import json
|
||||
from kitchen.text.converters import to_unicode, to_bytes
|
||||
|
||||
def first_field_from_json_data(json_string):
|
||||
'''Return the first field in a json data structure.
|
||||
|
||||
The format of the json data is a simple list of strings.
|
||||
'["one", "two", "three"]'
|
||||
'''
|
||||
if isinstance(json_string, unicode):
|
||||
# On all python versions, json.loads() returns unicode if given
|
||||
# a unicode string
|
||||
return json.loads(json_string)[0]
|
||||
|
||||
# Byte str: figure out which encoding we're dealing with
|
||||
if '\x00' not in json_string[:2]:
|
||||
    encoding = 'utf8'
|
||||
elif '\x00\x00\x00' == json_string[:3]:
|
||||
    encoding = 'utf-32-be'
|
||||
elif '\x00\x00\x00' == json_string[1:4]:
|
||||
    encoding = 'utf-32-le'
|
||||
elif '\x00' == json_string[0] and '\x00' == json_string[2]:
|
||||
    encoding = 'utf-16-be'
|
||||
else:
|
||||
    encoding = 'utf-16-le'
|
||||
|
||||
data = json.loads(unicode(json_string, encoding))
|
||||
return data[0].encode(encoding)
|
||||
|
||||
In this example the function takes either a byte :class:`str` type or
|
||||
a :class:`unicode` string that has a list in json format and returns the first
|
||||
field from it as the type of the input string. The first section of code is
|
||||
very straightforward; we receive a :class:`unicode` string, parse it with
|
||||
a function, and then return the first field from our parsed data (which our
|
||||
function returned to us as json data).
|
||||
|
||||
The second portion that deals with byte :class:`str` is not so
|
||||
straightforward. Before we can parse the string we have to determine what
|
||||
characters the bytes in the string map to. If we didn't do that, we wouldn't
|
||||
be able to properly find which characters are present in the string. In order
|
||||
to do that we have to figure out the encoding of the byte :class:`str`.
|
||||
Luckily, the json specification states that all strings are unicode and
|
||||
encoded with one of UTF32be, UTF32le, UTF16be, UTF16le, or :term:`UTF-8`. It further
|
||||
defines the format such that the first two characters are always
|
||||
:term:`ASCII`. Each of these has a different sequence of NULLs when they
|
||||
encode an :term:`ASCII` character. We can use that to detect which encoding
|
||||
was used to create the byte :class:`str`.
|
||||
|
||||
Finally, we return the byte :class:`str` by encoding the :class:`unicode` back
|
||||
to a byte :class:`str`.
|
||||
|
||||
As you can see, in this example we have to convert from byte :class:`str` to
|
||||
:class:`unicode` and back. But we know from the json specification that byte
|
||||
:class:`str` has to be one of a limited number of encodings that we are able
|
||||
to detect. That ability makes this strategy work.
|
||||
|
||||
Now for some examples of using this strategy in ways that fail::
|
||||
|
||||
import unicodedata
|
||||
def first_char(msg):
|
||||
'''Return the first character in a string'''
|
||||
if not isinstance(msg, unicode):
|
||||
try:
|
||||
msg = unicode(msg, 'utf8')
|
||||
except UnicodeError:
|
||||
msg = unicode(msg, 'latin1')
|
||||
msg = unicodedata.normalize('NFC', msg)
|
||||
return msg[0]
|
||||
|
||||
If you look at that code and think that there's something fragile and prone to
|
||||
breaking in the ``try: except:`` block you are correct in being suspicious.
|
||||
This code will fail on multi-byte character sets that aren't :term:`UTF-8`. It
|
||||
can also fail on data where the sequence of bytes is valid :term:`UTF-8` but
|
||||
the bytes are actually of a different encoding. The reasons this code fails
|
||||
is that we don't know what encoding the bytes are in and the code must convert
|
||||
from a byte :class:`str` to a :class:`unicode` string in order to function.
|
||||
|
||||
In order to make this code robust we must know the encoding of :attr:`msg`.
|
||||
The only way to know that is to ask the user so the API must do that::
|
||||
|
||||
import unicodedata
|
||||
def number_of_chars(msg, encoding='utf8', errors='strict'):
|
||||
if not isinstance(msg, unicode):
|
||||
msg = unicode(msg, encoding, errors)
|
||||
msg = unicodedata.normalize('NFC', msg)
|
||||
return len(msg)
|
||||
|
||||
Another example of failure::
|
||||
|
||||
import os
|
||||
def listdir(directory):
|
||||
files = os.listdir(directory)
|
||||
if isinstance(directory, str):
|
||||
return files
|
||||
# files could contain both bytes and unicode
|
||||
new_files = []
|
||||
for filename in files:
|
||||
if not isinstance(filename, unicode):
|
||||
# What to do here?
|
||||
continue
|
||||
new_files.append(filename)
|
||||
return new_files
|
||||
|
||||
This function illustrates the second failure mode. Here, not all of the
|
||||
possible values can be represented as :class:`unicode` without knowing more
|
||||
about the encoding of each of the filenames involved. Since each filename
|
||||
could have a different encoding there's a few different options to pursue. We
|
||||
could make this function always return byte :class:`str` since that can
|
||||
accurately represent anything that could be returned. If we want to return
|
||||
:class:`unicode` we need to at least allow the user to specify what to do in
|
||||
case of an error decoding the bytes to :class:`unicode`. We can also let the
|
||||
user specify the encoding to use for doing the decoding but that won't help in
|
||||
all cases since not all files will be in the same encoding (or even
|
||||
necessarily in any encoding)::
|
||||
|
||||
import locale
|
||||
import os
|
||||
def listdir(directory, encoding=locale.getpreferredencoding(), errors='strict'):
|
||||
# Note: In python-3.1+, surrogateescape may be a better default
|
||||
files = os.listdir(directory)
|
||||
if isinstance(directory, str):
|
||||
return files
|
||||
new_files = []
|
||||
for filename in files:
|
||||
if not isinstance(filename, unicode):
|
||||
filename = unicode(filename, encoding=encoding, errors=errors)
|
||||
new_files.append(filename)
|
||||
return new_files
|
||||
|
||||
Note that although we use :attr:`errors` in this example as what to pass to
|
||||
the codec that decodes to :class:`unicode` we could also have an
|
||||
:attr:`errors` argument that decides other things to do like skip a filename
|
||||
entirely, return a placeholder (``Nondisplayable filename``), or raise an
|
||||
exception.
|
||||
|
||||
This leaves us with one last failure to describe::
|
||||
|
||||
def first_field(csv_string):
|
||||
'''Return the first field in a comma separated values string.'''
|
||||
try:
|
||||
return csv_string[:csv_string.index(',')]
|
||||
except ValueError:
|
||||
return csv_string
|
||||
|
||||
This code looks simple enough. The hidden error here is that we are searching
|
||||
for a comma character in a byte :class:`str` but not all encodings will use
|
||||
the same sequence of bytes to represent the comma. If you use an encoding
|
||||
that's not :term:`ASCII` compatible on the byte level, then the literal comma
|
||||
``','`` in the above code will match inappropriate bytes. Some examples of
|
||||
how it can fail:
|
||||
|
||||
* Will find the byte representing an :term:`ASCII` comma in another character
|
||||
* Will find the comma but leave trailing garbage bytes on the end of the
|
||||
string
|
||||
* Will not match the character that represents the comma in this encoding
|
||||
|
||||
There are two ways to solve this. You can either take the encoding value from
|
||||
the user or you can take the separator value from the user. Of the two,
|
||||
taking the encoding is the better option for two reasons:
|
||||
|
||||
1. Taking a separator argument doesn't clearly document for the API user that
|
||||
the reason they must give it is to properly match the encoding of the
|
||||
:attr:`csv_string`. They're just as likely to think that it's simply a way
|
||||
to specify an alternate character (like ":" or "|") for the separator.
|
||||
2. It's possible for a variable width encoding to reuse the same byte sequence
|
||||
for different characters in multiple sequences.
|
||||
|
||||
.. note::
|
||||
|
||||
:term:`UTF-8` is resistant to this as any character's sequence of
|
||||
bytes will never be a subset of another character's sequence of bytes.
|
||||
|
||||
With that in mind, here's how to improve the API::
|
||||
|
||||
def first_field(csv_string, encoding='utf-8', errors='replace'):
|
||||
if not isinstance(csv_string, unicode):
|
||||
u_string = unicode(csv_string, encoding, errors)
|
||||
is_unicode = False
|
||||
else:
|
||||
u_string = csv_string
is_unicode = True
|
||||
|
||||
try:
|
||||
field = u_string[:u_string.index(u',')]
|
||||
except ValueError:
|
||||
return csv_string
|
||||
|
||||
if not is_unicode:
|
||||
field = field.encode(encoding, errors)
|
||||
return field
|
||||
|
||||
.. note::
|
||||
|
||||
If you decide you'll never encounter a variable width encoding that reuses
|
||||
byte sequences you can use this code instead::
|
||||
|
||||
def first_field(csv_string, encoding='utf-8'):
|
||||
try:
|
||||
return csv_string[:csv_string.index(','.encode(encoding))]
|
||||
except ValueError:
|
||||
return csv_string
|
||||
|
||||
------------------
|
||||
Separate functions
|
||||
------------------
|
||||
|
||||
Sometimes you want to be able to take either byte :class:`str` or
|
||||
:class:`unicode` strings, perform similar operations on either one and then
|
||||
return data in the same format as was given. Probably the easiest way to do
|
||||
that is to have separate functions for each and adopt a naming convention to
|
||||
show that one is for working with byte :class:`str` and the other is for
|
||||
working with :class:`unicode` strings::
|
||||
|
||||
def translate_b(msg, table):
|
||||
'''Replace values in str with other byte values like unicode.translate'''
|
||||
if not isinstance(msg, str):
|
||||
raise TypeError('msg must be of type str')
|
||||
str_table = [chr(s) for s in xrange(0,256)]
|
||||
delete_chars = []
|
||||
for chr_val in (k for k in table.keys() if isinstance(k, int)):
|
||||
if chr_val > 255:
|
||||
raise ValueError('Keys in table must not exceed 255')
|
||||
if table[chr_val] == None:
|
||||
delete_chars.append(chr(chr_val))
|
||||
elif isinstance(table[chr_val], int):
|
||||
if table[chr_val] > 255:
|
||||
raise TypeError('table values cannot be more than 255 or less than 0')
|
||||
str_table[chr_val] = chr(table[chr_val])
|
||||
else:
|
||||
if not isinstance(table[chr_val], str):
|
||||
raise TypeError('character mapping must return integer, None or str')
|
||||
str_table[chr_val] = table[chr_val]
|
||||
str_table = ''.join(str_table)
|
||||
delete_chars = ''.join(delete_chars)
|
||||
return msg.translate(str_table, delete_chars)
|
||||
|
||||
def translate(msg, table):
|
||||
'''Replace values in a unicode string with other values'''
|
||||
if not isinstance(msg, unicode):
|
||||
raise TypeError('msg must be of type unicode')
|
||||
return msg.translate(table)
|
||||
|
||||
There's several things that we have to do in this API:
|
||||
|
||||
* Because the function names might not be enough of a clue to the user of the
|
||||
functions of the value types that are expected, we have to check that the
|
||||
types are correct.
|
||||
|
||||
* We keep the behaviour of the two functions as close to the same as possible,
|
||||
just with byte :class:`str` and :class:`unicode` strings substituted for
|
||||
each other.
|
||||
|
||||
|
||||
-----------------------------------------------------------------
|
||||
Deciding whether to take str or unicode when no value is returned
|
||||
-----------------------------------------------------------------
|
||||
|
||||
Not all functions have a return value. Sometimes a function is there to
|
||||
interact with something external to python, for instance, writing a file out
|
||||
to disk or a method exists to update the internal state of a data structure.
|
||||
One of the main questions with these APIs is whether to take byte
|
||||
:class:`str`, :class:`unicode` string, or both. The answer depends on your
|
||||
use case but I'll give some examples here.
|
||||
|
||||
Writing to external data
|
||||
========================
|
||||
|
||||
When your information is going to an external data source like writing to
|
||||
a file you need to decide whether to take in :class:`unicode` strings or byte
|
||||
:class:`str`. Remember that most external data sources are not going to be
|
||||
dealing with unicode directly. Instead, they're going to be dealing with
|
||||
a sequence of bytes that may be interpreted as unicode. With that in mind,
|
||||
you either need to have the user give you a byte :class:`str` or convert to
|
||||
a byte :class:`str` inside the function.
|
||||
|
||||
Next you need to think about the type of data that you're receiving. If it's
|
||||
textual data, (for instance, this is a chat client and the user is typing
|
||||
messages that they expect to be read by another person) it probably makes sense to
|
||||
take in :class:`unicode` strings and do the conversion inside your function.
|
||||
On the other hand, if this is a lower level function that's passing data into
|
||||
a network socket, it probably should be taking byte :class:`str` instead.
|
||||
|
||||
Just as noted in the API notes above, you should specify an :attr:`encoding`
|
||||
and :attr:`errors` argument if you need to transform from :class:`unicode`
|
||||
string to byte :class:`str` and you are unable to guess the encoding from the
|
||||
data itself.
|
||||
|
||||
Updating data structures
|
||||
========================
|
||||
|
||||
Sometimes your API is just going to update a data structure and not
|
||||
immediately output that data anywhere. Just as when writing external data,
|
||||
you should think about both what your function is going to do with the data
|
||||
eventually and what the caller of your function is thinking that they're
|
||||
giving you. Most of the time, you'll want to take :class:`unicode` strings
|
||||
and enter them into the data structure as :class:`unicode` when the data is
|
||||
textual in nature. You'll want to take byte :class:`str` and enter them into
|
||||
the data structure as byte :class:`str` when the data is not text. Use
|
||||
a naming convention so the user knows what's expected.
|
||||
|
||||
-------------
|
||||
APIs to Avoid
|
||||
-------------
|
||||
|
||||
There are a few APIs that are just wrong. If you catch yourself making an API
|
||||
that does one of these things, change it before anyone sees your code.
|
||||
|
||||
Returning unicode unless a conversion fails
|
||||
===========================================
|
||||
|
||||
This type of API usually deals with byte :class:`str` at some point and
|
||||
converts it to :class:`unicode` because it's usually thought to be text.
|
||||
However, there are times when the bytes fail to convert to a :class:`unicode`
|
||||
string. When that happens, this API returns the raw byte :class:`str` instead
|
||||
of a :class:`unicode` string. One example of this is present in the |stdlib|_:
|
||||
python2's :func:`os.listdir`::
|
||||
|
||||
>>> import os
|
||||
>>> import locale
|
||||
>>> locale.getpreferredencoding()
|
||||
'UTF-8'
|
||||
>>> os.mkdir('/tmp/mine')
|
||||
>>> os.chdir('/tmp/mine')
|
||||
>>> open('nonsense_char_\xff', 'w').close()
|
||||
>>> open('all_ascii', 'w').close()
|
||||
>>> os.listdir(u'.')
|
||||
[u'all_ascii', 'nonsense_char_\xff']
|
||||
|
||||
The problem with APIs like this is that they cause failures that are hard to
|
||||
debug because they don't happen where the variables are set. For instance,
|
||||
let's say you take the filenames from :func:`os.listdir` and give it to this
|
||||
function::
|
||||
|
||||
def normalize_filename(filename):
|
||||
'''Change spaces and dashes into underscores'''
|
||||
return filename.translate({ord(u' '):u'_', ord(u'-'):u'_'})
|
||||
|
||||
When you test this, you use filenames that all are decodable in your preferred
|
||||
encoding and everything seems to work. But when this code is run on a machine
|
||||
that has filenames in multiple encodings the filenames returned by
|
||||
:func:`os.listdir` suddenly include byte :class:`str`. And byte :class:`str`
|
||||
has a different :func:`string.translate` function that takes different values.
|
||||
So the code raises an exception where it's not immediately obvious that
|
||||
:func:`os.listdir` is at fault.
|
||||
|
||||
Ignoring values with no chance of recovery
|
||||
==========================================
|
||||
|
||||
An early version of python3 attempted to fix the :func:`os.listdir` problem
|
||||
pointed out in the last section by returning all values that were decodable to
|
||||
:class:`unicode` and omitting the filenames that were not. This lead to the
|
||||
following output::
|
||||
|
||||
>>> import os
|
||||
>>> import locale
|
||||
>>> locale.getpreferredencoding()
|
||||
'UTF-8'
|
||||
>>> os.mkdir('/tmp/mine')
|
||||
>>> os.chdir('/tmp/mine')
|
||||
>>> open(b'nonsense_char_\xff', 'w').close()
|
||||
>>> open('all_ascii', 'w').close()
|
||||
>>> os.listdir('.')
|
||||
['all_ascii']
|
||||
|
||||
The issue with this type of code is that it is silently doing something
|
||||
surprising. The caller expects to get a full list of files back from
|
||||
:func:`os.listdir`. Instead, it silently ignores some of the files, returning
|
||||
only a subset. This leads to code that doesn't do what is expected that may
|
||||
go unnoticed until the code is in production and someone notices that
|
||||
something important is being missed.
|
||||
|
||||
Raising a UnicodeException with no chance of recovery
|
||||
=====================================================
|
||||
|
||||
Believe it or not, a few libraries exist that make it impossible to deal
|
||||
with unicode text without raising a :exc:`UnicodeError`. What seems to occur
|
||||
in these libraries is that the library has functions that expect to receive
|
||||
a :class:`unicode` string. However, internally, those functions call other
|
||||
functions that expect to receive a byte :class:`str`. The programmer of the
|
||||
API was smart enough to convert from a :class:`unicode` string to a byte
|
||||
:class:`str` but they did not give the user the chance to specify the
|
||||
encodings to use or how to deal with errors. This results in exceptions when
|
||||
the user passes in a byte :class:`str` because the initial function wants
|
||||
a :class:`unicode` string and exceptions when the user passes in
|
||||
a :class:`unicode` string because the function can't convert the string to
|
||||
bytes in the encoding that it's selected.
|
||||
|
||||
Do not put the user in the position of not being able to use your API without
|
||||
raising a :exc:`UnicodeError` with certain values. If you can only safely
|
||||
take :class:`unicode` strings, document that byte :class:`str` is not allowed
|
||||
and vice versa. If you have to convert internally, make sure to give the
|
||||
caller of your function parameters to control the encoding and how to treat
|
||||
errors that may occur during the encoding/decoding process. If your code will
|
||||
raise a :exc:`UnicodeError` with non-:term:`ASCII` values no matter what, you
|
||||
should probably rethink your API.
|
||||
|
||||
-----------------
|
||||
Knowing your data
|
||||
-----------------
|
||||
|
||||
If you've read all the way down to this section without skipping you've seen
|
||||
several admonitions about the type of data you are processing affecting the
|
||||
viability of the various API choices.
|
||||
|
||||
Here's a few things to consider in your data:
|
||||
|
||||
Do you need to operate on both bytes and unicode?
|
||||
=================================================
|
||||
|
||||
Much of the data in libraries, programs, and the general environment outside
|
||||
of python is written where strings are sequences of bytes. So when we
|
||||
interact with data that comes from outside of python or data that is about to
|
||||
leave python it may make sense to only operate on the data as a byte
|
||||
:class:`str`. There's two times when this may make sense:
|
||||
|
||||
1. The user is intended to hand the data to the function and then the function
|
||||
takes care of sending the data outside of python (to the filesystem, over
|
||||
the network, etc).
|
||||
2. The data is not representable as text. For instance, writing a binary
|
||||
file format.
|
||||
|
||||
Even when your code is operating in this area you still need to think a little
|
||||
more about your data. For instance, it might make sense for the person using
|
||||
your API to pass in :class:`unicode` strings and let the function convert that
|
||||
into the byte :class:`str` that it then sends over the wire.
|
||||
|
||||
There are also times when it might make sense to operate only on
|
||||
:class:`unicode` strings. :class:`unicode` represents text so anytime that
|
||||
you are working on textual data that isn't going to leave python it has the
|
||||
potential to be a :class:`unicode`-only API. However, there's two things that
|
||||
you should consider when designing a :class:`unicode`-only API:
|
||||
|
||||
1. As your API gains popularity, people are going to use your API in places
|
||||
that you may not have thought of. Corner cases in these other places may
|
||||
mean that processing bytes is desirable.
|
||||
2. In python2, byte :class:`str` and :class:`unicode` are often used
|
||||
interchangably with each other. That means that people programming against
|
||||
your API may have received :class:`str` from some other API and it would be
|
||||
most convenient for their code if your API accepted it.
|
||||
|
||||
.. note::
|
||||
|
||||
In python3, the separation between the text type and the byte type
|
||||
are more clear. So in python3, there's less need to have all APIs take
|
||||
both unicode and bytes.
|
||||
|
||||
Can you restrict the encodings?
|
||||
===============================
|
||||
If you determine that you have to deal with byte :class:`str` you should
|
||||
realize that not all encodings are created equal. Each has different
|
||||
properties that may make it possible to provide a simpler API provided that
|
||||
you can reasonably tell the users of your API that they cannot use certain
|
||||
classes of encodings.
|
||||
|
||||
As one example, if you are required to find a comma (``,``) in a byte
|
||||
:class:`str` you have different choices based on what encodings are allowed.
|
||||
If you can reasonably restrict your API users to only giving :term:`ASCII
|
||||
compatible` encodings you can do this simply by searching for the literal
|
||||
comma character because that character will be represented by the same byte
|
||||
sequence in all :term:`ASCII compatible` encodings.
|
||||
|
||||
The following are some classes of encodings to be aware of as you decide how
|
||||
generic your code needs to be.
|
||||
|
||||
Single byte encodings
|
||||
---------------------
|
||||
|
||||
Single byte encodings can only represent 256 total characters. They encode
|
||||
the :term:`code points` for a character to the equivalent number in a single
|
||||
byte.
|
||||
|
||||
Most single byte encodings are :term:`ASCII compatible`. :term:`ASCII
|
||||
compatible` encodings are the most likely to be usable without changes to code
|
||||
so this is good news.  A notable exception to this is the `EBCDIC
|
||||
<http://en.wikipedia.org/wiki/Extended_Binary_Coded_Decimal_Interchange_Code>`_
|
||||
family of encodings.
|
||||
|
||||
Multibyte encodings
|
||||
-------------------
|
||||
|
||||
Multibyte encodings use more than one byte to encode some characters.
|
||||
|
||||
Fixed width
|
||||
~~~~~~~~~~~
|
||||
|
||||
Fixed width encodings have a set number of bytes to represent all of the
|
||||
characters in the character set. ``UTF-32`` is an example of a fixed width
|
||||
encoding that uses four bytes per character and can express every unicode
|
||||
characters. There are a number of problems with writing APIs that need to
|
||||
operate on fixed width, multibyte characters. To go back to our earlier
|
||||
example of finding a comma in a string, we have to realize that even in
|
||||
``UTF-32`` where the :term:`code point` for :term:`ASCII` characters is the
|
||||
same as in :term:`ASCII`, the byte sequence for them is different. So you
|
||||
cannot search for the literal byte character as it may pick up false
|
||||
positives and may break a byte sequence in an odd place.
|
||||
|
||||
Variable Width
|
||||
~~~~~~~~~~~~~~
|
||||
|
||||
ASCII compatible
|
||||
""""""""""""""""
|
||||
|
||||
:term:`UTF-8` and the `EUC <http://en.wikipedia.org/wiki/Extended_Unix_Code>`_
|
||||
family of encodings are examples of :term:`ASCII compatible` multi-byte
|
||||
encodings. They achieve this by adhering to two principles:
|
||||
|
||||
* All of the :term:`ASCII` characters are represented by the byte that they
|
||||
are in the :term:`ASCII` encoding.
|
||||
* None of the :term:`ASCII` byte sequences are reused in any other byte
|
||||
sequence for a different character.
|
||||
|
||||
Escaped
|
||||
"""""""
|
||||
|
||||
Some multibyte encodings work by using only bytes from the :term:`ASCII`
|
||||
encoding but when a particular sequence of those bytes is found, they are
|
||||
interpreted as meaning something other than their :term:`ASCII` values.
|
||||
``UTF-7`` is one such encoding that can encode all of the unicode
|
||||
:term:`code points`.  For instance, here are some Japanese characters encoded as
|
||||
``UTF-7``::
|
||||
|
||||
>>> a = u'\u304f\u3089\u3068\u307f'
|
||||
>>> print a
|
||||
くらとみ
|
||||
>>> print a.encode('utf-7')
|
||||
+ME8wiTBoMH8-
|
||||
|
||||
These encodings can be used when you need to encode unicode data that may
|
||||
contain non-:term:`ASCII` characters for inclusion in an :term:`ASCII` only
|
||||
transport medium or file.
|
||||
|
||||
However, they are not :term:`ASCII compatible` in the sense that we used
|
||||
earlier as the bytes that represent a :term:`ASCII` character are being reused
|
||||
as part of other characters. If you were to search for a literal plus sign in
|
||||
this encoded string, you would run across many false positives, for instance.
|
||||
|
||||
Other
|
||||
"""""
|
||||
|
||||
There are many other popular variable width encodings, for instance ``UTF-16``
|
||||
and ``shift-JIS``. Many of these are not :term:`ASCII compatible` so you
|
||||
cannot search for a literal :term:`ASCII` character without danger of false
|
||||
positives or false negatives.
|
107
docs/glossary.rst
Normal file
107
docs/glossary.rst
Normal file
|
@ -0,0 +1,107 @@
|
|||
========
|
||||
Glossary
|
||||
========
|
||||
|
||||
.. glossary::
|
||||
|
||||
"Everything but the kitchen sink"
|
||||
An English idiom meaning to include nearly everything that you can
|
||||
think of.
|
||||
|
||||
API version
|
||||
Version that is meant for computer consumption. This version is
|
||||
parsable and comparable by computers. It contains information about
|
||||
a library's API so that computer software can decide whether it works
|
||||
with the software.
|
||||
|
||||
ASCII
|
||||
A character encoding that maps numbers to characters essential to
|
||||
American English.  It maps 128 characters using 7 bits.
|
||||
|
||||
.. seealso:: http://en.wikipedia.org/wiki/ASCII
|
||||
|
||||
ASCII compatible
|
||||
An encoding in which the particular byte that maps to a character in
|
||||
the :term:`ASCII` character set is only used to map to that character.
|
||||
This excludes EBCDIC based encodings and many multi-byte fixed and
|
||||
variable width encodings since they reuse the bytes that make up the
|
||||
:term:`ASCII` encoding for other purposes. :term:`UTF-8` is notable
|
||||
as a variable width encoding that is :term:`ASCII` compatible.
|
||||
|
||||
.. seealso::
|
||||
|
||||
http://en.wikipedia.org/wiki/Variable-width_encoding
|
||||
For another explanation of various ways bytes are mapped to
|
||||
characters in a possibly incompatible manner.
|
||||
|
||||
code points
|
||||
:term:`code point`
|
||||
|
||||
code point
|
||||
A number that maps to a particular abstract character. Code points
|
||||
make it so that we have a number pointing to a character without
|
||||
worrying about implementation details of how those numbers are stored
|
||||
for the computer to read. Encodings define how the code points map to
|
||||
particular sequences of bytes on disk and in memory.
|
||||
|
||||
control characters
|
||||
:term:`control character`
|
||||
|
||||
control character
|
||||
The set of characters in unicode that are used, not to display glyphs
|
||||
on the screen, but to tell the display or program to do something.
|
||||
|
||||
.. seealso:: http://en.wikipedia.org/wiki/Control_character
|
||||
|
||||
grapheme
|
||||
characters or pieces of characters that you might write on a page to
|
||||
make words, sentences, or other pieces of text.
|
||||
|
||||
.. seealso:: http://en.wikipedia.org/wiki/Grapheme
|
||||
|
||||
I18N
|
||||
I18N is an abbreviation for internationalization. It's often used to
|
||||
signify the need to translate words, number and date formats, and
|
||||
other pieces of data in a computer program so that it will work well
|
||||
for people who speak another language than yourself.
|
||||
|
||||
message catalogs
|
||||
:term:`message catalog`
|
||||
|
||||
message catalog
|
||||
Message catalogs contain translations for user-visible strings that
|
||||
are present in your code. Normally, you need to mark the strings to
|
||||
be translated by wrapping them in one of several :mod:`gettext`
|
||||
functions. The function serves two purposes:
|
||||
|
||||
1. It allows automated tools to find which strings are supposed to be
|
||||
extracted for translation.
|
||||
2. The functions perform the translation when the program is running.
|
||||
|
||||
.. seealso::
|
||||
`babel's documentation
|
||||
<http://babel.edgewall.org/wiki/Documentation/messages.html>`_
|
||||
for one method of extracting message catalogs from source
|
||||
code.
|
||||
|
||||
Murphy's Law
|
||||
"Anything that can go wrong, will go wrong."
|
||||
|
||||
.. seealso:: http://en.wikipedia.org/wiki/Murphy%27s_Law
|
||||
|
||||
release version
|
||||
Version that is meant for human consumption. This version is easy for
|
||||
a human to look at to decide how a particular version relates to other
|
||||
versions of the software.
|
||||
|
||||
textual width
|
||||
The amount of horizontal space a character takes up on a monospaced
|
||||
screen. The units are number of character cells or columns that it
|
||||
takes the place of.
|
||||
|
||||
UTF-8
|
||||
A character encoding that maps all unicode :term:`code points` to a sequence
|
||||
of bytes. It is compatible with :term:`ASCII`. It uses a variable
|
||||
number of bytes to encode all of unicode. ASCII characters take one
|
||||
byte. Characters from other parts of unicode take two to four bytes.
|
||||
It is widespread as an encoding on the internet and in Linux.
|
359
docs/hacking.rst
Normal file
359
docs/hacking.rst
Normal file
|
@ -0,0 +1,359 @@
|
|||
=======================================
|
||||
Conventions for contributing to kitchen
|
||||
=======================================
|
||||
|
||||
-----
|
||||
Style
|
||||
-----
|
||||
|
||||
* Strive to be :pep:`8` compliant
|
||||
* Run :command:`pylint` over the code and try to resolve most of its nitpicking
|
||||
|
||||
------------------------
|
||||
Python 2.3 compatibility
|
||||
------------------------
|
||||
|
||||
At the moment, we're supporting python-2.3 and above. Understand that there's
|
||||
a lot of python features that we cannot use because of this.
|
||||
|
||||
Sometimes modules in the |stdlib|_ can be added to kitchen so that they're
|
||||
available. When we do that we need to be careful of several things:
|
||||
|
||||
1. Keep the module in sync with the version in the python-2.x trunk. Use
|
||||
:file:`maintainers/sync-copied-files.py` for this.
|
||||
2. Sync the unittests as well as the module.
|
||||
3. Be aware that not all modules are written to remain compatible with
|
||||
Python-2.3 and might use python language features that were not present
|
||||
then (generator expressions, relative imports, decorators, with, try: with
|
||||
both except: and finally:, etc) These are not good candidates for
|
||||
importing into kitchen as they require more work to keep synced.
|
||||
|
||||
---------
|
||||
Unittests
|
||||
---------
|
||||
|
||||
* At least smoketest your code (make sure a function will return expected
|
||||
values for one set of inputs).
|
||||
* Note that even 100% coverage is not a guarantee of working code! Good tests
|
||||
will realize that you need to also give multiple inputs that test the code
|
||||
paths of called functions that are outside of your code. Example::
|
||||
|
||||
def to_unicode(msg, encoding='utf8', errors='replace'):
|
||||
return unicode(msg, encoding, errors)
|
||||
|
||||
# Smoketest only. This will give 100% coverage for your code (it
|
||||
# tests all of the code inside of to_unicode) but it leaves a lot of
|
||||
# room for errors as it doesn't test all combinations of arguments
|
||||
# that are then passed to the unicode() function.
|
||||
|
||||
tools.ok_(to_unicode('abc') == u'abc')
|
||||
|
||||
# Better -- tests now cover non-ascii characters and that error conditions
|
||||
# occur properly. There's a lot of other permutations that can be
|
||||
# added along these same lines.
|
||||
tools.ok_(to_unicode(u'café', 'utf8', 'replace'))
|
||||
tools.assert_raises(UnicodeError, to_unicode, [u'cafè ñunru'.encode('latin1')])
|
||||
|
||||
* We're using nose for unittesting. Rather than depend on unittest2
|
||||
functionality, use the functions that nose provides.
|
||||
* Remember to maintain python-2.3 compatibility even in unittests.
|
||||
|
||||
----------------------------
|
||||
Docstrings and documentation
|
||||
----------------------------
|
||||
|
||||
We use sphinx to build our documentation. We use the sphinx autodoc extension
|
||||
to pull docstrings out of the modules for API documentation. This means that
|
||||
docstrings for subpackages and modules should follow a certain pattern. The
|
||||
general structure is:
|
||||
|
||||
* Introductory material about a module in the module's top level docstring.
|
||||
|
||||
* Introductory material should begin with a level two title: an overbar and
|
||||
underbar of '-'.
|
||||
|
||||
* docstrings for every function.
|
||||
|
||||
* The first line is a short summary of what the function does
|
||||
* This is followed by a blank line
|
||||
* The next lines are a `field list
|
||||
<http://sphinx.pocoo.org/markup/desc.html#info-field-lists>`_ giving
|
||||
information about the function's signature. We use the keywords:
|
||||
``arg``, ``kwarg``, ``raises``, ``returns``, and sometimes ``rtype``. Use
|
||||
these to describe all arguments, key word arguments, exceptions raised,
|
||||
and return values using these.
|
||||
|
||||
* Parameters that are ``kwarg`` should specify what their default
|
||||
behaviour is.
|
||||
|
||||
.. _kitchen-versioning:
|
||||
|
||||
------------------
|
||||
Kitchen versioning
|
||||
------------------
|
||||
|
||||
Currently the kitchen library is in early stages of development. While we're
|
||||
in this state, the main kitchen library uses the following pattern for version
|
||||
information:
|
||||
|
||||
* Versions look like this::
|
||||
__version_info__ = ((0, 1, 2),)
|
||||
__version__ = '0.1.2'
|
||||
|
||||
* The Major version number remains at 0 until we decide to make the first 1.0
|
||||
release of kitchen. At that point, we're declaring that we have some
|
||||
confidence that we won't need to break backwards compatibility for a while.
|
||||
* The Minor version increments for any backwards incompatible API changes.
|
||||
When this is updated, we reset micro to zero.
|
||||
* The Micro version increments for any other changes (backwards compatible API
|
||||
changes, pure bugfixes, etc).
|
||||
|
||||
.. note::
|
||||
|
||||
Versioning is only updated for releases that generate sdists and new
|
||||
uploads to the download directory. Usually we update the version
|
||||
information for the library just before release. By contrast, we update
|
||||
kitchen :ref:`subpackage-versioning` when an API change is made. When in
|
||||
doubt, look at the version information in the last release.
|
||||
|
||||
----
|
||||
I18N
|
||||
----
|
||||
|
||||
All strings that are used as feedback for users need to be translated.
|
||||
:mod:`kitchen` sets up several functions for this. :func:`_` is used for
|
||||
marking things that are shown to users via print, GUIs, or other "standard"
|
||||
methods. Strings for exceptions are marked with :func:`b_`. This function
|
||||
returns a byte :class:`str` which is needed for use with exceptions::
|
||||
|
||||
from kitchen import _, b_
|
||||
|
||||
def print_message(msg, username):
|
||||
print _('%(user)s, your message of the day is: %(message)s') % {
|
||||
'message': msg, 'user': username}
|
||||
|
||||
raise Exception(b_('Test message'))
|
||||
|
||||
This serves several purposes:
|
||||
|
||||
* It marks the strings to be extracted by an xgettext-like program.
|
||||
* :func:`_` is a function that will substitute available translations at
|
||||
runtime.
|
||||
|
||||
.. note::
|
||||
|
||||
By using the ``%()s with dict`` style of string formatting, we make this
|
||||
string friendly to translators that may need to reorder the variables when
|
||||
they're translating the string.
|
||||
|
||||
`paver <http://www.blueskyonmars.com/projects/paver/>`_ and `babel
|
||||
<http://babel.edgewall.org/>`_ are used to extract the strings.
|
||||
|
||||
-----------
|
||||
API updates
|
||||
-----------
|
||||
|
||||
Kitchen strives to have a long deprecation cycle so that people have time to
|
||||
switch away from any APIs that we decide to discard. Discarded APIs should
|
||||
raise a :exc:`DeprecationWarning` and clearly state in the warning message and
|
||||
the docstring how to convert old code to use the new interface. An example of
|
||||
deprecating a function::
|
||||
|
||||
import warnings
|
||||
|
||||
from kitchen import _
|
||||
from kitchen.text.converters import to_bytes, to_unicode
|
||||
from kitchen.text.new_module import new_function
|
||||
|
||||
def old_function(param):
|
||||
'''**Deprecated**
|
||||
|
||||
This function is deprecated. Use
|
||||
:func:`kitchen.text.new_module.new_function` instead. If you want
|
||||
unicode strings as output, switch to::
|
||||
|
||||
>>> from kitchen.text.new_module import new_function
|
||||
>>> output = new_function(param)
|
||||
|
||||
If you want byte strings, use::
|
||||
|
||||
>>> from kitchen.text.new_module import new_function
|
||||
>>> from kitchen.text.converters import to_bytes
|
||||
>>> output = to_bytes(new_function(param))
|
||||
'''
|
||||
warnings.warn(_('kitchen.text.old_function is deprecated. Use'
|
||||
' kitchen.text.new_module.new_function instead'),
|
||||
DeprecationWarning, stacklevel=2)
|
||||
|
||||
as_unicode = isinstance(param, unicode)
|
||||
message = new_function(to_unicode(param))
|
||||
if not as_unicode:
|
||||
message = to_bytes(message)
|
||||
return message
|
||||
|
||||
If a particular API change is very intrusive, it may be better to create a new
|
||||
version of the subpackage and ship both the old version and the new version.
|
||||
|
||||
---------
|
||||
NEWS file
|
||||
---------
|
||||
|
||||
Update the :file:`NEWS` file when you make a change that will be visible to
|
||||
the users. This is not a ChangeLog file so we don't need to list absolutely
|
||||
everything but it should give the user an idea of how this version differs
|
||||
from prior versions.  API changes should be listed here explicitly.  Bugfixes
|
||||
can be more general::
|
||||
|
||||
-----
|
||||
0.2.0
|
||||
-----
|
||||
* Relicense to LGPLv2+
|
||||
* Add kitchen.text.format module with the following functions:
|
||||
textual_width, textual_width_chop.
|
||||
* Rename the kitchen.text.utils module to kitchen.text.misc. use of the
|
||||
old names is deprecated but still available.
|
||||
* bugfixes applied to kitchen.pycompat24.defaultdict that fixes some
|
||||
tracebacks
|
||||
|
||||
-------------------
|
||||
Kitchen subpackages
|
||||
-------------------
|
||||
|
||||
Kitchen itself is a namespace. The kitchen sdist (tarball) provides certain
|
||||
useful subpackages.
|
||||
|
||||
.. seealso::
|
||||
|
||||
`Kitchen addon packages`_
|
||||
For information about subpackages not distributed in the kitchen sdist
|
||||
that install into the kitchen namespace.
|
||||
|
||||
.. _subpackage-versioning:
|
||||
|
||||
Versioning
|
||||
==========
|
||||
|
||||
Each subpackage should have its own version information which is independent
|
||||
of the other kitchen subpackages and the main kitchen library version. This is
|
||||
used so that code that depends on kitchen APIs can check the version
|
||||
information. The standard way to do this is to put something like this in the
|
||||
subpackage's :file:`__init__.py`::
|
||||
|
||||
from kitchen.versioning import version_tuple_to_string
|
||||
|
||||
__version_info__ = ((1, 0, 0),)
|
||||
__version__ = version_tuple_to_string(__version_info__)
|
||||
|
||||
:attr:`__version_info__` is documented in :mod:`kitchen.versioning`. The
|
||||
values of the first tuple should describe API changes to the module. There
|
||||
are at least three numbers present in the tuple: (Major, minor, micro). The
|
||||
major version number is for backwards incompatible changes (For
|
||||
instance, removing a function, or adding a new mandatory argument to
|
||||
a function). Whenever one of these occurs, you should increment the major
|
||||
number and reset minor and micro to zero. The second number is the minor
|
||||
version. Anytime new but backwards compatible changes are introduced this
|
||||
number should be incremented and the micro version number reset to zero. The
|
||||
micro version should be incremented when a change is made that does not change
|
||||
the API at all. This is a common case for bugfixes, for instance.
|
||||
|
||||
Version information beyond the first three parts of the first tuple may be
|
||||
useful for versioning but semantically have similar meaning to the micro
|
||||
version.
|
||||
|
||||
.. note::
|
||||
|
||||
We update the :attr:`__version_info__` tuple when the API is updated.
|
||||
This way there's less chance of forgetting to update the API version when
|
||||
a new release is made. However, we try to only increment the version
|
||||
numbers a single step for any release. So if kitchen-0.1.0 has
|
||||
kitchen.text.__version__ == '1.0.1', kitchen-0.1.1 should have
|
||||
kitchen.text.__version__ == '1.0.2' or '1.1.0' or '2.0.0'.
|
||||
|
||||
Criteria for subpackages in kitchen
|
||||
===================================
|
||||
|
||||
Subpackages within kitchen should meet these criteria:
|
||||
|
||||
* Generally useful or needed for other pieces of kitchen.
|
||||
|
||||
* No mandatory requirements outside of the |stdlib|_.
|
||||
|
||||
* Optional requirements from outside the |stdlib|_ are allowed. Things with
|
||||
mandatory requirements are better placed in `kitchen addon packages`_
|
||||
|
||||
* Somewhat API stable -- this is not a hard requirement. We can change the
|
||||
kitchen api. However, it is better not to as people may come to depend on
|
||||
it.
|
||||
|
||||
.. seealso::
|
||||
|
||||
`API Updates`_
|
||||
|
||||
----------------------
|
||||
Kitchen addon packages
|
||||
----------------------
|
||||
|
||||
Addon packages are very similar to subpackages integrated into the kitchen
|
||||
sdist. This section just lists some of the differences to watch out for.
|
||||
|
||||
setup.py
|
||||
========
|
||||
|
||||
Your :file:`setup.py` should contain entries like this::
|
||||
|
||||
# It's suggested to use a dotted name like this so the package is easily
|
||||
# findable on pypi:
|
||||
setup(name='kitchen.config',
|
||||
# Include kitchen in the keywords, again, for searching on pypi
|
||||
keywords=['kitchen', 'configuration'],
|
||||
# This package lives in the directory kitchen/config
|
||||
packages=['kitchen.config'],
|
||||
# [...]
|
||||
)
|
||||
|
||||
Package directory layout
|
||||
========================
|
||||
|
||||
Create a :file:`kitchen` directory in the toplevel. Place the addon
|
||||
subpackage in there. For example::
|
||||
|
||||
./ <== toplevel with README, setup.py, NEWS, etc
|
||||
kitchen/
|
||||
kitchen/__init__.py
|
||||
kitchen/config/ <== subpackage directory
|
||||
kitchen/config/__init__.py
|
||||
|
||||
Fake kitchen module
|
||||
===================
|
||||
|
||||
The :file::`__init__.py` in the :file:`kitchen` directory is special. It
|
||||
won't be installed. It just needs to pull in the kitchen from the system so
|
||||
that you are able to test your module. You should be able to use this
|
||||
boilerplate::
|
||||
|
||||
# Fake module.  This is not installed; it's just made to import the real
|
||||
# kitchen modules for testing this module
|
||||
import pkgutil
|
||||
|
||||
# Extend the __path__ with everything in the real kitchen module
|
||||
__path__ = pkgutil.extend_path(__path__, __name__)
|
||||
|
||||
.. note::
|
||||
|
||||
:mod:`kitchen` needs to be findable by python for this to work. Installed
|
||||
in the :file:`site-packages` directory or adding it to the
|
||||
:envvar:`PYTHONPATH` will work.
|
||||
|
||||
Your unittests should now be able to find both your submodule and the main
|
||||
kitchen module.
|
||||
|
||||
Versioning
|
||||
==========
|
||||
|
||||
It is recommended that addon packages version similarly to
|
||||
:ref:`subpackage-versioning`. The :data:`__version_info__` and
|
||||
:data:`__version__` strings can be changed independently of the version
|
||||
exposed by setup.py so that you have both an API version
|
||||
(:data:`__version_info__`) and release version that's easier for people to
|
||||
parse. However, you aren't required to do this and you could follow
|
||||
a different methodology if you want (for instance, :ref:`kitchen-versioning`)
|
142
docs/index.rst
Normal file
142
docs/index.rst
Normal file
|
@ -0,0 +1,142 @@
|
|||
================================
|
||||
Kitchen, everything but the sink
|
||||
================================
|
||||
|
||||
:Author: Toshio Kuratomi
|
||||
:Date: 19 March 2011
|
||||
:Version: 1.0.x
|
||||
|
||||
We've all done it. In the process of writing a brand new application we've
|
||||
discovered that we need a little bit of code that we've invented before.
|
||||
Perhaps it's something to handle unicode text. Perhaps it's something to make
|
||||
a bit of python-2.5 code run on python-2.3. Whatever it is, it ends up being
|
||||
a tiny bit of code that seems too small to worry about pushing into its own
|
||||
module so it sits there, a part of your current project, waiting to be cut and
|
||||
pasted into your next project. And the next. And the next. And since that
|
||||
little bittybit of code proved so useful to you, it's highly likely that it
|
||||
proved useful to someone else as well. Useful enough that they've written it
|
||||
and copy and pasted it over and over into each of their new projects.
|
||||
|
||||
Well, no longer! Kitchen aims to pull these small snippets of code into a few
|
||||
python modules which you can import and use within your project. No more copy
|
||||
and paste! Now you can let someone else maintain and release these small
|
||||
snippets so that you can get on with your life.
|
||||
|
||||
This package forms the core of Kitchen. It contains some useful modules for
|
||||
using newer |stdlib|_ modules on older python versions, text manipulation,
|
||||
:pep:`386` versioning, and initializing :mod:`gettext`. With this package we're
|
||||
trying to provide a few useful features that don't have too many dependencies
|
||||
outside of the |stdlib|_. We'll be releasing other modules that drop into the
|
||||
kitchen namespace to add other features (possibly with larger deps) as time
|
||||
goes on.
|
||||
|
||||
------------
|
||||
Requirements
|
||||
------------
|
||||
|
||||
We've tried to keep the core kitchen module's requirements lightweight. At the
|
||||
moment kitchen only requires
|
||||
|
||||
:python: 2.3.1 or later
|
||||
|
||||
.. warning:: Kitchen-1.1.0 is likely to be the last release that supports
|
||||
python-2.3.x. Future releases will target python-2.4 as the minimum
|
||||
required version.
|
||||
|
||||
Soft Requirements
|
||||
=================
|
||||
|
||||
If found, these libraries will be used to make the implementation of some part
|
||||
of kitchen better in some way. If they are not present, the API that they
|
||||
enable will still exist but may function in a different manner.
|
||||
|
||||
`chardet <http://pypi.python.org/pypi/chardet>`_
|
||||
Used in :func:`~kitchen.text.misc.guess_encoding` and
|
||||
:func:`~kitchen.text.converters.guess_encoding_to_xml` to help guess
|
||||
encoding of byte strings being converted. If not present, unknown
|
||||
encodings will be converted as if they were ``latin1``
|
||||
|
||||
---------------------------
|
||||
Other Recommended Libraries
|
||||
---------------------------
|
||||
|
||||
These libraries implement commonly used functionality that everyone seems to
|
||||
invent. Rather than reinvent their wheel, I simply list the things that they
|
||||
do well for now. Perhaps if people can't find them normally, I'll add them as
|
||||
requirements in :file:`setup.py` or link them into kitchen's namespace. For
|
||||
now, I just mention them here:
|
||||
|
||||
`bunch <http://pypi.python.org/pypi/bunch/>`_
|
||||
Bunch is a dictionary that you can use attribute lookup as well as bracket
|
||||
notation to access. Setting it apart from most homebrewed implementations
|
||||
is the :func:`bunchify` function which will descend nested structures of
|
||||
lists and dicts, transforming the dicts to Bunch's.
|
||||
`hashlib <http://code.krypto.org/python/hashlib/>`_
|
||||
Python 2.5 and forward have a :mod:`hashlib` library that provides secure
|
||||
hash functions to python. If you're developing for python2.3 or
|
||||
python2.4, though, you can install the standalone hashlib library and have
|
||||
access to the same functions.
|
||||
`iterutils <http://pypi.python.org/pypi/iterutils/>`_
|
||||
The python documentation for :mod:`itertools` has some examples
|
||||
of other nice iterable functions that can be built from the
|
||||
:mod:`itertools` functions. This third-party module creates those recipes
|
||||
as a module.
|
||||
`ordereddict <http://pypi.python.org/pypi/ordereddict/>`_
|
||||
Python 2.7 and forward have a :mod:`~collections.OrderedDict` that
|
||||
provides a :class:`dict` whose items are ordered (and indexable) as well
|
||||
as named.
|
||||
`unittest2 <http://pypi.python.org/pypi/unittest2>`_
|
||||
Python 2.7 has an updated :mod:`unittest` library with new functions not
|
||||
present in the |stdlib|_ for Python 2.6 or less. If you want to use those
|
||||
new functions but need your testing framework to be compatible with older
|
||||
Python the unittest2 library provides the update as an external module.
|
||||
`nose <http://somethingaboutorange.com/mrl/projects/nose/>`_
|
||||
If you want to use a test discovery tool instead of the unittest
|
||||
framework, nosetests provides a simple to use way to do that.
|
||||
|
||||
-------
|
||||
License
|
||||
-------
|
||||
|
||||
This python module is distributed under the terms of the
|
||||
`GNU Lesser General Public License Version 2 or later
|
||||
<http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html>`_.
|
||||
|
||||
.. note:: Some parts of this module are licensed under terms less restrictive
|
||||
than the LGPLv2+. If you separate these files from the work as a whole
|
||||
you are allowed to use them under the less restrictive licenses. The
|
||||
following is a list of the files that are known:
|
||||
|
||||
`Python 2 license <http://www.python.org/download/releases/2.4/license/>`_
|
||||
:file:`_subprocess.py`, :file:`test_subprocess.py`,
|
||||
:file:`defaultdict.py`, :file:`test_defaultdict.py`,
|
||||
:file:`_base64.py`, and :file:`test_base64.py`
|
||||
|
||||
--------
|
||||
Contents
|
||||
--------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
tutorial
|
||||
api-overview
|
||||
porting-guide-0.3
|
||||
hacking
|
||||
glossary
|
||||
|
||||
------------------
|
||||
Indices and tables
|
||||
------------------
|
||||
|
||||
* :ref:`genindex`
|
||||
* :ref:`modindex`
|
||||
* :ref:`search`
|
||||
|
||||
-------------
|
||||
Project Pages
|
||||
-------------
|
||||
|
||||
More information about the project can be found on the |projpage|_
|
||||
|
||||
The latest published version of this documentation can be found on the |docpage|_
|
209
docs/porting-guide-0.3.rst
Normal file
209
docs/porting-guide-0.3.rst
Normal file
|
@ -0,0 +1,209 @@
|
|||
===================
|
||||
1.0.0 Porting Guide
|
||||
===================
|
||||
|
||||
The 0.1 through 1.0.0 releases focused on bringing in functions from yum and
|
||||
python-fedora. This porting guide tells how to port from those APIs to their
|
||||
kitchen replacements.
|
||||
|
||||
-------------
|
||||
python-fedora
|
||||
-------------
|
||||
|
||||
=================================== ===================
|
||||
python-fedora kitchen replacement
|
||||
----------------------------------- -------------------
|
||||
:func:`fedora.iterutils.isiterable` :func:`kitchen.iterutils.isiterable` [#f1]_
|
||||
:func:`fedora.textutils.to_unicode` :func:`kitchen.text.converters.to_unicode`
|
||||
:func:`fedora.textutils.to_bytes` :func:`kitchen.text.converters.to_bytes`
|
||||
=================================== ===================
|
||||
|
||||
.. [#f1] :func:`~kitchen.iterutils.isiterable` has changed slightly in
|
||||
kitchen. The :attr:`include_string` attribute has switched its default value
|
||||
from :data:`True` to :data:`False`. So you need to change code like::
|
||||
|
||||
>>> # Old code
|
||||
>>> isiterable('abcdef')
|
||||
True
|
||||
>>> # New code
|
||||
>>> isiterable('abcdef', include_string=True)
|
||||
True
|
||||
|
||||
---
|
||||
yum
|
||||
---
|
||||
|
||||
================================= ===================
|
||||
yum kitchen replacement
|
||||
--------------------------------- -------------------
|
||||
:func:`yum.i18n.dummy_wrapper` :meth:`kitchen.i18n.DummyTranslations.ugettext` [#y1]_
|
||||
:func:`yum.i18n.dummyP_wrapper` :meth:`kitchen.i18n.DummyTranslations.ungettext` [#y1]_
|
||||
:func:`yum.i18n.utf8_width` :func:`kitchen.text.display.textual_width`
|
||||
:func:`yum.i18n.utf8_width_chop` :func:`kitchen.text.display.textual_width_chop`
|
||||
and :func:`kitchen.text.display.textual_width` [#y2]_ [#y4]_
|
||||
:func:`yum.i18n.utf8_valid` :func:`kitchen.text.misc.byte_string_valid_encoding`
|
||||
:func:`yum.i18n.utf8_text_wrap` :func:`kitchen.text.display.wrap` [#y3]_
|
||||
:func:`yum.i18n.utf8_text_fill` :func:`kitchen.text.display.fill` [#y3]_
|
||||
:func:`yum.i18n.to_unicode` :func:`kitchen.text.converters.to_unicode` [#y5]_
|
||||
:func:`yum.i18n.to_unicode_maybe` :func:`kitchen.text.converters.to_unicode` [#y5]_
|
||||
:func:`yum.i18n.to_utf8` :func:`kitchen.text.converters.to_bytes` [#y5]_
|
||||
:func:`yum.i18n.to_str` :func:`kitchen.text.converters.to_unicode`
|
||||
or :func:`kitchen.text.converters.to_bytes` [#y6]_
|
||||
:func:`yum.i18n.str_eq` :func:`kitchen.text.misc.str_eq`
|
||||
:func:`yum.misc.to_xml` :func:`kitchen.text.converters.unicode_to_xml`
|
||||
or :func:`kitchen.text.converters.byte_string_to_xml` [#y7]_
|
||||
:func:`yum.i18n._` See: :ref:`yum-i18n-init`
|
||||
:func:`yum.i18n.P_` See: :ref:`yum-i18n-init`
|
||||
:func:`yum.i18n.exception2msg` :func:`kitchen.text.converters.exception_to_unicode`
|
||||
or :func:`kitchen.text.converters.exception_to_bytes` [#y8]_
|
||||
================================= ===================
|
||||
|
||||
.. [#y1] These yum methods provided fallback support for :mod:`gettext`
|
||||
functions in case either ``gaftonmode`` was set or :mod:`gettext` failed
|
||||
to return an object. In kitchen, we can use the
|
||||
:class:`kitchen.i18n.DummyTranslations` object to fulfill that role.
|
||||
Please see :ref:`yum-i18n-init` for more suggestions on how to do this.
|
||||
|
||||
.. [#y2] The yum version of these functions returned a byte :class:`str`. The
|
||||
kitchen version listed here returns a :class:`unicode` string. If you
|
||||
need a byte :class:`str` simply call
|
||||
:func:`kitchen.text.converters.to_bytes` on the result.
|
||||
|
||||
.. [#y3] The yum version of these functions would return either a byte
|
||||
:class:`str` or a :class:`unicode` string depending on what the input
|
||||
value was. The kitchen version always returns :class:`unicode` strings.
|
||||
|
||||
.. [#y4] :func:`yum.i18n.utf8_width_chop` performed two functions. It
|
||||
returned the piece of the message that fit in a specified width and the
|
||||
width of that message. In kitchen, you need to call two functions, one
|
||||
for each action::
|
||||
|
||||
>>> # Old way
|
||||
>>> utf8_width_chop(msg, 5)
|
||||
(5, 'く ku')
|
||||
>>> # New way
|
||||
>>> from kitchen.text.display import textual_width, textual_width_chop
|
||||
>>> (textual_width(msg), textual_width_chop(msg, 5))
|
||||
(5, u'く ku')
|
||||
|
||||
.. [#y5] If the yum version of :func:`~yum.i18n.to_unicode` or
|
||||
:func:`~yum.i18n.to_utf8` is given an object that is not a string, it
|
||||
returns the object itself. :func:`kitchen.text.converters.to_unicode` and
|
||||
:func:`kitchen.text.converters.to_bytes` default to returning the
|
||||
``simplerepr`` of the object instead. If you want the yum behaviour, set
|
||||
the :attr:`nonstring` parameter to ``passthru``::
|
||||
|
||||
>>> from kitchen.text.converters import to_unicode
|
||||
>>> to_unicode(5)
|
||||
u'5'
|
||||
>>> to_unicode(5, nonstring='passthru')
|
||||
5
|
||||
|
||||
.. [#y6] :func:`yum.i18n.to_str` could return either a byte :class:`str` or
|
||||
a :class:`unicode` string. In kitchen you can get the same effect but you
|
||||
get to choose whether you want a byte :class:`str` or a :class:`unicode`
|
||||
string. Use :func:`~kitchen.text.converters.to_bytes` for :class:`str`
|
||||
and :func:`~kitchen.text.converters.to_unicode` for :class:`unicode`.
|
||||
|
||||
.. [#y7] :func:`yum.misc.to_xml` was buggy as written. I think the intention
|
||||
was for you to be able to pass a byte :class:`str` or :class:`unicode`
|
||||
string in and get out a byte :class:`str` that was valid to use in an xml
|
||||
file. The two kitchen functions
|
||||
:func:`~kitchen.text.converters.byte_string_to_xml` and
|
||||
:func:`~kitchen.text.converters.unicode_to_xml` do that for each string
|
||||
type.
|
||||
|
||||
.. [#y8] When porting :func:`yum.i18n.exception2msg` to use kitchen, you
|
||||
should setup two wrapper functions to aid in your port. They'll look like
|
||||
this:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from kitchen.text.converters import EXCEPTION_CONVERTERS, \
|
||||
BYTE_EXCEPTION_CONVERTERS, exception_to_unicode, \
|
||||
exception_to_bytes
|
||||
def exception2umsg(e):
|
||||
'''Return a unicode representation of an exception'''
|
||||
c = [lambda e: e.value]
|
||||
c.extend(EXCEPTION_CONVERTERS)
|
||||
return exception_to_unicode(e, converters=c)
|
||||
def exception2bmsg(e):
|
||||
'''Return a utf8 encoded str representation of an exception'''
|
||||
c = [lambda e: e.value]
|
||||
c.extend(BYTE_EXCEPTION_CONVERTERS)
|
||||
return exception_to_bytes(e, converters=c)
|
||||
|
||||
The reason to define this wrapper is that many of the exceptions in yum
|
||||
put the message in the :attr:`value` attribute of the :exc:`Exception`
|
||||
instead of adding it to the :attr:`args` attribute. So the default
|
||||
:data:`~kitchen.text.converters.EXCEPTION_CONVERTERS` don't know where to
|
||||
find the message. The wrapper tells kitchen to check the :attr:`value`
|
||||
attribute for the message. The reason to define two wrappers may be less
|
||||
obvious. :func:`yum.i18n.exception2msg` can return a :class:`unicode`
|
||||
string or a byte :class:`str` depending on a combination of what
|
||||
attributes are present on the :exc:`Exception` and what locale the
|
||||
function is being run in. By contrast,
|
||||
:func:`kitchen.text.converters.exception_to_unicode` only returns
|
||||
:class:`unicode` strings and
|
||||
:func:`kitchen.text.converters.exception_to_bytes` only returns byte
|
||||
:class:`str`. This is much safer as it keeps code that can only handle
|
||||
:class:`unicode` or only handle byte :class:`str` correctly from getting
|
||||
the wrong type when an input changes but it means you need to examine the
|
||||
calling code when porting from :func:`yum.i18n.exception2msg` and use the
|
||||
appropriate wrapper.
|
||||
|
||||
.. _yum-i18n-init:
|
||||
|
||||
Initializing Yum i18n
|
||||
=====================
|
||||
|
||||
Previously, yum had several pieces of code to initialize i18n. From the
|
||||
toplevel of :file:`yum/i18n.py`::
|
||||
|
||||
try:
|
||||
'''
|
||||
Setup the yum translation domain and make _() and P_() translation wrappers
|
||||
available.
|
||||
using ugettext to make sure translated strings are in Unicode.
|
||||
'''
|
||||
import gettext
|
||||
t = gettext.translation('yum', fallback=True)
|
||||
_ = t.ugettext
|
||||
P_ = t.ungettext
|
||||
except:
|
||||
'''
|
||||
Something went wrong so we make a dummy _() wrapper there is just
|
||||
returning the same text
|
||||
'''
|
||||
_ = dummy_wrapper
|
||||
P_ = dummyP_wrapper
|
||||
|
||||
With kitchen, this can be changed to this::
|
||||
|
||||
from kitchen.i18n import easy_gettext_setup, DummyTranslations
|
||||
try:
|
||||
_, P_ = easy_gettext_setup('yum')
|
||||
except:
|
||||
translations = DummyTranslations()
|
||||
_ = translations.ugettext
|
||||
P_ = translations.ungettext
|
||||
|
||||
.. note:: In :ref:`overcoming-frustration`, it is mentioned that for some
|
||||
things (like exception messages), using the byte :class:`str` oriented
|
||||
functions is more appropriate. If this is desired, the setup portion is
|
||||
only a second call to :func:`kitchen.i18n.easy_gettext_setup`::
|
||||
|
||||
b_, bP_ = easy_gettext_setup('yum', use_unicode=False)
|
||||
|
||||
The second place where i18n is setup is in :meth:`yum.YumBase._getConfig` in
|
||||
:file:`yum/__init_.py` if ``gaftonmode`` is in effect::
|
||||
|
||||
if startupconf.gaftonmode:
|
||||
global _
|
||||
_ = yum.i18n.dummy_wrapper
|
||||
|
||||
This can be changed to::
|
||||
|
||||
if startupconf.gaftonmode:
|
||||
global _
|
||||
_ = DummyTranslations().ugettext
|
19
docs/tutorial.rst
Normal file
19
docs/tutorial.rst
Normal file
|
@ -0,0 +1,19 @@
|
|||
================================
|
||||
Using kitchen to write good code
|
||||
================================
|
||||
|
||||
Kitchen's functions won't automatically make you a better programmer. You
|
||||
have to learn when and how to use them as well. This section of the
|
||||
documentation is intended to show you some of the ways that you can apply
|
||||
kitchen's functions to problems that may have arisen in your life. The goal
|
||||
of this section is to give you enough information to understand what the
|
||||
kitchen API can do for you and where in the :ref:`KitchenAPI` docs to look
|
||||
for something that can help you with your next issue. Along the way,
|
||||
you might pick up the knack for identifying issues with your code before you
|
||||
publish it. And that *will* make you a better coder.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
unicode-frustrations
|
||||
designing-unicode-apis
|
571
docs/unicode-frustrations.rst
Normal file
571
docs/unicode-frustrations.rst
Normal file
|
@ -0,0 +1,571 @@
|
|||
.. _overcoming-frustration:
|
||||
|
||||
==========================================================
|
||||
Overcoming frustration: Correctly using unicode in python2
|
||||
==========================================================
|
||||
|
||||
In python-2.x, there's two types that deal with text.
|
||||
|
||||
1. :class:`str` is for strings of bytes. These are very similar in nature to
|
||||
how strings are handled in C.
|
||||
2. :class:`unicode` is for strings of unicode :term:`code points`.
|
||||
|
||||
.. note::
|
||||
|
||||
**Just what the dickens is "Unicode"?**
|
||||
|
||||
One mistake that people encountering this issue for the first time make is
|
||||
confusing the :class:`unicode` type and the encodings of unicode stored in
|
||||
the :class:`str` type. In python, the :class:`unicode` type stores an
|
||||
abstract sequence of :term:`code points`. Each :term:`code point`
|
||||
represents a :term:`grapheme`. By contrast, byte :class:`str` stores
|
||||
a sequence of bytes which can then be mapped to a sequence of :term:`code
|
||||
points`. Each unicode encoding (:term:`UTF-8`, UTF-7, UTF-16, UTF-32,
|
||||
etc) maps different sequences of bytes to the unicode :term:`code points`.
|
||||
|
||||
What does that mean to you as a programmer? When you're dealing with text
|
||||
manipulations (finding the number of characters in a string or cutting
|
||||
a string on word boundaries) you should be dealing with :class:`unicode`
|
||||
strings as they abstract characters in a manner that's appropriate for
|
||||
thinking of them as a sequence of letters that you will see on a page.
|
||||
When dealing with I/O, reading to and from the disk, printing to
|
||||
a terminal, sending something over a network link, etc, you should be dealing
|
||||
with byte :class:`str` as those devices are going to need to deal with
|
||||
concrete implementations of what bytes represent your abstract characters.
|
||||
|
||||
In the python2 world many APIs use these two classes interchangeably but there
|
||||
are several important APIs where only one or the other will do the right
|
||||
thing. When you give the wrong type of string to an API that wants the other
|
||||
type, you may end up with an exception being raised (:exc:`UnicodeDecodeError`
|
||||
or :exc:`UnicodeEncodeError`). However, these exceptions aren't always raised
|
||||
because python implicitly converts between types... *sometimes*.
|
||||
|
||||
-----------------------------------
|
||||
Frustration #1: Inconsistent Errors
|
||||
-----------------------------------
|
||||
|
||||
Although converting when possible seems like the right thing to do, it's
|
||||
actually the first source of frustration. A programmer can test out their
|
||||
program with a string like: ``The quick brown fox jumped over the lazy dog``
|
||||
and not encounter any issues. But when they release their software into the
|
||||
wild, someone enters the string: ``I sat down for coffee at the café`` and
|
||||
suddenly an exception is thrown. The reason? The mechanism that converts
|
||||
between the two types is only able to deal with :term:`ASCII` characters.
|
||||
Once you throw non-:term:`ASCII` characters into your strings, you have to
|
||||
start dealing with the conversion manually.
|
||||
|
||||
So, if I manually convert everything to either byte :class:`str` or
|
||||
:class:`unicode` strings, will I be okay? The answer is.... *sometimes*.
|
||||
|
||||
---------------------------------
|
||||
Frustration #2: Inconsistent APIs
|
||||
---------------------------------
|
||||
|
||||
The problem you run into when converting everything to byte :class:`str` or
|
||||
:class:`unicode` strings is that you'll be using someone else's API quite
|
||||
often (this includes the APIs in the |stdlib|_) and find that the API will only
|
||||
accept byte :class:`str` or only accept :class:`unicode` strings. Or worse,
|
||||
that the code will accept either when you're dealing with strings that consist
|
||||
solely of :term:`ASCII` but throw an error when you give it a string that's
|
||||
got non-:term:`ASCII` characters. When you encounter these APIs you first
|
||||
need to identify which type will work better and then you have to convert your
|
||||
values to the correct type for that code. Thus the programmer that wants to
|
||||
proactively fix all unicode errors in their code needs to do two things:
|
||||
|
||||
1. You must keep track of what type your sequences of text are. Does
|
||||
``my_sentence`` contain :class:`unicode` or :class:`str`? If you don't
|
||||
know that then you're going to be in for a world of hurt.
|
||||
2. Anytime you call a function you need to evaluate whether that function will
|
||||
do the right thing with :class:`str` or :class:`unicode` values. Sending
|
||||
the wrong value here will lead to a :exc:`UnicodeError` being thrown when
|
||||
the string contains non-:term:`ASCII` characters.
|
||||
|
||||
.. note::
|
||||
|
||||
There is one mitigating factor here. The python community has been
|
||||
standardizing on using :class:`unicode` in all its APIs. Although there
|
||||
are some APIs that you need to send byte :class:`str` to in order to be
|
||||
safe, (including things as ubiquitous as :func:`print` as we'll see in the
|
||||
next section), it's getting easier and easier to use :class:`unicode`
|
||||
strings with most APIs.
|
||||
|
||||
------------------------------------------------
|
||||
Frustration #3: Inconsistent treatment of output
|
||||
------------------------------------------------
|
||||
|
||||
Alright, since the python community is moving to using :class:`unicode`
|
||||
strings everywhere, we might as well convert everything to :class:`unicode`
|
||||
strings and use that by default, right? Sounds good most of the time but
|
||||
there's at least one huge caveat to be aware of. Anytime you output text to
|
||||
the terminal or to a file, the text has to be converted into a byte
|
||||
:class:`str`. Python will try to implicitly convert from :class:`unicode` to
|
||||
byte :class:`str`... but it will throw an exception if the bytes are
|
||||
non-:term:`ASCII`::
|
||||
|
||||
>>> string = unicode(raw_input(), 'utf8')
|
||||
café
|
||||
>>> log = open('/var/tmp/debug.log', 'w')
|
||||
>>> log.write(string)
|
||||
Traceback (most recent call last):
|
||||
File "<stdin>", line 1, in <module>
|
||||
UnicodeEncodeError: 'ascii' codec can't encode character u'\xe9' in position 3: ordinal not in range(128)
|
||||
|
||||
Okay, this is simple enough to solve: Just convert to a byte :class:`str` and
|
||||
we're all set::
|
||||
|
||||
>>> string = unicode(raw_input(), 'utf8')
|
||||
café
|
||||
>>> string_for_output = string.encode('utf8', 'replace')
|
||||
>>> log = open('/var/tmp/debug.log', 'w')
|
||||
>>> log.write(string_for_output)
|
||||
>>>
|
||||
|
||||
So that was simple, right? Well... there's one gotcha that makes things a bit
|
||||
harder to debug sometimes. When you attempt to write non-:term:`ASCII`
|
||||
:class:`unicode` strings to a file-like object you get a traceback every time.
|
||||
But what happens when you use :func:`print`? The terminal is a file-like object
|
||||
so it should raise an exception right? The answer to that is....
|
||||
*sometimes*:
|
||||
|
||||
.. code-block:: pycon
|
||||
|
||||
$ python
|
||||
>>> print u'café'
|
||||
café
|
||||
|
||||
No exception. Okay, we're fine then?
|
||||
|
||||
We are until someone does one of the following:
|
||||
|
||||
* Runs the script in a different locale:
|
||||
|
||||
.. code-block:: pycon
|
||||
|
||||
$ LC_ALL=C python
|
||||
>>> # Note: if you're using a good terminal program when running in the C locale
|
||||
>>> # The terminal program will prevent you from entering non-ASCII characters
|
||||
>>> # python will still recognize them if you use the codepoint instead:
|
||||
>>> print u'caf\xe9'
|
||||
Traceback (most recent call last):
|
||||
File "<stdin>", line 1, in <module>
|
||||
UnicodeEncodeError: 'ascii' codec can't encode character u'\xe9' in position 3: ordinal not in range(128)
|
||||
|
||||
* Redirects output to a file:
|
||||
|
||||
.. code-block:: pycon
|
||||
|
||||
$ cat test.py
|
||||
#!/usr/bin/python -tt
|
||||
# -*- coding: utf-8 -*-
|
||||
print u'café'
|
||||
$ ./test.py >t
|
||||
Traceback (most recent call last):
|
||||
File "./test.py", line 4, in <module>
|
||||
print u'café'
|
||||
UnicodeEncodeError: 'ascii' codec can't encode character u'\xe9' in position 3: ordinal not in range(128)
|
||||
|
||||
Okay, the locale thing is a pain but understandable: the C locale doesn't
|
||||
understand any characters outside of :term:`ASCII` so naturally attempting to
|
||||
display those won't work. Now why does redirecting to a file cause problems?
|
||||
It's because :func:`print` in python2 is treated specially. Whereas the other
|
||||
file-like objects in python always convert to :term:`ASCII` unless you set
|
||||
them up differently, using :func:`print` to output to the terminal will use
|
||||
the user's locale to convert before sending the output to the terminal. When
|
||||
:func:`print` is not outputting to the terminal (being redirected to a file,
|
||||
for instance), :func:`print` decides that it doesn't know what locale to use
|
||||
for that file and so it tries to convert to :term:`ASCII` instead.
|
||||
|
||||
So what does this mean for you, as a programmer? Unless you have the luxury
|
||||
of controlling how your users use your code, you should always, always, always
|
||||
convert to a byte :class:`str` before outputting strings to the terminal or to
|
||||
a file. Python even provides you with a facility to do just this. If you
|
||||
know that every :class:`unicode` string you send to a particular file-like
|
||||
object (for instance, :data:`~sys.stdout`) should be converted to a particular
|
||||
encoding you can use a :class:`codecs.StreamWriter` object to convert from
|
||||
a :class:`unicode` string into a byte :class:`str`. In particular,
|
||||
:func:`codecs.getwriter` will return a :class:`~codecs.StreamWriter` class
|
||||
that will help you to wrap a file-like object for output. Using our
|
||||
:func:`print` example:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
$ cat test.py
|
||||
#!/usr/bin/python -tt
|
||||
# -*- coding: utf-8 -*-
|
||||
import codecs
|
||||
import sys
|
||||
|
||||
UTF8Writer = codecs.getwriter('utf8')
|
||||
sys.stdout = UTF8Writer(sys.stdout)
|
||||
print u'café'
|
||||
$ ./test.py >t
|
||||
$ cat t
|
||||
café
|
||||
|
||||
-----------------------------------------
|
||||
Frustrations #4 and #5 -- The other shoes
|
||||
-----------------------------------------
|
||||
|
||||
In English, there's a saying "waiting for the other shoe to drop". It means
|
||||
that when one event (usually bad) happens, you come to expect another event
|
||||
(usually worse) to come after. In this case we have two other shoes.
|
||||
|
||||
|
||||
Frustration #4: Now it doesn't take byte strings?!
|
||||
==================================================
|
||||
|
||||
If you wrap :data:`sys.stdout` using :func:`codecs.getwriter` and think you
|
||||
are now safe to print any variable without checking its type I am afraid
|
||||
I must inform you that you're not paying enough attention to :term:`Murphy's
|
||||
Law`. The :class:`~codecs.StreamWriter` that :func:`codecs.getwriter`
|
||||
provides will take :class:`unicode` strings and transform them into byte
|
||||
:class:`str` before they get to :data:`sys.stdout`. The problem is if you
|
||||
give it something that's already a byte :class:`str` it tries to transform
|
||||
that as well. To do that it tries to turn the byte :class:`str` you give it
|
||||
into :class:`unicode` and then transform that back into a byte :class:`str`...
|
||||
and since it uses the :term:`ASCII` codec to perform those conversions,
|
||||
chances are that it'll blow up when making them::
|
||||
|
||||
>>> import codecs
|
||||
>>> import sys
|
||||
>>> UTF8Writer = codecs.getwriter('utf8')
|
||||
>>> sys.stdout = UTF8Writer(sys.stdout)
|
||||
>>> print 'café'
|
||||
Traceback (most recent call last):
|
||||
File "<stdin>", line 1, in <module>
|
||||
File "/usr/lib64/python2.6/codecs.py", line 351, in write
|
||||
data, consumed = self.encode(object, self.errors)
|
||||
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 3: ordinal not in range(128)
|
||||
|
||||
To work around this, kitchen provides an alternate version of
|
||||
:func:`codecs.getwriter` that can deal with both byte :class:`str` and
|
||||
:class:`unicode` strings. Use :func:`kitchen.text.converters.getwriter` in
|
||||
place of the :mod:`codecs` version like this::
|
||||
|
||||
>>> import sys
|
||||
>>> from kitchen.text.converters import getwriter
|
||||
>>> UTF8Writer = getwriter('utf8')
|
||||
>>> sys.stdout = UTF8Writer(sys.stdout)
|
||||
>>> print u'café'
|
||||
café
|
||||
>>> print 'café'
|
||||
café
|
||||
|
||||
Frustration #5: Exceptions
|
||||
==========================
|
||||
|
||||
Okay, so we've gotten ourselves this far. We convert everything to
|
||||
:class:`unicode` strings. We're aware that we need to convert back into byte
|
||||
:class:`str` before we write to the terminal. We've worked around the
|
||||
inability of the standard :func:`~codecs.getwriter` to deal with both byte
|
||||
:class:`str` and :class:`unicode` strings. Are we all set? Well, there's at
|
||||
least one more gotcha: raising exceptions with a :class:`unicode` message.
|
||||
Take a look:
|
||||
|
||||
.. code-block:: pycon
|
||||
|
||||
>>> class MyException(Exception):
|
||||
>>> pass
|
||||
>>>
|
||||
>>> raise MyException(u'Cannot do this')
|
||||
Traceback (most recent call last):
|
||||
File "<stdin>", line 1, in <module>
|
||||
__main__.MyException: Cannot do this
|
||||
>>> raise MyException(u'Cannot do this while at a café')
|
||||
Traceback (most recent call last):
|
||||
File "<stdin>", line 1, in <module>
|
||||
__main__.MyException:
|
||||
>>>
|
||||
|
||||
No, I didn't truncate that last line; raising exceptions really cannot handle
|
||||
non-:term:`ASCII` characters in a :class:`unicode` string and will output an
|
||||
exception without the message if the message contains them. What happens if
|
||||
we try to use the handy dandy :func:`~kitchen.text.converters.getwriter` trick
|
||||
to work around this?
|
||||
|
||||
.. code-block:: pycon
|
||||
|
||||
>>> import sys
|
||||
>>> from kitchen.text.converters import getwriter
|
||||
>>> sys.stderr = getwriter('utf8')(sys.stderr)
|
||||
>>> raise MyException(u'Cannot do this')
|
||||
Traceback (most recent call last):
|
||||
File "<stdin>", line 1, in <module>
|
||||
__main__.MyException: Cannot do this
|
||||
>>> raise MyException(u'Cannot do this while at a café')
|
||||
Traceback (most recent call last):
|
||||
File "<stdin>", line 1, in <module>
|
||||
__main__.MyException>>>
|
||||
|
||||
Not only did this also fail, it even swallowed the trailing newline that's
|
||||
normally there.... So how to make this work? Transform from :class:`unicode`
|
||||
strings to byte :class:`str` manually before outputting::
|
||||
|
||||
>>> from kitchen.text.converters import to_bytes
|
||||
>>> raise MyException(to_bytes(u'Cannot do this while at a café'))
|
||||
Traceback (most recent call last):
|
||||
File "<stdin>", line 1, in <module>
|
||||
__main__.MyException: Cannot do this while at a café
|
||||
>>>
|
||||
|
||||
.. warning::
|
||||
|
||||
If you use :func:`codecs.getwriter` on :data:`sys.stderr`, you'll find
|
||||
that raising an exception with a byte :class:`str` is broken by the
|
||||
default :class:`~codecs.StreamWriter` as well. Don't do that or you'll
|
||||
have no way to output non-:term:`ASCII` characters. If you want to use
|
||||
a :class:`~codecs.StreamWriter` to encode other things on stderr while
|
||||
still having working exceptions, use
|
||||
:func:`kitchen.text.converters.getwriter`.
|
||||
|
||||
-------------------------------------------
|
||||
Frustration #6: Inconsistent APIs Part deux
|
||||
-------------------------------------------
|
||||
Sometimes you do everything right in your code but other people's code fails
|
||||
you. With unicode issues this happens more often than we want. A glaring
|
||||
example of this is when you get values back from a function that aren't
|
||||
consistently :class:`unicode` string or byte :class:`str`.
|
||||
|
||||
An example from the |stdlib|_ is :mod:`gettext`. The :mod:`gettext` functions
|
||||
are used to help translate messages that you display to users in the users'
|
||||
native languages. Since most languages contain letters outside of the
|
||||
:term:`ASCII` range, the values that are returned contain unicode characters.
|
||||
:mod:`gettext` provides you with :meth:`~gettext.GNUTranslations.ugettext` and
|
||||
:meth:`~gettext.GNUTranslations.ungettext` to return these translations as
|
||||
:class:`unicode` strings and :meth:`~gettext.GNUTranslations.gettext`,
|
||||
:meth:`~gettext.GNUTranslations.ngettext`,
|
||||
:meth:`~gettext.GNUTranslations.lgettext`, and
|
||||
:meth:`~gettext.GNUTranslations.lngettext` to return them as encoded byte
|
||||
:class:`str`. Unfortunately, even though they're documented to return only
|
||||
one type of string or the other, the implementation has corner cases where the
|
||||
wrong type can be returned.
|
||||
|
||||
This means that even if you separate your :class:`unicode` string and byte
|
||||
:class:`str` correctly before you pass your strings to a :mod:`gettext`
|
||||
function, afterwards, you might have to check that you have the right sort of
|
||||
string type again.
|
||||
|
||||
.. note::
|
||||
|
||||
:mod:`kitchen.i18n` provides alternate gettext translation objects that
|
||||
return only byte :class:`str` or only :class:`unicode` string.
|
||||
|
||||
---------------
|
||||
A few solutions
|
||||
---------------
|
||||
|
||||
Now that we've identified the issues, can we define a comprehensive strategy
|
||||
for dealing with them?
|
||||
|
||||
Convert text at the border
|
||||
==========================
|
||||
|
||||
If you get some piece of text from a library, read from a file, etc, turn it
|
||||
into a :class:`unicode` string immediately. Since python is moving in the
|
||||
direction of :class:`unicode` strings everywhere it's going to be easier to
|
||||
work with :class:`unicode` strings within your code.
|
||||
|
||||
If your code is heavily involved with using things that are bytes, you can do
|
||||
the opposite and convert all text into byte :class:`str` at the border and
|
||||
only convert to :class:`unicode` when you need it for passing to another
|
||||
library or performing string operations on it.
|
||||
|
||||
In either case, the important thing is to pick a default type for strings and
|
||||
stick with it throughout your code. When you mix the types it becomes much
|
||||
easier to operate on a string with a function that can only use the other type
|
||||
by mistake.
|
||||
|
||||
.. note:: In python3, the abstract unicode type becomes much more prominent.
|
||||
The type named ``str`` is the equivalent of python2's :class:`unicode` and
|
||||
python3's ``bytes`` type replaces python2's :class:`str`. Most APIs deal
|
||||
in the unicode type of string with just some pieces that are low level
|
||||
dealing with bytes. The implicit conversions between bytes and unicode
|
||||
is removed and whenever you want to make the conversion you need to do so
|
||||
explicitly.
|
||||
|
||||
When the data needs to be treated as bytes (or unicode) use a naming convention
|
||||
===============================================================================
|
||||
|
||||
Sometimes you're converting nearly all of your data to :class:`unicode`
|
||||
strings but you have one or two values where you have to keep byte
|
||||
:class:`str` around. This is often the case when you need to use the value
|
||||
verbatim with some external resource. For instance, filenames or key values
|
||||
in a database. When you do this, use a naming convention for the data you're
|
||||
working with so you (and others reading your code later) don't get confused
|
||||
about what's being stored in the value.
|
||||
|
||||
If you need both a textual string to present to the user and a byte value for
|
||||
an exact match, consider keeping both versions around. You can either use two
|
||||
variables for this or a :class:`dict` whose key is the byte value.
|
||||
|
||||
.. note:: You can use the naming convention used in kitchen as a guide for
|
||||
implementing your own naming convention. It prefixes byte :class:`str`
|
||||
variables of unknown encoding with ``b_`` and byte :class:`str` of known
|
||||
encoding with the encoding name like: ``utf8_``. If the default was to
|
||||
handle :class:`str` and only keep a few :class:`unicode` values, those
|
||||
variables would be prefixed with ``u_``.
|
||||
|
||||
When outputting data, convert back into bytes
|
||||
=============================================
|
||||
|
||||
When you go to send your data back outside of your program (to the filesystem,
|
||||
over the network, displaying to the user, etc) turn the data back into a byte
|
||||
:class:`str`. How you do this will depend on the expected output format of
|
||||
the data. For displaying to the user, you can use the user's default encoding
|
||||
using :func:`locale.getpreferredencoding`. For entering into a file, your best
|
||||
bet is to pick a single encoding and stick with it.
|
||||
|
||||
.. warning::
|
||||
|
||||
When using the encoding that the user has set (for instance, using
|
||||
:func:`locale.getpreferredencoding`), remember that they may have their
|
||||
encoding set to something that can't display every single unicode
|
||||
character. That means when you convert from :class:`unicode` to a byte
|
||||
:class:`str` you need to decide what should happen if the byte value is
|
||||
not valid in the user's encoding. For purposes of displaying messages to
|
||||
the user, it's usually okay to use the ``replace`` encoding error handler
|
||||
to replace the invalid characters with a question mark or other symbol
|
||||
meaning the character couldn't be displayed.
|
||||
|
||||
You can use :func:`kitchen.text.converters.getwriter` to do this automatically
|
||||
for :data:`sys.stdout`. When creating exception messages be sure to convert
|
||||
to bytes manually.
|
||||
|
||||
When writing unittests, include non-ASCII values and both unicode and str type
|
||||
==============================================================================
|
||||
|
||||
Unless you know that a specific portion of your code will only deal with
|
||||
:term:`ASCII`, be sure to include non-:term:`ASCII` values in your unittests.
|
||||
Including a few characters from several different scripts is highly advised as
|
||||
well because some code may have special cased accented roman characters but
|
||||
not know how to handle characters used in Asian alphabets.
|
||||
|
||||
Similarly, unless you know that that portion of your code will only be given
|
||||
:class:`unicode` strings or only byte :class:`str` be sure to try variables
|
||||
of both types in your unittests. When doing this, make sure that the
|
||||
variables are also non-:term:`ASCII` as python's implicit conversion will mask
|
||||
problems with pure :term:`ASCII` data. In many cases, it makes sense to check
|
||||
what happens if byte :class:`str` and :class:`unicode` strings that won't
|
||||
decode in the present locale are given.
|
||||
|
||||
Be vigilant about spotting poor APIs
|
||||
====================================
|
||||
|
||||
Make sure that the libraries you use return only :class:`unicode` strings or
|
||||
byte :class:`str`. Unittests can help you spot issues here by running many
|
||||
variations of data through your functions and checking that you're still
|
||||
getting the types of string that you expect.
|
||||
|
||||
Example: Putting this all together with kitchen
|
||||
===============================================
|
||||
|
||||
The kitchen library provides a wide array of functions to help you deal with
|
||||
byte :class:`str` and :class:`unicode` strings in your program. Here's
|
||||
a short example that uses many kitchen functions to do its work::
|
||||
|
||||
#!/usr/bin/python -tt
|
||||
# -*- coding: utf-8 -*-
|
||||
import locale
|
||||
import os
|
||||
import sys
|
||||
import unicodedata
|
||||
|
||||
from kitchen.text.converters import getwriter, to_bytes, to_unicode
|
||||
from kitchen.i18n import get_translation_object
|
||||
|
||||
if __name__ == '__main__':
|
||||
# Setup gettext driven translations but use the kitchen functions so
|
||||
# we don't have the mismatched bytes-unicode issues.
|
||||
translations = get_translation_object('example')
|
||||
# We use _() for marking strings that we operate on as unicode
|
||||
# This is pretty much everything
|
||||
_ = translations.ugettext
|
||||
# And b_() for marking strings that we operate on as bytes.
|
||||
# This is limited to exceptions
|
||||
b_ = translations.lgettext
|
||||
|
||||
# Setup stdout
|
||||
encoding = locale.getpreferredencoding()
|
||||
Writer = getwriter(encoding)
|
||||
sys.stdout = Writer(sys.stdout)
|
||||
|
||||
# Load data. Format is filename\0description
|
||||
# description should be utf-8 but filename can be any legal filename
|
||||
# on the filesystem
|
||||
# Sample datafile.txt:
|
||||
# /etc/shells\x00Shells available on caf\xc3\xa9.lan
|
||||
# /var/tmp/file\xff\x00File with non-utf8 data in the filename
|
||||
#
|
||||
# And to create /var/tmp/file\xff (under bash or zsh) do:
|
||||
# echo 'Some data' > /var/tmp/file$'\377'
|
||||
datafile = open('datafile.txt', 'r')
|
||||
data = {}
|
||||
for line in datafile:
|
||||
# We're going to keep filename as bytes because we will need the
|
||||
# exact bytes to access files on a POSIX operating system.
|
||||
# description, we'll immediately transform into unicode type.
|
||||
b_filename, description = line.split('\0', 1)
|
||||
|
||||
# to_unicode defaults to decoding output from utf-8 and replacing
|
||||
# any problematic bytes with the unicode replacement character
|
||||
# We accept mangling of the description here knowing that our file
|
||||
# format is supposed to use utf-8 in that field and that the
|
||||
# description will only be displayed to the user, not used as
|
||||
# a key value.
|
||||
description = to_unicode(description, 'utf-8').strip()
|
||||
data[b_filename] = description
|
||||
datafile.close()
|
||||
|
||||
# We're going to add a pair of extra fields onto our data to show the
|
||||
# length of the description and the filesize. We put those between
|
||||
# the filename and description because we haven't checked that the
|
||||
# description is free of NULLs.
|
||||
datafile = open('newdatafile.txt', 'w')
|
||||
|
||||
# Name filename with a b_ prefix to denote byte string of unknown encoding
|
||||
for b_filename in data:
|
||||
# Since we have the byte representation of filename, we can read any
|
||||
# filename
|
||||
if os.access(b_filename, os.F_OK):
|
||||
size = os.path.getsize(b_filename)
|
||||
else:
|
||||
size = 0
|
||||
# Because the description is unicode type, we know the number of
|
||||
# characters corresponds to the length of the normalized unicode
|
||||
# string.
|
||||
length = len(unicodedata.normalize('NFC', description))
|
||||
|
||||
# Print a summary to the screen
|
||||
# Note that we do not let implicit type conversion from str to
|
||||
# unicode transform b_filename into a unicode string. That might
|
||||
# fail as python would use the ASCII filename. Instead we use
|
||||
# to_unicode() to explicitly transform in a way that we know will
|
||||
# not traceback.
|
||||
print _(u'filename: %s') % to_unicode(b_filename)
|
||||
print _(u'file size: %s') % size
|
||||
print _(u'desc length: %s') % length
|
||||
print _(u'description: %s') % data[b_filename]
|
||||
|
||||
# First combine the unicode portion
|
||||
line = u'%s\0%s\0%s' % (size, length, data[b_filename])
|
||||
# Since the filenames are bytes, turn everything else to bytes before combining
|
||||
# Turning into unicode first would be wrong as the bytes in b_filename
|
||||
# might not convert
|
||||
b_line = '%s\0%s\n' % (b_filename, to_bytes(line))
|
||||
|
||||
# Just to demonstrate that getwriter will pass bytes through fine
|
||||
print b_('Wrote: %s') % b_line
|
||||
datafile.write(b_line)
|
||||
datafile.close()
|
||||
|
||||
# And just to show how to properly deal with an exception.
|
||||
# Note two things about this:
|
||||
# 1) We use the b_() function to translate the string. This returns a
|
||||
# byte string instead of a unicode string
|
||||
# 2) We're using the b_() function returned by kitchen. If we had
|
||||
# used the one from gettext we would need to convert the message to
|
||||
# a byte str first
|
||||
message = u'Demonstrate the proper way to raise exceptions. Sincerely, \u3068\u3057\u304a'
|
||||
raise Exception(b_(message))
|
||||
|
||||
.. seealso:: :mod:`kitchen.text.converters`
|
41
kitchen/__init__.py
Normal file
41
kitchen/__init__.py
Normal file
|
@ -0,0 +1,41 @@
|
|||
# -*- coding: utf-8 -*-
#
# Copyright (c) 2011 Red Hat, Inc
#
# kitchen is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# kitchen is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with kitchen; if not, see <http://www.gnu.org/licenses/>
#
# Authors:
#   Toshio Kuratomi <toshio@fedoraproject.org>
#
'''
Kitchen

Aggregate of a bunch of unrelated but helpful python modules.
'''

# Pylint disabled messages:
# :C0103: We need gettext aliases for both unicode strings and byte strings.
#   The byte string one (b_) triggers this warning.
from kitchen import i18n
from kitchen import versioning

# Translation functions for kitchen's own messages:
# _()/N_() return unicode strings; b_()/bN_() return byte strings.
(_, N_) = i18n.easy_gettext_setup('kitchen.core')
#pylint: disable-msg=C0103
(b_, bN_) = i18n.easy_gettext_setup('kitchen.core', use_unicode=False)
#pylint: enable-msg=C0103

# Package version, expressed as a tuple and rendered to the canonical
# string form by kitchen's own versioning helper.
__version_info__ = ((1, 1, 1),)
__version__ = versioning.version_tuple_to_string(__version_info__)

# NOTE(review): 'release' is listed here but is not imported above --
# presumably a sibling submodule; confirm it exists in the package.
__all__ = ('exceptions', 'release',)
|
9
kitchen/collections/__init__.py
Normal file
9
kitchen/collections/__init__.py
Normal file
|
@ -0,0 +1,9 @@
|
|||
from kitchen.versioning import version_tuple_to_string

# Version of the kitchen.collections subpackage
__version_info__ = ((1, 1, 0),)
__version__ = version_tuple_to_string(__version_info__)

# Re-export the strictdict module and its StrictDict class so callers can
# simply use kitchen.collections.StrictDict.
from kitchen.collections import strictdict
from kitchen.collections.strictdict import StrictDict

__all__ = ('strictdict', 'StrictDict',)
|
87
kitchen/collections/strictdict.py
Normal file
87
kitchen/collections/strictdict.py
Normal file
|
@ -0,0 +1,87 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2010 Red Hat, Inc
|
||||
#
|
||||
# kitchen is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# kitchen is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with kitchen; if not, see <http://www.gnu.org/licenses/>
|
||||
#
|
||||
# Authors:
|
||||
# Toshio Kuratomi <toshio@fedoraproject.org>
|
||||
'''
|
||||
----------
|
||||
StrictDict
|
||||
----------
|
||||
|
||||
:class:`kitchen.collections.StrictDict` provides a dictionary that treats
|
||||
:class:`str` and :class:`unicode` as distinct key values.
|
||||
'''
|
||||
|
||||
# Pylint disabled messages:
|
||||
# :C0111: We're implementing the dict interface so just reference the dict
|
||||
# documentation rather than having our own docstrings
|
||||
|
||||
try:
|
||||
# :E0611: Pylint false positive. We try to import from the stdlib but we
|
||||
# have a fallback so this is okay.
|
||||
#pylint:disable-msg=E0611
|
||||
from collections import defaultdict
|
||||
except ImportError:
|
||||
from kitchen.pycompat25.collections import defaultdict
|
||||
|
||||
class StrictDict(defaultdict):
|
||||
'''
|
||||
Map class that considers :class:`unicode` and :class:`str` different keys
|
||||
|
||||
Ordinarily when you are dealing with a :class:`dict` keyed on strings you
|
||||
want to have keys that have the same characters end up in the same bucket
|
||||
even if one key is :class:`unicode` and the other is a byte :class:`str`.
|
||||
The normal :class:`dict` type does this for :term:`ASCII` characters (but
|
||||
not for anything outside of the :term:`ASCII` range.)
|
||||
|
||||
Sometimes, however, you want to keep the two string classes strictly
|
||||
separate, for instance, if you're creating a single table that can map
|
||||
from :class:`unicode` characters to :class:`str` characters and vice
|
||||
versa. This class will help you do that by making all :class:`unicode`
|
||||
keys evaluate to a different key than all :class:`str` keys.
|
||||
|
||||
.. seealso::
|
||||
:class:`dict`
|
||||
for documentation on this class's methods. This class implements
|
||||
all the standard :class:`dict` methods. Its treatment of
|
||||
:class:`unicode` and :class:`str` keys as separate is the only
|
||||
difference.
|
||||
|
||||
'''
|
||||
#pylint:disable-msg=C0111
|
||||
def __getitem__(self, key):
|
||||
return defaultdict.__getitem__(self, (repr(key), key))
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
defaultdict.__setitem__(self, (repr(key), key), value)
|
||||
|
||||
def __delitem__(self, key):
|
||||
defaultdict.__delitem__(self, (repr(key), key))
|
||||
|
||||
def __iter__(self):
|
||||
for i in defaultdict.__iter__(self):
|
||||
yield i[1]
|
||||
|
||||
iterkeys = __iter__
|
||||
|
||||
def keys(self):
|
||||
return list(self.__iter__())
|
||||
|
||||
def __contains__(self, key):
|
||||
return defaultdict.__contains__(self, (repr(key), key))
|
||||
|
||||
__all__ = ('StrictDict',)
|
35
kitchen/exceptions.py
Normal file
35
kitchen/exceptions.py
Normal file
|
@ -0,0 +1,35 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2010 Red Hat, Inc
|
||||
#
|
||||
# kitchen is free software; you can redistribute it and/or modify it under the
|
||||
# terms of the GNU Lesser General Public License as published by the Free
|
||||
# Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# kitchen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
|
||||
# more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public License
|
||||
# along with kitchen; if not, see <http://www.gnu.org/licenses/>
|
||||
#
|
||||
# Authors:
|
||||
# Toshio Kuratomi <toshio@fedoraproject.org>
|
||||
#
|
||||
'''
|
||||
-----------------------
|
||||
Base kitchen exceptions
|
||||
-----------------------
|
||||
|
||||
Exception classes for kitchen and the root of the exception hierarchy for
|
||||
all kitchen modules.
|
||||
'''
|
||||
|
||||
class KitchenError(Exception):
    '''Root of the exception hierarchy for errors raised directly by
    kitchen.
    '''

__all__ = ('KitchenError',)
|
827
kitchen/i18n/__init__.py
Normal file
827
kitchen/i18n/__init__.py
Normal file
|
@ -0,0 +1,827 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2010-2011 Red Hat, Inc
|
||||
# Copyright (c) 2009 Milos Komarcevic
|
||||
# Copyright (c) 2008 Tim Lauridsen
|
||||
#
|
||||
# kitchen is free software; you can redistribute it and/or modify it under the
|
||||
# terms of the GNU Lesser General Public License as published by the Free
|
||||
# Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# kitchen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
|
||||
# more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public License
|
||||
# along with kitchen; if not, see <http://www.gnu.org/licenses/>
|
||||
#
|
||||
# Authors: James Antill
|
||||
# Milos Komarcevic
|
||||
# Toshio Kuratomi <toshio@fedoraproject.org>
|
||||
# Tim Lauridsen
|
||||
# Luke Macken <lmacken@redhat.com>
|
||||
# Seth Vidal <skvidal@fedoraproject.org>
|
||||
#
|
||||
# Portions of code taken from yum/i18n.py
|
||||
# Portions of code adapted from |stdlib|_ gettext.py
|
||||
'''
|
||||
:term:`I18N` is an important piece of any modern program. Unfortunately,
|
||||
setting up :term:`i18n` in your program is often a confusing process. The
|
||||
functions provided here aim to make the programming side of that a little
|
||||
easier.
|
||||
|
||||
Most projects will be able to do something like this when they startup::
|
||||
|
||||
# myprogram/__init__.py:
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
from kitchen.i18n import easy_gettext_setup
|
||||
|
||||
_, N_ = easy_gettext_setup('myprogram', localedirs=(
|
||||
os.path.join(os.path.realpath(os.path.dirname(__file__)), 'locale'),
|
||||
os.path.join(sys.prefix, 'lib', 'locale')
|
||||
))
|
||||
|
||||
Then, in other files that have strings that need translating::
|
||||
|
||||
# myprogram/commands.py:
|
||||
|
||||
from myprogram import _, N_
|
||||
|
||||
def print_usage():
|
||||
print _(u"""available commands are:
|
||||
--help Display help
|
||||
--version Display version of this program
|
||||
--bake-me-a-cake as fast as you can
|
||||
""")
|
||||
|
||||
def print_invitations(age):
|
||||
print _('Please come to my party.')
|
||||
print N_('I will be turning %(age)s year old',
|
||||
'I will be turning %(age)s years old', age) % {'age': age}
|
||||
|
||||
See the documentation of :func:`easy_gettext_setup` and
|
||||
:func:`get_translation_object` for more details.
|
||||
|
||||
.. seealso::
|
||||
|
||||
:mod:`gettext`
|
||||
for details of how the python gettext facilities work
|
||||
`babel <http://babel.edgewall.org>`_
|
||||
The babel module for in depth information on gettext, :term:`message
|
||||
catalogs`, and translating your app. babel provides some nice
|
||||
features for :term:`i18n` on top of :mod:`gettext`
|
||||
'''
|
||||
# Pylint disabled messages:
|
||||
# :E1101: NewGNUTranslations is modeled as a replacement for GNUTranslations.
|
||||
# That module invokes the _parse message to create some of its attributes.
|
||||
# Pylint doesn't see those attributes being defined since it doesn't know
|
||||
# when _parse() is called. We disable E1101 when accessing self._catalog
|
||||
# and self.plural for this reason.
|
||||
# :C0103: We're replicating the gettext API here so we need to use method and
|
||||
# parameter names that mirror gettext.
|
||||
# :C0111: We're replicating the gettext API here so for the gettext
|
||||
# translation object methods we point people at the stdlib docs
|
||||
|
||||
from kitchen.versioning import version_tuple_to_string
|
||||
|
||||
__version_info__ = ((2, 1, 1),)
|
||||
__version__ = version_tuple_to_string(__version_info__)
|
||||
|
||||
import copy
|
||||
from errno import ENOENT
|
||||
import gettext
|
||||
import itertools
|
||||
import locale
|
||||
import os
|
||||
import sys
|
||||
|
||||
# We use the _default_localedir definition in get_translation_object
try:
    # _default_localedir is a private name inside the stdlib gettext module;
    # guard the import in case a python version does not provide it.
    from gettext import _default_localedir as _DEFAULT_LOCALEDIR
except ImportError:
    # Fall back to the conventional <sys.prefix>/share/locale location
    _DEFAULT_LOCALEDIR = os.path.join(sys.prefix, 'share', 'locale')

from kitchen.text.converters import to_bytes, to_unicode
from kitchen.text.misc import byte_string_valid_encoding

# We cache parts of the translation objects just like stdlib's gettext so that
# we don't reparse the message files and keep them in memory separately if the
# same catalog is opened twice.
_translations = {}
|
||||
|
||||
class DummyTranslations(object, gettext.NullTranslations):
    '''Safer version of :class:`gettext.NullTranslations`

    This Translations class doesn't translate the strings and is intended to
    be used as a fallback when there were errors setting up a real
    Translations object.  It's safer than :class:`gettext.NullTranslations` in
    its handling of byte :class:`str` vs :class:`unicode` strings.

    Unlike :class:`~gettext.NullTranslations`, this Translation class will
    never throw a :exc:`~exceptions.UnicodeError`.  The code that you have
    around a call to :class:`DummyTranslations` might throw
    a :exc:`~exceptions.UnicodeError` but at least that will be in code you
    control and can fix.  Also, unlike :class:`~gettext.NullTranslations` all
    of this Translation object's methods guarantee to return byte :class:`str`
    except for :meth:`ugettext` and :meth:`ungettext` which guarantee to
    return :class:`unicode` strings.

    When byte :class:`str` are returned, the strings will be encoded according
    to this algorithm:

    1) If a fallback has been added, the fallback will be called first.
       You'll need to consult the fallback to see whether it performs any
       encoding changes.
    2) If a byte :class:`str` was given, the same byte :class:`str` will
       be returned.
    3) If a :class:`unicode` string was given and :meth:`set_output_charset`
       has been called then we encode the string using the
       :attr:`output_charset`
    4) If a :class:`unicode` string was given and this is :meth:`gettext` or
       :meth:`ngettext` and :attr:`_charset` was set output in that charset.
    5) If a :class:`unicode` string was given and this is :meth:`gettext`
       or :meth:`ngettext` we encode it using 'utf-8'.
    6) If a :class:`unicode` string was given and this is :meth:`lgettext`
       or :meth:`lngettext` we encode using the value of
       :func:`locale.getpreferredencoding`

    For :meth:`ugettext` and :meth:`ungettext`, we go through the same set of
    steps with the following differences:

    * We transform byte :class:`str` into :class:`unicode` strings for
      these methods.
    * The encoding used to decode the byte :class:`str` is taken from
      :attr:`input_charset` if it's set, otherwise we decode using
      :term:`UTF-8`.

    .. attribute:: input_charset

        is an extension to the |stdlib|_ :mod:`gettext` that specifies what
        charset a message is encoded in when decoding a message to
        :class:`unicode`.  This is used for two purposes:

        1) If the message string is a byte :class:`str`, this is used to
           decode the string to a :class:`unicode` string before looking it
           up in the :term:`message catalog`.
        2) In :meth:`~kitchen.i18n.DummyTranslations.ugettext` and
           :meth:`~kitchen.i18n.DummyTranslations.ungettext` methods, if a
           byte :class:`str` is given as the message and is untranslated this
           is used as the encoding when decoding to :class:`unicode`.  This
           is different from :attr:`_charset` which may be set when
           a :term:`message catalog` is loaded because :attr:`input_charset`
           is used to describe an encoding used in a python source file while
           :attr:`_charset` describes the encoding used in the
           :term:`message catalog` file.

    Any characters that aren't able to be transformed from a byte :class:`str`
    to :class:`unicode` string or vice versa will be replaced with
    a replacement character (ie: ``u'�'`` in unicode based encodings, ``'?'``
    in other :term:`ASCII` compatible encodings).

    .. seealso::

        :class:`gettext.NullTranslations`
            For information about what methods are available and what they do.

    .. versionchanged:: kitchen-1.1.0 ; API kitchen.i18n 2.1.0
        * Although we had adapted :meth:`gettext`, :meth:`ngettext`,
          :meth:`lgettext`, and :meth:`lngettext` to always return byte
          :class:`str`, we hadn't forced those byte :class:`str` to always be
          in a specified charset.  We now make sure that :meth:`gettext` and
          :meth:`ngettext` return byte :class:`str` encoded using
          :attr:`output_charset` if set, otherwise :attr:`charset` and if
          neither of those, :term:`UTF-8`.  With :meth:`lgettext` and
          :meth:`lngettext` :attr:`output_charset` if set, otherwise
          :func:`locale.getpreferredencoding`.
        * Make setting :attr:`input_charset` and :attr:`output_charset` also
          set those attributes on any fallback translation objects.
    '''
    #pylint: disable-msg=C0103,C0111
    def __init__(self, fp=None):
        gettext.NullTranslations.__init__(self, fp)

        # Python 2.3 compat: _output_charset is not created by the base
        # class there, so make sure the attribute always exists.
        if not hasattr(self, '_output_charset'):
            self._output_charset = None

        # Extension for making ugettext and ungettext more sane.
        # 'utf-8' is only a default here.  Users can override.
        self._input_charset = 'utf-8'

    def _set_input_charset(self, charset):
        # Propagate the charset down the fallback chain so every object
        # decodes messages consistently; fallbacks that don't support
        # input_charset (plain stdlib translation objects) are skipped.
        if self._fallback:
            try:
                self._fallback.input_charset = charset
            except AttributeError:
                pass
        self._input_charset = charset

    def _get_input_charset(self):
        return self._input_charset

    # Property so that assigning to input_charset also updates any fallback
    # translation objects (see _set_input_charset).
    input_charset = property(_get_input_charset, _set_input_charset)

    def set_output_charset(self, charset):
        '''Set the output charset

        This serves two purposes.  The normal
        :meth:`gettext.NullTranslations.set_output_charset` does not set the
        output on fallback objects.  On python-2.3,
        :class:`gettext.NullTranslations` objects don't contain this method.
        '''
        if self._fallback:
            try:
                self._fallback.set_output_charset(charset)
            except AttributeError:
                pass
        try:
            gettext.NullTranslations.set_output_charset(self, charset)
        except AttributeError:
            # python-2.3: the base class lacks set_output_charset(); store
            # the value on the attribute directly instead.
            self._output_charset = charset

    if not hasattr(gettext.NullTranslations, 'output_charset'):
        def output_charset(self):
            '''Compatibility for python2.3 which doesn't have output_charset'''
            return self._output_charset

    def _reencode_if_necessary(self, message, output_encoding):
        '''Return a byte string that's valid in a specific charset.

        .. warning:: This method may mangle the message if the input encoding
            is not known or the message isn't representable in the chosen
            output encoding.
        '''
        valid = False
        msg = None
        try:
            valid = byte_string_valid_encoding(message, output_encoding)
        except TypeError:
            # input was unicode, so it needs to be encoded
            pass

        if valid:
            # Byte str already valid in the output encoding; pass through
            # unchanged.
            return message
        try:
            # Decode to unicode so we can re-encode to desired encoding
            msg = to_unicode(message, encoding=self.input_charset,
                    nonstring='strict')
        except TypeError:
            # Not a string; return an empty byte string
            return ''

        # Make sure that we're returning a str of the desired encoding
        return to_bytes(msg, encoding=output_encoding)

    def gettext(self, message):
        # First use any fallback gettext objects.  Since DummyTranslations
        # doesn't do any translation on its own, this is a good first step.
        if self._fallback:
            try:
                message = self._fallback.gettext(message)
            except (AttributeError, UnicodeError):
                # Ignore UnicodeErrors: We'll do our own encoding next
                pass

        # Next decide what encoding to use for the strings we return
        output_encoding = (self._output_charset or self._charset or
                self.input_charset)

        return self._reencode_if_necessary(message, output_encoding)

    def ngettext(self, msgid1, msgid2, n):
        # Default
        if n == 1:
            message = msgid1
        else:
            message = msgid2

        # The fallback method might return something different
        if self._fallback:
            try:
                message = self._fallback.ngettext(msgid1, msgid2, n)
            except (AttributeError, UnicodeError):
                # Ignore UnicodeErrors: We'll do our own encoding next
                pass

        # Next decide what encoding to use for the strings we return
        output_encoding = (self._output_charset or self._charset or
                self.input_charset)

        return self._reencode_if_necessary(message, output_encoding)

    def lgettext(self, message):
        if self._fallback:
            try:
                message = self._fallback.lgettext(message)
            except (AttributeError, UnicodeError):
                # Ignore UnicodeErrors: we'll do our own encoding next
                # AttributeErrors happen on py2.3 where lgettext is not
                # implemented
                pass

        # Next decide what encoding to use for the strings we return
        output_encoding = (self._output_charset or
                locale.getpreferredencoding())

        return self._reencode_if_necessary(message, output_encoding)

    def lngettext(self, msgid1, msgid2, n):
        # Default
        if n == 1:
            message = msgid1
        else:
            message = msgid2
        # Fallback method might have something different
        if self._fallback:
            try:
                message = self._fallback.lngettext(msgid1, msgid2, n)
            except (AttributeError, UnicodeError):
                # Ignore UnicodeErrors: we'll do our own encoding next
                # AttributeError happens on py2.3 where lngettext is not
                # implemented
                pass

        # Next decide what encoding to use for the strings we return
        output_encoding = (self._output_charset or
                locale.getpreferredencoding())

        return self._reencode_if_necessary(message, output_encoding)

    def ugettext(self, message):
        # Non-strings can't be translated or decoded; return "nothing"
        # rather than raising.
        if not isinstance(message, basestring):
            return u''
        if self._fallback:
            msg = to_unicode(message, encoding=self.input_charset)
            try:
                message = self._fallback.ugettext(msg)
            except (AttributeError, UnicodeError):
                # Ignore UnicodeErrors: We'll do our own decoding later
                pass

        # Make sure we're returning unicode
        return to_unicode(message, encoding=self.input_charset)

    def ungettext(self, msgid1, msgid2, n):
        # Default
        if n == 1:
            message = msgid1
        else:
            message = msgid2
        # Fallback might override this
        if self._fallback:
            msgid1 = to_unicode(msgid1, encoding=self.input_charset)
            msgid2 = to_unicode(msgid2, encoding=self.input_charset)
            try:
                message = self._fallback.ungettext(msgid1, msgid2, n)
            except (AttributeError, UnicodeError):
                # Ignore UnicodeErrors: We'll do our own decoding later
                pass

        # Make sure we're returning unicode
        return to_unicode(message, encoding=self.input_charset,
                nonstring='empty')
|
||||
|
||||
|
||||
class NewGNUTranslations(DummyTranslations, gettext.GNUTranslations):
    '''Safer version of :class:`gettext.GNUTranslations`

    :class:`gettext.GNUTranslations` suffers from two problems that this
    class fixes.

    1) :class:`gettext.GNUTranslations` can throw a
       :exc:`~exceptions.UnicodeError` in
       :meth:`gettext.GNUTranslations.ugettext` if the message being
       translated has non-:term:`ASCII` characters and there is no
       translation for it.
    2) :class:`gettext.GNUTranslations` can return byte :class:`str` from
       :meth:`gettext.GNUTranslations.ugettext` and :class:`unicode`
       strings from the other :meth:`~gettext.GNUTranslations.gettext`
       methods if the message being translated is the wrong type

    When byte :class:`str` are returned, the strings will be encoded
    according to this algorithm:

    1) If a fallback has been added, the fallback will be called first.
       You'll need to consult the fallback to see whether it performs any
       encoding changes.
    2) If a byte :class:`str` was given, the same byte :class:`str` will
       be returned.
    3) If a :class:`unicode` string was given and
       :meth:`set_output_charset` has been called then we encode the
       string using the :attr:`output_charset`
    4) If a :class:`unicode` string was given and this is :meth:`gettext`
       or :meth:`ngettext` and a charset was detected when parsing the
       :term:`message catalog`, output in that charset.
    5) If a :class:`unicode` string was given and this is :meth:`gettext`
       or :meth:`ngettext` we encode it using :term:`UTF-8`.
    6) If a :class:`unicode` string was given and this is :meth:`lgettext`
       or :meth:`lngettext` we encode using the value of
       :func:`locale.getpreferredencoding`

    For :meth:`ugettext` and :meth:`ungettext`, we go through the same set of
    steps with the following differences:

    * We transform byte :class:`str` into :class:`unicode` strings for these
      methods.
    * The encoding used to decode the byte :class:`str` is taken from
      :attr:`input_charset` if it's set, otherwise we decode using
      :term:`UTF-8`

    .. attribute:: input_charset

        an extension to the |stdlib|_ :mod:`gettext` that specifies what
        charset a message is encoded in when decoding a message to
        :class:`unicode`.  This is used for two purposes:

        1) If the message string is a byte :class:`str`, this is used to
           decode the string to a :class:`unicode` string before looking it
           up in the :term:`message catalog`.
        2) In :meth:`~kitchen.i18n.DummyTranslations.ugettext` and
           :meth:`~kitchen.i18n.DummyTranslations.ungettext` methods, if
           a byte :class:`str` is given as the message and is untranslated
           this is used as the encoding when decoding to :class:`unicode`.
           This is different from the :attr:`_charset` parameter that may be
           set when a :term:`message catalog` is loaded because
           :attr:`input_charset` is used to describe an encoding used in
           a python source file while :attr:`_charset` describes the
           encoding used in the :term:`message catalog` file.

    Any characters that aren't able to be transformed from a byte
    :class:`str` to :class:`unicode` string or vice versa will be replaced
    with a replacement character (ie: ``u'�'`` in unicode based encodings,
    ``'?'`` in other :term:`ASCII` compatible encodings).

    .. seealso::

        :class:`gettext.GNUTranslations.gettext`
            For information about what methods this class has and what they do

    .. versionchanged:: kitchen-1.1.0 ; API kitchen.i18n 2.1.0
        Although we had adapted :meth:`gettext`, :meth:`ngettext`,
        :meth:`lgettext`, and :meth:`lngettext` to always return
        byte :class:`str`, we hadn't forced those byte :class:`str` to always
        be in a specified charset.  We now make sure that :meth:`gettext` and
        :meth:`ngettext` return byte :class:`str` encoded using
        :attr:`output_charset` if set, otherwise :attr:`charset` and if
        neither of those, :term:`UTF-8`.  With :meth:`lgettext` and
        :meth:`lngettext` :attr:`output_charset` if set, otherwise
        :func:`locale.getpreferredencoding`.
    '''
    #pylint: disable-msg=C0103,C0111
    def _parse(self, fp):
        # Delegate catalog parsing to the stdlib; this sets self._catalog,
        # self.plural, and self._charset from the mofile headers.
        gettext.GNUTranslations._parse(self, fp)

    def gettext(self, message):
        # Guard: non-strings can't be looked up in the catalog
        if not isinstance(message, basestring):
            return ''
        tmsg = message
        # Catalog keys are unicode; decode the msgid before lookup
        u_message = to_unicode(message, encoding=self.input_charset)
        try:
            tmsg = self._catalog[u_message] #pylint:disable-msg=E1101
        except KeyError:
            # No translation here; give any fallback catalog a chance
            if self._fallback:
                try:
                    tmsg = self._fallback.gettext(message)
                except (AttributeError, UnicodeError):
                    # Ignore UnicodeErrors: We'll do our own encoding next
                    pass

        # Next decide what encoding to use for the strings we return
        output_encoding = (self._output_charset or self._charset or
                self.input_charset)

        return self._reencode_if_necessary(tmsg, output_encoding)

    def ngettext(self, msgid1, msgid2, n):
        # Default: untranslated singular/plural chosen by n
        if n == 1:
            tmsg = msgid1
        else:
            tmsg = msgid2

        if not isinstance(msgid1, basestring):
            return ''
        u_msgid1 = to_unicode(msgid1, encoding=self.input_charset)
        try:
            #pylint:disable-msg=E1101
            tmsg = self._catalog[(u_msgid1, self.plural(n))]
        except KeyError:
            if self._fallback:
                try:
                    tmsg = self._fallback.ngettext(msgid1, msgid2, n)
                except (AttributeError, UnicodeError):
                    # Ignore UnicodeErrors: We'll do our own encoding next
                    pass

        # Next decide what encoding to use for the strings we return
        output_encoding = (self._output_charset or self._charset or
                self.input_charset)

        return self._reencode_if_necessary(tmsg, output_encoding)

    def lgettext(self, message):
        if not isinstance(message, basestring):
            return ''
        tmsg = message
        u_message = to_unicode(message, encoding=self.input_charset)
        try:
            tmsg = self._catalog[u_message] #pylint:disable-msg=E1101
        except KeyError:
            if self._fallback:
                try:
                    tmsg = self._fallback.lgettext(message)
                except (AttributeError, UnicodeError):
                    # Ignore UnicodeErrors: We'll do our own encoding next
                    pass

        # Next decide what encoding to use for the strings we return
        output_encoding = (self._output_charset or
                locale.getpreferredencoding())

        return self._reencode_if_necessary(tmsg, output_encoding)

    def lngettext(self, msgid1, msgid2, n):
        if n == 1:
            tmsg = msgid1
        else:
            tmsg = msgid2

        if not isinstance(msgid1, basestring):
            return ''
        u_msgid1 = to_unicode(msgid1, encoding=self.input_charset)
        try:
            #pylint:disable-msg=E1101
            tmsg = self._catalog[(u_msgid1, self.plural(n))]
        except KeyError:
            if self._fallback:
                try:
                    # Bugfix: call lngettext (not ngettext) on the fallback so
                    # the fallback also encodes with the locale's preferred
                    # encoding, matching this method's contract.
                    tmsg = self._fallback.lngettext(msgid1, msgid2, n)
                except (AttributeError, UnicodeError):
                    # Ignore UnicodeErrors: We'll do our own encoding next
                    # AttributeError happens on py2.3 where lngettext is not
                    # implemented
                    pass

        # Next decide what encoding to use for the strings we return
        output_encoding = (self._output_charset or
                locale.getpreferredencoding())

        return self._reencode_if_necessary(tmsg, output_encoding)

    def ugettext(self, message):
        if not isinstance(message, basestring):
            return u''
        message = to_unicode(message, encoding=self.input_charset)
        try:
            message = self._catalog[message] #pylint:disable-msg=E1101
        except KeyError:
            if self._fallback:
                try:
                    message = self._fallback.ugettext(message)
                except (AttributeError, UnicodeError):
                    # Ignore UnicodeErrors: We'll do our own encoding next
                    pass

        # Make sure that we're returning unicode
        return to_unicode(message, encoding=self.input_charset)

    def ungettext(self, msgid1, msgid2, n):
        if n == 1:
            tmsg = msgid1
        else:
            tmsg = msgid2

        if not isinstance(msgid1, basestring):
            return u''
        u_msgid1 = to_unicode(msgid1, encoding=self.input_charset)
        try:
            #pylint:disable-msg=E1101
            tmsg = self._catalog[(u_msgid1, self.plural(n))]
        except KeyError:
            if self._fallback:
                try:
                    tmsg = self._fallback.ungettext(msgid1, msgid2, n)
                except (AttributeError, UnicodeError):
                    # Ignore UnicodeErrors: We'll do our own encoding next
                    pass

        # Make sure that we're returning unicode
        return to_unicode(tmsg, encoding=self.input_charset,
                nonstring='empty')
|
||||
|
||||
|
||||
def get_translation_object(domain, localedirs=tuple(), languages=None,
        class_=None, fallback=True, codeset=None):
    '''Get a translation object bound to the :term:`message catalogs`

    :arg domain: Name of the message domain.  This should be a unique name
        that can be used to lookup the :term:`message catalog` for this app or
        library.
    :kwarg localedirs: Iterator of directories to look for
        :term:`message catalogs` under.  The directories are searched in order
        for :term:`message catalogs`.  For each of the directories searched,
        we check for message catalogs in any language specified
        in :attr:`languages`.  The :term:`message catalogs` are used to create
        the Translation object that we return.  The Translation object will
        attempt to lookup the msgid in the first catalog that we found.  If
        it's not in there, it will go through each subsequent catalog looking
        for a match.  For this reason, the order in which you specify the
        :attr:`localedirs` may be important.  If no :term:`message catalogs`
        are found, either return a :class:`DummyTranslations` object or raise
        an :exc:`IOError` depending on the value of :attr:`fallback`.
        The default localedir from :mod:`gettext` which is
        :file:`os.path.join(sys.prefix, 'share', 'locale')` on Unix is
        implicitly appended to the :attr:`localedirs`, making it the last
        directory searched.
    :kwarg languages: Iterator of language codes to check for
        :term:`message catalogs`.  If unspecified, the user's locale settings
        will be used.

        .. seealso:: :func:`gettext.find` for information on what environment
            variables are used.

    :kwarg class_: The class to use to extract translations from the
        :term:`message catalogs`.  Defaults to :class:`NewGNUTranslations`.
    :kwarg fallback: If set to :data:`False`, raise an :exc:`IOError` if no
        :term:`message catalogs` are found.  If :data:`True`, the default,
        return a :class:`DummyTranslations` object.
    :kwarg codeset: Set the character encoding to use when returning byte
        :class:`str` objects.  This is equivalent to calling
        :meth:`~gettext.GNUTranslations.output_charset` on the Translations
        object that is returned from this function.
    :return: Translation object to get :mod:`gettext` methods from

    If you need more flexibility than :func:`easy_gettext_setup`, use this
    function.  It sets up a :mod:`gettext` Translation object and returns it
    to you.  Then you can access any of the methods of the object that you
    need directly.  For instance, if you specifically need to access
    :func:`~gettext.GNUTranslations.lgettext`::

        translations = get_translation_object('foo')
        translations.lgettext('My Message')

    This function is similar to the |stdlib|_ :func:`gettext.translation` but
    makes it better in two ways

    1. It returns :class:`NewGNUTranslations` or :class:`DummyTranslations`
       objects by default.  These are superior to the
       :class:`gettext.GNUTranslations` and :class:`gettext.NullTranslations`
       objects because they are consistent in the string type they return and
       they fix several issues that can cause the |stdlib|_ objects to throw
       :exc:`UnicodeError`.
    2. This function takes multiple directories to search for
       :term:`message catalogs`.

    The latter is important when setting up :mod:`gettext` in a portable
    manner.  There is not a common directory for translations across operating
    systems so one needs to look in multiple directories for the translations.
    :func:`get_translation_object` is able to handle that if you give it
    a list of directories to search for catalogs::

        translations = get_translation_object('foo', localedirs=(
             os.path.join(os.path.realpath(os.path.dirname(__file__)), 'locale'),
             os.path.join(sys.prefix, 'lib', 'locale')))

    This will search for several different directories:

    1. A directory named :file:`locale` in the same directory as the module
       that called :func:`get_translation_object`,
    2. In :file:`/usr/lib/locale`
    3. In :file:`/usr/share/locale` (the fallback directory)

    This allows :mod:`gettext` to work on Windows and in development (where the
    :term:`message catalogs` are typically in the toplevel module directory)
    and also when installed under Linux (where the :term:`message catalogs`
    are installed in :file:`/usr/share/locale`).  You (or the system packager)
    just need to install the :term:`message catalogs` in
    :file:`/usr/share/locale` and remove the :file:`locale` directory from the
    module to make this work.  ie::

        In development:
            ~/foo   # Toplevel module directory
            ~/foo/__init__.py
            ~/foo/locale    # With message catalogs below here:
            ~/foo/locale/es/LC_MESSAGES/foo.mo

        Installed on Linux:
            /usr/lib/python2.7/site-packages/foo
            /usr/lib/python2.7/site-packages/foo/__init__.py
            /usr/share/locale/  # With message catalogs below here:
            /usr/share/locale/es/LC_MESSAGES/foo.mo

    .. note::

        This function will setup Translation objects that attempt to lookup
        msgids in all of the found :term:`message catalogs`.  This means if
        you have several versions of the :term:`message catalogs` installed
        in different directories that the function searches, you need to make
        sure that :attr:`localedirs` specifies the directories so that newer
        :term:`message catalogs` are searched first.  It also means that if
        a newer catalog does not contain a translation for a msgid but an
        older one that's in :attr:`localedirs` does, the translation from that
        older catalog will be returned.

    .. versionchanged:: kitchen-1.1.0 ; API kitchen.i18n 2.1.0
        Add more parameters to :func:`~kitchen.i18n.get_translation_object` so
        it can more easily be used as a replacement for
        :func:`gettext.translation`.  Also change the way we use localedirs.
        We cycle through them until we find a suitable locale file rather
        than simply cycling through until we find a directory that exists.
        The new code is based heavily on the |stdlib|_
        :func:`gettext.translation` function.
    '''
    if not class_:
        class_ = NewGNUTranslations

    # Collect every matching mofile from the user's localedirs plus the
    # stdlib default localedir (searched last).
    mofiles = []
    for localedir in itertools.chain(localedirs, (_DEFAULT_LOCALEDIR,)):
        mofiles.extend(gettext.find(domain, localedir, languages, all=1))
    if not mofiles:
        if fallback:
            return DummyTranslations()
        raise IOError(ENOENT, 'No translation file found for domain', domain)

    # Accumulate a translation with fallbacks to all the other mofiles
    stacked_translations = None
    for mofile in mofiles:
        full_path = os.path.abspath(mofile)
        # Check the module-level cache first so the same catalog isn't
        # parsed and held in memory more than once.
        translation = _translations.get(full_path)
        if not translation:
            mofile_fh = open(full_path, 'rb')
            try:
                # setdefault guards against another caller having populated
                # the cache for this path in the meantime.
                translation = _translations.setdefault(full_path,
                        class_(mofile_fh))
            finally:
                mofile_fh.close()

        # Shallow copy the object so that the fallbacks and output charset can
        # differ but the data we read from the mofile is shared.
        translation = copy.copy(translation)
        if codeset:
            translation.set_output_charset(codeset)
        if not stacked_translations:
            stacked_translations = translation
        else:
            stacked_translations.add_fallback(translation)

    return stacked_translations
|
||||
|
||||
def easy_gettext_setup(domain, localedirs=tuple(), use_unicode=True):
    '''Setup translation functions for an application

    :arg domain: Name of the message domain.  This should be a unique name
        that can be used to lookup the :term:`message catalog` for this app.
    :kwarg localedirs: Iterator of directories to look for :term:`message
        catalogs` under.  The first directory to exist is used regardless of
        whether messages for this domain are present.  If none of the
        directories exist, fallback on ``sys.prefix`` + :file:`/share/locale`
        Default: No directories to search so we just use the fallback.
    :kwarg use_unicode: If :data:`True` return the :mod:`gettext` functions
        for :class:`unicode` strings else return the functions for byte
        :class:`str` for the translations.  Default is :data:`True`.
    :return: tuple of the :mod:`gettext` function and :mod:`gettext` function
        for plurals

    This is a convenience wrapper around the :mod:`gettext` `Class-based API
    <http://docs.python.org/library/gettext.html#class-based-api>`_.  For the
    simple case, call it with just a domain::

        _, N_ = easy_gettext_setup('foo')

    The two returned functions are used to mark strings in your code for
    translation: :func:`_` for strings whose form never depends on a plural
    quantity, :func:`N_` for strings that need a singular and a plural form.

    .. seealso::

        :doc:`api-i18n`
            This module's documentation has examples of using :func:`_` and :func:`N_`
        :func:`get_translation_object`
            for information on how to use :attr:`localedirs` to get the
            proper :term:`message catalogs` both when in development and when
            installed to FHS compliant directories on Linux.

    .. note::

        The gettext functions returned from this function should be superior
        to the ones returned from :mod:`gettext`.  The traits that make them
        better are described in the :class:`DummyTranslations` and
        :class:`NewGNUTranslations` documentation.

    .. versionchanged:: kitchen-0.2.4 ; API kitchen.i18n 2.0.0
        Changed :func:`~kitchen.i18n.easy_gettext_setup` to return the lgettext
        functions instead of gettext functions when use_unicode=False.
    '''
    catalog = get_translation_object(domain, localedirs=localedirs)
    if use_unicode:
        funcs = (catalog.ugettext, catalog.ungettext)
    else:
        funcs = (catalog.lgettext, catalog.lngettext)
    return funcs
|
||||
|
||||
# Public API of this module
__all__ = ('DummyTranslations', 'NewGNUTranslations', 'easy_gettext_setup',
        'get_translation_object')
|
96
kitchen/iterutils/__init__.py
Normal file
96
kitchen/iterutils/__init__.py
Normal file
|
@ -0,0 +1,96 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2010 Red Hat, Inc
|
||||
#
|
||||
# kitchen is free software; you can redistribute it and/or modify it under the
|
||||
# terms of the GNU Lesser General Public License as published by the Free
|
||||
# Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# kitchen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
|
||||
# more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public License
|
||||
# along with kitchen; if not, see <http://www.gnu.org/licenses/>
|
||||
#
|
||||
# Authors:
|
||||
# Toshio Kuratomi <toshio@fedoraproject.org>
|
||||
# Luke Macken <lmacken@redhat.com>
|
||||
#
|
||||
# Portions of code taken from python-fedora fedora/iterutils.py
|
||||
'''
|
||||
Functions to manipulate iterables
|
||||
|
||||
.. versionadded:: Kitchen: 0.2.1a1
|
||||
|
||||
.. moduleauthor:: Toshio Kuratomi <toshio@fedoraproject.org>
|
||||
.. moduleauthor:: Luke Macken <lmacken@redhat.com>
|
||||
'''
|
||||
|
||||
from kitchen.versioning import version_tuple_to_string
|
||||
|
||||
__version_info__ = ((0, 0, 1),)
|
||||
__version__ = version_tuple_to_string(__version_info__)
|
||||
|
||||
def isiterable(obj, include_string=False):
    '''Check whether an object is an iterable

    :arg obj: Object to test whether it is an iterable
    :kwarg include_string: If :data:`True` and :attr:`obj` is a byte
        :class:`str` or :class:`unicode` string this function will return
        :data:`True`.  If set to :data:`False`, byte :class:`str` and
        :class:`unicode` strings will cause this function to return
        :data:`False`.  Default :data:`False`.
    :returns: :data:`True` if :attr:`obj` is iterable, otherwise
        :data:`False`.
    '''
    # Strings are iterable but are usually treated as scalars; reject them
    # up front unless the caller opted in.  (`not include_string` is tested
    # first so the basestring check is skipped when strings are wanted.)
    if not include_string and isinstance(obj, basestring):
        return False
    # Duck-type everything else: an object is iterable exactly when iter()
    # accepts it.
    try:
        iter(obj)
    except TypeError:
        return False
    return True
|
||||
|
||||
def iterate(obj, include_string=False):
    '''Generator that can be used to iterate over anything

    :arg obj: The object to iterate over
    :kwarg include_string: if :data:`True`, treat strings as iterables.
        Otherwise treat them as a single scalar value.  Default :data:`False`

    This function will create an iterator out of any scalar or iterable.  It
    is useful for making a value given to you an iterable before operating on it.
    Iterables have their items returned.  scalars are transformed into iterables.
    A string is treated as a scalar value unless the :attr:`include_string`
    parameter is set to :data:`True`.  Example usage::

        >>> list(iterate(None))
        [None]
        >>> list(iterate([None]))
        [None]
        >>> list(iterate([1, 2, 3]))
        [1, 2, 3]
        >>> list(iterate(set([1, 2, 3])))
        [1, 2, 3]
        >>> list(iterate(dict(a='1', b='2')))
        ['a', 'b']
        >>> list(iterate(1))
        [1]
        >>> list(iterate(iter([1, 2, 3])))
        [1, 2, 3]
        >>> list(iterate('abc'))
        ['abc']
        >>> list(iterate('abc', include_string=True))
        ['a', 'b', 'c']
    '''
    # Scalars (including strings, unless include_string is set) are wrapped
    # in a one-element tuple so that a single loop handles both cases.
    if not isiterable(obj, include_string=include_string):
        obj = (obj,)
    for item in obj:
        yield item
|
||||
|
||||
__all__ = ('isiterable', 'iterate',)
|
10
kitchen/pycompat24/__init__.py
Normal file
10
kitchen/pycompat24/__init__.py
Normal file
|
@ -0,0 +1,10 @@
|
|||
'''
The :mod:`kitchen.pycompat24` module contains implementations of functionality
introduced in python-2.4 for use on earlier versions of python.
'''
from kitchen.versioning import version_tuple_to_string

# API version of this subpackage, expressed as a tuple-of-tuples for
# kitchen.versioning's PEP 386-style formatter.
__version_info__ = ((1, 1, 0),)
__version__ = version_tuple_to_string(__version_info__)

# Submodules that make up the public pycompat24 API.
__all__ = ('base64', 'sets', 'subprocess')
|
46
kitchen/pycompat24/base64/__init__.py
Normal file
46
kitchen/pycompat24/base64/__init__.py
Normal file
|
@ -0,0 +1,46 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2010 Red Hat, Inc
|
||||
#
|
||||
# This file is part of kitchen
|
||||
#
|
||||
# kitchen is free software; you can redistribute it and/or modify it under the
|
||||
# terms of the GNU Lesser General Public License as published by the Free
|
||||
# Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# kitchen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
|
||||
# more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public License
|
||||
# along with kitchen; if not, see <http://www.gnu.org/licenses/>
|
||||
#
|
||||
# Authors:
|
||||
# Toshio Kuratomi <toshio@fedoraproject.org>
|
||||
|
||||
'''
|
||||
Implement the modern base64 interface.
|
||||
|
||||
Python-2.4 and above have a new API for the base64 module. This is a backport
|
||||
of that module for use on python-2.3.
|
||||
|
||||
.. seealso::
|
||||
:mod:`base64`
|
||||
for information about using the functions provided here.
|
||||
'''
|
||||
import sys

# :W0401,W0614: The purpose of this module is to create a backport of base64
# so we ignore these pylint warnings
#pylint:disable-msg=W0401,W0614
if sys.version_info >= (2, 4):
    # python-2.4+: the stdlib already has the modern API; re-export it.
    from base64 import *
else:
    # python-2.3: use our bundled copy of the python-2.4 base64 module.
    from kitchen.pycompat24.base64._base64 import *

# Names re-exported from whichever implementation was selected above.
__all__ = ( 'b16decode', 'b16encode', 'b32decode', 'b32encode', 'b64decode',
        'b64encode', 'decode', 'decodestring', 'encode', 'encodestring',
        'standard_b64decode', 'standard_b64encode', 'urlsafe_b64decode',
        'urlsafe_b64encode',)
|
363
kitchen/pycompat24/base64/_base64.py
Normal file
363
kitchen/pycompat24/base64/_base64.py
Normal file
|
@ -0,0 +1,363 @@
|
|||
#! /usr/bin/env python
|
||||
|
||||
"""RFC 3548: Base16, Base32, Base64 Data Encodings"""
|
||||
|
||||
# Modified 04-Oct-1995 by Jack Jansen to use binascii module
|
||||
# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support
|
||||
|
||||
import re
|
||||
import struct
|
||||
import binascii
|
||||
|
||||
|
||||
__all__ = [
|
||||
# Legacy interface exports traditional RFC 1521 Base64 encodings
|
||||
'encode', 'decode', 'encodestring', 'decodestring',
|
||||
# Generalized interface for other encodings
|
||||
'b64encode', 'b64decode', 'b32encode', 'b32decode',
|
||||
'b16encode', 'b16decode',
|
||||
# Standard Base64 encoding
|
||||
'standard_b64encode', 'standard_b64decode',
|
||||
# Some common Base64 alternatives. As referenced by RFC 3458, see thread
|
||||
# starting at:
|
||||
#
|
||||
# http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html
|
||||
'urlsafe_b64encode', 'urlsafe_b64decode',
|
||||
]
|
||||
|
||||
_translation = [chr(_x) for _x in range(256)]
|
||||
EMPTYSTRING = ''
|
||||
|
||||
|
||||
def _translate(s, altchars):
|
||||
translation = _translation[:]
|
||||
for k, v in altchars.items():
|
||||
translation[ord(k)] = v
|
||||
return s.translate(''.join(translation))
|
||||
|
||||
|
||||
|
||||
# Base64 encoding/decoding uses binascii
|
||||
|
||||
def b64encode(s, altchars=None):
    """Encode a string using Base64.

    s is the string to encode.  Optional altchars must be a string of at least
    length 2 (additional characters are ignored) which specifies an
    alternative alphabet for the '+' and '/' characters.  This allows an
    application to e.g. generate url or filesystem safe Base64 strings.

    The encoded string is returned.
    """
    # b2a_base64 appends a trailing newline; slice it off.
    encoded = binascii.b2a_base64(s)[:-1]
    if altchars is None:
        return encoded
    # Substitute the caller-supplied characters for '+' and '/'.
    return _translate(encoded, {'+': altchars[0], '/': altchars[1]})
|
||||
|
||||
|
||||
def b64decode(s, altchars=None):
    """Decode a Base64 encoded string.

    s is the string to decode.  Optional altchars must be a string of at least
    length 2 (additional characters are ignored) which specifies the
    alternative alphabet used instead of the '+' and '/' characters.

    The decoded string is returned.  A TypeError is raised if s were
    incorrectly padded or if there are non-alphabet characters present in the
    string.
    """
    if altchars is not None:
        # Map the alternative alphabet back to standard '+' and '/' so that
        # binascii can decode it.
        s = _translate(s, {altchars[0]: '+', altchars[1]: '/'})
    try:
        return binascii.a2b_base64(s)
    except binascii.Error, msg:
        # Transform this exception for consistency
        raise TypeError(msg)
|
||||
|
||||
|
||||
def standard_b64encode(s):
    """Encode a string using the standard Base64 alphabet.

    s is the string to encode.  The encoded string is returned.
    """
    # Plain delegation: b64encode's default alphabet is the standard one.
    return b64encode(s, None)
|
||||
|
||||
def standard_b64decode(s):
    """Decode a string encoded with the standard Base64 alphabet.

    s is the string to decode.  The decoded string is returned.  A TypeError
    is raised if the string is incorrectly padded or if there are non-alphabet
    characters present in the string.
    """
    # Plain delegation: b64decode's default alphabet is the standard one.
    return b64decode(s, None)
|
||||
|
||||
def urlsafe_b64encode(s):
    """Encode a string using a url-safe Base64 alphabet.

    s is the string to encode.  The encoded string is returned.  The alphabet
    uses '-' instead of '+' and '_' instead of '/'.
    """
    return b64encode(s, altchars='-_')
|
||||
|
||||
def urlsafe_b64decode(s):
    """Decode a string encoded with the url-safe Base64 alphabet.

    s is the string to decode.  The decoded string is returned.  A TypeError
    is raised if the string is incorrectly padded or if there are non-alphabet
    characters present in the string.

    The alphabet uses '-' instead of '+' and '_' instead of '/'.
    """
    return b64decode(s, altchars='-_')
|
||||
|
||||
|
||||
|
||||
# Base32 encoding/decoding must be done in Python
|
||||
# Mapping from 5-bit values to the RFC 3548 Base32 alphabet (A-Z then 2-7).
_b32alphabet = {
    0: 'A',  9: 'J', 18: 'S', 27: '3',
    1: 'B', 10: 'K', 19: 'T', 28: '4',
    2: 'C', 11: 'L', 20: 'U', 29: '5',
    3: 'D', 12: 'M', 21: 'V', 30: '6',
    4: 'E', 13: 'N', 22: 'W', 31: '7',
    5: 'F', 14: 'O', 23: 'X',
    6: 'G', 15: 'P', 24: 'Y',
    7: 'H', 16: 'Q', 25: 'Z',
    8: 'I', 17: 'R', 26: '2',
    }

# Forward table used by b32encode: list indexed by 5-bit value, built by
# sorting the alphabet items on their numeric key.
_b32tab = _b32alphabet.items()
_b32tab.sort()
_b32tab = [v for k, v in _b32tab]
# Reverse table used by b32decode: letter -> value (as a long, so the
# 40-bit accumulator arithmetic cannot overflow a plain int).
_b32rev = dict([(v, long(k)) for k, v in _b32alphabet.items()])
|
||||
|
||||
|
||||
def b32encode(s):
    """Encode a string using Base32.

    s is the string to encode.  The encoded string is returned.
    """
    parts = []
    quanta, leftover = divmod(len(s), 5)
    # Pad the last quantum with zero bits if necessary.  Base32 works on
    # 40-bit (5-byte) input quanta; `leftover` is remembered so the zero
    # padding can be replaced by '=' characters afterwards.
    if leftover:
        s += ('\0' * (5 - leftover))
        quanta += 1
    for i in range(quanta):
        # c1 and c2 are 16 bits wide, c3 is 8 bits wide.  The intent of this
        # code is to process the 40 bits in units of 5 bits.  So we take the 1
        # leftover bit of c1 and tack it onto c2.  Then we take the 2 leftover
        # bits of c2 and tack them onto c3.  The shifts and masks are intended
        # to give us values of exactly 5 bits in width.
        c1, c2, c3 = struct.unpack('!HHB', s[i*5:(i+1)*5])
        c2 += (c1 & 1) << 16 # 17 bits wide
        c3 += (c2 & 3) << 8  # 10 bits wide
        parts.extend([_b32tab[c1 >> 11],         # bits 1 - 5
                      _b32tab[(c1 >> 6) & 0x1f], # bits 6 - 10
                      _b32tab[(c1 >> 1) & 0x1f], # bits 11 - 15
                      _b32tab[c2 >> 12],         # bits 16 - 20 (1 - 5)
                      _b32tab[(c2 >> 7) & 0x1f], # bits 21 - 25 (6 - 10)
                      _b32tab[(c2 >> 2) & 0x1f], # bits 26 - 30 (11 - 15)
                      _b32tab[c3 >> 5],          # bits 31 - 35 (1 - 5)
                      _b32tab[c3 & 0x1f],        # bits 36 - 40 (1 - 5)
                      ])
    encoded = EMPTYSTRING.join(parts)
    # Adjust for any leftover partial quanta: replace the output characters
    # that encode nothing but zero padding with '=' per RFC 3548 (1 input
    # byte -> 6 pad chars, 2 -> 4, 3 -> 3, 4 -> 1).
    if leftover == 1:
        return encoded[:-6] + '======'
    elif leftover == 2:
        return encoded[:-4] + '===='
    elif leftover == 3:
        return encoded[:-3] + '==='
    elif leftover == 4:
        return encoded[:-1] + '='
    return encoded
|
||||
|
||||
|
||||
def b32decode(s, casefold=False, map01=None):
    """Decode a Base32 encoded string.

    s is the string to decode.  Optional casefold is a flag specifying whether
    a lowercase alphabet is acceptable as input.  For security purposes, the
    default is False.

    RFC 3548 allows for optional mapping of the digit 0 (zero) to the letter O
    (oh), and for optional mapping of the digit 1 (one) to either the letter I
    (eye) or letter L (el).  The optional argument map01 when not None,
    specifies which letter the digit 1 should be mapped to (when map01 is not
    None, the digit 0 is always mapped to the letter O).  For security
    purposes the default is None, so that 0 and 1 are not allowed in the
    input.

    The decoded string is returned.  A TypeError is raised if s were
    incorrectly padded or if there are non-alphabet characters present in the
    string.
    """
    # Valid Base32 is always a whole number of 8-character quanta.
    quanta, leftover = divmod(len(s), 8)
    if leftover:
        raise TypeError('Incorrect padding')
    # Handle section 2.4 zero and one mapping.  The flag map01 will be either
    # False, or the character to map the digit 1 (one) to.  It should be
    # either L (el) or I (eye).
    if map01:
        s = _translate(s, {'0': 'O', '1': map01})
    if casefold:
        s = s.upper()
    # Strip off pad characters from the right.  We need to count the pad
    # characters because this will tell us how many null bytes to remove from
    # the end of the decoded string.
    padchars = 0
    mo = re.search('(?P<pad>[=]*)$', s)
    if mo:
        padchars = len(mo.group('pad'))
        if padchars > 0:
            s = s[:-padchars]
    # Now decode the full quanta: accumulate eight 5-bit groups into a
    # 40-bit integer and emit its 5 bytes each time the accumulator fills.
    parts = []
    acc = 0
    shift = 35
    for c in s:
        val = _b32rev.get(c)
        if val is None:
            raise TypeError('Non-base32 digit found')
        acc += _b32rev[c] << shift
        shift -= 5
        if shift < 0:
            parts.append(binascii.unhexlify('%010x' % acc))
            acc = 0
            shift = 35
    # Process the last, partial quanta: the pad-character count determines
    # how many of the final 5 bytes were real data (see b32encode).
    last = binascii.unhexlify('%010x' % acc)
    if padchars == 0:
        last = ''           # No characters
    elif padchars == 1:
        last = last[:-1]
    elif padchars == 3:
        last = last[:-2]
    elif padchars == 4:
        last = last[:-3]
    elif padchars == 6:
        last = last[:-4]
    else:
        # 2, 5, or 7 pad characters can never occur in valid Base32.
        raise TypeError('Incorrect padding')
    parts.append(last)
    return EMPTYSTRING.join(parts)
|
||||
|
||||
|
||||
|
||||
# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
|
||||
# lowercase. The RFC also recommends against accepting input case
|
||||
# insensitively.
|
||||
def b16encode(s):
    """Encode a string using Base16.

    s is the string to encode.  The encoded string is returned.
    """
    # RFC 3548 specifies an uppercase Base16 alphabet, but hexlify()
    # produces lowercase, so normalize the result.
    hexed = binascii.hexlify(s)
    return hexed.upper()
|
||||
|
||||
|
||||
def b16decode(s, casefold=False):
    """Decode a Base16 encoded string.

    s is the string to decode.  Optional casefold is a flag specifying whether
    a lowercase alphabet is acceptable as input.  For security purposes, the
    default is False.

    The decoded string is returned.  A TypeError is raised if s were
    incorrectly padded or if there are non-alphabet characters present in the
    string.
    """
    # Uppercase first (when permitted), so the alphabet check below only
    # ever needs to accept the canonical uppercase digits.
    if casefold:
        s = s.upper()
    if re.search('[^0-9A-F]', s) is not None:
        raise TypeError('Non-base16 digit found')
    return binascii.unhexlify(s)
|
||||
|
||||
|
||||
|
||||
# Legacy interface. This code could be cleaned up since I don't believe
|
||||
# binascii has any line length limitations. It just doesn't seem worth it
|
||||
# though.
|
||||
|
||||
MAXLINESIZE = 76 # Excluding the CRLF
MAXBINSIZE = (MAXLINESIZE//4)*3

def encode(input, output):
    """Encode a file.

    Reads binary data from the *input* file object and writes
    newline-terminated base-64 lines to the *output* file object.
    """
    while True:
        chunk = input.read(MAXBINSIZE)
        if not chunk:
            break
        # Top up short reads so every output line but the last encodes a
        # full MAXBINSIZE quantum (keeps lines at MAXLINESIZE characters).
        while len(chunk) < MAXBINSIZE:
            more = input.read(MAXBINSIZE - len(chunk))
            if not more:
                break
            chunk += more
        # b2a_base64 appends the newline itself.
        output.write(binascii.b2a_base64(chunk))
|
||||
|
||||
|
||||
def decode(input, output):
    """Decode a file.

    Reads base-64 lines from the *input* file object and writes the decoded
    binary data to the *output* file object.
    """
    # One line of base-64 decodes independently of its neighbours, so the
    # file can be processed line by line.
    line = input.readline()
    while line:
        output.write(binascii.a2b_base64(line))
        line = input.readline()
|
||||
|
||||
|
||||
def encodestring(s):
    """Encode a string into multiple lines of base-64 data."""
    # Emit one newline-terminated line per MAXBINSIZE-byte slice of the
    # input; b2a_base64 supplies the trailing newline on each piece.
    pieces = [binascii.b2a_base64(s[offset:offset + MAXBINSIZE])
              for offset in range(0, len(s), MAXBINSIZE)]
    return "".join(pieces)
|
||||
|
||||
|
||||
def decodestring(s):
|
||||
"""Decode a string."""
|
||||
return binascii.a2b_base64(s)
|
||||
|
||||
|
||||
|
||||
# Useable as a script...
|
||||
def test():
    """Small test program

    Command-line driver: encodes (default), decodes (-d/-u), or round-trips
    a fixed string (-t).  Operates on the named file argument, or on stdin
    when no file (or '-') is given.
    """
    import sys, getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'deut')
    except getopt.error, msg:
        # Bad option: report usage on stderr and exit non-zero.
        sys.stdout = sys.stderr
        print msg
        print """usage: %s [-d|-e|-u|-t] [file|-]
        -d, -u: decode
        -e: encode (default)
        -t: encode and decode string 'Aladdin:open sesame'"""%sys.argv[0]
        sys.exit(2)
    # Default action is encode; the last matching flag wins.
    func = encode
    for o, a in opts:
        if o == '-e': func = encode
        if o == '-d': func = decode
        if o == '-u': func = decode
        if o == '-t': test1(); return
    if args and args[0] != '-':
        # Named file: make sure it is closed even if func raises.
        fh = open(args[0], 'rb')
        try:
            func(fh, sys.stdout)
        finally:
            fh.close()
    else:
        # No file argument (or '-'): filter stdin to stdout.
        func(sys.stdin, sys.stdout)
|
||||
|
||||
|
||||
def test1():
    """Round-trip a sample string through encodestring/decodestring and
    print the original, the encoded form, and the decoded result."""
    s0 = "Aladdin:open sesame"
    s1 = encodestring(s0)
    s2 = decodestring(s1)
    print s0, repr(s1), s2
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test()
|
92
kitchen/pycompat24/sets/__init__.py
Normal file
92
kitchen/pycompat24/sets/__init__.py
Normal file
|
@ -0,0 +1,92 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2010 Red Hat, Inc
|
||||
#
|
||||
# This file is part of kitchen
|
||||
#
|
||||
# kitchen is free software; you can redistribute it and/or modify it under the
|
||||
# terms of the GNU Lesser General Public License as published by the Free
|
||||
# Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# kitchen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
|
||||
# more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public License
|
||||
# along with kitchen; if not, see <http://www.gnu.org/licenses/>
|
||||
#
|
||||
# Authors:
|
||||
# Toshio Kuratomi <toshio@fedoraproject.org>
|
||||
|
||||
'''
|
||||
In python-2.4, a builtin :class:`set` type was added to python. This module
|
||||
provides a function to emulate that on python-2.3 by using the :mod:`sets`
|
||||
module.
|
||||
|
||||
:func:`set`
|
||||
Create a set. If running on python 2.4+ this is the :class:`set`
|
||||
constructor. If using python-2.3, it's :class:`sets.Set`.
|
||||
|
||||
:func:`frozenset`
|
||||
Create a frozenset. If running on python2.4+ this is the
|
||||
:class:`frozenset` constructor. If using python-2.3, it's
|
||||
:class:`sets.ImmutableSet`.
|
||||
|
||||
.. versionchanged:: 0.2.0 API: kitchen.pycompat24 1.0.0
|
||||
Added set and frozenset
|
||||
'''
|
||||
import __builtin__

# Setup set and frozenset on this module
# :W0622,C0103: The purpose of this module is to define set and frozenset if
# they aren't in builtins already so we disregard these pylint warnings
#pylint:disable-msg=W0622,C0103
if not hasattr(__builtin__, 'set'):
    # python-2.3: emulate the builtin set with the sets module's Set class.
    import sets
    set = sets.Set
else:
    # python-2.4+: re-export the (faster) builtin under this module's name.
    set = set

if not hasattr(__builtin__, 'frozenset'):
    # python-2.3: sets.ImmutableSet stands in for the builtin frozenset.
    import sets
    frozenset = sets.ImmutableSet
else:
    frozenset = frozenset
#pylint:enable-msg=W0622,C0103
|
||||
|
||||
def add_builtin_set():
    '''If there's no set builtin, use the :mod:`sets` module to make one

    This function makes sure that a :class:`set` and :class:`frozenset` type
    are available in the :mod:`__builtin__` namespace.  Since the function
    checks whether :class:`set` and :class:`frozenset` are already present in
    the :mod:`__builtin__` namespace and refuses to overwrite those if found,
    it's safe to call this in multiple places and in scripts run under
    python-2.4+, where a more efficient set implementation is already present
    in the :mod:`__builtin__` namespace.

    However, since this function modifies :mod:`__builtin__` there's no need
    to call it more than once so you likely want to do something like this
    when your program loads::

        myprogram/__init__.py:

        from kitchen.pycompat24 import sets
        sets.add_builtin_set()

    You can then use :func:`set` and :func:`frozenset` anywhere in your code::

        myprogram/compute.py:

        def math_students(algebra_student_list, geometry_student_list):
            return set(algebra_student_list).union(set(geometry_student_list))
    '''
    # Only install our implementations when the real builtins are absent, so
    # we never shadow the faster C implementations on python-2.4+.
    if not hasattr(__builtin__, 'set'):
        __builtin__.set = set

    if not hasattr(__builtin__, 'frozenset'):
        __builtin__.frozenset = frozenset
|
||||
|
||||
__all__ = ('add_builtin_set', 'set', 'frozenset')
|
5
kitchen/pycompat24/subprocess.py
Normal file
5
kitchen/pycompat24/subprocess.py
Normal file
|
@ -0,0 +1,5 @@
|
|||
# :W0401, W0611, W0614: Rather than have two versions of subprocess, we import
|
||||
# the python2.7 version here as well
|
||||
#pylint:disable-msg=W0401,W0611,W0614
|
||||
from kitchen.pycompat27.subprocess import *
|
||||
from kitchen.pycompat27.subprocess import __all__
|
12
kitchen/pycompat25/__init__.py
Normal file
12
kitchen/pycompat25/__init__.py
Normal file
|
@ -0,0 +1,12 @@
|
|||
'''
|
||||
The :mod:`kitchen.pycompat25` module contains implementations of functionality
|
||||
introduced in python-2.5.
|
||||
'''
|
||||
|
||||
from kitchen.versioning import version_tuple_to_string
|
||||
|
||||
__version_info__ = ((1, 0, 0),)
|
||||
__version__ = version_tuple_to_string(__version_info__)
|
||||
|
||||
|
||||
__all__ = ('collections',)
|
9
kitchen/pycompat25/collections/__init__.py
Normal file
9
kitchen/pycompat25/collections/__init__.py
Normal file
|
@ -0,0 +1,9 @@
|
|||
try:
    #:E0611: defaultdict doesn't exist in python-2.4 or less but that's why we
    # have it in a try: except:.  So we can use our version if necessary.
    #pylint:disable-msg=E0611
    from collections import defaultdict
except ImportError:
    # Stdlib defaultdict unavailable (python-2.4 or earlier): fall back to
    # the bundled pure-python implementation.
    from kitchen.pycompat25.collections._defaultdict import defaultdict

__all__ = ('defaultdict',)
|
137
kitchen/pycompat25/collections/_defaultdict.py
Normal file
137
kitchen/pycompat25/collections/_defaultdict.py
Normal file
|
@ -0,0 +1,137 @@
|
|||
##
|
||||
# Transcribed from http://code.activestate.com/recipes/523034/ on May 1, 2009
|
||||
# by Jef Spaleta This code provides an emulation for the defaultdict
|
||||
# functionality introduced in python 2.5's collection module
|
||||
#
|
||||
# Changes from the original:
|
||||
# * Change the return value from __reduce__ to use iteritems() to prevent
|
||||
# a segfault when pickling. (Jef Spaleta)
|
||||
# * Change how we setup the module to use collections.defaultdict by default
|
||||
# (Toshio Kuratomi)
|
||||
#
|
||||
# Copyright (c) 2007 Justin Kirtland
|
||||
#
|
||||
# PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
|
||||
# --------------------------------------------
|
||||
#
|
||||
# 1. This LICENSE AGREEMENT is between the Python Software Foundation ("PSF"),
|
||||
# and the Individual or Organization ("Licensee") accessing and otherwise
|
||||
# using this software ("Python") in source or binary form and its
|
||||
# associated documentation.
|
||||
#
|
||||
# 2. Subject to the terms and conditions of this License Agreement, PSF hereby
|
||||
# grants Licensee a nonexclusive, royalty-free, world-wide license to
|
||||
# reproduce, analyze, test, perform and/or display publicly, prepare
|
||||
# derivative works, distribute, and otherwise use Python alone or in any
|
||||
# derivative version, provided, however, that PSF's License Agreement and
|
||||
# PSF's notice of copyright, i.e., "Copyright (c) 2001, 2002, 2003, 2004,
|
||||
# 2005, 2006 Python Software Foundation; All Rights Reserved" are retained
|
||||
# in Python alone or in any derivative version prepared by Licensee.
|
||||
#
|
||||
# 3. In the event Licensee prepares a derivative work that is based on or
|
||||
# incorporates Python or any part thereof, and wants to make the derivative
|
||||
# work available to others as provided herein, then Licensee hereby agrees
|
||||
# to include in any such work a brief summary of the changes made to
|
||||
# Python.
|
||||
#
|
||||
# 4. PSF is making Python available to Licensee on an "AS IS" basis. PSF
|
||||
# MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF
|
||||
# EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY
|
||||
# REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY
|
||||
# PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT INFRINGE ANY THIRD
|
||||
# PARTY RIGHTS.
|
||||
#
|
||||
# 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON FOR ANY
|
||||
# INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF
|
||||
# MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, OR ANY DERIVATIVE
|
||||
# THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
|
||||
#
|
||||
# 6. This License Agreement will automatically terminate upon a material
|
||||
# breach of its terms and conditions.
|
||||
#
|
||||
# 7. Nothing in this License Agreement shall be deemed to create any
|
||||
# relationship of agency, partnership, or joint venture between PSF and
|
||||
# Licensee. This License Agreement does not grant permission to use PSF
|
||||
# trademarks or trade name in a trademark sense to endorse or promote
|
||||
# products or services of Licensee, or any third party.
|
||||
#
|
||||
# 8. By copying, installing or otherwise using Python, Licensee agrees to be
|
||||
# bound by the terms and conditions of this License Agreement.
|
||||
|
||||
'''
|
||||
-----------
|
||||
defaultdict
|
||||
-----------
|
||||
|
||||
This is a pure python implementation of defaultdict that is compatible with
|
||||
the defaultdict class provided by python-2.5 and above.
|
||||
|
||||
.. seealso::
|
||||
:class:`collections.defaultdict`
|
||||
for documentation on this module
|
||||
'''
|
||||
|
||||
# Pylint disabled messages
|
||||
#
|
||||
# :C0103: We're defining a compatible class name therefore we need to match
|
||||
# the format of that name.
|
||||
|
||||
import types
|
||||
|
||||
from kitchen import b_
|
||||
|
||||
# :C0103, W0613: We're implementing the python-2.5 defaultdict API so
|
||||
# we have to use the same names as python.
|
||||
# :C0111: We point people at the stdlib API docs for defaultdict rather than
|
||||
# reproduce it here.
|
||||
#pylint:disable-msg=C0103,W0613,C0111
|
||||
|
||||
class defaultdict(dict):
    '''Pure-python emulation of python-2.5's :class:`collections.defaultdict`
    for use on earlier pythons.

    .. seealso::
        :class:`collections.defaultdict`
            for documentation on using this class
    '''
    def __init__(self, default_factory=None, *args, **kwargs):
        # default_factory may be None (plain dict behaviour) or any callable
        # used by __missing__ to manufacture values for absent keys.
        if (default_factory is not None and
            not hasattr(default_factory, '__call__')):
            raise TypeError(b_('First argument must be callable'))
        dict.__init__(self, *args, **kwargs)
        self.default_factory = default_factory

    def __getitem__(self, key):
        try:
            return dict.__getitem__(self, key)
        except KeyError:
            # Missing key: delegate to __missing__, matching the stdlib
            # defaultdict protocol.
            return self.__missing__(key)

    def __missing__(self, key):
        if self.default_factory is None:
            raise KeyError(key)
        # Store the freshly made value before returning it so subsequent
        # lookups see the same object.
        self[key] = value = self.default_factory()
        return value

    def __reduce__(self):
        # Pickling support.  Uses iteritems() rather than items() to prevent
        # a segfault when pickling (see the change note in the file header).
        if self.default_factory is None:
            args = tuple()
        else:
            args = self.default_factory,
        return type(self), args, None, None, self.iteritems()

    def copy(self):
        return self.__copy__()

    def __copy__(self):
        # Shallow copy: same factory, same (shared) values.
        return type(self)(self.default_factory, self)

    def __deepcopy__(self, memo):
        # Deep copy the items; the factory itself is not copied.
        import copy
        return type(self)(self.default_factory,
                copy.deepcopy(self.items()))
    def __repr__(self):
        # Note: Have to use "is not None" otherwise we get an infinite
        # recursion
        if isinstance(self.default_factory, types.MethodType) \
                and self.default_factory.im_self is not None \
                and issubclass(self.default_factory.im_class, defaultdict):
            defrepr = '<bound method sub._factory of defaultdict(...'
        else:
            defrepr = repr(self.default_factory)
        return 'defaultdict(%s, %s)' % (defrepr, dict.__repr__(self))
|
||||
|
||||
__all__ = ('defaultdict',)
|
13
kitchen/pycompat27/__init__.py
Normal file
13
kitchen/pycompat27/__init__.py
Normal file
|
@ -0,0 +1,13 @@
|
|||
'''
|
||||
The :mod:`kitchen.pycompat27` module contains implementations of functionality
|
||||
introduced in python-2.7 for use on earlier versions of python.
|
||||
|
||||
.. versionchanged:: 0.2.3
|
||||
Made mswindows, MAXFD, and list2cmdline available from the module
|
||||
'''
|
||||
from kitchen.versioning import version_tuple_to_string
|
||||
|
||||
__version_info__ = ((1, 1, 0),)
|
||||
__version__ = version_tuple_to_string(__version_info__)
|
||||
|
||||
__all__ = ('subprocess',)
|
46
kitchen/pycompat27/subprocess/__init__.py
Normal file
46
kitchen/pycompat27/subprocess/__init__.py
Normal file
|
@ -0,0 +1,46 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2011 Red Hat, Inc
|
||||
#
|
||||
# This file is part of kitchen
|
||||
#
|
||||
# kitchen is free software; you can redistribute it and/or modify it under the
|
||||
# terms of the GNU Lesser General Public License as published by the Free
|
||||
# Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# kitchen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
|
||||
# more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public License
|
||||
# along with kitchen; if not, see <http://www.gnu.org/licenses/>
|
||||
#
|
||||
# Authors:
|
||||
# Toshio Kuratomi <toshio@fedoraproject.org>
|
||||
|
||||
'''
|
||||
Implement the modern subprocess interface
|
||||
|
||||
Python-2.5 and python-2.7 introduce new API features to subprocess. This is
|
||||
a backport of that module for use on earlier python versions.
|
||||
|
||||
.. seealso::
|
||||
:mod:`subprocess`
|
||||
for information about using the functions provided here.
|
||||
'''
|
||||
import sys
|
||||
|
||||
# :W0401,W0611,W0614: We're importing compatibility to the python-2.7 version
|
||||
# of subprocess.
|
||||
#pylint:disable-msg=W0401,W0611,W0614
|
||||
if sys.version_info >= (2, 7):
|
||||
from subprocess import *
|
||||
from subprocess import MAXFD, list2cmdline, mswindows
|
||||
from subprocess import __all__
|
||||
else:
|
||||
from kitchen.pycompat27.subprocess._subprocess import *
|
||||
from kitchen.pycompat27.subprocess._subprocess import MAXFD, \
|
||||
list2cmdline, mswindows
|
||||
from kitchen.pycompat27.subprocess._subprocess import __all__
|
1538
kitchen/pycompat27/subprocess/_subprocess.py
Normal file
1538
kitchen/pycompat27/subprocess/_subprocess.py
Normal file
File diff suppressed because it is too large
Load diff
35
kitchen/release.py
Normal file
35
kitchen/release.py
Normal file
|
@ -0,0 +1,35 @@
|
|||
'''
Information about this kitchen release.
'''

from kitchen import _, __version__

# Distribution metadata consumed by setup.py and the documentation.
# DESCRIPTION/LONG_DESCRIPTION are run through _() so they can be localized.
NAME = 'kitchen'
VERSION = __version__
DESCRIPTION = _('Kitchen contains a cornucopia of useful code')
LONG_DESCRIPTION = _('''
We've all done it. In the process of writing a brand new application we've
discovered that we need a little bit of code that we've invented before.
Perhaps it's something to handle unicode text. Perhaps it's something to make
a bit of python-2.5 code run on python-2.3. Whatever it is, it ends up being
a tiny bit of code that seems too small to worry about pushing into its own
module so it sits there, a part of your current project, waiting to be cut and
pasted into your next project. And the next. And the next. And since that
little bittybit of code proved so useful to you, it's highly likely that it
proved useful to someone else as well. Useful enough that they've written it
and copy and pasted it over and over into each of their new projects.

Well, no longer! Kitchen aims to pull these small snippets of code into a few
python modules which you can import and use within your project. No more copy
and paste! Now you can let someone else maintain and release these small
snippets so that you can get on with your life.
''')
AUTHOR = 'Toshio Kuratomi, Seth Vidal, others'
EMAIL = 'toshio@fedoraproject.org'
COPYRIGHT = '2011 Red Hat, Inc. and others'
URL = 'https://fedorahosted.org/kitchen'
DOWNLOAD_URL = 'https://fedorahosted.org/releases/k/i/kitchen'
LICENSE = 'LGPLv2+'

__all__ = ('NAME', 'VERSION', 'DESCRIPTION', 'LONG_DESCRIPTION', 'AUTHOR',
        'EMAIL', 'COPYRIGHT', 'URL', 'DOWNLOAD_URL', 'LICENSE')
17
kitchen/text/__init__.py
Normal file
17
kitchen/text/__init__.py
Normal file
|
@ -0,0 +1,17 @@
|
|||
'''
------------
Kitchen.text
------------

Kitchen.text contains functions for manipulating text in python.

This includes things like converting between byte strings and unicode,
and displaying text on the screen.
'''

from kitchen.versioning import version_tuple_to_string

# API version of the kitchen.text subpackage; versioned independently of the
# kitchen package itself so API guarantees can be tracked per-subpackage.
__version_info__ = ((2, 1, 1),)
__version__ = version_tuple_to_string(__version_info__)

# Public submodules of kitchen.text
__all__ = ('converters', 'exceptions', 'misc',)
921
kitchen/text/converters.py
Normal file
921
kitchen/text/converters.py
Normal file
|
@ -0,0 +1,921 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2011 Red Hat, Inc.
|
||||
#
|
||||
# kitchen is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# kitchen is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with kitchen; if not, see <http://www.gnu.org/licenses/>
|
||||
#
|
||||
# Authors:
|
||||
# Toshio Kuratomi <toshio@fedoraproject.org>
|
||||
# Seth Vidal
|
||||
#
|
||||
# Portions of code taken from yum/i18n.py and
|
||||
# python-fedora: fedora/textutils.py
|
||||
|
||||
'''
Functions to handle conversion of byte :class:`str` and :class:`unicode`
strings.

.. versionchanged:: kitchen 0.2a2 ; API kitchen.text 2.0.0
    Added :func:`~kitchen.text.converters.getwriter`

.. versionchanged:: kitchen 0.2.2 ; API kitchen.text 2.1.0
    Added :func:`~kitchen.text.converters.exception_to_unicode`,
    :func:`~kitchen.text.converters.exception_to_bytes`,
    :data:`~kitchen.text.converters.EXCEPTION_CONVERTERS`,
    and :data:`~kitchen.text.converters.BYTE_EXCEPTION_CONVERTERS`

.. versionchanged:: kitchen 1.0.1 ; API kitchen.text 2.1.1
    Deprecated :data:`~kitchen.text.converters.BYTE_EXCEPTION_CONVERTERS` as
    we've simplified :func:`~kitchen.text.converters.exception_to_unicode` and
    :func:`~kitchen.text.converters.exception_to_bytes` to make it unnecessary
'''
try:
    from base64 import b64encode, b64decode
except ImportError:
    # python < 2.4 lacks b64encode/b64decode; use the bundled backport
    from kitchen.pycompat24.base64 import b64encode, b64decode

import codecs
import warnings
import xml.sax.saxutils

# We need to access b_() for localizing our strings but we'll end up with
# a circular import if we import it directly.
import kitchen as k
from kitchen.pycompat24 import sets
# Make the set/frozenset builtins available on python-2.3
sets.add_builtin_set()

from kitchen.text.exceptions import ControlCharError, XmlEncodeError
from kitchen.text.misc import guess_encoding, html_entities_unescape, \
        process_control_chars

#: Aliases for the utf-8 codec
_UTF8_ALIASES = frozenset(('utf-8', 'UTF-8', 'utf8', 'UTF8', 'utf_8', 'UTF_8',
    'utf', 'UTF', 'u8', 'U8'))
#: Aliases for the latin-1 codec
_LATIN1_ALIASES = frozenset(('latin-1', 'LATIN-1', 'latin1', 'LATIN1',
    'latin', 'LATIN', 'l1', 'L1', 'cp819', 'CP819', '8859', 'iso8859-1',
    'ISO8859-1', 'iso-8859-1', 'ISO-8859-1'))

# EXCEPTION_CONVERTERS is defined below due to using to_unicode
|
||||
|
||||
def to_unicode(obj, encoding='utf-8', errors='replace', nonstring=None,
        non_string=None):
    '''Convert an object into a :class:`unicode` string

    :arg obj: Object to convert to a :class:`unicode` string.  This should
        normally be a byte :class:`str`
    :kwarg encoding: What encoding to try converting the byte :class:`str`
        as.  Defaults to :term:`utf-8`
    :kwarg errors: How to handle undecodable bytes.  ``replace`` (the
        default) substitutes the replacement character.  Any other error
        scheme from the `codec base classes
        <http://docs.python.org/library/codecs.html#codec-base-classes>`_,
        such as ``strict`` or ``ignore``, may be used instead.
    :kwarg nonstring: How to treat nonstring values.  Possible values are:

        :simplerepr: (default) Try :meth:`object.__unicode__` and fall back
            to :meth:`object.__str__` to get a simple representation.
        :empty: Return an empty :class:`unicode` string
        :strict: Raise a :exc:`TypeError`
        :passthru: Return the object unchanged
        :repr: Return a :class:`unicode` string of the repr of the object

    :kwarg non_string: *Deprecated* Use :attr:`nonstring` instead
    :raises TypeError: if :attr:`nonstring` is ``strict`` and a
        non-:class:`basestring` object is passed in, or if :attr:`nonstring`
        is set to an unknown value
    :raises UnicodeDecodeError: if :attr:`errors` is ``strict`` and
        :attr:`obj` is not decodable using the given encoding
    :returns: :class:`unicode` string or the original object depending on
        the value of :attr:`nonstring`.

    Usually this is used on a byte :class:`str` but both byte :class:`str`
    and :class:`unicode` strings are handled intelligently.  The defaults
    are chosen so that a :class:`unicode` string is always returned and no
    error is ever raised while decoding; be aware that invalidly encoded
    input (or a nonstring object) can therefore produce unexpected output.
    Understand your data rather than using this function to mask errors.

    .. versionchanged:: 0.2.1a2
        Deprecated :attr:`non_string` in favor of :attr:`nonstring`
        parameter and changed default value to ``simplerepr``
    '''
    # unicode passes straight through; byte str gets decoded.  The utf-8 and
    # latin-1 alias checks are a shortcut for the overwhelmingly common cases.
    if isinstance(obj, unicode):
        return obj
    if isinstance(obj, str):
        if encoding in _UTF8_ALIASES:
            return unicode(obj, 'utf-8', errors)
        if encoding in _LATIN1_ALIASES:
            return unicode(obj, 'latin-1', errors)
        return obj.decode(encoding, errors)

    # Nonstring object: resolve the (possibly deprecated) strategy name
    if non_string:
        warnings.warn(k.b_('non_string is a deprecated parameter of'
            ' to_unicode(). Use nonstring instead'), DeprecationWarning,
            stacklevel=2)
        nonstring = nonstring or non_string
    nonstring = nonstring or 'simplerepr'

    if nonstring == 'empty':
        return u''
    if nonstring == 'passthru':
        return obj
    if nonstring == 'simplerepr':
        # Prefer __unicode__, falling back through str() to __str__
        try:
            text = obj.__unicode__()
        except (AttributeError, UnicodeError):
            text = None
        if not text:
            try:
                text = str(obj)
            except UnicodeError:
                try:
                    text = obj.__str__()
                except (UnicodeError, AttributeError):
                    text = u''
        if isinstance(text, unicode):
            return text
        return unicode(text, encoding, errors)
    if nonstring in ('repr', 'strict'):
        rep = repr(obj)
        if not isinstance(rep, unicode):
            rep = unicode(rep, encoding, errors)
        if nonstring == 'repr':
            return rep
        # nonstring == 'strict': the caller demanded a real string
        raise TypeError(k.b_('to_unicode was given "%(obj)s" which is neither'
            ' a byte string (str) or a unicode string') %
            {'obj': rep.encode(encoding, 'replace')})

    raise TypeError(k.b_('nonstring value, %(param)s, is not set to a valid'
        ' action') % {'param': nonstring})
||||
|
||||
def to_bytes(obj, encoding='utf-8', errors='replace', nonstring=None,
        non_string=None):
    '''Convert an object into a byte :class:`str`

    :arg obj: Object to convert to a byte :class:`str`.  This should
        normally be a :class:`unicode` string.
    :kwarg encoding: Encoding to use to convert the :class:`unicode` string
        into a byte :class:`str`.  Defaults to :term:`utf-8`.
    :kwarg errors: How to handle unencodable characters.  ``replace`` (the
        default) substitutes a marker character.  Any other error scheme
        from the `codec base classes
        <http://docs.python.org/library/codecs.html#codec-base-classes>`_,
        such as ``strict`` or ``ignore``, may be used instead.
    :kwarg nonstring: How to treat nonstring values.  Possible values are:

        :simplerepr: (default) Try :meth:`object.__str__` and fall back to
            :meth:`object.__unicode__` to get a simple representation.
        :empty: Return an empty byte :class:`str`
        :strict: Raise a :exc:`TypeError`
        :passthru: Return the object unchanged
        :repr: Return a byte :class:`str` of the :func:`repr` of the object

    :kwarg non_string: *Deprecated* Use :attr:`nonstring` instead.
    :raises TypeError: if :attr:`nonstring` is ``strict`` and a
        non-:class:`basestring` object is passed in, or if :attr:`nonstring`
        is set to an unknown value.
    :raises UnicodeEncodeError: if :attr:`errors` is ``strict`` and all of
        the bytes of :attr:`obj` are unable to be encoded using
        :attr:`encoding`.
    :returns: byte :class:`str` or the original object depending on the
        value of :attr:`nonstring`.

    .. warning::

        A byte :class:`str` passed to this function is returned unmodified;
        it is **not** re-encoded to :attr:`encoding`.  To force a specific
        encoding use::

            to_bytes(to_unicode(text), encoding='utf-8')

    The defaults are chosen so that a byte :class:`str` is always returned
    and no error is ever raised while encoding; be aware that an encoding
    which cannot represent the object (or a nonstring object) can therefore
    produce unexpected output.  Understand your data rather than using this
    function to mask errors.

    .. versionchanged:: 0.2.1a2
        Deprecated :attr:`non_string` in favor of :attr:`nonstring`
        parameter and changed default value to ``simplerepr``
    '''
    # byte str passes straight through; unicode gets encoded
    if isinstance(obj, str):
        return obj
    if isinstance(obj, unicode):
        return obj.encode(encoding, errors)

    # Nonstring object: resolve the (possibly deprecated) strategy name
    if non_string:
        warnings.warn(k.b_('non_string is a deprecated parameter of'
            ' to_bytes(). Use nonstring instead'), DeprecationWarning,
            stacklevel=2)
        nonstring = nonstring or non_string
    nonstring = nonstring or 'simplerepr'

    if nonstring == 'empty':
        return ''
    if nonstring == 'passthru':
        return obj
    if nonstring == 'simplerepr':
        # Prefer str()/__str__, falling back to __unicode__
        try:
            text = str(obj)
        except UnicodeError:
            try:
                text = obj.__str__()
            except (AttributeError, UnicodeError):
                text = None
        if not text:
            try:
                text = obj.__unicode__()
            except (AttributeError, UnicodeError):
                text = ''
        if isinstance(text, unicode):
            text = text.encode(encoding, 'replace')
        return text
    if nonstring in ('repr', 'strict'):
        try:
            rep = obj.__repr__()
        except (AttributeError, UnicodeError):
            rep = ''
        if isinstance(rep, unicode):
            rep = rep.encode(encoding, errors)
        else:
            rep = str(rep)
        if nonstring == 'repr':
            return rep
        # nonstring == 'strict': the caller demanded a real string
        raise TypeError(k.b_('to_bytes was given "%(obj)s" which is neither'
            ' a unicode string or a byte string (str)') % {'obj': rep})

    raise TypeError(k.b_('nonstring value, %(param)s, is not set to a valid'
        ' action') % {'param': nonstring})
|
||||
|
||||
def getwriter(encoding):
    '''Return a :class:`codecs.StreamWriter` that resists tracing back.

    :arg encoding: Encoding to use for transforming :class:`unicode` strings
        into byte :class:`str`.
    :rtype: :class:`codecs.StreamWriter`
    :returns: :class:`~codecs.StreamWriter` subclass that you can
        instantiate to wrap output streams so that :class:`unicode` strings
        are automatically translated into :attr:`encoding`.

    This is a reimplementation of :func:`codecs.getwriter` whose writers use
    :func:`kitchen.text.converters.to_bytes` for the conversion.  It departs
    from :func:`codecs.getwriter` in two ways:

    1) The returned :class:`~codecs.StreamWriter` accepts byte :class:`str`
       as well as :class:`unicode` strings; byte :class:`str` is passed
       through unmodified.
    2) The default error handler is ``replace`` (unknown characters become
       ``?`` in most ascii-based encodings) instead of ``strict``, so
       writing never issues a traceback.  As with
       :class:`codecs.StreamWriter` the handler may be changed afterwards
       via ``stream.errors = 'new_handler_name'``.

    .. seealso::

        API docs for :class:`codecs.StreamWriter` and :func:`codecs.getwriter`
        and `Print Fails <http://wiki.python.org/moin/PrintFails>`_ on the
        python wiki.

    .. versionadded:: kitchen 0.2a2, API: kitchen.text 1.1.0
    '''
    class _StreamWriter(codecs.StreamWriter):
        # :W0223: We don't need to implement all methods of StreamWriter.
        # :C0111: The API is documented by the stdlib codecs module; no
        #   docstrings needed here.
        #pylint:disable-msg=W0223,C0111
        def __init__(self, stream, errors='replace'):
            # Identical to the parent except for the lenient default handler
            codecs.StreamWriter.__init__(self, stream, errors)

        def encode(self, msg, errors='replace'):
            # to_bytes() passes byte str through and encodes unicode; the
            # StreamWriter protocol wants (converted_data, length_consumed)
            converted = to_bytes(msg, encoding=self.encoding, errors=errors)
            return (converted, len(msg))

    # Stamp the codec onto the class the same way codecs.getwriter does
    _StreamWriter.encoding = encoding
    return _StreamWriter
|
||||
|
||||
def to_utf8(obj, errors='replace', non_string='passthru'):
    '''*Deprecated*

    Convert :class:`unicode` to an encoded :term:`utf-8` byte :class:`str`.
    You should be using :func:`to_bytes` instead::

        to_bytes(obj, encoding='utf-8', non_string='passthru')

    :arg obj: Object to convert to a :term:`utf-8` encoded byte :class:`str`
    :kwarg errors: See :func:`to_bytes`; defaults to ``replace``
    :kwarg non_string: See :func:`to_bytes` :attr:`nonstring`; defaults to
        ``passthru``
    :returns: whatever :func:`to_bytes` returns for these arguments
    '''
    # Bug fix: the suggested replacement call in the deprecation message was
    # missing its closing parenthesis ('... nonstring="passthru" instead.').
    warnings.warn(k.b_('kitchen.text.converters.to_utf8 is deprecated. Use'
        ' kitchen.text.converters.to_bytes(obj, encoding="utf-8",'
        ' nonstring="passthru") instead.'), DeprecationWarning, stacklevel=2)
    return to_bytes(obj, encoding='utf-8', errors=errors,
            nonstring=non_string)
|
||||
|
||||
### str is also the type name for byte strings so it's not a good name for
|
||||
### something that can return unicode strings
|
||||
def to_str(obj):
|
||||
'''*Deprecated*
|
||||
|
||||
This function converts something to a byte :class:`str` if it isn't one.
|
||||
It's used to call :func:`str` or :func:`unicode` on the object to get its
|
||||
simple representation without danger of getting a :exc:`UnicodeError`.
|
||||
You should be using :func:`to_unicode` or :func:`to_bytes` explicitly
|
||||
instead.
|
||||
|
||||
If you need :class:`unicode` strings::
|
||||
|
||||
to_unicode(obj, nonstring='simplerepr')
|
||||
|
||||
If you need byte :class:`str`::
|
||||
|
||||
to_bytes(obj, nonstring='simplerepr')
|
||||
'''
|
||||
warnings.warn(k.b_('to_str is deprecated. Use to_unicode or to_bytes'
|
||||
' instead. See the to_str docstring for'
|
||||
' porting information.'),
|
||||
DeprecationWarning, stacklevel=2)
|
||||
return to_bytes(obj, nonstring='simplerepr')
|
||||
|
||||
# Exception message extraction functions

# Default converters for exception_to_unicode()/exception_to_bytes(): first
# try the exception's args[0], then the exception object itself.
EXCEPTION_CONVERTERS = (lambda e: e.args[0], lambda e: e)
'''Tuple of functions to try to use to convert an exception into a string
representation.  Its main use is to extract a string (:class:`unicode` or
:class:`str`) from an exception object in :func:`exception_to_unicode` and
:func:`exception_to_bytes`.  The functions here will try the exception's
``args[0]`` and the exception itself (roughly equivalent to
`str(exception)`) to extract the message.  This is only a default and can
be easily overridden when calling those functions.  There are several
reasons you might wish to do that.  If you have exceptions where the best
string representing the exception is not returned by the default
functions, you can add another function to extract from a different
field::

    from kitchen.text.converters import (EXCEPTION_CONVERTERS,
        exception_to_unicode)

    class MyError(Exception):
        def __init__(self, message):
            self.value = message

    c = [lambda e: e.value]
    c.extend(EXCEPTION_CONVERTERS)
    try:
        raise MyError('An Exception message')
    except MyError, e:
        print exception_to_unicode(e, converters=c)

Another reason would be if you're converting to a byte :class:`str` and
you know the :class:`str` needs to be a non-:term:`utf-8` encoding.
:func:`exception_to_bytes` defaults to :term:`utf-8` but if you convert
into a byte :class:`str` explicitly using a converter then you can choose
a different encoding::

    from kitchen.text.converters import (EXCEPTION_CONVERTERS,
        exception_to_bytes, to_bytes)
    c = [lambda e: to_bytes(e.args[0], encoding='euc_jp'),
        lambda e: to_bytes(e, encoding='euc_jp')]
    c.extend(EXCEPTION_CONVERTERS)
    try:
        do_something()
    except Exception, e:
        log = open('logfile.euc_jp', 'a')
        log.write('%s\n' % exception_to_bytes(e, converters=c))
        log.close()

Each function in this list should take the exception as its sole argument
and return a string containing the message representing the exception.
The functions may return the message as a byte :class:`str`,
a :class:`unicode` string, or even an object if you trust the object to
return a decent string representation.  The :func:`exception_to_unicode`
and :func:`exception_to_bytes` functions will make sure to convert the
string to the proper type before returning.

.. versionadded:: 0.2.2
'''

# Deprecated byte-oriented counterpart; EXCEPTION_CONVERTERS now suffices.
BYTE_EXCEPTION_CONVERTERS = (lambda e: to_bytes(e.args[0]), to_bytes)
'''*Deprecated*: Use :data:`EXCEPTION_CONVERTERS` instead.

Tuple of functions to try to use to convert an exception into a string
representation.  This tuple is similar to the one in
:data:`EXCEPTION_CONVERTERS` but it's used with :func:`exception_to_bytes`
instead.  Ideally, these functions should do their best to return the data
as a byte :class:`str` but the results will be run through
:func:`to_bytes` before being returned.

.. versionadded:: 0.2.2
.. versionchanged:: 1.0.1
    Deprecated as simplifications allow :data:`EXCEPTION_CONVERTERS` to
    perform the same function.
'''
|
||||
|
||||
def exception_to_unicode(exc, converters=EXCEPTION_CONVERTERS):
    '''Convert an exception object into a unicode representation

    :arg exc: Exception object to convert
    :kwarg converters: List of functions to use to convert the exception
        into a string.  See :data:`EXCEPTION_CONVERTERS` for the default
        value and an example of adding other converters to the defaults.
        The functions in the list are tried one at a time to see if they can
        extract a string from the exception.  The first one to do so without
        raising an exception is used.
    :returns: :class:`unicode` string representation of the exception.  The
        value extracted by the :attr:`converters` will be converted into
        :class:`unicode` before being returned using the :term:`utf-8`
        encoding.  If you know you need to use an alternate encoding add
        a function that does that to the list of functions in
        :attr:`converters`)

    .. versionadded:: 0.2.2
    '''
    # Fallback if every converter raises
    msg = u'<exception failed to convert to text>'
    for func in converters:
        try:
            msg = func(exc)
        except Exception:
            # Bug fix: this was a bare "except:"; narrowed so that
            # KeyboardInterrupt/SystemExit raised by a converter propagate
            # instead of being silently swallowed.
            pass
        else:
            break
    return to_unicode(msg)
|
||||
|
||||
def exception_to_bytes(exc, converters=EXCEPTION_CONVERTERS):
    '''Convert an exception object into a str representation

    :arg exc: Exception object to convert
    :kwarg converters: List of functions to use to convert the exception
        into a string.  See :data:`EXCEPTION_CONVERTERS` for the default
        value and an example of adding other converters to the defaults.
        The functions in the list are tried one at a time to see if they can
        extract a string from the exception.  The first one to do so without
        raising an exception is used.
    :returns: byte :class:`str` representation of the exception.  The value
        extracted by the :attr:`converters` will be converted into
        :class:`str` before being returned using the :term:`utf-8` encoding.
        If you know you need to use an alternate encoding add a function
        that does that to the list of functions in :attr:`converters`)

    .. versionadded:: 0.2.2
    .. versionchanged:: 1.0.1
        Code simplification allowed us to switch to using
        :data:`EXCEPTION_CONVERTERS` as the default value of
        :attr:`converters`.
    '''
    # Fallback if every converter raises
    msg = '<exception failed to convert to text>'
    for func in converters:
        try:
            msg = func(exc)
        except Exception:
            # Bug fix: this was a bare "except:"; narrowed so that
            # KeyboardInterrupt/SystemExit raised by a converter propagate
            # instead of being silently swallowed.
            pass
        else:
            break
    return to_bytes(msg)
|
||||
|
||||
#
|
||||
# XML Related Functions
|
||||
#
|
||||
|
||||
def unicode_to_xml(string, encoding='utf-8', attrib=False,
|
||||
control_chars='replace'):
|
||||
'''Take a :class:`unicode` string and turn it into a byte :class:`str`
|
||||
suitable for xml
|
||||
|
||||
:arg string: :class:`unicode` string to encode into an XML compatible byte
|
||||
:class:`str`
|
||||
:kwarg encoding: encoding to use for the returned byte :class:`str`.
|
||||
Default is to encode to :term:`UTF-8`. If some of the characters in
|
||||
:attr:`string` are not encodable in this encoding, the unknown
|
||||
characters will be entered into the output string using xml character
|
||||
references.
|
||||
:kwarg attrib: If :data:`True`, quote the string for use in an xml
|
||||
attribute. If :data:`False` (default), quote for use in an xml text
|
||||
field.
|
||||
:kwarg control_chars: :term:`control characters` are not allowed in XML
|
||||
documents. When we encounter those we need to know what to do. Valid
|
||||
options are:
|
||||
|
||||
:replace: (default) Replace the control characters with ``?``
|
||||
:ignore: Remove the characters altogether from the output
|
||||
:strict: Raise an :exc:`~kitchen.text.exceptions.XmlEncodeError` when
|
||||
we encounter a :term:`control character`
|
||||
|
||||
:raises kitchen.text.exceptions.XmlEncodeError: If :attr:`control_chars`
|
||||
is set to ``strict`` and the string to be made suitable for output to
|
||||
xml contains :term:`control characters` or if :attr:`string` is not
|
||||
a :class:`unicode` string then we raise this exception.
|
||||
:raises ValueError: If :attr:`control_chars` is set to something other than
|
||||
``replace``, ``ignore``, or ``strict``.
|
||||
:rtype: byte :class:`str`
|
||||
:returns: representation of the :class:`unicode` string as a valid XML
|
||||
byte :class:`str`
|
||||
|
||||
XML files consist mainly of text encoded using a particular charset. XML
|
||||
also denies the use of certain bytes in the encoded text (example: ``ASCII
|
||||
Null``). There are also special characters that must be escaped if they
|
||||
are present in the input (example: ``<``). This function takes care of
|
||||
all of those issues for you.
|
||||
|
||||
There are a few different ways to use this function depending on your
|
||||
needs. The simplest invocation is like this::
|
||||
|
||||
unicode_to_xml(u'String with non-ASCII characters: <"á と">')
|
||||
|
||||
This will return the following to you, encoded in :term:`utf-8`::
|
||||
|
||||
'String with non-ASCII characters: <"á と">'
|
||||
|
||||
Pretty straightforward. Now, what if you need to encode your document in
|
||||
something other than :term:`utf-8`? For instance, ``latin-1``? Let's
|
||||
see::
|
||||
|
||||
unicode_to_xml(u'String with non-ASCII characters: <"á と">', encoding='latin-1')
|
||||
'String with non-ASCII characters: <"á と">'
|
||||
|
||||
Because the ``と`` character is not available in the ``latin-1`` charset,
|
||||
it is replaced with ``と`` in our output. This is an xml character
|
||||
reference which represents the character at unicode codepoint ``12392``, the
|
||||
``と`` character.
|
||||
|
||||
When you want to reverse this, use :func:`xml_to_unicode` which will turn
|
||||
a byte :class:`str` into a :class:`unicode` string and replace the xml
|
||||
character references with the unicode characters.
|
||||
|
||||
XML also has the quirk of not allowing :term:`control characters` in its
|
||||
output. The :attr:`control_chars` parameter allows us to specify what to
|
||||
do with those. For use cases that don't need absolute character by
|
||||
character fidelity (example: holding strings that will just be used for
|
||||
display in a GUI app later), the default value of ``replace`` works well::
|
||||
|
||||
unicode_to_xml(u'String with disallowed control chars: \u0000\u0007')
|
||||
'String with disallowed control chars: ??'
|
||||
|
||||
If you do need to be able to reproduce all of the characters at a later
|
||||
date (examples: if the string is a key value in a database or a path on a
|
||||
filesystem) you have many choices. Here are a few that rely on ``utf-7``,
|
||||
a verbose encoding that encodes :term:`control characters` (as well as
|
||||
non-:term:`ASCII` unicode values) to characters from within the
|
||||
:term:`ASCII` printable characters. The good thing about doing this is
|
||||
that the code is pretty simple. You just need to use ``utf-7`` both when
|
||||
encoding the field for xml and when decoding it for use in your python
|
||||
program::
|
||||
|
||||
unicode_to_xml(u'String with unicode: と and control char: \u0007', encoding='utf7')
|
||||
'String with unicode: +MGg and control char: +AAc-'
|
||||
# [...]
|
||||
xml_to_unicode('String with unicode: +MGg and control char: +AAc-', encoding='utf7')
|
||||
u'String with unicode: と and control char: \u0007'
|
||||
|
||||
As you can see, the ``utf-7`` encoding will transform even characters that
|
||||
would be representable in :term:`utf-8`. This can be a drawback if you
|
||||
want unicode characters in the file to be readable without being decoded
|
||||
first. You can work around this with increased complexity in your
|
||||
application code::
|
||||
|
||||
encoding = 'utf-8'
|
||||
u_string = u'String with unicode: と and control char: \u0007'
|
||||
try:
|
||||
# First attempt to encode to utf8
|
||||
data = unicode_to_xml(u_string, encoding=encoding, errors='strict')
|
||||
except XmlEncodeError:
|
||||
# Fallback to utf-7
|
||||
encoding = 'utf-7'
|
||||
data = unicode_to_xml(u_string, encoding=encoding, errors='strict')
|
||||
write_tag('<mytag encoding=%s>%s</mytag>' % (encoding, data))
|
||||
# [...]
|
||||
encoding = tag.attributes.encoding
|
||||
u_string = xml_to_unicode(u_string, encoding=encoding)
|
||||
|
||||
Using code similar to that, you can have some fields encoded using your
|
||||
default encoding and fallback to ``utf-7`` if there are :term:`control
|
||||
characters` present.
|
||||
|
||||
.. note::
|
||||
|
||||
If your goal is to preserve the :term:`control characters` you cannot
|
||||
save the entire file as ``utf-7`` and set the xml encoding parameter
|
||||
to ``utf-7`` if your goal is to preserve the :term:`control
|
||||
characters`. Because XML doesn't allow :term:`control characters`,
|
||||
you have to encode those separate from any encoding work that the XML
|
||||
parser itself knows about.
|
||||
|
||||
.. seealso::
|
||||
|
||||
:func:`bytes_to_xml`
|
||||
if you're dealing with bytes that are non-text or of an unknown
|
||||
encoding that you must preserve on a byte for byte level.
|
||||
:func:`guess_encoding_to_xml`
|
||||
if you're dealing with strings in unknown encodings that you don't
|
||||
need to save with char-for-char fidelity.
|
||||
'''
|
||||
if not string:
|
||||
# Small optimization
|
||||
return ''
|
||||
try:
|
||||
process_control_chars(string, strategy=control_chars)
|
||||
except TypeError:
|
||||
raise XmlEncodeError(k.b_('unicode_to_xml must have a unicode type as'
|
||||
' the first argument. Use bytes_string_to_xml for byte'
|
||||
' strings.'))
|
||||
except ValueError:
|
||||
raise ValueError(k.b_('The control_chars argument to unicode_to_xml'
|
||||
' must be one of ignore, replace, or strict'))
|
||||
except ControlCharError, exc:
|
||||
raise XmlEncodeError(exc.args[0])
|
||||
|
||||
string = string.encode(encoding, 'xmlcharrefreplace')
|
||||
|
||||
# Escape characters that have special meaning in xml
|
||||
if attrib:
|
||||
string = xml.sax.saxutils.escape(string, entities={'"':"""})
|
||||
else:
|
||||
string = xml.sax.saxutils.escape(string)
|
||||
return string
|
||||
|
||||
def xml_to_unicode(byte_string, encoding='utf-8', errors='replace'):
    '''Transform a byte :class:`str` from an xml file into a :class:`unicode`
    string

    :arg byte_string: byte :class:`str` to decode
    :kwarg encoding: encoding that the byte :class:`str` is in
    :kwarg errors: What to do if not every character is valid in
        :attr:`encoding`.  See the :func:`to_unicode` documentation for legal
        values.
    :rtype: :class:`unicode` string
    :returns: string decoded from :attr:`byte_string`

    This function attempts to reverse what :func:`unicode_to_xml` does: it
    decodes the byte :class:`str` (presumably read in from an xml file) and
    expands all the html entities back into unicode characters.  One thing it
    cannot do is restore any :term:`control characters` that were removed
    prior to inserting into the file.  If you need to keep such characters
    you need to use :func:`xml_to_bytes` and :func:`bytes_to_xml` or use one
    of the strategies documented in :func:`unicode_to_xml` instead.
    '''
    # Decode first, then expand entities -- entity expansion must operate on
    # the unicode text, not on the raw bytes
    decoded = to_unicode(byte_string, encoding=encoding, errors=errors)
    return html_entities_unescape(decoded)
|
||||
|
||||
def byte_string_to_xml(byte_string, input_encoding='utf-8', errors='replace',
        output_encoding='utf-8', attrib=False, control_chars='replace'):
    '''Make sure a byte :class:`str` is validly encoded for xml output

    :arg byte_string: Byte :class:`str` to turn into valid xml output
    :kwarg input_encoding: Encoding of :attr:`byte_string`.  Default ``utf-8``
    :kwarg errors: How to handle errors encountered while decoding the
        :attr:`byte_string` into :class:`unicode` at the beginning of the
        process.  Values are:

        :replace: (default) Replace the invalid bytes with a ``?``
        :ignore: Remove the characters altogether from the output
        :strict: Raise an :exc:`UnicodeDecodeError` when we encounter
            a non-decodable character

    :kwarg output_encoding: Encoding for the xml file that this string will
        go into.  Default is ``utf-8``.  Characters not encodable in this
        encoding are written as xml character references.
    :kwarg attrib: If :data:`True`, quote the string for use in an xml
        attribute.  If :data:`False` (default), quote for use in an xml text
        field.
    :kwarg control_chars: XML does not allow :term:`control characters`.
        When we encounter those we need to know what to do.  Valid options
        are:

        :replace: (default) Replace the :term:`control characters` with ``?``
        :ignore: Remove the characters altogether from the output
        :strict: Raise an error when we encounter a :term:`control character`

    :raises XmlEncodeError: If :attr:`control_chars` is set to ``strict`` and
        the string to be made suitable for output to xml contains
        :term:`control characters`, or if :attr:`byte_string` is not a byte
        :class:`str`.
    :raises UnicodeDecodeError: If :attr:`errors` is set to ``strict`` and
        the :attr:`byte_string` contains bytes that are not decodable using
        :attr:`input_encoding`
    :rtype: byte :class:`str`
    :returns: representation of the byte :class:`str` in the output encoding
        with any bytes that aren't available in xml taken care of.

    Use this when you have a byte :class:`str` representing text that you
    need to make suitable for output to xml.  For instance, to transform
    strings encoded in ``latin-1`` to :term:`utf-8` for output::

        utf8_string = byte_string_to_xml(latin1_string, input_encoding='latin-1')

    If you already have strings in the proper encoding you may still want to
    use this function to remove :term:`control characters`::

        cleaned_string = byte_string_to_xml(string, input_encoding='utf-8', output_encoding='utf-8')

    .. seealso::

        :func:`unicode_to_xml`
            for other ideas on using this function
    '''
    # Guard clause: unicode input belongs in unicode_to_xml() instead
    if not isinstance(byte_string, str):
        raise XmlEncodeError(k.b_('byte_string_to_xml can only take a byte'
                ' string as its first argument. Use unicode_to_xml for'
                ' unicode strings'))

    # Decode into unicode, then let unicode_to_xml() do all the real work
    as_unicode = unicode(byte_string, input_encoding, errors)
    return unicode_to_xml(as_unicode, encoding=output_encoding,
            attrib=attrib, control_chars=control_chars)
|
||||
|
||||
def xml_to_byte_string(byte_string, input_encoding='utf-8', errors='replace',
        output_encoding='utf-8'):
    '''Transform a byte :class:`str` from an xml file into a byte
    :class:`str` in a (possibly different) encoding

    :arg byte_string: byte :class:`str` to decode
    :kwarg input_encoding: encoding that the byte :class:`str` is in
    :kwarg errors: What to do if not every character is valid in
        :attr:`encoding`.  See the :func:`to_unicode` docstring for legal
        values.  The same value is also used when re-encoding into
        :attr:`output_encoding`.
    :kwarg output_encoding: Encoding for the output byte :class:`str`
    :rtype: byte :class:`str`
    :returns: byte :class:`str` in :attr:`output_encoding`, decoded from
        :attr:`byte_string` with html entities expanded

    This function attempts to reverse what :func:`unicode_to_xml` does.  It
    takes a byte :class:`str` (presumably read in from an xml file),
    expands all the html entities into unicode characters, and then encodes
    the result into :attr:`output_encoding`.  One thing it cannot do
    is restore any :term:`control characters` that were removed prior to
    inserting into the file.  If you need to keep such characters you need to
    use :func:`xml_to_bytes` and :func:`bytes_to_xml` or use one of the
    strategies documented in :func:`unicode_to_xml` instead.
    '''
    # Decode + entity-expand, then re-encode to the requested encoding
    string = xml_to_unicode(byte_string, input_encoding, errors)
    return to_bytes(string, output_encoding, errors)
|
||||
|
||||
def bytes_to_xml(byte_string, *args, **kwargs):
    '''Return a byte :class:`str` encoded so it is valid inside of any xml
    file

    :arg byte_string: byte :class:`str` to transform
    :arg \\*args, \\*\\*kwargs: extra arguments to this function are passed
        on to the function actually implementing the encoding.  You can use
        this to tweak the output in some cases but, as a general rule, you
        shouldn't because the underlying encoding function is not guaranteed
        to remain the same.
    :rtype: byte :class:`str` consisting of all :term:`ASCII` characters
    :returns: byte :class:`str` representation of the input.  This will be
        encoded using base64.

    This function is made especially to put binary information into xml
    documents.  It is intended for encoding things that must be preserved
    byte-for-byte.  If you want to encode a byte string that's text and
    don't mind losing the actual bytes you probably want to try
    :func:`byte_string_to_xml` or :func:`guess_encoding_to_xml` instead.

    .. note::

        Although the current implementation uses :func:`base64.b64encode`
        and there's no plans to change it, that isn't guaranteed.  If you
        want to make sure that you can encode and decode these messages
        it's best to use :func:`xml_to_bytes` if you use this function to
        encode.
    '''
    # Thin wrapper; kept as a function so the codec can change centrally
    encoded = b64encode(byte_string, *args, **kwargs)
    return encoded
|
||||
|
||||
def xml_to_bytes(byte_string, *args, **kwargs):
    '''Decode a string encoded using :func:`bytes_to_xml`

    :arg byte_string: byte :class:`str` to transform.  This should be
        a base64 encoded sequence of bytes originally generated by
        :func:`bytes_to_xml`.
    :arg \\*args, \\*\\*kwargs: extra arguments to this function are passed
        on to the function actually implementing the encoding.  You can use
        this to tweak the output in some cases but, as a general rule, you
        shouldn't because the underlying encoding function is not guaranteed
        to remain the same.
    :rtype: byte :class:`str`
    :returns: byte :class:`str` that's the decoded input

    If you've got fields in an xml document that were encoded with
    :func:`bytes_to_xml` then you want to use this function to undecode
    them.  It converts a base64 encoded string into a byte :class:`str`.

    .. note::

        Although the current implementation uses :func:`base64.b64decode`
        and there's no plans to change it, that isn't guaranteed.  If you
        want to make sure that you can encode and decode these messages
        it's best to use :func:`bytes_to_xml` if you use this function to
        decode.
    '''
    # Thin wrapper; kept as a function so the codec can change centrally
    decoded = b64decode(byte_string, *args, **kwargs)
    return decoded
|
||||
|
||||
def guess_encoding_to_xml(string, output_encoding='utf-8', attrib=False,
        control_chars='replace'):
    '''Return a byte :class:`str` suitable for inclusion in xml

    :arg string: :class:`unicode` or byte :class:`str` to be transformed into
        a byte :class:`str` suitable for inclusion in xml.  If string is
        a byte :class:`str` we attempt to guess the encoding.  If we cannot
        guess, we fallback to ``latin-1``.
    :kwarg output_encoding: Output encoding for the byte :class:`str`.  This
        should match the encoding of your xml file.
    :kwarg attrib: If :data:`True`, escape the item for use in an xml
        attribute.  If :data:`False` (default) escape the item for use in
        a text node.
    :kwarg control_chars: What to do if :term:`control characters` are
        found; passed through to :func:`unicode_to_xml` /
        :func:`byte_string_to_xml`.  Default ``replace``.
    :returns: byte :class:`str` in :attr:`output_encoding` (default
        :term:`utf-8`)
    '''
    # Unicode input needs no encoding detection
    if isinstance(string, unicode):
        return unicode_to_xml(string, encoding=output_encoding,
                attrib=attrib, control_chars=control_chars)

    # Byte string: detect its encoding, then convert
    detected = guess_encoding(string)
    return byte_string_to_xml(string, input_encoding=detected,
            errors='replace', output_encoding=output_encoding,
            attrib=attrib, control_chars=control_chars)
|
||||
|
||||
def to_xml(string, encoding='utf-8', attrib=False, control_chars='ignore'):
    '''*Deprecated*: Use :func:`guess_encoding_to_xml` instead

    Thin compatibility shim.  Note that the :attr:`control_chars` default
    here is ``ignore`` whereas :func:`guess_encoding_to_xml` defaults to
    ``replace``; callers migrating should pass it explicitly if they care.
    '''
    # Warn at the caller's location (stacklevel=2), then delegate
    warnings.warn(k.b_('kitchen.text.converters.to_xml is deprecated. Use'
            ' kitchen.text.converters.guess_encoding_to_xml instead.'),
            DeprecationWarning, stacklevel=2)
    return guess_encoding_to_xml(string, output_encoding=encoding,
            attrib=attrib, control_chars=control_chars)
|
||||
|
||||
# Explicit public API of this module (kept sorted alphabetically)
__all__ = ('BYTE_EXCEPTION_CONVERTERS', 'EXCEPTION_CONVERTERS',
        'byte_string_to_xml', 'bytes_to_xml', 'exception_to_bytes',
        'exception_to_unicode', 'getwriter', 'guess_encoding_to_xml',
        'to_bytes', 'to_str', 'to_unicode', 'to_utf8', 'to_xml',
        'unicode_to_xml', 'xml_to_byte_string', 'xml_to_bytes',
        'xml_to_unicode')
|
901
kitchen/text/display.py
Normal file
901
kitchen/text/display.py
Normal file
|
@ -0,0 +1,901 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2010 Red Hat, Inc.
|
||||
# Copyright (c) 2010 Ville Skyttä
|
||||
# Copyright (c) 2009 Tim Lauridsen
|
||||
# Copyright (c) 2007 Marcus Kuhn
|
||||
#
|
||||
# kitchen is free software; you can redistribute it and/or modify it under the
|
||||
# terms of the GNU Lesser General Public License as published by the Free
|
||||
# Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# kitchen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
|
||||
# more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public License
|
||||
# along with kitchen; if not, see <http://www.gnu.org/licenses/>
|
||||
#
|
||||
# Authors:
|
||||
# James Antill <james@fedoraproject.org>
|
||||
# Marcus Kuhn
|
||||
# Toshio Kuratomi <toshio@fedoraproject.org>
|
||||
# Tim Lauridsen
|
||||
# Ville Skyttä
|
||||
#
|
||||
# Portions of this are from yum/i18n.py
|
||||
'''
|
||||
-----------------------
|
||||
Format Text for Display
|
||||
-----------------------
|
||||
|
||||
Functions related to displaying unicode text. Unicode characters don't all
|
||||
have the same width so we need helper functions for displaying them.
|
||||
|
||||
.. versionadded:: 0.2 kitchen.display API 1.0.0
|
||||
'''
|
||||
import itertools
|
||||
import unicodedata
|
||||
|
||||
from kitchen import b_
|
||||
from kitchen.text.converters import to_unicode, to_bytes
|
||||
from kitchen.text.exceptions import ControlCharError
|
||||
|
||||
# This is ported from ustr_utf8_* which I got from:
|
||||
# http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
|
||||
# I've tried to leave it close to the original C (same names etc.) so that
|
||||
# it is easy to read/compare both versions... James Antilles
|
||||
|
||||
#
|
||||
# Reimplemented quite a bit of this for speed. Use the bzr log or annotate
|
||||
# commands to see what I've changed since importing this file.-Toshio Kuratomi
|
||||
|
||||
# ----------------------------- BEG utf8 -----------------------------
|
||||
# This is an implementation of wcwidth() and wcswidth() (defined in
|
||||
# IEEE Std 1002.1-2001) for Unicode.
|
||||
#
|
||||
# http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html
|
||||
# http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html
|
||||
#
|
||||
# In fixed-width output devices, Latin characters all occupy a single
|
||||
# "cell" position of equal width, whereas ideographic CJK characters
|
||||
# occupy two such cells. Interoperability between terminal-line
|
||||
# applications and (teletype-style) character terminals using the
|
||||
# UTF-8 encoding requires agreement on which character should advance
|
||||
# the cursor by how many cell positions. No established formal
|
||||
# standards exist at present on which Unicode character shall occupy
|
||||
# how many cell positions on character terminals. These routines are
|
||||
# a first attempt of defining such behavior based on simple rules
|
||||
# applied to data provided by the Unicode Consortium.
|
||||
#
|
||||
# [...]
|
||||
#
|
||||
# Markus Kuhn -- 2007-05-26 (Unicode 5.0)
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software
|
||||
# for any purpose and without fee is hereby granted. The author
|
||||
# disclaims all warranties with regard to this software.
|
||||
#
|
||||
# Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
|
||||
|
||||
# Renamed but still pretty much JA's port of MK's code
|
||||
def _interval_bisearch(value, table):
|
||||
'''Binary search in an interval table.
|
||||
|
||||
:arg value: numeric value to search for
|
||||
:arg table: Ordered list of intervals. This is a list of two-tuples. The
|
||||
elements of the two-tuple define an interval's start and end points.
|
||||
:returns: If :attr:`value` is found within an interval in the :attr:`table`
|
||||
return :data:`True`. Otherwise, :data:`False`
|
||||
|
||||
This function checks whether a numeric value is present within a table
|
||||
of intervals. It checks using a binary search algorithm, dividing the
|
||||
list of values in half and checking against the values until it determines
|
||||
whether the value is in the table.
|
||||
'''
|
||||
minimum = 0
|
||||
maximum = len(table) - 1
|
||||
if value < table[minimum][0] or value > table[maximum][1]:
|
||||
return False
|
||||
|
||||
while maximum >= minimum:
|
||||
mid = (minimum + maximum) / 2
|
||||
if value > table[mid][1]:
|
||||
minimum = mid + 1
|
||||
elif value < table[mid][0]:
|
||||
maximum = mid - 1
|
||||
else:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
# Static, pre-generated table of combining/zero-width code point intervals.
# Regenerate with _generate_combining_table(); print with
# _print_combining_table().
_COMBINING = (
    (0x300, 0x36f), (0x483, 0x489), (0x591, 0x5bd),
    (0x5bf, 0x5bf), (0x5c1, 0x5c2), (0x5c4, 0x5c5),
    (0x5c7, 0x5c7), (0x600, 0x603), (0x610, 0x61a),
    (0x64b, 0x65e), (0x670, 0x670), (0x6d6, 0x6e4),
    (0x6e7, 0x6e8), (0x6ea, 0x6ed), (0x70f, 0x70f),
    (0x711, 0x711), (0x730, 0x74a), (0x7a6, 0x7b0),
    (0x7eb, 0x7f3), (0x816, 0x819), (0x81b, 0x823),
    (0x825, 0x827), (0x829, 0x82d), (0x901, 0x902),
    (0x93c, 0x93c), (0x941, 0x948), (0x94d, 0x94d),
    (0x951, 0x954), (0x962, 0x963), (0x981, 0x981),
    (0x9bc, 0x9bc), (0x9c1, 0x9c4), (0x9cd, 0x9cd),
    (0x9e2, 0x9e3), (0xa01, 0xa02), (0xa3c, 0xa3c),
    (0xa41, 0xa42), (0xa47, 0xa48), (0xa4b, 0xa4d),
    (0xa70, 0xa71), (0xa81, 0xa82), (0xabc, 0xabc),
    (0xac1, 0xac5), (0xac7, 0xac8), (0xacd, 0xacd),
    (0xae2, 0xae3), (0xb01, 0xb01), (0xb3c, 0xb3c),
    (0xb3f, 0xb3f), (0xb41, 0xb43), (0xb4d, 0xb4d),
    (0xb56, 0xb56), (0xb82, 0xb82), (0xbc0, 0xbc0),
    (0xbcd, 0xbcd), (0xc3e, 0xc40), (0xc46, 0xc48),
    (0xc4a, 0xc4d), (0xc55, 0xc56), (0xcbc, 0xcbc),
    (0xcbf, 0xcbf), (0xcc6, 0xcc6), (0xccc, 0xccd),
    (0xce2, 0xce3), (0xd41, 0xd43), (0xd4d, 0xd4d),
    (0xdca, 0xdca), (0xdd2, 0xdd4), (0xdd6, 0xdd6),
    (0xe31, 0xe31), (0xe34, 0xe3a), (0xe47, 0xe4e),
    (0xeb1, 0xeb1), (0xeb4, 0xeb9), (0xebb, 0xebc),
    (0xec8, 0xecd), (0xf18, 0xf19), (0xf35, 0xf35),
    (0xf37, 0xf37), (0xf39, 0xf39), (0xf71, 0xf7e),
    (0xf80, 0xf84), (0xf86, 0xf87), (0xf90, 0xf97),
    (0xf99, 0xfbc), (0xfc6, 0xfc6), (0x102d, 0x1030),
    (0x1032, 0x1032), (0x1036, 0x1037), (0x1039, 0x103a),
    (0x1058, 0x1059), (0x108d, 0x108d), (0x1160, 0x11ff),
    (0x135f, 0x135f), (0x1712, 0x1714), (0x1732, 0x1734),
    (0x1752, 0x1753), (0x1772, 0x1773), (0x17b4, 0x17b5),
    (0x17b7, 0x17bd), (0x17c6, 0x17c6), (0x17c9, 0x17d3),
    (0x17dd, 0x17dd), (0x180b, 0x180d), (0x18a9, 0x18a9),
    (0x1920, 0x1922), (0x1927, 0x1928), (0x1932, 0x1932),
    (0x1939, 0x193b), (0x1a17, 0x1a18), (0x1a60, 0x1a60),
    (0x1a75, 0x1a7c), (0x1a7f, 0x1a7f), (0x1b00, 0x1b03),
    (0x1b34, 0x1b34), (0x1b36, 0x1b3a), (0x1b3c, 0x1b3c),
    (0x1b42, 0x1b42), (0x1b44, 0x1b44), (0x1b6b, 0x1b73),
    (0x1baa, 0x1baa), (0x1c37, 0x1c37), (0x1cd0, 0x1cd2),
    (0x1cd4, 0x1ce0), (0x1ce2, 0x1ce8), (0x1ced, 0x1ced),
    (0x1dc0, 0x1de6), (0x1dfd, 0x1dff), (0x200b, 0x200f),
    (0x202a, 0x202e), (0x2060, 0x2063), (0x206a, 0x206f),
    (0x20d0, 0x20f0), (0x2cef, 0x2cf1), (0x2de0, 0x2dff),
    (0x302a, 0x302f), (0x3099, 0x309a), (0xa66f, 0xa66f),
    (0xa67c, 0xa67d), (0xa6f0, 0xa6f1), (0xa806, 0xa806),
    (0xa80b, 0xa80b), (0xa825, 0xa826), (0xa8c4, 0xa8c4),
    (0xa8e0, 0xa8f1), (0xa92b, 0xa92d), (0xa953, 0xa953),
    (0xa9b3, 0xa9b3), (0xa9c0, 0xa9c0), (0xaab0, 0xaab0),
    (0xaab2, 0xaab4), (0xaab7, 0xaab8), (0xaabe, 0xaabf),
    (0xaac1, 0xaac1), (0xabed, 0xabed), (0xfb1e, 0xfb1e),
    (0xfe00, 0xfe0f), (0xfe20, 0xfe26), (0xfeff, 0xfeff),
    (0xfff9, 0xfffb), (0x101fd, 0x101fd), (0x10a01, 0x10a03),
    (0x10a05, 0x10a06), (0x10a0c, 0x10a0f), (0x10a38, 0x10a3a),
    (0x10a3f, 0x10a3f), (0x110b9, 0x110ba), (0x1d165, 0x1d169),
    (0x1d16d, 0x1d182), (0x1d185, 0x1d18b), (0x1d1aa, 0x1d1ad),
    (0x1d242, 0x1d244), (0xe0001, 0xe0001), (0xe0020, 0xe007f),
    (0xe0100, 0xe01ef), )
'''
Internal table, provided by this module to list :term:`code points` which
combine with other characters and therefore should have no :term:`textual
width`.  This is a sorted :class:`tuple` of non-overlapping intervals.  Each
interval is a :class:`tuple` listing a starting :term:`code point` and ending
:term:`code point`.  Every :term:`code point` between the two end points is
a combining character.

.. seealso::

    :func:`~kitchen.text.display._generate_combining_table`
        for how this table is generated

This table was last regenerated on python-2.7.0 with
:data:`unicodedata.unidata_version` 5.1.0
'''
|
||||
|
||||
# New function from Toshio Kuratomi (LGPLv2+)
|
||||
def _generate_combining_table():
    '''Combine Markus Kuhn's data with :mod:`unicodedata` to make combining
    char list

    :rtype: :class:`tuple` of tuples
    :returns: :class:`tuple` of intervals of :term:`code points` that are
        combining character.  Each interval is a 2-:class:`tuple` of the
        starting :term:`code point` and the ending :term:`code point` for the
        combining characters.

    In normal use, this function serves to tell how we're generating the
    combining char list.  For speed reasons, we use this to generate a static
    list and just use that later.

    Markus Kuhn's list of combining characters is more complete than what's in
    the python :mod:`unicodedata` library but the python :mod:`unicodedata` is
    synced against later versions of the unicode database

    This is used to generate the :data:`~kitchen.text.display._COMBINING`
    table.
    '''
    # Marcus Kuhn's sorted list of non-overlapping intervals of non-spacing
    # characters generated from Unicode 5.0 data by:
    # "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c"
    markus_kuhn_combining_5_0 = (
        ( 0x0300, 0x036F ), ( 0x0483, 0x0486 ), ( 0x0488, 0x0489 ),
        ( 0x0591, 0x05BD ), ( 0x05BF, 0x05BF ), ( 0x05C1, 0x05C2 ),
        ( 0x05C4, 0x05C5 ), ( 0x05C7, 0x05C7 ), ( 0x0600, 0x0603 ),
        ( 0x0610, 0x0615 ), ( 0x064B, 0x065E ), ( 0x0670, 0x0670 ),
        ( 0x06D6, 0x06E4 ), ( 0x06E7, 0x06E8 ), ( 0x06EA, 0x06ED ),
        ( 0x070F, 0x070F ), ( 0x0711, 0x0711 ), ( 0x0730, 0x074A ),
        ( 0x07A6, 0x07B0 ), ( 0x07EB, 0x07F3 ), ( 0x0901, 0x0902 ),
        ( 0x093C, 0x093C ), ( 0x0941, 0x0948 ), ( 0x094D, 0x094D ),
        ( 0x0951, 0x0954 ), ( 0x0962, 0x0963 ), ( 0x0981, 0x0981 ),
        ( 0x09BC, 0x09BC ), ( 0x09C1, 0x09C4 ), ( 0x09CD, 0x09CD ),
        ( 0x09E2, 0x09E3 ), ( 0x0A01, 0x0A02 ), ( 0x0A3C, 0x0A3C ),
        ( 0x0A41, 0x0A42 ), ( 0x0A47, 0x0A48 ), ( 0x0A4B, 0x0A4D ),
        ( 0x0A70, 0x0A71 ), ( 0x0A81, 0x0A82 ), ( 0x0ABC, 0x0ABC ),
        ( 0x0AC1, 0x0AC5 ), ( 0x0AC7, 0x0AC8 ), ( 0x0ACD, 0x0ACD ),
        ( 0x0AE2, 0x0AE3 ), ( 0x0B01, 0x0B01 ), ( 0x0B3C, 0x0B3C ),
        ( 0x0B3F, 0x0B3F ), ( 0x0B41, 0x0B43 ), ( 0x0B4D, 0x0B4D ),
        ( 0x0B56, 0x0B56 ), ( 0x0B82, 0x0B82 ), ( 0x0BC0, 0x0BC0 ),
        ( 0x0BCD, 0x0BCD ), ( 0x0C3E, 0x0C40 ), ( 0x0C46, 0x0C48 ),
        ( 0x0C4A, 0x0C4D ), ( 0x0C55, 0x0C56 ), ( 0x0CBC, 0x0CBC ),
        ( 0x0CBF, 0x0CBF ), ( 0x0CC6, 0x0CC6 ), ( 0x0CCC, 0x0CCD ),
        ( 0x0CE2, 0x0CE3 ), ( 0x0D41, 0x0D43 ), ( 0x0D4D, 0x0D4D ),
        ( 0x0DCA, 0x0DCA ), ( 0x0DD2, 0x0DD4 ), ( 0x0DD6, 0x0DD6 ),
        ( 0x0E31, 0x0E31 ), ( 0x0E34, 0x0E3A ), ( 0x0E47, 0x0E4E ),
        ( 0x0EB1, 0x0EB1 ), ( 0x0EB4, 0x0EB9 ), ( 0x0EBB, 0x0EBC ),
        ( 0x0EC8, 0x0ECD ), ( 0x0F18, 0x0F19 ), ( 0x0F35, 0x0F35 ),
        ( 0x0F37, 0x0F37 ), ( 0x0F39, 0x0F39 ), ( 0x0F71, 0x0F7E ),
        ( 0x0F80, 0x0F84 ), ( 0x0F86, 0x0F87 ), ( 0x0F90, 0x0F97 ),
        ( 0x0F99, 0x0FBC ), ( 0x0FC6, 0x0FC6 ), ( 0x102D, 0x1030 ),
        ( 0x1032, 0x1032 ), ( 0x1036, 0x1037 ), ( 0x1039, 0x1039 ),
        ( 0x1058, 0x1059 ), ( 0x1160, 0x11FF ), ( 0x135F, 0x135F ),
        ( 0x1712, 0x1714 ), ( 0x1732, 0x1734 ), ( 0x1752, 0x1753 ),
        ( 0x1772, 0x1773 ), ( 0x17B4, 0x17B5 ), ( 0x17B7, 0x17BD ),
        ( 0x17C6, 0x17C6 ), ( 0x17C9, 0x17D3 ), ( 0x17DD, 0x17DD ),
        ( 0x180B, 0x180D ), ( 0x18A9, 0x18A9 ), ( 0x1920, 0x1922 ),
        ( 0x1927, 0x1928 ), ( 0x1932, 0x1932 ), ( 0x1939, 0x193B ),
        ( 0x1A17, 0x1A18 ), ( 0x1B00, 0x1B03 ), ( 0x1B34, 0x1B34 ),
        ( 0x1B36, 0x1B3A ), ( 0x1B3C, 0x1B3C ), ( 0x1B42, 0x1B42 ),
        ( 0x1B6B, 0x1B73 ), ( 0x1DC0, 0x1DCA ), ( 0x1DFE, 0x1DFF ),
        ( 0x200B, 0x200F ), ( 0x202A, 0x202E ), ( 0x2060, 0x2063 ),
        ( 0x206A, 0x206F ), ( 0x20D0, 0x20EF ), ( 0x302A, 0x302F ),
        ( 0x3099, 0x309A ), ( 0xA806, 0xA806 ), ( 0xA80B, 0xA80B ),
        ( 0xA825, 0xA826 ), ( 0xFB1E, 0xFB1E ), ( 0xFE00, 0xFE0F ),
        ( 0xFE20, 0xFE23 ), ( 0xFEFF, 0xFEFF ), ( 0xFFF9, 0xFFFB ),
        ( 0x10A01, 0x10A03 ), ( 0x10A05, 0x10A06 ), ( 0x10A0C, 0x10A0F ),
        ( 0x10A38, 0x10A3A ), ( 0x10A3F, 0x10A3F ), ( 0x1D167, 0x1D169 ),
        ( 0x1D173, 0x1D182 ), ( 0x1D185, 0x1D18B ), ( 0x1D1AA, 0x1D1AD ),
        ( 0x1D242, 0x1D244 ), ( 0xE0001, 0xE0001 ), ( 0xE0020, 0xE007F ),
        ( 0xE0100, 0xE01EF ))
    # Walk every code point, merging Kuhn's intervals with unicodedata's
    # combining() classification into a fresh list of closed intervals
    combining = []
    in_interval = False
    interval = []
    for codepoint in xrange(0, 0xFFFFF + 1):
        if _interval_bisearch(codepoint, markus_kuhn_combining_5_0) or \
                unicodedata.combining(unichr(codepoint)):
            if not in_interval:
                # Found first part of an interval
                interval = [codepoint]
                in_interval = True
        else:
            if in_interval:
                # First non-combining code point after a run: close the
                # interval at the previous code point
                in_interval = False
                interval.append(codepoint - 1)
                combining.append(interval)

    if in_interval:
        # If we're at the end and the interval is open, close it.
        # :W0631: We looped through a static range so we know codepoint is
        # defined here
        #pylint:disable-msg=W0631
        interval.append(codepoint)
        combining.append(interval)

    # Freeze the result as a tuple of 2-tuples
    return tuple(itertools.imap(tuple, combining))
|
||||
|
||||
# New function from Toshio Kuratomi (LGPLv2+)
|
||||
def _print_combining_table():
    '''Print out a new :data:`_COMBINING` table

    This will print a new :data:`_COMBINING` table in the format used in
    :file:`kitchen/text/display.py`.  It's useful for updating the
    :data:`_COMBINING` table with updated data from a new python as the format
    won't change from what's already in the file.
    '''
    table = _generate_combining_table()
    # entries counts (start, end) pairs already emitted on the current
    # output line; we put three pairs per line
    entries = 0
    print '_COMBINING = ('
    for pair in table:
        if entries >= 3:
            # Line is full: start a new one
            entries = 0
            print
        if entries == 0:
            # Leading indent; trailing comma suppresses the newline
            print ' ',
        entries += 1
        entry = '(0x%x, 0x%x),' % pair
        # Trailing comma keeps pairs on the same line, space separated
        print entry,
    print ')'
|
||||
|
||||
# Handling of control chars rewritten. Rest is JA's port of MK's C code.
|
||||
# -Toshio Kuratomi
|
||||
def _ucp_width(ucs, control_chars='guess'):
    '''Get the :term:`textual width` of a ucs character

    :arg ucs: integer representing a single unicode :term:`code point`
    :kwarg control_chars: specify how to deal with :term:`control
        characters`.  Possible values are:

        :guess: (default) will take a guess for :term:`control character`
            widths.  Most codes will return zero width.  ``backspace``,
            ``delete``, and ``clear delete`` return -1.  ``escape``
            currently returns -1 as well but this is not guaranteed as it's
            not always correct
        :strict: will raise :exc:`~kitchen.text.exceptions.ControlCharError`
            if a :term:`control character` is encountered

    :raises ControlCharError: if the :term:`code point` is a unicode
        :term:`control character` and :attr:`control_chars` is set to
        'strict'
    :returns: :term:`textual width` of the character.

    .. note::

        It's important to remember this is :term:`textual width` and not the
        number of characters or bytes.
    '''
    # C0 and C1 control characters have no well-defined width
    if ucs < 32 or 0x7f <= ucs < 0xa0:
        if control_chars == 'strict':
            raise ControlCharError(b_('_ucp_width does not understand how to'
                    ' assign a width value to control characters.'))
        # Backspace, delete, and clear delete each erase one cell.  Escape
        # is tricky: it removes some number of following characters
        # depending on the interpreter, so -1 is a guess that is often
        # wrong -- but so would any other value be.
        if ucs in (0x08, 0x7f, 0x94, 0x1b):
            return -1
        # Every other control character is treated as zero width
        return 0

    # Combining characters merge into a neighboring glyph: zero width
    if _interval_bisearch(ucs, _COMBINING):
        return 0

    # Neither combining nor a C0/C1 control character from here on.
    # Everything below U+1100 is narrow.
    if ucs < 0x1100:
        return 1

    # Wide (two-cell) ranges, following Markus Kuhn's wcwidth tables
    wide = (ucs <= 0x115f or                  # Hangul Jamo init. consonants
            ucs == 0x2329 or ucs == 0x232a or
            (0x2e80 <= ucs <= 0xa4cf and
                ucs != 0x303f) or             # CJK ... Yi
            0xac00 <= ucs <= 0xd7a3 or        # Hangul Syllables
            0xf900 <= ucs <= 0xfaff or        # CJK Compatibility Ideographs
            0xfe10 <= ucs <= 0xfe19 or        # Vertical forms
            0xfe30 <= ucs <= 0xfe6f or        # CJK Compatibility Forms
            0xff00 <= ucs <= 0xff60 or        # Fullwidth Forms
            0xffe0 <= ucs <= 0xffe6 or
            0x20000 <= ucs <= 0x2fffd or
            0x30000 <= ucs <= 0x3fffd)
    if wide:
        return 2
    return 1
|
||||
|
||||
# Wholly rewritten by me (LGPLv2+) -Toshio Kuratomi
|
||||
def textual_width(msg, control_chars='guess', encoding='utf-8',
        errors='replace'):
    '''Get the :term:`textual width` of a string

    :arg msg: :class:`unicode` string or byte :class:`str` to get the width
        of
    :kwarg control_chars: specify how to deal with :term:`control
        characters`.  Possible values are:

        :guess: (default) will take a guess for :term:`control character`
            widths.  Most codes will return zero width.  ``backspace``,
            ``delete``, and ``clear delete`` return -1.  ``escape``
            currently returns -1 as well but this is not guaranteed as it's
            not always correct
        :strict: will raise :exc:`kitchen.text.exceptions.ControlCharError`
            if a :term:`control character` is encountered

    :kwarg encoding: If we are given a byte :class:`str` this is used to
        decode it into a :class:`unicode` string.  Any characters that are
        not decodable in this encoding will get a value dependent on the
        :attr:`errors` parameter.
    :kwarg errors: How to treat errors encoding the byte :class:`str` to
        :class:`unicode` string.  Legal values are the same as for
        :func:`kitchen.text.converters.to_unicode`.  The default value of
        ``replace`` will cause undecodable byte sequences to have a width of
        one.  ``ignore`` will have a width of zero.
    :raises ControlCharError: if :attr:`msg` contains a :term:`control
        character` and :attr:`control_chars` is ``strict``.
    :returns: :term:`Textual width` of the :attr:`msg`.  This is the amount
        of space that the string will consume on a monospace display.  It's
        measured in the number of cell positions or columns it will take up
        on a monospace display.  This is **not** the number of glyphs that
        are in the string.

    .. note::

        This function can be wrong sometimes because Unicode does not
        specify a strict width value for all of the :term:`code points`.  In
        particular, we've found that some Tamil characters take up to four
        character cells but we return a lesser amount.
    '''
    # Undecodable bytes are assigned a single cell width (with the default
    # errors='replace'); see the errors kwarg above
    msg = to_unicode(msg, encoding=encoding, errors=errors)
    # imap over two iterables zips them lazily, so each character's ordinal
    # is paired with control_chars and fed to _ucp_width; the infinite
    # repeat() simply stops when msg is exhausted.  C-level iteration here
    # benchmarked faster than comprehension/genexp/lambda alternatives.
    per_char_widths = itertools.imap(_ucp_width,
            itertools.imap(ord, msg),
            itertools.repeat(control_chars))
    return sum(per_char_widths)
|
||||
|
||||
# Wholly rewritten by me -Toshio Kuratomi
|
||||
def textual_width_chop(msg, chop, encoding='utf-8', errors='replace'):
    '''Given a string, return it chopped to a given :term:`textual width`

    :arg msg: :class:`unicode` string or byte :class:`str` to chop
    :arg chop: Chop :attr:`msg` if it exceeds this :term:`textual width`
    :kwarg encoding: If we are given a byte :class:`str`, this is used to
        decode it into a :class:`unicode` string.  Any characters that are not
        decodable in this encoding will be assigned a width of one.
    :kwarg errors: How to treat errors encoding the byte :class:`str` to
        :class:`unicode`.  Legal values are the same as for
        :func:`kitchen.text.converters.to_unicode`
    :rtype: :class:`unicode` string
    :returns: :class:`unicode` string of the :attr:`msg` chopped at the given
        :term:`textual width`

    This is what you want to use instead of ``%.*s``, as it does the "right"
    thing with regard to :term:`UTF-8` sequences, :term:`control characters`,
    and characters that take more than one cell position.  Eg::

        >>> # Wrong: only displays 8 characters because it is operating on bytes
        >>> print "%.*s" % (10, 'café ñunru!')
        café ñun
        >>> # Properly operates on graphemes
        >>> '%s' % (textual_width_chop('café ñunru!', 10))
        café ñunru
        >>> # takes too many columns because the kanji need two cell positions
        >>> print '1234567890\\n%.*s' % (10, u'一二三四五六七八九十')
        1234567890
        一二三四五六七八九十
        >>> # Properly chops at 10 columns
        >>> print '1234567890\\n%s' % (textual_width_chop(u'一二三四五六七八九十', 10))
        1234567890
        一二三四五

    '''
    # Non-decodable bytes are counted as one cell each by textual_width()

    msg = to_unicode(msg, encoding=encoding, errors=errors)

    width = textual_width(msg)
    if width <= chop:
        # Whole string already fits in the requested width
        return msg
    maximum = len(msg)
    if maximum > chop * 2:
        # A character can take at most 2 cell positions so this is the actual
        # maximum
        maximum = chop * 2
    minimum = 0
    eos = maximum
    if eos > chop:
        eos = chop
    width = textual_width(msg[:eos])

    # Binary search on the character index `eos`, tracking the textual width
    # of msg[:eos] incrementally (widths are recomputed only over the slice
    # between the old and new candidate endpoints, never the whole string).
    while True:
        # if current width is high,
        if width > chop:
            # calculate new midpoint
            mid = minimum + (eos - minimum) / 2
            if mid == eos:
                # Search interval exhausted; msg[:eos] is the best answer
                break
            if (eos - chop) < (eos - mid):
                # Close enough to the target to finish by walking backwards
                # one character at a time instead of continuing to bisect
                while width > chop:
                    width = width - _ucp_width(ord(msg[eos-1]))
                    eos -= 1
                return msg[:eos]
            # subtract distance between eos and mid from width
            width = width - textual_width(msg[mid:eos])
            maximum = eos
            eos = mid
        # if current width is low,
        elif width < chop:
            # Note: at present, the if (eos - chop) < (eos - mid):
            # short-circuit above means that we never use this branch.

            # calculate new midpoint
            mid = eos + (maximum - eos) / 2
            if mid == eos:
                break
            if (chop - eos) < (mid - eos):
                # Close to the target: walk forward one character at a time
                while width < chop:
                    new_width = _ucp_width(ord(msg[eos]))
                    width = width + new_width
                    eos += 1
                return msg[:eos]

            # add distance between eos and new mid to width
            width = width + textual_width(msg[eos:mid])
            minimum = eos
            eos = mid
            if eos > maximum:
                eos = maximum
                break
        # if current is just right
        else:
            return msg[:eos]
    return msg[:eos]
|
||||
|
||||
# I made some adjustments for using unicode but largely unchanged from JA's
|
||||
# port of MK's code -Toshio
|
||||
def textual_width_fill(msg, fill, chop=None, left=True, prefix='', suffix=''):
    '''Pad (or chop) a :class:`unicode` string to a :term:`textual width`

    :arg msg: :class:`unicode` string to format
    :arg fill: pad the string with spaces until its :term:`textual width`
        reaches this value
    :kwarg chop: before anything else, chop the string to this
        :term:`textual width`.  Default: do not chop
    :kwarg left: if :data:`True` (default) left justify and pad on the
        right; if :data:`False`, pad on the left
    :kwarg prefix: string attached immediately before :attr:`msg`
    :kwarg suffix: string attached immediately after :attr:`msg`
    :rtype: :class:`unicode` string
    :returns: :attr:`msg` padded out to :attr:`fill` cells.  Without
        :attr:`chop` the result may exceed :attr:`fill`; printable
        :attr:`prefix`/:attr:`suffix` characters also add to the length.

    Use this instead of ``%*.*s``: it counts cell positions rather than
    characters or bytes, so :term:`UTF-8` sequences, :term:`control
    characters`, and double-width characters are all handled correctly.

    .. note::

        :attr:`prefix` and :attr:`suffix` are intended for "invisible"
        text such as highlighting or color escape codes.  The space
        padding is placed outside of them, so only :attr:`msg` itself is
        highlighted within the filled field.

    .. warning::

        :attr:`msg`, :attr:`prefix`, and :attr:`suffix` must all be
        representable as :class:`unicode`.  For arbitrary byte sequences
        (e.g. raw escape sequences) use
        :func:`~kitchen.text.display.byte_string_textual_width_fill`
        instead.
    '''
    msg = to_unicode(msg)
    if chop is not None:
        msg = textual_width_chop(msg, chop)
    padding = fill - textual_width(msg)

    # Field already full (or overfull): no padding, but still wrap with
    # prefix/suffix when either was supplied
    if padding <= 0 and not (prefix or suffix):
        return msg

    parts = [prefix, msg, suffix]
    if padding > 0:
        spaces = u' ' * padding
        if left:
            parts.append(spaces)
        else:
            parts.insert(0, spaces)
    return u''.join(parts)
|
||||
|
||||
def _textual_width_le(width, *args):
    '''Cheaply decide whether some strings fit within a :term:`textual width`

    :arg width: :term:`textual width` to compare against.
    :arg \*args: :class:`unicode` strings whose combined :term:`textual
        width` we are checking
    :returns: :data:`True` if the total width of :attr:`args` is less than
        or equal to :attr:`width`.  Otherwise :data:`False`.

    A full :func:`textual_width` computation is comparatively slow, so we
    try three shortcuts first.  After NFC normalization every character
    occupies between one and two cells, which gives us:

    1) more characters than ``width`` can never fit;
    2) characters * 2 at or under ``width`` always fits;
    3) the utf8 byte length is always >= the textual width, so a byte
       length at or under ``width`` also fits.

    Only when all three are inconclusive do we fall back to the full
    :func:`textual_width` calculation.
    '''
    composed = unicodedata.normalize('NFC', ''.join(args))
    length = len(composed)

    if length > width:
        # Every char is at least one cell wide: cannot fit
        return False
    if length * 2 <= width:
        # Every char is at most two cells wide: must fit
        return True
    if len(to_bytes(composed)) <= width:
        # utf8 uses at least as many bytes per char as cells
        return True
    # Shortcuts inconclusive; do the real computation
    return textual_width(composed) <= width
|
||||
|
||||
def wrap(text, width=70, initial_indent=u'', subsequent_indent=u'',
        encoding='utf-8', errors='replace'):
    '''Works like we want :func:`textwrap.wrap` to work,

    :arg text: :class:`unicode` string or byte :class:`str` to wrap
    :kwarg width: :term:`textual width` at which to wrap.  Default: 70
    :kwarg initial_indent: string to use to indent the first line.  Default:
        do not indent.
    :kwarg subsequent_indent: string to use to wrap subsequent lines.
        Default: do not indent
    :kwarg encoding: Encoding to use if :attr:`text` is a byte :class:`str`
    :kwarg errors: error handler to use if :attr:`text` is a byte :class:`str`
        and contains some undecodable characters.
    :rtype: :class:`list` of :class:`unicode` strings
    :returns: list of lines that have been text wrapped and indented.

    :func:`textwrap.wrap` from the |stdlib|_ has two drawbacks that this
    attempts to fix:

    1. It does not handle :term:`textual width`.  It only operates on bytes
       or characters which are both inadequate (due to multi-byte and double
       width characters).
    2. It malforms lists and blocks.
    '''
    # Tested with:
    # yum info robodoc gpicview php-pear-Net-Socket wmctrl ustr moreutils
    #          mediawiki-HNP ocspd insight yum mousepad
    # ...at 120, 80 and 40 chars.
    # Also, notable among lots of others, searching for "\n  ":
    #   exim-clamav, jpackage-utils, tcldom, synaptics, "quake3",
    #   perl-Class-Container, ez-ipupdate, perl-Net-XMPP, "kipi-plugins",
    #   perl-Apache-DBI, netcdf, python-configobj, "translate-toolkit", alpine,
    #   "udunits", "conntrack-tools"
    #
    # Note that, we "fail" on:
    #   alsa-plugins-jack, setools*, dblatex, uisp, "perl-Getopt-GUI-Long",
    #   suitesparse, "synce-serial", writer2latex, xenwatch, ltsp-utils

    def _indent_at_beg(line):
        '''Return the indent to use for this and (possibly) subsequent lines

        :arg line: :class:`unicode` line of text to process
        :rtype: tuple
        :returns: tuple of count of whitespace before getting to the start of
            this line followed by a count to the following indent if this
            block of text is an entry in a list.
        '''
        # Find the first non-whitespace character
        try:
            char = line.strip()[0]
        except IndexError:
            # All whitespace
            return 0, 0
        else:
            count = line.find(char)

        # if we have a bullet character, check for list
        if char not in u'-*.o\u2022\u2023\u2218':
            # No bullet; not a list
            return count, 0

        # List: Keep searching until we hit the innermost list
        # (recurse past the bullet to find nested bullets/indent)
        nxt = _indent_at_beg(line[count+1:])
        nxt = nxt[1] or nxt[0]
        if nxt:
            return count, count + 1 + nxt
        return count, 0

    initial_indent = to_unicode(initial_indent, encoding=encoding,
            errors=errors)
    subsequent_indent = to_unicode(subsequent_indent, encoding=encoding,
            errors=errors)
    subsequent_indent_width = textual_width(subsequent_indent)

    text = to_unicode(text, encoding=encoding, errors=errors).rstrip(u'\n')
    lines = text.expandtabs().split(u'\n')

    ret = []
    indent = initial_indent
    # wrap_last: True while we are in the middle of re-flowing a logical
    # paragraph across several physical input lines
    wrap_last = False
    # cur_sab: leading-whitespace count of the current line;
    # cur_spc_indent: indent of the innermost list entry (0 if not a list)
    cur_sab = 0
    cur_spc_indent = 0
    for line in lines:
        line = line.rstrip(u' ')
        (last_sab, last_spc_indent) = (cur_sab, cur_spc_indent)
        (cur_sab, cur_spc_indent) = _indent_at_beg(line)
        force_nl = False # We want to stop wrapping under "certain" conditions:
        if wrap_last and cur_spc_indent:      # if line starts a list or
            force_nl = True
        if wrap_last and cur_sab == len(line):# is empty line
            force_nl = True
        if wrap_last and not last_spc_indent: # if we don't continue a list
            if cur_sab >= 4 and cur_sab != last_sab: # and is "block indented"
                force_nl = True
        if force_nl:
            # Flush the paragraph accumulated so far before starting fresh
            ret.append(indent.rstrip(u' '))
            indent = subsequent_indent
            wrap_last = False
        if cur_sab == len(line): # empty line, remove spaces to make it easier.
            line = u''
        if wrap_last:
            # Continuing a paragraph: joining logic supplies the spacing
            line = line.lstrip(u' ')
            cur_spc_indent = last_spc_indent

        if _textual_width_le(width, indent, line):
            # Line fits as-is; emit it without re-flowing
            wrap_last = False
            ret.append(indent + line)
            indent = subsequent_indent
            continue

        # Line is too wide: re-flow it word by word
        wrap_last = True
        words = line.split(u' ')
        line = indent
        spcs = cur_spc_indent
        if not spcs and cur_sab >= 4:
            spcs = cur_sab
        for word in words:
            if (not _textual_width_le(width, line, word) and
                    textual_width(line) > subsequent_indent_width):
                # Word won't fit; flush current output line and start another
                ret.append(line.rstrip(u' '))
                line = subsequent_indent + u' ' * spcs
            line += word
            line += u' '
        # Carry the partial line over in `indent` so the next input line can
        # continue the same paragraph
        indent = line.rstrip(u' ') + u' '
    if wrap_last:
        # Flush any paragraph still pending at end of input
        ret.append(indent.rstrip(u' '))

    return ret
|
||||
|
||||
def fill(text, *args, **kwargs):
    '''Works like we want :func:`textwrap.fill` to work

    :arg text: :class:`unicode` string or byte :class:`str` to process
    :returns: :class:`unicode` string with each line separated by a newline

    .. seealso::

        :func:`kitchen.text.display.wrap`
            for other parameters that you can give this command.

    Thin convenience wrapper around :func:`kitchen.text.display.wrap`:
    where that function returns a :class:`list` of lines, this one joins
    them into a single newline-separated string.
    '''
    wrapped_lines = wrap(text, *args, **kwargs)
    return u'\n'.join(wrapped_lines)
|
||||
|
||||
#
|
||||
# Byte strings
|
||||
#
|
||||
|
||||
def byte_string_textual_width_fill(msg, fill, chop=None, left=True, prefix='',
        suffix='', encoding='utf-8', errors='replace'):
    '''Expand a byte :class:`str` to a specified :term:`textual width` or chop
    to same

    :arg msg: byte :class:`str` encoded in :term:`UTF-8` that we want formatted
    :arg fill: pad :attr:`msg` until the :term:`textual width` is this long
    :kwarg chop: before doing anything else, chop the string to this length.
        Default: Don't chop the string at all
    :kwarg left: If :data:`True` (default) left justify the string and put the
        padding on the right.  If :data:`False`, pad on the left side.
    :kwarg prefix: Attach this byte :class:`str` before the field we're
        filling
    :kwarg suffix: Append this byte :class:`str` to the end of the field we're
        filling
    :kwarg encoding: encoding used for :attr:`msg`, :attr:`prefix`, and
        :attr:`suffix` and for the returned byte :class:`str`.
        Default: :term:`UTF-8`
    :kwarg errors: error handler to use when encoding and decoding.  Legal
        values are the same as for
        :func:`kitchen.text.converters.to_unicode`
    :rtype: byte :class:`str`
    :returns: :attr:`msg` formatted to fill the specified :term:`textual
        width`.  If no :attr:`chop` is specified, the string could exceed the
        fill length when completed.  If :attr:`prefix` or :attr:`suffix` are
        printable characters, the string could be longer than fill width.

    .. note::

        :attr:`prefix` and :attr:`suffix` should be used for "invisible"
        characters like highlighting, color changing escape codes, etc.  The
        fill characters are appended outside of any :attr:`prefix` or
        :attr:`suffix` elements.  This allows you to only highlight
        :attr:`msg` inside of the field you're filling.

    .. seealso::

        :func:`~kitchen.text.display.textual_width_fill`
            For example usage.  This function has only two differences.

            1. it takes byte :class:`str` for :attr:`prefix` and
               :attr:`suffix` so you can pass in arbitrary sequences of
               bytes, not just unicode characters.
            2. it returns a byte :class:`str` instead of a :class:`unicode`
               string.
    '''
    prefix = to_bytes(prefix, encoding=encoding, errors=errors)
    suffix = to_bytes(suffix, encoding=encoding, errors=errors)

    if chop is not None:
        msg = textual_width_chop(msg, chop, encoding=encoding, errors=errors)
    width = textual_width(msg)
    # Bug fix: encode msg with the caller's encoding/errors instead of the
    # to_bytes() defaults so the output bytes are consistent with how prefix
    # and suffix were encoded above (previously non-utf8 encodings produced
    # a mixed-encoding result).
    msg = to_bytes(msg, encoding=encoding, errors=errors)

    if width >= fill:
        # Field already full; padding is never added, only prefix/suffix
        if prefix or suffix:
            msg = ''.join([prefix, msg, suffix])
    else:
        extra = ' ' * (fill - width)
        if left:
            msg = ''.join([prefix, msg, suffix, extra])
        else:
            msg = ''.join([extra, prefix, msg, suffix])

    return msg
|
||||
|
||||
__all__ = ('byte_string_textual_width_fill', 'fill', 'textual_width',
|
||||
'textual_width_chop', 'textual_width_fill', 'wrap')
|
40
kitchen/text/exceptions.py
Normal file
40
kitchen/text/exceptions.py
Normal file
|
@ -0,0 +1,40 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2010 Red Hat, Inc
|
||||
#
|
||||
# kitchen is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# kitchen is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with kitchen; if not, see <http://www.gnu.org/licenses/>
|
||||
#
|
||||
# Authors:
|
||||
# Toshio Kuratomi <toshio@fedoraproject.org>
|
||||
#
|
||||
'''
|
||||
-----------------------
|
||||
Kitchen.text exceptions
|
||||
-----------------------
|
||||
|
||||
Exception classes thrown by kitchen's text processing routines.
|
||||
'''
|
||||
from kitchen import exceptions
|
||||
|
||||
class XmlEncodeError(exceptions.KitchenError):
    '''Raised when an error condition occurs while encoding a string as xml.
    '''
|
||||
|
||||
class ControlCharError(exceptions.KitchenError):
    '''Raised when an ascii control character is encountered.
    '''
|
||||
|
||||
__all__ = ('XmlEncodeError', 'ControlCharError')
|
313
kitchen/text/misc.py
Normal file
313
kitchen/text/misc.py
Normal file
|
@ -0,0 +1,313 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) 2011 Red Hat, Inc
|
||||
# Copyright (c) 2010 Seth Vidal
|
||||
#
|
||||
# kitchen is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# kitchen is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with kitchen; if not, see <http://www.gnu.org/licenses/>
|
||||
#
|
||||
# Authors:
|
||||
# James Antill
|
||||
# Toshio Kuratomi <toshio@fedoraproject.org>
|
||||
# Seth Vidal
|
||||
#
|
||||
# Portions of this code taken from yum/misc.py and yum/i18n.py
|
||||
'''
|
||||
---------------------------------------------
|
||||
Miscellaneous functions for manipulating text
|
||||
---------------------------------------------
|
||||
|
||||
Collection of text functions that don't fit in another category.
|
||||
'''
|
||||
import htmlentitydefs
|
||||
import itertools
|
||||
import re
|
||||
|
||||
try:
|
||||
import chardet
|
||||
except ImportError:
|
||||
chardet = None
|
||||
|
||||
# We need to access b_() for localizing our strings but we'll end up with
|
||||
# a circular import if we import it directly.
|
||||
import kitchen as k
|
||||
from kitchen.pycompat24 import sets
|
||||
from kitchen.text.exceptions import ControlCharError
|
||||
|
||||
sets.add_builtin_set()
|
||||
|
||||
# Define a threshold for chardet confidence. If we fall below this we decode
|
||||
# byte strings we're guessing about as latin1
|
||||
_CHARDET_THRESHHOLD = 0.6
|
||||
|
||||
# ASCII control codes that are illegal in xml 1.0
|
||||
_CONTROL_CODES = frozenset(range(0, 8) + [11, 12] + range(14, 32))
|
||||
_CONTROL_CHARS = frozenset(itertools.imap(unichr, _CONTROL_CODES))
|
||||
|
||||
# _ENTITY_RE
|
||||
_ENTITY_RE = re.compile(r'(?s)<[^>]*>|&#?\w+;')
|
||||
|
||||
def guess_encoding(byte_string, disable_chardet=False):
    '''Try to guess the encoding of a byte :class:`str`

    :arg byte_string: byte :class:`str` to guess the encoding of
    :kwarg disable_chardet: If this is True, we never attempt to use
        :mod:`chardet` to guess the encoding.  This is useful if you need to
        have reproducibility whether :mod:`chardet` is installed or not.
        Default: :data:`False`.
    :raises TypeError: if :attr:`byte_string` is not a byte :class:`str` type
    :returns: string containing a guess at the encoding of
        :attr:`byte_string`, suitable as the encoding argument when encoding
        and decoding unicode strings.

    The guess proceeds in three steps.  First we try decoding as
    :term:`UTF-8`; if that succeeds we report ``utf-8``.  Otherwise, if
    :mod:`chardet` is available and :attr:`disable_chardet` is False, we
    accept its guess when its confidence is high enough.  Failing both, we
    fall back to ``latin-1``: every possible byte decodes under ``latin-1``,
    so no :exc:`UnicodeError` will occur, though the result may be mangled.
    '''
    if not isinstance(byte_string, str):
        raise TypeError(k.b_('byte_string must be a byte string (str)'))

    # Step 1: clean utf-8 decode wins outright
    try:
        unicode(byte_string, 'utf-8', 'strict')
        return 'utf-8'
    except UnicodeDecodeError:
        pass

    # Step 2: ask chardet, if present, allowed, and confident enough
    if chardet and not disable_chardet:
        detected = chardet.detect(byte_string)
        if detected['confidence'] >= _CHARDET_THRESHHOLD and detected['encoding']:
            return detected['encoding']

    # Step 3: latin-1 as the decode-anything fallback
    return 'latin-1'
|
||||
|
||||
def str_eq(str1, str2, encoding='utf-8', errors='replace'):
    '''Compare two strings, converting to byte :class:`str` if one is
    :class:`unicode`

    :arg str1: First string to compare
    :arg str2: Second string to compare
    :kwarg encoding: If we need to convert one string into a byte :class:`str`
        to compare, the encoding to use.  Default is :term:`utf-8`.
    :kwarg errors: What to do if we encounter errors when encoding the string.
        See the :func:`kitchen.text.converters.to_bytes` documentation for
        possible values.  The default is ``replace``.
    :returns: :data:`True` if the strings are equal (after any necessary
        conversion), otherwise :data:`False`.

    This function prevents :exc:`UnicodeError` (python-2.4 or less) and
    :exc:`UnicodeWarning` (python 2.5 and higher) when we compare
    a :class:`unicode` string to a byte :class:`str`.  The errors normally
    arise because the conversion is done to :term:`ASCII`.  This function
    lets you convert to :term:`utf-8` or another encoding instead.

    .. note::

        When we need to convert one of the strings from :class:`unicode` in
        order to compare them we convert the :class:`unicode` string into
        a byte :class:`str`.  That means that strings can compare differently
        if you use different encodings for each.

    Note that ``str1 == str2`` is faster than this function if you can accept
    the following limitations:

    * Limited to python-2.5+ (otherwise a :exc:`UnicodeDecodeError` may be
      thrown)
    * Will generate a :exc:`UnicodeWarning` if non-:term:`ASCII` byte
      :class:`str` is compared to :class:`unicode` string.
    '''
    # First try comparing directly.  Expressing equality as
    # "neither less-than nor greater-than" avoids __eq__ shortcuts and lets
    # a mixed unicode/bytes comparison surface as UnicodeError here.
    try:
        return (not str1 < str2) and (not str1 > str2)
    except UnicodeError:
        # Mixed unicode/bytes with non-ASCII content; fall through and
        # compare as byte strings in the requested encoding instead.
        pass

    # Exactly one of the two is unicode at this point; encode it so both
    # sides are byte strings in the same encoding.
    if isinstance(str1, unicode):
        str1 = str1.encode(encoding, errors)
    else:
        str2 = str2.encode(encoding, errors)
    if str1 == str2:
        return True

    return False
|
||||
|
||||
def process_control_chars(string, strategy='replace'):
    '''Look for and transform :term:`control characters` in a string

    :arg string: string to search for and transform :term:`control
        characters` within
    :kwarg strategy: XML does not allow :term:`ASCII` :term:`control
        characters`.  When we encounter those we need to know what to do.
        Valid options are:

        :replace: (default) Replace the :term:`control characters`
            with ``"?"``
        :ignore: Remove the characters altogether from the output
        :strict: Raise a :exc:`~kitchen.text.exceptions.ControlCharError`
            when we encounter a control character
    :raises TypeError: if :attr:`string` is not a unicode string.
    :raises ValueError: if the strategy is not one of replace, ignore, or
        strict.
    :raises kitchen.text.exceptions.ControlCharError: if the strategy is
        ``strict`` and a :term:`control character` is present in the
        :attr:`string`
    :returns: :class:`unicode` string with no :term:`control characters` in
        it.
    '''
    if not isinstance(string, unicode):
        raise TypeError(k.b_('process_control_char must have a unicode type as'
            ' the first argument.'))

    if strategy == 'strict':
        # No transformation; just verify no control codes are present
        present = frozenset(string)
        if [c for c in _CONTROL_CHARS if c in present]:
            raise ControlCharError(k.b_('ASCII control code present in string'
                ' input'))
        return string

    # Map every control code to its replacement: None deletes the character,
    # a one-character string substitutes it
    if strategy == 'ignore':
        replacement = None
    elif strategy == 'replace':
        replacement = u'?'
    else:
        raise ValueError(k.b_('The strategy argument to process_control_chars'
            ' must be one of ignore, replace, or strict'))

    table = dict((code, replacement) for code in _CONTROL_CODES)
    return string.translate(table)
|
||||
|
||||
# Originally written by Fredrik Lundh (January 15, 2003) and placed in the
|
||||
# public domain::
|
||||
#
|
||||
# Unless otherwise noted, source code can be be used freely. Examples, test
|
||||
# scripts and other short code fragments can be considered as being in the
|
||||
# public domain.
|
||||
#
|
||||
# http://effbot.org/zone/re-sub.htm#unescape-html
|
||||
# http://effbot.org/zone/copyright.htm
|
||||
#
|
||||
def html_entities_unescape(string):
    '''Substitute unicode characters for HTML entities

    :arg string: :class:`unicode` string to substitute out html entities
    :raises TypeError: if something other than a :class:`unicode` string is
        given
    :rtype: :class:`unicode` string
    :returns: The plain text without html entities
    '''
    def fixup(match):
        # Substitution callback for one _ENTITY_RE match: either a tag
        # (dropped) or an entity (replaced with its unicode character when
        # possible, otherwise left untouched).
        string = match.group(0)
        if string[:1] == u"<":
            return "" # ignore tags
        if string[:2] == u"&#":
            # Numeric character reference: &#NNN; or hex &#xNNN;
            try:
                if string[:3] == u"&#x":
                    return unichr(int(string[3:-1], 16))
                else:
                    return unichr(int(string[2:-1]))
            except ValueError:
                # If the value is outside the unicode codepoint range, leave
                # it in the output as is
                pass
        elif string[:1] == u"&":
            # Named entity: look up the name (between '&' and ';')
            entity = htmlentitydefs.entitydefs.get(string[1:-1].encode('utf-8'))
            if entity:
                if entity[:2] == "&#":
                    # Some names map to a numeric reference; resolve that too
                    try:
                        return unichr(int(entity[2:-1]))
                    except ValueError:
                        # If the value is outside the unicode codepoint range,
                        # leave it in the output as is
                        pass
                else:
                    # entitydefs values are latin-1 encoded byte strings
                    return unicode(entity, "iso-8859-1")
        return string # leave as is

    if not isinstance(string, unicode):
        raise TypeError(k.b_('html_entities_unescape must have a unicode type'
            ' for its first argument'))
    return re.sub(_ENTITY_RE, fixup, string)
|
||||
|
||||
def byte_string_valid_xml(byte_string, encoding='utf-8'):
    '''Check that a byte :class:`str` would be valid in xml

    :arg byte_string: Byte :class:`str` to check
    :arg encoding: Encoding of the xml file.  Default: :term:`UTF-8`
    :returns: :data:`True` if the string is valid.  :data:`False` if it
        would be invalid in the xml file

    When you have many byte strings destined for an xml file, round-tripping
    each one through :class:`unicode` just to validate it is wasteful.  This
    function checks a byte :class:`str` directly against the xml file's
    encoding.  Example::

        ARRAY_OF_MOSTLY_UTF8_STRINGS = [...]
        processed_array = []
        for string in ARRAY_OF_MOSTLY_UTF8_STRINGS:
            if byte_string_valid_xml(string, 'utf-8'):
                processed_array.append(string)
            else:
                processed_array.append(guess_bytes_to_xml(string, encoding='utf-8'))
        output_xml(processed_array)
    '''
    if not isinstance(byte_string, str):
        # Only byte strings can be valid here
        return False

    try:
        decoded = unicode(byte_string, encoding)
    except UnicodeError:
        # Does not decode with the xml file's encoding
        return False

    # xml 1.0 disallows most ascii control codes even when correctly encoded
    return not frozenset(decoded).intersection(_CONTROL_CHARS)
|
||||
|
||||
def byte_string_valid_encoding(byte_string, encoding='utf-8'):
    '''Detect if a byte :class:`str` is valid in a specific encoding

    :arg byte_string: Byte :class:`str` to test for bytes not valid in this
        encoding
    :kwarg encoding: encoding to test against.  Defaults to :term:`UTF-8`.
    :returns: :data:`True` if there are no invalid :term:`UTF-8` characters.
        :data:`False` if an invalid character is detected.

    .. note::

        This only checks that the byte :class:`str` *decodes* cleanly in the
        given encoding.  It **does not** detect whether the bytes actually
        were produced by that encoding; for that, see
        :func:`~kitchen.text.misc.guess_encoding`.
    '''
    try:
        unicode(byte_string, encoding)
    except UnicodeError:
        # The codec rejected some byte sequence, so the string is not
        # valid in this encoding
        return False
    else:
        # Decoded without error: every byte is valid in this encoding
        return True
|
||||
|
||||
# Explicit public API for ``from kitchen.text.misc import *``
__all__ = ('byte_string_valid_encoding', 'byte_string_valid_xml',
        'guess_encoding', 'html_entities_unescape', 'process_control_chars',
        'str_eq')
|
170
kitchen/text/utf8.py
Normal file
170
kitchen/text/utf8.py
Normal file
|
@ -0,0 +1,170 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2011 Red Hat, Inc.
|
||||
# Copyright (c) 2010 Ville Skyttä
|
||||
# Copyright (c) 2009 Tim Lauridsen
|
||||
# Copyright (c) 2007 Marcus Kuhn
|
||||
#
|
||||
# kitchen is free software; you can redistribute it and/or modify it under the
|
||||
# terms of the GNU Lesser General Public License as published by the Free
|
||||
# Software Foundation; either version 2.1 of the License, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# kitchen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
|
||||
# more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public License
|
||||
# along with kitchen; if not, see <http://www.gnu.org/licenses/>
|
||||
#
|
||||
# Authors:
|
||||
# James Antill <james@fedoraproject.org>
|
||||
# Marcus Kuhn
|
||||
# Toshio Kuratomi <toshio@fedoraproject.org>
|
||||
# Tim Lauridsen
|
||||
# Ville Skyttä
|
||||
#
|
||||
# Portions of this are from yum/i18n.py
|
||||
'''
|
||||
-----
|
||||
UTF-8
|
||||
-----
|
||||
|
||||
Functions for operating on byte :class:`str` encoded as :term:`UTF-8`
|
||||
|
||||
.. note::
|
||||
|
||||
In many cases, it is better to convert to :class:`unicode`, operate on the
|
||||
strings, then convert back to :term:`UTF-8`. :class:`unicode` type can
|
||||
handle many of these functions itself. For those that it doesn't
|
||||
(removing control characters from length calculations, for instance) the
|
||||
code to do so with a :class:`unicode` type is often simpler.
|
||||
|
||||
.. warning::
|
||||
|
||||
All of the functions in this module are deprecated. Most of them have
|
||||
been replaced with functions that operate on unicode values in
|
||||
:mod:`kitchen.text.display`. :func:`kitchen.text.utf8.utf8_valid` has
|
||||
been replaced with a function in :mod:`kitchen.text.misc`.
|
||||
'''
|
||||
import warnings
|
||||
|
||||
from kitchen import b_
|
||||
from kitchen.text.converters import to_unicode, to_bytes
|
||||
from kitchen.text.misc import byte_string_valid_encoding
|
||||
from kitchen.text.display import _textual_width_le, \
|
||||
byte_string_textual_width_fill, fill, textual_width, \
|
||||
textual_width_chop, wrap
|
||||
|
||||
#
|
||||
# Deprecated functions
|
||||
#
|
||||
|
||||
def utf8_valid(msg):
    '''**Deprecated** Detect if a string is valid :term:`utf-8`

    Use :func:`kitchen.text.misc.byte_string_valid_encoding` instead.
    '''
    # Steer callers toward the non-deprecated replacement
    message = b_('kitchen.text.utf8.utf8_valid is deprecated. Use'
            ' kitchen.text.misc.byte_string_valid_encoding(msg) instead')
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return byte_string_valid_encoding(msg)
|
||||
|
||||
def utf8_width(msg):
    '''**Deprecated** Get the :term:`textual width` of a :term:`utf-8` string

    Use :func:`kitchen.text.display.textual_width` instead.
    '''
    # Steer callers toward the non-deprecated replacement
    message = b_('kitchen.text.utf8.utf8_width is deprecated. Use'
            ' kitchen.text.display.textual_width(msg) instead')
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return textual_width(msg)
|
||||
|
||||
|
||||
def utf8_width_chop(msg, chop=None):
    '''**Deprecated** Return a string chopped to a given :term:`textual width`

    :arg msg: string to chop; may be a byte :class:`str` or :class:`unicode`
        string
    :kwarg chop: width to chop the string to.  If :data:`None`, the string
        is returned unchopped.  Default: :data:`None`
    :returns: 2-tuple of the :term:`textual width` of the (possibly chopped)
        string and the string itself, in the same type (byte :class:`str` or
        :class:`unicode`) that was passed in

    Use :func:`~kitchen.text.display.textual_width_chop` and
    :func:`~kitchen.text.display.textual_width` instead::

        >>> msg = 'く ku ら ra と to み mi'
        >>> # Old way:
        >>> utf8_width_chop(msg, 5)
        (5, 'く ku')
        >>> # New way
        >>> from kitchen.text.converters import to_bytes
        >>> from kitchen.text.display import textual_width, textual_width_chop
        >>> (textual_width(msg), to_bytes(textual_width_chop(msg, 5)))
        (5, 'く ku')
    '''
    warnings.warn(b_('kitchen.text.utf8.utf8_width_chop is deprecated. Use'
        ' kitchen.text.display.textual_width_chop instead'), DeprecationWarning,
        stacklevel=2)

    # Fixed: PEP 8 mandates identity comparison with None (`is`, not `==`);
    # `== None` can also misfire on objects with a custom __eq__
    if chop is None:
        return textual_width(msg), msg

    # Remember the input type so the chopped string is returned as the
    # same type the caller passed in
    as_bytes = not isinstance(msg, unicode)

    chopped_msg = textual_width_chop(msg, chop)
    if as_bytes:
        chopped_msg = to_bytes(chopped_msg)
    return textual_width(chopped_msg), chopped_msg
|
||||
|
||||
def utf8_width_fill(msg, fill, chop=None, left=True, prefix='', suffix=''):
    '''**Deprecated** Pad a :term:`utf-8` string to fill a specified width

    Use :func:`~kitchen.text.display.byte_string_textual_width_fill` instead
    '''
    # NOTE: the ``fill`` parameter shadows the imported display.fill
    # function inside this body; the name is part of the public interface
    # so it is kept for backwards compatibility.
    message = b_('kitchen.text.utf8.utf8_width_fill is deprecated. Use'
            ' kitchen.text.display.byte_string_textual_width_fill instead')
    warnings.warn(message, DeprecationWarning, stacklevel=2)

    return byte_string_textual_width_fill(msg, fill, chop=chop, left=left,
            prefix=prefix, suffix=suffix)
|
||||
|
||||
def utf8_text_wrap(text, width=70, initial_indent='', subsequent_indent=''):
    '''**Deprecated** Similar to :func:`textwrap.wrap` but understands
    :term:`utf-8` data and doesn't screw up lists/blocks/etc

    Use :func:`kitchen.text.display.wrap` instead
    '''
    warnings.warn(b_('kitchen.text.utf8.utf8_text_wrap is deprecated. Use'
        ' kitchen.text.display.wrap instead'),
        DeprecationWarning, stacklevel=2)

    # Remember whether the caller handed us bytes so the wrapped lines can
    # be handed back as bytes too
    return_bytes = not isinstance(text, unicode)

    wrapped = wrap(to_unicode(text), width=width,
            initial_indent=initial_indent,
            subsequent_indent=subsequent_indent)

    if return_bytes:
        return [to_bytes(line) for line in wrapped]
    return wrapped
|
||||
|
||||
def utf8_text_fill(text, *args, **kwargs):
    '''**Deprecated** Similar to :func:`textwrap.fill` but understands
    :term:`utf-8` strings and doesn't screw up lists/blocks/etc.

    Use :func:`kitchen.text.display.fill` instead.
    '''
    message = b_('kitchen.text.utf8.utf8_text_fill is deprecated. Use'
            ' kitchen.text.display.fill instead')
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    # All positional/keyword args are passed straight through; the inputs
    # are assumed to be utf-8 encoded.
    return fill(text, *args, **kwargs)
|
||||
|
||||
def _utf8_width_le(width, *args):
    '''**Deprecated** Convert the arguments to unicode and use
    :func:`kitchen.text.display._textual_width_le` instead.
    '''
    warnings.warn(b_('kitchen.text.utf8._utf8_width_le is deprecated. Use'
        ' kitchen.text.display._textual_width_le instead'),
        DeprecationWarning, stacklevel=2)
    # All args are assumed to be utf-8 encoded byte strings; join first,
    # then decode once
    joined = to_unicode(''.join(args))
    return _textual_width_le(width, joined)
|
||||
|
||||
# Explicit public API; the private _utf8_width_le helper is deliberately
# omitted
__all__ = ('utf8_text_fill', 'utf8_text_wrap', 'utf8_valid', 'utf8_width',
        'utf8_width_chop', 'utf8_width_fill')
|
107
kitchen/versioning/__init__.py
Normal file
107
kitchen/versioning/__init__.py
Normal file
|
@ -0,0 +1,107 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2011 Red Hat, Inc
|
||||
#
|
||||
# kitchen is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# kitchen is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with kitchen; if not, see <http://www.gnu.org/licenses/>
|
||||
#
|
||||
# Authors:
|
||||
# Toshio Kuratomi <toshio@fedoraproject.org>
|
||||
'''
|
||||
----------------------------
|
||||
PEP-386 compliant versioning
|
||||
----------------------------
|
||||
|
||||
:pep:`386` defines a standard format for version strings. This module
|
||||
contains a function for creating strings in that format.
|
||||
'''
|
||||
__version_info__ = ((1, 0, 0),)
|
||||
|
||||
import itertools
|
||||
|
||||
def version_tuple_to_string(version_info):
    '''Return a :pep:`386` version string from a :pep:`386` style version tuple

    :arg version_info: Nested set of tuples that describes the version, in
        the form used by :pep:`386`'s :func:`NormalizedVersion.from_parts`::

            ((Major, Minor, [Micros]), [(Alpha/Beta/rc marker, version)],
                [(post/dev marker, version)])

        Ex: ``((1, 0, 0), ('a', 2), ('dev', 3456))``
    :returns: a :pep:`386` compliant version string of the form::

            N.N[.N]+[{a|b|c|rc}N[.N]+][.postN][.devN]

        Ex: ``1.0.0a2.dev3456``

    This implements just enough of :pep:`386` to satisfy kitchen's needs.
    Once the equivalent function referenced by :pep:`386` becomes available
    in the |stdlib|_ we will start using it and deprecate this function.

    .. warning:: This function does next to no error checking.  It's up to
        the person defining the version tuple to make sure that the values
        make sense.

    It's recommended that you use this function to keep
    a :data:`__version_info__` tuple (easy for computers to compare) and
    a :data:`__version__` string (easy for humans to read) in your modules::

        from kitchen.versioning import version_tuple_to_string
        __version_info__ = ((0, 2, 1),)
        __version__ = version_tuple_to_string(__version_info__)

    Dependent code can then compare :data:`__version_info__` tuples
    directly, e.g. ``__version_info__ >= ((0, 2, 1),)``.
    '''
    pieces = []
    for segment in version_info:
        marker = segment[0]
        if isinstance(marker, int):
            # Release segment: plain dotted numbers, e.g. "1.0.0"
            pieces.append('.'.join(itertools.imap(str, segment)))
            continue
        # Marker segment: normalize a unicode marker down to a byte str
        if isinstance(marker, unicode):
            marker = marker.encode('ascii')
        if marker in ('a', 'b', 'c', 'rc'):
            # Pre-release markers attach directly to the release segment;
            # an empty version list defaults to "0"
            numbers = '.'.join(itertools.imap(str, segment[1:])) or '0'
            pieces.append('%s%s' % (marker, numbers))
        else:
            # post/dev markers are separated from what precedes by a dot
            pieces.append('.%s%s' % (marker, str(segment[1])))
    return unicode(''.join(pieces), 'ascii')
|
||||
|
||||
|
||||
__version__ = version_tuple_to_string(__version_info__)
|
||||
|
||||
__all__ = ('version_tuple_to_string',)
|
BIN
locale/de/LC_MESSAGES/kitchen.mo
Normal file
BIN
locale/de/LC_MESSAGES/kitchen.mo
Normal file
Binary file not shown.
BIN
locale/en_US/LC_MESSAGES/kitchen.mo
Normal file
BIN
locale/en_US/LC_MESSAGES/kitchen.mo
Normal file
Binary file not shown.
184
po/de.po
Normal file
184
po/de.po
Normal file
|
@ -0,0 +1,184 @@
|
|||
# Translations template for PROJECT.
|
||||
# Copyright (C) 2012 ORGANIZATION
|
||||
# This file is distributed under the same license as the PROJECT project.
|
||||
#
|
||||
# Translators:
|
||||
# Christoph Scheid <c@shri.de>, 2012.
|
||||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: Kitchen: Miscellaneous, useful python code\n"
|
||||
"Report-Msgid-Bugs-To: https://fedorahosted.org/kitchen/\n"
|
||||
"POT-Creation-Date: 2012-01-03 18:23-0800\n"
|
||||
"PO-Revision-Date: 2012-01-13 20:39+0000\n"
|
||||
"Last-Translator: Christoph Scheid <c@shri.de>\n"
|
||||
"Language-Team: LANGUAGE <LL@li.org>\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=UTF-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"Generated-By: Babel 0.9.6\n"
|
||||
"Language: de\n"
|
||||
"Plural-Forms: nplurals=2; plural=(n != 1)\n"
|
||||
|
||||
#: kitchen/release.py:9
|
||||
msgid "Kitchen contains a cornucopia of useful code"
|
||||
msgstr "Kitchen ist ein Füllhorn voller nützlichem Code."
|
||||
|
||||
#: kitchen/release.py:10
|
||||
msgid ""
|
||||
"\n"
|
||||
"We've all done it. In the process of writing a brand new application we've\n"
|
||||
"discovered that we need a little bit of code that we've invented before.\n"
|
||||
"Perhaps it's something to handle unicode text. Perhaps it's something to make\n"
|
||||
"a bit of python-2.5 code run on python-2.3. Whatever it is, it ends up being\n"
|
||||
"a tiny bit of code that seems too small to worry about pushing into its own\n"
|
||||
"module so it sits there, a part of your current project, waiting to be cut and\n"
|
||||
"pasted into your next project. And the next. And the next. And since that\n"
|
||||
"little bittybit of code proved so useful to you, it's highly likely that it\n"
|
||||
"proved useful to someone else as well. Useful enough that they've written it\n"
|
||||
"and copy and pasted it over and over into each of their new projects.\n"
|
||||
"\n"
|
||||
"Well, no longer! Kitchen aims to pull these small snippets of code into a few\n"
|
||||
"python modules which you can import and use within your project. No more copy\n"
|
||||
"and paste! Now you can let someone else maintain and release these small\n"
|
||||
"snippets so that you can get on with your life.\n"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/pycompat25/collections/_defaultdict.py:93
|
||||
msgid "First argument must be callable"
|
||||
msgstr "Das erste Argument muss ausführbar (callable) sein."
|
||||
|
||||
#: kitchen/text/converters.py:140
|
||||
msgid ""
|
||||
"non_string is a deprecated parameter of to_unicode(). Use nonstring instead"
|
||||
msgstr "non_string ist ein veralteter Parameter von to_unicode(). Stattdessen nonstring verwenden."
|
||||
|
||||
#: kitchen/text/converters.py:174
|
||||
#, python-format
|
||||
msgid ""
|
||||
"to_unicode was given \"%(obj)s\" which is neither a byte string (str) or a "
|
||||
"unicode string"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/converters.py:178 kitchen/text/converters.py:297
|
||||
#, python-format
|
||||
msgid "nonstring value, %(param)s, is not set to a valid action"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/converters.py:255
|
||||
msgid ""
|
||||
"non_string is a deprecated parameter of to_bytes(). Use nonstring instead"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/converters.py:294
|
||||
#, python-format
|
||||
msgid ""
|
||||
"to_bytes was given \"%(obj)s\" which is neither a unicode string or a byte "
|
||||
"string (str)"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/converters.py:378
|
||||
msgid ""
|
||||
"kitchen.text.converters.to_utf8 is deprecated. Use "
|
||||
"kitchen.text.converters.to_bytes(obj, encoding=\"utf-8\", "
|
||||
"nonstring=\"passthru\" instead."
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/converters.py:403
|
||||
msgid ""
|
||||
"to_str is deprecated. Use to_unicode or to_bytes instead. See the to_str "
|
||||
"docstring for porting information."
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/converters.py:685
|
||||
msgid ""
|
||||
"unicode_to_xml must have a unicode type as the first argument. Use "
|
||||
"bytes_string_to_xml for byte strings."
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/converters.py:689
|
||||
msgid ""
|
||||
"The control_chars argument to unicode_to_xml must be one of ignore, replace,"
|
||||
" or strict"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/converters.py:786
|
||||
msgid ""
|
||||
"byte_string_to_xml can only take a byte string as its first argument. Use "
|
||||
"unicode_to_xml for unicode strings"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/converters.py:910
|
||||
msgid ""
|
||||
"kitchen.text.converters.to_xml is deprecated. Use "
|
||||
"kitchen.text.converters.guess_encoding_to_xml instead."
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/display.py:344
|
||||
msgid ""
|
||||
"_ucp_width does not understand how to assign a width value to control "
|
||||
"characters."
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/misc.py:83
|
||||
msgid "byte_string must be a byte string (str)"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/misc.py:171
|
||||
msgid "process_control_char must have a unicode type as the first argument."
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/misc.py:182
|
||||
msgid "ASCII control code present in string input"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/misc.py:185
|
||||
msgid ""
|
||||
"The strategy argument to process_control_chars must be one of ignore, "
|
||||
"replace, or strict"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/misc.py:241
|
||||
msgid "html_entities_unescape must have a unicode type for its first argument"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/utf8.py:69
|
||||
msgid ""
|
||||
"kitchen.text.utf8.utf8_valid is deprecated. Use "
|
||||
"kitchen.text.misc.byte_string_valid_encoding(msg) instead"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/utf8.py:79
|
||||
msgid ""
|
||||
"kitchen.text.utf8.utf8_width is deprecated. Use "
|
||||
"kitchen.text.display.textual_width(msg) instead"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/utf8.py:101
|
||||
msgid ""
|
||||
"kitchen.text.utf8.utf8_width_chop is deprecated. Use "
|
||||
"kitchen.text.display.textual_width_chop instead"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/utf8.py:120
|
||||
msgid ""
|
||||
"kitchen.text.utf8.utf8_width_fill is deprecated. Use "
|
||||
"kitchen.text.display.byte_string_textual_width_fill instead"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/utf8.py:133
|
||||
msgid ""
|
||||
"kitchen.text.utf8.utf8_text_wrap is deprecated. Use "
|
||||
"kitchen.text.display.wrap instead"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/utf8.py:153
|
||||
msgid ""
|
||||
"kitchen.text.utf8.utf8_text_fill is deprecated. Use "
|
||||
"kitchen.text.display.fill instead"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/utf8.py:163
|
||||
msgid ""
|
||||
"kitchen.text.utf8._utf8_width_le is deprecated. Use "
|
||||
"kitchen.text.display._textual_width_le instead"
|
||||
msgstr ""
|
239
po/en_US.po
Normal file
239
po/en_US.po
Normal file
|
@ -0,0 +1,239 @@
|
|||
# Translations template for PROJECT.
|
||||
# Copyright (C) 2012 ORGANIZATION
|
||||
# This file is distributed under the same license as the PROJECT project.
|
||||
#
|
||||
# Translators:
|
||||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: Kitchen: Miscellaneous, useful python code\n"
|
||||
"Report-Msgid-Bugs-To: https://fedorahosted.org/kitchen/\n"
|
||||
"POT-Creation-Date: 2012-01-03 18:23-0800\n"
|
||||
"PO-Revision-Date: 2012-01-03 07:48+0000\n"
|
||||
"Last-Translator: Toshio Kuratomi <a.badger@gmail.com>\n"
|
||||
"Language-Team: LANGUAGE <LL@li.org>\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=UTF-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"Generated-By: Babel 0.9.6\n"
|
||||
"Language: en_US\n"
|
||||
"Plural-Forms: nplurals=2; plural=(n != 1)\n"
|
||||
|
||||
#: kitchen/release.py:9
|
||||
msgid "Kitchen contains a cornucopia of useful code"
|
||||
msgstr "Kitchen contains a cornucopia of useful code"
|
||||
|
||||
#: kitchen/release.py:10
|
||||
msgid ""
|
||||
"\n"
|
||||
"We've all done it. In the process of writing a brand new application we've\n"
|
||||
"discovered that we need a little bit of code that we've invented before.\n"
|
||||
"Perhaps it's something to handle unicode text. Perhaps it's something to make\n"
|
||||
"a bit of python-2.5 code run on python-2.3. Whatever it is, it ends up being\n"
|
||||
"a tiny bit of code that seems too small to worry about pushing into its own\n"
|
||||
"module so it sits there, a part of your current project, waiting to be cut and\n"
|
||||
"pasted into your next project. And the next. And the next. And since that\n"
|
||||
"little bittybit of code proved so useful to you, it's highly likely that it\n"
|
||||
"proved useful to someone else as well. Useful enough that they've written it\n"
|
||||
"and copy and pasted it over and over into each of their new projects.\n"
|
||||
"\n"
|
||||
"Well, no longer! Kitchen aims to pull these small snippets of code into a few\n"
|
||||
"python modules which you can import and use within your project. No more copy\n"
|
||||
"and paste! Now you can let someone else maintain and release these small\n"
|
||||
"snippets so that you can get on with your life.\n"
|
||||
msgstr ""
|
||||
"\n"
|
||||
"We've all done it. In the process of writing a brand new application we've\n"
|
||||
"discovered that we need a little bit of code that we've invented before.\n"
|
||||
"Perhaps it's something to handle unicode text. Perhaps it's something to make\n"
|
||||
"a bit of python-2.5 code run on python-2.3. Whatever it is, it ends up being\n"
|
||||
"a tiny bit of code that seems too small to worry about pushing into its own\n"
|
||||
"module so it sits there, a part of your current project, waiting to be cut and\n"
|
||||
"pasted into your next project. And the next. And the next. And since that\n"
|
||||
"little bittybit of code proved so useful to you, it's highly likely that it\n"
|
||||
"proved useful to someone else as well. Useful enough that they've written it\n"
|
||||
"and copy and pasted it over and over into each of their new projects.\n"
|
||||
"\n"
|
||||
"Well, no longer! Kitchen aims to pull these small snippets of code into a few\n"
|
||||
"python modules which you can import and use within your project. No more copy\n"
|
||||
"and paste! Now you can let someone else maintain and release these small\n"
|
||||
"snippets so that you can get on with your life.\n"
|
||||
|
||||
#: kitchen/pycompat25/collections/_defaultdict.py:93
|
||||
msgid "First argument must be callable"
|
||||
msgstr "First argument must be callable"
|
||||
|
||||
#: kitchen/text/converters.py:140
|
||||
msgid ""
|
||||
"non_string is a deprecated parameter of to_unicode(). Use nonstring instead"
|
||||
msgstr ""
|
||||
"non_string is a deprecated parameter of to_unicode(). Use nonstring instead"
|
||||
|
||||
#: kitchen/text/converters.py:174
|
||||
#, python-format
|
||||
msgid ""
|
||||
"to_unicode was given \"%(obj)s\" which is neither a byte string (str) or a "
|
||||
"unicode string"
|
||||
msgstr ""
|
||||
"to_unicode was given \"%(obj)s\" which is neither a byte string (str) or a "
|
||||
"unicode string"
|
||||
|
||||
#: kitchen/text/converters.py:178 kitchen/text/converters.py:297
|
||||
#, python-format
|
||||
msgid "nonstring value, %(param)s, is not set to a valid action"
|
||||
msgstr "nonstring value, %(param)s, is not set to a valid action"
|
||||
|
||||
#: kitchen/text/converters.py:255
|
||||
msgid ""
|
||||
"non_string is a deprecated parameter of to_bytes(). Use nonstring instead"
|
||||
msgstr ""
|
||||
"non_string is a deprecated parameter of to_bytes(). Use nonstring instead"
|
||||
|
||||
#: kitchen/text/converters.py:294
|
||||
#, python-format
|
||||
msgid ""
|
||||
"to_bytes was given \"%(obj)s\" which is neither a unicode string or a byte "
|
||||
"string (str)"
|
||||
msgstr ""
|
||||
"to_bytes was given \"%(obj)s\" which is neither a unicode string or a byte "
|
||||
"string (str)"
|
||||
|
||||
#: kitchen/text/converters.py:378
|
||||
msgid ""
|
||||
"kitchen.text.converters.to_utf8 is deprecated. Use "
|
||||
"kitchen.text.converters.to_bytes(obj, encoding=\"utf-8\", "
|
||||
"nonstring=\"passthru\" instead."
|
||||
msgstr ""
|
||||
"kitchen.text.converters.to_utf8 is deprecated. Use "
|
||||
"kitchen.text.converters.to_bytes(obj, encoding=\"utf-8\", "
|
||||
"nonstring=\"passthru\" instead."
|
||||
|
||||
#: kitchen/text/converters.py:403
|
||||
msgid ""
|
||||
"to_str is deprecated. Use to_unicode or to_bytes instead. See the to_str "
|
||||
"docstring for porting information."
|
||||
msgstr ""
|
||||
"to_str is deprecated. Use to_unicode or to_bytes instead. See the to_str "
|
||||
"docstring for porting information."
|
||||
|
||||
#: kitchen/text/converters.py:685
|
||||
msgid ""
|
||||
"unicode_to_xml must have a unicode type as the first argument. Use "
|
||||
"bytes_string_to_xml for byte strings."
|
||||
msgstr ""
|
||||
"unicode_to_xml must have a unicode type as the first argument. Use "
|
||||
"bytes_string_to_xml for byte strings."
|
||||
|
||||
#: kitchen/text/converters.py:689
|
||||
msgid ""
|
||||
"The control_chars argument to unicode_to_xml must be one of ignore, replace,"
|
||||
" or strict"
|
||||
msgstr ""
|
||||
"The control_chars argument to unicode_to_xml must be one of ignore, replace,"
|
||||
" or strict"
|
||||
|
||||
#: kitchen/text/converters.py:786
|
||||
msgid ""
|
||||
"byte_string_to_xml can only take a byte string as its first argument. Use "
|
||||
"unicode_to_xml for unicode strings"
|
||||
msgstr ""
|
||||
"byte_string_to_xml can only take a byte string as its first argument. Use "
|
||||
"unicode_to_xml for unicode strings"
|
||||
|
||||
#: kitchen/text/converters.py:910
|
||||
msgid ""
|
||||
"kitchen.text.converters.to_xml is deprecated. Use "
|
||||
"kitchen.text.converters.guess_encoding_to_xml instead."
|
||||
msgstr ""
|
||||
"kitchen.text.converters.to_xml is deprecated. Use "
|
||||
"kitchen.text.converters.guess_encoding_to_xml instead."
|
||||
|
||||
#: kitchen/text/display.py:344
|
||||
msgid ""
|
||||
"_ucp_width does not understand how to assign a width value to control "
|
||||
"characters."
|
||||
msgstr ""
|
||||
"_ucp_width does not understand how to assign a width value to control "
|
||||
"characters."
|
||||
|
||||
#: kitchen/text/misc.py:83
|
||||
msgid "byte_string must be a byte string (str)"
|
||||
msgstr "byte_string must be a byte string (str)"
|
||||
|
||||
#: kitchen/text/misc.py:171
|
||||
msgid "process_control_char must have a unicode type as the first argument."
|
||||
msgstr "process_control_char must have a unicode type as the first argument."
|
||||
|
||||
#: kitchen/text/misc.py:182
|
||||
msgid "ASCII control code present in string input"
|
||||
msgstr "ASCII control code present in string input"
|
||||
|
||||
#: kitchen/text/misc.py:185
|
||||
msgid ""
|
||||
"The strategy argument to process_control_chars must be one of ignore, "
|
||||
"replace, or strict"
|
||||
msgstr ""
|
||||
"The strategy argument to process_control_chars must be one of ignore, "
|
||||
"replace, or strict"
|
||||
|
||||
#: kitchen/text/misc.py:241
|
||||
msgid "html_entities_unescape must have a unicode type for its first argument"
|
||||
msgstr ""
|
||||
"html_entities_unescape must have a unicode type for its first argument"
|
||||
|
||||
#: kitchen/text/utf8.py:69
|
||||
msgid ""
|
||||
"kitchen.text.utf8.utf8_valid is deprecated. Use "
|
||||
"kitchen.text.misc.byte_string_valid_encoding(msg) instead"
|
||||
msgstr ""
|
||||
"kitchen.text.utf8.utf8_valid is deprecated. Use "
|
||||
"kitchen.text.misc.byte_string_valid_encoding(msg) instead"
|
||||
|
||||
#: kitchen/text/utf8.py:79
|
||||
msgid ""
|
||||
"kitchen.text.utf8.utf8_width is deprecated. Use "
|
||||
"kitchen.text.display.textual_width(msg) instead"
|
||||
msgstr ""
|
||||
"kitchen.text.utf8.utf8_width is deprecated. Use "
|
||||
"kitchen.text.display.textual_width(msg) instead"
|
||||
|
||||
#: kitchen/text/utf8.py:101
|
||||
msgid ""
|
||||
"kitchen.text.utf8.utf8_width_chop is deprecated. Use "
|
||||
"kitchen.text.display.textual_width_chop instead"
|
||||
msgstr ""
|
||||
"kitchen.text.utf8.utf8_width_chop is deprecated. Use "
|
||||
"kitchen.text.display.textual_width_chop instead"
|
||||
|
||||
#: kitchen/text/utf8.py:120
|
||||
msgid ""
|
||||
"kitchen.text.utf8.utf8_width_fill is deprecated. Use "
|
||||
"kitchen.text.display.byte_string_textual_width_fill instead"
|
||||
msgstr ""
|
||||
"kitchen.text.utf8.utf8_width_fill is deprecated. Use "
|
||||
"kitchen.text.display.byte_string_textual_width_fill instead"
|
||||
|
||||
#: kitchen/text/utf8.py:133
|
||||
msgid ""
|
||||
"kitchen.text.utf8.utf8_text_wrap is deprecated. Use "
|
||||
"kitchen.text.display.wrap instead"
|
||||
msgstr ""
|
||||
"kitchen.text.utf8.utf8_text_wrap is deprecated. Use "
|
||||
"kitchen.text.display.wrap instead"
|
||||
|
||||
#: kitchen/text/utf8.py:153
|
||||
msgid ""
|
||||
"kitchen.text.utf8.utf8_text_fill is deprecated. Use "
|
||||
"kitchen.text.display.fill instead"
|
||||
msgstr ""
|
||||
"kitchen.text.utf8.utf8_text_fill is deprecated. Use "
|
||||
"kitchen.text.display.fill instead"
|
||||
|
||||
#: kitchen/text/utf8.py:163
|
||||
msgid ""
|
||||
"kitchen.text.utf8._utf8_width_le is deprecated. Use "
|
||||
"kitchen.text.display._textual_width_le instead"
|
||||
msgstr ""
|
||||
"kitchen.text.utf8._utf8_width_le is deprecated. Use "
|
||||
"kitchen.text.display._textual_width_le instead"
|
||||
|
||||
|
194
po/kitchen.pot
Normal file
194
po/kitchen.pot
Normal file
|
@ -0,0 +1,194 @@
|
|||
# Translations template for PROJECT.
|
||||
# Copyright (C) 2012 ORGANIZATION
|
||||
# This file is distributed under the same license as the PROJECT project.
|
||||
# FIRST AUTHOR <EMAIL@ADDRESS>, 2012.
|
||||
#
|
||||
#, fuzzy
|
||||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: PROJECT VERSION\n"
|
||||
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
|
||||
"POT-Creation-Date: 2012-01-03 18:23-0800\n"
|
||||
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
|
||||
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
||||
"Language-Team: LANGUAGE <LL@li.org>\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=utf-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"Generated-By: Babel 0.9.6\n"
|
||||
|
||||
#: kitchen/release.py:9
|
||||
msgid "Kitchen contains a cornucopia of useful code"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/release.py:10
|
||||
msgid ""
|
||||
"\n"
|
||||
"We've all done it. In the process of writing a brand new application "
|
||||
"we've\n"
|
||||
"discovered that we need a little bit of code that we've invented before.\n"
|
||||
"Perhaps it's something to handle unicode text. Perhaps it's something to"
|
||||
" make\n"
|
||||
"a bit of python-2.5 code run on python-2.3. Whatever it is, it ends up "
|
||||
"being\n"
|
||||
"a tiny bit of code that seems too small to worry about pushing into its "
|
||||
"own\n"
|
||||
"module so it sits there, a part of your current project, waiting to be "
|
||||
"cut and\n"
|
||||
"pasted into your next project. And the next. And the next. And since "
|
||||
"that\n"
|
||||
"little bittybit of code proved so useful to you, it's highly likely that "
|
||||
"it\n"
|
||||
"proved useful to someone else as well. Useful enough that they've "
|
||||
"written it\n"
|
||||
"and copy and pasted it over and over into each of their new projects.\n"
|
||||
"\n"
|
||||
"Well, no longer! Kitchen aims to pull these small snippets of code into "
|
||||
"a few\n"
|
||||
"python modules which you can import and use within your project. No more"
|
||||
" copy\n"
|
||||
"and paste! Now you can let someone else maintain and release these small"
|
||||
"\n"
|
||||
"snippets so that you can get on with your life.\n"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/pycompat25/collections/_defaultdict.py:93
|
||||
msgid "First argument must be callable"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/converters.py:140
|
||||
msgid ""
|
||||
"non_string is a deprecated parameter of to_unicode(). Use nonstring "
|
||||
"instead"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/converters.py:174
|
||||
#, python-format
|
||||
msgid ""
|
||||
"to_unicode was given \"%(obj)s\" which is neither a byte string (str) or "
|
||||
"a unicode string"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/converters.py:178 kitchen/text/converters.py:297
|
||||
#, python-format
|
||||
msgid "nonstring value, %(param)s, is not set to a valid action"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/converters.py:255
|
||||
msgid "non_string is a deprecated parameter of to_bytes(). Use nonstring instead"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/converters.py:294
|
||||
#, python-format
|
||||
msgid ""
|
||||
"to_bytes was given \"%(obj)s\" which is neither a unicode string or a "
|
||||
"byte string (str)"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/converters.py:378
|
||||
msgid ""
|
||||
"kitchen.text.converters.to_utf8 is deprecated. Use "
|
||||
"kitchen.text.converters.to_bytes(obj, encoding=\"utf-8\", "
|
||||
"nonstring=\"passthru\" instead."
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/converters.py:403
|
||||
msgid ""
|
||||
"to_str is deprecated. Use to_unicode or to_bytes instead. See the "
|
||||
"to_str docstring for porting information."
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/converters.py:685
|
||||
msgid ""
|
||||
"unicode_to_xml must have a unicode type as the first argument. Use "
|
||||
"bytes_string_to_xml for byte strings."
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/converters.py:689
|
||||
msgid ""
|
||||
"The control_chars argument to unicode_to_xml must be one of ignore, "
|
||||
"replace, or strict"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/converters.py:786
|
||||
msgid ""
|
||||
"byte_string_to_xml can only take a byte string as its first argument. "
|
||||
"Use unicode_to_xml for unicode strings"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/converters.py:910
|
||||
msgid ""
|
||||
"kitchen.text.converters.to_xml is deprecated. Use "
|
||||
"kitchen.text.converters.guess_encoding_to_xml instead."
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/display.py:344
|
||||
msgid ""
|
||||
"_ucp_width does not understand how to assign a width value to control "
|
||||
"characters."
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/misc.py:83
|
||||
msgid "byte_string must be a byte string (str)"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/misc.py:171
|
||||
msgid "process_control_char must have a unicode type as the first argument."
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/misc.py:182
|
||||
msgid "ASCII control code present in string input"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/misc.py:185
|
||||
msgid ""
|
||||
"The strategy argument to process_control_chars must be one of ignore, "
|
||||
"replace, or strict"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/misc.py:241
|
||||
msgid "html_entities_unescape must have a unicode type for its first argument"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/utf8.py:69
|
||||
msgid ""
|
||||
"kitchen.text.utf8.utf8_valid is deprecated. Use "
|
||||
"kitchen.text.misc.byte_string_valid_encoding(msg) instead"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/utf8.py:79
|
||||
msgid ""
|
||||
"kitchen.text.utf8.utf8_width is deprecated. Use "
|
||||
"kitchen.text.display.textual_width(msg) instead"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/utf8.py:101
|
||||
msgid ""
|
||||
"kitchen.text.utf8.utf8_width_chop is deprecated. Use "
|
||||
"kitchen.text.display.textual_width_chop instead"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/utf8.py:120
|
||||
msgid ""
|
||||
"kitchen.text.utf8.utf8_width_fill is deprecated. Use "
|
||||
"kitchen.text.display.byte_string_textual_width_fill instead"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/utf8.py:133
|
||||
msgid ""
|
||||
"kitchen.text.utf8.utf8_text_wrap is deprecated. Use "
|
||||
"kitchen.text.display.wrap instead"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/utf8.py:153
|
||||
msgid ""
|
||||
"kitchen.text.utf8.utf8_text_fill is deprecated. Use "
|
||||
"kitchen.text.display.fill instead"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen/text/utf8.py:163
|
||||
msgid ""
|
||||
"kitchen.text.utf8._utf8_width_le is deprecated. Use "
|
||||
"kitchen.text.display._textual_width_le instead"
|
||||
msgstr ""
|
||||
|
64
releaseutils.py
Executable file
64
releaseutils.py
Executable file
|
@ -0,0 +1,64 @@
|
|||
#!/usr/bin/python -tt
|
||||
|
||||
import ConfigParser
import errno
import glob
import os
import shutil

from kitchen.pycompat27 import subprocess
|
||||
|
||||
class MsgFmt(object):
    """Adapter that drives the external msgfmt binary.

    Exposes the same ``run(args)`` interface as babel's
    ``CommandLineInterface`` so :func:`main` can use either compiler
    interchangeably.
    """
    def run(self, args):
        # Spawn msgfmt with the argument list (no shell) and block until done
        process = subprocess.Popen(args, shell=False)
        process.wait()
|
||||
|
||||
def setup_message_compiler():
    """Select a tool for compiling po files into mo files.

    Prefers the system ``msgfmt`` binary; falls back to babel's pure-python
    compiler when msgfmt is not installed.

    :returns: 2-tuple of an object with a ``run(args)`` method and a
        command-line template string with ``%(domain)s``, ``%(pofile)s`` and
        ``%(lang)s`` placeholders.
    """
    # Look for msgfmt
    try:
        probe = subprocess.Popen(['msgfmt', '-h'], stdout=subprocess.PIPE)
    except OSError:
        # msgfmt is not available; use babel's compiler instead
        import babel.messages.frontend

        return (babel.messages.frontend.CommandLineInterface(),
                'pybabel compile -D %(domain)s -d locale -i %(pofile)s -l %(lang)s'
                )
    else:
        # Drain and reap the probe process.  Without this the child stays a
        # zombie and its stdout pipe fd stays open for the rest of the run.
        probe.communicate()
        return (MsgFmt(), 'msgfmt -c -o locale/%(lang)s/LC_MESSAGES/%(domain)s.mo %(pofile)s')
|
||||
|
||||
def main():
|
||||
# Get the directory with message catalogs
|
||||
# Reuse transifex's config file first as it will know this
|
||||
cfg = ConfigParser.SafeConfigParser()
|
||||
cfg.read('.tx/config')
|
||||
cmd, args = setup_message_compiler()
|
||||
|
||||
try:
|
||||
shutil.rmtree('locale')
|
||||
except OSError, e:
|
||||
# If the error is that locale does not exist, we're okay. We're
|
||||
# deleting it here, afterall
|
||||
if e.errno != 2:
|
||||
raise
|
||||
|
||||
for section in [s for s in cfg.sections() if s != 'main']:
|
||||
try:
|
||||
file_filter = cfg.get(section, 'file_filter')
|
||||
source_file = cfg.get(section, 'source_file')
|
||||
except ConfigParser.NoOptionError:
|
||||
continue
|
||||
glob_pattern = file_filter.replace('<lang>', '*')
|
||||
pot = os.path.basename(source_file)
|
||||
if pot.endswith('.pot'):
|
||||
pot = pot[:-4]
|
||||
arg_values = {'domain': pot}
|
||||
for po_file in glob.glob(glob_pattern):
|
||||
file_pattern = os.path.basename(po_file)
|
||||
lang = file_pattern.replace('.po','')
|
||||
os.makedirs(os.path.join('locale', lang, 'LC_MESSAGES'))
|
||||
arg_values['pofile'] = po_file
|
||||
arg_values['lang'] = lang
|
||||
compile_args = args % arg_values
|
||||
compile_args = compile_args.split(' ')
|
||||
cmd.run(compile_args)
|
||||
|
||||
# Compile all message catalogs when invoked as a script
if __name__ == '__main__':
    main()
|
2
setup.cfg
Normal file
2
setup.cfg
Normal file
|
@ -0,0 +1,2 @@
|
|||
[upload_docs]
|
||||
upload_dir=build/sphinx/html
|
57
setup.py
Executable file
57
setup.py
Executable file
|
@ -0,0 +1,57 @@
|
|||
#!/usr/bin/python -tt
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from distutils.command.sdist import sdist as _sdist
|
||||
import glob
|
||||
import os
|
||||
|
||||
from setuptools import find_packages, setup
|
||||
import kitchen.release
|
||||
|
||||
import releaseutils
|
||||
|
||||
# Override sdist command to compile the message catalogs as well
class Sdist(_sdist, object):
    def run(self):
        """Build the mo files, register them as data_files, then run the
        stock sdist."""
        # Compile all po files into locale/<lang>/LC_MESSAGES/*.mo
        releaseutils.main()
        catalog_entries = [(os.path.dirname(mo_file), [mo_file])
                for mo_file in glob.glob('locale/*/*/*.mo')
                if os.path.isfile(mo_file)]
        existing = self.distribution.data_files
        # Append to an existing mutable data_files list if there is one,
        # otherwise install our list wholesale
        if existing and hasattr(existing, 'extend'):
            existing.extend(catalog_entries)
        else:
            self.distribution.data_files = catalog_entries
        super(Sdist, self).run()
|
||||
|
||||
|
||||
# Package metadata is sourced from kitchen.release so it stays in sync with
# the library itself; data_files starts empty and is filled in by the Sdist
# command at sdist time.
setup(name='kitchen',
      version=str(kitchen.release.__version__),
      description=kitchen.release.DESCRIPTION,
      long_description=kitchen.release.LONG_DESCRIPTION,
      author=kitchen.release.AUTHOR,
      author_email=kitchen.release.EMAIL,
      maintainer='Toshio Kuratomi',
      maintainer_email='toshio@fedoraproject.org',
      license=kitchen.release.LICENSE,
      url=kitchen.release.URL,
      download_url=kitchen.release.DOWNLOAD_URL,
      cmdclass={'sdist': Sdist
          },
      keywords='Useful Small Code Snippets',
      classifiers=[
            'Development Status :: 4 - Beta',
            'License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)',
            'Operating System :: OS Independent',
            'Programming Language :: Python :: 2.3',
            'Programming Language :: Python :: 2.4',
            'Programming Language :: Python :: 2.5',
            'Programming Language :: Python :: 2.6',
            'Programming Language :: Python :: 2.7',
            'Topic :: Software Development :: Internationalization',
            'Topic :: Software Development :: Libraries :: Python Modules',
            'Topic :: Text Processing :: General',
            ],
      packages=find_packages(),
      data_files=[],
      )
|
129
tests/base_classes.py
Normal file
129
tests/base_classes.py
Normal file
|
@ -0,0 +1,129 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Base class for testing unicode and utf8 functions. This holds data that's
|
||||
# useful for making tests
|
||||
|
||||
import re
|
||||
|
||||
from kitchen.text.converters import to_bytes
|
||||
from kitchen.text import misc
|
||||
|
||||
class UnicodeTestData(object):
    """Shared fixture data for the unicode/utf8 conversion tests.

    Attributes come in matched families: a ``u_*`` unicode original plus
    byte-string encodings of it (``utf8_*``, ``latin1_*``, ...) and
    deliberately *mangled* round-trips (decoded with the wrong codec) that
    the converter tests compare against.
    """
    # This should encode fine -- sanity check
    u_ascii = u'the quick brown fox jumped over the lazy dog'
    b_ascii = 'the quick brown fox jumped over the lazy dog'

    # First challenge -- what happens with latin-1 characters
    u_spanish = u'El veloz murciélago saltó sobre el perro perezoso.'
    # utf8 and latin1 both support these chars so no mangling
    utf8_spanish = u_spanish.encode('utf8')
    latin1_spanish = u_spanish.encode('latin1')

    # ASCII does not have the accented characters so it mangles
    ascii_mangled_spanish_as_ascii = u_spanish.encode('ascii', 'replace')
    # Attempting to decode using the wrong charset will mangle
    # Note: as a general principle, we do not want to have code that mangles
    # input of one charset and output of the same charset.  We want to avoid
    # things like::
    #   input latin-1, transform to unicode with utf-8, output latin-1.
    u_mangled_spanish_utf8_as_latin1 = unicode(utf8_spanish, encoding='latin1', errors='replace')
    u_mangled_spanish_utf8_as_ascii = unicode(utf8_spanish, encoding='ascii', errors='replace')
    u_mangled_spanish_latin1_as_ascii = unicode(latin1_spanish, encoding='ascii', errors='replace')
    u_mangled_spanish_latin1_as_utf8 = unicode(latin1_spanish, encoding='utf-8', errors='replace')
    ascii_twice_mangled_spanish_latin1_as_utf8_as_ascii = u_mangled_spanish_latin1_as_utf8.encode('ascii', 'replace')
    utf8_mangled_spanish_latin1_as_utf8 = u_mangled_spanish_latin1_as_utf8.encode('utf-8')
    u_spanish_ignore = unicode(latin1_spanish, encoding='utf8', errors='ignore')

    # Multibyte (CJK) sample with an embedded ascii apostrophe
    u_japanese = u"速い茶色のキツネが怠惰な犬に'増"
    utf8_japanese = u_japanese.encode('utf8')
    euc_jp_japanese = u_japanese.encode('euc_jp')
    u_mangled_euc_jp_as_latin1 = unicode(euc_jp_japanese, 'latin1')
    u_mangled_euc_jp_as_utf8 = unicode(euc_jp_japanese, 'utf-8', 'replace')
    utf8_mangled_euc_jp_as_latin1 = u_mangled_euc_jp_as_latin1.encode('utf8')
    u_mangled_japanese_utf8_as_latin1 = unicode(utf8_japanese, 'latin1')
    # One U+FFFD replacement char per undecodable byte (3 per kanji/kana)
    u_mangled_japanese_utf8_as_ascii = u"<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>'<EFBFBD><EFBFBD><EFBFBD>"
    ascii_mangled_japanese_replace_as_latin1 = "??????????????'?"
    latin1_mangled_japanese_replace_as_latin1 = "??????????????'?"

    # Hiragana interleaved with their romaji equivalents
    u_mixed = u'く ku ら ra と to み mi'
    utf8_mixed = u_mixed.encode('utf8')
    utf8_ku = u_mixed[0].encode('utf8')
    # NOTE(review): indices 2/4/6 land on ascii characters of u_mixed, not on
    # ら/と/み -- confirm the consuming tests actually rely on these values
    utf8_ra = u_mixed[2].encode('utf8')
    utf8_to = u_mixed[4].encode('utf8')
    utf8_mi = u_mixed[6].encode('utf8')

    u_mixed_replace = u'\ufffd ku \ufffd ra \ufffd to \ufffd mi'
    u_mixed_ignore = u' ku ra to mi'
    latin1_mixed_replace = '? ku ? ra ? to ? mi'
    latin1_mixed_ignore = ' ku ra to mi'

    # XML/HTML escaping fixtures
    u_entity = u'Test: <"&"> – ' + u_japanese + u'é'
    utf8_entity = u_entity.encode('utf8')
    u_entity_escape = u'Test: <"&"> – ' + unicode(u_japanese.encode('ascii', 'xmlcharrefreplace'), 'ascii') + u'é'
    utf8_entity_escape = 'Test: <"&"> – 速い茶色のキツネが怠惰な犬に\'増é'
    utf8_attrib_escape = 'Test: <"&"> – 速い茶色のキツネが怠惰な犬に\'増é'
    ascii_entity_escape = (u'Test: <"&"> – ' + u_japanese + u'é').encode('ascii', 'xmlcharrefreplace').replace('&', '&',1).replace('<', '<').replace('>', '>')

    # Every possible byte value, space separated, plus its base64 form
    b_byte_chars = ' '.join(map(chr, range(0, 256)))
    b_byte_encoded = 'ACABIAIgAyAEIAUgBiAHIAggCSAKIAsgDCANIA4gDyAQIBEgEiATIBQgFSAWIBcgGCAZIBogGyAcIB0gHiAfICAgISAiICMgJCAlICYgJyAoICkgKiArICwgLSAuIC8gMCAxIDIgMyA0IDUgNiA3IDggOSA6IDsgPCA9ID4gPyBAIEEgQiBDIEQgRSBGIEcgSCBJIEogSyBMIE0gTiBPIFAgUSBSIFMgVCBVIFYgVyBYIFkgWiBbIFwgXSBeIF8gYCBhIGIgYyBkIGUgZiBnIGggaSBqIGsgbCBtIG4gbyBwIHEgciBzIHQgdSB2IHcgeCB5IHogeyB8IH0gfiB/IIAggSCCIIMghCCFIIYghyCIIIkgiiCLIIwgjSCOII8gkCCRIJIgkyCUIJUgliCXIJggmSCaIJsgnCCdIJ4gnyCgIKEgoiCjIKQgpSCmIKcgqCCpIKogqyCsIK0griCvILAgsSCyILMgtCC1ILYgtyC4ILkguiC7ILwgvSC+IL8gwCDBIMIgwyDEIMUgxiDHIMggySDKIMsgzCDNIM4gzyDQINEg0iDTINQg1SDWINcg2CDZINog2yDcIN0g3iDfIOAg4SDiIOMg5CDlIOYg5yDoIOkg6iDrIOwg7SDuIO8g8CDxIPIg8yD0IPUg9iD3IPgg+SD6IPsg/CD9IP4g/w=='

    # Matches the default repr of an instance; group 1 is the class name
    repr_re = re.compile('^<[^ ]*\.([^.]+) object at .*>$')

    # Wrapping fixtures: a prose paragraph and its expected wrapped output
    u_paragraph = u'''ConfigObj is a simple but powerful config file reader and writer: an ini file
round tripper. Its main feature is that it is very easy to use, with a
straightforward programmer's interface and a simple syntax for config files.
It has lots of other features though:



    * Nested sections (subsections), to any level
    * List values
    * Multiple line values
    * String interpolation (substitution)
    * Integrated with a powerful validation system
          o including automatic type checking/conversion
          o repeated sections
          o and allowing default values
    * All comments in the file are preserved
    * The order of keys/sections is preserved
    * No external dependencies
    * Full Unicode support
    * A powerful unrepr mode for storing basic datatypes
'''
    utf8_paragraph = u_paragraph.encode('utf-8', 'replace')
    u_paragraph_out = [u'ConfigObj is a simple but powerful config file reader and writer: an',
            u'ini file round tripper. Its main feature is that it is very easy to',
            u"use, with a straightforward programmer's interface and a simple syntax",
            u'for config files. It has lots of other features though:',
            u'',
            u'',
            u'',
            u'    * Nested sections (subsections), to any level',
            u'    * List values',
            u'    * Multiple line values',
            u'    * String interpolation (substitution)',
            u'    * Integrated with a powerful validation system',
            u'          o including automatic type checking/conversion',
            u'          o repeated sections',
            u'          o and allowing default values',
            u'    * All comments in the file are preserved',
            u'    * The order of keys/sections is preserved',
            u'    * No external dependencies',
            u'    * Full Unicode support',
            u'    * A powerful unrepr mode for storing basic datatypes']

    utf8_paragraph_out = [line.encode('utf-8', 'replace') for line in u_paragraph_out]

    # Mixed-width wrapping fixtures (double-width CJK + ascii)
    u_mixed_para = u'くらとみ kuratomi ' * 5
    utf8_mixed_para = u_mixed_para.encode('utf8')
    u_mixed_para_out = [u'くらとみ kuratomi くらとみ kuratomi くらとみ kuratomi くらとみ',
            u'kuratomi くらとみ kuratomi']
    u_mixed_para_57_initial_subsequent_out = [u'    くらとみ kuratomi くらとみ kuratomi くらとみ kuratomi',
            u'----くらとみ kuratomi くらとみ kuratomi']
    utf8_mixed_para_out = map(to_bytes, u_mixed_para_out)
    utf8_mixed_para_57_initial_subsequent_out = map(to_bytes, u_mixed_para_57_initial_subsequent_out)

    # Control-character handling fixtures
    u_ascii_chars = u' '.join(map(unichr, range(0, 256)))
    u_ascii_no_ctrl = u''.join([c for c in u_ascii_chars if ord(c) not in misc._CONTROL_CODES])
    u_ascii_ctrl_replace = u_ascii_chars.translate(dict([(c, u'?') for c in misc._CONTROL_CODES]))
    utf8_ascii_chars = u_ascii_chars.encode('utf8')
|
46
tests/data/locale-old/pt_BR.po
Normal file
46
tests/data/locale-old/pt_BR.po
Normal file
|
@ -0,0 +1,46 @@
|
|||
# Portuguese (Brazil) translations for kitchen.
|
||||
# Copyright (C) 2010 ORGANIZATION
|
||||
# This file is distributed under the same license as the kitchen project.
|
||||
# FIRST AUTHOR <EMAIL@ADDRESS>, 2010.
|
||||
#
|
||||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: kitchen 0.2.1a1\n"
|
||||
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
|
||||
"POT-Creation-Date: 2010-09-03 00:49+0400\n"
|
||||
"PO-Revision-Date: 2010-09-08 00:45-0400\n"
|
||||
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
||||
"Language-Team: pt_BR <LL@li.org>\n"
|
||||
"Plural-Forms: nplurals=2; plural=(n > 1)\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=utf-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"Generated-By: Babel 0.9.5\n"
|
||||
|
||||
#: kitchen.py:1
|
||||
msgid "kitchen sink"
|
||||
msgstr "placeholder"
|
||||
|
||||
#: kitchen.py:2
|
||||
msgid "くらとみ"
|
||||
msgstr "placeholder"
|
||||
|
||||
#: kitchen.py:3
|
||||
msgid "Kuratomi"
|
||||
msgstr "placeholder"
|
||||
|
||||
#: kitchen.py:4
|
||||
msgid "1 lemon"
|
||||
msgid_plural "4 lemons"
|
||||
msgstr[0] "1 placeholder"
|
||||
msgstr[1] "4 placeholders"
|
||||
|
||||
#: kitchen.py:5
|
||||
msgid "一 limão"
|
||||
msgid_plural "四 limões"
|
||||
msgstr[0] "1 placeholder"
|
||||
msgstr[1] "4 placeholders"
|
||||
|
||||
#: kitchen.py:6
|
||||
msgid "Only café in fallback"
|
||||
msgstr "Yes, only café in fallback"
|
BIN
tests/data/locale-old/pt_BR/LC_MESSAGES/test.mo
Normal file
BIN
tests/data/locale-old/pt_BR/LC_MESSAGES/test.mo
Normal file
Binary file not shown.
46
tests/data/locale-old/test.pot
Normal file
46
tests/data/locale-old/test.pot
Normal file
|
@ -0,0 +1,46 @@
|
|||
# Translations template for kitchen.
|
||||
# Copyright (C) 2010 ORGANIZATION
|
||||
# This file is distributed under the same license as the kitchen project.
|
||||
# FIRST AUTHOR <EMAIL@ADDRESS>, 2010.
|
||||
#
|
||||
#, fuzzy
|
||||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: kitchen 0.2.1a1\n"
|
||||
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
|
||||
"POT-Creation-Date: 2010-09-03 00:49-0400\n"
|
||||
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
|
||||
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
||||
"Language-Team: LANGUAGE <LL@li.org>\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=utf-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"Generated-By: Babel 0.9.5\n"
|
||||
|
||||
#: kitchen.py:1
|
||||
msgid "kitchen sink"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen.py:2
|
||||
msgid "くらとみ"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen.py:3
|
||||
msgid "Kuratomi"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen.py:4
|
||||
msgid "1 lemon"
|
||||
msgid_plural "4 lemons"
|
||||
msgstr[0] ""
|
||||
msgstr[1] ""
|
||||
|
||||
#: kitchen.py:5
|
||||
msgid "一 limão"
|
||||
msgid_plural "四 limões"
|
||||
msgstr[0] ""
|
||||
msgstr[1] ""
|
||||
|
||||
#: kitchen.py:6
|
||||
msgid "Only café in fallback"
|
||||
msgstr ""
|
46
tests/data/locale/pt_BR.po
Normal file
46
tests/data/locale/pt_BR.po
Normal file
|
@ -0,0 +1,46 @@
|
|||
# Portuguese (Brazil) translations for kitchen.
|
||||
# Copyright (C) 2010 ORGANIZATION
|
||||
# This file is distributed under the same license as the kitchen project.
|
||||
# FIRST AUTHOR <EMAIL@ADDRESS>, 2010.
|
||||
#
|
||||
#, fuzzy
|
||||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: kitchen 0.2.1a1\n"
|
||||
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
|
||||
"POT-Creation-Date: 2010-09-03 00:49+0400\n"
|
||||
"PO-Revision-Date: 2010-09-08 00:45-0400\n"
|
||||
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
||||
"Language-Team: pt_BR <LL@li.org>\n"
|
||||
"Plural-Forms: nplurals=2; plural=(n > 1)\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=utf-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"Generated-By: Babel 0.9.5\n"
|
||||
|
||||
#: kitchen.py:1
|
||||
msgid "kitchen sink"
|
||||
msgstr "pia da cozinha"
|
||||
|
||||
#: kitchen.py:2
|
||||
#, fuzzy
|
||||
msgid "くらとみ"
|
||||
msgstr "Kuratomi"
|
||||
|
||||
#: kitchen.py:3
|
||||
#, fuzzy
|
||||
msgid "Kuratomi"
|
||||
msgstr "くらとみ"
|
||||
|
||||
#: kitchen.py:4
|
||||
msgid "1 lemon"
|
||||
msgid_plural "4 lemons"
|
||||
msgstr[0] "一 limão"
|
||||
msgstr[1] "四 limões"
|
||||
|
||||
#: kitchen.py:5
|
||||
msgid "一 limão"
|
||||
msgid_plural "四 limões"
|
||||
msgstr[0] "1 lemon"
|
||||
msgstr[1] "4 lemons"
|
||||
|
BIN
tests/data/locale/pt_BR/LC_MESSAGES/test.mo
Normal file
BIN
tests/data/locale/pt_BR/LC_MESSAGES/test.mo
Normal file
Binary file not shown.
42
tests/data/locale/test.pot
Normal file
42
tests/data/locale/test.pot
Normal file
|
@ -0,0 +1,42 @@
|
|||
# Translations template for kitchen.
|
||||
# Copyright (C) 2010 ORGANIZATION
|
||||
# This file is distributed under the same license as the kitchen project.
|
||||
# FIRST AUTHOR <EMAIL@ADDRESS>, 2010.
|
||||
#
|
||||
#, fuzzy
|
||||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: kitchen 0.2.1a1\n"
|
||||
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
|
||||
"POT-Creation-Date: 2010-09-03 00:49-0400\n"
|
||||
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
|
||||
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
||||
"Language-Team: LANGUAGE <LL@li.org>\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=utf-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"Generated-By: Babel 0.9.5\n"
|
||||
|
||||
#: kitchen.py:1
|
||||
msgid "kitchen sink"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen.py:2
|
||||
msgid "くらとみ"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen.py:3
|
||||
msgid "Kuratomi"
|
||||
msgstr ""
|
||||
|
||||
#: kitchen.py:4
|
||||
msgid "1 lemon"
|
||||
msgid_plural "4 lemons"
|
||||
msgstr[0] ""
|
||||
msgstr[1] ""
|
||||
|
||||
#: kitchen.py:5
|
||||
msgid "一 limão"
|
||||
msgid_plural "四 limões"
|
||||
msgstr[0] ""
|
||||
msgstr[1] ""
|
11
tests/subprocessdata/sigchild_ignore.py
Normal file
11
tests/subprocessdata/sigchild_ignore.py
Normal file
|
@ -0,0 +1,11 @@
|
|||
# Helper script for the subprocess tests: run a child to completion while
# SIGCHLD is being ignored.
import os
import signal, sys
# Make the in-tree kitchen package importable when this script is run from
# the tests directory
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))

from kitchen.pycompat27.subprocess import _subprocess as subprocess

# On Linux this causes os.waitpid to fail with OSError as the OS has already
# reaped our child process.  The wait() passing the OSError on to the caller
# and causing us to exit with an error is what we are testing against.
signal.signal(signal.SIGCHLD, signal.SIG_IGN)
subprocess.Popen([sys.executable, '-c', 'print("albatross")']).wait()
|
193
tests/test__all__.py
Normal file
193
tests/test__all__.py
Normal file
|
@ -0,0 +1,193 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
from nose import tools
|
||||
|
||||
import os
|
||||
import types
|
||||
import warnings
|
||||
# pycompat24 shim: install set/frozenset as builtins (presumably for
# python < 2.4 -- confirm against kitchen.pycompat24.sets)
from kitchen.pycompat24.sets import add_builtin_set
add_builtin_set()
|
||||
|
||||
def logit(msg):
    """Append *msg* as one line to a scratch log file (debugging helper).

    :arg msg: text to log; written followed by a newline
    """
    log = open('/var/tmp/test.log', 'a')
    try:
        log.write('%s\n' % msg)
    finally:
        # Close even when the write raises so the file handle is not leaked
        log.close()
|
||||
|
||||
class NoAll(RuntimeError):
    """Marker exception for a module lacking __all__ (unused in this file)."""
    pass
|
||||
|
||||
class FailedImport(RuntimeError):
    """Marker exception for a module that fails to import (unused here)."""
    pass
|
||||
|
||||
class Test__all__(object):
|
||||
'''Test that every function in __all__ exists and that no public methods
|
||||
are missing from __all__
|
||||
'''
|
||||
known_private = set([('kitchen', 'collections', 'version_tuple_to_string'),
|
||||
('kitchen.collections', 'strictdict', 'defaultdict'),
|
||||
('kitchen', 'i18n', 'version_tuple_to_string'),
|
||||
('kitchen', 'i18n', 'to_bytes'),
|
||||
('kitchen', 'i18n', 'to_unicode'),
|
||||
('kitchen', 'i18n', 'ENOENT'),
|
||||
('kitchen', 'i18n', 'byte_string_valid_encoding'),
|
||||
('kitchen', 'iterutils', 'version_tuple_to_string'),
|
||||
('kitchen', 'pycompat24', 'version_tuple_to_string'),
|
||||
('kitchen', 'pycompat25', 'version_tuple_to_string'),
|
||||
('kitchen.pycompat25.collections', '_defaultdict', 'b_'),
|
||||
('kitchen', 'pycompat27', 'version_tuple_to_string'),
|
||||
('kitchen.pycompat27', 'subprocess', 'MAXFD'),
|
||||
('kitchen.pycompat27', 'subprocess', 'list2cmdline'),
|
||||
('kitchen.pycompat27', 'subprocess', 'mswindows'),
|
||||
('kitchen', 'text', 'version_tuple_to_string'),
|
||||
('kitchen.text', 'converters', 'b_'),
|
||||
('kitchen.text', 'converters', 'b64decode'),
|
||||
('kitchen.text', 'converters', 'b64encode'),
|
||||
('kitchen.text', 'converters', 'ControlCharError'),
|
||||
('kitchen.text', 'converters', 'guess_encoding'),
|
||||
('kitchen.text', 'converters', 'html_entities_unescape'),
|
||||
('kitchen.text', 'converters', 'process_control_chars'),
|
||||
('kitchen.text', 'converters', 'XmlEncodeError'),
|
||||
('kitchen.text', 'misc', 'b_'),
|
||||
('kitchen.text', 'misc', 'chardet'),
|
||||
('kitchen.text', 'misc', 'ControlCharError'),
|
||||
('kitchen.text', 'display', 'b_'),
|
||||
('kitchen.text', 'display', 'ControlCharError'),
|
||||
('kitchen.text', 'display', 'to_bytes'),
|
||||
('kitchen.text', 'display', 'to_unicode'),
|
||||
('kitchen.text', 'utf8', 'b_'),
|
||||
('kitchen.text', 'utf8', 'byte_string_textual_width_fill'),
|
||||
('kitchen.text', 'utf8', 'byte_string_valid_encoding'),
|
||||
('kitchen.text', 'utf8', 'fill'),
|
||||
('kitchen.text', 'utf8', 'textual_width'),
|
||||
('kitchen.text', 'utf8', 'textual_width_chop'),
|
||||
('kitchen.text', 'utf8', 'to_bytes'),
|
||||
('kitchen.text', 'utf8', 'to_unicode'),
|
||||
('kitchen.text', 'utf8', 'wrap'),
|
||||
])
|
||||
lib_dir = os.path.join(os.path.dirname(__file__), '..', 'kitchen')
|
||||
|
||||
def setUp(self):
|
||||
# Silence deprecation warnings
|
||||
warnings.simplefilter('ignore', DeprecationWarning)
|
||||
def tearDown(self):
|
||||
warnings.simplefilter('default', DeprecationWarning)
|
||||
|
||||
def walk_modules(self, basedir, modpath):
|
||||
files = os.listdir(basedir)
|
||||
files.sort()
|
||||
for fn in files:
|
||||
path = os.path.join(basedir, fn)
|
||||
if os.path.isdir(path):
|
||||
pkg_init = os.path.join(path, '__init__.py')
|
||||
if os.path.exists(pkg_init):
|
||||
yield pkg_init, modpath + fn
|
||||
for p, m in self.walk_modules(path, modpath + fn + '.'):
|
||||
yield p, m
|
||||
continue
|
||||
if not fn.endswith('.py') or fn == '__init__.py':
|
||||
continue
|
||||
yield path, modpath + fn[:-3]
|
||||
|
||||
def check_has__all__(self, modpath):
|
||||
# This heuristic speeds up the process by removing, de facto,
|
||||
# most test modules (and avoiding the auto-executing ones).
|
||||
f = None
|
||||
try:
|
||||
try:
|
||||
f = open(modpath, 'rb')
|
||||
tools.ok_('__all__' in f.read(), '%s does not contain __all__' % modpath)
|
||||
except IOError, e:
|
||||
tools.ok_(False, '%s' % e)
|
||||
finally:
|
||||
if f:
|
||||
f.close()
|
||||
|
||||
def test_has__all__(self):
|
||||
'''
|
||||
For each module, check that it has an __all__
|
||||
'''
|
||||
# Blacklisted modules and packages
|
||||
blacklist = set([ ])
|
||||
|
||||
for path, modname in [m for m in self.walk_modules(self.lib_dir, '')
|
||||
if m[1] not in blacklist]:
|
||||
# Check that it has an __all__
|
||||
yield self.check_has__all__, path
|
||||
|
||||
def check_everything_in__all__exists(self, modname, modpath):
|
||||
names = {}
|
||||
exec 'from %s import %s' % (modpath, modname) in names
|
||||
if not hasattr(names[modname], '__all__'):
|
||||
# This should have been reported by test_has__all__
|
||||
return
|
||||
|
||||
interior_names = {}
|
||||
try:
|
||||
exec 'from %s.%s import *' % (modpath, modname) in interior_names
|
||||
except Exception, e:
|
||||
# Include the module name in the exception string
|
||||
tools.ok_(False, '__all__ failure in %s: %s: %s' % (
|
||||
modname, e.__class__.__name__, e))
|
||||
if '__builtins__' in interior_names:
|
||||
del interior_names['__builtins__']
|
||||
keys = set(interior_names)
|
||||
all = set(names[modname].__all__)
|
||||
tools.ok_(keys == all)
|
||||
|
||||
def test_everything_in__all__exists(self):
|
||||
'''
|
||||
For each name in module's __all__, check that it exists
|
||||
'''
|
||||
# Blacklisted modules and packages
|
||||
blacklist = set([ ])
|
||||
|
||||
for path, modname in [m for m in self.walk_modules(self.lib_dir, '')
|
||||
if m[1] not in blacklist]:
|
||||
# From path, deduce the module name
|
||||
from_name = path[path.find('../kitchen') + 3:]
|
||||
if from_name.endswith('__init__.py'):
|
||||
# Remove __init__.py as well as the filename
|
||||
from_name = os.path.dirname(from_name)
|
||||
from_name = os.path.dirname(from_name)
|
||||
from_name = unicode(from_name, 'utf-8')
|
||||
from_name = from_name.translate({ord(u'/'): u'.'})
|
||||
from_name = from_name.encode('utf-8')
|
||||
yield self.check_everything_in__all__exists, modname.split('.')[-1], from_name
|
||||
|
||||
|
||||
def check__all__is_complete(self, modname, modpath):
|
||||
names = {}
|
||||
exec 'from %s import %s' % (modpath, modname) in names
|
||||
if not hasattr(names[modname], '__all__'):
|
||||
# This should have been reported by test_has__all__
|
||||
return
|
||||
|
||||
mod = names[modname]
|
||||
expected_public = [k for k in mod.__dict__ if (modpath, modname, k)
|
||||
not in self.known_private and not k.startswith("_") and not
|
||||
isinstance(mod.__dict__[k], types.ModuleType)]
|
||||
|
||||
all = set(mod.__all__)
|
||||
public = set(expected_public)
|
||||
tools.ok_(all.issuperset(public), 'These public names are not in %s.__all__: %s'
|
||||
% (modname, ', '.join(public.difference(all))))
|
||||
|
||||
def test__all__is_complete(self):
    '''
    For each module, check that every public name is in __all__

    Generator test: yields one check per module found under self.lib_dir.
    '''
    # Blacklisted modules and packages: bundled copies of stdlib modules
    # whose public API we do not control
    blacklist = set(['pycompat27.subprocess._subprocess',
        'pycompat24.base64._base64'])

    for path, modname in (m for m in self.walk_modules(self.lib_dir, '')
            if m[1] not in blacklist):
        # From path, deduce the dotted name of the containing package:
        # drop the leading '../' (3 chars) before 'kitchen'
        from_name = path[path.find('../kitchen') + 3:]
        if from_name.endswith('__init__.py'):
            # Remove __init__.py as well as the filename
            from_name = os.path.dirname(from_name)
        from_name = os.path.dirname(from_name)
        # '/' and '.' are both ASCII, so a plain byte-level replace() is
        # equivalent to the old decode -> translate -> encode round trip
        from_name = from_name.replace('/', '.')
        yield self.check__all__is_complete, modname.split('.')[-1], from_name
|
190
tests/test_base64.py
Normal file
190
tests/test_base64.py
Normal file
|
@ -0,0 +1,190 @@
|
|||
import unittest
|
||||
from test import test_support
|
||||
from kitchen.pycompat24.base64 import _base64 as base64
|
||||
|
||||
|
||||
|
||||
class LegacyBase64TestCase(unittest.TestCase):
    '''Exercise the legacy (pre-RFC 3548) API of the bundled base64 module

    Ported from the Python stdlib test suite so that
    kitchen.pycompat24.base64._base64 is held to the same expectations as
    the stdlib implementation.
    '''
    def test_encodestring(self):
        '''encodestring() emits MIME base64: newline-terminated, wrapped at 76 chars'''
        eq = self.assertEqual
        eq(base64.encodestring("www.python.org"), "d3d3LnB5dGhvbi5vcmc=\n")
        eq(base64.encodestring("a"), "YQ==\n")
        eq(base64.encodestring("ab"), "YWI=\n")
        eq(base64.encodestring("abc"), "YWJj\n")
        # Empty input yields empty output -- no trailing newline
        eq(base64.encodestring(""), "")
        # Long input: output is wrapped with an embedded '\n'
        eq(base64.encodestring("abcdefghijklmnopqrstuvwxyz"
                               "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                               "0123456789!@#0^&*();:<>,. []{}"),
           "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE"
           "RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0\nNT"
           "Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ==\n")

    def test_decodestring(self):
        '''decodestring() decodes MIME base64, ignoring embedded newlines'''
        eq = self.assertEqual
        eq(base64.decodestring("d3d3LnB5dGhvbi5vcmc=\n"), "www.python.org")
        eq(base64.decodestring("YQ==\n"), "a")
        eq(base64.decodestring("YWI=\n"), "ab")
        eq(base64.decodestring("YWJj\n"), "abc")
        eq(base64.decodestring("YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE"
                               "RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0\nNT"
                               "Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ==\n"),
           "abcdefghijklmnopqrstuvwxyz"
           "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
           "0123456789!@#0^&*();:<>,. []{}")
        # Empty input round-trips to empty output
        eq(base64.decodestring(''), '')

    def test_encode(self):
        '''encode() reads raw bytes from a file object and writes MIME base64'''
        eq = self.assertEqual
        from cStringIO import StringIO
        infp = StringIO('abcdefghijklmnopqrstuvwxyz'
                        'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                        '0123456789!@#0^&*();:<>,. []{}')
        outfp = StringIO()
        base64.encode(infp, outfp)
        eq(outfp.getvalue(),
           'YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE'
           'RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0\nNT'
           'Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ==\n')

    def test_decode(self):
        '''decode() reads base64 from a file object and writes the raw bytes'''
        from cStringIO import StringIO
        infp = StringIO('d3d3LnB5dGhvbi5vcmc=')
        outfp = StringIO()
        base64.decode(infp, outfp)
        self.assertEqual(outfp.getvalue(), 'www.python.org')
|
||||
|
||||
|
||||
|
||||
class BaseXYTestCase(unittest.TestCase):
    '''Exercise the RFC 3548 b64/b32/b16 API of the bundled base64 module

    Ported from the Python stdlib test suite so that
    kitchen.pycompat24.base64._base64 matches stdlib behavior.
    '''
    def test_b64encode(self):
        '''b64encode() and its standard/urlsafe variants: no line wrapping'''
        eq = self.assertEqual
        # Test default alphabet
        eq(base64.b64encode("www.python.org"), "d3d3LnB5dGhvbi5vcmc=")
        eq(base64.b64encode('\x00'), 'AA==')
        eq(base64.b64encode("a"), "YQ==")
        eq(base64.b64encode("ab"), "YWI=")
        eq(base64.b64encode("abc"), "YWJj")
        eq(base64.b64encode(""), "")
        # Unlike encodestring(), long output is NOT wrapped with newlines
        eq(base64.b64encode("abcdefghijklmnopqrstuvwxyz"
                            "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                            "0123456789!@#0^&*();:<>,. []{}"),
           "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE"
           "RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NT"
           "Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ==")
        # Test with arbitrary alternative characters (replacing '+' and '/')
        eq(base64.b64encode('\xd3V\xbeo\xf7\x1d', altchars='*$'), '01a*b$cd')
        # Test standard alphabet
        eq(base64.standard_b64encode("www.python.org"), "d3d3LnB5dGhvbi5vcmc=")
        eq(base64.standard_b64encode("a"), "YQ==")
        eq(base64.standard_b64encode("ab"), "YWI=")
        eq(base64.standard_b64encode("abc"), "YWJj")
        eq(base64.standard_b64encode(""), "")
        eq(base64.standard_b64encode("abcdefghijklmnopqrstuvwxyz"
                                     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                                     "0123456789!@#0^&*();:<>,. []{}"),
           "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE"
           "RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NT"
           "Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ==")
        # Test with 'URL safe' alternative characters ('-' and '_')
        eq(base64.urlsafe_b64encode('\xd3V\xbeo\xf7\x1d'), '01a-b_cd')

    def test_b64decode(self):
        '''b64decode() and its standard/urlsafe variants'''
        eq = self.assertEqual
        eq(base64.b64decode("d3d3LnB5dGhvbi5vcmc="), "www.python.org")
        eq(base64.b64decode('AA=='), '\x00')
        eq(base64.b64decode("YQ=="), "a")
        eq(base64.b64decode("YWI="), "ab")
        eq(base64.b64decode("YWJj"), "abc")
        eq(base64.b64decode("YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE"
                            "RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0\nNT"
                            "Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ=="),
           "abcdefghijklmnopqrstuvwxyz"
           "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
           "0123456789!@#0^&*();:<>,. []{}")
        eq(base64.b64decode(''), '')
        # Test with arbitrary alternative characters
        eq(base64.b64decode('01a*b$cd', altchars='*$'), '\xd3V\xbeo\xf7\x1d')
        # Test standard alphabet
        eq(base64.standard_b64decode("d3d3LnB5dGhvbi5vcmc="), "www.python.org")
        eq(base64.standard_b64decode("YQ=="), "a")
        eq(base64.standard_b64decode("YWI="), "ab")
        eq(base64.standard_b64decode("YWJj"), "abc")
        eq(base64.standard_b64decode(""), "")
        eq(base64.standard_b64decode("YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE"
                                     "RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NT"
                                     "Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ=="),
           "abcdefghijklmnopqrstuvwxyz"
           "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
           "0123456789!@#0^&*();:<>,. []{}")
        # Test with 'URL safe' alternative characters
        eq(base64.urlsafe_b64decode('01a-b_cd'), '\xd3V\xbeo\xf7\x1d')

    def test_b64decode_error(self):
        '''b64decode() raises TypeError on incorrectly padded input'''
        self.assertRaises(TypeError, base64.b64decode, 'abc')

    def test_b32encode(self):
        '''b32encode() pads output to an 8-character boundary with '=' '''
        eq = self.assertEqual
        eq(base64.b32encode(''), '')
        eq(base64.b32encode('\x00'), 'AA======')
        eq(base64.b32encode('a'), 'ME======')
        eq(base64.b32encode('ab'), 'MFRA====')
        eq(base64.b32encode('abc'), 'MFRGG===')
        eq(base64.b32encode('abcd'), 'MFRGGZA=')
        eq(base64.b32encode('abcde'), 'MFRGGZDF')

    def test_b32decode(self):
        '''b32decode() of upper-case input'''
        eq = self.assertEqual
        eq(base64.b32decode(''), '')
        eq(base64.b32decode('AA======'), '\x00')
        eq(base64.b32decode('ME======'), 'a')
        eq(base64.b32decode('MFRA===='), 'ab')
        eq(base64.b32decode('MFRGG==='), 'abc')
        eq(base64.b32decode('MFRGGZA='), 'abcd')
        eq(base64.b32decode('MFRGGZDF'), 'abcde')

    def test_b32decode_casefold(self):
        '''b32decode() casefold flag and the map01 digit-mapping option'''
        eq = self.assertEqual
        eq(base64.b32decode('', True), '')
        eq(base64.b32decode('ME======', True), 'a')
        eq(base64.b32decode('MFRA====', True), 'ab')
        eq(base64.b32decode('MFRGG===', True), 'abc')
        eq(base64.b32decode('MFRGGZA=', True), 'abcd')
        eq(base64.b32decode('MFRGGZDF', True), 'abcde')
        # Lower cases
        eq(base64.b32decode('me======', True), 'a')
        eq(base64.b32decode('mfra====', True), 'ab')
        eq(base64.b32decode('mfrgg===', True), 'abc')
        eq(base64.b32decode('mfrggza=', True), 'abcd')
        eq(base64.b32decode('mfrggzdf', True), 'abcde')
        # Expected exceptions: lower case rejected without casefold
        self.assertRaises(TypeError, base64.b32decode, 'me======')
        # Mapping zero and one: map01 says which letter '1' stands for
        # ('0' always maps to 'O')
        eq(base64.b32decode('MLO23456'), 'b\xdd\xad\xf3\xbe')
        eq(base64.b32decode('M1023456', map01='L'), 'b\xdd\xad\xf3\xbe')
        eq(base64.b32decode('M1023456', map01='I'), 'b\x1d\xad\xf3\xbe')

    def test_b32decode_error(self):
        '''b32decode() raises TypeError on bad length or bad padding'''
        self.assertRaises(TypeError, base64.b32decode, 'abc')
        self.assertRaises(TypeError, base64.b32decode, 'ABCDEF==')

    def test_b16encode(self):
        '''b16encode() emits upper-case hex'''
        eq = self.assertEqual
        eq(base64.b16encode('\x01\x02\xab\xcd\xef'), '0102ABCDEF')
        eq(base64.b16encode('\x00'), '00')

    def test_b16decode(self):
        '''b16decode() requires upper case unless casefold is passed'''
        eq = self.assertEqual
        eq(base64.b16decode('0102ABCDEF'), '\x01\x02\xab\xcd\xef')
        eq(base64.b16decode('00'), '\x00')
        # Lower case is not allowed without a flag
        self.assertRaises(TypeError, base64.b16decode, '0102abcdef')
        # Case fold
        eq(base64.b16decode('0102abcdef', True), '\x01\x02\xab\xcd\xef')
|
||||
|
||||
|
||||
|
||||
#def test_main():
|
||||
# test_support.run_unittest(__name__)
|
||||
#
|
||||
#if __name__ == '__main__':
|
||||
# test_main()
|
156
tests/test_collections.py
Normal file
156
tests/test_collections.py
Normal file
|
@ -0,0 +1,156 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
import unittest
|
||||
from nose import tools
|
||||
|
||||
from kitchen.pycompat24.sets import add_builtin_set
|
||||
add_builtin_set()
|
||||
|
||||
from kitchen import collections
|
||||
|
||||
def test_strict_dict_get_set():
    '''Test getting and setting items in StrictDict'''
    sd = collections.StrictDict()
    # A unicode key and the equal-looking byte key must stay distinct
    sd[u'a'] = 1
    sd['a'] = 2
    tools.ok_(sd[u'a'] != sd['a'])
    tools.ok_(len(sd) == 2)

    # Non-ASCII: the unicode key, the byte-literal key, and the
    # utf8-encoded key are three separate entries
    sd[u'\xf1'] = 1
    sd['\xf1'] = 2
    sd[u'\xf1'.encode('utf8')] = 3
    tools.ok_(sd[u'\xf1'] == 1)
    tools.ok_(sd['\xf1'] == 2)
    tools.ok_(sd[u'\xf1'.encode('utf8')] == 3)
    tools.ok_(len(sd) == 5)
|
||||
|
||||
class TestStrictDict(unittest.TestCase):
    '''Tests for StrictDict, which keeps equal-looking str and unicode keys distinct'''

    def setUp(self):
        # Five entries: u'a'/'a' pair plus three variants of n-with-tilde
        # (unicode, byte literal, utf8-encoded bytes)
        self.d = collections.StrictDict()
        self.d[u'a'] = 1
        self.d['a'] = 2
        self.d[u'\xf1'] = 1
        self.d['\xf1'] = 2
        self.d[u'\xf1'.encode('utf8')] = 3
        # Expected key list, in insertion order
        self.keys = [u'a', 'a', u'\xf1', '\xf1', u'\xf1'.encode('utf8')]

    def tearDown(self):
        del(self.d)

    def _compare_lists(self, list1, list2, debug=False):
        '''We have a mixture of bytes and unicode and need python2.3 compat

        So we have to compare these lists manually and inefficiently

        Returns True when the two lists hold the same multiset of elements,
        comparing unicode, str, and other elements only within their own
        type bucket (so u'a' never matches 'a').
        '''
        def _compare_lists_helper(compare_to, dupes, idx, length):
            # NOTE: reads `i` from the enclosing loop via closure -- the
            # `idx` parameter selects the per-type set, it is NOT the item.
            # Records one occurrence of `i` in the first dupes slot that
            # does not yet contain it.
            if i not in compare_to:
                return False
            for n in range(1, length + 1):
                if i not in dupes[n][idx]:
                    dupes[n][idx].add(i)
                    return True
        # Different lengths can never be the same multiset
        if len(list1) != len(list2):
            return False

        # dupes[n] holds (unicode-set, str-set, other-set) for the n-th
        # occurrence of each value
        list1_dupes = dict([(i, (set(), set(), set())) for i in range(1, len(list1)+1)])
        list2_dupes = dict([(i, (set(), set(), set())) for i in range(1, len(list1)+1)])

        # Bucket each list by type so comparisons stay within a type
        list1_u = [l for l in list1 if isinstance(l, unicode)]
        list1_b = [l for l in list1 if isinstance(l, str)]
        list1_o = [l for l in list1 if not (isinstance(l, unicode) or isinstance(l, str))]

        list2_u = [l for l in list2 if isinstance(l, unicode)]
        list2_b = [l for l in list2 if isinstance(l, str)]
        list2_o = [l for l in list2 if not (isinstance(l, unicode) or isinstance(l, str))]

        # Every element of list1 must appear in list2's same-type bucket
        for i in list1:
            if isinstance(i, unicode):
                if not _compare_lists_helper(list2_u, list1_dupes, 0, len(list1)):
                    return False
            elif isinstance(i, str):
                if not _compare_lists_helper(list2_b, list1_dupes, 1, len(list1)):
                    return False
            else:
                if not _compare_lists_helper(list2_o, list1_dupes, 2, len(list1)):
                    return False

        # Only walk list2 when list1 contained duplicates (any slot of
        # dupes[2] non-empty); otherwise counts already match
        if list1_dupes[2][0] or list1_dupes[2][1] or list1_dupes[2][2]:
            for i in list2:
                if isinstance(i, unicode):
                    if not _compare_lists_helper(list1_u, list2_dupes, 0, len(list1)):
                        return False
                elif isinstance(i, str):
                    if not _compare_lists_helper(list1_b, list2_dupes, 1, len(list1)):
                        return False
                else:
                    if not _compare_lists_helper(list1_o, list2_dupes, 2, len(list1)):
                        return False

            # Duplicate occurrence sets must agree between the two lists.
            # `n` iterates the three per-type sets; membership uses set
            # equality against the tuple of sets.
            for i in range(2, len(list1)+1):
                for n in list1_dupes[i]:
                    if n not in list2_dupes[i]:
                        return False

        return True

    def test__compare_list(self):
        '''*sigh* this test support function is so complex we need to test it'''
        tools.ok_(self._compare_lists(['a', 'b', 'c'], ['c', 'a', 'b']))
        tools.ok_(not self._compare_lists(['b', 'c'], ['c', 'a', 'b']))
        tools.ok_(not self._compare_lists([u'a', 'b'], ['a', 'b']))
        tools.ok_(not self._compare_lists(['a', u'b'], [u'a', 'b']))
        tools.ok_(self._compare_lists(['a', 'b', 1], ['a', 1, 'b']))
        tools.ok_(self._compare_lists([u'a', u'b'], [u'a', u'b']))
        tools.ok_(self._compare_lists([u'a', 'b'], [u'a', 'b']))
        tools.ok_(not self._compare_lists([u'a', 'b'], [u'a', u'b']))
        tools.ok_(self._compare_lists([u'a', 'b', 'b', 'c', u'a'], [u'a', u'a', 'b', 'c', 'b']))
        tools.ok_(not self._compare_lists([u'a', 'b', 'b', 'c', 'a'], [u'a', u'a', 'b', 'c', 'b']))
        tools.ok_(not self._compare_lists([u'a', 'b', 'b', 'c', u'a'], [u'a', 'b', 'b', 'c', 'b']))

    def test_strict_dict_len(self):
        '''StrictDict len'''
        tools.ok_(len(self.d) == 5)

    def test_strict_dict_del(self):
        '''StrictDict del'''
        tools.ok_(len(self.d) == 5)
        # Deleting the unicode key must not touch the byte-key entries
        del(self.d[u'\xf1'])
        tools.assert_raises(KeyError, self.d.__getitem__, u'\xf1')
        tools.ok_(len(self.d) == 4)

    def test_strict_dict_iter(self):
        '''StrictDict iteration'''
        # __iter__, iterkeys(), a comprehension, and keys() must all
        # yield the same multiset of keys
        keys = []
        for k in self.d:
            keys.append(k)
        tools.ok_(self._compare_lists(keys, self.keys))

        keys = []
        for k in self.d.iterkeys():
            keys.append(k)
        tools.ok_(self._compare_lists(keys, self.keys))

        keys = [k for k in self.d]
        tools.ok_(self._compare_lists(keys, self.keys))

        keys = []
        for k in self.d.keys():
            keys.append(k)
        tools.ok_(self._compare_lists(keys, self.keys))

    def test_strict_dict_contains(self):
        '''StrictDict contains function'''
        tools.ok_('b' not in self.d)
        tools.ok_(u'b' not in self.d)
        tools.ok_('\xf1' in self.d)
        tools.ok_(u'\xf1' in self.d)
        tools.ok_('a' in self.d)
        tools.ok_(u'a' in self.d)

        # Removing the unicode key leaves the byte key present
        del(self.d[u'\xf1'])
        tools.ok_(u'\xf1' not in self.d)
        tools.ok_('\xf1' in self.d)

        # And vice versa
        del(self.d['a'])
        tools.ok_(u'a' in self.d)
        tools.ok_('a' not in self.d)
|
387
tests/test_converters.py
Normal file
387
tests/test_converters.py
Normal file
|
@ -0,0 +1,387 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
|
||||
import unittest
|
||||
from nose import tools
|
||||
from nose.plugins.skip import SkipTest
|
||||
|
||||
import StringIO
|
||||
import warnings
|
||||
|
||||
try:
|
||||
import chardet
|
||||
except:
|
||||
chardet = None
|
||||
|
||||
from kitchen.text import converters
|
||||
from kitchen.text.exceptions import XmlEncodeError
|
||||
|
||||
import base_classes
|
||||
|
||||
class UnicodeNoStr(object):
    '''Fixture: object providing only __unicode__ (no __str__)'''
    def __unicode__(self):
        text = u'El veloz murciélago saltó sobre el perro perezoso.'
        return text
|
||||
|
||||
class StrNoUnicode(object):
    '''Fixture: object whose only text method, __str__, returns utf8 bytes'''
    def __str__(self):
        text = u'El veloz murciélago saltó sobre el perro perezoso.'
        return text.encode('utf8')
|
||||
|
||||
class StrReturnsUnicode(object):
    '''Fixture: __str__ (wrongly) returns a unicode object instead of bytes'''
    def __str__(self):
        text = u'El veloz murciélago saltó sobre el perro perezoso.'
        return text
|
||||
|
||||
class UnicodeReturnsStr(object):
    '''Fixture: __unicode__ (wrongly) returns utf8 bytes instead of unicode'''
    def __unicode__(self):
        text = u'El veloz murciélago saltó sobre el perro perezoso.'
        return text.encode('utf8')
|
||||
|
||||
class UnicodeStrCrossed(object):
    '''Fixture: __unicode__ and __str__ return each other's expected types'''
    def __unicode__(self):
        # Wrong type on purpose: bytes where unicode is expected
        text = u'El veloz murciélago saltó sobre el perro perezoso.'
        return text.encode('utf8')

    def __str__(self):
        # Wrong type on purpose: unicode where bytes are expected
        text = u'El veloz murciélago saltó sobre el perro perezoso.'
        return text
|
||||
|
||||
class ReprUnicode(object):
    '''Fixture: __repr__ returns a unicode object containing non-ASCII text'''
    def __repr__(self):
        text = u'ReprUnicode(El veloz murciélago saltó sobre el perro perezoso.)'
        return text
|
||||
|
||||
class TestConverters(unittest.TestCase, base_classes.UnicodeTestData):
    '''Tests for kitchen.text.converters using the fixture strings supplied
    by base_classes.UnicodeTestData (self.u_*, self.utf8_*, self.latin1_*,
    self.euc_jp_* attributes).
    '''
    def test_to_unicode(self):
        '''Test to_unicode when the user gives good values'''
        # unicode input passes through untouched, even with a bogus encoding
        tools.ok_(converters.to_unicode(self.u_japanese, encoding='latin1') == self.u_japanese)

        # utf8 is the default encoding
        tools.ok_(converters.to_unicode(self.utf8_spanish) == self.u_spanish)
        tools.ok_(converters.to_unicode(self.utf8_japanese) == self.u_japanese)

        # Explicit encodings
        tools.ok_(converters.to_unicode(self.latin1_spanish, encoding='latin1') == self.u_spanish)
        tools.ok_(converters.to_unicode(self.euc_jp_japanese, encoding='euc_jp') == self.u_japanese)

        # Invalid nonstring strategy raises TypeError
        tools.assert_raises(TypeError, converters.to_unicode, *[5], **{'nonstring': 'foo'})

    def test_to_unicode_errors(self):
        '''errors= controls what happens when bytes do not fit the encoding'''
        # Default ('replace'-style) mangles latin1 decoded as utf8
        tools.ok_(converters.to_unicode(self.latin1_spanish) == self.u_mangled_spanish_latin1_as_utf8)
        tools.ok_(converters.to_unicode(self.latin1_spanish, errors='ignore') == self.u_spanish_ignore)
        tools.assert_raises(UnicodeDecodeError, converters.to_unicode,
                *[self.latin1_spanish], **{'errors': 'strict'})

    def test_to_unicode_nonstring(self):
        '''nonstring= strategies for values that are not str/unicode'''
        tools.ok_(converters.to_unicode(5) == u'5')
        tools.ok_(converters.to_unicode(5, nonstring='empty') == u'')
        tools.ok_(converters.to_unicode(5, nonstring='passthru') == 5)
        tools.ok_(converters.to_unicode(5, nonstring='simplerepr') == u'5')
        tools.ok_(converters.to_unicode(5, nonstring='repr') == u'5')
        tools.assert_raises(TypeError, converters.to_unicode, *[5], **{'nonstring': 'strict'})

        # simplerepr falls back through __unicode__/__str__ as available
        tools.ok_(converters.to_unicode(UnicodeNoStr(), nonstring='simplerepr') == self.u_spanish)
        tools.ok_(converters.to_unicode(StrNoUnicode(), nonstring='simplerepr') == self.u_spanish)
        tools.ok_(converters.to_unicode(StrReturnsUnicode(), nonstring='simplerepr') == self.u_spanish)
        tools.ok_(converters.to_unicode(UnicodeReturnsStr(), nonstring='simplerepr') == self.u_spanish)
        tools.ok_(converters.to_unicode(UnicodeStrCrossed(), nonstring='simplerepr') == self.u_spanish)

        # Objects with neither get their repr, as unicode
        obj_repr = converters.to_unicode(object, nonstring='simplerepr')
        tools.ok_(obj_repr == u"<type 'object'>" and isinstance(obj_repr, unicode))

    def test_to_bytes(self):
        '''Test to_bytes when the user gives good values'''
        # byte input passes through untouched, even with a bogus encoding
        tools.ok_(converters.to_bytes(self.utf8_japanese, encoding='latin1') == self.utf8_japanese)

        # utf8 is the default target encoding
        tools.ok_(converters.to_bytes(self.u_spanish) == self.utf8_spanish)
        tools.ok_(converters.to_bytes(self.u_japanese) == self.utf8_japanese)

        # Explicit target encodings
        tools.ok_(converters.to_bytes(self.u_spanish, encoding='latin1') == self.latin1_spanish)
        tools.ok_(converters.to_bytes(self.u_japanese, encoding='euc_jp') == self.euc_jp_japanese)

    def test_to_bytes_errors(self):
        '''errors= controls what happens when chars do not fit the encoding'''
        tools.ok_(converters.to_bytes(self.u_mixed, encoding='latin1') ==
                self.latin1_mixed_replace)
        # 'latin' is a registered codec alias for latin1
        tools.ok_(converters.to_bytes(self.u_mixed, encoding='latin',
            errors='ignore') == self.latin1_mixed_ignore)
        tools.assert_raises(UnicodeEncodeError, converters.to_bytes,
                *[self.u_mixed], **{'errors': 'strict', 'encoding': 'latin1'})

    def _check_repr_bytes(self, repr_string, obj_name):
        '''Assert repr_string is a byte str of the form <... ObjName ...>

        :arg repr_string: value returned by to_bytes(..., nonstring='simplerepr')
        :arg obj_name: class name expected inside the repr
        '''
        tools.ok_(isinstance(repr_string, str))
        match = self.repr_re.match(repr_string)
        tools.ok_(match != None)
        tools.ok_(match.groups()[0] == obj_name)

    def test_to_bytes_nonstring(self):
        '''nonstring= strategies for values that are not str/unicode'''
        tools.ok_(converters.to_bytes(5) == '5')
        tools.ok_(converters.to_bytes(5, nonstring='empty') == '')
        tools.ok_(converters.to_bytes(5, nonstring='passthru') == 5)
        tools.ok_(converters.to_bytes(5, nonstring='simplerepr') == '5')
        tools.ok_(converters.to_bytes(5, nonstring='repr') == '5')

        # Raise a TypeError if the msg is nonstring and we're set to strict
        tools.assert_raises(TypeError, converters.to_bytes, *[5], **{'nonstring': 'strict'})
        # Raise a TypeError if given an invalid nonstring arg
        tools.assert_raises(TypeError, converters.to_bytes, *[5], **{'nonstring': 'INVALID'})

        # No __str__ method so this returns repr
        string = converters.to_bytes(UnicodeNoStr(), nonstring='simplerepr')
        self._check_repr_bytes(string, 'UnicodeNoStr')

        # This object's __str__ returns a utf8 encoded object
        tools.ok_(converters.to_bytes(StrNoUnicode(), nonstring='simplerepr') == self.utf8_spanish)

        # This object's __str__ returns unicode which to_bytes converts to utf8
        tools.ok_(converters.to_bytes(StrReturnsUnicode(), nonstring='simplerepr') == self.utf8_spanish)
        # Unless we explicitly ask for something different
        tools.ok_(converters.to_bytes(StrReturnsUnicode(),
            nonstring='simplerepr', encoding='latin1') == self.latin1_spanish)

        # This object has no __str__ so it returns repr
        string = converters.to_bytes(UnicodeReturnsStr(), nonstring='simplerepr')
        self._check_repr_bytes(string, 'UnicodeReturnsStr')

        # This object's __str__ returns unicode which to_bytes converts to utf8
        tools.ok_(converters.to_bytes(UnicodeStrCrossed(), nonstring='simplerepr') == self.utf8_spanish)

        # This object's __repr__ returns unicode which to_bytes converts to utf8
        tools.ok_(converters.to_bytes(ReprUnicode(), nonstring='simplerepr')
                == u'ReprUnicode(El veloz murciélago saltó sobre el perro perezoso.)'.encode('utf8'))
        tools.ok_(converters.to_bytes(ReprUnicode(), nonstring='repr') ==
                u'ReprUnicode(El veloz murciélago saltó sobre el perro perezoso.)'.encode('utf8'))

        obj_repr = converters.to_bytes(object, nonstring='simplerepr')
        tools.ok_(obj_repr == "<type 'object'>" and isinstance(obj_repr, str))

    def test_unicode_to_xml(self):
        '''unicode_to_xml: escaping, control chars, and type errors'''
        tools.ok_(converters.unicode_to_xml(None) == '')
        # byte str input is rejected
        tools.assert_raises(XmlEncodeError, converters.unicode_to_xml, *['byte string'])
        # invalid control_chars strategy
        tools.assert_raises(ValueError, converters.unicode_to_xml, *[u'string'], **{'control_chars': 'foo'})
        # control chars raise under 'strict'
        tools.assert_raises(XmlEncodeError, converters.unicode_to_xml,
                *[u'string\u0002'], **{'control_chars': 'strict'})
        tools.ok_(converters.unicode_to_xml(self.u_entity) == self.utf8_entity_escape)
        tools.ok_(converters.unicode_to_xml(self.u_entity, attrib=True) == self.utf8_attrib_escape)

    def test_xml_to_unicode(self):
        '''xml_to_unicode unescapes both element and attribute escaping'''
        tools.ok_(converters.xml_to_unicode(self.utf8_entity_escape, 'utf8', 'replace') == self.u_entity)
        tools.ok_(converters.xml_to_unicode(self.utf8_attrib_escape, 'utf8', 'replace') == self.u_entity)

    def test_xml_to_byte_string(self):
        '''xml_to_byte_string unescapes and re-encodes to output_encoding'''
        tools.ok_(converters.xml_to_byte_string(self.utf8_entity_escape, 'utf8', 'replace') == self.u_entity.encode('utf8'))
        tools.ok_(converters.xml_to_byte_string(self.utf8_attrib_escape, 'utf8', 'replace') == self.u_entity.encode('utf8'))

        tools.ok_(converters.xml_to_byte_string(self.utf8_attrib_escape,
            output_encoding='euc_jp', errors='replace') ==
            self.u_entity.encode('euc_jp', 'replace'))
        tools.ok_(converters.xml_to_byte_string(self.utf8_attrib_escape,
            output_encoding='latin1', errors='replace') ==
            self.u_entity.encode('latin1', 'replace'))

    def test_byte_string_to_xml(self):
        '''byte_string_to_xml rejects unicode input and escapes byte str'''
        tools.assert_raises(XmlEncodeError, converters.byte_string_to_xml, *[u'test'])
        tools.ok_(converters.byte_string_to_xml(self.utf8_entity) == self.utf8_entity_escape)
        tools.ok_(converters.byte_string_to_xml(self.utf8_entity, attrib=True) == self.utf8_attrib_escape)

    def test_bytes_to_xml(self):
        '''bytes_to_xml base64-encodes arbitrary bytes for xml embedding'''
        tools.ok_(converters.bytes_to_xml(self.b_byte_chars) == self.b_byte_encoded)

    def test_xml_to_bytes(self):
        '''xml_to_bytes reverses bytes_to_xml'''
        tools.ok_(converters.xml_to_bytes(self.b_byte_encoded) == self.b_byte_chars)

    def test_guess_encoding_to_xml(self):
        '''guess_encoding_to_xml detects the input encoding before escaping'''
        tools.ok_(converters.guess_encoding_to_xml(self.u_entity) == self.utf8_entity_escape)
        tools.ok_(converters.guess_encoding_to_xml(self.utf8_spanish) == self.utf8_spanish)
        tools.ok_(converters.guess_encoding_to_xml(self.latin1_spanish) == self.utf8_spanish)
        tools.ok_(converters.guess_encoding_to_xml(self.utf8_japanese) == self.utf8_japanese)

    def test_guess_encoding_to_xml_euc_japanese(self):
        '''euc_jp detection works only when chardet is available'''
        if chardet:
            tools.ok_(converters.guess_encoding_to_xml(self.euc_jp_japanese)
                    == self.utf8_japanese)
        else:
            raise SkipTest('chardet not installed, euc_japanese won\'t be detected')

    def test_guess_encoding_to_xml_euc_japanese_mangled(self):
        '''without chardet, euc_jp input falls back to latin1 (mangled)'''
        if chardet:
            raise SkipTest('chardet installed, euc_japanese won\'t be mangled')
        else:
            tools.ok_(converters.guess_encoding_to_xml(self.euc_jp_japanese)
                    == self.utf8_mangled_euc_jp_as_latin1)
|
||||
|
||||
class TestGetWriter(unittest.TestCase, base_classes.UnicodeTestData):
    '''Tests for converters.getwriter, a codecs-style stream writer factory'''

    def setUp(self):
        # Fresh in-memory stream for each test
        self.io = StringIO.StringIO()

    def test_utf8_writer(self):
        '''utf-8 writer encodes unicode and passes byte str through unchanged'''
        writer = converters.getwriter('utf-8')
        io = writer(self.io)
        # unicode input is encoded to utf8 on the way out
        io.write(u'%s\n' % self.u_japanese)
        io.seek(0)
        result = io.read().strip()
        tools.ok_(result == self.utf8_japanese)

        # byte str input is written as-is -- even bytes that are not
        # valid utf8 (euc_jp here) must come back untouched
        io.seek(0)
        io.truncate(0)
        io.write('%s\n' % self.euc_jp_japanese)
        io.seek(0)
        result = io.read().strip()
        tools.ok_(result == self.euc_jp_japanese)

        io.seek(0)
        io.truncate(0)
        io.write('%s\n' % self.utf8_japanese)
        io.seek(0)
        result = io.read().strip()
        tools.ok_(result == self.utf8_japanese)

    def test_error_handlers(self):
        '''Test setting alternate error handlers'''
        writer = converters.getwriter('latin1')
        io = writer(self.io, errors='strict')
        # Japanese text cannot be encoded to latin1; strict must raise
        tools.assert_raises(UnicodeEncodeError, io.write, self.u_japanese)
|
||||
|
||||
|
||||
class TestExceptionConverters(unittest.TestCase, base_classes.UnicodeTestData):
    '''Tests for exception_to_unicode/exception_to_bytes'''

    def setUp(self):
        # Build one raised-and-caught exception per fixture string so the
        # converters operate on real exception objects
        self.exceptions = {}
        tests = {'u_jpn': self.u_japanese,
                'u_spanish': self.u_spanish,
                'utf8_jpn': self.utf8_japanese,
                'utf8_spanish': self.utf8_spanish,
                'euc_jpn': self.euc_jp_japanese,
                'latin1_spanish': self.latin1_spanish}
        for test in tests.iteritems():
            try:
                raise Exception(test[1])
            # python2 except syntax binds the caught exception directly
            # into the self.exceptions dict entry
            except Exception, self.exceptions[test[0]]:
                pass

    def test_exception_to_unicode_with_unicode(self):
        '''unicode exception args convert losslessly'''
        tools.ok_(converters.exception_to_unicode(self.exceptions['u_jpn']) == self.u_japanese)
        tools.ok_(converters.exception_to_unicode(self.exceptions['u_spanish']) == self.u_spanish)

    def test_exception_to_unicode_with_bytes(self):
        '''byte args decode as utf8 by default; other encodings get mangled'''
        tools.ok_(converters.exception_to_unicode(self.exceptions['utf8_jpn']) == self.u_japanese)
        tools.ok_(converters.exception_to_unicode(self.exceptions['utf8_spanish']) == self.u_spanish)
        # Mangled latin1/utf8 conversion but no tracebacks
        tools.ok_(converters.exception_to_unicode(self.exceptions['latin1_spanish']) == self.u_mangled_spanish_latin1_as_utf8)
        # Mangled euc_jp/utf8 conversion but no tracebacks
        tools.ok_(converters.exception_to_unicode(self.exceptions['euc_jpn']) == self.u_mangled_euc_jp_as_utf8)

    def test_exception_to_unicode_custom(self):
        '''a custom converter list can decode non-utf8 byte args correctly'''
        # If given custom functions, then we should not mangle
        c = [lambda e: converters.to_unicode(e, encoding='euc_jp')]
        tools.ok_(converters.exception_to_unicode(self.exceptions['euc_jpn'],
            converters=c) == self.u_japanese)
        # Prepending to the default converter chain also works
        c.extend(converters.EXCEPTION_CONVERTERS)
        tools.ok_(converters.exception_to_unicode(self.exceptions['euc_jpn'],
            converters=c) == self.u_japanese)

        c = [lambda e: converters.to_unicode(e, encoding='latin1')]
        tools.ok_(converters.exception_to_unicode(self.exceptions['latin1_spanish'],
            converters=c) == self.u_spanish)
        c.extend(converters.EXCEPTION_CONVERTERS)
        tools.ok_(converters.exception_to_unicode(self.exceptions['latin1_spanish'],
            converters=c) == self.u_spanish)

    def test_exception_to_bytes_with_unicode(self):
        '''unicode args are encoded to utf8'''
        tools.ok_(converters.exception_to_bytes(self.exceptions['u_jpn']) == self.utf8_japanese)
        tools.ok_(converters.exception_to_bytes(self.exceptions['u_spanish']) == self.utf8_spanish)

    def test_exception_to_bytes_with_bytes(self):
        '''byte args pass through unchanged regardless of their encoding'''
        tools.ok_(converters.exception_to_bytes(self.exceptions['utf8_jpn']) == self.utf8_japanese)
        tools.ok_(converters.exception_to_bytes(self.exceptions['utf8_spanish']) == self.utf8_spanish)
        tools.ok_(converters.exception_to_bytes(self.exceptions['latin1_spanish']) == self.latin1_spanish)
        tools.ok_(converters.exception_to_bytes(self.exceptions['euc_jpn']) == self.euc_jp_japanese)

    def test_exception_to_bytes_custom(self):
        '''a custom converter list controls the output byte encoding'''
        # If given custom functions, then we should not mangle
        c = [lambda e: converters.to_bytes(e, encoding='euc_jp')]
        tools.ok_(converters.exception_to_bytes(self.exceptions['euc_jpn'],
            converters=c) == self.euc_jp_japanese)
        c.extend(converters.EXCEPTION_CONVERTERS)
        tools.ok_(converters.exception_to_bytes(self.exceptions['euc_jpn'],
            converters=c) == self.euc_jp_japanese)

        c = [lambda e: converters.to_bytes(e, encoding='latin1')]
        tools.ok_(converters.exception_to_bytes(self.exceptions['latin1_spanish'],
            converters=c) == self.latin1_spanish)
        c.extend(converters.EXCEPTION_CONVERTERS)
        tools.ok_(converters.exception_to_bytes(self.exceptions['latin1_spanish'],
            converters=c) == self.latin1_spanish)
|
||||
|
||||
|
||||
class TestDeprecatedConverters(TestConverters):
    '''Re-run the converter tests while exercising the deprecated API.

    DeprecationWarning is silenced in setUp so the deprecated functions
    (to_xml, to_utf8, to_str) and the deprecated non_string parameter can
    be called without the warning machinery interfering.
    '''
    def setUp(self):
        # Ignore DeprecationWarning for the duration of each test
        warnings.simplefilter('ignore', DeprecationWarning)

    def tearDown(self):
        # Restore default warning handling for the rest of the suite
        warnings.simplefilter('default', DeprecationWarning)

    def test_to_xml(self):
        '''Deprecated to_xml function still produces the expected bytes'''
        tools.ok_(converters.to_xml(self.u_entity) == self.utf8_entity_escape)
        tools.ok_(converters.to_xml(self.utf8_spanish) == self.utf8_spanish)
        tools.ok_(converters.to_xml(self.latin1_spanish) == self.utf8_spanish)
        tools.ok_(converters.to_xml(self.utf8_japanese) == self.utf8_japanese)

    def test_to_utf8(self):
        '''Deprecated to_utf8 function converts unicode and passes utf8 bytes through'''
        tools.ok_(converters.to_utf8(self.u_japanese) == self.utf8_japanese)
        tools.ok_(converters.to_utf8(self.utf8_spanish) == self.utf8_spanish)

    def test_to_str(self):
        '''Deprecated to_str function; non-strings become their str()/repr()'''
        tools.ok_(converters.to_str(self.u_japanese) == self.utf8_japanese)
        tools.ok_(converters.to_str(self.utf8_spanish) == self.utf8_spanish)
        # A type object is rendered via its Python 2 repr
        tools.ok_(converters.to_str(object) == "<type 'object'>")

    def test_non_string(self):
        '''Test deprecated non_string parameter'''
        # unicode
        # Unknown strategy name raises TypeError
        tools.assert_raises(TypeError, converters.to_unicode, *[5], **{'non_string': 'foo'})
        tools.ok_(converters.to_unicode(5, non_string='empty') == u'')
        tools.ok_(converters.to_unicode(5, non_string='passthru') == 5)
        tools.ok_(converters.to_unicode(5, non_string='simplerepr') == u'5')
        tools.ok_(converters.to_unicode(5, non_string='repr') == u'5')
        tools.assert_raises(TypeError, converters.to_unicode, *[5], **{'non_string': 'strict'})

        # Objects with various __str__/__unicode__ combinations all reach
        # the expected unicode value under 'simplerepr'
        tools.ok_(converters.to_unicode(UnicodeNoStr(), non_string='simplerepr') == self.u_spanish)
        tools.ok_(converters.to_unicode(StrNoUnicode(), non_string='simplerepr') == self.u_spanish)
        tools.ok_(converters.to_unicode(StrReturnsUnicode(), non_string='simplerepr') == self.u_spanish)
        tools.ok_(converters.to_unicode(UnicodeReturnsStr(), non_string='simplerepr') == self.u_spanish)
        tools.ok_(converters.to_unicode(UnicodeStrCrossed(), non_string='simplerepr') == self.u_spanish)

        obj_repr = converters.to_unicode(object, non_string='simplerepr')
        tools.ok_(obj_repr == u"<type 'object'>" and isinstance(obj_repr, unicode))

        # Bytes
        tools.ok_(converters.to_bytes(5) == '5')
        tools.ok_(converters.to_bytes(5, non_string='empty') == '')
        tools.ok_(converters.to_bytes(5, non_string='passthru') == 5)
        tools.ok_(converters.to_bytes(5, non_string='simplerepr') == '5')
        tools.ok_(converters.to_bytes(5, non_string='repr') == '5')

        # Raise a TypeError if the msg is non_string and we're set to strict
        tools.assert_raises(TypeError, converters.to_bytes, *[5], **{'non_string': 'strict'})
        # Raise a TypeError if given an invalid non_string arg
        tools.assert_raises(TypeError, converters.to_bytes, *[5], **{'non_string': 'INVALID'})

        # No __str__ method so this returns repr
        string = converters.to_bytes(UnicodeNoStr(), non_string='simplerepr')
        self._check_repr_bytes(string, 'UnicodeNoStr')

        # This object's __str__ returns a utf8 encoded object
        tools.ok_(converters.to_bytes(StrNoUnicode(), non_string='simplerepr') == self.utf8_spanish)

        # This object's __str__ returns unicode which to_bytes converts to utf8
        tools.ok_(converters.to_bytes(StrReturnsUnicode(), non_string='simplerepr') == self.utf8_spanish)
        # Unless we explicitly ask for something different
        tools.ok_(converters.to_bytes(StrReturnsUnicode(),
            non_string='simplerepr', encoding='latin1') == self.latin1_spanish)

        # This object has no __str__ so it returns repr
        string = converters.to_bytes(UnicodeReturnsStr(), non_string='simplerepr')
        self._check_repr_bytes(string, 'UnicodeReturnsStr')

        # This object's __str__ returns unicode which to_bytes converts to utf8
        tools.ok_(converters.to_bytes(UnicodeStrCrossed(), non_string='simplerepr') == self.utf8_spanish)

        # This object's __repr__ returns unicode which to_bytes converts to utf8
        tools.ok_(converters.to_bytes(ReprUnicode(), non_string='simplerepr')
                == u'ReprUnicode(El veloz murciélago saltó sobre el perro perezoso.)'.encode('utf8'))
        tools.ok_(converters.to_bytes(ReprUnicode(), non_string='repr') ==
                u'ReprUnicode(El veloz murciélago saltó sobre el perro perezoso.)'.encode('utf8'))

        obj_repr = converters.to_bytes(object, non_string='simplerepr')
        tools.ok_(obj_repr == "<type 'object'>" and isinstance(obj_repr, str))
|
180
tests/test_defaultdict.py
Normal file
180
tests/test_defaultdict.py
Normal file
|
@ -0,0 +1,180 @@
|
|||
"""Unit tests for collections.defaultdict."""
|
||||
|
||||
import os
|
||||
import copy
|
||||
import tempfile
|
||||
import unittest
|
||||
from test import test_support
|
||||
|
||||
from kitchen.pycompat25.collections._defaultdict import defaultdict
|
||||
|
||||
def foobar():
    """Return the ``list`` type itself.

    Used as a module-level default_factory in the copy/deepcopy tests so
    the factory survives pickling/copying by reference.
    """
    factory = list
    return factory
|
||||
|
||||
class TestDefaultDict(unittest.TestCase):
    '''Unit tests for the bundled pycompat25 defaultdict implementation.'''

    def test_basic(self):
        '''default_factory handling, missing-key creation, kwargs init'''
        d1 = defaultdict()
        self.assertEqual(d1.default_factory, None)
        d1.default_factory = list
        d1[12].append(42)
        self.assertEqual(d1, {12: [42]})
        d1[12].append(24)
        self.assertEqual(d1, {12: [42, 24]})
        # Merely subscripting a missing key creates it via the factory
        d1[13]
        d1[14]
        self.assertEqual(d1, {12: [42, 24], 13: [], 14: []})
        # Each missing key must get a distinct fresh list
        self.assert_(d1[12] is not d1[13] is not d1[14])
        d2 = defaultdict(list, foo=1, bar=2)
        self.assertEqual(d2.default_factory, list)
        self.assertEqual(d2, {"foo": 1, "bar": 2})
        self.assertEqual(d2["foo"], 1)
        self.assertEqual(d2["bar"], 2)
        self.assertEqual(d2[42], [])
        self.assert_("foo" in d2)
        self.assert_("foo" in d2.keys())
        self.assert_("bar" in d2)
        self.assert_("bar" in d2.keys())
        self.assert_(42 in d2)
        self.assert_(42 in d2.keys())
        self.assert_(12 not in d2)
        self.assert_(12 not in d2.keys())
        # Clearing the factory restores plain-dict KeyError behavior
        d2.default_factory = None
        self.assertEqual(d2.default_factory, None)
        try:
            d2[15]
        except KeyError, err:
            self.assertEqual(err.args, (15,))
        else:
            self.fail("d2[15] didn't raise KeyError")
        # The factory argument must be callable (or None)
        self.assertRaises(TypeError, defaultdict, 1)

    def test_missing(self):
        '''__missing__ raises KeyError with no factory, else calls it'''
        d1 = defaultdict()
        self.assertRaises(KeyError, d1.__missing__, 42)
        d1.default_factory = list
        self.assertEqual(d1.__missing__(42), [])

    def test_repr(self):
        '''repr shows the factory and the mapping, and round-trips via eval'''
        d1 = defaultdict()
        self.assertEqual(d1.default_factory, None)
        self.assertEqual(repr(d1), "defaultdict(None, {})")
        self.assertEqual(eval(repr(d1)), d1)
        d1[11] = 41
        self.assertEqual(repr(d1), "defaultdict(None, {11: 41})")
        d2 = defaultdict(int)
        self.assertEqual(d2.default_factory, int)
        d2[12] = 42
        self.assertEqual(repr(d2), "defaultdict(<type 'int'>, {12: 42})")
        def foo(): return 43
        d3 = defaultdict(foo)

        self.assert_(d3.default_factory is foo)
        d3[13]
        self.assertEqual(repr(d3), "defaultdict(%s, {13: 43})" % repr(foo))

    def test_print(self):
        '''Printing to a real file matches repr output'''
        d1 = defaultdict()
        def foo(): return 42
        d2 = defaultdict(foo, {1: 2})
        # NOTE: We can't use tempfile.[Named]TemporaryFile since this
        # code must exercise the tp_print C code, which only gets
        # invoked for *real* files.
        tfn = tempfile.mktemp()
        try:
            f = open(tfn, "w+")
            try:
                print >>f, d1
                print >>f, d2
                f.seek(0)
                self.assertEqual(f.readline(), repr(d1) + "\n")
                self.assertEqual(f.readline(), repr(d2) + "\n")
            finally:
                f.close()
        finally:
            os.remove(tfn)

    def test_copy(self):
        '''copy() preserves the type, the factory, and the contents'''
        d1 = defaultdict()
        d2 = d1.copy()
        self.assertEqual(type(d2), defaultdict)
        self.assertEqual(d2.default_factory, None)
        self.assertEqual(d2, {})
        d1.default_factory = list
        d3 = d1.copy()
        self.assertEqual(type(d3), defaultdict)
        self.assertEqual(d3.default_factory, list)
        self.assertEqual(d3, {})
        d1[42]
        d4 = d1.copy()
        self.assertEqual(type(d4), defaultdict)
        self.assertEqual(d4.default_factory, list)
        self.assertEqual(d4, {42: []})
        d4[12]
        self.assertEqual(d4, {42: [], 12: []})

        # Issue 6637: Copy fails for empty default dict
        d = defaultdict()
        d['a'] = 42
        e = d.copy()
        self.assertEqual(e['a'], 42)

    def test_shallow_copy(self):
        '''copy.copy keeps the factory and equality with the original'''
        d1 = defaultdict(foobar, {1: 1})
        d2 = copy.copy(d1)
        self.assertEqual(d2.default_factory, foobar)
        self.assertEqual(d2, d1)
        d1.default_factory = list
        d2 = copy.copy(d1)
        self.assertEqual(d2.default_factory, list)
        self.assertEqual(d2, d1)

    def test_deep_copy(self):
        '''copy.deepcopy duplicates contained values instead of sharing them'''
        d1 = defaultdict(foobar, {1: [1]})
        d2 = copy.deepcopy(d1)
        self.assertEqual(d2.default_factory, foobar)
        self.assertEqual(d2, d1)
        # The inner list must be a new object, not an alias
        self.assert_(d1[1] is not d2[1])
        d1.default_factory = list
        d2 = copy.deepcopy(d1)
        self.assertEqual(d2.default_factory, list)
        self.assertEqual(d2, d1)

    def test_keyerror_without_factory(self):
        '''KeyError carries the missing key itself as its argument'''
        d1 = defaultdict()
        try:
            d1[(1,)]
        except KeyError, err:
            # The tuple key, not its contents, is the exception arg
            self.assertEqual(err.args[0], (1,))
        else:
            self.fail("expected KeyError")

    def test_recursive_repr(self):
        # Issue2045: stack overflow when default_factory is a bound method
        class sub(defaultdict):
            def __init__(self):
                self.default_factory = self._factory
            def _factory(self):
                return []
        d = sub()
        self.assert_(repr(d).startswith(
            "defaultdict(<bound method sub._factory of defaultdict(..."))

        # NOTE: printing a subclass of a builtin type does not call its
        # tp_print slot. So this part is essentially the same test as above.
        tfn = tempfile.mktemp()
        try:
            f = open(tfn, "w+")
            try:
                print >>f, d
            finally:
                f.close()
        finally:
            os.remove(tfn)
|
||||
|
||||
|
||||
#def test_main():
|
||||
# test_support.run_unittest(TestDefaultDict)
|
||||
#
|
||||
#if __name__ == "__main__":
|
||||
# test_main()
|
47
tests/test_deprecation.py
Normal file
47
tests/test_deprecation.py
Normal file
|
@ -0,0 +1,47 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
import unittest
|
||||
from nose import tools
|
||||
|
||||
import sys
|
||||
import warnings
|
||||
from kitchen.text import converters
|
||||
from kitchen.text import utf8
|
||||
|
||||
class TestDeprecated(unittest.TestCase):
    '''Verify the deprecated kitchen APIs emit DeprecationWarning.

    setUp turns DeprecationWarning into an error so each deprecated call
    can be asserted with assert_raises.
    '''
    def setUp(self):
        # Clear any cached warning state from calling frames so a warning
        # already seen in a previous test is raised again here.
        registry = sys._getframe(2).f_globals.get('__warningregistry__')
        if registry:
            registry.clear()
        registry = sys._getframe(1).f_globals.get('__warningregistry__')
        if registry:
            registry.clear()
        # Escalate DeprecationWarning to an exception for these tests
        warnings.simplefilter('error', DeprecationWarning)

    def tearDown(self):
        # Restore default warning handling
        warnings.simplefilter('default', DeprecationWarning)

    def test_deprecated_functions(self):
        '''Test that all deprecated functions raise DeprecationWarning'''
        tools.assert_raises(DeprecationWarning, converters.to_utf8, u'café')
        tools.assert_raises(DeprecationWarning, converters.to_str, 5)
        tools.assert_raises(DeprecationWarning, converters.to_xml, 'test')

        # The whole legacy utf8 module is deprecated
        tools.assert_raises(DeprecationWarning, utf8.utf8_valid, 'test')
        tools.assert_raises(DeprecationWarning, utf8.utf8_width, 'test')
        tools.assert_raises(DeprecationWarning, utf8.utf8_width_chop, 'test')
        tools.assert_raises(DeprecationWarning, utf8.utf8_width_fill, 'test', 'asd')
        tools.assert_raises(DeprecationWarning, utf8.utf8_text_wrap, 'test')
        tools.assert_raises(DeprecationWarning, utf8.utf8_text_fill, 'test')
        tools.assert_raises(DeprecationWarning, utf8._utf8_width_le, 'test')

    def test_deprecated_parameters(self):
        '''The old non_string parameter warns, alone or with nonstring'''
        tools.assert_raises(DeprecationWarning, converters.to_unicode, *[5],
                **{'non_string': 'simplerepr'})
        tools.assert_raises(DeprecationWarning, converters.to_unicode, *[5],
                **{'nonstring': 'simplerepr', 'non_string': 'simplerepr'})

        tools.assert_raises(DeprecationWarning, converters.to_bytes, *[5],
                **{'non_string': 'simplerepr'})
        tools.assert_raises(DeprecationWarning, converters.to_bytes, *[5],
                **{'nonstring': 'simplerepr', 'non_string': 'simplerepr'})
|
749
tests/test_i18n.py
Normal file
749
tests/test_i18n.py
Normal file
|
@ -0,0 +1,749 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
import unittest
|
||||
from nose import tools
|
||||
|
||||
import os
|
||||
import types
|
||||
|
||||
from kitchen import i18n
|
||||
|
||||
import base_classes
|
||||
|
||||
class TestI18N_UTF8(unittest.TestCase):
    '''i18n setup helpers under a UTF-8 locale (LC_ALL=pt_BR.UTF8).'''

    def setUp(self):
        # Save and override the locale so translation charset decisions
        # are deterministic for these tests
        self.old_LC_ALL = os.environ.get('LC_ALL', None)
        os.environ['LC_ALL'] = 'pt_BR.UTF8'

    def tearDown(self):
        # Restore (or remove) the saved LC_ALL
        if self.old_LC_ALL:
            os.environ['LC_ALL'] = self.old_LC_ALL
        else:
            del(os.environ['LC_ALL'])

    def test_easy_gettext_setup(self):
        '''Test that the easy_gettext_setup function works
        '''
        _, N_ = i18n.easy_gettext_setup('foo', localedirs=
                ['%s/data/locale/' % os.path.dirname(__file__)])
        # By default we get the unicode-returning bound methods
        tools.ok_(isinstance(_, types.MethodType))
        tools.ok_(isinstance(N_, types.MethodType))
        tools.ok_(_.im_func.func_name == 'ugettext')
        tools.ok_(N_.im_func.func_name == 'ungettext')

        tools.ok_(_('café') == u'café')
        tools.ok_(_(u'café') == u'café')
        tools.ok_(N_('café', 'cafés', 1) == u'café')
        tools.ok_(N_('café', 'cafés', 2) == u'cafés')
        tools.ok_(N_(u'café', u'cafés', 1) == u'café')
        tools.ok_(N_(u'café', u'cafés', 2) == u'cafés')

    def test_easy_gettext_setup_non_unicode(self):
        '''Test that the easy_gettext_setup function works
        '''
        b_, bN_ = i18n.easy_gettext_setup('foo', localedirs=
                ['%s/data/locale/' % os.path.dirname(__file__)],
                use_unicode=False)
        # use_unicode=False selects the byte-returning locale methods
        tools.ok_(isinstance(b_, types.MethodType))
        tools.ok_(isinstance(bN_, types.MethodType))
        tools.ok_(b_.im_func.func_name == 'lgettext')
        tools.ok_(bN_.im_func.func_name == 'lngettext')

        tools.ok_(b_('café') == 'café')
        tools.ok_(b_(u'café') == 'café')
        tools.ok_(bN_('café', 'cafés', 1) == 'café')
        tools.ok_(bN_('café', 'cafés', 2) == 'cafés')
        tools.ok_(bN_(u'café', u'cafés', 1) == 'café')
        tools.ok_(bN_(u'café', u'cafés', 2) == 'cafés')

    def test_get_translation_object(self):
        '''Test that the get_translation_object function works
        '''
        # Unknown domain falls back to DummyTranslations...
        translations = i18n.get_translation_object('foo', ['%s/data/locale/' % os.path.dirname(__file__)])
        tools.ok_(translations.__class__==i18n.DummyTranslations)
        # ...unless fallback is disabled, then it is an IOError
        tools.assert_raises(IOError, i18n.get_translation_object, 'foo', ['%s/data/locale/' % os.path.dirname(__file__)], fallback=False)

        translations = i18n.get_translation_object('test', ['%s/data/locale/' % os.path.dirname(__file__)])
        tools.ok_(translations.__class__==i18n.NewGNUTranslations)

    def test_get_translation_object_create_fallback(self):
        '''Test get_translation_object creates fallbacks for additional catalogs'''
        translations = i18n.get_translation_object('test',
                ['%s/data/locale' % os.path.dirname(__file__),
                    '%s/data/locale-old' % os.path.dirname(__file__)])
        tools.ok_(translations.__class__==i18n.NewGNUTranslations)
        tools.ok_(translations._fallback.__class__==i18n.NewGNUTranslations)

    def test_get_translation_object_copy(self):
        '''Test get_translation_object shallow copies the message catalog'''
        translations = i18n.get_translation_object('test',
                ['%s/data/locale' % os.path.dirname(__file__),
                    '%s/data/locale-old' % os.path.dirname(__file__)], codeset='utf-8')
        translations.input_charset = 'utf-8'
        translations2 = i18n.get_translation_object('test',
                ['%s/data/locale' % os.path.dirname(__file__),
                    '%s/data/locale-old' % os.path.dirname(__file__)], codeset='latin-1')
        translations2.input_charset = 'latin-1'

        # Test that portions of the translation objects are the same and other
        # portions are different (which is a space optimization so that the
        # translation data isn't in memory multiple times)
        tools.ok_(id(translations._fallback) != id(translations2._fallback))
        tools.ok_(id(translations.output_charset()) != id(translations2.output_charset()))
        tools.ok_(id(translations.input_charset) != id(translations2.input_charset))
        # NOTE(review): the assertion below duplicates the one above —
        # looks like a copy/paste artifact; confirm before removing
        tools.ok_(id(translations.input_charset) != id(translations2.input_charset))
        tools.eq_(id(translations._catalog), id(translations2._catalog))

    def test_get_translation_object_optional_params(self):
        '''Smoketest leaving out optional parameters'''
        translations = i18n.get_translation_object('test')
        tools.ok_(translations.__class__ in (i18n.NewGNUTranslations, i18n.DummyTranslations))

    def test_dummy_translation(self):
        '''Test that we can create a DummyTranslation object
        '''
        tools.ok_(isinstance(i18n.DummyTranslations(), i18n.DummyTranslations))
|
||||
|
||||
# Note: Using nose's generator tests for this so we can't subclass
# unittest.TestCase
class TestDummyTranslations(base_classes.UnicodeTestData):
    '''Data-driven tests of DummyTranslations' gettext method family.

    test_data maps 'bytes'/'unicode' to three tuples of (input, expected)
    pairs: default charset (utf8), latin1, and the C locale charset.
    The test_* generators yield check_* calls over those tables.
    '''
    def __init__(self):
        self.test_data = {'bytes': (( # First set is with default charset (utf8)
            (self.u_ascii, self.b_ascii),
            (self.u_spanish, self.utf8_spanish),
            (self.u_japanese, self.utf8_japanese),
            (self.b_ascii, self.b_ascii),
            (self.utf8_spanish, self.utf8_spanish),
            (self.latin1_spanish, self.utf8_mangled_spanish_latin1_as_utf8),
            (self.utf8_japanese, self.utf8_japanese),
            ),
            ( # Second set is with output_charset of latin1 (ISO-8859-1)
            (self.u_ascii, self.b_ascii),
            (self.u_spanish, self.latin1_spanish),
            (self.u_japanese, self.latin1_mangled_japanese_replace_as_latin1),
            (self.b_ascii, self.b_ascii),
            (self.utf8_spanish, self.utf8_spanish),
            (self.latin1_spanish, self.latin1_spanish),
            (self.utf8_japanese, self.utf8_japanese),
            ),
            ( # Third set is with output_charset of C
            (self.u_ascii, self.b_ascii),
            (self.u_spanish, self.ascii_mangled_spanish_as_ascii),
            (self.u_japanese, self.ascii_mangled_japanese_replace_as_latin1),
            (self.b_ascii, self.b_ascii),
            (self.utf8_spanish, self.ascii_mangled_spanish_as_ascii),
            (self.latin1_spanish, self.ascii_twice_mangled_spanish_latin1_as_utf8_as_ascii),
            (self.utf8_japanese, self.ascii_mangled_japanese_replace_as_latin1),
            ),
            ),
            'unicode': (( # First set is with the default charset (utf8)
            (self.u_ascii, self.u_ascii),
            (self.u_spanish, self.u_spanish),
            (self.u_japanese, self.u_japanese),
            (self.b_ascii, self.u_ascii),
            (self.utf8_spanish, self.u_spanish),
            (self.latin1_spanish, self.u_mangled_spanish_latin1_as_utf8), # String is mangled but no exception
            (self.utf8_japanese, self.u_japanese),
            ),
            ( # Second set is with _charset of latin1 (ISO-8859-1)
            (self.u_ascii, self.u_ascii),
            (self.u_spanish, self.u_spanish),
            (self.u_japanese, self.u_japanese),
            (self.b_ascii, self.u_ascii),
            (self.utf8_spanish, self.u_mangled_spanish_utf8_as_latin1), # String mangled but no exception
            (self.latin1_spanish, self.u_spanish),
            (self.utf8_japanese, self.u_mangled_japanese_utf8_as_latin1), # String mangled but no exception
            ),
            ( # Third set is with _charset of C
            (self.u_ascii, self.u_ascii),
            (self.u_spanish, self.u_spanish),
            (self.u_japanese, self.u_japanese),
            (self.b_ascii, self.u_ascii),
            (self.utf8_spanish, self.u_mangled_spanish_utf8_as_ascii), # String mangled but no exception
            (self.latin1_spanish, self.u_mangled_spanish_latin1_as_ascii), # String mangled but no exception
            (self.utf8_japanese, self.u_mangled_japanese_utf8_as_ascii), # String mangled but no exception
            ),
            )
            }

    def setUp(self):
        # Fresh translation object per nose test-generator run
        self.translations = i18n.DummyTranslations()

    def check_gettext(self, message, value, charset=None):
        self.translations.set_output_charset(charset)
        tools.eq_(self.translations.gettext(message), value,
                msg='gettext(%s): trans: %s != val: %s (charset=%s)'
                % (repr(message), repr(self.translations.gettext(message)),
                    repr(value), charset))

    def check_lgettext(self, message, value, charset=None,
            locale='en_US.UTF-8'):
        os.environ['LC_ALL'] = locale
        self.translations.set_output_charset(charset)
        tools.eq_(self.translations.lgettext(message), value,
                msg='lgettext(%s): trans: %s != val: %s (charset=%s, locale=%s)'
                % (repr(message), repr(self.translations.lgettext(message)),
                    repr(value), charset, locale))

    # Note: charset has a default value because nose isn't invoking setUp and
    # tearDown each time check_* is run.
    def check_ugettext(self, message, value, charset='utf-8'):
        '''ugettext method with default values'''
        self.translations.input_charset = charset
        tools.eq_(self.translations.ugettext(message), value,
                msg='ugettext(%s): trans: %s != val: %s (charset=%s)'
                % (repr(message), repr(self.translations.ugettext(message)),
                    repr(value), charset))

    def check_ngettext(self, message, value, charset=None):
        # The plural form selected by n must match; the other form must not
        self.translations.set_output_charset(charset)
        tools.eq_(self.translations.ngettext(message, 'blank', 1), value)
        tools.eq_(self.translations.ngettext('blank', message, 2), value)
        tools.ok_(self.translations.ngettext(message, 'blank', 2) != value)
        tools.ok_(self.translations.ngettext('blank', message, 1) != value)

    def check_lngettext(self, message, value, charset=None, locale='en_US.UTF-8'):
        os.environ['LC_ALL'] = locale
        self.translations.set_output_charset(charset)
        tools.eq_(self.translations.lngettext(message, 'blank', 1), value,
                msg='lngettext(%s, "blank", 1): trans: %s != val: %s (charset=%s, locale=%s)'
                % (repr(message), repr(self.translations.lngettext(message,
                    'blank', 1)), repr(value), charset, locale))
        tools.eq_(self.translations.lngettext('blank', message, 2), value,
                msg='lngettext("blank", %s, 2): trans: %s != val: %s (charset=%s, locale=%s)'
                % (repr(message), repr(self.translations.lngettext('blank',
                    message, 2)), repr(value), charset, locale))
        tools.ok_(self.translations.lngettext(message, 'blank', 2) != value,
                msg='lngettext(%s, "blank", 2): trans: %s != val: %s (charset=%s, locale=%s)'
                % (repr(message), repr(self.translations.lngettext(message,
                    'blank', 2)), repr(value), charset, locale))
        tools.ok_(self.translations.lngettext('blank', message, 1) != value,
                msg='lngettext("blank", %s, 1): trans: %s != val: %s (charset=%s, locale=%s)'
                % (repr(message), repr(self.translations.lngettext('blank',
                    message, 1)), repr(value), charset, locale))

    # Note: charset has a default value because nose isn't invoking setUp and
    # tearDown each time check_* is run.
    def check_ungettext(self, message, value, charset='utf-8'):
        self.translations.input_charset = charset
        tools.eq_(self.translations.ungettext(message, 'blank', 1), value)
        tools.eq_(self.translations.ungettext('blank', message, 2), value)
        tools.ok_(self.translations.ungettext(message, 'blank', 2) != value)
        tools.ok_(self.translations.ungettext('blank', message, 1) != value)

    def test_gettext(self):
        '''gettext method with default values'''
        for message, value in self.test_data['bytes'][0]:
            yield self.check_gettext, message, value

    def test_gettext_output_charset(self):
        '''gettext method after output_charset is set'''
        for message, value in self.test_data['bytes'][1]:
            yield self.check_gettext, message, value, 'latin1'

    def test_ngettext(self):
        for message, value in self.test_data['bytes'][0]:
            yield self.check_ngettext, message, value

    def test_ngettext_output_charset(self):
        for message, value in self.test_data['bytes'][1]:
            yield self.check_ngettext, message, value, 'latin1'

    def test_lgettext(self):
        '''lgettext method with default values on a utf8 locale'''
        for message, value in self.test_data['bytes'][0]:
            yield self.check_lgettext, message, value

    def test_lgettext_output_charset(self):
        '''lgettext method after output_charset is set'''
        for message, value in self.test_data['bytes'][1]:
            yield self.check_lgettext, message, value, 'latin1'

    def test_lgettext_output_charset_and_locale(self):
        '''lgettext method after output_charset is set in C locale

        output_charset should take precedence
        '''
        for message, value in self.test_data['bytes'][1]:
            yield self.check_lgettext, message, value, 'latin1', 'C'

    def test_lgettext_locale_C(self):
        '''lgettext method in a C locale'''
        for message, value in self.test_data['bytes'][2]:
            yield self.check_lgettext, message, value, None, 'C'

    def test_lngettext(self):
        '''lngettext method with default values on a utf8 locale'''
        for message, value in self.test_data['bytes'][0]:
            yield self.check_lngettext, message, value

    def test_lngettext_output_charset(self):
        '''lngettext method after output_charset is set'''
        for message, value in self.test_data['bytes'][1]:
            yield self.check_lngettext, message, value, 'latin1'

    def test_lngettext_output_charset_and_locale(self):
        '''lngettext method after output_charset is set in C locale

        output_charset should take precedence
        '''
        for message, value in self.test_data['bytes'][1]:
            yield self.check_lngettext, message, value, 'latin1', 'C'

    def test_lngettext_locale_C(self):
        '''lngettext method in a C locale'''
        for message, value in self.test_data['bytes'][2]:
            yield self.check_lngettext, message, value, None, 'C'

    def test_ugettext(self):
        for message, value in self.test_data['unicode'][0]:
            yield self.check_ugettext, message, value

    def test_ugettext_charset_latin1(self):
        for message, value in self.test_data['unicode'][1]:
            yield self.check_ugettext, message, value, 'latin1'

    def test_ugettext_charset_ascii(self):
        for message, value in self.test_data['unicode'][2]:
            yield self.check_ugettext, message, value, 'ascii'

    def test_ungettext(self):
        for message, value in self.test_data['unicode'][0]:
            yield self.check_ungettext, message, value

    def test_ungettext_charset_latin1(self):
        for message, value in self.test_data['unicode'][1]:
            yield self.check_ungettext, message, value, 'latin1'

    def test_ungettext_charset_ascii(self):
        for message, value in self.test_data['unicode'][2]:
            yield self.check_ungettext, message, value, 'ascii'

    def test_nonbasestring(self):
        # Non-string messages come back as the appropriate empty string
        tools.eq_(self.translations.gettext(dict(hi='there')), '')
        tools.eq_(self.translations.ngettext(dict(hi='there'), dict(hi='two'), 1), '')
        tools.eq_(self.translations.lgettext(dict(hi='there')), '')
        tools.eq_(self.translations.lngettext(dict(hi='there'), dict(hi='two'), 1), '')
        tools.eq_(self.translations.ugettext(dict(hi='there')), u'')
        tools.eq_(self.translations.ungettext(dict(hi='there'), dict(hi='two'), 1), u'')
|
||||
|
||||
|
||||
class TestI18N_Latin1(unittest.TestCase):
    '''easy_gettext_setup behavior under a latin-1 locale.'''

    def setUp(self):
        # Force an ISO8859-1 locale so byte output is latin-1 encoded
        self.old_LC_ALL = os.environ.get('LC_ALL', None)
        os.environ['LC_ALL'] = 'pt_BR.ISO8859-1'

    def tearDown(self):
        # Restore (or remove) the saved LC_ALL
        if self.old_LC_ALL:
            os.environ['LC_ALL'] = self.old_LC_ALL
        else:
            del(os.environ['LC_ALL'])

    def test_easy_gettext_setup_non_unicode(self):
        '''Test that the easy_gettext_setup function works
        '''
        b_, bN_ = i18n.easy_gettext_setup('foo', localedirs=
                ['%s/data/locale/' % os.path.dirname(__file__)],
                use_unicode=False)

        # Byte input passes through; unicode input is encoded to latin-1
        tools.ok_(b_('café') == 'café')
        tools.ok_(b_(u'café') == 'caf\xe9')
        tools.ok_(bN_('café', 'cafés', 1) == 'café')
        tools.ok_(bN_('café', 'cafés', 2) == 'cafés')
        tools.ok_(bN_(u'café', u'cafés', 1) == 'caf\xe9')
        tools.ok_(bN_(u'café', u'cafés', 2) == 'caf\xe9s')
|
||||
|
||||
|
||||
class TestNewGNUTranslationsNoMatch(TestDummyTranslations):
    '''Run the DummyTranslations data tables against a real catalog.

    The 'test' catalog contains none of the table messages, so a
    NewGNUTranslations object must behave like DummyTranslations for them.
    '''
    def setUp(self):
        self.old_LC_ALL = os.environ.get('LC_ALL', None)
        os.environ['LC_ALL'] = 'pt_BR.utf8'
        self.translations = i18n.get_translation_object('test', ['%s/data/locale/' % os.path.dirname(__file__)])

    def tearDown(self):
        # Restore (or remove) the saved LC_ALL
        if self.old_LC_ALL:
            os.environ['LC_ALL'] = self.old_LC_ALL
        else:
            del(os.environ['LC_ALL'])
|
||||
|
||||
|
||||
class TestNewGNURealTranslations_UTF8(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.old_LC_ALL = os.environ.get('LC_ALL', None)
|
||||
os.environ['LC_ALL'] = 'pt_BR.UTF8'
|
||||
self.translations = i18n.get_translation_object('test', ['%s/data/locale/' % os.path.dirname(__file__)])
|
||||
|
||||
def tearDown(self):
|
||||
if self.old_LC_ALL:
|
||||
os.environ['LC_ALL'] = self.old_LC_ALL
|
||||
else:
|
||||
del(os.environ['LC_ALL'])
|
||||
|
||||
def test_gettext(self):
|
||||
_ = self.translations.gettext
|
||||
tools.ok_(_('kitchen sink')=='pia da cozinha')
|
||||
tools.ok_(_('Kuratomi')=='くらとみ')
|
||||
tools.ok_(_('くらとみ')=='Kuratomi')
|
||||
tools.ok_(_('Only café in fallback')=='Only café in fallback')
|
||||
|
||||
tools.ok_(_(u'kitchen sink')=='pia da cozinha')
|
||||
tools.ok_(_(u'くらとみ')=='Kuratomi')
|
||||
tools.ok_(_(u'Kuratomi')=='くらとみ')
|
||||
tools.ok_(_(u'Only café in fallback')=='Only café in fallback')
|
||||
|
||||
def test_ngettext(self):
|
||||
_ = self.translations.ngettext
|
||||
tools.ok_(_('1 lemon', '4 lemons', 1)=='一 limão')
|
||||
tools.ok_(_('一 limão', '四 limões', 1)=='1 lemon')
|
||||
tools.ok_(_(u'1 lemon', u'4 lemons', 1)=='一 limão')
|
||||
tools.ok_(_(u'一 limão', u'四 limões', 1)=='1 lemon')
|
||||
|
||||
tools.ok_(_('1 lemon', '4 lemons', 2)=='四 limões')
|
||||
tools.ok_(_('一 limão', '四 limões', 2)=='4 lemons')
|
||||
tools.ok_(_(u'1 lemon', u'4 lemons', 2)=='四 limões')
|
||||
tools.ok_(_(u'一 limão', u'四 limões', 2)=='4 lemons')
|
||||
|
||||
def test_lgettext(self):
|
||||
_ = self.translations.lgettext
|
||||
tools.ok_(_('kitchen sink')=='pia da cozinha')
|
||||
tools.ok_(_('Kuratomi')=='くらとみ')
|
||||
tools.ok_(_('くらとみ')=='Kuratomi')
|
||||
tools.ok_(_('Only café in fallback')=='Only café in fallback')
|
||||
|
||||
tools.ok_(_(u'kitchen sink')=='pia da cozinha')
|
||||
tools.ok_(_(u'くらとみ')=='Kuratomi')
|
||||
tools.ok_(_(u'Kuratomi')=='くらとみ')
|
||||
tools.ok_(_(u'Only café in fallback')=='Only café in fallback')
|
||||
|
||||
def test_lngettext(self):
|
||||
_ = self.translations.lngettext
|
||||
tools.ok_(_('1 lemon', '4 lemons', 1)=='一 limão')
|
||||
tools.ok_(_('一 limão', '四 limões', 1)=='1 lemon')
|
||||
tools.ok_(_(u'1 lemon', u'4 lemons', 1)=='一 limão')
|
||||
tools.ok_(_(u'一 limão', u'四 limões', 1)=='1 lemon')
|
||||
|
||||
tools.ok_(_('1 lemon', '4 lemons', 2)=='四 limões')
|
||||
tools.ok_(_('一 limão', '四 limões', 2)=='4 lemons')
|
||||
tools.ok_(_(u'1 lemon', u'4 lemons', 2)=='四 limões')
|
||||
tools.ok_(_(u'一 limão', u'四 limões', 2)=='4 lemons')
|
||||
|
||||
def test_ugettext(self):
|
||||
_ = self.translations.ugettext
|
||||
tools.ok_(_('kitchen sink')==u'pia da cozinha')
|
||||
tools.ok_(_('Kuratomi')==u'くらとみ')
|
||||
tools.ok_(_('くらとみ')==u'Kuratomi')
|
||||
tools.ok_(_('Only café in fallback')==u'Only café in fallback')
|
||||
|
||||
tools.ok_(_(u'kitchen sink')==u'pia da cozinha')
|
||||
tools.ok_(_(u'くらとみ')==u'Kuratomi')
|
||||
tools.ok_(_(u'Kuratomi')==u'くらとみ')
|
||||
tools.ok_(_(u'Only café in fallback')==u'Only café in fallback')
|
||||
|
||||
def test_ungettext(self):
    '''ungettext picks singular/plural by count and always returns unicode.'''
    _ = self.translations.ungettext
    # eq_ instead of ok_(a == b): shows both values when the assertion fails
    # n=1 selects the singular form
    tools.eq_(_('1 lemon', '4 lemons', 1), u'一 limão')
    tools.eq_(_('一 limão', '四 limões', 1), u'1 lemon')
    tools.eq_(_(u'1 lemon', u'4 lemons', 1), u'一 limão')
    tools.eq_(_(u'一 limão', u'四 limões', 1), u'1 lemon')

    # n=2 selects the plural form
    tools.eq_(_('1 lemon', '4 lemons', 2), u'四 limões')
    tools.eq_(_('一 limão', '四 limões', 2), u'4 lemons')
    tools.eq_(_(u'1 lemon', u'4 lemons', 2), u'四 limões')
    tools.eq_(_(u'一 limão', u'四 limões', 2), u'4 lemons')
class TestNewGNURealTranslations_Latin1(TestNewGNURealTranslations_UTF8):
    '''Rerun the UTF8 test methods under a latin-1 locale.

    Only setUp/tearDown and the locale-dependent l*gettext tests are
    overridden here; all other tests are inherited unchanged.
    '''
    def setUp(self):
        # Save LC_ALL (None when unset) and force a latin-1 locale
        self.old_LC_ALL = os.environ.get('LC_ALL', None)
        os.environ['LC_ALL'] = 'pt_BR.ISO8859-1'
        self.translations = i18n.get_translation_object('test', ['%s/data/locale/' % os.path.dirname(__file__)])

    def tearDown(self):
        # `is not None`: a previously-set-but-empty LC_ALL must be restored,
        # not deleted (the old truthiness check deleted it)
        if self.old_LC_ALL is not None:
            os.environ['LC_ALL'] = self.old_LC_ALL
        else:
            del(os.environ['LC_ALL'])

    def test_lgettext(self):
        '''lgettext encodes translations to the latin-1 locale encoding.'''
        _ = self.translations.lgettext
        tools.eq_(_('kitchen sink'), 'pia da cozinha')
        # Not representable in latin-1, so characters are replaced
        tools.eq_(_('Kuratomi'), '????')
        tools.eq_(_('くらとみ'), 'Kuratomi')
        # The following returns utf-8 because latin-1 can hold all of the
        # bytes that are present in utf-8 encodings.  Therefore, we cannot
        # tell that we should reencode the string.  This will be displayed as
        # mangled text if used in a program
        tools.eq_(_('Only café in fallback'), 'Only caf\xc3\xa9 in fallback')

        tools.eq_(_(u'kitchen sink'), 'pia da cozinha')
        tools.eq_(_(u'くらとみ'), 'Kuratomi')
        tools.eq_(_(u'Kuratomi'), '????')
        # unicode input is encoded directly to latin-1
        tools.eq_(_(u'Only café in fallback'), 'Only caf\xe9 in fallback')

    def test_lngettext(self):
        '''lngettext encodes singular/plural translations to latin-1.

        eq_ instead of ok_(a == b) so failures show both values.
        '''
        _ = self.translations.lngettext
        # n=1: singular form; 一 is not representable in latin-1 -> '?'
        tools.eq_(_('1 lemon', '4 lemons', 1), '? lim\xe3o')
        tools.eq_(_('一 limão', '四 limões', 1), '1 lemon')
        tools.eq_(_(u'1 lemon', u'4 lemons', 1), '? lim\xe3o')
        tools.eq_(_(u'一 limão', u'四 limões', 1), '1 lemon')

        # n=2: plural form
        tools.eq_(_('1 lemon', '4 lemons', 2), '? lim\xf5es')
        tools.eq_(_('一 limão', '四 limões', 2), '4 lemons')
        tools.eq_(_(u'1 lemon', u'4 lemons', 2), '? lim\xf5es')
        tools.eq_(_(u'一 limão', u'四 limões', 2), '4 lemons')
class TestFallbackNewGNUTranslationsNoMatch(TestDummyTranslations):
    '''Rerun every TestDummyTranslations test against a translation object
    created with two search directories (main and fallback catalogs).
    '''
    def setUp(self):
        # Save LC_ALL (None when unset) and force a utf8 locale
        self.old_LC_ALL = os.environ.get('LC_ALL', None)
        os.environ['LC_ALL'] = 'pt_BR.utf8'
        self.translations = i18n.get_translation_object('test',
                ['%s/data/locale/' % os.path.dirname(__file__),
                 '%s/data/locale-old' % os.path.dirname(__file__)])

    def tearDown(self):
        # `is not None`: a previously-set-but-empty LC_ALL must be restored,
        # not deleted (the old truthiness check deleted it)
        if self.old_LC_ALL is not None:
            os.environ['LC_ALL'] = self.old_LC_ALL
        else:
            del(os.environ['LC_ALL'])
class TestFallbackNewGNURealTranslations_UTF8(unittest.TestCase):
    '''Translation tests with a fallback catalog under a utf8 locale.

    'Only café in fallback' is resolved by the second (fallback) catalog to
    the 'Yes, only caf...' message; everything else comes from the first.
    eq_ is used instead of ok_(a == b) so failures show both values.
    '''
    def setUp(self):
        # Save LC_ALL (None when unset) and force a utf8 locale
        self.old_LC_ALL = os.environ.get('LC_ALL', None)
        os.environ['LC_ALL'] = 'pt_BR.UTF8'
        self.translations = i18n.get_translation_object('test',
                ['%s/data/locale/' % os.path.dirname(__file__),
                 '%s/data/locale-old' % os.path.dirname(__file__)])

    def tearDown(self):
        # `is not None`: a previously-set-but-empty LC_ALL must be restored,
        # not deleted (the old truthiness check deleted it)
        if self.old_LC_ALL is not None:
            os.environ['LC_ALL'] = self.old_LC_ALL
        else:
            del(os.environ['LC_ALL'])

    def test_gettext(self):
        '''gettext returns byte str translations, falling back when needed.'''
        _ = self.translations.gettext
        tools.eq_(_('kitchen sink'), 'pia da cozinha')
        tools.eq_(_('Kuratomi'), 'くらとみ')
        tools.eq_(_('くらとみ'), 'Kuratomi')
        # Found only in the fallback catalog
        tools.eq_(_(u'Only café in fallback'), 'Yes, only caf\xc3\xa9 in fallback')

        tools.eq_(_(u'kitchen sink'), 'pia da cozinha')
        tools.eq_(_(u'くらとみ'), 'Kuratomi')
        tools.eq_(_(u'Kuratomi'), 'くらとみ')
        tools.eq_(_(u'Only café in fallback'), 'Yes, only caf\xc3\xa9 in fallback')

    def test_ngettext(self):
        '''ngettext picks singular/plural by count; byte str results.'''
        _ = self.translations.ngettext
        # n=1 selects the singular form
        tools.eq_(_('1 lemon', '4 lemons', 1), '一 limão')
        tools.eq_(_('一 limão', '四 limões', 1), '1 lemon')
        tools.eq_(_(u'1 lemon', u'4 lemons', 1), '一 limão')
        tools.eq_(_(u'一 limão', u'四 limões', 1), '1 lemon')

        # n=2 selects the plural form
        tools.eq_(_('1 lemon', '4 lemons', 2), '四 limões')
        tools.eq_(_('一 limão', '四 limões', 2), '4 lemons')
        tools.eq_(_(u'1 lemon', u'4 lemons', 2), '四 limões')
        tools.eq_(_(u'一 limão', u'四 limões', 2), '4 lemons')

    def test_lgettext(self):
        '''lgettext returns byte str in the locale encoding (utf-8 here).'''
        _ = self.translations.lgettext
        tools.eq_(_('kitchen sink'), 'pia da cozinha')
        tools.eq_(_('Kuratomi'), 'くらとみ')
        tools.eq_(_('くらとみ'), 'Kuratomi')
        # Found only in the fallback catalog
        tools.eq_(_('Only café in fallback'), 'Yes, only caf\xc3\xa9 in fallback')

        tools.eq_(_(u'kitchen sink'), 'pia da cozinha')
        tools.eq_(_(u'くらとみ'), 'Kuratomi')
        tools.eq_(_(u'Kuratomi'), 'くらとみ')
        tools.eq_(_(u'Only café in fallback'), 'Yes, only caf\xc3\xa9 in fallback')

    def test_lngettext(self):
        '''lngettext picks singular/plural by count; locale-encoded bytes.'''
        _ = self.translations.lngettext
        # n=1 selects the singular form
        tools.eq_(_('1 lemon', '4 lemons', 1), '一 limão')
        tools.eq_(_('一 limão', '四 limões', 1), '1 lemon')
        tools.eq_(_(u'1 lemon', u'4 lemons', 1), '一 limão')
        tools.eq_(_(u'一 limão', u'四 limões', 1), '1 lemon')

        # n=2 selects the plural form
        tools.eq_(_('1 lemon', '4 lemons', 2), '四 limões')
        tools.eq_(_('一 limão', '四 limões', 2), '4 lemons')
        tools.eq_(_(u'1 lemon', u'4 lemons', 2), '四 limões')
        tools.eq_(_(u'一 limão', u'四 limões', 2), '4 lemons')

    def test_ugettext(self):
        '''ugettext always returns unicode, falling back when needed.'''
        _ = self.translations.ugettext
        tools.eq_(_('kitchen sink'), u'pia da cozinha')
        tools.eq_(_('Kuratomi'), u'くらとみ')
        tools.eq_(_('くらとみ'), u'Kuratomi')
        # Found only in the fallback catalog
        tools.eq_(_('Only café in fallback'), u'Yes, only caf\xe9 in fallback')

        tools.eq_(_(u'kitchen sink'), u'pia da cozinha')
        tools.eq_(_(u'くらとみ'), u'Kuratomi')
        tools.eq_(_(u'Kuratomi'), u'くらとみ')
        tools.eq_(_(u'Only café in fallback'), u'Yes, only caf\xe9 in fallback')

    def test_ungettext(self):
        '''ungettext picks singular/plural by count; always unicode.'''
        _ = self.translations.ungettext
        # n=1 selects the singular form
        tools.eq_(_('1 lemon', '4 lemons', 1), u'一 limão')
        tools.eq_(_('一 limão', '四 limões', 1), u'1 lemon')
        tools.eq_(_(u'1 lemon', u'4 lemons', 1), u'一 limão')
        tools.eq_(_(u'一 limão', u'四 limões', 1), u'1 lemon')

        # n=2 selects the plural form
        tools.eq_(_('1 lemon', '4 lemons', 2), u'四 limões')
        tools.eq_(_('一 limão', '四 limões', 2), u'4 lemons')
        tools.eq_(_(u'1 lemon', u'4 lemons', 2), u'四 limões')
        tools.eq_(_(u'一 limão', u'四 limões', 2), u'4 lemons')
class TestFallbackNewGNURealTranslations_Latin1(unittest.TestCase):
    '''Translation tests with a fallback catalog under a latin-1 locale.

    Only the l*gettext results differ from the UTF8 variant (they encode to
    the locale's latin-1).  eq_ is used instead of ok_(a == b) so failures
    show both values.
    '''
    def setUp(self):
        # Save LC_ALL (None when unset) and force a latin-1 locale
        self.old_LC_ALL = os.environ.get('LC_ALL', None)
        os.environ['LC_ALL'] = 'pt_BR.ISO8859-1'
        self.translations = i18n.get_translation_object('test',
                ['%s/data/locale/' % os.path.dirname(__file__),
                 '%s/data/locale-old' % os.path.dirname(__file__)])

    def tearDown(self):
        # `is not None`: a previously-set-but-empty LC_ALL must be restored,
        # not deleted (the old truthiness check deleted it)
        if self.old_LC_ALL is not None:
            os.environ['LC_ALL'] = self.old_LC_ALL
        else:
            del(os.environ['LC_ALL'])

    def test_gettext(self):
        '''gettext returns catalog byte str (utf-8) regardless of locale.'''
        _ = self.translations.gettext
        tools.eq_(_('kitchen sink'), 'pia da cozinha')
        tools.eq_(_('Kuratomi'), 'くらとみ')
        tools.eq_(_('くらとみ'), 'Kuratomi')
        # Found only in the fallback catalog
        tools.eq_(_(u'Only café in fallback'), 'Yes, only caf\xc3\xa9 in fallback')

        tools.eq_(_(u'kitchen sink'), 'pia da cozinha')
        tools.eq_(_(u'くらとみ'), 'Kuratomi')
        tools.eq_(_(u'Kuratomi'), 'くらとみ')
        tools.eq_(_(u'Only café in fallback'), 'Yes, only caf\xc3\xa9 in fallback')

    def test_ngettext(self):
        '''ngettext picks singular/plural by count; byte str results.'''
        _ = self.translations.ngettext
        # n=1 selects the singular form
        tools.eq_(_('1 lemon', '4 lemons', 1), '一 limão')
        tools.eq_(_('一 limão', '四 limões', 1), '1 lemon')
        tools.eq_(_(u'1 lemon', u'4 lemons', 1), '一 limão')
        tools.eq_(_(u'一 limão', u'四 limões', 1), '1 lemon')

        # n=2 selects the plural form
        tools.eq_(_('1 lemon', '4 lemons', 2), '四 limões')
        tools.eq_(_('一 limão', '四 limões', 2), '4 lemons')
        tools.eq_(_(u'1 lemon', u'4 lemons', 2), '四 limões')
        tools.eq_(_(u'一 limão', u'四 limões', 2), '4 lemons')

    def test_lgettext(self):
        '''lgettext encodes translations to latin-1.'''
        _ = self.translations.lgettext
        tools.eq_(_('kitchen sink'), 'pia da cozinha')
        # Not representable in latin-1, so characters are replaced
        tools.eq_(_('Kuratomi'), '????')
        tools.eq_(_('くらとみ'), 'Kuratomi')
        # Fallback hit; é encodes cleanly to latin-1
        tools.eq_(_('Only café in fallback'), 'Yes, only caf\xe9 in fallback')

        tools.eq_(_(u'kitchen sink'), 'pia da cozinha')
        tools.eq_(_(u'くらとみ'), 'Kuratomi')
        tools.eq_(_(u'Kuratomi'), '????')
        tools.eq_(_(u'Only café in fallback'), 'Yes, only caf\xe9 in fallback')

    def test_lngettext(self):
        '''lngettext encodes singular/plural translations to latin-1.'''
        _ = self.translations.lngettext
        # n=1: singular; expected value latin-1-encoded with replacement
        tools.eq_(_('1 lemon', '4 lemons', 1), u'一 limão'.encode('latin1', 'replace'))
        tools.eq_(_('一 limão', '四 limões', 1), '1 lemon')
        tools.eq_(_(u'1 lemon', u'4 lemons', 1), u'一 limão'.encode('latin1', 'replace'))
        tools.eq_(_(u'一 limão', u'四 limões', 1), '1 lemon')

        # n=2: plural
        tools.eq_(_('1 lemon', '4 lemons', 2), u'四 limões'.encode('latin1', 'replace'))
        tools.eq_(_('一 limão', '四 limões', 2), '4 lemons')
        tools.eq_(_(u'1 lemon', u'4 lemons', 2), u'四 limões'.encode('latin1', 'replace'))
        tools.eq_(_(u'一 limão', u'四 limões', 2), '4 lemons')

    def test_ugettext(self):
        '''ugettext always returns unicode, falling back when needed.'''
        _ = self.translations.ugettext
        tools.eq_(_('kitchen sink'), u'pia da cozinha')
        tools.eq_(_('Kuratomi'), u'くらとみ')
        tools.eq_(_('くらとみ'), u'Kuratomi')
        # Found only in the fallback catalog
        tools.eq_(_('Only café in fallback'), u'Yes, only caf\xe9 in fallback')

        tools.eq_(_(u'kitchen sink'), u'pia da cozinha')
        tools.eq_(_(u'くらとみ'), u'Kuratomi')
        tools.eq_(_(u'Kuratomi'), u'くらとみ')
        tools.eq_(_(u'Only café in fallback'), u'Yes, only caf\xe9 in fallback')

    def test_ungettext(self):
        '''ungettext picks singular/plural by count; always unicode.'''
        _ = self.translations.ungettext
        # n=1 selects the singular form
        tools.eq_(_('1 lemon', '4 lemons', 1), u'一 limão')
        tools.eq_(_('一 limão', '四 limões', 1), u'1 lemon')
        tools.eq_(_(u'1 lemon', u'4 lemons', 1), u'一 limão')
        tools.eq_(_(u'一 limão', u'四 limões', 1), u'1 lemon')

        # n=2 selects the plural form
        tools.eq_(_('1 lemon', '4 lemons', 2), u'四 limões')
        tools.eq_(_('一 limão', '四 limões', 2), u'4 lemons')
        tools.eq_(_(u'1 lemon', u'4 lemons', 2), u'四 limões')
        tools.eq_(_(u'一 limão', u'四 limões', 2), u'4 lemons')
class TestFallback(unittest.TestCase):
    '''Check that adding an invalid fallback object does not break lookups.'''
    def setUp(self):
        # Save LC_ALL (None when unset) and force a latin-1 locale
        self.old_LC_ALL = os.environ.get('LC_ALL', None)
        os.environ['LC_ALL'] = 'pt_BR.ISO8859-1'
        # Same search path for both objects; only the domain differs
        localedirs = ['%s/data/locale/' % os.path.dirname(__file__),
                      '%s/data/locale-old' % os.path.dirname(__file__)]
        self.gtranslations = i18n.get_translation_object('test', localedirs)
        # A plain object() is not a valid fallback -- this is the point of
        # the test: the *gettext methods must not raise because of it
        self.gtranslations.add_fallback(object())
        self.dtranslations = i18n.get_translation_object('nonexistent',
                localedirs)
        self.dtranslations.add_fallback(object())

    def tearDown(self):
        # `is not None`: a previously-set-but-empty LC_ALL must be restored,
        # not deleted (the old truthiness check deleted it)
        if self.old_LC_ALL is not None:
            os.environ['LC_ALL'] = self.old_LC_ALL
        else:
            del(os.environ['LC_ALL'])

    def test_invalid_fallback_no_raise(self):
        '''Test when we have an invalid fallback that it does not raise.'''
        # Untranslated msgids come back as given for every *gettext variant,
        # on both the matching and the nonexistent-domain objects
        for translations in (self.gtranslations, self.dtranslations):
            tools.eq_(translations.gettext('abc'), 'abc')
            tools.eq_(translations.ugettext('abc'), 'abc')
            tools.eq_(translations.lgettext('abc'), 'abc')

            tools.eq_(translations.ngettext('abc', 'cde', 1), 'abc')
            tools.eq_(translations.ungettext('abc', 'cde', 1), 'abc')
            tools.eq_(translations.lngettext('abc', 'cde', 1), 'abc')
class TestDefaultLocaleDir(unittest.TestCase):
    '''Check lookups when the module-level default localedir is patched.'''
    def setUp(self):
        # Save LC_ALL (None when unset) and force a utf8 locale
        self.old_LC_ALL = os.environ.get('LC_ALL', None)
        os.environ['LC_ALL'] = 'pt_BR.UTF8'
        # Point the module default at the test catalogs; no explicit
        # localedirs are passed to get_translation_object below
        self.old_DEFAULT_LOCALEDIRS = i18n._DEFAULT_LOCALEDIR
        i18n._DEFAULT_LOCALEDIR = '%s/data/locale/' % os.path.dirname(__file__)
        self.translations = i18n.get_translation_object('test')

    def tearDown(self):
        # `is not None`: a previously-set-but-empty LC_ALL must be restored,
        # not deleted (the old truthiness check deleted it)
        if self.old_LC_ALL is not None:
            os.environ['LC_ALL'] = self.old_LC_ALL
        else:
            del(os.environ['LC_ALL'])
        # Restore unconditionally: the old truthiness guard could leave the
        # patched directory in place if the saved value was falsy
        i18n._DEFAULT_LOCALEDIR = self.old_DEFAULT_LOCALEDIRS

    def test_gettext(self):
        '''gettext resolves through the patched default localedir.'''
        _ = self.translations.gettext
        tools.eq_(_('kitchen sink'), 'pia da cozinha')
        tools.eq_(_('Kuratomi'), 'くらとみ')
        tools.eq_(_('くらとみ'), 'Kuratomi')
        # Not in this catalog (no fallback configured): returned as given
        tools.eq_(_('Only café in fallback'), 'Only café in fallback')

        tools.eq_(_(u'kitchen sink'), 'pia da cozinha')
        tools.eq_(_(u'くらとみ'), 'Kuratomi')
        tools.eq_(_(u'Kuratomi'), 'くらとみ')
        tools.eq_(_(u'Only café in fallback'), 'Only café in fallback')
57
tests/test_iterutils.py
Normal file
57
tests/test_iterutils.py
Normal file
|
@ -0,0 +1,57 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
import unittest
|
||||
from nose import tools
|
||||
|
||||
from kitchen import iterutils
|
||||
|
||||
class TestStrictDict(unittest.TestCase):
    # NOTE(review): despite the class name, these tests exercise
    # kitchen.iterutils (isiterable/iterate) -- confirm the intended name.

    # Values isiterable() should report as iterable.  The final entry is a
    # consumable iterator; test_iterate special-cases it (see below).
    iterable_data = (
            [0, 1, 2],
            [],
            (0, 1, 2),
            tuple(),
            set([0, 1, 2]),
            set(),
            dict(a=1, b=2),
            dict(),
            [None],
            [False],
            [0],
            xrange(0, 3),
            iter([1, 2, 3]),
            )
    # Scalar values isiterable() should reject.
    non_iterable_data = (
            None,
            False,
            True,
            0,
            1.1,
            )

    def test_isiterable(self):
        # Every iterable is recognized ...
        for item in self.iterable_data:
            tools.ok_(iterutils.isiterable(item) == True)

        # ... and every scalar is rejected
        for item in self.non_iterable_data:
            tools.ok_(iterutils.isiterable(item) == False)

        # strings: iterable only when include_string is requested;
        # the default is include_string=False
        tools.ok_(iterutils.isiterable('a', include_string=True) == True)
        tools.ok_(iterutils.isiterable('a', include_string=False) == False)
        tools.ok_(iterutils.isiterable('a') == False)

    def test_iterate(self):
        # Smoke check: iterate() accepts None without raising
        iterutils.iterate(None)
        # Scalars come back as a single-element iteration
        for item in self.non_iterable_data:
            tools.ok_(list(iterutils.iterate(item)) == [item])

        # Iterables round-trip unchanged ([:-1] skips the consumable iter())
        for item in self.iterable_data[:-1]:
            tools.ok_(list(iterutils.iterate(item)) == list(item))

        # iter() is exhausted after use so we have to test separately
        tools.ok_(list(iterutils.iterate(iter([1, 2, 3]))) == [1, 2, 3])

        # strings: treated as a scalar unless include_string=True
        tools.ok_(list(iterutils.iterate('abc')) == ['abc'])
        tools.ok_(list(iterutils.iterate('abc', include_string=True)) == ['a', 'b', 'c'])
25
tests/test_pycompat.py
Normal file
25
tests/test_pycompat.py
Normal file
|
@ -0,0 +1,25 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
import unittest
|
||||
from nose import tools
|
||||
|
||||
class TestUsableModules(unittest.TestCase):
    # The import statement itself is the test in each method: the imported
    # name is otherwise unused; an ImportError marks the test as failed.
    def test_subprocess(self):
        '''Test that importing subprocess as a module works
        '''
        try:
            from kitchen.pycompat24.subprocess import Popen
        except ImportError:
            tools.ok_(False, 'Unable to import pycompat24.subprocess as a module')
        try:
            from kitchen.pycompat27.subprocess import Popen
        except ImportError:
            tools.ok_(False, 'Unable to import pycompat27.subprocess as a module')

    def test_base64(self):
        '''Test that importing base64 as a module works
        '''
        try:
            from kitchen.pycompat24.base64 import b64encode
        except ImportError:
            tools.ok_(False, 'Unable to import pycompat24.base64 as a module')
109
tests/test_pycompat24.py
Normal file
109
tests/test_pycompat24.py
Normal file
|
@ -0,0 +1,109 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
import unittest
|
||||
from nose import tools
|
||||
from nose.plugins.skip import SkipTest
|
||||
|
||||
import __builtin__
|
||||
import base64 as py_b64
|
||||
import warnings
|
||||
|
||||
from kitchen.pycompat24 import sets
|
||||
from kitchen.pycompat24.base64 import _base64 as base64
|
||||
|
||||
class TestSetsNoOverwrite(unittest.TestCase):
    '''Check that sets.add_builtin_set() does not clobber existing builtins.

    None is used as the sentinel for "the builtin was absent and we installed
    a placeholder"; tearDown keys off that sentinel to undo setUp.
    '''
    def setUp(self):
        self.set_val = None
        self.frozenset_val = None
        # Remember any existing builtin; otherwise install the None
        # placeholder so the overwrite check below has something to compare
        if not hasattr(__builtin__, 'set'):
            __builtin__.set = self.set_val
        else:
            self.set_val = __builtin__.set
        if not hasattr(__builtin__, 'frozenset'):
            __builtin__.frozenset = self.frozenset_val
        else:
            self.frozenset_val = __builtin__.frozenset

    def tearDown(self):
        # `is None` (identity), not `== None` (PEP 8); None means setUp
        # installed the placeholder, so remove it
        if self.frozenset_val is None:
            del(__builtin__.frozenset)
        if self.set_val is None:
            del(__builtin__.set)

    def test_sets_dont_overwrite(self):
        '''Test that importing sets when there's already a set and frozenset defined does not overwrite
        '''
        sets.add_builtin_set()
        # eq_ shows both values on failure, unlike ok_(a == b)
        tools.eq_(__builtin__.set, self.set_val)
        tools.eq_(__builtin__.frozenset, self.frozenset_val)
class TestDefineSets(unittest.TestCase):
    '''Check that sets.add_builtin_set() installs set/frozenset when absent.

    setUp removes the builtins (saving them); tearDown restores the original
    state.  None is the sentinel for "the builtin was absent".
    '''
    def setUp(self):
        # The sets module is deprecated on modern pythons; silence that
        warnings.simplefilter('ignore', DeprecationWarning)
        self.set_val = None
        self.frozenset_val = None
        if hasattr(__builtin__, 'set'):
            self.set_val = __builtin__.set
            del(__builtin__.set)
        if hasattr(__builtin__, 'frozenset'):
            self.frozenset_val = __builtin__.frozenset
            del(__builtin__.frozenset)

    def tearDown(self):
        warnings.simplefilter('default', DeprecationWarning)
        # `is not None`: identity check against the sentinel, instead of the
        # old truthiness test on a saved class object
        if self.set_val is not None:
            __builtin__.set = self.set_val
        else:
            # We never saved one, so remove what add_builtin_set() installed
            del(__builtin__.set)
        if self.frozenset_val is not None:
            __builtin__.frozenset = self.frozenset_val
        else:
            del(__builtin__.frozenset)

    def test_pycompat_defines_set(self):
        '''Test that calling pycompat24.add_builtin_set() adds set and frozenset to __builtin__
        '''
        import sets as py_sets
        sets.add_builtin_set()
        # eq_ shows both values on failure, unlike ok_(a == b)
        if self.set_val is not None:
            # The interpreter already had builtins; they must be untouched
            tools.eq_(__builtin__.set, self.set_val)
            tools.eq_(__builtin__.frozenset, self.frozenset_val)
        else:
            # No builtins existed; the sets-module classes must be installed
            tools.eq_(__builtin__.set, py_sets.Set)
            tools.eq_(__builtin__.frozenset, py_sets.ImmutableSet)
class TestSubprocess(unittest.TestCase):
    # NOTE(review): intentionally empty -- subprocess behaviour is presumably
    # exercised by tests/test_subprocess.py; confirm before removing this
    # placeholder.
    pass
class TestBase64(unittest.TestCase):
    # All 256 byte values, separated by spaces, as the round-trip payload
    b_byte_chars = ' '.join(map(chr, range(0, 256)))
    # Expected standard-alphabet encoding of b_byte_chars
    b_byte_encoded = 'ACABIAIgAyAEIAUgBiAHIAggCSAKIAsgDCANIA4gDyAQIBEgEiATIBQgFSAWIBcgGCAZIBogGyAcIB0gHiAfICAgISAiICMgJCAlICYgJyAoICkgKiArICwgLSAuIC8gMCAxIDIgMyA0IDUgNiA3IDggOSA6IDsgPCA9ID4gPyBAIEEgQiBDIEQgRSBGIEcgSCBJIEogSyBMIE0gTiBPIFAgUSBSIFMgVCBVIFYgVyBYIFkgWiBbIFwgXSBeIF8gYCBhIGIgYyBkIGUgZiBnIGggaSBqIGsgbCBtIG4gbyBwIHEgciBzIHQgdSB2IHcgeCB5IHogeyB8IH0gfiB/IIAggSCCIIMghCCFIIYghyCIIIkgiiCLIIwgjSCOII8gkCCRIJIgkyCUIJUgliCXIJggmSCaIJsgnCCdIJ4gnyCgIKEgoiCjIKQgpSCmIKcgqCCpIKogqyCsIK0griCvILAgsSCyILMgtCC1ILYgtyC4ILkguiC7ILwgvSC+IL8gwCDBIMIgwyDEIMUgxiDHIMggySDKIMsgzCDNIM4gzyDQINEg0iDTINQg1SDWINcg2CDZINog2yDcIN0g3iDfIOAg4SDiIOMg5CDlIOYg5yDoIOkg6iDrIOwg7SDuIO8g8CDxIPIg8yD0IPUg9iD3IPgg+SD6IPsg/CD9IP4g/w=='
    # Same payload with the urlsafe alphabet ('-_' instead of '+/')
    b_byte_encoded_urlsafe = 'ACABIAIgAyAEIAUgBiAHIAggCSAKIAsgDCANIA4gDyAQIBEgEiATIBQgFSAWIBcgGCAZIBogGyAcIB0gHiAfICAgISAiICMgJCAlICYgJyAoICkgKiArICwgLSAuIC8gMCAxIDIgMyA0IDUgNiA3IDggOSA6IDsgPCA9ID4gPyBAIEEgQiBDIEQgRSBGIEcgSCBJIEogSyBMIE0gTiBPIFAgUSBSIFMgVCBVIFYgVyBYIFkgWiBbIFwgXSBeIF8gYCBhIGIgYyBkIGUgZiBnIGggaSBqIGsgbCBtIG4gbyBwIHEgciBzIHQgdSB2IHcgeCB5IHogeyB8IH0gfiB_IIAggSCCIIMghCCFIIYghyCIIIkgiiCLIIwgjSCOII8gkCCRIJIgkyCUIJUgliCXIJggmSCaIJsgnCCdIJ4gnyCgIKEgoiCjIKQgpSCmIKcgqCCpIKogqyCsIK0griCvILAgsSCyILMgtCC1ILYgtyC4ILkguiC7ILwgvSC-IL8gwCDBIMIgwyDEIMUgxiDHIMggySDKIMsgzCDNIM4gzyDQINEg0iDTINQg1SDWINcg2CDZINog2yDcIN0g3iDfIOAg4SDiIOMg5CDlIOYg5yDoIOkg6iDrIOwg7SDuIO8g8CDxIPIg8yD0IPUg9iD3IPgg-SD6IPsg_CD9IP4g_w=='

    def test_base64_encode(self):
        # Standard and urlsafe encoders against the known-good constants
        tools.ok_(base64.b64encode(self.b_byte_chars) == self.b_byte_encoded)
        tools.ok_(base64.b64encode(self.b_byte_chars, altchars='-_') == self.b_byte_encoded_urlsafe)
        tools.ok_(base64.standard_b64encode(self.b_byte_chars) == self.b_byte_encoded)
        tools.ok_(base64.urlsafe_b64encode(self.b_byte_chars) == self.b_byte_encoded_urlsafe)

        # NOTE(review): block repeated verbatim -- presumably to check a
        # second call gives identical results; confirm or deduplicate
        tools.ok_(base64.b64encode(self.b_byte_chars) == self.b_byte_encoded)
        tools.ok_(base64.b64encode(self.b_byte_chars, altchars='-_') == self.b_byte_encoded_urlsafe)
        tools.ok_(base64.standard_b64encode(self.b_byte_chars) == self.b_byte_encoded)
        tools.ok_(base64.urlsafe_b64encode(self.b_byte_chars) == self.b_byte_encoded_urlsafe)

    def test_base64_decode(self):
        # Decoders must invert the encodings back to the raw byte payload
        tools.ok_(base64.b64decode(self.b_byte_encoded) == self.b_byte_chars)
        tools.ok_(base64.b64decode(self.b_byte_encoded_urlsafe, altchars='-_') == self.b_byte_chars)
        tools.ok_(base64.standard_b64decode(self.b_byte_encoded) == self.b_byte_chars)
        tools.ok_(base64.urlsafe_b64decode(self.b_byte_encoded_urlsafe) == self.b_byte_chars)

        # NOTE(review): block repeated verbatim -- see note above in this
        # method's sibling; confirm or deduplicate
        tools.ok_(base64.b64decode(self.b_byte_encoded) == self.b_byte_chars)
        tools.ok_(base64.b64decode(self.b_byte_encoded_urlsafe, altchars='-_') == self.b_byte_chars)
        tools.ok_(base64.standard_b64decode(self.b_byte_encoded) == self.b_byte_chars)
        tools.ok_(base64.urlsafe_b64decode(self.b_byte_encoded_urlsafe) == self.b_byte_chars)

    def test_base64_stdlib_compat(self):
        if not hasattr(py_b64, 'b64encode'):
            raise SkipTest('Python-2.3 doesn\'t have b64encode to compare against')
        tools.ok_(base64.b64encode(self.b_byte_chars) == py_b64.b64encode(self.b_byte_chars))
        # NOTE(review): this decodes b_byte_chars (the raw payload), not
        # b_byte_encoded.  Both sides use the same input so the comparison
        # holds either way, but it looks like b_byte_encoded was intended --
        # confirm.
        tools.ok_(base64.b64decode(self.b_byte_chars) == py_b64.b64decode(self.b_byte_chars))
1457
tests/test_subprocess.py
Normal file
1457
tests/test_subprocess.py
Normal file
File diff suppressed because it is too large
Load diff
161
tests/test_text_display.py
Normal file
161
tests/test_text_display.py
Normal file
|
@ -0,0 +1,161 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
import unittest
|
||||
from nose import tools
|
||||
|
||||
from kitchen.text.exceptions import ControlCharError
|
||||
|
||||
from kitchen.text import display
|
||||
|
||||
import base_classes
|
||||
|
||||
class TestDisplay(base_classes.UnicodeTestData, unittest.TestCase):
|
||||
|
||||
def test_internal_interval_bisearch(self):
|
||||
'''Test that we can find things in an interval table'''
|
||||
table = ((0, 3), (5,7), (9, 10))
|
||||
tools.ok_(display._interval_bisearch(0, table) == True)
|
||||
tools.ok_(display._interval_bisearch(1, table) == True)
|
||||
tools.ok_(display._interval_bisearch(2, table) == True)
|
||||
tools.ok_(display._interval_bisearch(3, table) == True)
|
||||
tools.ok_(display._interval_bisearch(5, table) == True)
|
||||
tools.ok_(display._interval_bisearch(6, table) == True)
|
||||
tools.ok_(display._interval_bisearch(7, table) == True)
|
||||
tools.ok_(display._interval_bisearch(9, table) == True)
|
||||
tools.ok_(display._interval_bisearch(10, table) == True)
|
||||
tools.ok_(display._interval_bisearch(-1, table) == False)
|
||||
tools.ok_(display._interval_bisearch(4, table) == False)
|
||||
tools.ok_(display._interval_bisearch(8, table) == False)
|
||||
tools.ok_(display._interval_bisearch(11, table) == False)
|
||||
|
||||
def test_internal_generate_combining_table(self):
|
||||
'''Test that the combining table we generate is equal to or a subseet of what's in the current table
|
||||
|
||||
If we assert it can mean one of two things:
|
||||
|
||||
1. The code is broken
|
||||
2. The table we have is out of date.
|
||||
'''
|
||||
old_table = display._COMBINING
|
||||
new_table = display._generate_combining_table()
|
||||
for interval in new_table:
|
||||
if interval[0] == interval[1]:
|
||||
tools.ok_(display._interval_bisearch(interval[0], old_table) == True)
|
||||
else:
|
||||
for codepoint in xrange(interval[0], interval[1] + 1):
|
||||
tools.ok_(display._interval_bisearch(interval[0], old_table) == True)
|
||||
|
||||
def test_internal_ucp_width(self):
|
||||
'''Test that ucp_width returns proper width for characters'''
|
||||
for codepoint in xrange(0, 0xFFFFF + 1):
|
||||
if codepoint < 32 or (codepoint < 0xa0 and codepoint >= 0x7f):
|
||||
# With strict on, we should raise an error
|
||||
tools.assert_raises(ControlCharError, display._ucp_width, codepoint, 'strict')
|
||||
|
||||
if codepoint in (0x08, 0x1b, 0x7f, 0x94):
|
||||
# Backspace, delete, clear delete remove one char
|
||||
tools.ok_(display._ucp_width(codepoint) == -1)
|
||||
else:
|
||||
# Everything else returns 0
|
||||
tools.ok_(display._ucp_width(codepoint) == 0)
|
||||
elif display._interval_bisearch(codepoint, display._COMBINING):
|
||||
# Combining character
|
||||
tools.ok_(display._ucp_width(codepoint) == 0)
|
||||
elif (codepoint >= 0x1100 and
|
||||
(codepoint <= 0x115f or # Hangul Jamo init. consonants
|
||||
codepoint == 0x2329 or codepoint == 0x232a or
|
||||
(codepoint >= 0x2e80 and codepoint <= 0xa4cf and
|
||||
codepoint != 0x303f) or # CJK ... Yi
|
||||
(codepoint >= 0xac00 and codepoint <= 0xd7a3) or # Hangul Syllables
|
||||
(codepoint >= 0xf900 and codepoint <= 0xfaff) or # CJK Compatibility Ideographs
|
||||
(codepoint >= 0xfe10 and codepoint <= 0xfe19) or # Vertical forms
|
||||
(codepoint >= 0xfe30 and codepoint <= 0xfe6f) or # CJK Compatibility Forms
|
||||
(codepoint >= 0xff00 and codepoint <= 0xff60) or # Fullwidth Forms
|
||||
(codepoint >= 0xffe0 and codepoint <= 0xffe6) or
|
||||
(codepoint >= 0x20000 and codepoint <= 0x2fffd) or
|
||||
(codepoint >= 0x30000 and codepoint <= 0x3fffd))):
|
||||
tools.ok_(display._ucp_width(codepoint) == 2)
|
||||
else:
|
||||
tools.ok_(display._ucp_width(codepoint) == 1)
|
||||
|
||||
def test_textual_width(self):
|
||||
'''Test that we find the proper number of spaces that a utf8 string will consume'''
|
||||
tools.ok_(display.textual_width(self.u_japanese) == 31)
|
||||
tools.ok_(display.textual_width(self.u_spanish) == 50)
|
||||
tools.ok_(display.textual_width(self.u_mixed) == 23)
|
||||
|
||||
def test_textual_width_chop(self):
|
||||
'''utf8_width_chop with byte strings'''
|
||||
tools.ok_(display.textual_width_chop(self.u_mixed, 1000) == self.u_mixed)
|
||||
tools.ok_(display.textual_width_chop(self.u_mixed, 23) == self.u_mixed)
|
||||
tools.ok_(display.textual_width_chop(self.u_mixed, 22) == self.u_mixed[:-1])
|
||||
tools.ok_(display.textual_width_chop(self.u_mixed, 19) == self.u_mixed[:-4])
|
||||
tools.ok_(display.textual_width_chop(self.u_mixed, 1) == u'')
|
||||
tools.ok_(display.textual_width_chop(self.u_mixed, 2) == self.u_mixed[0])
|
||||
tools.ok_(display.textual_width_chop(self.u_mixed, 3) == self.u_mixed[:2])
|
||||
tools.ok_(display.textual_width_chop(self.u_mixed, 4) == self.u_mixed[:3])
|
||||
tools.ok_(display.textual_width_chop(self.u_mixed, 5) == self.u_mixed[:4])
|
||||
tools.ok_(display.textual_width_chop(self.u_mixed, 6) == self.u_mixed[:5])
|
||||
tools.ok_(display.textual_width_chop(self.u_mixed, 7) == self.u_mixed[:5])
|
||||
tools.ok_(display.textual_width_chop(self.u_mixed, 8) == self.u_mixed[:6])
|
||||
tools.ok_(display.textual_width_chop(self.u_mixed, 9) == self.u_mixed[:7])
|
||||
tools.ok_(display.textual_width_chop(self.u_mixed, 10) == self.u_mixed[:8])
|
||||
tools.ok_(display.textual_width_chop(self.u_mixed, 11) == self.u_mixed[:9])
|
||||
tools.ok_(display.textual_width_chop(self.u_mixed, 12) == self.u_mixed[:10])
|
||||
tools.ok_(display.textual_width_chop(self.u_mixed, 13) == self.u_mixed[:10])
|
||||
tools.ok_(display.textual_width_chop(self.u_mixed, 14) == self.u_mixed[:11])
|
||||
tools.ok_(display.textual_width_chop(self.u_mixed, 15) == self.u_mixed[:12])
|
||||
tools.ok_(display.textual_width_chop(self.u_mixed, 16) == self.u_mixed[:13])
|
||||
tools.ok_(display.textual_width_chop(self.u_mixed, 17) == self.u_mixed[:14])
|
||||
tools.ok_(display.textual_width_chop(self.u_mixed, 18) == self.u_mixed[:15])
|
||||
tools.ok_(display.textual_width_chop(self.u_mixed, 19) == self.u_mixed[:15])
|
||||
tools.ok_(display.textual_width_chop(self.u_mixed, 20) == self.u_mixed[:16])
|
||||
tools.ok_(display.textual_width_chop(self.u_mixed, 21) == self.u_mixed[:17])
|
||||
|
||||
def test_textual_width_fill(self):
|
||||
'''Pad a utf8 string'''
|
||||
tools.ok_(display.textual_width_fill(self.u_mixed, 1) == self.u_mixed)
|
||||
tools.ok_(display.textual_width_fill(self.u_mixed, 25) == self.u_mixed + u' ')
|
||||
tools.ok_(display.textual_width_fill(self.u_mixed, 25, left=False) == u' ' + self.u_mixed)
|
||||
tools.ok_(display.textual_width_fill(self.u_mixed, 25, chop=18) == self.u_mixed[:-4] + u' ')
|
||||
tools.ok_(display.textual_width_fill(self.u_mixed, 25, chop=18, prefix=self.u_spanish, suffix=self.u_spanish) == self.u_spanish + self.u_mixed[:-4] + self.u_spanish + u' ')
|
||||
tools.ok_(display.textual_width_fill(self.u_mixed, 25, chop=18) == self.u_mixed[:-4] + u' ')
|
||||
tools.ok_(display.textual_width_fill(self.u_mixed, 25, chop=18, prefix=self.u_spanish, suffix=self.u_spanish) == self.u_spanish + self.u_mixed[:-4] + self.u_spanish + u' ')
|
||||
|
||||
def test_internal_textual_width_le(self):
|
||||
test_data = ''.join([self.u_mixed, self.u_spanish])
|
||||
tw = display.textual_width(test_data)
|
||||
tools.ok_(display._textual_width_le(68, self.u_mixed, self.u_spanish) == (tw <= 68))
|
||||
tools.ok_(display._textual_width_le(69, self.u_mixed, self.u_spanish) == (tw <= 69))
|
||||
tools.ok_(display._textual_width_le(137, self.u_mixed, self.u_spanish) == (tw <= 137))
|
||||
tools.ok_(display._textual_width_le(138, self.u_mixed, self.u_spanish) == (tw <= 138))
|
||||
tools.ok_(display._textual_width_le(78, self.u_mixed, self.u_spanish) == (tw <= 78))
|
||||
tools.ok_(display._textual_width_le(79, self.u_mixed, self.u_spanish) == (tw <= 79))
|
||||
|
||||
def test_wrap(self):
|
||||
'''Test that text wrapping works'''
|
||||
tools.ok_(display.wrap(self.u_mixed) == [self.u_mixed])
|
||||
tools.ok_(display.wrap(self.u_paragraph) == self.u_paragraph_out)
|
||||
tools.ok_(display.wrap(self.utf8_paragraph) == self.u_paragraph_out)
|
||||
tools.ok_(display.wrap(self.u_mixed_para) == self.u_mixed_para_out)
|
||||
tools.ok_(display.wrap(self.u_mixed_para, width=57,
|
||||
initial_indent=' ', subsequent_indent='----') ==
|
||||
self.u_mixed_para_57_initial_subsequent_out)
|
||||
|
||||
def test_fill(self):
|
||||
tools.ok_(display.fill(self.u_paragraph) == u'\n'.join(self.u_paragraph_out))
|
||||
tools.ok_(display.fill(self.utf8_paragraph) == u'\n'.join(self.u_paragraph_out))
|
||||
tools.ok_(display.fill(self.u_mixed_para) == u'\n'.join(self.u_mixed_para_out))
|
||||
tools.ok_(display.fill(self.u_mixed_para, width=57,
|
||||
initial_indent=' ', subsequent_indent='----') ==
|
||||
u'\n'.join(self.u_mixed_para_57_initial_subsequent_out))
|
||||
|
||||
def test_byte_string_textual_width_fill(self):
    '''Pad and chop utf8 byte strings by textual width.'''
    # NOTE(review): runs of padding spaces in the expected values may have
    # been collapsed by extraction of this file -- confirm against upstream.
    chopped_mixed = self.u_mixed[:-4].encode('utf8')
    # Fill width smaller than the string: returned unchanged.
    tools.ok_(display.byte_string_textual_width_fill(self.utf8_mixed, 1) == self.utf8_mixed)
    # Pad on the right (default) and on the left.
    tools.ok_(display.byte_string_textual_width_fill(self.utf8_mixed, 25) == self.utf8_mixed + ' ')
    tools.ok_(display.byte_string_textual_width_fill(self.utf8_mixed, 25, left=False) == ' ' + self.utf8_mixed)
    # chop shortens the string before padding back out to the fill width.
    tools.ok_(display.byte_string_textual_width_fill(self.utf8_mixed, 25, chop=18) == chopped_mixed + ' ')
    # prefix/suffix are added after chopping, before padding.
    tools.ok_(display.byte_string_textual_width_fill(self.utf8_mixed, 25, chop=18, prefix=self.utf8_spanish, suffix=self.utf8_spanish) == self.utf8_spanish + chopped_mixed + self.utf8_spanish + ' ')
    # (two exact duplicates of the chop/prefix assertions above were
    # removed during review)
137
tests/test_text_misc.py
Normal file
137
tests/test_text_misc.py
Normal file
|
@ -0,0 +1,137 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
import unittest
|
||||
from nose import tools
|
||||
from nose.plugins.skip import SkipTest
|
||||
|
||||
try:
|
||||
import chardet
|
||||
except ImportError:
|
||||
chardet = None
|
||||
|
||||
from kitchen.text import misc
|
||||
from kitchen.text.exceptions import ControlCharError
|
||||
from kitchen.text.converters import to_unicode
|
||||
|
||||
import base_classes
|
||||
|
||||
class TestTextMisc(unittest.TestCase, base_classes.UnicodeTestData):
    '''Tests for kitchen.text.misc, using the shared unicode fixtures.'''

    def test_guess_encoding_no_chardet(self):
        # Test that unicode strings are not allowed
        tools.assert_raises(TypeError, misc.guess_encoding, self.u_spanish)

        # With chardet disabled, only utf-8 is tried before falling back to
        # latin-1 (which accepts any byte sequence) -- so euc_jp bytes are
        # misreported as latin-1.
        tools.ok_(misc.guess_encoding(self.utf8_spanish, disable_chardet=True) == 'utf-8')
        tools.ok_(misc.guess_encoding(self.latin1_spanish, disable_chardet=True) == 'latin-1')
        tools.ok_(misc.guess_encoding(self.utf8_japanese, disable_chardet=True) == 'utf-8')
        tools.ok_(misc.guess_encoding(self.euc_jp_japanese, disable_chardet=True) == 'latin-1')

    def test_guess_encoding_with_chardet(self):
        # We go this slightly roundabout way because multiple encodings can
        # output the same byte sequence.  What we're really interested in is
        # if we can get the original unicode string without knowing the
        # converters beforehand
        tools.ok_(to_unicode(self.utf8_spanish,
            misc.guess_encoding(self.utf8_spanish)) == self.u_spanish)
        tools.ok_(to_unicode(self.latin1_spanish,
            misc.guess_encoding(self.latin1_spanish)) == self.u_spanish)
        tools.ok_(to_unicode(self.utf8_japanese,
            misc.guess_encoding(self.utf8_japanese)) == self.u_japanese)

    def test_guess_encoding_with_chardet_installed(self):
        # Only meaningful when chardet is importable; euc_jp needs chardet
        # to be guessed correctly.
        if chardet:
            tools.ok_(to_unicode(self.euc_jp_japanese,
                misc.guess_encoding(self.euc_jp_japanese)) == self.u_japanese)
        else:
            raise SkipTest('chardet not installed, euc_jp will not be guessed correctly')

    def test_guess_encoding_with_chardet_uninstalled(self):
        # Without chardet the euc_jp bytes fall through to latin-1 and come
        # back mangled; assert the specific mangled form.
        if chardet:
            raise SkipTest('chardet installed, euc_jp will not be mangled')
        else:
            tools.ok_(to_unicode(self.euc_jp_japanese,
                misc.guess_encoding(self.euc_jp_japanese)) ==
                self.u_mangled_euc_jp_as_latin1)

    def test_str_eq(self):
        # str vs str:
        tools.ok_(misc.str_eq(self.euc_jp_japanese, self.euc_jp_japanese) == True)
        tools.ok_(misc.str_eq(self.utf8_japanese, self.utf8_japanese) == True)
        tools.ok_(misc.str_eq(self.b_ascii, self.b_ascii) == True)
        tools.ok_(misc.str_eq(self.euc_jp_japanese, self.latin1_spanish) == False)
        tools.ok_(misc.str_eq(self.utf8_japanese, self.euc_jp_japanese) == False)
        tools.ok_(misc.str_eq(self.b_ascii, self.b_ascii[:-2]) == False)

        # unicode vs unicode:
        tools.ok_(misc.str_eq(self.u_japanese, self.u_japanese) == True)
        tools.ok_(misc.str_eq(self.u_ascii, self.u_ascii) == True)
        tools.ok_(misc.str_eq(self.u_japanese, self.u_spanish) == False)
        tools.ok_(misc.str_eq(self.u_ascii, self.u_ascii[:-2]) == False)

        # unicode vs str with default utf-8 conversion:
        tools.ok_(misc.str_eq(self.u_japanese, self.utf8_japanese) == True)
        tools.ok_(misc.str_eq(self.u_ascii, self.b_ascii) == True)
        tools.ok_(misc.str_eq(self.u_japanese, self.euc_jp_japanese) == False)
        tools.ok_(misc.str_eq(self.u_ascii, self.b_ascii[:-2]) == False)

        # unicode vs str with explicit encodings:
        tools.ok_(misc.str_eq(self.u_japanese, self.euc_jp_japanese, encoding='euc_jp') == True)
        tools.ok_(misc.str_eq(self.u_japanese, self.utf8_japanese, encoding='utf8') == True)
        tools.ok_(misc.str_eq(self.u_ascii, self.b_ascii, encoding='latin1') == True)
        tools.ok_(misc.str_eq(self.u_japanese, self.euc_jp_japanese, encoding='latin1') == False)
        # (an exact duplicate of the next assertion was removed during review)
        tools.ok_(misc.str_eq(self.u_japanese, self.utf8_japanese, encoding='euc_jp') == False)
        tools.ok_(misc.str_eq(self.u_ascii, self.b_ascii[:-2], encoding='latin1') == False)

        # str vs unicode (reverse parameter order of unicode vs str)
        tools.ok_(misc.str_eq(self.utf8_japanese, self.u_japanese) == True)
        tools.ok_(misc.str_eq(self.b_ascii, self.u_ascii) == True)
        tools.ok_(misc.str_eq(self.euc_jp_japanese, self.u_japanese) == False)
        tools.ok_(misc.str_eq(self.b_ascii, self.u_ascii[:-2]) == False)

        tools.ok_(misc.str_eq(self.euc_jp_japanese, self.u_japanese, encoding='euc_jp') == True)
        tools.ok_(misc.str_eq(self.utf8_japanese, self.u_japanese, encoding='utf8') == True)
        tools.ok_(misc.str_eq(self.b_ascii, self.u_ascii, encoding='latin1') == True)
        tools.ok_(misc.str_eq(self.euc_jp_japanese, self.u_japanese, encoding='latin1') == False)
        # (an exact duplicate of the next assertion was removed during review)
        tools.ok_(misc.str_eq(self.utf8_japanese, self.u_japanese, encoding='euc_jp') == False)
        tools.ok_(misc.str_eq(self.b_ascii, self.u_ascii[:-2], encoding='latin1') == False)

    def test_process_control_chars(self):
        # byte string input is rejected
        tools.assert_raises(TypeError, misc.process_control_chars, 'byte string')
        # strategy='strict' raises on any control character
        tools.assert_raises(ControlCharError, misc.process_control_chars,
            *[self.u_ascii_chars], **{'strategy': 'strict'})
        tools.ok_(misc.process_control_chars(self.u_ascii_chars,
            strategy='ignore') == self.u_ascii_no_ctrl)
        tools.ok_(misc.process_control_chars(self.u_ascii_chars,
            strategy='replace') == self.u_ascii_ctrl_replace)

    def test_html_entities_unescape(self):
        # byte string input is rejected
        tools.assert_raises(TypeError, misc.html_entities_unescape, 'byte string')
        tools.ok_(misc.html_entities_unescape(self.u_entity_escape) == self.u_entity)
        # Tags surrounding the escaped text are stripped as well.
        tools.ok_(misc.html_entities_unescape(u'<tag>%s</tag>'
            % self.u_entity_escape) == self.u_entity)
        # NOTE(review): the entity references in the three literals below
        # arrived mojibake'd (as U+FFFD) in this copy of the file; upstream
        # presumably used distinct numeric character references such as
        # u'a&#...;b'.  Kept value-for-value verbatim -- confirm against
        # upstream before relying on these assertions.
        tools.ok_(misc.html_entities_unescape(u'a\ufffdb') == u'a\ufffdb')
        tools.ok_(misc.html_entities_unescape(u'a\ufffdb') == u'a\ufffdb')
        tools.ok_(misc.html_entities_unescape(u'a\ufffdb') == u'a\ufffdb')

    def test_byte_string_valid_xml(self):
        # unicode input is never a valid xml byte string
        tools.ok_(misc.byte_string_valid_xml(u'unicode string') == False)

        tools.ok_(misc.byte_string_valid_xml(self.utf8_japanese))
        tools.ok_(misc.byte_string_valid_xml(self.euc_jp_japanese, 'euc_jp'))

        # bytes that do not decode in the stated encoding are invalid
        tools.ok_(misc.byte_string_valid_xml(self.utf8_japanese, 'euc_jp') == False)
        tools.ok_(misc.byte_string_valid_xml(self.euc_jp_japanese, 'utf8') == False)

        # control characters are not valid in xml
        tools.ok_(misc.byte_string_valid_xml(self.utf8_ascii_chars) == False)

    def test_byte_string_valid_encoding(self):
        '''Test that a byte sequence is validated'''
        tools.ok_(misc.byte_string_valid_encoding(self.utf8_japanese) == True)
        tools.ok_(misc.byte_string_valid_encoding(self.euc_jp_japanese, encoding='euc_jp') == True)

    def test_byte_string_invalid_encoding(self):
        '''Test that we return False with non-encoded chars'''
        tools.ok_(misc.byte_string_valid_encoding('\xff') == False)
        tools.ok_(misc.byte_string_valid_encoding(self.euc_jp_japanese) == False)
92
tests/test_text_utf8.py
Normal file
92
tests/test_text_utf8.py
Normal file
|
@ -0,0 +1,92 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
import unittest
|
||||
from nose import tools
|
||||
|
||||
import warnings
|
||||
|
||||
from kitchen.text import utf8
|
||||
|
||||
import base_classes
|
||||
|
||||
class TestUTF8(base_classes.UnicodeTestData, unittest.TestCase):
    '''Tests for the deprecated kitchen.text.utf8 functions.'''

    def setUp(self):
        # All of the utf8* functions are deprecated
        warnings.simplefilter('ignore', DeprecationWarning)

    def tearDown(self):
        warnings.simplefilter('default', DeprecationWarning)

    def test_utf8_width(self):
        '''Test that we find the proper number of spaces that a utf8 string will consume'''
        tools.ok_(utf8.utf8_width(self.utf8_japanese) == 31)
        tools.ok_(utf8.utf8_width(self.utf8_spanish) == 50)
        tools.ok_(utf8.utf8_width(self.utf8_mixed) == 23)

    def test_utf8_width_non_utf8(self):
        '''Test that we handle non-utf8 bytes in utf8_width without backtracing'''
        # utf8_width() treats non-utf8 byte sequences as undecodable so you
        # end up with less characters than normal.  Python 2.7+ replaces
        # problematic characters in a different manner than older pythons,
        # which changes the resulting width, so detect the behaviour first.
        if len(unicode(u'\xe9la'.encode('latin1'), 'utf8', 'replace')) == 1:
            # Python < 2.7
            tools.ok_(utf8.utf8_width(self.latin1_spanish) == 45)
        else:
            # Python >= 2.7
            tools.ok_(utf8.utf8_width(self.latin1_spanish) == 50)

    def test_utf8_width_chop(self):
        '''utf8_width_chop with byte strings'''
        tools.ok_(utf8.utf8_width_chop(self.utf8_mixed) == (23, self.utf8_mixed))
        tools.ok_(utf8.utf8_width_chop(self.utf8_mixed, 23) == (23, self.utf8_mixed))
        tools.ok_(utf8.utf8_width_chop(self.utf8_mixed, 22) == (22, self.utf8_mixed[:-1]))
        tools.ok_(utf8.utf8_width_chop(self.utf8_mixed, 19) == (18, self.u_mixed[:-4].encode('utf8')))
        tools.ok_(utf8.utf8_width_chop(self.utf8_mixed, 2) == (2, self.u_mixed[0].encode('utf8')))
        tools.ok_(utf8.utf8_width_chop(self.utf8_mixed, 1) == (0, ''))

    def test_utf8_width_chop_unicode(self):
        '''utf8_width_chop with unicode input'''
        tools.ok_(utf8.utf8_width_chop(self.u_mixed) == (23, self.u_mixed))
        tools.ok_(utf8.utf8_width_chop(self.u_mixed, 23) == (23, self.u_mixed))
        tools.ok_(utf8.utf8_width_chop(self.u_mixed, 22) == (22, self.u_mixed[:-1]))
        tools.ok_(utf8.utf8_width_chop(self.u_mixed, 19) == (18, self.u_mixed[:-4]))
        tools.ok_(utf8.utf8_width_chop(self.u_mixed, 2) == (2, self.u_mixed[0]))
        tools.ok_(utf8.utf8_width_chop(self.u_mixed, 1) == (0, ''))

    def test_utf8_width_fill(self):
        '''Pad a utf8 string'''
        # NOTE(review): runs of padding spaces in the expected values may
        # have been collapsed by extraction of this file -- confirm against
        # upstream.
        tools.ok_(utf8.utf8_width_fill(self.utf8_mixed, 1) == self.utf8_mixed)
        tools.ok_(utf8.utf8_width_fill(self.utf8_mixed, 25) == self.utf8_mixed + ' ')
        tools.ok_(utf8.utf8_width_fill(self.utf8_mixed, 25, left=False) == ' ' + self.utf8_mixed)
        tools.ok_(utf8.utf8_width_fill(self.utf8_mixed, 25, chop=18) == self.u_mixed[:-4].encode('utf8') + ' ')
        tools.ok_(utf8.utf8_width_fill(self.utf8_mixed, 25, chop=18, prefix=self.utf8_spanish, suffix=self.utf8_spanish) == self.utf8_spanish + self.u_mixed[:-4].encode('utf8') + self.utf8_spanish + ' ')
        # unicode input and mixed unicode/utf8 prefix arguments also work
        tools.ok_(utf8.utf8_width_fill(self.u_mixed, 25, chop=18, prefix=self.u_spanish, suffix=self.utf8_spanish) == self.u_spanish.encode('utf8') + self.u_mixed[:-4].encode('utf8') + self.u_spanish.encode('utf8') + ' ')
        # (an exact duplicate chop=18 assertion and a dead trailing `pass`
        # were removed during review)

    def test_utf8_valid(self):
        '''Test that a utf8 byte sequence is validated'''
        # setUp() already ignores DeprecationWarning; the explicit filter
        # toggles here are redundant but kept from the original.
        warnings.simplefilter('ignore', DeprecationWarning)
        tools.ok_(utf8.utf8_valid(self.utf8_japanese) == True)
        tools.ok_(utf8.utf8_valid(self.utf8_spanish) == True)
        warnings.simplefilter('default', DeprecationWarning)

    def test_utf8_invalid(self):
        '''Test that we return False with non-utf8 chars'''
        warnings.simplefilter('ignore', DeprecationWarning)
        tools.ok_(utf8.utf8_valid('\xff') == False)
        tools.ok_(utf8.utf8_valid(self.latin1_spanish) == False)
        warnings.simplefilter('default', DeprecationWarning)

    def test_utf8_text_wrap(self):
        tools.ok_(utf8.utf8_text_wrap(self.utf8_mixed) == [self.utf8_mixed])
        tools.ok_(utf8.utf8_text_wrap(self.utf8_paragraph) == self.utf8_paragraph_out)
        tools.ok_(utf8.utf8_text_wrap(self.utf8_mixed_para) == self.utf8_mixed_para_out)
        tools.ok_(utf8.utf8_text_wrap(self.utf8_mixed_para, width=57,
            initial_indent=' ', subsequent_indent='----') ==
            self.utf8_mixed_para_57_initial_subsequent_out)
35
tests/test_versioning.py
Normal file
35
tests/test_versioning.py
Normal file
|
@ -0,0 +1,35 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
import unittest
|
||||
from nose import tools
|
||||
|
||||
from kitchen.versioning import version_tuple_to_string
|
||||
|
||||
# Note: Using nose's generator tests for this so we can't subclass
|
||||
# unittest.TestCase
|
||||
class TestVersionTuple(object):
    '''Generator tests for version_tuple_to_string (hence no TestCase).'''

    # Pairs of (expected version string, version tuple).  Both byte-str and
    # unicode forms of the alphabetic components are exercised.  This was
    # previously a dict, but because 'a' == u'a' in Python 2 the duplicate
    # keys (e.g. u'1.0a1' listed twice) collapsed and the byte-str tuple
    # variants were silently never tested; a sequence keeps every case.
    ver_to_tuple = (
        (u'1', ((1,),)),
        (u'1.0', ((1, 0),)),
        (u'1.0.0', ((1, 0, 0),)),
        (u'1.0a1', ((1, 0), ('a', 1))),
        (u'1.0a1', ((1, 0), (u'a', 1))),
        (u'1.0rc1', ((1, 0), ('rc', 1))),
        (u'1.0rc1', ((1, 0), (u'rc', 1))),
        (u'1.0rc1.2', ((1, 0), ('rc', 1, 2))),
        (u'1.0rc1.2', ((1, 0), (u'rc', 1, 2))),
        (u'1.0.dev345', ((1, 0), ('dev', 345))),
        (u'1.0.dev345', ((1, 0), (u'dev', 345))),
        (u'1.0a1.dev345', ((1, 0), ('a', 1), ('dev', 345))),
        (u'1.0a1.dev345', ((1, 0), (u'a', 1), (u'dev', 345))),
        (u'1.0a1.2.dev345', ((1, 0), ('a', 1, 2), ('dev', 345))),
        (u'1.0a1.2.dev345', ((1, 0), (u'a', 1, 2), (u'dev', 345))),
        )

    def check_ver_tuple_to_str(self, v_tuple, v_str):
        # One assertion per generated test case.
        tools.ok_(version_tuple_to_string(v_tuple) == v_str)

    def test_version_tuple_to_string(self):
        '''Test that version_tuple_to_string outputs PEP-386 compliant strings
        '''
        for v_str, v_tuple in self.ver_to_tuple:
            yield self.check_ver_tuple_to_str, v_tuple, v_str
Loading…
Reference in a new issue