Current File : //opt/alt/ruby31/include/ruby/internal/core/rstring.h
#ifndef RBIMPL_RSTRING_H                             /*-*-C++-*-vi:se ft=cpp:*/
#define RBIMPL_RSTRING_H
/**
 * @file
 * @author     Ruby developers <ruby-core@ruby-lang.org>
 * @copyright  This  file  is   a  part  of  the   programming  language  Ruby.
 *             Permission  is hereby  granted,  to  either redistribute  and/or
 *             modify this file, provided that  the conditions mentioned in the
 *             file COPYING are met.  Consult the file for details.
 * @warning    Symbols   prefixed  with   either  `RBIMPL`   or  `rbimpl`   are
 *             implementation details.   Don't take  them as canon.  They could
 *             rapidly appear then vanish.  The name (path) of this header file
 *             is also an  implementation detail.  Do not expect  it to persist
 *             at the place it is now.  Developers are free to move it anywhere
 *             anytime at will.
 * @note       To  ruby-core:  remember  that   this  header  can  be  possibly
 *             recursively included  from extension  libraries written  in C++.
 *             Do not  expect for  instance `__VA_ARGS__` is  always available.
 *             We assume C99  for ruby itself but we don't  assume languages of
 *             extension libraries.  They could be written in C++98.
 * @brief      Defines struct ::RString.
 */
#include "ruby/internal/config.h"
#include "ruby/internal/arithmetic/long.h"
#include "ruby/internal/attr/artificial.h"
#include "ruby/internal/attr/pure.h"
#include "ruby/internal/cast.h"
#include "ruby/internal/core/rbasic.h"
#include "ruby/internal/dllexport.h"
#include "ruby/internal/fl_type.h"
#include "ruby/internal/value_type.h"
#include "ruby/internal/warning_push.h"
#include "ruby/assert.h"

/**
 * Convenient casting macro.
 *
 * @param   obj  An object, which is in fact an ::RString.
 * @return  The passed object casted to ::RString.
 */
#define RSTRING(obj)            RBIMPL_CAST((struct RString *)(obj))

/** @cond INTERNAL_MACRO */
#define RSTRING_NOEMBED         RSTRING_NOEMBED
#if !USE_RVARGC
#define RSTRING_EMBED_LEN_MASK  RSTRING_EMBED_LEN_MASK
#define RSTRING_EMBED_LEN_SHIFT RSTRING_EMBED_LEN_SHIFT
#define RSTRING_EMBED_LEN_MAX   RSTRING_EMBED_LEN_MAX
#endif
#define RSTRING_FSTR            RSTRING_FSTR
#define RSTRING_EMBED_LEN RSTRING_EMBED_LEN
#define RSTRING_LEN       RSTRING_LEN
#define RSTRING_LENINT    RSTRING_LENINT
#define RSTRING_PTR       RSTRING_PTR
#define RSTRING_END       RSTRING_END
/** @endcond */

/**
 * @name Conversion of Ruby strings into C's
 *
 * @{
 */

/**
 * Ensures that the parameter object is a  String.  This is done by calling its
 * `to_str` method.
 *
 * @param[in,out]  v              Arbitrary Ruby object.
 * @exception      rb_eTypeError  No implicit conversion defined.
 * @post           `v` is a String.
 */
#define StringValue(v)     rb_string_value(&(v))

/**
 * Identical to #StringValue, except it returns a `char*`.
 *
 * @param[in,out]  v              Arbitrary Ruby object.
 * @exception      rb_eTypeError  No implicit conversion defined.
 * @return         Converted Ruby string's backend C string.
 * @post           `v` is a String.
 */
#define StringValuePtr(v)  rb_string_value_ptr(&(v))

/**
 * Identical to #StringValuePtr, except it additionally checks for the contents
 * for viability  as a C  string.  Ruby can accept  wider range of  contents as
 * strings, compared to C.  This function is to check that.
 *
 * @param[in,out]  v              Arbitrary Ruby object.
 * @exception      rb_eTypeError  No implicit conversion defined.
 * @exception      rb_eArgError   String is not C-compatible.
 * @return         Converted Ruby string's backend C string.
 * @post           `v` is a String.
 */
#define StringValueCStr(v) rb_string_value_cstr(&(v))

/**
 * @private
 *
 * @deprecated  This macro once was a thing in the old days, but makes no sense
 *              any  longer today.   Exists  here  for backwards  compatibility
 *              only.  You can safely forget about it.
 */
#define SafeStringValue(v) StringValue(v)

/**
 * Identical  to #StringValue,  except  it additionally  converts the  string's
 * encoding to default external encoding.  Ruby has a concept called encodings.
 * A string can have different  encoding than the environment expects.  Someone
 * has to make  sure its contents be converted to  something suitable.  This is
 * that routine.  Call it when necessary.
 *
 * @param[in,out]  v              Arbitrary Ruby object.
 * @exception      rb_eTypeError  No implicit conversion defined.
 * @return         Converted Ruby string's backend C string.
 * @post           `v` is a String.
 *
 * @internal
 *
 * Not   sure  but   it  seems   this  macro   does  not   raise  on   encoding
 * incompatibilities?  Doesn't sound right to @shyouhei.
 */
#define ExportStringValue(v) do { \
    StringValue(v);               \
    (v) = rb_str_export(v);       \
} while (0)

/** @} */

/**
 * @private
 *
 * Bits that you can set to ::RBasic::flags.
 *
 * @warning  These enums are not the only bits we use for strings.
 *
 * @internal
 *
 * Actually all bits  through FL_USER1 to FL_USER19 are used  for strings.  Why
 * only this  tiny part of  them are made public  here?  @shyouhei can  find no
 * reason.
 */
enum ruby_rstring_flags {

    /**
     * This flag has  something to do with memory footprint.   If the string is
     * short enough, ruby tries to be  creative to abuse padding bits of struct
     * ::RString for  storing contents.  If this  flag is set that  string does
     * _not_  do that,  to resort  to  good old  fashioned external  allocation
     * strategy instead.
     *
     * @warning  This  bit has  to be  considered read-only.   Setting/clearing
     *           this  bit without  corresponding fix  up must  cause immediate
     *           SEGV.    Also,  internal   structures  of   a  string   change
     *           dynamically  and  transparently  throughout of  its  lifetime.
     *           Don't assume it being persistent.
     *
     * @internal
     *
     * 3rd parties must  not be aware that  there even is more than  one way to
     * store a string.  Might better be hidden.
     */
    RSTRING_NOEMBED         = RUBY_FL_USER1,

#if !USE_RVARGC
    /**
     * When a  string employs embedded strategy  (see ::RSTRING_NOEMBED), these
     * bits  are  used to  store  the  number  of  bytes actually  filled  into
     * ::RString::ary.
     *
     * @internal
     *
     * 3rd parties must  not be aware that  there even is more than  one way to
     * store a string.  Might better be hidden.
     */
    RSTRING_EMBED_LEN_MASK  = RUBY_FL_USER2 | RUBY_FL_USER3 | RUBY_FL_USER4 |
                              RUBY_FL_USER5 | RUBY_FL_USER6,
#endif

    /* Actually,  string  encodings are  also  encoded  into the  flags,  using
     * remaining bits.*/

    /**
     * This  flag has  something  to do  with infamous  "f"string.   What is  a
     * fstring?  Well  it is a  special subkind  of strings that  is immutable,
     * deduped globally, and managed  by our GC.  It is much  like a Symbol (in
     * fact Symbols are  dynamic these days and are  backended using fstrings).
     * This concept  has been  silently introduced  at some  point in  2.x era.
     * Since  then it  gained  wider  acceptance in  the  core.  But  extension
     * libraries could not know that until very recently.  Strings of this flag
     * live in  a special Limbo deep  inside of the interpreter.   Never try to
     * manipulate it by hand.
     *
     * @internal
     *
     * Fstrings  are not  the only  variant  strings that  we implement  today.
     * Other things are behind-the-scene.  This is the only one that is visible
     * from extension  library.  There  is no  clear reason why  it has  to be.
     * Given there are more "polite" ways to create fstrings, it seems this bit
     * need not be exposed to extension libraries.  Might better be hidden.
     */
    RSTRING_FSTR            = RUBY_FL_USER17
};

#if !USE_RVARGC
/**
 * This is an enum because GDB wants it (rather than a macro).  People need not
 * bother.
 */
enum ruby_rstring_consts {
    /** Where ::RSTRING_EMBED_LEN_MASK resides. */
    RSTRING_EMBED_LEN_SHIFT = RUBY_FL_USHIFT + 2,

    /** Max possible number of characters that can be embedded. */
    RSTRING_EMBED_LEN_MAX   = RBIMPL_EMBED_LEN_MAX_OF(char) - 1
};
#endif

/**
 * Ruby's String.  A string in ruby conceptually has these information:
 *
 * - Encoding of the string.
 * - Length of the string.
 * - Contents of the string.
 *
 * It is worth  noting that a string  is _not_ an array of  characters in ruby.
 * It has never been.   In 1.x a string was an array of  integers.  Since 2.x a
 * string is no longer an array of anything.  A string is a string -- just like
 * a Time is not an integer.
 */
struct RString {

    /** Basic part, including flags and class. */
    struct RBasic basic;

    /** String's specific fields. */
    union {

        /**
         * Strings  that use  separated  memory region  for  contents use  this
         * pattern.
         */
        struct {

            /**
             * Length of the string, not including terminating NUL character.
             *
             * @note  This is in bytes.
             */
            long len;

            /**
             * Pointer to  the contents of  the string.   In the old  days each
             * string had  dedicated memory  regions.  That  is no  longer true
             * today,  but there  still are  strings of  such properties.  This
             * field could be used to point such things.
             */
            char *ptr;

            /** Auxiliary info. */
            union {

                /**
                 * Capacity of `*ptr`.  A continuous  memory region of at least
                 * `capa` bytes  is expected to  exist at `*ptr`.  This  can be
                 * bigger than `len`.
                 */
                long capa;

                /**
                 * Parent  of the  string.   Nowadays strings  can share  their
                 * contents each other, constructing  gigantic nest of objects.
                 * This situation is called "shared",  and this is the field to
                 * control such properties.
                 */
                VALUE shared;
            } aux;
        } heap;

        /** Embedded contents. */
        struct {
#if USE_RVARGC
            short len;
            /* This is a length 1 array because:
             *   1. GCC has a bug that does not optimize C flexible array members
             *      (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102452)
             *   2. Zero length arrays are not supported by all compilers
             */
            char ary[1];
#else
            /**
             * When a  string is short enough,  it uses this area  to store the
             * contents themselves.  This was  impractical in the 20th century,
             * but these days 64 bit machines can typically hold 24 bytes here.
             * Could be sufficiently large.  In this case the length is encoded
             * into the flags.
             */
            char ary[RSTRING_EMBED_LEN_MAX + 1];
#endif
        } embed;
    } as;
};

RBIMPL_SYMBOL_EXPORT_BEGIN()
/**
 * Identical to rb_check_string_type(), except it  raises exceptions in case of
 * conversion failures.
 *
 * @param[in]  obj            Target object.
 * @exception  rb_eTypeError  No implicit conversion to String.
 * @return     Return value of `obj.to_str`.
 * @see        rb_io_get_io
 * @see        rb_ary_to_ary
 */
VALUE rb_str_to_str(VALUE obj);

/**
 * Identical to  rb_str_to_str(), except it  fills the passed pointer  with the
 * converted object.
 *
 * @param[in,out]  ptr            Pointer to a variable of target object.
 * @exception      rb_eTypeError  No implicit conversion to String.
 * @return         Return value of `obj.to_str`.
 * @post           `*ptr` is the return value.
 */
VALUE rb_string_value(volatile VALUE *ptr);

/**
 * Identical  to  rb_str_to_str(), except  it  returns  the converted  string's
 * backend memory region.
 *
 * @param[in,out]  ptr            Pointer to a variable of target object.
 * @exception      rb_eTypeError  No implicit conversion to String.
 * @post           `*ptr` is the return value of `obj.to_str`.
 * @return         Pointer to the contents of the return value.
 */
char *rb_string_value_ptr(volatile VALUE *ptr);

/**
 * Identical to  rb_string_value_ptr(), except  it additionally checks  for the
 * contents  for viability  as a  C  string.  Ruby  can accept  wider range  of
 * contents as strings, compared to C.  This function is to check that.
 *
 * @param[in,out]  ptr            Pointer to a variable of target object.
 * @exception      rb_eTypeError  No implicit conversion to String.
 * @exception      rb_eArgError   String is not C-compatible.
 * @post           `*ptr` is the return value of `obj.to_str`.
 * @return         Pointer to the contents of the return value.
 */
char *rb_string_value_cstr(volatile VALUE *ptr);

/**
 * Identical  to rb_str_to_str(),  except it  additionally converts  the string
 * into default  external encoding.   Ruby has a  concept called  encodings.  A
 * string can  have different encoding  than the environment  expects.  Someone
 * has to make  sure its contents be converted to  something suitable.  This is
 * that routine.  Call it when necessary.
 *
 * @param[in]  obj            Target object.
 * @exception  rb_eTypeError  No implicit conversion to String.
 * @return     Converted ruby string of default external encoding.
 */
VALUE rb_str_export(VALUE obj);

/**
 * Identical to  rb_str_export(), except it  converts into the  locale encoding
 * instead.
 *
 * @param[in]  obj            Target object.
 * @exception  rb_eTypeError  No implicit conversion to String.
 * @return     Converted ruby string of locale encoding.
 */
VALUE rb_str_export_locale(VALUE obj);

RBIMPL_ATTR_ERROR(("rb_check_safe_str() and Check_SafeStr() are obsolete; use StringValue() instead"))
/**
 * @private
 *
 * @deprecated  This function  once was a thing  in the old days,  but makes no
 *              sense   any   longer   today.   Exists   here   for   backwards
 *              compatibility only.  You can safely forget about it.
 */
void rb_check_safe_str(VALUE);

/**
 * @private
 *
 * @deprecated  This macro once was a thing in the old days, but makes no sense
 *              any  longer today.   Exists  here  for backwards  compatibility
 *              only.  You can safely forget about it.
 */
#define Check_SafeStr(v) rb_check_safe_str(RBIMPL_CAST((VALUE)(v)))

/**
 * @private
 *
 * Prints diagnostic message to stderr when RSTRING_PTR or RSTRING_END
 * is NULL.
 *
 * @param[in]  func           The function name where encountered NULL pointer.
 */
void rb_debug_rstring_null_ptr(const char *func);
RBIMPL_SYMBOL_EXPORT_END()

RBIMPL_ATTR_PURE_UNLESS_DEBUG()
RBIMPL_ATTR_ARTIFICIAL()
/**
 * Queries the length of the string.
 *
 * @param[in]  str  String in question.
 * @return     Its length, in bytes.
 * @pre        `str`  must  be an  instance  of  ::RString,  and must  has  its
 *             ::RSTRING_NOEMBED flag off.
 *
 * @internal
 *
 * This was a macro  before.  It was inevitable to be  public, since macros are
 * global constructs.   But should it be  forever?  Now that it  is a function,
 * @shyouhei thinks  it could  just be  eliminated, hidden  into implementation
 * details.
 */
static inline long
RSTRING_EMBED_LEN(VALUE str)
{
    RBIMPL_ASSERT_TYPE(str, RUBY_T_STRING);
    RBIMPL_ASSERT_OR_ASSUME(! RB_FL_ANY_RAW(str, RSTRING_NOEMBED));

#if USE_RVARGC
    short f = RSTRING(str)->as.embed.len;
#else
    VALUE f = RBASIC(str)->flags;
    f &= RSTRING_EMBED_LEN_MASK;
    f >>= RSTRING_EMBED_LEN_SHIFT;
#endif
    return RBIMPL_CAST((long)f);
}

RBIMPL_WARNING_PUSH()
#if RBIMPL_COMPILER_IS(Intel)
RBIMPL_WARNING_IGNORED(413)
#endif

RBIMPL_ATTR_PURE_UNLESS_DEBUG()
RBIMPL_ATTR_ARTIFICIAL()
/**
 * @private
 *
 * "Expands" an embedded  string into an ordinal one.  This  is a function that
 * returns aggregated type.   The returned struct always  has its `as.heap.len`
 * an `as.heap.ptr` fields set appropriately.
 *
 * This is an implementation detail that 3rd parties should never bother.
 */
static inline struct RString
rbimpl_rstring_getmem(VALUE str)
{
    RBIMPL_ASSERT_TYPE(str, RUBY_T_STRING);

    if (RB_FL_ANY_RAW(str, RSTRING_NOEMBED)) {
        return *RSTRING(str);
    }
    else {
        /* Expecting compilers to optimize this on-stack struct away. */
        struct RString retval;
        retval.as.heap.len = RSTRING_EMBED_LEN(str);
        retval.as.heap.ptr = RSTRING(str)->as.embed.ary;
        return retval;
    }
}

RBIMPL_WARNING_POP()

RBIMPL_ATTR_PURE_UNLESS_DEBUG()
RBIMPL_ATTR_ARTIFICIAL()
/**
 * Queries the length of the string.
 *
 * @param[in]  str  String in question.
 * @return     Its length, in bytes.
 * @pre        `str` must be an instance of ::RString.
 */
static inline long
RSTRING_LEN(VALUE str)
{
    return rbimpl_rstring_getmem(str).as.heap.len;
}

RBIMPL_ATTR_ARTIFICIAL()
/**
 * Queries the contents pointer of the string.
 *
 * @param[in]  str  String in question.
 * @return     Pointer to its contents.
 * @pre        `str` must be an instance of ::RString.
 */
static inline char *
RSTRING_PTR(VALUE str)
{
    char *ptr = rbimpl_rstring_getmem(str).as.heap.ptr;

    if (RB_UNLIKELY(! ptr)) {
        /* :BEWARE: @shyouhei thinks  that currently, there are  rooms for this
         * function to return  NULL.  In the 20th century that  was a pointless
         * concern.  However struct RString can hold fake strings nowadays.  It
         * seems no  check against NULL  are exercised around handling  of them
         * (one  of  such   usages  is  located  in   marshal.c,  which  scares
         * @shyouhei).  Better check here for maximum safety.
         *
         * Also,  this is  not rb_warn()  because RSTRING_PTR()  can be  called
         * during GC (see  what obj_info() does).  rb_warn()  needs to allocate
         * Ruby objects.  That is not possible at this moment. */
        rb_debug_rstring_null_ptr("RSTRING_PTR");
    }

    return ptr;
}

RBIMPL_ATTR_ARTIFICIAL()
/**
 * Queries the end of the contents pointer of the string.
 *
 * @param[in]  str  String in question.
 * @return     Pointer to its end of contents.
 * @pre        `str` must be an instance of ::RString.
 */
static inline char *
RSTRING_END(VALUE str)
{
    struct RString buf = rbimpl_rstring_getmem(str);

    if (RB_UNLIKELY(! buf.as.heap.ptr)) {
        /* Ditto. */
        rb_debug_rstring_null_ptr("RSTRING_END");
    }

    return &buf.as.heap.ptr[buf.as.heap.len];
}

RBIMPL_ATTR_ARTIFICIAL()
/**
 * Identical to RSTRING_LEN(), except it differs for the return type.
 *
 * @param[in]  str             String in question.
 * @exception  rb_eRangeError  Too long.
 * @return     Its length, in bytes.
 * @pre        `str` must be an instance of ::RString.
 *
 * @internal
 *
 * This API seems redundant but has actual usages.
 */
static inline int
RSTRING_LENINT(VALUE str)
{
    return rb_long2int(RSTRING_LEN(str));
}

/**
 * Convenient macro to obtain the contents and length at once.
 *
 * @param  str     String in question.
 * @param  ptrvar  Variable where its contents is stored.
 * @param  lenvar  Variable where its length is stored.
 */
#ifdef HAVE_STMT_AND_DECL_IN_EXPR
# define RSTRING_GETMEM(str, ptrvar, lenvar) \
    __extension__ ({ \
        struct RString rbimpl_str = rbimpl_rstring_getmem(str); \
        (ptrvar) = rbimpl_str.as.heap.ptr; \
        (lenvar) = rbimpl_str.as.heap.len; \
    })
#else
# define RSTRING_GETMEM(str, ptrvar, lenvar) \
    ((ptrvar) = RSTRING_PTR(str),           \
     (lenvar) = RSTRING_LEN(str))
#endif /* HAVE_STMT_AND_DECL_IN_EXPR */
#endif /* RBIMPL_RSTRING_H */