1; } // Three byte sequence: elseif (($value & 0xF0) === 0xE0) { $character = ($value & 0x0F) << 12; $length = 3; $remaining = 2; } // Four byte sequence: elseif (($value & 0xF8) === 0xF0) { $character = ($value & 0x07) << 18; $length = 4; $remaining = 3; } // Invalid byte: else { $valid = false; $remaining = 0; } } // Continuation byte: else { // Check that the byte is valid, then add it to the character: if (($value & 0xC0) === 0x80) { $remaining--; $character |= ($value & 0x3F) << ($remaining * 6); } // If it is invalid, count the sequence as invalid and reprocess the current byte as the start of a sequence: else { $valid = false; $remaining = 0; $i--; } } // If we've reached the end of the current byte sequence, append it to Unicode::$data if (!$remaining) { // Percent encode anything invalid or not in iunreserved if ( // Invalid sequences !$valid // Non-shortest form sequences are invalid || $length > 1 && $character <= 0x7F || $length > 2 && $character <= 0x7FF || $length > 3 && $character <= 0xFFFF // Outside of range of iunreserved codepoints || $character < 0x2D || $character > 0xEFFFD // Noncharacters || ($character & 0xFFFE) === 0xFFFE || $character >= 0xFDD0 && $character <= 0xFDEF // Everything else not in iunreserved (this is all BMP) || $character === 0x2F || $character > 0x39 && $character < 0x41 || $character > 0x5A && $character < 0x61 || $character > 0x7A && $character < 0x7E || $character > 0x7E && $character < 0xA0 || $character > 0xD7FF && $character < 0xF900 ) { for ($j = $start; $j <= $i; $j++) { $string .= '%' . strtoupper($bytes[$j]); } } else { for ($j = $start; $j <= $i; $j++) { $string .= chr(hexdec($bytes[$j])); } } } } // If we have any bytes left over they are invalid (i.e., we are // mid-way through a multi-byte sequence) if ($remaining) { for ($j = $start; $j < $len; $j++) { $string .= '%' . strtoupper($bytes[$j]); } } return $string; } protected function scheme_normalization() { if (isset($this->normalization[$this->scheme]['iuserinfo']) && $this->iuserinfo === $this->normalization[$this->scheme]['iuserinfo']) { $this->iuserinfo = null; } if (isset($this->normalization[$this->scheme]['ihost']) && $this->ihost === $this->normalization[$this->scheme]['ihost']) { $this->ihost = null; } if (isset($this->normalization[$this->scheme]['port']) && $this->port === $this->normalization[$this->scheme]['port']) { $this->port = null; } if (isset($this->normalization[$this->scheme]['ipath']) && $this->ipath === $this->normalization[$this->scheme]['ipath']) { $this->ipath = ''; } if (isset($this->ihost) && empty($this->ipath)) { $this->ipath = '/'; } if (isset($this->normalization[$this->scheme]['iquery']) && $this->iquery === $this->normalization[$this->scheme]['iquery']) { $this->iquery = null; } if (isset($this->normalization[$this->scheme]['ifragment']) && $this->ifragment === $this->normalization[$this->scheme]['ifragment']) { $this->ifragment = null; } } /** * Check if the object represents a valid IRI. This needs to be done on each * call as some things change depending on another part of the IRI. * * @return bool */ public function is_valid() { $isauthority = $this->iuserinfo !== null || $this->ihost !== null || $this->port !== null; if ($this->ipath !== '' && ( $isauthority && $this->ipath[0] !== '/' || ( $this->scheme === null && !$isauthority && strpos($this->ipath, ':') !== false && (strpos($this->ipath, '/') === false ? true : strpos($this->ipath, ':') < strpos($this->ipath, '/')) ) ) ) { return false; } return true; } /** * Set the entire IRI. Returns true on success, false on failure (if there * are any invalid characters). * * @param strin=== 0x80) { $character = $value; $length = 1; $remaining = 0; } // Two byte sequence: elseif (($value & 0xE0) === 0xC0) { $character = ($value & 0x1F) << 6; $length = 2; $remaining = 1; } // Three byte sequence: elseif (($value & 0xF0) === 0xE0) { $character = ($value & 0x0F) << 12; $length = 3; $remaining = 2; } // Four byte sequence: elseif (($value & 0xF8) === 0xF0) { $character = ($value & 0x07) << 18; $length = 4; $remaining = 3; } // Invalid byte: else { throw new Requests_Exception('Invalid Unicode codepoint', 'idna.invalidcodepoint', $value); } if ($remaining > 0) { if ($position + $length > $strlen) { throw new Requests_Exception('Invalid Unicode codepoint', 'idna.invalidcodepoint', $character); } for ($position++; $remaining > 0; $position++) { $value = ord($input[$position]); // If it is invalid, count the sequence as invalid and reprocess the current byte: if (($value & 0xC0) !== 0x80) { throw new Requests_Exception('Invalid Unicode codepoint', 'idna.invalidcodepoint', $character); } $character |= ($value & 0x3F) << (--$remaining * 6); } $position--; } if ( // Non-shortest form sequences are invalid $length > 1 && $character <= 0x7F || $length > 2 && $character <= 0x7FF || $length > 3 && $character <= 0xFFFF // Outside of range of ucschar codepoints // Noncharacters || ($character & 0xFFFE) === 0xFFFE || $character >= 0xFDD0 && $character <= 0xFDEF || ( // Everything else not in ucschar $character > 0xD7FF && $character < 0xF900 || $character < 0x20 || $character > 0x7E && $character < 0xA0 || $character > 0xEFFFD ) ) { throw new Requests_Exception('Invalid Unicode codepoint', 'idna.invalidcodepoint', $character); } $codepoints[] = $character; } return $codepoints; } /** * RFC3492-compliant encoder * * @internal Pseudo-code from Section 6.3 is commented with "#" next to relevant code * @throws Requests_Exception On character outside of the domain (never happens with Punycode) (`idna.character_outside_domain`) * * @param string $input UTF-8 encoded string to encode * @return string Punycode-encoded string */ public static function punycode_encode($input) { $output = ''; # let n = initial_n $n = self::BOOTSTRAP_INITIAL_N; # let delta = 0 $delta = 0; # let bias = initial_bias $bias = self::BOOTSTRAP_INITIAL_BIAS; # let h = b = the number of basic code points in the input $h = $b = 0; // see loop # copy them to the output in order $codepoints = self::utf8_to_codepoints($input); $extended = array(); foreach ($codepoints as $char) { if ($char < 128) { // Character is valid ASCII // TODO: this should also check if it's valid for a URL $output .= chr($char); $h++; } // Check if the character is non-ASCII, but below initial n // This never occurs for Punycode, so ignore in coverage // @codeCoverageIgnoreStart elseif ($char < $n) { throw new Requests_Exception('Invalid character', 'idna.character_outside_domain', $char); } // @codeCoverageIgnoreEnd else { $extended[$char] = true; } } $extended = array_keys($extended); sort($extended); $b = $h; # [copy them] followed by a delimiter if b > 0 if (strlen($output) > 0) { $output .= '-'; } # {if the input contains a non-basic code point < n then fail} # while h < length(input) do begin while ($h < count($codepoints)) { # let m = the minimum code point >= n in the input $m = array_shift($extended); //printf('next code point to insert is %s' . PHP_EOL, dechex($m)); # let delta = delta + (m - n) * (h + 1), fail on overflow $delta += ($m - $n) * ($h + 1); # let n = m $n = $m; # for each code point c in the input (in order) do begin for ($num = 0; $num < count($codepoints); $num++) { $c = $codepoints[$num]; # if c < n then increment delta, fail on overflow if ($c < $n) { $delta++; ; $position++; } } $this->ihost = $ihost; } $this->scheme_normalization(); return true; } /** * Set the port. Returns true on success, false on failure (if there are * any invalid characters). * * @param string $port * @return bool */ protected function set_port($port) { if ($port === null) { $this->port = null; return true; } if (strspn($port, '0123456789') === strlen($port)) { $this->port = (int) $port; $this->scheme_normalization(); return true; } $this->port = null; return false; } /** * Set the ipath. * * @param string $ipath * @return bool */ protected function set_path($ipath) { static $cache; if (!$cache) { $cache = array(); } $ipath = (string) $ipath; if (isset($cache[$ipath])) { $this->ipath = $cache[$ipath][(int) ($this->scheme !== null)]; } else { $valid = $this->replace_invalid_with_pct_encoding($ipath, '!$&\'()*+,;=@:/'); $removed = $this->remove_dot_segments($valid); $cache[$ipath] = array($valid, $removed); $this->ipath = ($this->scheme !== null) ? $removed : $valid; } $this->scheme_normalization(); return true; } /** * Set the iquery. * * @param string $iquery * @return bool */ protected function set_query($iquery) { if ($iquery === null) { $this->iquery = null; } else { $this->iquery = $this->replace_invalid_with_pct_encoding($iquery, '!$&\'()*+,;=:@/?', true); $this->scheme_normalization(); } return true; } /** * Set the ifragment. * * @param string $ifragment * @return bool */ protected function set_fragment($ifragment) { if ($ifragment === null) { $this->ifragment = null; } else { $this->ifragment = $this->replace_invalid_with_pct_encoding($ifragment, '!$&\'()*+,;=:@/?'); $this->scheme_normalization(); } return true; } /** * Convert an IRI to a URI (or parts thereof) * * @param string|bool IRI to convert (or false from {@see get_iri}) * @return string|false URI if IRI is valid, false otherwise. */ protected function to_uri($string) { if (!is_string($string)) { return false; } static $non_ascii; if (!$non_ascii) { $non_ascii = implode('', range("\x80", "\xFF")); } $position = 0; $strlen = strlen($string); while (($position += strcspn($string, $non_ascii, $position)) < $strlen) { $string = substr_replace($string, sprintf('%%%02X', ord($string[$position])), $position, 1); $position += 3; $strlen += 2; } return $string; } /** * Get the complete IRI * * @return string */ protected function get_iri() { if (!$this->is_valid()) { return false; } $iri = ''; if ($this->scheme !== null) { $iri .= $this->scheme . ':'; } if (($iauthority = $this->get_iauthority()) !== null) { $iri .= '//' . $iauthority; } $iri .= $this->ipath; if ($this->iquery !== null) { $iri .= '?' . $this->iquery; } if ($this->ifragment !== null) { $iri .= '#' . $this->ifragment; } return $iri; } /** * Get the complete URI * * @return string */ protected function get_uri() { return $this->to_u