Merge branch 'master' of github.com:yandex/ClickHouse

This commit is contained in:
Alexey Milovidov 2017-06-08 09:29:49 +03:00
commit a4b28db411
19 changed files with 88 additions and 1271 deletions

View File

@ -35,6 +35,7 @@ namespace ErrorCodes
extern const int CANNOT_PARSE_DATE;
extern const int CANNOT_PARSE_DATETIME;
extern const int CANNOT_READ_ARRAY_FROM_TEXT;
extern const int CANNOT_PARSE_NUMBER;
}
/// Helper functions for formatted input.
@ -243,7 +244,12 @@ ReturnType readIntTextImpl(T & x, ReadBuffer & buf)
if (std::is_signed<T>::value)
negative = true;
else
return ReturnType(false);
{
if (throw_exception)
throw Exception("Unsigned type must not contain '-' symbol", ErrorCodes::CANNOT_PARSE_NUMBER);
else
return ReturnType(false);
}
break;
case '0':
case '1':
@ -887,6 +893,7 @@ inline T parse(const char * data, size_t size)
T res;
ReadBufferFromMemory buf(data, size);
readText(res, buf);
assertEOF(buf);
return res;
}

View File

@ -8,8 +8,32 @@
#include <Poco/HexBinaryEncoder.h>
/// Test helper: checks that parsing `str` as an unsigned integer is rejected.
/// Returns normally when parse<unsigned> throws (any exception type is accepted).
/// Otherwise reports the unexpectedly parsed value on stderr and terminates
/// the process via exit(-1).
static void parse_trash_string_as_uint_must_fail(const std::string & str)
{
    using namespace DB;

    /// Sentinel initial value; it is only printed if parsing unexpectedly succeeds.
    unsigned x = 0xFF;

    try
    {
        x = parse<unsigned>(str);
    }
    catch (...)
    {
        /// Ok: an exception is the expected outcome for this input.
        return;
    }

    /// Name the offending input and terminate the line so the diagnostic
    /// does not run into whatever is printed to stderr next.
    std::cerr << "Parsing must fail, but finished successfully x=" << x
              << " for input '" << str << "'\n";
    exit(-1);
}
int main(int argc, char ** argv)
{
parse_trash_string_as_uint_must_fail("trash");
parse_trash_string_as_uint_must_fail("-1");
if (argc != 2)
{
std::cerr << "Usage: " << std::endl

View File

@ -136,7 +136,14 @@ static std::chrono::steady_clock::duration parseSessionTimeout(const HTMLForm &
unsigned max_session_timeout = config.getUInt("max_session_timeout", 3600);
std::string session_timeout_str = params.get("session_timeout");
session_timeout = parse<unsigned>(session_timeout_str);
try
{
session_timeout = parse<unsigned>(session_timeout_str);
}
catch (...)
{
throw Exception(getCurrentExceptionMessage(false) + ". Invalid session timeout", ErrorCodes::INVALID_SESSION_TIMEOUT);
}
if (session_timeout > max_session_timeout)
throw Exception("Session timeout '" + session_timeout_str + "' is larger than max_session_timeout: " + toString(max_session_timeout)

View File

@ -44,7 +44,7 @@ check "$url$session&session_check=0" "$select" "Exception" 0 "session_check=0 do
request $url$session "SET max_rows_to_read=7777777"
check "$url$session&session_timeout=string" "$select" "Exception.*Invalid session timeout" 1 "Non-numeric value accepted as a timeout."
check "$url$session&session_timeout=3601" "$select" "Exception.*Invalid session timeout" 1 "More then 3600 seconds accepted as a timeout."
check "$url$session&session_timeout=3601" "$select" "Exception.*Maximum session timeout*" 1 "More then 3600 seconds accepted as a timeout."
check "$url$session&session_timeout=-1" "$select" "Exception.*Invalid session timeout" 1 "Negative timeout accepted."
check "$url$session&session_timeout=0" "$select" "Exception" 0 "Zero timeout not accepted."
check "$url$session&session_timeout=3600" "$select" "Exception" 0 "3600 second timeout not accepted."

View File

@ -1,522 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=edge"/>
<title>ClickHouse — open-source distributed column-oriented DBMS</title>
<link rel="shortcut icon" href="favicon.ico"/>
<meta property="og:title" content="ClickHouse DBMS"/>
<meta property="og:description" content="ClickHouse is an open-source column-oriented database management system that allows generating analytical data reports in real time."/>
<meta property="og:type" content="website"/>
<meta property="twitter:title" content="ClickHouse DBMS"/>
<meta name="description" content="ClickHouse is an open-source distributed column-oriented database management system that allows generating analytical data reports in real time. Created by Yandex, ClickHouse manages extremely large volumes of data in a stable and sustainable manner."/>
<meta name="keywords" content="ClickHouse, DBMS, OLAP, relational, analytics, analytical, big data, open-source, SQL, web-analytics"/>
<style type="text/css">
@font-face {
font-family: 'Yandex Sans Text Web';
src: url(https://yastatic.net/adv-www/_/yy5JveR58JFkc97waf-xp0i6_jM.eot);
src: url(https://yastatic.net/adv-www/_/yy5JveR58JFkc97waf-xp0i6_jM.eot?#iefix) format('embedded-opentype'),
url(https://yastatic.net/adv-www/_/CYblzLEXzCqQIvrYs7QKQe2omRk.woff2) format('woff2'),
url(https://yastatic.net/adv-www/_/pUcnOdRwl83MvPPzrNomhyletnA.woff) format('woff'),
url(https://yastatic.net/adv-www/_/vNFEmXOcGYKJ4AAidUprHWoXrLU.ttf) format('truetype'),
url(https://yastatic.net/adv-www/_/0w7OcWZM_QLP8x-LQUXFOgXO6dE.svg#YandexSansTextWeb-Bold) format('svg');
font-weight: 700;
font-style: normal;
font-stretch: normal
}
@font-face {
font-family: 'Yandex Sans Text Web';
src: url(https://yastatic.net/adv-www/_/LI6l3L2RqcgxBe2pXmuUha37czQ.eot);
src: url(https://yastatic.net/adv-www/_/LI6l3L2RqcgxBe2pXmuUha37czQ.eot?#iefix) format('embedded-opentype'),
url(https://yastatic.net/adv-www/_/z3MYElcut0R2MF_Iw1RDNrstgYs.woff2) format('woff2'),
url(https://yastatic.net/adv-www/_/1jvKJ_-hCXl3s7gmFl-y_-UHTaI.woff) format('woff'),
url(https://yastatic.net/adv-www/_/9nzjfpCR2QHvK1EzHpDEIoVFGuY.ttf) format('truetype'),
url(https://yastatic.net/adv-www/_/gwyBTpxSwkFCF1looxqs6JokKls.svg#YandexSansTextWeb-Regular) format('svg');
font-weight: 400;
font-style: normal;
font-stretch: normal
}
@font-face {
font-family: 'Yandex Sans Text Web';
src: url(https://yastatic.net/adv-www/_/ayAFYoY8swgBLhq_I56tKj2JftU.eot);
src: url(https://yastatic.net/adv-www/_/ayAFYoY8swgBLhq_I56tKj2JftU.eot?#iefix) format('embedded-opentype'),
url(https://yastatic.net/adv-www/_/lGQcYklLVV0hyvz1HFmFsUTj8_0.woff2) format('woff2'),
url(https://yastatic.net/adv-www/_/f0AAJ9GJ4iiwEmhG-7PWMHk6vUY.woff) format('woff'),
url(https://yastatic.net/adv-www/_/4UDe4nlVvgEJ-VmLWNVq3SxCsA.ttf) format('truetype'),
url(https://yastatic.net/adv-www/_/EKLr1STNokPqxLAQa_RyN82pL98.svg#YandexSansTextWeb-Light) format('svg');
font-weight: 300;
font-style: normal;
font-stretch: normal
}
@font-face {
font-family: 'Yandex Sans Display Web';
src: url(https://yastatic.net/adv-www/_/H63jN0veW07XQUIA2317lr9UIm8.eot);
src: url(https://yastatic.net/adv-www/_/H63jN0veW07XQUIA2317lr9UIm8.eot?#iefix) format('embedded-opentype'),
url(https://yastatic.net/adv-www/_/sUYVCPUAQE7ExrvMS7FoISoO83s.woff2) format('woff2'),
url(https://yastatic.net/adv-www/_/v2Sve_obH3rKm6rKrtSQpf-eB7U.woff) format('woff'),
url(https://yastatic.net/adv-www/_/PzD8hWLMunow5i3RfJ6WQJAL7aI.ttf) format('truetype'),
url(https://yastatic.net/adv-www/_/lF_KG5g4tpQNlYIgA0e77fBSZ5s.svg#YandexSansDisplayWeb-Regular) format('svg');
font-weight: 400;
font-style: normal;
font-stretch: normal
}
@font-face {
font-family: 'Yandex Sans Display Web';
src: url(https://yastatic.net/adv-www/_/g8_MyyKVquSZ3xEL6tarK__V9Vw.eot);
src: url(https://yastatic.net/adv-www/_/g8_MyyKVquSZ3xEL6tarK__V9Vw.eot?#iefix) format('embedded-opentype'),
url(https://yastatic.net/adv-www/_/LGiRvlfqQHlWR9YKLhsw5e7KGNA.woff2) format('woff2'),
url(https://yastatic.net/adv-www/_/40vXwNl4eYYMgteIVgLP49dwmfc.woff) format('woff'),
url(https://yastatic.net/adv-www/_/X6zG5x_wO8-AtwJ-vDLJcKC5228.ttf) format('truetype'),
url(https://yastatic.net/adv-www/_/ZKhaR0m08c8CRRL77GtFKoHcLYA.svg#YandexSansDisplayWeb-Light) format('svg');
font-weight: 300;
font-style: normal;
font-stretch: normal
}
body {
background: #fff;
font: 300 12pt/200% 'Yandex Sans Text Web', Arial, sans-serif;
}
.page {
width: 800px;
margin: auto;
}
h1 {
font: 400 100pt 'Yandex Sans Display Web', Arial, sans-serif;
margin: 100px 0 30px;
}
h2 {
margin: 47px 0 23px;
font: 400 24pt 'Yandex Sans Display Web', Arial, sans-serif;
}
a:link, a:visited {
color: #08f;
text-decoration: none;
}
a:hover, a:active {
color: #f00;
text-decoration: underline;
}
.menu {
margin: 0 0 50px;
font-weight: 400;
}
.menu_item:link,
.menu_item:active,
.menu_item:visited {
margin: 0 8px 0 0;
padding: 5px;
background: #ffdb4d;
color: #000;
}
.menu_item:hover {
background: #f80;
color: #fff;
text-decoration: none;
}
.short_description,
.call_to_action {
font: 300 18pt/150% 'Yandex Sans Display Web', Arial, sans-serif;
margin: 6px 0 24px;
}
.call_to_action {
color: #07f;
font-weight: 400;
}
.download_div {
margin: 80px;
text-align: center;
}
.download:link,
.download:active,
.download:visited {
background: #ffdb4d;
font: 300 18pt 'Yandex Sans Display Web', Arial, sans-serif;
padding: 15px 22px;
margin: auto;
color: #000;
text-decoration: none;
border-radius: 3px;
}
.download:hover {
color: #f00;
text-decoration: none;
}
.footer {
text-align: right;
padding: 10px 0 0;
color: #888;
font-size: 10pt;
}
.feedback_form {
position: absolute;
}
.feedback_form, .download_instructions {
display: none;
background: #fff;
padding: 20px;
box-shadow: 10px 10px 300px 350px #fff;
}
code {
font: 13px/18px monospace, "Courier New";
display: block;
border-left: 5px solid #ffdb4d;
padding: 5px 10px;
background: #000;
color: #ccc;
}
ul {
margin: 0;
padding-left: 0;
}
ul.dashed {
list-style-type: none;
}
ul.dashed > li {
text-indent: 5px;
}
ul.dashed > li:before {
content: '— ';
text-indent: 5px;
}
.warranty {
font-size: 10pt;
color: #888;
line-height: 150%;
}
.nomarkerlist {
list-style-type: none;
}
.distributive_selected {
color: #000;
font-weight: bold;
}
.distributive_not_selected {
color: #08f;
cursor: pointer;
border-bottom: 1px dashed #08f;
}
</style>
</head>
<body>
<div class="page">
<h1>ClickHouse</h1>
<div class="feedback_form">
<h2>Feedback</h2>
<p>Ask any questions on <a href="https://stackoverflow.com/questions/tagged/clickhouse">Stack Overflow</a>.</p>
<p>Use <a href="https://groups.google.com/group/clickhouse">Google Group</a> for discussion.</p>
<p>Or send private message to developers: <a href="mailto:clickhouse-feedback@yandex-team.com">clickhouse-feedback@yandex-team.com</a>.</p>
<p>Discuss with real users in Telegram chat in <a href="https://telegram.me/clickhouse_en">English</a> or in <a href="https://telegram.me/clickhouse_ru">Russian</a>.</p>
<p class="warranty">Software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,<br /> either express or implied.</p>
</div>
<p class="short_description">ClickHouse is an open-source column-oriented database management system that allows generating analytical data reports in real time.</p>
<div class="menu">
<a class="menu_item" href="reference_en.html">Documentation</a>
<a class="menu_item" href="tutorial.html">Quick start</a>
<a class="menu_item" href="#download" id="download_menu_item">Download</a>
<a class="menu_item" href="https://github.com/yandex/ClickHouse">Source</a>
<a class="menu_item" href="benchmark.html">Benchmark</a>
<a class="menu_item" href="#feedback" id="feedback">Feedback</a>
</div>
<p>ClickHouse manages extremely large volumes of data in a stable and sustainable manner. It&nbsp;currently powers <a href="https://metrica.yandex.com/">Yandex.Metrica</a>, world&rsquo;s <a href="http://w3techs.com/technologies/overview/traffic_analysis/all">second largest</a> web analytics platform, with over 20.3 trillion database records and over 20 billion events a day, generating customized reports on-the-fly, directly from non-aggregated data. This system was successfully implemented at <a href="https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/">CERN&rsquo;s LHCb experiment</a> to store and process metadata on 10bn events with over 1000 attributes per event registered in 2011.</p>
<h2>ClickHouse. Just makes you think faster.</h2>
<ul class="dashed">
<li>Run more queries in the same amount of time</li>
<li>Test more hypotheses</li>
<li>Slice and dice your data in many more new ways</li>
<li>Look at your data from new angles</li>
<li>Discover new dimensions</li>
</ul>
<h2>Linearly scalable</h2>
<p>ClickHouse allows companies to add servers to their clusters when necessary without investing time or money into additional DBMS modification. The system has been successfully serving <a href="https://metrica.yandex.com/">Yandex.Metrica</a>, while the servers just in its main cluster, located in six geographically distributed datacenters, have grown from 60 to 394 in two years.</p>
<p>ClickHouse scales well both vertically and horizontally. ClickHouse is easily adaptable to perform both on hundreds of node clusters, and on a single server or even virtual machine. It&nbsp;currently has installations with more than two trillion rows per single node, as well as installations with 100 TB of storage per single node.</p>
<h2>Hardware-efficient</h2>
<p>ClickHouse processes typical analytical queries two to three orders of magnitude faster than traditional row-oriented systems with the same available IO throughput. The system&rsquo;s columnar format allows fitting more hot data in the server&rsquo;s RAM, which leads to a shorter response time.</p>
<p>ClickHouse allows to minimize number of seeks for range queries, which increases efficiency of using rotational drives, as it&nbsp;maintains locality of reference for stored data continually.</p>
<p>ClickHouse is CPU efficient because of its vectorized query execution and runtime code generation.</p>
<p>By minimizing data transfers for most types of queries, ClickHouse enables companies to manage their data and create reports without using a network that supports high-performance computing.</p>
<h2>Fast</h2>
<p>ClickHouse&rsquo;s performance <a href="benchmark.html">exceeds</a> comparable column-oriented DBMS currently available on the market. It&nbsp;processes hundreds of millions to more than a billion rows and tens of gigabytes of data per single server per second.</p>
<p>ClickHouse uses all available hardware to its full potential to process each query as fast as possible. The peak processing performance for a single query (after decompression, only used columns) stands at more than 2&nbsp;terabytes per second.</p>
<h2>Fault-tolerant</h2>
<p>ClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. Downtime of a single node or the whole datacenter won&rsquo;t affect the system&rsquo;s availability for reads and writes. Distributed reads are automatically balanced to live replicas without increasing latency. Replicated data are synchronized automatically or semi-automatically after the downtime.</p>
<h2>Feature-rich</h2>
<p>ClickHouse features a number of built-in user-friendly web analytics capabilities, including probabilistic data structures for fast and memory-efficient calculation of cardinalities and quantiles, or functions for addressing URLs and IPs (both IPv4 and IPv6) as well as identifying dates, times and time zones.</p>
<p>Data management methods available in ClickHouse, such as arrays, array joins and nested data structures, are extremely efficient for managing denormalized data.</p>
<p>Using ClickHouse allows joining both distributed data and co-located data, as the system supports local joins and distributed joins. It&nbsp;also offers an opportunity to use external dictionaries, dimension tables loaded from an external source, for seamless joins.</p>
<p>ClickHouse supports approximate query processing &mdash; you can get results as fast as you want, which is indispensable when dealing with terabytes and petabytes of data.</p>
<p>The system&rsquo;s conditional aggregate functions, calculation of totals and extremes, allow getting results with a single query without having to run a number of them.</p>
<h2>Simple and handy</h2>
<p>ClickHouse is simple and works out-of-the-box. As well as performing on hundreds of node clusters, this system can be easily installed on a single server or even a virtual machine. No development experience or code-writing skills are required to install ClickHouse.</p>
<h2>Highly reliable</h2>
<p>ClickHouse has been managing petabytes of data serving a number of high-load mass-audience services of Russia&rsquo;s leading search provider and one of Europe&rsquo;s largest IT companies, <a href="https://www.yandex.com/company/">Yandex</a>. Since 2012, ClickHouse has been providing robust database management for the company&rsquo;s <a href="https://metrica.yandex.com/">web analytics service</a>, comparison shopping platform, email service, online advertising platform, business intelligence and infrastructure monitoring.</p>
<p>ClickHouse is a purely distributed system located on independent nodes, which has no single point of failure.</p>
<p>Software or hardware failures or misconfigurations do not result in loss of data. Instead of deleting "broken" data, Clickhouse saves it&nbsp;or asks you what to do before a startup. All data are checksummed before every read or write to disk or network. It&nbsp;is virtually impossible to delete data by accident.</p>
<p>ClickHouse offers flexible limits on query complexity and resource usage, which can be fine-tuned using settings. It&nbsp;is possible to simultaneously serve both a number of high priority low-latency requests and some long-running queries with lowered priority.</p>
<h2>Opens new possibilities</h2>
<p>ClickHouse streamlines all your data processing. It&rsquo;s easy to use: ingest all your structured data into the system, and it&nbsp;is instantly available for reports. New columns for new properties or dimensions can be easily added to the system at any time without slowing it&nbsp;down.</p>
<p>ClickHouse works <a href="benchmark.html">100-1,000x faster</a> than traditional approaches. In contrast to data management methods, where vast amounts of raw data in its native format are available as a data lake for any given query, ClickHouse, in most cases, offers instant results: the data is processed faster than it&nbsp;takes to make a query.</p>
<table width="100%">
<tr>
<td valign="top">
<h2>Key Features</h2>
<ul class="dashed">
<li>True column-oriented</li>
<li>Vectorized query execution</li>
<li>Data compression</li>
<li>Parallel and distributed query execution</li>
<li>Real-time data ingestion</li>
<li>On-disk locality of reference</li>
<li>Real-time query processing</li>
<li>Cross-datacenter replication</li>
<li>High availability</li>
<li>SQL support</li>
<li>Local and distributed joins</li>
<li>Pluggable external dimension tables</li>
<li>Arrays and nested data types</li>
<li>Approximate query processing</li>
<li>Probabilistic data structures</li>
<li>Full support of IPv6</li>
<li>Features for web analytics</li>
<li>State-of-the-art algorithms</li>
<li>Detailed documentation</li>
<li>Clean documented code</li>
</ul>
</td>
<td valign="top">
<h2>Applications</h2>
<ul class="dashed">
<li>Web and App analytics</li>
<li>Advertising networks and RTB</li>
<li>Telecommunications</li>
<li>E-commerce</li>
<li>Information security</li>
<li>Monitoring and telemetry</li>
<li>Business intelligence</li>
<li>Online games</li>
<li>Internet of Things</li>
</ul>
</td>
</tr>
</table>
<div class="download_div">
<a class="download" href="#" id="download_big_button">Download</a>
</div>
<div class="download_instructions">
<h2>Download</h2>
<p>System requirements: Linux, x86_64 with SSE 4.2.</p>
<p>Install packages for
<span class="distributive_not_selected" id="ubuntu_xenial">Ubuntu 16.04 (Xenial)</span> or <span class="distributive_selected" id="ubuntu_trusty">Ubuntu 14.04 (Trusty)</span> or <span class="distributive_not_selected" id="ubuntu_precise">Ubuntu 12.04 (Precise)</span>:
</p>
<code>
<pre>
sudo apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 # optional
sudo mkdir -p /etc/apt/sources.list.d
echo "deb http://repo.yandex.ru/clickhouse/<span id="distributive">trusty</span> stable main" |
&nbsp;&nbsp;&nbsp;&nbsp;sudo tee /etc/apt/sources.list.d/clickhouse.list
sudo apt-get update
sudo apt-get install clickhouse-server-common clickhouse-client
sudo service clickhouse-server start
clickhouse-client
</pre>
</code>
<p>Read the <a href="reference_en.html">documentation</a>.</p>
<p>Or build ClickHouse from <a href="https://github.com/yandex/ClickHouse">sources</a> according to the <a href="https://github.com/yandex/ClickHouse/blob/master/doc/build.md">instruction</a>.</p>
</div>
<p class="footer">&copy; 2016&ndash;2017 <a href="https://yandex.com/company/">YANDEX</a> LLC</p>
<script type="text/javascript">
var block_visible = [];
/**
 * Shows the first element that carries the given class name, switches the
 * page background to grey (#888) and records the block as visible in
 * block_visible.
 */
function showBlock(class_name) {
    var matching = document.getElementsByClassName(class_name);
    var target = matching[0];

    target.style.display = 'block';
    document.body.style.backgroundColor = '#888';

    block_visible[class_name] = true;
}
/**
 * Hides the first element that carries the given class name, restores the
 * white page background and records the block as hidden in block_visible.
 */
function hideBlock(class_name) {
    var matching = document.getElementsByClassName(class_name);
    var target = matching[0];

    target.style.display = 'none';
    document.body.style.backgroundColor = 'white';

    block_visible[class_name] = false;
}
document.getElementById('feedback').addEventListener('click', function(e) {
var class_name = 'feedback_form';
block_visible[class_name] ? hideBlock(class_name) : showBlock(class_name);
e.stopPropagation();
});
/**
 * Toggles the download instructions block, hides the big download button and
 * scrolls the window to the instructions. The event argument is optional:
 * when present, its propagation and default action are suppressed.
 */
var show_download_block = function(e) {
    var class_name = 'download_instructions';

    if (block_visible[class_name]) {
        hideBlock(class_name);
    } else {
        showBlock(class_name);
    }

    var big_button = document.getElementById('download_big_button');
    big_button.style.display = 'none';

    if (e) {
        e.stopPropagation();
        e.preventDefault();
    }

    var instructions = document.getElementsByClassName(class_name)[0];
    window.scrollTo(0, instructions.offsetTop);
};
document.getElementById('download_menu_item').addEventListener('click', show_download_block);
document.getElementById('download_big_button').addEventListener('click', show_download_block);
document.body.addEventListener('click', function(e) {
var element = e.target;
while (element !== document.body) {
if (element.className == 'feedback_form' || element.className == 'download_instructions') {
return;
}
element = element.parentNode;
}
hideBlock('feedback_form');
hideBlock('download_instructions');
document.getElementById('download_big_button').style.display = 'inline';
});
if (location.hash == "#download") {
show_download_block();
}
var hostParts = window.location.host.split('.');
if (hostParts.length > 2) {
window.location.host = hostParts[0] + '.' + hostParts[1];
}
if (location.hash == "#feedback") {
var class_name = 'feedback_form';
block_visible[class_name] ? hideBlock(class_name) : showBlock(class_name);
}
var available_distributives = ['xenial', 'trusty', 'precise'];
/**
 * Marks one Ubuntu distribution as the selected one in the download
 * instructions.
 *
 * Writes `name` into the #distributive placeholder and, for every entry of
 * available_distributives, sets the class of the #ubuntu_<entry> element to
 * 'distributive_selected' when it equals `name` and to
 * 'distributive_not_selected' otherwise.
 */
function selectDistributive(name) {
    // The name is plain text, so set it as text rather than parsing it as markup.
    document.getElementById('distributive').textContent = name;

    available_distributives.forEach(function(distr) {
        var class_name = (name == distr) ? 'distributive_selected' : 'distributive_not_selected';
        document.getElementById('ubuntu_' + distr).className = class_name;
    });
}
available_distributives.forEach(function(distr) {
document.getElementById('ubuntu_' + distr).addEventListener('click', function(e) { selectDistributive(distr); });
});
</script>
<!-- Yandex.Metrika counter -->
<script src="https://mc.yandex.ru/metrika/watch.js" type="text/javascript"></script>
<script type="text/javascript">
try { var yaCounter18343495 = new Ya.Metrika({id:18343495,
webvisor:true,
clickmap:true,
trackLinks:true,
accurateTrackBounce:true,
trackHash:true});
} catch(e) { }
</script>
<noscript><div><img src="https://mc.yandex.ru/watch/18343495" style="position:absolute; left:-9999px;" alt="" /></div></noscript>
<!-- /Yandex.Metrika counter -->
</div>
</body>
</html>

View File

@ -22,13 +22,22 @@ pre {
input {
display: block;
margin-bottom: 4px;
margin: 0 0 4px 0;
}
a.reference {
border-bottom: none;
}
input[type="submit"] {
border: none!important;
background: #fc0;
}
#svg-flag {
border: 1px solid #eee;
}
@font-face {
font-family: 'Yandex Sans Text Web';
src: url(https://yastatic.net/adv-www/_/yy5JveR58JFkc97waf-xp0i6_jM.eot);

View File

@ -0,0 +1,7 @@
<svg xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:svg="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg" version="1.1" id="svg-flag" viewBox="0 0 25 15" height="12" width="20" border="1">
<defs id="defs14"/>
<rect id="rect4" fill="#00247d" height="15" width="25"/>
<path id="path6" stroke-width="3" stroke="#fff" d="M0 0L25 15M25 0L0 15"/>
<path id="path8" stroke-width="5" stroke="#fff" d="M12.5 0V15M0 7.5H25"/>
<path id="path10" stroke-width="3" stroke="#cf142b" d="M12.5 0V15M0 7.5H25"/>
</svg>

After

Width:  |  Height:  |  Size: 620 B

View File

@ -0,0 +1,6 @@
<svg xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:svg="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg" version="1.1" id="svg-flag" height="12" width="20" viewBox="0 0 10 6" border="1">
<defs id="defs12"/>
<rect id="rect4" height="3" width="10" fill="#fff"/>
<rect id="rect6" height="3" width="10" y="3" fill="#d52b1e"/>
<rect id="rect8" height="2" width="10" y="2" fill="#0039a6"/>
</svg>

After

Width:  |  Height:  |  Size: 511 B

View File

@ -125,7 +125,7 @@ html_theme_options = {
'link': '#08f',
'link_hover': 'red',
'extra_nav_links': collections.OrderedDict([
('Switch to Russian 🇷🇺 ', '/docs/ru/'),
('Switch to Russian <img id="svg-flag" src="/docs/en/_static/ru.svg" width="20" height="12" />', '/docs/ru/'),
('Single page documentation', '/docs/en/single/'),
('Website home', '/'),
('GitHub', 'https://github.com/yandex/ClickHouse'),

View File

@ -1,4 +1,4 @@
Перешардирование
Resharding
----------------
.. code-block:: sql

View File

@ -125,7 +125,7 @@ html_theme_options = {
'link': '#08f',
'link_hover': 'red',
'extra_nav_links': collections.OrderedDict([
('Switch to English 🇬🇧', '/docs/en/'),
('Switch to English <img id="svg-flag" src="/docs/ru/_static/en.svg" width="20" height="12" />', '/docs/en/'),
('Документация на одной странице', '/docs/ru/single/'),
('Главная страница сайта', '/'),
('GitHub', 'https://github.com/yandex/ClickHouse'),

View File

@ -1,711 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<title>ClickHouse — quick start guide</title>
<link rel="shortcut icon" href="favicon.ico"/>
<meta name="description" content="Quick start guide to ClickHouse — open-source distributed column-oriented DBMS"/>
<meta name="keywords" content="ClickHouse, DBMS, OLAP, relational, analytics, analytical, big data, open-source, SQL, web-analytics"/>
<style type="text/css">
@font-face {
font-family: 'Yandex Sans Text Web';
src: url(https://yastatic.net/adv-www/_/yy5JveR58JFkc97waf-xp0i6_jM.eot);
src: url(https://yastatic.net/adv-www/_/yy5JveR58JFkc97waf-xp0i6_jM.eot?#iefix) format('embedded-opentype'),
url(https://yastatic.net/adv-www/_/CYblzLEXzCqQIvrYs7QKQe2omRk.woff2) format('woff2'),
url(https://yastatic.net/adv-www/_/pUcnOdRwl83MvPPzrNomhyletnA.woff) format('woff'),
url(https://yastatic.net/adv-www/_/vNFEmXOcGYKJ4AAidUprHWoXrLU.ttf) format('truetype'),
url(https://yastatic.net/adv-www/_/0w7OcWZM_QLP8x-LQUXFOgXO6dE.svg#YandexSansTextWeb-Bold) format('svg');
font-weight: 700;
font-style: normal;
font-stretch: normal
}
@font-face {
font-family: 'Yandex Sans Text Web';
src: url(https://yastatic.net/adv-www/_/LI6l3L2RqcgxBe2pXmuUha37czQ.eot);
src: url(https://yastatic.net/adv-www/_/LI6l3L2RqcgxBe2pXmuUha37czQ.eot?#iefix) format('embedded-opentype'),
url(https://yastatic.net/adv-www/_/z3MYElcut0R2MF_Iw1RDNrstgYs.woff2) format('woff2'),
url(https://yastatic.net/adv-www/_/1jvKJ_-hCXl3s7gmFl-y_-UHTaI.woff) format('woff'),
url(https://yastatic.net/adv-www/_/9nzjfpCR2QHvK1EzHpDEIoVFGuY.ttf) format('truetype'),
url(https://yastatic.net/adv-www/_/gwyBTpxSwkFCF1looxqs6JokKls.svg#YandexSansTextWeb-Regular) format('svg');
font-weight: 400;
font-style: normal;
font-stretch: normal
}
@font-face {
font-family: 'Yandex Sans Text Web';
src: url(https://yastatic.net/adv-www/_/ayAFYoY8swgBLhq_I56tKj2JftU.eot);
src: url(https://yastatic.net/adv-www/_/ayAFYoY8swgBLhq_I56tKj2JftU.eot?#iefix) format('embedded-opentype'),
url(https://yastatic.net/adv-www/_/lGQcYklLVV0hyvz1HFmFsUTj8_0.woff2) format('woff2'),
url(https://yastatic.net/adv-www/_/f0AAJ9GJ4iiwEmhG-7PWMHk6vUY.woff) format('woff'),
url(https://yastatic.net/adv-www/_/4UDe4nlVvgEJ-VmLWNVq3SxCsA.ttf) format('truetype'),
url(https://yastatic.net/adv-www/_/EKLr1STNokPqxLAQa_RyN82pL98.svg#YandexSansTextWeb-Light) format('svg');
font-weight: 300;
font-style: normal;
font-stretch: normal
}
@font-face {
font-family: 'Yandex Sans Display Web';
src: url(https://yastatic.net/adv-www/_/H63jN0veW07XQUIA2317lr9UIm8.eot);
src: url(https://yastatic.net/adv-www/_/H63jN0veW07XQUIA2317lr9UIm8.eot?#iefix) format('embedded-opentype'),
url(https://yastatic.net/adv-www/_/sUYVCPUAQE7ExrvMS7FoISoO83s.woff2) format('woff2'),
url(https://yastatic.net/adv-www/_/v2Sve_obH3rKm6rKrtSQpf-eB7U.woff) format('woff'),
url(https://yastatic.net/adv-www/_/PzD8hWLMunow5i3RfJ6WQJAL7aI.ttf) format('truetype'),
url(https://yastatic.net/adv-www/_/lF_KG5g4tpQNlYIgA0e77fBSZ5s.svg#YandexSansDisplayWeb-Regular) format('svg');
font-weight: 400;
font-style: normal;
font-stretch: normal
}
@font-face {
font-family: 'Yandex Sans Display Web';
src: url(https://yastatic.net/adv-www/_/g8_MyyKVquSZ3xEL6tarK__V9Vw.eot);
src: url(https://yastatic.net/adv-www/_/g8_MyyKVquSZ3xEL6tarK__V9Vw.eot?#iefix) format('embedded-opentype'),
url(https://yastatic.net/adv-www/_/LGiRvlfqQHlWR9YKLhsw5e7KGNA.woff2) format('woff2'),
url(https://yastatic.net/adv-www/_/40vXwNl4eYYMgteIVgLP49dwmfc.woff) format('woff'),
url(https://yastatic.net/adv-www/_/X6zG5x_wO8-AtwJ-vDLJcKC5228.ttf) format('truetype'),
url(https://yastatic.net/adv-www/_/ZKhaR0m08c8CRRL77GtFKoHcLYA.svg#YandexSansDisplayWeb-Light) format('svg');
font-weight: 300;
font-style: normal;
font-stretch: normal
}
body {
background: #fff;
font: 300 12pt/150% 'Yandex Sans Text Web', Arial, sans-serif;
}
.page {
width: 900px;
margin: auto;
}
h1
{
font-family: 'Yandex Sans Display Web', Arial, sans-serif;
font-size: 100px;
font-weight: normal;
margin-top: 100px;
margin-bottom: 0;
text-align: center;
padding-top: 27px;
}
.title_link, .title_link:active, .title_link:visited, .title_link:link, .title_link:hover
{
text-decoration: none;
color: #000;
}
h2
{
font: normal 50px 'Yandex Sans Display Web', Arial, sans-serif;
text-align: center;
margin-top: 35px;
margin-bottom: 50px;
}
h3
{
font: normal 24px 'Yandex Sans Display Web', Arial, sans-serif;
margin-top: 36px;
}
a:link, a:visited {
color: #08f;
text-decoration: none;
}
a:hover, a:active {
color: #f00;
text-decoration: underline;
}
.footer {
text-align: right;
margin-top: 40px;
border-top: 1px solid #EEE;
padding: 10px 0 0;
color: #888;
font-size: 10pt;
}
pre {
font: 13px/18px monospace, "Courier New";
display: block;
border-left: 5px solid #ffdb4d;
padding: 5px 10px;
background-color: #FFF8E8;
}
.spoiler
{
margin-bottom: 10px;
}
.spoiler_body
{
display: none;
}
.spoiler_title
{
color: #08f;
border-bottom: 1px dotted #08f;
}
.spoiler_title:hover
{
cursor: pointer;
color: #f00;
border-bottom: 1px dashed #f00;
text-decoration: none;
}
.tip
{
background-color: #EEE;
border: 1px solid #EEE;
padding: 5px 10px 5px 10px;
}
.tip b
{
font-size: 150%;
color: #888;
}
.warranty {
font-size: 10pt;
color: #888;
line-height: 150%;
}
</style>
</head>
<body>
<script type="text/javascript">
/**
 * Builds the parameter object from the page cookies.
 * Returns { "login": <value> } when document.cookie contains a
 * yandex_login=<word characters or dashes> pair, and an empty object otherwise.
 */
function getParams() {
    var found = /yandex_login=([\w\-]+)/.exec(document.cookie);

    if (found && found.length == 2) {
        return { "login": found[1] };
    }
    return {};
}
</script>
<!-- Yandex.Metrika counter -->
<script src="https://mc.yandex.ru/metrika/watch.js" type="text/javascript"></script>
<script type="text/javascript">
// Create the Yandex.Metrika counter object for counter id 18343495, passing the
// result of getParams() as the visit parameters.
// NOTE(review): `Ya.Metrika` is presumably provided by the watch.js script
// included just above — confirm.
// Any exception thrown while constructing the counter is caught and ignored by
// the empty catch block.
try { var yaCounter18343495 = new Ya.Metrika({id:18343495,
webvisor:true,
clickmap:true,
trackLinks:true,
accurateTrackBounce:true,
trackHash:true,
params: getParams()});
} catch(e) { }
</script>
<noscript><div><img src="https://mc.yandex.ru/watch/18343495" style="position:absolute; left:-9999px;" alt="" /></div></noscript>
<!-- /Yandex.Metrika counter -->
<script type="text/javascript" src="https://yandex.st/jquery/1.7.2/jquery.min.js"></script>
<div class="page">
<div>
<div style="float: left; margin-right: -100%; margin-top: 0; margin-left: 3px;">
<a href="/">
<svg xmlns="http://www.w3.org/2000/svg" width="90" height="80" viewBox="0 0 9 8">
<style>
.o{fill:#fc0}
.r{fill:#f00}
</style>
<path class="r" d="M0,7 h1 v1 h-1 z"/>
<path class="o" d="M0,0 h1 v7 h-1 z"/>
<path class="o" d="M2,0 h1 v8 h-1 z"/>
<path class="o" d="M4,0 h1 v8 h-1 z"/>
<path class="o" d="M6,0 h1 v8 h-1 z"/>
<path class="o" d="M8,3.25 h1 v1.5 h-1 z"/>
</svg>
</a>
</div>
<h1><a class="title_link" href="/">ClickHouse</a></h1>
<h2>Quick start guide</h2>
</div>
<p>Let's get started with a sample dataset from open sources. We will use USA civil flights data from 1987 to 2015. It's hard to call this sample Big Data (it contains 166 million rows, 63 Gb of uncompressed data), but this allows us to quickly get to work. The dataset is available for download <a href="https://yadi.sk/d/pOZxpa42sDdgm">here</a>. You may also download it from the original data source <a href="https://github.com/yandex/ClickHouse/raw/master/doc/example_datasets/1_ontime.txt">as described here</a>.</p>
<p>Firstly we will deploy ClickHouse to a single server. Below that we will also review the process of deployment to a cluster with support for sharding and replication.</p>
<p>On Ubuntu and Debian Linux ClickHouse can be installed from <a href="https://clickhouse.yandex/#download">packages</a>. For other Linux distributions you can <a href="https://github.com/yandex/ClickHouse/blob/master/doc/build.md">compile ClickHouse from sources</a> and then install.</p>
<p><b>clickhouse-client</b> package contains <a href="https://clickhouse.yandex/reference_en.html#Command-line%20client">clickhouse-client</a> application — interactive ClickHouse client. <b>clickhouse-server-base</b> contains a clickhouse-server binary file. <b>clickhouse-server-common</b> — contains config files for the clickhouse-server.</p>
<p>Server config files are located in /etc/clickhouse-server/. Before getting to work please notice the <b>path</b> element in config. <b>Path</b>&nbsp;determines the location for data storage. It's not really handy to directly edit <b>config.xml</b> file considering package updates. Recommended way is to override the config elements in <a href="https://clickhouse.yandex/reference_en.html#Configuration%20files">files of config.d directory</a>.
Also you may want to <a href="https://clickhouse.yandex/reference_en.html#Access%20rights">set up access rights</a> at the start.</p>
<p><b>clickhouse-server</b> won't be launched automatically after package installation. It won't be automatically restarted after updates either. Start the server with:
<pre>sudo service clickhouse-server start</pre>
Default location for server logs is /var/log/clickhouse-server/
Server is ready to handle client connections once the "Ready&nbsp;for&nbsp;connections" message has been logged.</p>
<p>Use <b>clickhouse-client</b> to connect to the server.</p>
<div class="spoiler"><a class="spoiler_title">Tips for clickhouse-client</a>
<div class="spoiler_body">
Interactive mode:
<pre>
clickhouse-client
clickhouse-client --host=... --port=... --user=... --password=...
</pre>
Enable multiline queries:
<pre>
clickhouse-client -m
clickhouse-client --multiline
</pre>
Run queries in batch-mode:
<pre>
clickhouse-client --query='SELECT 1'
echo 'SELECT 1' | clickhouse-client
</pre>
Insert data from a file of a specified format:
<pre>
clickhouse-client --query='INSERT INTO table VALUES' &lt; data.txt
clickhouse-client --query='INSERT INTO table FORMAT TabSeparated' &lt; data.tsv
</pre>
</div></div>
<h3>Create table for sample dataset</h3>
<div class="spoiler"><a class="spoiler_title">Create table query</a>
<div class="spoiler_body">
<pre>
$ clickhouse-client --multiline
ClickHouse client version 0.0.53720.
Connecting to localhost:9000.
Connected to ClickHouse server version 0.0.53720.
:) CREATE TABLE ontime
(
Year UInt16,
Quarter UInt8,
Month UInt8,
DayofMonth UInt8,
DayOfWeek UInt8,
FlightDate Date,
UniqueCarrier FixedString(7),
AirlineID Int32,
Carrier FixedString(2),
TailNum String,
FlightNum String,
OriginAirportID Int32,
OriginAirportSeqID Int32,
OriginCityMarketID Int32,
Origin FixedString(5),
OriginCityName String,
OriginState FixedString(2),
OriginStateFips String,
OriginStateName String,
OriginWac Int32,
DestAirportID Int32,
DestAirportSeqID Int32,
DestCityMarketID Int32,
Dest FixedString(5),
DestCityName String,
DestState FixedString(2),
DestStateFips String,
DestStateName String,
DestWac Int32,
CRSDepTime Int32,
DepTime Int32,
DepDelay Int32,
DepDelayMinutes Int32,
DepDel15 Int32,
DepartureDelayGroups String,
DepTimeBlk String,
TaxiOut Int32,
WheelsOff Int32,
WheelsOn Int32,
TaxiIn Int32,
CRSArrTime Int32,
ArrTime Int32,
ArrDelay Int32,
ArrDelayMinutes Int32,
ArrDel15 Int32,
ArrivalDelayGroups Int32,
ArrTimeBlk String,
Cancelled UInt8,
CancellationCode FixedString(1),
Diverted UInt8,
CRSElapsedTime Int32,
ActualElapsedTime Int32,
AirTime Int32,
Flights Int32,
Distance Int32,
DistanceGroup UInt8,
CarrierDelay Int32,
WeatherDelay Int32,
NASDelay Int32,
SecurityDelay Int32,
LateAircraftDelay Int32,
FirstDepTime String,
TotalAddGTime String,
LongestAddGTime String,
DivAirportLandings String,
DivReachedDest String,
DivActualElapsedTime String,
DivArrDelay String,
DivDistance String,
Div1Airport String,
Div1AirportID Int32,
Div1AirportSeqID Int32,
Div1WheelsOn String,
Div1TotalGTime String,
Div1LongestGTime String,
Div1WheelsOff String,
Div1TailNum String,
Div2Airport String,
Div2AirportID Int32,
Div2AirportSeqID Int32,
Div2WheelsOn String,
Div2TotalGTime String,
Div2LongestGTime String,
Div2WheelsOff String,
Div2TailNum String,
Div3Airport String,
Div3AirportID Int32,
Div3AirportSeqID Int32,
Div3WheelsOn String,
Div3TotalGTime String,
Div3LongestGTime String,
Div3WheelsOff String,
Div3TailNum String,
Div4Airport String,
Div4AirportID Int32,
Div4AirportSeqID Int32,
Div4WheelsOn String,
Div4TotalGTime String,
Div4LongestGTime String,
Div4WheelsOff String,
Div4TailNum String,
Div5Airport String,
Div5AirportID Int32,
Div5AirportSeqID Int32,
Div5WheelsOn String,
Div5TotalGTime String,
Div5LongestGTime String,
Div5WheelsOff String,
Div5TailNum String
)
ENGINE = MergeTree(FlightDate, (Year, FlightDate), 8192);
</pre>
</div></div>
<p>Now we have a table of <a href="https://clickhouse.yandex/reference_en.html#MergeTree">MergeTree type</a>. MergeTree table type is recommended for usage in production. Table of this kind has a primary key used for incremental sort of table data. This allows fast execution of queries in ranges of a primary key.</p>
<p><b>Note</b>
We store ad network banners impressions logs in ClickHouse. Each table entry looks like:
<pre>[Advertiser ID, Impression ID, attribute1, attribute2, &hellip;]</pre>
Let's assume that our aim is to provide a set of reports for each advertiser. A common and frequently demanded query would be to count impressions for a specific Advertiser ID. This means that the table primary key should start with <code>Advertiser ID</code>. In this case ClickHouse needs to read a smaller amount of data to perform the query for a given <code>Advertiser ID</code>.</p>
<h3>Load data</h3>
<pre>xz -v -c -d &lt; ontime.csv.xz | clickhouse-client --query="INSERT INTO ontime FORMAT CSV"</pre>
<p>ClickHouse INSERT query allows to load data in any <a href="https://clickhouse.yandex/reference_en.html#Formats">supported format</a>. Data load requires just O(1) RAM consumption. INSERT query can receive any data volume as input. It's strongly recommended to insert data with <a href="https://clickhouse.yandex/reference_en.html#Performance%20on%20data%20insertion.">not too small size blocks</a>. Notice that insert of blocks with size up to max_insert_block_size (= 1&nbsp;048&nbsp;576 rows by default) is an atomic operation: data block will be inserted completely or not inserted at all. In case of disconnect during insert operation you may not know if the block was inserted successfully. To achieve exactly-once semantics ClickHouse supports idempotency for <a href="https://clickhouse.yandex/reference_en.html#Data%20replication">replicated tables</a>. This means that you may retry insert of the same data block (possibly on a different replicas) but this block will be inserted just once. Anyway in this guide we will load data from our localhost so we may not take care about data blocks generation and exactly-once semantics.</p>
<p>INSERT query into tables of MergeTree type is non-blocking (and so is a SELECT query). You can execute SELECT queries right after or during an insert operation.</p>
<p>Our sample dataset is a bit not optimal. There are two reasons.</p>
<p>The first is that String data type is used in cases when <a href="https://clickhouse.yandex/reference_en.html#Enum">Enum</a> or numeric type would fit best.</p>
<p class="tip"><b></b> When set of possible values is determined and known to be small. (E.g. OS name, browser vendors etc.) it's&nbsp;recommended to use Enums or numbers to improve performance.
When set of possible values is not limited (search&nbsp;query, URL, etc.) just go ahead with String.</p>
<p>The second is that the dataset contains redundant fields like Year, Quarter, Month, DayOfMonth, DayOfWeek. In fact, a single FlightDate would be enough. Most likely they have been added to improve performance for other DBMSes whose DateTime handling functions may be inefficient.</p>
<p class="tip"><b></b> ClickHouse <a href="https://clickhouse.yandex/reference_en.html#Functions%20for%20working%20with%20dates%20and%20times">functions for operating with DateTime fields</a> are well-optimized so such redundancy is not required. Anyway, having many columns is not a reason to worry — ClickHouse is a <a href="https://en.wikipedia.org/wiki/Column-oriented_DBMS">column-oriented DBMS</a>. This allows you to have as many fields as you need. Hundreds of columns in a table is fine for ClickHouse.</p>
<h3>Querying the sample dataset</h3>
<p>Here are some examples of the queries from our test data.</p>
<ul>
<li><div class="spoiler"><a class="spoiler_title">the most popular destinations in 2015;</a>
<div class="spoiler_body">
<pre>
SELECT
OriginCityName,
DestCityName,
count(*) AS flights,
bar(flights, 0, 20000, 40)
FROM ontime WHERE Year = 2015 GROUP BY OriginCityName, DestCityName ORDER BY flights DESC LIMIT 20
</pre><img src="https://habrastorage.org/files/a85/18a/200/a8518a200d6d405a95ee80ea1c8e1c90.png"/>
<pre>
SELECT
OriginCityName &lt; DestCityName ? OriginCityName : DestCityName AS a,
OriginCityName &lt; DestCityName ? DestCityName : OriginCityName AS b,
count(*) AS flights,
bar(flights, 0, 40000, 40)
FROM ontime WHERE Year = 2015 GROUP BY a, b ORDER BY flights DESC LIMIT 20
</pre><img src="https://habrastorage.org/files/d35/78d/b55/d3578db55e304bd7b5eba818abdb53f5.png"/></div></div>
</li>
<li><div class="spoiler"><a class="spoiler_title">the most popular cities of departure;</a>
<div class="spoiler_body">
<pre>
SELECT OriginCityName, count(*) AS flights
FROM ontime GROUP BY OriginCityName ORDER BY flights DESC LIMIT 20
</pre><img src="https://habrastorage.org/files/ef4/141/f34/ef4141f348234773a5349c4bd3e8f804.png"/></div></div>
</li>
<li><div class="spoiler"><a class="spoiler_title">cities of departure which offer maximum variety of destinations;</a>
<div class="spoiler_body">
<pre>
SELECT OriginCityName, uniq(Dest) AS u
FROM ontime GROUP BY OriginCityName ORDER BY u DESC LIMIT 20
</pre><img src="https://habrastorage.org/files/240/9f4/9d1/2409f49d11fb4aa1b8b5ff34cf9ca75d.png"/></div></div>
</li>
<li><div class="spoiler"><a class="spoiler_title">flight delay dependence on the day of week;</a>
<div class="spoiler_body">
<pre>
SELECT DayOfWeek, count() AS c, avg(DepDelay &gt; 60) AS delays
FROM ontime GROUP BY DayOfWeek ORDER BY DayOfWeek
</pre><img src="https://habrastorage.org/files/885/e50/793/885e507930e34b7c8f788d25e7ca2bcf.png"/></div></div>
</li>
<li><div class="spoiler"><a class="spoiler_title">cities of departure with most frequent delays for 1 hour or longer;</a>
<div class="spoiler_body">
<pre>
SELECT OriginCityName, count() AS c, avg(DepDelay &gt; 60) AS delays
FROM ontime
GROUP BY OriginCityName
HAVING c &gt; 100000
ORDER BY delays DESC
LIMIT 20
</pre><img src="https://habrastorage.org/files/ac2/926/56d/ac292656d03946d0aba35c75783a31f2.png"/></div></div>
</li>
<li><div class="spoiler"><a class="spoiler_title">flights of maximum duration;</a>
<div class="spoiler_body">
<pre>
SELECT OriginCityName, DestCityName, count(*) AS flights, avg(AirTime) AS duration
FROM ontime
GROUP BY OriginCityName, DestCityName
ORDER BY duration DESC
LIMIT 20
</pre><img src="https://habrastorage.org/files/7b3/c2e/685/7b3c2e685832439b8c373bf2015131d2.png"/></div></div>
</li>
<li><div class="spoiler"><a class="spoiler_title">distribution of arrival time delays split by aircompanies;</a>
<div class="spoiler_body">
<pre>
SELECT Carrier, count() AS c, round(quantileTDigest(0.99)(DepDelay), 2) AS q
FROM ontime GROUP BY Carrier ORDER BY q DESC
</pre><img src="https://habrastorage.org/files/49c/332/e3d/49c332e3d93146ba8f46beef6b2b02b0.png"/></div></div>
</li>
<li><div class="spoiler"><a class="spoiler_title">aircompanies who stopped flights operation;</a>
<div class="spoiler_body">
<pre>
SELECT Carrier, min(Year), max(Year), count()
FROM ontime GROUP BY Carrier HAVING max(Year) &lt; 2015 ORDER BY count() DESC
</pre><img src="https://habrastorage.org/files/249/56f/1a2/24956f1a2efc48d78212586958aa036c.png"/></div></div>
</li>
<li><div class="spoiler"><a class="spoiler_title">most trending destination cities in 2015;</a>
<div class="spoiler_body">
<pre>
SELECT
DestCityName,
sum(Year = 2014) AS c2014,
sum(Year = 2015) AS c2015,
c2015 / c2014 AS diff
FROM ontime
WHERE Year IN (2014, 2015)
GROUP BY DestCityName
HAVING c2014 &gt; 10000 AND c2015 &gt; 1000 AND diff &gt; 1
ORDER BY diff DESC
</pre><img src="https://habrastorage.org/files/f31/32f/4d1/f3132f4d1c0d42eab26d9111afe7771a.png"/></div></div>
</li>
<li><div class="spoiler"><a class="spoiler_title">destination cities with maximum popularity-season dependency.</a>
<div class="spoiler_body">
<pre>
SELECT
DestCityName,
any(total),
avg(abs(monthly * 12 - total) / total) AS avg_month_diff
FROM
(
SELECT DestCityName, count() AS total
FROM ontime GROUP BY DestCityName HAVING total &gt; 100000
)
ALL INNER JOIN
(
SELECT DestCityName, Month, count() AS monthly
FROM ontime GROUP BY DestCityName, Month HAVING monthly &gt; 10000
)
USING DestCityName
GROUP BY DestCityName
ORDER BY avg_month_diff DESC
LIMIT 20
</pre><img src="https://habrastorage.org/files/26b/2c7/aae/26b2c7aae21a4c76800cb1c7a33a374d.png"/></div></div>
</li>
</ul>
<h3>ClickHouse deployment to cluster</h3>
<p>ClickHouse cluster is a homogenous cluster. Steps to set up:
<ol>
<li>Install ClickHouse server on all machines of the cluster</li>
<li>Set up cluster configs in configuration file</li>
<li>Create local tables on each instance</li>
<li>Create a <a href="https://clickhouse.yandex/reference_en.html#Distributed">Distributed table</a></li>
</ol>
</p>
<p><a href="https://clickhouse.yandex/reference_en.html#Distributed">Distributed-table</a> is actually a kind of "view" to local tables of ClickHouse cluster. SELECT query from a distributed table will be executed using resources of all cluster's shards. You may specify configs for multiple clusters and create multiple Distributed-tables providing views to different clusters.</p>
<p>Enable network access to clickhouse:
<pre>&lt;listen_host&gt;::&lt;/listen_host&gt;</pre>
</p>
<div class="spoiler"><a class="spoiler_title">Config for cluster of three shards. Each shard stores data on a single replica</a>
<div class="spoiler_body">
<pre>
&lt;remote_servers&gt;
&lt;perftest_3shards_1replicas&gt;
&lt;shard&gt;
&lt;replica&gt;
&lt;host&gt;example-perftest01j.yandex.ru&lt;/host&gt;
&lt;port&gt;9000&lt;/port&gt;
&lt;/replica&gt;
&lt;/shard&gt;
&lt;shard&gt;
&lt;replica&gt;
&lt;host&gt;example-perftest02j.yandex.ru&lt;/host&gt;
&lt;port&gt;9000&lt;/port&gt;
&lt;/replica&gt;
&lt;/shard&gt;
&lt;shard&gt;
&lt;replica&gt;
&lt;host&gt;example-perftest03j.yandex.ru&lt;/host&gt;
&lt;port&gt;9000&lt;/port&gt;
&lt;/replica&gt;
&lt;/shard&gt;
&lt;/perftest_3shards_1replicas&gt;
&lt;/remote_servers&gt;
</pre>
</div></div>
Creating a local table:
<pre>CREATE TABLE ontime_local (...) ENGINE = MergeTree(FlightDate, (Year, FlightDate), 8192);</pre>
Creating a distributed table providing a view into local tables of the cluster:
<pre>CREATE TABLE ontime_all AS ontime_local
ENGINE = Distributed(perftest_3shards_1replicas, default, ontime_local, rand());</pre>
<p>You can create a Distributed table on all machines in the cluster. This would allow to run distributed queries on any machine of the cluster. Besides distributed table you can also use <a href="https://clickhouse.yandex/reference_en.html#remote">*remote* table function</a>.</p>
<p>Let's run <a href="https://clickhouse.yandex/reference_en.html#INSERT">INSERT SELECT</a> into Distributed table to spread the table to multiple servers.</p>
<pre>INSERT INTO ontime_all SELECT * FROM ontime;</pre>
<p class="tip"><b></b> Worth to notice that the approach given above wouldn't fit for sharding of large tables.<br />Please use <a href="https://clickhouse.yandex/reference_en.html#Resharding">built-in sharding feature</a>.</p>
<p>As you could expect heavy queries are executed N times faster being launched on 3 servers instead of one.</p>
<div class="spoiler"><a class="spoiler_title">See here</a>
<div class="spoiler_body">
<img src="https://habrastorage.org/files/ece/020/129/ece020129fdf4a18a6e75daf2e699cb9.png"/>
<p>You may have noticed that quantiles calculation are slightly different. This happens due to <a href="https://github.com/tdunning/t-digest/raw/master/docs/t-digest-paper/histo.pdf">t-digest</a> algorithm implementation which is non-deterministic — it depends on the order of data processing.</p>
</div></div>
<p>In this case we have used a cluster with 3 shards, each containing a single replica.</p>
<p>To provide for resilience in production environment we recommend that each shard should contain 2-3 replicas distributed between multiple data-centers. Note that ClickHouse supports unlimited number of replicas.</p>
<div class="spoiler"><a class="spoiler_title">Config for cluster of one shard containing three replicas</a>
<div class="spoiler_body">
<pre>
&lt;remote_servers&gt;
...
&lt;perftest_1shards_3replicas&gt;
&lt;shard&gt;
&lt;replica&gt;
&lt;host&gt;example-perftest01j.yandex.ru&lt;/host&gt;
&lt;port&gt;9000&lt;/port&gt;
&lt;/replica&gt;
&lt;replica&gt;
&lt;host&gt;example-perftest02j.yandex.ru&lt;/host&gt;
&lt;port&gt;9000&lt;/port&gt;
&lt;/replica&gt;
&lt;replica&gt;
&lt;host&gt;example-perftest03j.yandex.ru&lt;/host&gt;
&lt;port&gt;9000&lt;/port&gt;
&lt;/replica&gt;
&lt;/shard&gt;
&lt;/perftest_1shards_3replicas&gt;
&lt;/remote_servers&gt;
</pre>
</div></div>
<p>To enable replication <a href="http://zookeeper.apache.org/">ZooKeeper</a> is required. ClickHouse will take care of data consistency on all replicas and run restore procedure after failure automatically. It's recommended to deploy ZooKeeper cluster to separate servers.</p>
<p>ZooKeeper is not a requirement — in some simple cases you can duplicate the data by writing it into all the replicas from your application code. This approach is not recommended — in this case ClickHouse is not able to guarantee data consistency on all replicas. This remains the responsibility of your application.</p>
<div class="spoiler"><a class="spoiler_title">Set ZooKeeper locations in configuration file</a>
<div class="spoiler_body">
<pre>
&lt;zookeeper-servers&gt;
&lt;node&gt;
&lt;host&gt;zoo01.yandex.ru&lt;/host&gt;
&lt;port&gt;2181&lt;/port&gt;
&lt;/node&gt;
&lt;node&gt;
&lt;host&gt;zoo02.yandex.ru&lt;/host&gt;
&lt;port&gt;2181&lt;/port&gt;
&lt;/node&gt;
&lt;node&gt;
&lt;host&gt;zoo03.yandex.ru&lt;/host&gt;
&lt;port&gt;2181&lt;/port&gt;
&lt;/node&gt;
&lt;/zookeeper-servers&gt;
</pre>
</div></div>
<p>Also we need to set macros for identifying shard and replica — it will be used on table creation</p>
<pre>
&lt;macros&gt;
&lt;shard&gt;01&lt;/shard&gt;
&lt;replica&gt;01&lt;/replica&gt;
&lt;/macros&gt;
</pre>
<p>If there are no replicas at the moment of replicated table creation, a new first replica will be instantiated. If there are already live replicas, the new replica will clone the data from the existing ones. You have an option to create all replicated tables first and then insert data into them. Another option is to create some replicas and add the others after or during data insertion.</p>
<pre>
CREATE TABLE ontime_replica (...)
ENGINE = ReplicatedMergeTree(
'/clickhouse_perftest/tables/{shard}/ontime',
'{replica}',
FlightDate,
(Year, FlightDate),
8192);
</pre>
<p>Here we use <a href="https://clickhouse.yandex/reference_en.html#ReplicatedMergeTree">ReplicatedMergeTree</a> table type. In parameters we specify ZooKeeper path containing shard and replica identifiers.</p>
<pre>INSERT INTO ontime_replica SELECT * FROM ontime;</pre>
<p>Replication operates in multi-master mode. Data can be loaded into any replica — it will be synced with other instances automatically. Replication is asynchronous so at a given moment of time not all replicas may contain recently inserted data. To allow data insertion at least one replica should be up. Others will sync up data and repair consistency once they will become active again. Please notice that such scheme allows for the possibility of just appended data loss.</p>
<h3>Feedback</h3>
<p>Ask any questions on <a href="https://stackoverflow.com/">Stackoverflow</a>. Use <a href="https://groups.google.com/group/clickhouse">Google Group</a> for discussion.<br />Or send private message to developers: <a href="mailto:clickhouse-feedback@yandex-team.com">clickhouse-feedback@yandex-team.com</a>.</p>
<p class="warranty">Software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</p>
<p class="footer">&copy; 2016 YANDEX LLC</p>
</div>
<script type="text/javascript">
// Spoiler toggling: clicking a spoiler title shows or hides, with a 100 ms
// animation, the `.spoiler_body` element that immediately follows the title.
// (The leftover console.log debug call was removed: it logged on every click
// and could throw in browsers that do not define `console`.)
$('.spoiler_title').click(function() {
    $(this).next('.spoiler_body').toggle(100);
});
</script>
</body>
</html>

View File

@ -1,5 +1,7 @@
#!/bin/bash
set -e
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
cd $CURDIR

View File

@ -12,14 +12,8 @@ function make_control {
# set environment variables REVISION, AUTHOR
function gen_revision_author {
REVISION=$(get_revision)
if [ -z $VERSION_PREFIX ] ; then
VERSION_PREFIX="v1.1."
fi
if [ -z $VERSION_POSTFIX ] ; then
VERSION_POSTFIX="-testing"
fi
VERSION_PREFIX="${VERSION_PREFIX:-v1.1.}"
VERSION_POSTFIX="${VERSION_POSTFIX:--testing}"
if [[ $STANDALONE != 'yes' ]]; then

View File

@ -1,4 +1,6 @@
ClickHouse website quickstart:
1. If npm is not installed: `apt-get install npm` for Debian/Ubuntu, `brew install npm` for Mac OS or download and install manually https://nodejs.org/en/download/
2. Run setup\_gulp.sh once to install prerequisites via npm
3. Use `gulp build` to minify website to "public" folder or just `gulp` to run local webserver with livereload serving it
1. Make sure you have `npm`, `docker` and `python` installed and available in your `$PATH`.
2. Run `setup\_gulp.sh` once to install build prerequisites via npm.
3. Use `gulp build` to minify website to "public" subfolder or just `gulp` to run local webserver with livereload serving it (note: livereload browser extension is required to make it actually reload pages on edits automatically).
4. There's Dockerfile that can be used to build and run ClickHouse website inside docker.
5. Deployment to https://clickhouse.yandex is managed by `release.sh`, but it is only usable from inside Yandex private network.

View File

@ -49,7 +49,13 @@ gulp.task('reference', [], function () {
.pipe(gulp.dest(outputDir))
});
gulp.task('docs', [], function () {
gulp.task('docstxt', [], function () {
run('cd ' + docsDir + '; make');
return gulp.src(paths.docs)
.pipe(gulp.dest(outputDir + '/../docs'))
});
gulp.task('docs', ['docstxt'], function () {
run('cd ' + docsDir + '; make');
return gulp.src(paths.docs)
.pipe(gulp.dest(outputDir + '/../docs'))

View File

@ -847,6 +847,11 @@ clickhouse-client
window.history.replaceState('', document.title, window.location.href.replace(location.hash, '') + this.hash);
});
var hostParts = window.location.host.split('.');
if (hostParts.length > 2 && hostParts[0] != 'test') {
window.location.host = hostParts[0] + '.' + hostParts[1];
}
var available_distributives = ['xenial', 'trusty', 'precise'];
available_distributives.forEach(function (name) {
$('#ubuntu_' + name).on('click', function () {

View File

@ -50,15 +50,6 @@ function getParams() {
<script type="text/javascript" src="https://yandex.st/jquery/1.7.2/jquery.min.js"></script>
<div class="island">
<div style="background:#fc0;margin-bottom:5em;padding:3em;">
<div>
<span style="font-size:3em;line-height:1.1em">Warning!</span>
<p style="font-size:1.2em;line-height:1.1em">
ClickHouse documentation has been moved to new location and the rest of this page is considered deprecated.<br>
<a href="docs/en/" style="font-weight:bold;">Go to current ClickHouse documentation</a>
</p>
</div>
</div>
<div style="float: left; margin-right: -100%; margin-top: 3px; margin-left: 3px;">
<a href="/">
<svg xmlns="http://www.w3.org/2000/svg" width="90" height="80" viewBox="0 0 9 8">

View File

@ -52,16 +52,6 @@ function getParams() {
<script type="text/javascript" src="https://yandex.st/jquery/1.7.2/jquery.min.js"></script>
<div class="island">
<div style="background:#fc0;margin-bottom:5em;padding:3em;">
<div>
<span style="font-size:3em;line-height:1.1em">Внимание!</span>
<p style="font-size:1.2em;line-height:1.1em">
Документация по ClickHouse переехала на другой адрес и оставшаяся часть данной страницы более не
является актуальной.<br>
<a href="docs/ru/" style="font-weight:bold;">Перейти к текущей документации по ClickHouse</a>
</p>
</div>
</div>
<div style="float: left; margin-right: -100%; margin-top: 3px; margin-left: 3px;">
<a href="/">
<svg xmlns="http://www.w3.org/2000/svg" width="90" height="80" viewBox="0 0 9 8">