Mobile app version of vmapp.org
Login or Join
Sarah324

: If you really want to spend that kind of time downloading, here's a link to the Wikipedia dumps - "WARNING: 6.07 GB compressed, approximately 27 GB uncompressed as of 2010-09-20" (and you'll

@Sarah324

If you really want to spend that kind of time downloading, here's a link to the Wikipedia dumps - "WARNING: 6.07 GB compressed, approximately 27 GB uncompressed as of 2010-09-20" (and you'll still have only 3,428,557 articles)

Here's a quick PHP script that'll make one for you (just create an empty DB, temporarily grant the $db_user CREATE + INSERT privileges, and populate the configuration variables in the script):

<?php

// Database host and name; both are combined into the PDO DSN further down.
$db_host = '127.0.0.1';
$db_name = 'testing';

// MySQL account credentials. Per the accompanying post, this user needs
// CREATE and INSERT privileges on the (initially empty) database.
$db_user = 'root';
$db_pass = '';

// Number of rows to insert (upper bound of the insert loop).
$entries = 10000000;
// Intended minimum / maximum number of words per generated entry.
// NOTE(review): confirm the text generator actually receives these bounds.
$entry_words_min = 250;
$entry_words_max = 1000;

/*
End configuration
*/

/**
 * Build a random "word" made of uppercase ASCII letters (A-Z).
 *
 * The length is drawn once, uniformly from [$len_min, $len_max]. The original
 * re-rolled rand() in the loop condition on every iteration, which skewed the
 * lengths toward the short end, and appended to an uninitialised variable,
 * which raised an "undefined variable" notice/warning and returned null for a
 * zero-length word.
 *
 * @param int $len_min Minimum word length (inclusive).
 * @param int $len_max Maximum word length (inclusive).
 * @return string The generated word; an empty string when the drawn length is 0.
 */
function get_rand_word( $len_min, $len_max ) {
    // Same range as the original expression, but evaluated exactly once.
    $length = rand( 0, $len_max - $len_min ) + $len_min;

    $word = '';
    for ( $i = 0; $i < $length; $i++ ) {
        // 65..90 are the ASCII codes for 'A'..'Z'.
        $word .= chr( rand( 65, 90 ) );
    }
    return $word;
}
/**
 * Build a random title of 4 to 10 random words.
 *
 * Each word is 2-9 uppercase letters followed by a single space, so the
 * result always ends with a trailing space (as in the original) and is at
 * most 100 characters long.
 *
 * The word count is drawn once; the original re-rolled rand() on every loop
 * iteration and appended to an uninitialised variable.
 *
 * @return string The generated title.
 */
function get_title() {
    $word_count = rand( 4, 10 );

    $title = '';
    for ( $i = 0; $i < $word_count; $i++ ) {
        $title .= get_rand_word( 2, 9 ) . ' ';
    }
    return $title;
}
/**
 * Build a random body text made of random words.
 *
 * Each word is 2-9 uppercase letters followed by a single space, so the
 * result ends with a trailing space (as in the original).
 *
 * The word-count bounds were hard-coded to 250 and 500; they are now optional
 * parameters with those same defaults, so calling get_fulltext() with no
 * arguments keeps the previous range. The word count is drawn once instead of
 * re-rolling rand() on every loop iteration, and the result is initialised to
 * an empty string instead of appending to an undefined variable.
 *
 * @param int $words_min Minimum number of words (inclusive). Defaults to 250.
 * @param int $words_max Maximum number of words (inclusive). Defaults to 500.
 * @return string The generated text.
 */
function get_fulltext( $words_min = 250, $words_max = 500 ) {
    $word_count = rand( $words_min, $words_max );

    $fulltext = '';
    for ( $i = 0; $i < $word_count; $i++ ) {
        $fulltext .= get_rand_word( 2, 9 ) . ' ';
    }
    return $fulltext;
}

// Connect, create the target table if needed, then insert $entries random rows.
$dsn = 'mysql:dbname=' . $db_name . ';host=' . $db_host;

try {
    // Fix: the configuration defines $db_pass; the original passed the
    // undefined variable $db_password, i.e. always an empty/null password.
    // ERRMODE_EXCEPTION makes failed statements throw instead of failing
    // silently (silent mode was the default before PHP 8).
    $dbh = new PDO($dsn, $db_user, $db_pass, array(
        PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION
    ));
} catch (PDOException $e) {
    echo 'Connection failed: ' . $e->getMessage();
    die();
}

$counter = 0;

try {
    // exec() is used because the DDL statement returns no result set.
    $dbh->exec('CREATE TABLE IF NOT EXISTS `sphinx` (
`id` int(10) unsigned NOT NULL auto_increment,
`title` varchar(150) collate utf8_bin NOT NULL,
`fulltext` text collate utf8_bin NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin');

    // Prepared once, executed per row with freshly generated values.
    $sth = $dbh->prepare('INSERT INTO `sphinx` (`title`,`fulltext`) VALUES (:title, :fulltext)');

    for ( $i = 0; $i < $entries; $i++ ) {
        $sth->execute(array(
            ':title' => get_title(),
            // Hand the configured word bounds to the generator. A generator
            // that declares no parameters simply ignores the extra arguments.
            ':fulltext' => get_fulltext( $entry_words_min, $entry_words_max )
        ));
        // Only reached when execute() did not throw, so the count is accurate.
        $counter++;
    }
} catch (PDOException $e) {
    echo 'Insert failed after ' . $counter . ' rows: ' . $e->getMessage();
    die();
}

echo $counter . ' rows inserted';

?>


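Note: Inserting 10,000,000 rows takes a long time, so you'll probably need to raise PHP's execution time limit (`max_execution_time`, or run the script from the command line) or lower `$entries` for the script to run to completion.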

10% popularity Vote Up Vote Down


Login to follow query

More posts by @Sarah324

0 Comments

Sorted by latest first Latest Oldest Best

Back to top | Use Dark Theme