plugin controller change

This commit is contained in:
Manuel Fritsch 2021-01-30 22:29:33 +01:00
parent 53a82b4249
commit aeabde8c4a
887 changed files with 34635 additions and 87014 deletions

BIN
.DS_Store vendored

Binary file not shown.

View file

@ -1 +1 @@
__version__ = "4.0.0.12"
__version__ = "4.0.0.13"

View file

@ -23,16 +23,16 @@ class CBPiKettleLogic(metaclass=ABCMeta):
self.cbpi.log.log_data(self.id, value)
async def run(self):
self.state = True
while self.running:
print("RUNNING KETTLE")
await asyncio.sleep(1)
self.state = False
def get_state(self):
return dict(state=self.state)
return dict(running=self.running)
async def start(self):
self.running = True
async def stop(self):

View file

@ -7,19 +7,15 @@ import re
import requests
import yaml
from cbpi.utils.utils import load_config
from zipfile import ZipFile
from cbpi.craftbeerpi import CraftBeerPi
import os
import pathlib
import shutil
import yaml
import click
def create_plugin_file():
import os.path
if os.path.exists(os.path.join(".", 'config', "plugin_list.txt")) is False:
srcfile = os.path.join(os.path.dirname(__file__), "config", "plugin_list.txt")
destfile = os.path.join(".", 'config')
shutil.copy(srcfile, destfile)
print("Plugin Folder created")
from jinja2 import Template
def create_config_file():
import os.path
@ -74,9 +70,7 @@ def clear_db():
os.remove(os.path.join(".", "craftbeerpi.db"))
print("database Cleared")
def check_for_setup():
if os.path.exists(os.path.join(".", "config", "config.yaml")) is False:
print("***************************************************")
print("CraftBeerPi Config File not found: %s" % os.path.join(".", "config", "config.yaml"))
@ -87,137 +81,164 @@ def check_for_setup():
return True
def list_plugins():
print("***************************************************")
print("CraftBeerPi 4.x Plugin List")
print("***************************************************")
print("")
plugins_yaml = "https://raw.githubusercontent.com/Manuel83/craftbeerpi-plugins/master/plugins_v4.yaml"
r = requests.get(plugins_yaml)
data = yaml.load(r.content, Loader=yaml.FullLoader)
for name, value in data.items():
print(name)
print("")
print("***************************************************")
def add(package_name):
def plugins_add(package_name):
if package_name is None:
print("Missing Plugin Name: cbpi add --name=")
print("Pleaes provide a plugin Name")
return
try:
with open(os.path.join(".", 'config', "config.yaml"), 'rt') as f:
data = yaml.load(f, Loader=yaml.FullLoader)
if package_name in data["plugins"]:
print("")
print("Plugin {} already active".format(package_name))
print("")
return
data["plugins"].append(package_name)
with open(os.path.join(".", 'config', "config.yaml"), 'w') as outfile:
yaml.dump(data, outfile, default_flow_style=False)
print("")
print("Plugin {} activated".format(package_name))
print("")
except Exception as e:
print(e)
pass
def plugin_remove(package_name):
    """Deactivate a plugin by removing it from the ``plugins`` list in config.yaml.

    The list is read from ``./config/config.yaml``, the entry that equals
    ``package_name`` is dropped, and the file is written back.

    Args:
        package_name: Exact name of the plugin entry to remove. ``None``
            prints a hint and returns without touching the file.

    Returns:
        None. Progress and errors are reported on stdout; exceptions raised
        while reading or writing the config are printed, not propagated.
    """
    if package_name is None:
        print("Pleaes provide a plugin Name")
        return

    config_path = os.path.join(".", 'config', "config.yaml")
    try:
        # NOTE(review): FullLoader is used on a local config file; if the file
        # only ever holds plain data, yaml.safe_load would be sufficient.
        with open(config_path, 'rt') as f:
            data = yaml.load(f, Loader=yaml.FullLoader) or {}

        # An empty file or a missing/empty "plugins" key means nothing is active.
        active = data.get("plugins") or []

        if package_name not in active:
            # Do not rewrite the file or claim success for an unknown plugin.
            print("")
            print("Plugin {} not active".format(package_name))
            print("")
            return

        # Exact comparison: a substring test would also remove every plugin
        # whose name merely contains package_name.
        data["plugins"] = [p for p in active if p != package_name]

        with open(config_path, 'w') as outfile:
            yaml.dump(data, outfile, default_flow_style=False)

        print("")
        print("Plugin {} deactivated".format(package_name))
        print("")
    except Exception as e:
        # Best-effort CLI command: report the problem instead of crashing.
        print(e)
def plugins_list():
    """Print the entries of the ``plugins`` list found in ./config/config.yaml.

    Output is framed by two separator lines. Any exception raised while
    opening, parsing or iterating the config is printed between the header
    and the closing separator instead of being propagated.
    """
    separator = "--------------------------------------"
    config_file = os.path.join(".", 'config', "config.yaml")

    print(separator)
    print("List of active pluigins")
    try:
        with open(config_file, 'rt') as handle:
            config = yaml.load(handle, Loader=yaml.FullLoader)
        for plugin_name in config["plugins"]:
            print("- {}".format(plugin_name))
    except Exception as err:
        print(err)
    print(separator)
def plugin_create(name):
if os.path.exists(os.path.join(".", name)) is True:
print("Cant create Plugin. Folder {} already exists ".format(name))
return
data = subprocess.check_output([sys.executable, "-m", "pip", "install", package_name])
data = data.decode('UTF-8')
url = 'https://github.com/Manuel83/craftbeerpi4-plugin-template/archive/main.zip'
r = requests.get(url)
with open('temp.zip', 'wb') as f:
f.write(r.content)
patter_already_installed = "Requirement already satisfied: %s" % package_name
pattern = "Successfully installed %s-([-0-9a-zA-Z._]*)" % package_name
match_already_installed = re.search(patter_already_installed, data)
match_installed = re.search(pattern, data)
if match_already_installed is not None:
print("Plugin already installed")
return False
if match_installed is None:
print(data)
print("Faild to install plugin")
return False
version = match_installed.groups()[0]
plugins = load_config("./config/plugin_list.txt")
if plugins is None:
plugins = {}
now = datetime.datetime.now()
plugins[package_name] = dict(version=version, installation_date=now.strftime("%Y-%m-%d %H:%M:%S"))
with open('./config/plugin_list.txt', 'w') as outfile:
yaml.dump(plugins, outfile, default_flow_style=False)
print("Plugin %s added" % package_name)
return True
with ZipFile('temp.zip', 'r') as repo_zip:
repo_zip.extractall()
def remove(package_name):
if package_name is None:
print("Missing Plugin Name: cbpi add --name=")
return
data = subprocess.check_output([sys.executable, "-m", "pip", "uninstall", "-y", package_name])
data = data.decode('UTF-8')
os.rename("./craftbeerpi4-plugin-template-main", os.path.join(".", name))
os.rename(os.path.join(".", name, "src"), os.path.join(".", name, name))
pattern = "Successfully uninstalled %s-([-0-9a-zA-Z._]*)" % package_name
match_uninstalled = re.search(pattern, data)
import jinja2
if match_uninstalled is None:
templateLoader = jinja2.FileSystemLoader(searchpath=os.path.join(".", name))
templateEnv = jinja2.Environment(loader=templateLoader)
TEMPLATE_FILE = "setup.py"
template = templateEnv.get_template(TEMPLATE_FILE)
outputText = template.render(name=name)
print("Faild to uninstall plugin")
return False
with open(os.path.join(".", name, "setup.py"), "w") as fh:
fh.write(outputText)
plugins = load_config("./config/plugin_list.txt")
if plugins is None:
plugins = {}
TEMPLATE_FILE = "MANIFEST.in"
template = templateEnv.get_template(TEMPLATE_FILE)
outputText = template.render(name=name)
with open(os.path.join(".", name, "MANIFEST.in"), "w") as fh:
fh.write(outputText)
if package_name not in plugins:
return False
TEMPLATE_FILE = os.path.join("/", name , "config.yaml")
template = templateEnv.get_template(TEMPLATE_FILE)
outputText = template.render(name=name)
del plugins[package_name]
with open('./config/plugin_list.txt', 'w') as outfile:
yaml.dump(plugins, outfile, default_flow_style=False)
with open(os.path.join(".", name, name, "config.yaml"), "w") as fh:
fh.write(outputText)
print("")
print("")
print("Plugin {} created! See https://craftbeerpi.gitbook.io/craftbeerpi4/development how to run your plugin ".format(name))
print("")
print("Happy Development! Cheers")
print("")
print("")
print("Plugin %s removed" % package_name)
return True
@click.group()
def main():
parser = argparse.ArgumentParser(description='Welcome to CraftBeerPi 4')
parser.add_argument("action", type=str, help="start,stop,restart,setup,plugins")
parser.add_argument('--debug', dest='debug', action='store_true')
parser.add_argument("--name", type=str, help="Plugin name")
args = parser.parse_args()
if args.debug is True:
level =logging.DEBUG
else:
level =logging.INFO
#logging.basicConfig(level=logging.INFO, filename='./logs/app.log', filemode='a', format='%(asctime)s - %(levelname)s - %(name)s - %(message)s')
logging.basicConfig(level=level, format='%(asctime)s - %(levelname)s - %(name)s - %(message)s')
pass
if args.action == "setup":
print("Setting up CBPi")
@click.command()
def setup():
'''Create Config folder'''
print("Setting up CraftBeerPi")
create_home_folder_structure()
create_plugin_file()
create_config_file()
copy_splash()
return
if args.action == "cleardb":
clear_db()
return
if args.action == "plugins":
list_plugins()
return
if args.action == "add":
add(args.name)
return
if args.action == "remove":
remove(args.name)
return
if args.action == "start":
@click.command()
def start():
if check_for_setup() is False:
return
print("START")
cbpi = CraftBeerPi()
cbpi.start()
@click.command()
def plugins():
'''List active plugins'''
plugins_list()
return
parser.print_help()
@click.command()
@click.argument('name')
def add(name):
'''Activate Plugin'''
plugins_add(name)
@click.command()
@click.argument('name')
def remove(name):
'''Deactivate Plugin'''
plugin_remove(name)
# CLI sub-command: scaffold a new plugin project in a folder called NAME.
# The docstring below is what click prints as this command's help text, so it
# must describe creation (it previously repeated the text of "remove").
@click.command()
@click.argument('name')
def create(name):
    '''Create New Plugin'''
    plugin_create(name)
main.add_command(setup)
main.add_command(start)
main.add_command(plugins)
main.add_command(add)
main.add_command(remove)
main.add_command(create)

View file

@ -9,3 +9,6 @@ port: 8000
username: cbpi
password: 123
plugins:
- cbpi4-ui

View file

@ -37,6 +37,6 @@ class ActorController(BasicController):
instance = data.get("instance")
state = state=instance.get_state()
except Exception as e:
logging.error("Faild to crate actor dict {} ".format(e))
logging.error("Faild to create actor dict {} ".format(e))
state = dict()
return dict(name=data.get("name"), id=data.get("id"), type=data.get("type"), state=state,props=data.get("props", []))

View file

@ -31,13 +31,12 @@ class BasicController:
logging.info("{} Load ".format(self.name))
with open(self.path) as json_file:
data = json.load(json_file)
self.data = data["data"]
if self.autostart is True:
for d in self.data:
logging.info("{} Starting ".format(self.name))
await self.start(d.get("id"))
await self.push_udpate()
async def save(self):
logging.info("{} Save ".format(self.name))
@ -76,6 +75,7 @@ class BasicController:
instance = item.get("instance")
await instance.stop()
await instance.task
await self.push_udpate()
except Exception as e:
logging.error("{} Cant stop {} - {}".format(self.name, id, e))
@ -84,20 +84,17 @@ class BasicController:
try:
item = self.find_by_id(id)
instance = item.get("instance")
if instance is not None and instance.running is True:
logging.warning("{} already running {}".format(self.name, id))
return
type = item["type"]
clazz = self.types[type]["class"]
item["instance"] = clazz(self.cbpi, item["id"], {})
await item["instance"].start()
item["instance"].task = self._loop.create_task(item["instance"].run())
logging.info("Sensor started {}".format(id))
logging.info("{} started {}".format(self.name, id))
except Exception as e:
logging.error("{} Cant start {} - {}".format(self.name, id, e))

View file

@ -13,6 +13,7 @@ class KettleController(BasicController):
item = self.find_by_id(id)
instance = item.get("instance")
await instance.start()
await self.push_udpate()
except Exception as e:
logging.error("Faild to switch on KettleLogic {} {}".format(id, e))
@ -21,6 +22,19 @@ class KettleController(BasicController):
item = self.find_by_id(id)
instance = item.get("instance")
await instance.stop()
await self.push_udpate()
except Exception as e:
logging.error("Faild to switch on KettleLogic {} {}".format(id, e))
async def toggle(self, id):
    """Start the kettle logic identified by ``id`` if it is idle, otherwise stop it.

    The item is looked up with ``find_by_id``. When it has no instance yet,
    or its instance is not running, ``self.start(id)`` is awaited; otherwise
    the instance's ``stop()`` is awaited. Afterwards ``push_udpate()`` is
    awaited so the new state is published on either path.

    Args:
        id: Identifier passed to ``find_by_id`` and ``start``.

    Returns:
        None. Any exception is logged and swallowed, matching the other
        switch methods of this controller.
    """
    try:
        item = self.find_by_id(id)
        instance = item.get("instance")
        if instance is None or not instance.running:
            await self.start(id)
        else:
            await instance.stop()
        await self.push_udpate()
    except Exception as e:
        # Name the operation that actually failed (was copy-pasted "switch on").
        logging.error("Faild to toggle KettleLogic {} {}".format(id, e))
@ -35,7 +49,7 @@ class KettleController(BasicController):
def create_dict(self, data):
try:
instance = data.get("instance")
state = dict(state=instance.get_state())
state = instance.get_state()
except Exception as e:
logging.error("Faild to create KettleLogic dict {} ".format(e))
state = dict()

View file

@ -18,72 +18,8 @@ class PluginController():
def __init__(self, cbpi):
self.cbpi = cbpi
self.plugins = load_config("./config/plugin_list.txt")
if self.plugins is None:
self.plugins = {}
async def load_plugin_list(self):
async with aiohttp.ClientSession() as session:
async with session.get('http://localhost:2202/list') as resp:
if (resp.status == 200):
data = yaml.load(await resp.text())
self.plugins = data
return data
def installed_plugins(self):
return self.plugins
async def install(self, package_name):
async def install(cbpi, plugins, package_name):
data = subprocess.check_output(
[sys.executable, "-m", "pip", "install", package_name])
data = data.decode('UTF-8')
if package_name not in self.plugins:
now = datetime.datetime.now()
self.plugins[package_name] = dict(
version="1.0", installation_date=now.strftime("%Y-%m-%d %H:%M:%S"))
with open('./config/plugin_list.txt', 'w') as outfile:
yaml.dump(self.plugins, outfile, default_flow_style=False)
if data.startswith('Requirement already satisfied'):
self.cbpi.notify(
key="p", message="Plugin already installed ", type="warning")
else:
self.cbpi.notify(
key="p", message="Plugin installed ", type="success")
async with aiohttp.ClientSession() as session:
async with session.get('http://localhost:2202/get/%s' % package_name) as resp:
if (resp.status == 200):
data = await resp.json()
await self.cbpi.job.start_job(install(self.cbpi, self.plugins, data["package_name"]), data["package_name"], "plugins_install")
return True
else:
self.cbpi.notify(
key="p", message="Failed to install Plugin %s " % package_name, type="danger")
return False
async def uninstall(self, package_name):
async def uninstall(cbpi, plugins, package_name):
print("try to uninstall", package_name)
try:
data = subprocess.check_output(
[sys.executable, "-m", "pip", "uninstall", "-y", package_name])
data = data.decode('UTF-8')
if data.startswith("Successfully uninstalled"):
cbpi.notify(key="p", message="Plugin %s Uninstalled" %
package_name, type="success")
else:
cbpi.notify(key="p", message=data, type="success")
except Exception as e:
print(e)
if package_name in self.plugins:
print("Uninstall", self.plugins[package_name])
await self.cbpi.job.start_job(uninstall(self.cbpi, self.plugins, package_name), package_name, "plugins_uninstall")
def load_plugins(self):
this_directory = os.path.dirname(__file__)
@ -110,18 +46,21 @@ class PluginController():
def load_plugins_from_evn(self):
for p in self.plugins:
logger.debug("Load Plugin %s" % p)
for p in self.cbpi.static_config.get("plugins",[]):
try:
logger.info("Try to load plugin: %s " % p)
self.modules[p] = import_module(p)
self.modules[p].setup(self.cbpi)
#logger.info("Plugin %s loaded successfully" % p)
logger.info("Plugin %s loaded successfully" % p)
except Exception as e:
logger.error("FAILED to load plugin %s " % p)
logger.error(e)
def register(self, name, clazz) -> None:
'''
Register a new actor type
@ -171,9 +110,7 @@ class PluginController():
parameters.append(self._parse_property_object(p))
result["properties"] = parameters
for method_name, method in cls.__dict__.items():
if hasattr(method, "action"):
key = method.__getattribute__("key")
parameters = []
for p in method.__getattribute__("parameters"):

View file

@ -11,7 +11,7 @@ class SensorController(BasicController):
instance = data.get("instance")
state = state=instance.get_state()
except Exception as e:
logging.error("Faild to crate actor dict {} ".format(e))
logging.error("Faild to create sensor dict {} ".format(e))
state = dict()
return dict(name=data.get("name"), id=data.get("id"), type=data.get("type"), state=state,props=data.get("props", []))

View file

@ -222,7 +222,7 @@ class StepController:
return next((i for i, item in enumerate(self.profile) if item["id"] == id), None)
async def push_udpate(self):
await self.cbpi.bus.fire("step/update", data=list(map(lambda x: self.create_dict(x), self.profile)))
self.cbpi.ws.send(dict(topic="step_update", data=list(map(lambda x: self.create_dict(x), self.profile))))
async def start_step(self,step):
logging.info("Start Step")

View file

@ -20,11 +20,7 @@ except Exception:
import RPi.GPIO as GPIO
@parameters([Property.Number(label="Param1", configurable=True),
Property.Text(label="Param2", configurable=True, default_value="HALLO"),
Property.Select(label="Param3", options=[1,2,4]),
Property.Sensor(label="Param4"),
Property.Actor(label="Param5")])
@parameters([])
class CustomActor(CBPiActor):
my_name = ""
@ -37,7 +33,6 @@ class CustomActor(CBPiActor):
def init(self):
print("INIT")
self.state = False
pass

View file

@ -2,11 +2,7 @@ import asyncio
from cbpi.api import *
@parameters([Property.Number(label="Param1", configurable=True),
Property.Text(label="Param2", configurable=True, default_value="HALLO"),
Property.Select(label="Param3", options=[1,2,4]),
Property.Sensor(label="Param4"),
Property.Actor(label="Param5")])
@parameters([])
class CustomLogic(CBPiKettleLogic):
pass

View file

@ -7,11 +7,7 @@ from aiohttp import web
from cbpi.api import *
@parameters([Property.Number(label="Param1", configurable=True),
Property.Text(label="Param2", configurable=True, default_value="HALLO"),
Property.Select(label="Param3", options=[1,2,4]),
Property.Sensor(label="Param4"),
Property.Actor(label="Param5")])
@parameters([])
class CustomSensor(CBPiSensor):
def __init__(self, cbpi, id, props):
@ -36,7 +32,7 @@ class CustomSensor(CBPiSensor):
while self.running is True:
self.value = random.randint(0,50)
self.push_update(self.value)
await asyncio.sleep(1)
await asyncio.sleep(10)
def get_state(self):
return dict(value=self.value)

View file

@ -182,6 +182,31 @@ class KettleHttpEndpoints():
await self.controller.off(id)
return web.Response(status=204)
# POST /{id}/toggle - flip the kettle logic between running and stopped.
# Uses the module-wide ``auth`` setting like the neighbouring endpoints rather
# than hard-coding auth_required=False for this one route.
@request_mapping(path="/{id}/toggle", method="POST", auth_required=auth)
async def http_toggle(self, request) -> web.Response:
    """
    ---
    description: Toggle kettle logic on or off
    tags:
    - Kettle
    parameters:
    - name: "id"
      in: "path"
      description: "Kettle ID"
      required: true
      type: "string"
    responses:
        "204":
            description: successful operation
        "405":
            description: invalid HTTP Method
    """
    # The docstring above is an API spec block (presumably consumed by the
    # swagger generator - TODO confirm), so explanatory notes live in comments.
    kettle_id = request.match_info['id']
    # The controller decides whether to start or stop and pushes the update.
    await self.controller.toggle(kettle_id)
    # 204: the request succeeded and there is no response body.
    return web.Response(status=204)
@request_mapping(path="/{id}/action", method="POST", auth_required=auth)
async def http_action(self, request) -> web.Response:
@ -233,11 +258,20 @@ class KettleHttpEndpoints():
required: true
type: "integer"
format: "int64"
- in: body
name: body
description: Update Temp
required: true
schema:
type: object
properties:
temp:
type: integer
responses:
"204":
description: successful operation
"""
id = request.match_info['id']
#data = await request.json()
await self.controller.set_target_temp(id,999)
data = await request.json()
await self.controller.set_target_temp(id,data.get("temp"))
return web.Response(status=204)

View file

@ -15,7 +15,7 @@
"id": "Aifjxmw4QdPfU3XbR6iyis",
"name": "Pump1",
"props": {},
"state": false,
"state": true,
"type": "CustomActor"
},
{
@ -24,6 +24,34 @@
"props": {},
"state": false,
"type": "CustomActor"
},
{
"id": "NjammuygecdvMpoGYc3rXt",
"name": "Heater Boil",
"props": {},
"state": false,
"type": "CustomActor"
},
{
"id": "j4PnSfuWRhgZDgrQScLN7e",
"name": "Vent1",
"props": {},
"state": true,
"type": "CustomActor"
},
{
"id": "ZGJqoybWv3eWrEeGJLopFs",
"name": "Water In",
"props": {},
"state": false,
"type": "CustomActor"
},
{
"id": "NfYJEWbTXPUSUQzS83dfAn",
"name": "Vent Out",
"props": {},
"state": false,
"type": "CustomActor"
}
]
}

View file

@ -1,64 +1,425 @@
{
"elements": [
{
"id": "6c670263-7b19-426c-8769-19aac8ebb381",
"name": "CustomSVG",
"id": "1ad5cec3-0f10-4910-b5ba-b4a96207d0ca",
"name": "Kettle",
"props": {
"name": "tank",
"width": "200"
"heigth": "150",
"width": "100"
},
"type": "CustomSVG",
"x": 295,
"y": 45
"type": "Kettle",
"x": 225,
"y": 160
},
{
"id": "cbe859ca-b8e8-433f-952c-938a2f8a309b",
"id": "ba621aee-a733-4238-b892-0f39100a5d21",
"name": "Kettle",
"props": {
"heigth": "150",
"width": "100"
},
"type": "Kettle",
"x": 530,
"y": 160
},
{
"id": "b61f57d9-e9ce-42b5-97df-3b2d7deaf18c",
"name": "Kettle",
"props": {
"heigth": "150",
"width": "100"
},
"type": "Kettle",
"x": 780,
"y": 160
},
{
"id": "f2facefa-5808-4f63-93e7-fd8c3343aa2f",
"name": "Pump1",
"props": {
"actor": "Aifjxmw4QdPfU3XbR6iyis"
},
"type": "ActorButton",
"x": 410,
"y": 380
},
{
"id": "6996220e-b314-4c23-82c5-2d0873bcd1bc",
"name": "KettleControl",
"props": {
"kettle": "oHxKz3z5RjbsxfSz6KUgov",
"orientation": "vertical"
},
"type": "KettleControl",
"x": 165,
"y": 205
},
{
"id": "91547101-86e5-405c-84e4-295d3565adfb",
"name": "Vent",
"props": {
"actor": "j4PnSfuWRhgZDgrQScLN7e"
},
"type": "ActorButton",
"x": 550,
"y": 380
},
{
"id": "a7ec6424-0df5-489e-85a6-5b36d039079b",
"name": "Pump2",
"props": {
"actor": "HX2bKdobuANehPggYcynnj"
},
"type": "ActorButton",
"x": 680,
"y": 380
},
{
"id": "39bb1a5b-294e-47e6-b472-699ef05aa780",
"name": "KettleControl",
"props": {
"kettle": "a7bWex85Z9Td4atwgazpXW",
"orientation": "vertical"
},
"type": "KettleControl",
"x": 720,
"y": 205
},
{
"id": "310054aa-729b-45b2-a3a3-2c73196a2444",
"name": "HLT",
"props": {
"color": "#fff",
"size": "15"
},
"type": "Text",
"x": 235,
"y": 165
},
{
"id": "72a66e4f-f7ce-4ac2-9956-c581590bfb3d",
"name": "MashTun",
"props": {
"color": "#fff",
"size": "15"
},
"type": "Text",
"x": 540,
"y": 165
},
{
"id": "62f58450-5ce6-45bf-b178-0dde9225ab52",
"name": "Boil",
"props": {
"color": "#fff",
"size": "15"
},
"type": "Text",
"x": 820,
"y": 165
},
{
"id": "e2b351fa-b66e-416a-a6d6-887ee41b3d7e",
"name": "Water",
"props": {
"actor": "ZGJqoybWv3eWrEeGJLopFs"
},
"type": "ActorButton",
"x": 45,
"y": 160
},
{
"id": "9f3f87d4-3c2a-4dcc-9740-8f7efcc553bf",
"name": "Sensor Data",
"props": {
"color": "#fff",
"sensor": "8ohkXvFA9UrkHLsxQL38wu",
"size": "30",
"unit": "\u00b0"
},
"type": "Sensor",
"x": 255,
"y": 185
},
{
"id": "8df86373-7ed9-4d49-9d29-3b80e67989ab",
"name": "Sensor Data",
"props": {
"color": "#fff",
"sensor": "8ohkXvFA9UrkHLsxQL38wu",
"size": "30",
"unit": "\u00b0"
},
"type": "Sensor",
"x": 810,
"y": 185
},
{
"id": "16a0e88b-09fb-4f32-9d9a-b82d02c48190",
"name": "TargetTemp",
"props": {
"color": "#fff",
"kettle": "oHxKz3z5RjbsxfSz6KUgov",
"size": "12",
"unit": "\u00b0"
},
"type": "TargetTemp",
"x": 260,
"y": 225
},
{
"id": "2204b231-ca45-4773-a110-0e4b19dfab89",
"name": "TargetTemp",
"props": {
"color": "#fff",
"kettle": "a7bWex85Z9Td4atwgazpXW",
"size": "12",
"unit": "\u00b0"
},
"type": "TargetTemp",
"x": 820,
"y": 225
},
{
"id": "8f3c656c-16b7-4f81-9d6d-8219e90e87d0",
"name": "CustomSVG",
"props": {
"name": "tank",
"width": "100"
"name": "cbpi_svg",
"width": "50"
},
"type": "CustomSVG",
"x": 555,
"y": 55
"y": 240
},
{
"id": "1f1d5ee6-1ccc-409b-a240-c81d50b71627",
"id": "2a8b37f8-c0af-4592-9771-2e6500ef4299",
"name": "CustomSVG",
"props": {
"name": "kettle",
"width": "100"
"name": "cbpi_svg",
"width": "50"
},
"type": "CustomSVG",
"x": 795,
"y": 90
"x": 245,
"y": 240
},
{
"id": "16ec8526-7f2c-4973-bf97-4ab3363e6ca1",
"name": "CustomSVG",
"props": {
"name": "cbpi_svg",
"width": "50"
},
"type": "CustomSVG",
"x": 805,
"y": 240
},
{
"id": "4fecbb43-53be-4d4a-b24d-2d980777afbe",
"name": "CraftBeerPi Brewery",
"props": {
"color": "#fff",
"size": "40"
},
"type": "Text",
"x": 45,
"y": 65
},
{
"id": "4996dd17-b047-4d27-8598-0563dfd444ab",
"name": "Steps",
"props": {
"width": "200"
},
"type": "Steps",
"x": 35,
"y": 315
},
{
"id": "44014b52-4bf0-4136-88a7-3cb9f1882962",
"name": "Out",
"props": {
"actor": "NfYJEWbTXPUSUQzS83dfAn"
},
"type": "ActorButton",
"x": 985,
"y": 265
},
{
"id": "d4a56a0e-f410-47c1-879a-ff41c6422a6e",
"name": "Sensor Data",
"props": {
"color": "red",
"sensor": "8ohkXvFA9UrkHLsxQL38wu",
"size": "40",
"unit": "\u00b0"
},
"type": "Sensor",
"x": 555,
"y": 180
}
],
"pathes": [
{
"condition": [
"ZGJqoybWv3eWrEeGJLopFs"
],
"coordinates": [
[
305,
75
225,
180
],
[
160,
190
],
[
245,
460
],
[
525,
395
],
[
560,
75
115,
180
]
],
"id": "d22d65d2-c4db-4553-856a-e9239a79e136"
"id": "731806be-b2cb-4706-8dd1-00bfc7daa818"
},
{
"condition": [
"Aifjxmw4QdPfU3XbR6iyis",
"j4PnSfuWRhgZDgrQScLN7e"
],
"coordinates": [
[
480,
400
],
[
550,
400
]
],
"id": "39c646bc-3655-433d-a989-aa25a4a1d3ab"
},
{
"condition": [
"Aifjxmw4QdPfU3XbR6iyis",
"j4PnSfuWRhgZDgrQScLN7e"
],
"coordinates": [
[
320,
285
],
[
360,
285
],
[
360,
400
],
[
410,
400
]
],
"id": "3fd4d742-a9b4-4d6f-ab75-9fcfed4f5104"
},
{
"condition": [
"Aifjxmw4QdPfU3XbR6iyis",
"j4PnSfuWRhgZDgrQScLN7e"
],
"coordinates": [
[
535,
175
],
[
390,
175
],
[
390,
215
],
[
325,
215
]
],
"id": "91f38257-788c-4255-99cf-f454c69a7d93"
},
{
"condition": [
"Aifjxmw4QdPfU3XbR6iyis",
"j4PnSfuWRhgZDgrQScLN7e"
],
"coordinates": [
[
580,
380
],
[
580,
305
]
],
"id": "0f9ffe1d-0b0c-4a0e-9dbf-3931ded3d050"
},
{
"coordinates": [
[
615,
400
],
[
680,
400
]
],
"id": "fbbd511d-b51c-43a3-95e7-1608f21fdb33"
},
{
"coordinates": [
[
780,
180
],
[
710,
180
],
[
710,
380
]
],
"id": "e4f7b27e-a0db-48e8-82e2-7a07f1a61dc5"
},
{
"condition": [
"NfYJEWbTXPUSUQzS83dfAn"
],
"coordinates": [
[
985,
285
],
[
880,
285
]
],
"id": "0dc28018-7282-4a43-98e6-c1dd198c93d5"
},
{
"condition": [
"NfYJEWbTXPUSUQzS83dfAn"
],
"coordinates": [
[
1015,
375
],
[
1015,
300
]
],
"id": "6ca9c0f9-d4a6-45cf-bfdd-b7f6740c4bc1"
}
]
}

View file

@ -1,14 +1,13 @@
name: CraftBeerPi
version: 4.0
index_url: /cbpi_ui/static/index.html
plugins:
- cbpi4-ui
port: 8080
# login data
username: cbpi
password: 123
ws_push_all: true

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 30 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 13 KiB

View file

@ -1,81 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0" y="0" width="150" height="220" viewBox="0, 0, 150, 220">
<defs>
<linearGradient id="Gradient_1" gradientUnits="userSpaceOnUse" x1="3.5" y1="110.5" x2="147.5" y2="110.5">
<stop offset="0" stop-color="#323232"/>
<stop offset="0.357" stop-color="#FFFFFF"/>
<stop offset="0.571" stop-color="#919191"/>
<stop offset="1" stop-color="#4A4A4A"/>
</linearGradient>
<linearGradient id="Gradient_2" gradientUnits="userSpaceOnUse" x1="73.868" y1="3.277" x2="77.132" y2="217.723">
<stop offset="0" stop-color="#5D5D5D"/>
<stop offset="1" stop-color="#000000" stop-opacity="0.959"/>
</linearGradient>
<linearGradient id="Gradient_3" gradientUnits="userSpaceOnUse" x1="3.5" y1="101.083" x2="147.5" y2="101.083">
<stop offset="0" stop-color="#323232"/>
<stop offset="0.357" stop-color="#FFFFFF"/>
<stop offset="0.571" stop-color="#919191"/>
<stop offset="1" stop-color="#4A4A4A"/>
</linearGradient>
<linearGradient id="Gradient_4" gradientUnits="userSpaceOnUse" x1="2.75" y1="110.5" x2="148.25" y2="110.5">
<stop offset="0" stop-color="#232323"/>
<stop offset="0.357" stop-color="#5B5B5B"/>
<stop offset="0.571" stop-color="#474747"/>
<stop offset="1" stop-color="#282828"/>
</linearGradient>
<linearGradient id="Gradient_5" gradientUnits="userSpaceOnUse" x1="219.5" y1="110" x2="223.5" y2="110">
<stop offset="0" stop-color="#232323"/>
<stop offset="0.357" stop-color="#5B5B5B"/>
<stop offset="0.571" stop-color="#474747"/>
<stop offset="1" stop-color="#282828"/>
</linearGradient>
</defs>
<g id="Ebene_1" display="none">
<g display="none">
<path d="M135.5,3 C141.774,3.18 146.086,7.113 147.348,13.254 L147.5,13.254 L147.5,156.127 L111.5,185.434 C102.435,192.824 93.37,200.214 84.3,207.598 L84.3,218 L66.7,218 L66.7,207.328 C57.672,199.985 48.594,192.701 39.5,185.434 L3.5,156.127 L3.5,13.254 L3.652,13.254 C4.623,7.127 9.57,3.297 15.5,3 L135.5,3 z" fill="url(#Gradient_1)"/>
<path d="M135.5,3 C141.774,3.18 146.086,7.113 147.348,13.254 L147.5,13.254 L147.5,156.127 L111.5,185.434 C102.435,192.824 93.37,200.214 84.3,207.598 L84.3,218 L66.7,218 L66.7,207.328 C57.672,199.985 48.594,192.701 39.5,185.434 L3.5,156.127 L3.5,13.254 L3.652,13.254 C4.623,7.127 9.57,3.297 15.5,3 L135.5,3 z" fill-opacity="0" stroke="#272727" stroke-width="1"/>
</g>
</g>
<g id="Ebene_4"/>
<g id="Ebene_3">
<g display="none">
<g display="none">
<path d="M2.75,3.25 L148.25,3.25 L148.25,217.75 L2.75,217.75 L2.75,3.25 z" fill="url(#Gradient_2)"/>
<path d="M2.75,3.25 L148.25,3.25 L148.25,217.75 L2.75,217.75 L2.75,3.25 z" fill-opacity="0" stroke="#CDCDCD" stroke-width="1"/>
</g>
<path d="M75.5,189.637 C41.08,189.637 13.177,182.258 13.177,173.156 C13.177,164.053 41.08,156.674 75.5,156.674 C109.92,156.674 137.823,164.053 137.823,173.156 C137.823,182.258 109.92,189.637 75.5,189.637 z" fill-opacity="0" stroke="#CDCDCD" stroke-width="10"/>
<path d="M75.5,189.637 C41.08,189.637 13.177,182.258 13.177,173.156 C13.177,164.053 41.08,156.674 75.5,156.674 C109.92,156.674 137.823,164.053 137.823,173.156 C137.823,182.258 109.92,189.637 75.5,189.637 z" fill-opacity="0" stroke="#CDCDCD" stroke-width="8"/>
<path d="M75.5,177.357 C41.08,177.357 13.177,169.978 13.177,160.875 C13.177,151.772 41.08,144.393 75.5,144.393 C109.92,144.393 137.822,151.772 137.822,160.875 C137.822,169.978 109.92,177.357 75.5,177.357 z" fill-opacity="0" stroke="#CDCDCD" stroke-width="10"/>
<path d="M75.5,177.357 C41.08,177.357 13.177,169.978 13.177,160.875 C13.177,151.772 41.08,144.393 75.5,144.393 C109.92,144.393 137.823,151.772 137.823,160.875 C137.823,169.978 109.92,177.357 75.5,177.357 z" fill-opacity="0" stroke="#CDCDCD" stroke-width="8"/>
<path d="M75.5,165.076 C41.08,165.076 13.177,157.697 13.177,148.594 C13.177,139.492 41.08,132.113 75.5,132.113 C109.92,132.113 137.823,139.492 137.823,148.594 C137.823,157.697 109.92,165.076 75.5,165.076 z" fill-opacity="0" stroke="#CDCDCD" stroke-width="10"/>
<path d="M75.5,165.076 C41.08,165.076 13.177,157.697 13.177,148.594 C13.177,139.492 41.08,132.113 75.5,132.113 C109.92,132.113 137.823,139.492 137.823,148.594 C137.823,157.697 109.92,165.076 75.5,165.076 z" fill-opacity="0" stroke="#CDCDCD" stroke-width="8"/>
</g>
<g>
<path d="M2.25,159.208 C2.25,163.834 34.821,167.583 75,167.583 C115.179,167.583 147.75,163.834 147.75,159.208 L147.75,208.875 C147.75,213.5 115.179,217.25 75,217.25 C34.821,217.25 2.25,213.5 2.25,208.875 L2.25,159.208 z" fill="#3B2CD5"/>
<path d="M75,167.333 C34.821,167.333 2.25,163.584 2.25,158.958 C2.25,154.333 34.821,150.583 75,150.583 C115.179,150.583 147.75,154.333 147.75,158.958 C147.75,163.584 115.179,167.333 75,167.333 z" fill="#2193FF"/>
</g>
<path d="M75.5,20 C35.321,20 2.75,16.25 2.75,11.625 C2.75,7 35.321,3.25 75.5,3.25 C115.679,3.25 148.25,7 148.25,11.625 C148.25,16.25 115.679,20 75.5,20 z" fill-opacity="0" stroke="#CDCDCD" stroke-width="1"/>
<path d="M75.5,217.75 C35.321,217.75 2.75,214 2.75,209.375 C2.75,204.75 35.321,201 75.5,201 C115.679,201 148.25,204.75 148.25,209.375 C148.25,214 115.679,217.75 75.5,217.75 z" fill-opacity="0" stroke="#CDCDCD" stroke-width="1"/>
<path d="M2.75,208.604 L2.75,12.396" fill-opacity="0" stroke="#CDCDCD" stroke-width="1"/>
<path d="M148.25,209.375 L148.25,11.625" fill-opacity="0" stroke="#CDCDCD" stroke-width="1"/>
</g>
<g id="Ebene_2">
<g display="none">
<path d="M75.5,3.333 C115.265,3.333 147.5,14.414 147.5,28.083 L147.5,174.083 C147.5,187.752 115.264,198.833 75.5,198.833 C35.736,198.833 3.5,187.752 3.5,174.083 L3.5,28.083 C3.5,14.414 35.736,3.333 75.5,3.333 z" fill="url(#Gradient_3)"/>
<path d="M75.5,3.333 C115.265,3.333 147.5,14.414 147.5,28.083 L147.5,174.083 C147.5,187.752 115.264,198.833 75.5,198.833 C35.736,198.833 3.5,187.752 3.5,174.083 L3.5,28.083 C3.5,14.414 35.736,3.333 75.5,3.333 z" fill-opacity="0" stroke="#272727" stroke-width="1"/>
</g>
<g display="none">
<path d="M2.75,3.25 L148.25,3.25 L148.25,217.75 L2.75,217.75 L2.75,3.25 z" fill="#919191"/>
<path d="M2.75,3.25 L148.25,3.25 L148.25,217.75 L2.75,217.75 L2.75,3.25 z" fill-opacity="0" stroke="#000000" stroke-width="1"/>
</g>
<g>
<path d="M2.75,3.25 L148.25,3.25 L148.25,217.75 L2.75,217.75 L2.75,3.25 z" fill="url(#Gradient_4)"/>
<path d="M2.75,3.25 L148.25,3.25 L148.25,217.75 L2.75,217.75 L2.75,3.25 z" fill-opacity="0" stroke="#000000" stroke-width="1"/>
</g>
<g>
<path d="M219.5,108.5 L223.5,108.5 L223.5,111.5 L219.5,111.5 L219.5,108.5 z" fill="url(#Gradient_5)"/>
<path d="M219.5,108.5 L223.5,108.5 L223.5,111.5 L219.5,111.5 L219.5,108.5 z" fill-opacity="0" stroke="#000000" stroke-width="1"/>
</g>
</g>
</svg>

Before

Width:  |  Height:  |  Size: 6.9 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 9.2 KiB

View file

@ -4,18 +4,20 @@
"agitator": "",
"heater": "8BLRqagLicCdEBDdc77Sgr",
"id": "oHxKz3z5RjbsxfSz6KUgov",
"name": "Test1111111",
"name": "MashTun",
"props": {},
"sensor": "",
"state": {},
"target_temp": null,
"sensor": "8ohkXvFA9UrkHLsxQL38wu",
"state": {
"running": false
},
"target_temp": 52,
"type": "CustomKettleLogic"
},
{
"agitator": "",
"heater": "",
"id": "WxAkesrkqiHH3Gywc4fMci",
"name": "Test",
"name": "HLT",
"props": {
"Param2": "13",
"Param3": 1,
@ -25,43 +27,20 @@
"sensor": "",
"state": {},
"target_temp": null,
"type": "CustomKettleLogic"
},
{
"agitator": "",
"heater": "8BLRqagLicCdEBDdc77Sgr",
"id": "gc9Bwp38jtyxkVWH5oYRNZ",
"name": "Test",
"props": {
"Param3": 1,
"Param5": "8BLRqagLicCdEBDdc77Sgr"
},
"sensor": "",
"state": {},
"target_temp": null,
"type": "CustomKettleLogic"
},
{
"agitator": "",
"heater": "",
"id": "ZfF2N2UnEHtgExNgZJyF5i",
"name": "Test",
"props": {},
"sensor": "",
"state": {},
"target_temp": null,
"type": "CustomKettleLogic"
},
{
"agitator": "",
"heater": "8BLRqagLicCdEBDdc77Sgr",
"id": "oTivUB7LueLeUWoZAnLhwp",
"name": "",
"props": {},
"sensor": "",
"state": {},
"target_temp": null,
"type": ""
},
{
"agitator": "",
"heater": "NjammuygecdvMpoGYc3rXt",
"id": "a7bWex85Z9Td4atwgazpXW",
"name": "Boil",
"props": {},
"sensor": "",
"state": {
"running": false
},
"target_temp": 55,
"type": "CustomKettleLogic"
}
]
}

View file

@ -2,10 +2,10 @@
"data": [
{
"id": "8ohkXvFA9UrkHLsxQL38wu",
"name": "Test1112222",
"name": "Sensor1",
"props": {},
"state": {
"value": 49
"value": 0
},
"type": "CustomSensor"
}

View file

@ -1,15 +1,43 @@
{
"basic": {
"name": ""
"name": "PALE ALE"
},
"profile": [
{
"id": "6mdUtsrBaWeDvKgUXJiLqu",
"name": "Test",
"id": "T2y34Mbex9KjNWXhzfCRby",
"name": "MashIn",
"props": {
"Param1": 123,
"Param2": "HALLO",
"Param3": 1
"Param3": 1,
"count": 1,
"wohoo": 0
},
"status": "P",
"type": "CustomStep2"
},
{
"id": "RjS8Zb2GGpUtNsqHsES3yF",
"name": "Step2",
"props": {
"Param1": 123,
"Param2": "HALLO",
"Param3": 1,
"count": 0,
"wohoo": 0
},
"status": "I",
"type": "CustomStep2"
},
{
"id": "WkZG4fDNxZdtZ7uoTsSHhR",
"name": "Mash Step 1",
"props": {
"Param1": 123,
"Param2": "HALLO",
"Param3": 1,
"count": 0,
"wohoo": 0
},
"status": "I",
"type": "CustomStep2"

View file

@ -28,9 +28,10 @@ setup(name='cbpi',
"voluptuous==0.12.1",
"pyfiglet==0.8.post1",
'pandas==1.1.5',
'click==7.1.2',
'shortuuid==1.0.1',
'tabulate==0.8.7',
'cbpi4-ui==0.0.2',
'cbpi4-ui==0.0.3',
],
dependency_links=[
'https://testpypi.python.org/pypi'

BIN
temp.zip Normal file

Binary file not shown.

View file

@ -1,2 +1,3 @@
/Users/manuelfritsch/Documents/git/cbpi4-ui-plugin
/Users/manuelfritsch/Documents/git/cbpi4-ui
/Users/manuelfritsch/Documents/git/myplugin/plugin1

View file

@ -1,56 +0,0 @@
About the Copyright Holders
===========================
* Copyright (c) 2008-2011 AQR Capital Management, LLC
AQR Capital Management began pandas development in 2008. Development was
led by Wes McKinney. AQR released the source under this license in 2009.
* Copyright (c) 2011-2012, Lambda Foundry, Inc.
Wes is now an employee of Lambda Foundry, and remains the pandas project
lead.
* Copyright (c) 2011-2012, PyData Development Team
The PyData Development Team is the collection of developers of the PyData
project. This includes all of the PyData sub-projects, including pandas. The
core team that coordinates development on GitHub can be found here:
https://github.com/pydata.
Full credits for pandas contributors can be found in the documentation.
Our Copyright Policy
====================
PyData uses a shared copyright model. Each contributor maintains copyright
over their contributions to PyData. However, it is important to note that
these contributions are typically only changes to the repositories. Thus,
the PyData source code, in its entirety, is not the copyright of any single
person or institution. Instead, it is the collective copyright of the
entire PyData Development Team. If individual contributors want to maintain
a record of what changes/contributions they have specific copyright on,
they should indicate their copyright in the commit message of the change
when they commit the change to one of the PyData repositories.
With this in mind, the following banner should be used in any source code
file to indicate the copyright and license terms:
```
#-----------------------------------------------------------------------------
# Copyright (c) 2012, PyData Development Team
# All rights reserved.
#
# Distributed under the terms of the BSD Simplified License.
#
# The full license is in the LICENSE file, distributed with this software.
#-----------------------------------------------------------------------------
```
Other licenses can be found in the LICENSES directory.
License
=======
pandas is distributed under a 3-clause ("Simplified" or "New") BSD
license. Parts of NumPy, SciPy, numpydoc, bottleneck, which all have
BSD-compatible licenses, are included. Their licenses follow the pandas
license.

View file

@ -1,31 +0,0 @@
BSD 3-Clause License
Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
All rights reserved.
Copyright (c) 2011-2020, Open source contributors.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -1,95 +0,0 @@
Metadata-Version: 2.1
Name: pandas
Version: 1.2.0
Summary: Powerful data structures for data analysis, time series, and statistics
Home-page: https://pandas.pydata.org
Maintainer: The PyData Development Team
Maintainer-email: pydata@googlegroups.com
License: BSD
Project-URL: Bug Tracker, https://github.com/pandas-dev/pandas/issues
Project-URL: Documentation, https://pandas.pydata.org/pandas-docs/stable/
Project-URL: Source Code, https://github.com/pandas-dev/pandas
Platform: any
Classifier: Development Status :: 5 - Production/Stable
Classifier: Environment :: Console
Classifier: Operating System :: OS Independent
Classifier: Intended Audience :: Science/Research
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Cython
Classifier: Topic :: Scientific/Engineering
Requires-Python: >=3.7.1
Requires-Dist: python-dateutil (>=2.7.3)
Requires-Dist: pytz (>=2017.3)
Requires-Dist: numpy (>=1.16.5)
Provides-Extra: test
Requires-Dist: pytest (>=5.0.1) ; extra == 'test'
Requires-Dist: pytest-xdist ; extra == 'test'
Requires-Dist: hypothesis (>=3.58) ; extra == 'test'
**pandas** is a Python package that provides fast, flexible, and expressive data
structures designed to make working with structured (tabular, multidimensional,
potentially heterogeneous) and time series data both easy and intuitive. It
aims to be the fundamental high-level building block for doing practical,
**real world** data analysis in Python. Additionally, it has the broader goal
of becoming **the most powerful and flexible open source data analysis /
manipulation tool available in any language**. It is already well on its way
toward this goal.
pandas is well suited for many different kinds of data:
- Tabular data with heterogeneously-typed columns, as in an SQL table or
Excel spreadsheet
- Ordered and unordered (not necessarily fixed-frequency) time series data.
- Arbitrary matrix data (homogeneously typed or heterogeneous) with row and
column labels
- Any other form of observational / statistical data sets. The data actually
need not be labeled at all to be placed into a pandas data structure
The two primary data structures of pandas, Series (1-dimensional) and DataFrame
(2-dimensional), handle the vast majority of typical use cases in finance,
statistics, social science, and many areas of engineering. For R users,
DataFrame provides everything that R's ``data.frame`` provides and much
more. pandas is built on top of `NumPy <https://www.numpy.org>`__ and is
intended to integrate well within a scientific computing environment with many
other 3rd party libraries.
Here are just a few of the things that pandas does well:
- Easy handling of **missing data** (represented as NaN) in floating point as
well as non-floating point data
- Size mutability: columns can be **inserted and deleted** from DataFrame and
higher dimensional objects
- Automatic and explicit **data alignment**: objects can be explicitly
aligned to a set of labels, or the user can simply ignore the labels and
let `Series`, `DataFrame`, etc. automatically align the data for you in
computations
- Powerful, flexible **group by** functionality to perform
split-apply-combine operations on data sets, for both aggregating and
transforming data
- Make it **easy to convert** ragged, differently-indexed data in other
Python and NumPy data structures into DataFrame objects
- Intelligent label-based **slicing**, **fancy indexing**, and **subsetting**
of large data sets
- Intuitive **merging** and **joining** data sets
- Flexible **reshaping** and pivoting of data sets
- **Hierarchical** labeling of axes (possible to have multiple labels per
tick)
- Robust IO tools for loading data from **flat files** (CSV and delimited),
Excel files, databases, and saving / loading data from the ultrafast **HDF5
format**
- **Time series**-specific functionality: date range generation and frequency
conversion, moving window statistics, date shifting and lagging.
Many of these principles are here to address the shortcomings frequently
experienced using other languages / scientific research environments. For data
scientists, working with data is typically divided into multiple stages:
munging and cleaning data, analyzing / modeling it, then organizing the results
of the analysis into a form suitable for plotting or tabular display. pandas is
the ideal tool for all of these tasks.

File diff suppressed because it is too large Load diff

View file

@ -1,5 +0,0 @@
Wheel-Version: 1.0
Generator: bdist_wheel (0.36.2)
Root-Is-Purelib: false
Tag: cp38-cp38-macosx_10_9_x86_64

View file

@ -1,3 +0,0 @@
[pandas_plotting_backends]
matplotlib = pandas:plotting._matplotlib

View file

@ -20,9 +20,10 @@ del hard_dependencies, dependency, missing_dependencies
# numpy compat
from pandas.compat.numpy import (
np_version_under1p17 as _np_version_under1p17,
np_version_under1p18 as _np_version_under1p18,
is_numpy_dev as _is_numpy_dev,
_np_version_under1p16,
_np_version_under1p17,
_np_version_under1p18,
_is_numpy_dev,
)
try:
@ -33,7 +34,7 @@ except ImportError as e: # pragma: no cover
raise ImportError(
f"C extension: {module} not built. If you want to import "
"pandas from the source directory, you may need to run "
"'python setup.py build_ext --force' to build the C extensions first."
"'python setup.py build_ext --inplace --force' to build the C extensions first."
) from e
from pandas._config import (
@ -58,8 +59,6 @@ from pandas.core.api import (
UInt16Dtype,
UInt32Dtype,
UInt64Dtype,
Float32Dtype,
Float64Dtype,
CategoricalDtype,
PeriodDtype,
IntervalDtype,
@ -102,7 +101,6 @@ from pandas.core.api import (
to_datetime,
to_timedelta,
# misc
Flags,
Grouper,
factorize,
unique,
@ -187,12 +185,28 @@ __version__ = v.get("closest-tag", v["version"])
__git_version__ = v.get("full-revisionid")
del get_versions, v
# GH 27101
# TODO: remove Panel compat in 1.0
if pandas.compat.PY37:
def __getattr__(name):
import warnings
if name == "datetime":
if name == "Panel":
warnings.warn(
"The Panel class is removed from pandas. Accessing it "
"from the top-level namespace will also be removed in the next version",
FutureWarning,
stacklevel=2,
)
class Panel:
pass
return Panel
elif name == "datetime":
warnings.warn(
"The pandas.datetime class is deprecated "
"and will be removed from pandas in a future version. "
@ -244,6 +258,110 @@ def __getattr__(name):
raise AttributeError(f"module 'pandas' has no attribute '{name}'")
else:
class Panel:
pass
class SparseDataFrame:
pass
class SparseSeries:
pass
class __numpy:
def __init__(self):
import numpy as np
import warnings
self.np = np
self.warnings = warnings
def __getattr__(self, item):
self.warnings.warn(
"The pandas.np module is deprecated "
"and will be removed from pandas in a future version. "
"Import numpy directly instead",
FutureWarning,
stacklevel=2,
)
try:
return getattr(self.np, item)
except AttributeError as err:
raise AttributeError(f"module numpy has no attribute {item}") from err
np = __numpy()
class __Datetime(type):
from datetime import datetime as dt
datetime = dt
def __getattr__(cls, item):
cls.emit_warning()
try:
return getattr(cls.datetime, item)
except AttributeError as err:
raise AttributeError(
f"module datetime has no attribute {item}"
) from err
def __instancecheck__(cls, other):
return isinstance(other, cls.datetime)
class __DatetimeSub(metaclass=__Datetime):
def emit_warning(dummy=0):
import warnings
warnings.warn(
"The pandas.datetime class is deprecated "
"and will be removed from pandas in a future version. "
"Import from datetime instead.",
FutureWarning,
stacklevel=3,
)
def __new__(cls, *args, **kwargs):
cls.emit_warning()
from datetime import datetime as dt
return dt(*args, **kwargs)
datetime = __DatetimeSub
class __SparseArray(type):
from pandas.core.arrays.sparse import SparseArray as sa
SparseArray = sa
def __instancecheck__(cls, other):
return isinstance(other, cls.SparseArray)
class __SparseArraySub(metaclass=__SparseArray):
def emit_warning(dummy=0):
import warnings
warnings.warn(
"The pandas.SparseArray class is deprecated "
"and will be removed from pandas in a future version. "
"Use pandas.arrays.SparseArray instead.",
FutureWarning,
stacklevel=3,
)
def __new__(cls, *args, **kwargs):
cls.emit_warning()
from pandas.core.arrays.sparse import SparseArray as sa
return sa(*args, **kwargs)
SparseArray = __SparseArraySub
# module level doc-string
__doc__ = """
pandas - a powerful data analysis and manipulation library for Python

View file

@ -392,7 +392,7 @@ class option_context(ContextDecorator):
"""
def __init__(self, *args):
if len(args) % 2 != 0 or len(args) < 2:
if not (len(args) % 2 == 0 and len(args) >= 2):
raise ValueError(
"Need to invoke as option_context(pat, val, [(pat, val), ...])."
)
@ -460,7 +460,9 @@ def register_option(
path = key.split(".")
for k in path:
if not re.match("^" + tokenize.Name + "$", k):
# NOTE: tokenize.Name is not a public constant
# error: Module has no attribute "Name" [attr-defined]
if not re.match("^" + tokenize.Name + "$", k): # type: ignore
raise ValueError(f"{k} is not a valid identifier")
if keyword.iskeyword(k):
raise ValueError(f"{k} is a python keyword")
@ -648,7 +650,7 @@ def _build_option_description(k: str) -> str:
s += f"\n [default: {o.defval}] [currently: {_get_option(k, True)}]"
if d:
rkey = d.rkey or ""
rkey = d.rkey if d.rkey else ""
s += "\n (Deprecated"
s += f", use `{rkey}` instead."
s += ")"

View file

@ -22,7 +22,7 @@ def detect_console_encoding() -> str:
encoding = None
try:
encoding = sys.stdout.encoding or sys.stdin.encoding
except (AttributeError, OSError):
except (AttributeError, IOError):
pass
# try again for something better

View file

@ -88,18 +88,17 @@ def _valid_locales(locales, normalize):
valid_locales : list
A list of valid locales.
"""
return [
loc
for loc in (
locale.normalize(loc.strip()) if normalize else loc.strip()
for loc in locales
)
if can_set_locale(loc)
]
if normalize:
normalizer = lambda x: locale.normalize(x.strip())
else:
normalizer = lambda x: x.strip()
return list(filter(can_set_locale, map(normalizer, locales)))
def _default_locale_getter():
return subprocess.check_output(["locale -a"], shell=True)
raw_locales = subprocess.check_output(["locale -a"], shell=True)
return raw_locales
def get_locales(prefix=None, normalize=True, locale_getter=_default_locale_getter):

View file

@ -6,7 +6,6 @@ from functools import wraps
import gzip
import operator
import os
import re
from shutil import rmtree
import string
import tempfile
@ -26,7 +25,7 @@ from pandas._config.localization import ( # noqa:F401
from pandas._libs.lib import no_default
import pandas._libs.testing as _testing
from pandas._typing import Dtype, FilePathOrBuffer, FrameOrSeries
from pandas.compat import get_lzma_file, import_lzma
from pandas.compat import _get_lzma_file, _import_lzma
from pandas.core.dtypes.common import (
is_bool,
@ -71,7 +70,7 @@ from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
from pandas.io.common import urlopen
from pandas.io.formats.printing import pprint_thing
lzma = import_lzma()
lzma = _import_lzma()
_N = 30
_K = 4
@ -85,7 +84,6 @@ ALL_INT_DTYPES = UNSIGNED_INT_DTYPES + SIGNED_INT_DTYPES
ALL_EA_INT_DTYPES = UNSIGNED_EA_INT_DTYPES + SIGNED_EA_INT_DTYPES
FLOAT_DTYPES: List[Dtype] = [float, "float32", "float64"]
FLOAT_EA_DTYPES: List[Dtype] = ["Float32", "Float64"]
COMPLEX_DTYPES: List[Dtype] = [complex, "complex64", "complex128"]
STRING_DTYPES: List[Dtype] = [str, "str", "U"]
@ -108,8 +106,6 @@ ALL_NUMPY_DTYPES = (
+ BYTES_DTYPES
)
NULL_OBJECTS = [None, np.nan, pd.NaT, float("nan"), pd.NA]
# set testing_mode
_testing_mode_warnings = (DeprecationWarning, ResourceWarning)
@ -119,24 +115,14 @@ def set_testing_mode():
# set the testing mode filters
testing_mode = os.environ.get("PANDAS_TESTING_MODE", "None")
if "deprecate" in testing_mode:
# pandas\_testing.py:119: error: Argument 2 to "simplefilter" has
# incompatible type "Tuple[Type[DeprecationWarning],
# Type[ResourceWarning]]"; expected "Type[Warning]"
warnings.simplefilter(
"always", _testing_mode_warnings # type: ignore[arg-type]
)
warnings.simplefilter("always", _testing_mode_warnings)
def reset_testing_mode():
# reset the testing mode filters
testing_mode = os.environ.get("PANDAS_TESTING_MODE", "None")
if "deprecate" in testing_mode:
# pandas\_testing.py:126: error: Argument 2 to "simplefilter" has
# incompatible type "Tuple[Type[DeprecationWarning],
# Type[ResourceWarning]]"; expected "Type[Warning]"
warnings.simplefilter(
"ignore", _testing_mode_warnings # type: ignore[arg-type]
)
warnings.simplefilter("ignore", _testing_mode_warnings)
set_testing_mode()
@ -253,22 +239,16 @@ def decompress_file(path, compression):
if compression is None:
f = open(path, "rb")
elif compression == "gzip":
# pandas\_testing.py:243: error: Incompatible types in assignment
# (expression has type "IO[Any]", variable has type "BinaryIO")
f = gzip.open(path, "rb") # type: ignore[assignment]
f = gzip.open(path, "rb")
elif compression == "bz2":
# pandas\_testing.py:245: error: Incompatible types in assignment
# (expression has type "BZ2File", variable has type "BinaryIO")
f = bz2.BZ2File(path, "rb") # type: ignore[assignment]
f = bz2.BZ2File(path, "rb")
elif compression == "xz":
f = get_lzma_file(lzma)(path, "rb")
f = _get_lzma_file(lzma)(path, "rb")
elif compression == "zip":
zip_file = zipfile.ZipFile(path)
zip_names = zip_file.namelist()
if len(zip_names) == 1:
# pandas\_testing.py:252: error: Incompatible types in assignment
# (expression has type "IO[bytes]", variable has type "BinaryIO")
f = zip_file.open(zip_names.pop()) # type: ignore[assignment]
f = zip_file.open(zip_names.pop())
else:
raise ValueError(f"ZIP file {path} error. Only one file per ZIP.")
else:
@ -304,17 +284,11 @@ def write_to_compressed(compression, path, data, dest="test"):
if compression == "zip":
compress_method = zipfile.ZipFile
elif compression == "gzip":
# pandas\_testing.py:288: error: Incompatible types in assignment
# (expression has type "Type[GzipFile]", variable has type
# "Type[ZipFile]")
compress_method = gzip.GzipFile # type: ignore[assignment]
compress_method = gzip.GzipFile
elif compression == "bz2":
# pandas\_testing.py:290: error: Incompatible types in assignment
# (expression has type "Type[BZ2File]", variable has type
# "Type[ZipFile]")
compress_method = bz2.BZ2File # type: ignore[assignment]
compress_method = bz2.BZ2File
elif compression == "xz":
compress_method = get_lzma_file(lzma)
compress_method = _get_lzma_file(lzma)
else:
raise ValueError(f"Unrecognized compression type: {compression}")
@ -324,10 +298,7 @@ def write_to_compressed(compression, path, data, dest="test"):
method = "writestr"
else:
mode = "wb"
# pandas\_testing.py:302: error: Incompatible types in assignment
# (expression has type "Tuple[Any]", variable has type "Tuple[Any,
# Any]")
args = (data,) # type: ignore[assignment]
args = (data,)
method = "write"
with compress_method(path, mode=mode) as f:
@ -694,7 +665,6 @@ def assert_index_equal(
check_less_precise: Union[bool, int] = no_default,
check_exact: bool = True,
check_categorical: bool = True,
check_order: bool = True,
rtol: float = 1.0e-5,
atol: float = 1.0e-8,
obj: str = "Index",
@ -724,12 +694,6 @@ def assert_index_equal(
Whether to compare number exactly.
check_categorical : bool, default True
Whether to compare internal Categorical exactly.
check_order : bool, default True
Whether to compare the order of index entries as well as their values.
If True, both indexes must contain the same elements, in the same order.
If False, both indexes must contain the same elements, but in any order.
.. versionadded:: 1.2.0
rtol : float, default 1e-5
Relative tolerance. Only used when check_exact is False.
@ -741,36 +705,30 @@ def assert_index_equal(
obj : str, default 'Index'
Specify object name being compared, internally used to show appropriate
assertion message.
Examples
--------
>>> from pandas.testing import assert_index_equal
>>> a = pd.Index([1, 2, 3])
>>> b = pd.Index([1, 2, 3])
>>> assert_index_equal(a, b)
"""
__tracebackhide__ = True
def _check_types(left, right, obj="Index"):
def _check_types(l, r, obj="Index"):
if exact:
assert_class_equal(left, right, exact=exact, obj=obj)
assert_class_equal(l, r, exact=exact, obj=obj)
# Skip exact dtype checking when `check_categorical` is False
if check_categorical:
assert_attr_equal("dtype", left, right, obj=obj)
assert_attr_equal("dtype", l, r, obj=obj)
# allow string-like to have different inferred_types
if left.inferred_type in ("string"):
assert right.inferred_type in ("string")
if l.inferred_type in ("string"):
assert r.inferred_type in ("string")
else:
assert_attr_equal("inferred_type", left, right, obj=obj)
assert_attr_equal("inferred_type", l, r, obj=obj)
def _get_ilevel_values(index, level):
# accept level number only
unique = index.levels[level]
level_codes = index.codes[level]
filled = take_1d(unique._values, level_codes, fill_value=unique._na_value)
return unique._shallow_copy(filled, name=index.names[level])
values = unique._shallow_copy(filled, name=index.names[level])
return values
if check_less_precise is not no_default:
warnings.warn(
@ -802,11 +760,6 @@ def assert_index_equal(
msg3 = f"{len(right)}, {right}"
raise_assert_detail(obj, msg1, msg2, msg3)
# If order doesn't matter then sort the index entries
if not check_order:
left = left.sort_values()
right = right.sort_values()
# MultiIndex special comparison for little-friendly error messages
if left.nlevels > 1:
left = cast(MultiIndex, left)
@ -986,7 +939,7 @@ def assert_categorical_equal(
if check_category_order:
assert_index_equal(left.categories, right.categories, obj=f"{obj}.categories")
assert_numpy_array_equal(
left.codes, right.codes, check_dtype=check_dtype, obj=f"{obj}.codes"
left.codes, right.codes, check_dtype=check_dtype, obj=f"{obj}.codes",
)
else:
try:
@ -995,7 +948,9 @@ def assert_categorical_equal(
except TypeError:
# e.g. '<' not supported between instances of 'int' and 'str'
lc, rc = left.categories, right.categories
assert_index_equal(lc, rc, obj=f"{obj}.categories")
assert_index_equal(
lc, rc, obj=f"{obj}.categories",
)
assert_index_equal(
left.categories.take(left.codes),
right.categories.take(right.codes),
@ -1023,14 +978,8 @@ def assert_interval_array_equal(left, right, exact="equiv", obj="IntervalArray")
"""
_check_isinstance(left, right, IntervalArray)
kwargs = {}
if left._left.dtype.kind in ["m", "M"]:
# We have a DatetimeArray or TimedeltaArray
kwargs["check_freq"] = False
assert_equal(left._left, right._left, obj=f"{obj}.left", **kwargs)
assert_equal(left._right, right._right, obj=f"{obj}.left", **kwargs)
assert_index_equal(left.left, right.left, exact=exact, obj=f"{obj}.left")
assert_index_equal(left.right, right.right, exact=exact, obj=f"{obj}.left")
assert_attr_equal("closed", left, right, obj=obj)
@ -1041,21 +990,19 @@ def assert_period_array_equal(left, right, obj="PeriodArray"):
assert_attr_equal("freq", left, right, obj=obj)
def assert_datetime_array_equal(left, right, obj="DatetimeArray", check_freq=True):
def assert_datetime_array_equal(left, right, obj="DatetimeArray"):
__tracebackhide__ = True
_check_isinstance(left, right, DatetimeArray)
assert_numpy_array_equal(left._data, right._data, obj=f"{obj}._data")
if check_freq:
assert_attr_equal("freq", left, right, obj=obj)
assert_attr_equal("tz", left, right, obj=obj)
def assert_timedelta_array_equal(left, right, obj="TimedeltaArray", check_freq=True):
def assert_timedelta_array_equal(left, right, obj="TimedeltaArray"):
__tracebackhide__ = True
_check_isinstance(left, right, TimedeltaArray)
assert_numpy_array_equal(left._data, right._data, obj=f"{obj}._data")
if check_freq:
assert_attr_equal("freq", left, right, obj=obj)
@ -1145,13 +1092,13 @@ def assert_numpy_array_equal(
if err_msg is None:
if left.shape != right.shape:
raise_assert_detail(
obj, f"{obj} shapes are different", left.shape, right.shape
obj, f"{obj} shapes are different", left.shape, right.shape,
)
diff = 0
for left_arr, right_arr in zip(left, right):
for l, r in zip(left, right):
# count up differences
if not array_equivalent(left_arr, right_arr, strict_nan=strict_nan):
if not array_equivalent(l, r, strict_nan=strict_nan):
diff += 1
diff = diff * 100.0 / left.size
@ -1214,13 +1161,6 @@ def assert_extension_array_equal(
Missing values are checked separately from valid values.
A mask of missing values is computed for each and checked to match.
The remaining all-valid values are cast to object dtype and checked.
Examples
--------
>>> from pandas.testing import assert_extension_array_equal
>>> a = pd.Series([1, 2, 3, 4])
>>> b, c = a.array, a.array
>>> assert_extension_array_equal(b, c)
"""
if check_less_precise is not no_default:
warnings.warn(
@ -1287,7 +1227,6 @@ def assert_series_equal(
check_categorical=True,
check_category_order=True,
check_freq=True,
check_flags=True,
rtol=1.0e-5,
atol=1.0e-8,
obj="Series",
@ -1334,11 +1273,6 @@ def assert_series_equal(
.. versionadded:: 1.0.2
check_freq : bool, default True
Whether to check the `freq` attribute on a DatetimeIndex or TimedeltaIndex.
check_flags : bool, default True
Whether to check the `flags` attribute.
.. versionadded:: 1.2.0
rtol : float, default 1e-5
Relative tolerance. Only used when check_exact is False.
@ -1350,13 +1284,6 @@ def assert_series_equal(
obj : str, default 'Series'
Specify object name being compared, internally used to show appropriate
assertion message.
Examples
--------
>>> from pandas.testing import assert_series_equal
>>> a = pd.Series([1, 2, 3, 4])
>>> b = pd.Series([1, 2, 3, 4])
>>> assert_series_equal(a, b)
"""
__tracebackhide__ = True
@ -1382,9 +1309,6 @@ def assert_series_equal(
msg2 = f"{len(right)}, {right.index}"
raise_assert_detail(obj, "Series length are different", msg1, msg2)
if check_flags:
assert left.flags == right.flags, f"{repr(left.flags)} != {repr(right.flags)}"
# index comparison
assert_index_equal(
left.index,
@ -1458,16 +1382,7 @@ def assert_series_equal(
check_dtype=check_dtype,
index_values=np.asarray(left.index),
)
elif is_extension_array_dtype_and_needs_i8_conversion(
left.dtype, right.dtype
) or is_extension_array_dtype_and_needs_i8_conversion(right.dtype, left.dtype):
assert_extension_array_equal(
left._values,
right._values,
check_dtype=check_dtype,
index_values=np.asarray(left.index),
)
elif needs_i8_conversion(left.dtype) and needs_i8_conversion(right.dtype):
elif needs_i8_conversion(left.dtype) or needs_i8_conversion(right.dtype):
# DatetimeArray or TimedeltaArray
assert_extension_array_equal(
left._values,
@ -1516,7 +1431,6 @@ def assert_frame_equal(
check_categorical=True,
check_like=False,
check_freq=True,
check_flags=True,
rtol=1.0e-5,
atol=1.0e-8,
obj="DataFrame",
@ -1578,8 +1492,6 @@ def assert_frame_equal(
(same as in columns) - same labels must be with the same data.
check_freq : bool, default True
Whether to check the `freq` attribute on a DatetimeIndex or TimedeltaIndex.
check_flags : bool, default True
Whether to check the `flags` attribute.
rtol : float, default 1e-5
Relative tolerance. Only used when check_exact is False.
@ -1647,11 +1559,11 @@ def assert_frame_equal(
# shape comparison
if left.shape != right.shape:
raise_assert_detail(
obj, f"{obj} shape mismatch", f"{repr(left.shape)}", f"{repr(right.shape)}"
obj, f"{obj} shape mismatch", f"{repr(left.shape)}", f"{repr(right.shape)}",
)
if check_flags:
assert left.flags == right.flags, f"{repr(left.flags)} != {repr(right.flags)}"
if check_like:
left, right = left.reindex_like(right), right
# index comparison
assert_index_equal(
@ -1661,7 +1573,6 @@ def assert_frame_equal(
check_names=check_names,
check_exact=check_exact,
check_categorical=check_categorical,
check_order=not check_like,
rtol=rtol,
atol=atol,
obj=f"{obj}.index",
@ -1675,15 +1586,11 @@ def assert_frame_equal(
check_names=check_names,
check_exact=check_exact,
check_categorical=check_categorical,
check_order=not check_like,
rtol=rtol,
atol=atol,
obj=f"{obj}.columns",
)
if check_like:
left, right = left.reindex_like(right), right
# compare by blocks
if by_blocks:
rblocks = right._to_dict_of_blocks()
@ -1779,7 +1686,7 @@ def box_expected(expected, box_cls, transpose=True):
elif box_cls is pd.DataFrame:
expected = pd.Series(expected).to_frame()
if transpose:
# for vector operations, we need a DataFrame to be a single-row,
# for vector operations, we we need a DataFrame to be a single-row,
# not a single-column, in order to operate against non-DataFrame
# vectors of the same length.
expected = expected.T
@ -1877,20 +1784,6 @@ def assert_copy(iter1, iter2, **eql_kwargs):
assert elem1 is not elem2, msg
def is_extension_array_dtype_and_needs_i8_conversion(left_dtype, right_dtype) -> bool:
"""
Checks that we have the combination of an ExtensionArraydtype and
a dtype that should be converted to int64
Returns
-------
bool
Related to issue #37609
"""
return is_extension_array_dtype(left_dtype) and needs_i8_conversion(right_dtype)
def getCols(k):
return string.ascii_uppercase[:k]
@ -1955,7 +1848,8 @@ def makeTimedeltaIndex(k=10, freq="D", name=None, **kwargs):
def makePeriodIndex(k=10, name=None, **kwargs):
dt = datetime(2000, 1, 1)
return pd.period_range(start=dt, periods=k, freq="B", name=name, **kwargs)
dr = pd.period_range(start=dt, periods=k, freq="B", name=name, **kwargs)
return dr
def makeMultiIndex(k=10, names=None, **kwargs):
@ -2053,7 +1947,8 @@ def index_subclass_makers_generator():
makeCategoricalIndex,
makeMultiIndex,
]
yield from make_index_funcs
for make_index_func in make_index_funcs:
yield make_index_func
def all_timeseries_index_generator(k=10):
@ -2067,8 +1962,7 @@ def all_timeseries_index_generator(k=10):
"""
make_index_funcs = [makeDateIndex, makePeriodIndex, makeTimedeltaIndex]
for make_index_func in make_index_funcs:
# pandas\_testing.py:1986: error: Cannot call function of unknown type
yield make_index_func(k=k) # type: ignore[operator]
yield make_index_func(k=k)
# make series
@ -2192,18 +2086,17 @@ def makeCustomIndex(
names = [names]
# specific 1D index type requested?
idx_func = {
"i": makeIntIndex,
"f": makeFloatIndex,
"s": makeStringIndex,
"u": makeUnicodeIndex,
"dt": makeDateIndex,
"td": makeTimedeltaIndex,
"p": makePeriodIndex,
}.get(idx_type)
idx_func = dict(
i=makeIntIndex,
f=makeFloatIndex,
s=makeStringIndex,
u=makeUnicodeIndex,
dt=makeDateIndex,
td=makeTimedeltaIndex,
p=makePeriodIndex,
).get(idx_type)
if idx_func:
# pandas\_testing.py:2120: error: Cannot call function of unknown type
idx = idx_func(nentries) # type: ignore[operator]
idx = idx_func(nentries)
# but we need to fill in the name
if names:
idx.name = names[0]
@ -2231,8 +2124,7 @@ def makeCustomIndex(
# build a list of lists to create the index from
div_factor = nentries // ndupe_l[i] + 1
# pandas\_testing.py:2148: error: Need type annotation for 'cnt'
cnt = Counter() # type: ignore[var-annotated]
cnt = Counter()
for j in range(div_factor):
label = f"{prefix}_l{i}_g{j}"
cnt[label] = ndupe_l[i]
@ -2390,14 +2282,7 @@ def _create_missing_idx(nrows, ncols, density, random_state=None):
def makeMissingDataframe(density=0.9, random_state=None):
df = makeDataFrame()
# pandas\_testing.py:2306: error: "_create_missing_idx" gets multiple
# values for keyword argument "density" [misc]
# pandas\_testing.py:2306: error: "_create_missing_idx" gets multiple
# values for keyword argument "random_state" [misc]
i, j = _create_missing_idx( # type: ignore[misc]
*df.shape, density=density, random_state=random_state
)
i, j = _create_missing_idx(*df.shape, density=density, random_state=random_state)
df.values[i, j] = np.nan
return df
@ -2422,10 +2307,7 @@ def optional_args(decorator):
is_decorating = not kwargs and len(args) == 1 and callable(args[0])
if is_decorating:
f = args[0]
# pandas\_testing.py:2331: error: Incompatible types in assignment
# (expression has type "List[<nothing>]", variable has type
# "Tuple[Any, ...]")
args = [] # type: ignore[assignment]
args = []
return dec(f)
else:
return dec
@ -2509,7 +2391,7 @@ def can_connect(url, error_classes=None):
@optional_args
def network(
t,
url="https://www.google.com",
url="http://www.google.com",
raise_on_error=_RAISE_NETWORK_ERROR_DEFAULT,
check_before_test=False,
error_classes=None,
@ -2533,7 +2415,7 @@ def network(
The test requiring network connectivity.
url : path
The url to test via ``pandas.io.common.urlopen`` to check
for connectivity. Defaults to 'https://www.google.com'.
for connectivity. Defaults to 'http://www.google.com'.
raise_on_error : bool
If True, never catches errors.
check_before_test : bool
@ -2577,7 +2459,7 @@ def network(
You can specify alternative URLs::
>>> @network("https://www.yahoo.com")
>>> @network("http://www.yahoo.com")
... def test_something_with_yahoo():
... raise IOError("Failure Message")
>>> test_something_with_yahoo()
@ -2607,20 +2489,15 @@ def network(
@wraps(t)
def wrapper(*args, **kwargs):
if (
check_before_test
and not raise_on_error
and not can_connect(url, error_classes)
):
if check_before_test and not raise_on_error:
if not can_connect(url, error_classes):
skip()
try:
return t(*args, **kwargs)
except Exception as err:
errno = getattr(err, "errno", None)
if not errno and hasattr(errno, "reason"):
# pandas\_testing.py:2521: error: "Exception" has no attribute
# "reason"
errno = getattr(err.reason, "errno", None) # type: ignore[attr-defined]
errno = getattr(err.reason, "errno", None)
if errno in skip_errnos:
skip(f"Skipping test due to known errno and error {err}")
@ -2648,11 +2525,10 @@ with_connectivity_check = network
@contextmanager
def assert_produces_warning(
expected_warning: Optional[Union[Type[Warning], bool]] = Warning,
expected_warning=Warning,
filter_level="always",
check_stacklevel: bool = True,
raise_on_extra_warnings: bool = True,
match: Optional[str] = None,
check_stacklevel=True,
raise_on_extra_warnings=True,
):
"""
Context manager for running code expected to either raise a specific
@ -2687,8 +2563,6 @@ def assert_produces_warning(
raise_on_extra_warnings : bool, default True
Whether extra warnings not of the type `expected_warning` should
cause the test to fail.
match : str, optional
Match warning message.
Examples
--------
@ -2715,28 +2589,28 @@ def assert_produces_warning(
with warnings.catch_warnings(record=True) as w:
saw_warning = False
matched_message = False
warnings.simplefilter(filter_level)
yield w
extra_warnings = []
for actual_warning in w:
if not expected_warning:
continue
expected_warning = cast(Type[Warning], expected_warning)
if issubclass(actual_warning.category, expected_warning):
if expected_warning and issubclass(
actual_warning.category, expected_warning
):
saw_warning = True
if check_stacklevel and issubclass(
actual_warning.category, (FutureWarning, DeprecationWarning)
):
_assert_raised_with_correct_stacklevel(actual_warning)
if match is not None and re.search(match, str(actual_warning.message)):
matched_message = True
from inspect import getframeinfo, stack
caller = getframeinfo(stack()[2][0])
msg = (
"Warning not set with correct stacklevel. "
f"File where warning is raised: {actual_warning.filename} != "
f"{caller.filename}. Warning message: {actual_warning.message}"
)
assert actual_warning.filename == caller.filename, msg
else:
extra_warnings.append(
(
@ -2746,41 +2620,18 @@ def assert_produces_warning(
actual_warning.lineno,
)
)
if expected_warning:
expected_warning = cast(Type[Warning], expected_warning)
if not saw_warning:
raise AssertionError(
msg = (
f"Did not see expected warning of class "
f"{repr(expected_warning.__name__)}"
)
if match and not matched_message:
raise AssertionError(
f"Did not see warning {repr(expected_warning.__name__)} "
f"matching {match}"
)
assert saw_warning, msg
if raise_on_extra_warnings and extra_warnings:
raise AssertionError(
f"Caused unexpected warning(s): {repr(extra_warnings)}"
)
def _assert_raised_with_correct_stacklevel(
actual_warning: warnings.WarningMessage,
) -> None:
from inspect import getframeinfo, stack
caller = getframeinfo(stack()[3][0])
msg = (
"Warning not set with correct stacklevel. "
f"File where warning is raised: {actual_warning.filename} != "
f"{caller.filename}. Warning message: {actual_warning.message}"
)
assert actual_warning.filename == caller.filename, msg
class RNGContext:
"""
Context manager to set the numpy random number generator speed. Returns
@ -2849,7 +2700,7 @@ def use_numexpr(use, min_elements=None):
if min_elements is None:
min_elements = expr._MIN_ELEMENTS
olduse = expr.USE_NUMEXPR
olduse = expr._USE_NUMEXPR
oldmin = expr._MIN_ELEMENTS
expr.set_use_numexpr(use)
expr._MIN_ELEMENTS = min_elements
@ -3029,10 +2880,11 @@ def convert_rows_list_to_csv_str(rows_list: List[str]):
Expected output of to_csv() in current OS.
"""
sep = os.linesep
return sep.join(rows_list) + sep
expected = sep.join(rows_list) + sep
return expected
def external_error_raised(expected_exception: Type[Exception]) -> ContextManager:
def external_error_raised(expected_exception: Type[Exception],) -> ContextManager:
"""
Helper function to mark pytest.raises that have an external error message.

View file

@ -1,7 +1,5 @@
from datetime import datetime, timedelta, tzinfo
from io import BufferedIOBase, RawIOBase, TextIOBase, TextIOWrapper
from mmap import mmap
from os import PathLike
from pathlib import Path
from typing import (
IO,
TYPE_CHECKING,
@ -14,8 +12,6 @@ from typing import (
List,
Mapping,
Optional,
Sequence,
Tuple,
Type,
TypeVar,
Union,
@ -27,27 +23,16 @@ import numpy as np
# and use a string literal forward reference to it in subsequent types
# https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles
if TYPE_CHECKING:
from typing import final
from pandas._libs import Period, Timedelta, Timestamp # noqa: F401
from pandas._libs import Period, Timedelta, Timestamp
from pandas.core.dtypes.dtypes import ExtensionDtype # noqa: F401
from pandas.core.dtypes.dtypes import ExtensionDtype
from pandas import Interval
from pandas import Interval # noqa: F401
from pandas.core.arrays.base import ExtensionArray # noqa: F401
from pandas.core.frame import DataFrame
from pandas.core.frame import DataFrame # noqa: F401
from pandas.core.generic import NDFrame # noqa: F401
from pandas.core.groupby.generic import DataFrameGroupBy, SeriesGroupBy
from pandas.core.indexes.base import Index
from pandas.core.resample import Resampler
from pandas.core.series import Series
from pandas.core.window.rolling import BaseWindow
from pandas.io.formats.format import EngFormatter
else:
# typing.final does not exist until py38
final = lambda x: x
from pandas.core.indexes.base import Index # noqa: F401
from pandas.core.series import Series # noqa: F401
# array-like
@ -74,9 +59,10 @@ Timezone = Union[str, tzinfo]
# other
Dtype = Union[
"ExtensionDtype", str, np.dtype, Type[Union[str, float, int, complex, bool, object]]
"ExtensionDtype", str, np.dtype, Type[Union[str, float, int, complex, bool]]
]
DtypeObj = Union[np.dtype, "ExtensionDtype"]
FilePathOrBuffer = Union[str, Path, IO[AnyStr]]
# FrameOrSeriesUnion means either a DataFrame or a Series. E.g.
# `def func(a: FrameOrSeriesUnion) -> FrameOrSeriesUnion: ...` means that if a Series
@ -92,9 +78,7 @@ FrameOrSeries = TypeVar("FrameOrSeries", bound="NDFrame")
Axis = Union[str, int]
Label = Optional[Hashable]
IndexLabel = Union[Label, Sequence[Label]]
Level = Union[Label, int]
Shape = Tuple[int, ...]
Ordered = Optional[bool]
JSONSerializable = Optional[Union[PythonScalar, List, Dict]]
Axes = Collection
@ -117,34 +101,8 @@ IndexKeyFunc = Optional[Callable[["Index"], Union["Index", AnyArrayLike]]]
# types of `func` kwarg for DataFrame.aggregate and Series.aggregate
AggFuncTypeBase = Union[Callable, str]
AggFuncTypeDict = Dict[Label, Union[AggFuncTypeBase, List[AggFuncTypeBase]]]
AggFuncType = Union[
AggFuncTypeBase,
List[AggFuncTypeBase],
AggFuncTypeDict,
Dict[Label, Union[AggFuncTypeBase, List[AggFuncTypeBase]]],
]
AggObjType = Union[
"Series",
"DataFrame",
"SeriesGroupBy",
"DataFrameGroupBy",
"BaseWindow",
"Resampler",
]
# filenames and file-like-objects
Buffer = Union[IO[AnyStr], RawIOBase, BufferedIOBase, TextIOBase, TextIOWrapper, mmap]
FileOrBuffer = Union[str, Buffer[T]]
FilePathOrBuffer = Union["PathLike[str]", FileOrBuffer[T]]
# for arbitrary kwargs passed during reading/writing files
StorageOptions = Optional[Dict[str, Any]]
# compression keywords and compression
CompressionDict = Dict[str, Any]
CompressionOptions = Optional[Union[str, CompressionDict]]
# type of float formatter in DataFrameFormatter
FloatFormatType = Union[str, Callable, "EngFormatter"]

View file

@ -1,18 +1,20 @@
# This file was generated by 'versioneer.py' (0.19) from
# This file was generated by 'versioneer.py' (0.15) from
# revision-control system data, or from the parent directory name of an
# unpacked source archive. Distribution tarballs contain a pre-generated copy
# of this file.
from warnings import catch_warnings
with catch_warnings(record=True):
import json
import sys
version_json = '''
{
"date": "2020-12-26T13:47:00+0000",
"dirty": false,
"error": null,
"full-revisionid": "3e89b4c4b1580aa890023fc550774e63d499da25",
"version": "1.2.0"
"full-revisionid": "b5958ee1999e9aead1938c0bba2b674378807b3d",
"version": "1.1.5"
}
''' # END VERSION_JSON

View file

@ -4,7 +4,7 @@ Public toolkit API.
from pandas._libs.lib import infer_dtype
from pandas.core.dtypes.api import * # noqa: F401, F403
from pandas.core.dtypes.api import * # noqa: F403, F401
from pandas.core.dtypes.concat import union_categoricals
from pandas.core.dtypes.dtypes import (
CategoricalDtype,

View file

@ -7,7 +7,6 @@ from pandas.core.arrays import (
BooleanArray,
Categorical,
DatetimeArray,
FloatingArray,
IntegerArray,
IntervalArray,
PandasArray,
@ -21,7 +20,6 @@ __all__ = [
"BooleanArray",
"Categorical",
"DatetimeArray",
"FloatingArray",
"IntegerArray",
"IntervalArray",
"PandasArray",

View file

@ -8,17 +8,27 @@ Other items:
* platform checker
"""
import platform
import struct
import sys
import warnings
from pandas._typing import F
PY37 = sys.version_info >= (3, 7)
PY38 = sys.version_info >= (3, 8)
PY39 = sys.version_info >= (3, 9)
PYPY = platform.python_implementation() == "PyPy"
IS64 = sys.maxsize > 2 ** 32
# ----------------------------------------------------------------------------
# functions largely based / taken from the six module
# Much of the code in this module comes from Benjamin Peterson's six library.
# The license for this library can be found in LICENSES/SIX and the code can be
# found at https://bitbucket.org/gutworth/six
def set_function_name(f: F, name: str, cls) -> F:
"""
Bind the name/qualname attributes of the function.
@ -29,6 +39,7 @@ def set_function_name(f: F, name: str, cls) -> F:
return f
# https://github.com/pandas-dev/pandas/pull/9123
def is_platform_little_endian() -> bool:
"""
Checking if the running platform is little endian.
@ -50,7 +61,7 @@ def is_platform_windows() -> bool:
bool
True if the running platform is windows.
"""
return sys.platform in ["win32", "cygwin"]
return sys.platform == "win32" or sys.platform == "cygwin"
def is_platform_linux() -> bool:
@ -62,7 +73,7 @@ def is_platform_linux() -> bool:
bool
True if the running platform is linux.
"""
return sys.platform == "linux"
return sys.platform == "linux2"
def is_platform_mac() -> bool:
@ -77,7 +88,19 @@ def is_platform_mac() -> bool:
return sys.platform == "darwin"
def import_lzma():
def is_platform_32bit() -> bool:
"""
Checking if the running platform is 32-bit.
Returns
-------
bool
True if the running platform is 32-bit.
"""
return struct.calcsize("P") * 8 < 64
def _import_lzma():
"""
Importing the `lzma` module.
@ -97,7 +120,7 @@ def import_lzma():
warnings.warn(msg)
def get_lzma_file(lzma):
def _get_lzma_file(lzma):
"""
Importing the `LZMAFile` class from the `lzma` module.

View file

@ -11,24 +11,25 @@ VERSIONS = {
"fsspec": "0.7.4",
"fastparquet": "0.3.2",
"gcsfs": "0.6.0",
"lxml.etree": "4.3.0",
"matplotlib": "2.2.3",
"numexpr": "2.6.8",
"lxml.etree": "3.8.0",
"matplotlib": "2.2.2",
"numexpr": "2.6.2",
"odfpy": "1.3.0",
"openpyxl": "2.5.7",
"pandas_gbq": "0.12.0",
"pyarrow": "0.15.0",
"pyarrow": "0.13.0",
"pytables": "3.4.3",
"pytest": "5.0.1",
"pyxlsb": "1.0.6",
"s3fs": "0.4.0",
"scipy": "1.2.0",
"sqlalchemy": "1.2.8",
"tables": "3.5.1",
"sqlalchemy": "1.1.4",
"tables": "3.4.3",
"tabulate": "0.8.3",
"xarray": "0.12.3",
"xlrd": "1.2.0",
"xlwt": "1.3.0",
"xlsxwriter": "1.0.2",
"xarray": "0.8.2",
"xlrd": "1.1.0",
"xlwt": "1.2.0",
"xlsxwriter": "0.9.8",
"numba": "0.46.0",
}

View file

@ -8,19 +8,19 @@ import numpy as np
# numpy versioning
_np_version = np.__version__
_nlv = LooseVersion(_np_version)
np_version_under1p17 = _nlv < LooseVersion("1.17")
np_version_under1p18 = _nlv < LooseVersion("1.18")
_np_version_under1p16 = _nlv < LooseVersion("1.16")
_np_version_under1p17 = _nlv < LooseVersion("1.17")
_np_version_under1p18 = _nlv < LooseVersion("1.18")
_np_version_under1p19 = _nlv < LooseVersion("1.19")
_np_version_under1p20 = _nlv < LooseVersion("1.20")
is_numpy_dev = ".dev" in str(_nlv)
_min_numpy_ver = "1.16.5"
_is_numpy_dev = ".dev" in str(_nlv)
if _nlv < _min_numpy_ver:
if _nlv < "1.15.4":
raise ImportError(
f"this version of pandas is incompatible with numpy < {_min_numpy_ver}\n"
"this version of pandas is incompatible with numpy < 1.15.4\n"
f"your numpy version is {_np_version}.\n"
f"Please upgrade numpy to >= {_min_numpy_ver} to use this pandas version"
"Please upgrade numpy to >= 1.15.4 to use this pandas version"
)
@ -65,6 +65,7 @@ def np_array_datetime64_compat(arr, *args, **kwargs):
__all__ = [
"np",
"_np_version",
"np_version_under1p17",
"is_numpy_dev",
"_np_version_under1p16",
"_np_version_under1p17",
"_is_numpy_dev",
]

View file

@ -1,24 +1,27 @@
"""
For compatibility with numpy libraries, pandas functions or methods have to
accept '*args' and '**kwargs' parameters to accommodate numpy arguments that
are not actually used or respected in the pandas implementation.
For compatibility with numpy libraries, pandas functions or
methods have to accept '*args' and '**kwargs' parameters to
accommodate numpy arguments that are not actually used or
respected in the pandas implementation.
To ensure that users do not abuse these parameters, validation is performed in
'validators.py' to make sure that any extra parameters passed correspond ONLY
to those in the numpy signature. Part of that validation includes whether or
not the user attempted to pass in non-default values for these extraneous
parameters. As we want to discourage users from relying on these parameters
when calling the pandas implementation, we want them only to pass in the
default values for these parameters.
To ensure that users do not abuse these parameters, validation
is performed in 'validators.py' to make sure that any extra
parameters passed correspond ONLY to those in the numpy signature.
Part of that validation includes whether or not the user attempted
to pass in non-default values for these extraneous parameters. As we
want to discourage users from relying on these parameters when calling
the pandas implementation, we want them only to pass in the default values
for these parameters.
This module provides a set of commonly used default arguments for functions and
methods that are spread throughout the codebase. This module will make it
This module provides a set of commonly used default arguments for functions
and methods that are spread throughout the codebase. This module will make it
easier to adjust to future upstream changes in the analogous numpy signatures.
"""
from collections import OrderedDict
from distutils.version import LooseVersion
from typing import Any, Dict, Optional, Union
from numpy import __version__, ndarray
from numpy import __version__ as _np_version, ndarray
from pandas._libs.lib import is_bool, is_integer
from pandas.errors import UnsupportedFunctionCall
@ -71,7 +74,7 @@ class CompatValidator:
raise ValueError(f"invalid validation method '{method}'")
ARGMINMAX_DEFAULTS = {"out": None}
ARGMINMAX_DEFAULTS = dict(out=None)
validate_argmin = CompatValidator(
ARGMINMAX_DEFAULTS, fname="argmin", method="both", max_fname_arg_count=1
)
@ -90,10 +93,11 @@ def process_skipna(skipna, args):
def validate_argmin_with_skipna(skipna, args, kwargs):
"""
If 'Series.argmin' is called via the 'numpy' library, the third parameter
in its signature is 'out', which takes either an ndarray or 'None', so
check if the 'skipna' parameter is either an instance of ndarray or is
None, since 'skipna' itself should be a boolean
If 'Series.argmin' is called via the 'numpy' library,
the third parameter in its signature is 'out', which
takes either an ndarray or 'None', so check if the
'skipna' parameter is either an instance of ndarray or
is None, since 'skipna' itself should be a boolean
"""
skipna, args = process_skipna(skipna, args)
validate_argmin(args, kwargs)
@ -102,22 +106,23 @@ def validate_argmin_with_skipna(skipna, args, kwargs):
def validate_argmax_with_skipna(skipna, args, kwargs):
"""
If 'Series.argmax' is called via the 'numpy' library, the third parameter
in its signature is 'out', which takes either an ndarray or 'None', so
check if the 'skipna' parameter is either an instance of ndarray or is
None, since 'skipna' itself should be a boolean
If 'Series.argmax' is called via the 'numpy' library,
the third parameter in its signature is 'out', which
takes either an ndarray or 'None', so check if the
'skipna' parameter is either an instance of ndarray or
is None, since 'skipna' itself should be a boolean
"""
skipna, args = process_skipna(skipna, args)
validate_argmax(args, kwargs)
return skipna
ARGSORT_DEFAULTS: Dict[str, Optional[Union[int, str]]] = {}
ARGSORT_DEFAULTS: "OrderedDict[str, Optional[Union[int, str]]]" = OrderedDict()
ARGSORT_DEFAULTS["axis"] = -1
ARGSORT_DEFAULTS["kind"] = "quicksort"
ARGSORT_DEFAULTS["order"] = None
if LooseVersion(__version__) >= LooseVersion("1.17.0"):
if LooseVersion(_np_version) >= LooseVersion("1.17.0"):
# GH-26361. NumPy added radix sort and changed default to None.
ARGSORT_DEFAULTS["kind"] = None
@ -126,9 +131,9 @@ validate_argsort = CompatValidator(
ARGSORT_DEFAULTS, fname="argsort", max_fname_arg_count=0, method="both"
)
# two different signatures of argsort, this second validation for when the
# `kind` param is supported
ARGSORT_DEFAULTS_KIND: Dict[str, Optional[int]] = {}
# two different signatures of argsort, this second validation
# for when the `kind` param is supported
ARGSORT_DEFAULTS_KIND: "OrderedDict[str, Optional[int]]" = OrderedDict()
ARGSORT_DEFAULTS_KIND["axis"] = -1
ARGSORT_DEFAULTS_KIND["order"] = None
validate_argsort_kind = CompatValidator(
@ -138,10 +143,11 @@ validate_argsort_kind = CompatValidator(
def validate_argsort_with_ascending(ascending, args, kwargs):
"""
If 'Categorical.argsort' is called via the 'numpy' library, the first
parameter in its signature is 'axis', which takes either an integer or
'None', so check if the 'ascending' parameter has either integer type or is
None, since 'ascending' itself should be a boolean
If 'Categorical.argsort' is called via the 'numpy' library, the
first parameter in its signature is 'axis', which takes either
an integer or 'None', so check if the 'ascending' parameter has
either integer type or is None, since 'ascending' itself should
be a boolean
"""
if is_integer(ascending) or ascending is None:
args = (ascending,) + args
@ -151,7 +157,7 @@ def validate_argsort_with_ascending(ascending, args, kwargs):
return ascending
CLIP_DEFAULTS: Dict[str, Any] = {"out": None}
CLIP_DEFAULTS: Dict[str, Any] = dict(out=None)
validate_clip = CompatValidator(
CLIP_DEFAULTS, fname="clip", method="both", max_fname_arg_count=3
)
@ -159,10 +165,10 @@ validate_clip = CompatValidator(
def validate_clip_with_axis(axis, args, kwargs):
"""
If 'NDFrame.clip' is called via the numpy library, the third parameter in
its signature is 'out', which can takes an ndarray, so check if the 'axis'
parameter is an instance of ndarray, since 'axis' itself should either be
an integer or None
If 'NDFrame.clip' is called via the numpy library, the third
parameter in its signature is 'out', which can takes an ndarray,
so check if the 'axis' parameter is an instance of ndarray, since
'axis' itself should either be an integer or None
"""
if isinstance(axis, ndarray):
args = (axis,) + args
@ -172,7 +178,7 @@ def validate_clip_with_axis(axis, args, kwargs):
return axis
CUM_FUNC_DEFAULTS: Dict[str, Any] = {}
CUM_FUNC_DEFAULTS: "OrderedDict[str, Any]" = OrderedDict()
CUM_FUNC_DEFAULTS["dtype"] = None
CUM_FUNC_DEFAULTS["out"] = None
validate_cum_func = CompatValidator(
@ -185,9 +191,10 @@ validate_cumsum = CompatValidator(
def validate_cum_func_with_skipna(skipna, args, kwargs, name):
"""
If this function is called via the 'numpy' library, the third parameter in
its signature is 'dtype', which takes either a 'numpy' dtype or 'None', so
check if the 'skipna' parameter is a boolean or not
If this function is called via the 'numpy' library, the third
parameter in its signature is 'dtype', which takes either a
'numpy' dtype or 'None', so check if the 'skipna' parameter is
a boolean or not
"""
if not is_bool(skipna):
args = (skipna,) + args
@ -197,7 +204,7 @@ def validate_cum_func_with_skipna(skipna, args, kwargs, name):
return skipna
ALLANY_DEFAULTS: Dict[str, Optional[bool]] = {}
ALLANY_DEFAULTS: "OrderedDict[str, Optional[bool]]" = OrderedDict()
ALLANY_DEFAULTS["dtype"] = None
ALLANY_DEFAULTS["out"] = None
ALLANY_DEFAULTS["keepdims"] = False
@ -208,10 +215,10 @@ validate_any = CompatValidator(
ALLANY_DEFAULTS, fname="any", method="both", max_fname_arg_count=1
)
LOGICAL_FUNC_DEFAULTS = {"out": None, "keepdims": False}
LOGICAL_FUNC_DEFAULTS = dict(out=None, keepdims=False)
validate_logical_func = CompatValidator(LOGICAL_FUNC_DEFAULTS, method="kwargs")
MINMAX_DEFAULTS = {"axis": None, "out": None, "keepdims": False}
MINMAX_DEFAULTS = dict(axis=None, out=None, keepdims=False)
validate_min = CompatValidator(
MINMAX_DEFAULTS, fname="min", method="both", max_fname_arg_count=1
)
@ -219,28 +226,28 @@ validate_max = CompatValidator(
MINMAX_DEFAULTS, fname="max", method="both", max_fname_arg_count=1
)
RESHAPE_DEFAULTS: Dict[str, str] = {"order": "C"}
RESHAPE_DEFAULTS: Dict[str, str] = dict(order="C")
validate_reshape = CompatValidator(
RESHAPE_DEFAULTS, fname="reshape", method="both", max_fname_arg_count=1
)
REPEAT_DEFAULTS: Dict[str, Any] = {"axis": None}
REPEAT_DEFAULTS: Dict[str, Any] = dict(axis=None)
validate_repeat = CompatValidator(
REPEAT_DEFAULTS, fname="repeat", method="both", max_fname_arg_count=1
)
ROUND_DEFAULTS: Dict[str, Any] = {"out": None}
ROUND_DEFAULTS: Dict[str, Any] = dict(out=None)
validate_round = CompatValidator(
ROUND_DEFAULTS, fname="round", method="both", max_fname_arg_count=1
)
SORT_DEFAULTS: Dict[str, Optional[Union[int, str]]] = {}
SORT_DEFAULTS: "OrderedDict[str, Optional[Union[int, str]]]" = OrderedDict()
SORT_DEFAULTS["axis"] = -1
SORT_DEFAULTS["kind"] = "quicksort"
SORT_DEFAULTS["order"] = None
validate_sort = CompatValidator(SORT_DEFAULTS, fname="sort", method="kwargs")
STAT_FUNC_DEFAULTS: Dict[str, Optional[Any]] = {}
STAT_FUNC_DEFAULTS: "OrderedDict[str, Optional[Any]]" = OrderedDict()
STAT_FUNC_DEFAULTS["dtype"] = None
STAT_FUNC_DEFAULTS["out"] = None
@ -274,13 +281,13 @@ validate_median = CompatValidator(
MEDIAN_DEFAULTS, fname="median", method="both", max_fname_arg_count=1
)
STAT_DDOF_FUNC_DEFAULTS: Dict[str, Optional[bool]] = {}
STAT_DDOF_FUNC_DEFAULTS: "OrderedDict[str, Optional[bool]]" = OrderedDict()
STAT_DDOF_FUNC_DEFAULTS["dtype"] = None
STAT_DDOF_FUNC_DEFAULTS["out"] = None
STAT_DDOF_FUNC_DEFAULTS["keepdims"] = False
validate_stat_ddof_func = CompatValidator(STAT_DDOF_FUNC_DEFAULTS, method="kwargs")
TAKE_DEFAULTS: Dict[str, Optional[str]] = {}
TAKE_DEFAULTS: "OrderedDict[str, Optional[str]]" = OrderedDict()
TAKE_DEFAULTS["out"] = None
TAKE_DEFAULTS["mode"] = "raise"
validate_take = CompatValidator(TAKE_DEFAULTS, fname="take", method="kwargs")
@ -288,9 +295,10 @@ validate_take = CompatValidator(TAKE_DEFAULTS, fname="take", method="kwargs")
def validate_take_with_convert(convert, args, kwargs):
"""
If this function is called via the 'numpy' library, the third parameter in
its signature is 'axis', which takes either an ndarray or 'None', so check
if the 'convert' parameter is either an instance of ndarray or is None
If this function is called via the 'numpy' library, the third
parameter in its signature is 'axis', which takes either an
ndarray or 'None', so check if the 'convert' parameter is either
an instance of ndarray or is None
"""
if isinstance(convert, ndarray) or convert is None:
args = (convert,) + args
@ -300,7 +308,7 @@ def validate_take_with_convert(convert, args, kwargs):
return convert
TRANSPOSE_DEFAULTS = {"axes": None}
TRANSPOSE_DEFAULTS = dict(axes=None)
validate_transpose = CompatValidator(
TRANSPOSE_DEFAULTS, fname="transpose", method="both", max_fname_arg_count=0
)
@ -353,9 +361,10 @@ def validate_expanding_func(name, args, kwargs) -> None:
def validate_groupby_func(name, args, kwargs, allowed=None) -> None:
"""
'args' and 'kwargs' should be empty, except for allowed kwargs because all
of their necessary parameters are explicitly listed in the function
signature
'args' and 'kwargs' should be empty, except for allowed
kwargs because all of
their necessary parameters are explicitly listed in
the function signature
"""
if allowed is None:
allowed = []
@ -374,8 +383,9 @@ RESAMPLER_NUMPY_OPS = ("min", "max", "sum", "prod", "mean", "std", "var")
def validate_resampler_func(method: str, args, kwargs) -> None:
"""
'args' and 'kwargs' should be empty because all of their necessary
parameters are explicitly listed in the function signature
'args' and 'kwargs' should be empty because all of
their necessary parameters are explicitly listed in
the function signature
"""
if len(args) + len(kwargs) > 0:
if method in RESAMPLER_NUMPY_OPS:
@ -387,20 +397,20 @@ def validate_resampler_func(method: str, args, kwargs) -> None:
raise TypeError("too many arguments passed in")
def validate_minmax_axis(axis: Optional[int], ndim: int = 1) -> None:
def validate_minmax_axis(axis: Optional[int]) -> None:
"""
Ensure that the axis argument passed to min, max, argmin, or argmax is zero
or None, as otherwise it will be incorrectly ignored.
Ensure that the axis argument passed to min, max, argmin, or argmax is
zero or None, as otherwise it will be incorrectly ignored.
Parameters
----------
axis : int or None
ndim : int, default 1
Raises
------
ValueError
"""
ndim = 1 # hard-coded for Index
if axis is None:
return
if axis >= ndim or (axis < 0 and ndim + axis < 0):

View file

@ -64,7 +64,7 @@ class _LoadSparseSeries:
# https://github.com/python/mypy/issues/1020
# error: Incompatible return type for "__new__" (returns "Series", but must return
# a subtype of "_LoadSparseSeries")
def __new__(cls) -> "Series": # type: ignore[misc]
def __new__(cls) -> "Series": # type: ignore
from pandas import Series
warnings.warn(
@ -82,7 +82,7 @@ class _LoadSparseFrame:
# https://github.com/python/mypy/issues/1020
# error: Incompatible return type for "__new__" (returns "DataFrame", but must
# return a subtype of "_LoadSparseFrame")
def __new__(cls) -> "DataFrame": # type: ignore[misc]
def __new__(cls) -> "DataFrame": # type: ignore
from pandas import DataFrame
warnings.warn(
@ -181,7 +181,7 @@ _class_locations_map = {
# functions for compat and uses a non-public class of the pickle module.
# error: Name 'pkl._Unpickler' is not defined
class Unpickler(pkl._Unpickler): # type: ignore[name-defined]
class Unpickler(pkl._Unpickler): # type: ignore
def find_class(self, module, name):
# override superclass
key = (module, name)
@ -274,7 +274,7 @@ def patch_pickle():
"""
orig_loads = pkl.loads
try:
setattr(pkl, "loads", loads)
pkl.loads = loads
yield
finally:
setattr(pkl, "loads", orig_loads)
pkl.loads = orig_loads

View file

@ -33,10 +33,8 @@ from pytz import FixedOffset, utc
import pandas.util._test_decorators as td
from pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype
import pandas as pd
from pandas import DataFrame, Interval, Period, Series, Timedelta, Timestamp
from pandas import DataFrame
import pandas._testing as tm
from pandas.core import ops
from pandas.core.indexes.api import Index, MultiIndex
@ -57,9 +55,6 @@ def pytest_configure(config):
)
config.addinivalue_line("markers", "high_memory: mark a test as a high-memory only")
config.addinivalue_line("markers", "clipboard: mark a pd.read_clipboard test")
config.addinivalue_line(
"markers", "arm_slow: mark a test as slow for arm64 architecture"
)
def pytest_addoption(parser):
@ -176,6 +171,14 @@ def axis(request):
axis_frame = axis
@pytest.fixture(params=[0, "index"], ids=lambda x: f"axis {repr(x)}")
def axis_series(request):
"""
Fixture for returning the axis numbers of a Series.
"""
return request.param
@pytest.fixture(params=[True, False, None])
def observed(request):
"""
@ -266,7 +269,7 @@ def nselect_method(request):
# ----------------------------------------------------------------
# Missing values & co.
# ----------------------------------------------------------------
@pytest.fixture(params=tm.NULL_OBJECTS, ids=str)
@pytest.fixture(params=[None, np.nan, pd.NaT, float("nan"), pd.NA], ids=str)
def nulls_fixture(request):
"""
Fixture for each null type in pandas.
@ -288,22 +291,11 @@ def unique_nulls_fixture(request):
# Generate cartesian product of unique_nulls_fixture:
unique_nulls_fixture2 = unique_nulls_fixture
# ----------------------------------------------------------------
# Classes
# ----------------------------------------------------------------
@pytest.fixture(params=[pd.DataFrame, pd.Series])
def frame_or_series(request):
    """
    Fixture to parametrize over DataFrame and Series.

    The value handed to the test is the class itself (``pd.DataFrame`` or
    ``pd.Series``), not an instance.
    """
    return request.param
@pytest.fixture(
params=[pd.Index, pd.Series], ids=["index", "series"] # type: ignore[list-item]
)
@pytest.fixture(params=[pd.Index, pd.Series], ids=["index", "series"])
def index_or_series(request):
"""
Fixture to parametrize over Index and Series, made necessary by a mypy
@ -320,16 +312,6 @@ def index_or_series(request):
index_or_series2 = index_or_series
@pytest.fixture(
    params=[pd.Index, pd.Series, pd.array], ids=["index", "series", "array"]
)
def index_or_series_or_array(request):
    """
    Fixture to parametrize over Index, Series, and ExtensionArray

    The value handed to the test is the constructor (``pd.Index``,
    ``pd.Series`` or the ``pd.array`` function), with test ids ``index``,
    ``series`` and ``array`` respectively.
    """
    return request.param
@pytest.fixture
def dict_subclass():
"""
@ -377,24 +359,11 @@ def multiindex_year_month_day_dataframe_random_data():
tdf = tm.makeTimeDataFrame(100)
ymd = tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum()
# use Int64Index, to make sure things work
ymd.index = ymd.index.set_levels([lev.astype("i8") for lev in ymd.index.levels])
ymd.index.set_levels([lev.astype("i8") for lev in ymd.index.levels], inplace=True)
ymd.index.set_names(["year", "month", "day"], inplace=True)
return ymd
@pytest.fixture
def multiindex_dataframe_random_data():
    """
    DataFrame with 2 level MultiIndex with random data.

    The frame has 10 rows and the columns ``A``, ``B``, ``C`` (columns index
    named ``exp``); the row index levels are named ``first`` and ``second``.
    """
    first_level = ["foo", "bar", "baz", "qux"]
    second_level = ["one", "two", "three"]
    row_index = MultiIndex(
        levels=[first_level, second_level],
        codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["first", "second"],
    )
    values = np.random.randn(10, 3)
    column_index = Index(["A", "B", "C"], name="exp")
    return DataFrame(values, index=row_index, columns=column_index)
def _create_multiindex():
"""
MultiIndex used to test the general functionality of this object
@ -407,12 +376,13 @@ def _create_multiindex():
major_codes = np.array([0, 0, 1, 2, 3, 3])
minor_codes = np.array([0, 1, 0, 1, 0, 1])
index_names = ["first", "second"]
return MultiIndex(
mi = MultiIndex(
levels=[major_axis, minor_axis],
codes=[major_codes, minor_codes],
names=index_names,
verify_integrity=False,
)
return mi
def _create_mi_with_dt64tz_level():
@ -467,29 +437,6 @@ def index(request):
index_fixture2 = index
@pytest.fixture(params=indices_dict.keys())
def index_with_missing(request):
    """
    Fixture for indices with missing values.

    Builds a copy of the ``indices_dict`` entry named by the parameter whose
    first and last elements are replaced by ``None``. Index kinds listed in
    the first check below are xfailed instead.
    """
    kind = request.param
    if kind in ("int", "uint", "range", "empty", "repeats"):
        pytest.xfail("missing values not supported")

    # GH 35538. Use deep copy to avoid illusive bug on np-dev
    # Azure pipeline that writes into indices_dict despite copy
    source = indices_dict[kind].copy(deep=True)
    raw = source.values

    if kind in ("tuples", "mi-with-dt64tz-level", "multi"):
        # MultiIndex: blank out the top-level entry of the first and the
        # last tuple, keeping the remaining levels untouched
        tuples = source.tolist()
        for pos in (0, -1):
            tuples[pos] = (None,) + tuples[pos][1:]
        return MultiIndex.from_tuples(tuples)

    for pos in (0, -1):
        raw[pos] = None
    return type(source)(raw)
# ----------------------------------------------------------------
# Series'
# ----------------------------------------------------------------
@ -549,23 +496,6 @@ def series_with_simple_index(index):
return _create_series(index)
@pytest.fixture
def series_with_multilevel_index():
    """
    Fixture with a Series with a 2-level MultiIndex.

    The Series holds 8 random floats; the value at position 3 is replaced by
    NaN so that the fixture always contains one missing entry.
    """
    arrays = [
        ["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"],
        ["one", "two", "one", "two", "one", "two", "one", "two"],
    ]
    tuples = zip(*arrays)
    index = MultiIndex.from_tuples(tuples)
    data = np.random.randn(8)
    ser = Series(data, index=index)
    # Use ``.iloc`` so the assignment is explicitly positional instead of
    # relying on the integer-key fallback of ``Series.__setitem__`` on a
    # non-integer index, and spell the value ``np.nan`` because the ``np.NaN``
    # alias no longer exists in NumPy 2.0.
    ser.iloc[3] = np.nan
    return ser
_narrow_dtypes = [
np.float16,
np.float32,
@ -698,26 +628,6 @@ def float_frame():
return DataFrame(tm.getSeriesData())
# ----------------------------------------------------------------
# Scalars
# ----------------------------------------------------------------
@pytest.fixture(
    params=[
        (Interval(left=0, right=5), IntervalDtype("int64")),
        (Interval(left=0.1, right=0.5), IntervalDtype("float64")),
        (Period("2012-01", freq="M"), "period[M]"),
        (Period("2012-02-01", freq="D"), "period[D]"),
        (
            Timestamp("2011-01-01", tz="US/Eastern"),
            DatetimeTZDtype(tz="US/Eastern"),
        ),
        (Timedelta(seconds=500), "timedelta64[ns]"),
    ]
)
def ea_scalar_and_dtype(request):
    """
    Fixture returning a ``(scalar, dtype)`` pair.

    The scalars are Interval, Period, tz-aware Timestamp and Timedelta
    objects; the dtype is given either as a dtype object or as a string alias.
    NOTE(review): presumably the dtype is the one pandas infers for the
    scalar -- confirm against the tests that consume this fixture.
    """
    return request.param
# ----------------------------------------------------------------
# Operators & Operations
# ----------------------------------------------------------------
@ -747,43 +657,6 @@ def all_arithmetic_operators(request):
return request.param
@pytest.fixture(
    params=[
        # arithmetic: forward operator followed by its reflected counterpart
        operator.add,
        ops.radd,
        operator.sub,
        ops.rsub,
        operator.mul,
        ops.rmul,
        operator.truediv,
        ops.rtruediv,
        operator.floordiv,
        ops.rfloordiv,
        operator.mod,
        ops.rmod,
        operator.pow,
        ops.rpow,
        # comparisons
        operator.eq,
        operator.ne,
        operator.lt,
        operator.le,
        operator.gt,
        operator.ge,
        # logical: forward operator followed by its reflected counterpart
        operator.and_,
        ops.rand_,
        operator.xor,
        ops.rxor,
        operator.or_,
        ops.ror_,
    ]
)
def all_binary_operators(request):
    """
    Fixture for operator and roperator arithmetic, comparison, and logical ops.

    The value handed to the test is the callable itself (taken from the
    ``operator`` module or from ``pandas.core.ops``).
    """
    return request.param
@pytest.fixture(
params=[
operator.add,
@ -964,10 +837,6 @@ TIMEZONES = [
"Asia/Tokyo",
"dateutil/US/Pacific",
"dateutil/Asia/Singapore",
"+01:15",
"-02:15",
"UTC+01:15",
"UTC-02:15",
tzutc(),
tzlocal(),
FixedOffset(300),
@ -1089,31 +958,6 @@ def float_dtype(request):
return request.param
@pytest.fixture(params=tm.FLOAT_EA_DTYPES)
def float_ea_dtype(request):
    """
    Parameterized fixture for the float dtypes in ``tm.FLOAT_EA_DTYPES``.

    * 'Float32'
    * 'Float64'
    """
    return request.param
@pytest.fixture(params=tm.FLOAT_DTYPES + tm.FLOAT_EA_DTYPES)
def any_float_allowed_nullable_dtype(request):
    """
    Parameterized fixture for float dtypes, covering both
    ``tm.FLOAT_DTYPES`` and ``tm.FLOAT_EA_DTYPES``.

    * float
    * 'float32'
    * 'float64'
    * 'Float32'
    * 'Float64'
    """
    return request.param
@pytest.fixture(params=tm.COMPLEX_DTYPES)
def complex_dtype(request):
"""
@ -1188,26 +1032,6 @@ def any_nullable_int_dtype(request):
return request.param
@pytest.fixture(params=tm.ALL_EA_INT_DTYPES + tm.FLOAT_EA_DTYPES)
def any_numeric_dtype(request):
    """
    Parameterized fixture for any nullable integer dtype and
    any float ea dtypes (``tm.ALL_EA_INT_DTYPES + tm.FLOAT_EA_DTYPES``).

    * 'UInt8'
    * 'Int8'
    * 'UInt16'
    * 'Int16'
    * 'UInt32'
    * 'Int32'
    * 'UInt64'
    * 'Int64'
    * 'Float32'
    * 'Float64'
    """
    return request.param
@pytest.fixture(params=tm.SIGNED_EA_INT_DTYPES)
def any_signed_nullable_int_dtype(request):
"""
@ -1370,13 +1194,7 @@ def ip():
pytest.importorskip("IPython", minversion="6.0.0")
from IPython.core.interactiveshell import InteractiveShell
# GH#35711 make sure sqlite history file handle is not leaked
from traitlets.config import Config # isort:skip
c = Config()
c.HistoryManager.hist_file = ":memory:"
return InteractiveShell(config=c)
return InteractiveShell()
@pytest.fixture(params=["bsr", "coo", "csc", "csr", "dia", "dok", "lil"])
@ -1389,6 +1207,15 @@ def spmatrix(request):
return getattr(sparse, request.param + "_matrix")
@pytest.fixture(params=list(tm.cython_table))
def cython_table_items(request):
    """
    Return one entry of ``tm.cython_table`` per parametrization: a tuple of a
    function and its corresponding name. Correspond to the list of aggregator
    "Cython functions" used on selected table items.
    """
    return request.param
@pytest.fixture(
params=[
getattr(pd.offsets, o)
@ -1410,39 +1237,3 @@ def sort_by_key(request):
Tests None (no key) and the identity key.
"""
return request.param
@pytest.fixture()
def fsspectest():
    """
    Register a temporary fsspec filesystem under the ``testmem`` protocol.

    The test is skipped when ``fsspec`` cannot be imported. The fixture yields
    an instance of a ``MemoryFileSystem`` subclass; once the test is done the
    protocol is removed from the fsspec registry and the class-level state
    (``test`` slot and ``store``) is reset.
    """
    pytest.importorskip("fsspec")
    from fsspec import register_implementation
    from fsspec.implementations.memory import MemoryFileSystem
    from fsspec.registry import _registry as registry

    class TestMemoryFS(MemoryFileSystem):
        protocol = "testmem"
        # One-element list defined on the class, so every instance shares it;
        # it records the ``test`` keyword passed to the latest constructor call.
        test = [None]

        def __init__(self, **kwargs):
            # ``test`` is removed from kwargs before delegating to the parent
            self.test[0] = kwargs.pop("test", None)
            super().__init__(**kwargs)

    register_implementation("testmem", TestMemoryFS, clobber=True)
    yield TestMemoryFS()
    # teardown: unregister the protocol and clear the shared class state
    registry.pop("testmem", None)
    TestMemoryFS.test[0] = None
    TestMemoryFS.store.clear()
@pytest.fixture(
    params=[
        ("foo", None, None),
        ("Egon", "Venkman", None),
        ("NCC1701D", "NCC1701D", "NCC1701D"),
    ]
)
def names(request):
    """
    A 3-tuple of names, the first two for operands, the last for a result.

    The cases are: only the first operand named, two differently named
    operands, and two identically named operands; the result name is ``None``
    in the first two cases and the shared name in the last.
    """
    return request.param

View file

@ -4,7 +4,7 @@ accessor.py contains base classes for implementing accessor properties
that can be mixed into or pinned onto other pandas classes.
"""
from typing import FrozenSet, List, Set
from typing import FrozenSet, Set
import warnings
from pandas.util._decorators import doc
@ -12,21 +12,28 @@ from pandas.util._decorators import doc
class DirNamesMixin:
_accessors: Set[str] = set()
_hidden_attrs: FrozenSet[str] = frozenset()
_deprecations: FrozenSet[str] = frozenset()
def _dir_deletions(self) -> Set[str]:
def _dir_deletions(self):
"""
Delete unwanted __dir__ for this object.
"""
return self._accessors | self._hidden_attrs
return self._accessors | self._deprecations
def _dir_additions(self) -> Set[str]:
def _dir_additions(self):
"""
Add additional __dir__ for this object.
"""
return {accessor for accessor in self._accessors if hasattr(self, accessor)}
rv = set()
for accessor in self._accessors:
try:
getattr(self, accessor)
rv.add(accessor)
except AttributeError:
pass
return rv
def __dir__(self) -> List[str]:
def __dir__(self):
"""
Provide method name lookup and completion.
@ -34,7 +41,7 @@ class DirNamesMixin:
-----
Only provide 'public' methods.
"""
rv = set(super().__dir__())
rv = set(dir(type(self)))
rv = (rv - self._dir_deletions()) | self._dir_additions()
return sorted(rv)

View file

@ -6,46 +6,32 @@ kwarg aggregations in groupby and DataFrame/Series aggregation
from collections import defaultdict
from functools import partial
from typing import (
TYPE_CHECKING,
Any,
Callable,
DefaultDict,
Dict,
Iterable,
List,
Optional,
Sequence,
Tuple,
Union,
cast,
)
from pandas._typing import (
AggFuncType,
AggFuncTypeBase,
AggFuncTypeDict,
AggObjType,
Axis,
FrameOrSeries,
FrameOrSeriesUnion,
Label,
)
from pandas._typing import AggFuncType, Label
from pandas.core.dtypes.cast import is_nested_object
from pandas.core.dtypes.common import is_dict_like, is_list_like
from pandas.core.dtypes.generic import ABCDataFrame, ABCNDFrame, ABCSeries
from pandas.core.base import DataError, SpecificationError
from pandas.core.base import SpecificationError
import pandas.core.common as com
from pandas.core.indexes.api import Index
if TYPE_CHECKING:
from pandas.core.series import Series
from pandas.core.series import FrameOrSeriesUnion, Series
def reconstruct_func(
func: Optional[AggFuncType], **kwargs
) -> Tuple[bool, Optional[AggFuncType], Optional[List[str]], Optional[List[int]]]:
func: Optional[AggFuncType], **kwargs,
) -> Tuple[
bool, Optional[AggFuncType], Optional[List[str]], Optional[List[int]],
]:
"""
This is the internal function to reconstruct func given if there is relabeling
or not and also normalize the keyword to get new order of columns.
@ -291,13 +277,12 @@ def maybe_mangle_lambdas(agg_spec: Any) -> Any:
def relabel_result(
result: FrameOrSeries,
result: FrameOrSeriesUnion,
func: Dict[str, List[Union[Callable, str]]],
columns: Iterable[Label],
order: Iterable[int],
) -> Dict[Label, "Series"]:
"""
Internal function to reorder result if relabelling is True for
columns: Tuple,
order: List[int],
) -> Dict[Label, Series]:
"""Internal function to reorder result if relabelling is True for
dataframe.agg, and return the reordered result in dict.
Parameters:
@ -322,10 +307,10 @@ def relabel_result(
reordered_indexes = [
pair[0] for pair in sorted(zip(columns, order), key=lambda t: t[1])
]
reordered_result_in_dict: Dict[Label, "Series"] = {}
reordered_result_in_dict: Dict[Label, Series] = {}
idx = 0
reorder_mask = not isinstance(result, ABCSeries) and len(result.columns) > 1
reorder_mask = not isinstance(result, Series) and len(result.columns) > 1
for col, fun in func.items():
s = result[col].dropna()
@ -388,7 +373,7 @@ def validate_func_kwargs(
(['one', 'two'], ['min', 'max'])
"""
no_arg_message = "Must provide 'func' or named aggregation **kwargs."
tuple_given_message = "func is expected but received {} in **kwargs."
tuple_given_message = "func is expected but recieved {} in **kwargs."
columns = list(kwargs)
func = []
for col_func in kwargs.values():
@ -398,390 +383,3 @@ def validate_func_kwargs(
if not columns:
raise TypeError(no_arg_message)
return columns, func
def transform(
    obj: FrameOrSeries, func: AggFuncType, axis: Axis, *args, **kwargs
) -> FrameOrSeriesUnion:
    """
    Transform a DataFrame or Series

    Parameters
    ----------
    obj : DataFrame or Series
        Object to compute the transform on.
    func : string, function, list, or dictionary
        Function(s) to compute the transform with.
    axis : {0 or 'index', 1 or 'columns'}
        Axis along which the function is applied:

        * 0 or 'index': apply function to each column.
        * 1 or 'columns': apply function to each row.
    *args
        Positional arguments forwarded to ``func``.
    **kwargs
        Keyword arguments forwarded to ``func``.

    Returns
    -------
    DataFrame or Series
        Result of applying ``func`` along the given axis of the
        Series or DataFrame.

    Raises
    ------
    ValueError
        If the transform function fails or does not transform. When the
        failure comes from an exception raised by ``func``, that exception is
        attached as ``__cause__``.
    """
    is_series = obj.ndim == 1

    if obj._get_axis_number(axis) == 1:
        # Row-wise transform: operate on the transpose and flip the result back
        assert not is_series
        return transform(obj.T, func, 0, *args, **kwargs).T

    if is_list_like(func) and not is_dict_like(func):
        func = cast(List[AggFuncTypeBase], func)
        # Convert func equivalent dict
        if is_series:
            func = {com.get_callable_name(v) or v: v for v in func}
        else:
            func = {col: func for col in obj}

    if is_dict_like(func):
        func = cast(AggFuncTypeDict, func)
        return transform_dict_like(obj, func, *args, **kwargs)

    # func is either str or callable
    func = cast(AggFuncTypeBase, func)
    try:
        result = transform_str_or_callable(obj, func, *args, **kwargs)
    except Exception as err:
        # Chain the original error so the real failure stays visible in the
        # traceback; the message is unchanged because callers match on it.
        raise ValueError("Transform function failed") from err

    # Functions that transform may return empty Series/DataFrame
    # when the dtype is not appropriate
    if isinstance(result, (ABCSeries, ABCDataFrame)) and result.empty:
        raise ValueError("Transform function failed")
    if not isinstance(result, (ABCSeries, ABCDataFrame)) or not result.index.equals(
        obj.index
    ):
        raise ValueError("Function did not transform")

    return result
def transform_dict_like(
    obj: FrameOrSeries,
    func: AggFuncTypeDict,
    *args,
    **kwargs,
):
    """
    Compute transform in the case of a dict-like func

    Each key of ``func`` selects a 1-dimensional piece of ``obj`` and the
    associated value is applied to it through ``transform``; the per-key
    results are concatenated along axis 1. Keys whose transform fails for a
    reason other than the two messages re-raised below are left out.
    """
    from pandas.core.reshape.concat import concat

    if not len(func):
        raise ValueError("No transform functions were provided")

    if obj.ndim != 1:
        # Check for missing columns on a frame
        missing = sorted(set(func.keys()) - set(obj.columns))
        if missing:
            raise SpecificationError(f"Column(s) {missing} do not exist")

    # Can't use func.values(); wouldn't work for a Series
    for _, how in func.items():
        if is_dict_like(how):
            # GH 15931 - deprecation of renaming keys
            raise SpecificationError("nested renamer is not supported")

    # messages that must reach the caller unchanged
    reraised = (
        "Function did not transform",
        "No transform functions were provided",
    )
    results: Dict[Label, FrameOrSeriesUnion] = {}
    for name, how in func.items():
        column = obj._gotitem(name, ndim=1)
        try:
            results[name] = transform(column, how, 0, *args, **kwargs)
        except Exception as err:
            if str(err) in reraised:
                raise err

    # combine results
    if not results:
        raise ValueError("Transform function failed")
    return concat(results, axis=1)
def transform_str_or_callable(
    obj: FrameOrSeries, func: AggFuncTypeBase, *args, **kwargs
) -> FrameOrSeriesUnion:
    """
    Compute transform in the case of a string or callable func

    A string is dispatched through ``obj._try_aggregate_string_function``.
    A callable without extra arguments is first looked up with
    ``obj._get_cython_func``; otherwise it is run through ``obj.apply`` and,
    should that raise, called directly on ``obj``.
    """
    if isinstance(func, str):
        return obj._try_aggregate_string_function(func, *args, **kwargs)

    if not (args or kwargs):
        method_name = obj._get_cython_func(func)
        if method_name:
            return getattr(obj, method_name)()

    # Two possible ways to use a UDF - apply or call directly
    try:
        outcome = obj.apply(func, args=args, **kwargs)
    except Exception:
        outcome = func(obj, *args, **kwargs)
    return outcome
def aggregate(
    obj: AggObjType,
    arg: AggFuncType,
    *args,
    **kwargs,
):
    """
    Provide an implementation for the aggregators.

    Parameters
    ----------
    obj : Pandas object to compute aggregation on.
    arg : string, dict, function.
    *args : args to pass on to the function.
    **kwargs : kwargs to pass on to the function. A ``_axis`` entry is
        consumed here; when absent or None, ``obj.axis`` is used (0 if
        ``obj`` has no such attribute).

    Returns
    -------
    tuple of result, how.

    Notes
    -----
    how can be a string describe the required post-processing, or
    None if not required.
    """
    axis = kwargs.pop("_axis", None)
    axis = getattr(obj, "axis", 0) if axis is None else axis

    if isinstance(arg, str):
        return obj._try_aggregate_string_function(arg, *args, **kwargs), None

    if is_dict_like(arg):
        return agg_dict_like(obj, cast(AggFuncTypeDict, arg), axis), True

    if is_list_like(arg):
        # we require a list, but not an 'str'
        return agg_list_like(obj, cast(List[AggFuncTypeBase], arg), _axis=axis), None

    if callable(arg):
        method_name = obj._get_cython_func(arg)
        if method_name and not args and not kwargs:
            return getattr(obj, method_name)(), None

    # nothing computed here: caller can react
    return None, True
def agg_list_like(
    obj: AggObjType,
    arg: List[AggFuncTypeBase],
    _axis: int,
) -> FrameOrSeriesUnion:
    """
    Compute aggregation in the case of a list-like argument.

    Parameters
    ----------
    obj : Pandas object to compute aggregation on.
    arg : list
        Aggregations to compute.
    _axis : int, 0 or 1
        Axis to compute aggregation on.

    Returns
    -------
    Result of aggregation.

    Raises
    ------
    NotImplementedError
        If ``_axis`` is not 0.
    ValueError
        If no aggregation produced a result, or if the results mix
        transform-like and aggregation-like outputs.
    """
    from pandas.core.reshape.concat import concat

    if _axis != 0:
        raise NotImplementedError("axis other than 0 is not supported")

    selected_obj = (
        obj._selected_obj if obj._selected_obj.ndim == 1 else obj._obj_with_exclusions
    )

    results = []
    keys = []

    if selected_obj.ndim == 1:
        # degenerate case: one column, one result per function
        for func in arg:
            colg = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj)
            try:
                new_res = colg.aggregate(func)
            except TypeError:
                continue
            results.append(new_res)
            # make sure we find a good name
            keys.append(com.get_callable_name(func) or func)
    else:
        # multiples: one result per column, each computed with the full list
        for position, col in enumerate(selected_obj):
            colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, position])
            try:
                new_res = colg.aggregate(arg)
            except (TypeError, DataError):
                continue
            except ValueError as err:
                # cannot aggregate
                message = str(err)
                if (
                    # raised directly in _aggregate_named
                    "Must produce aggregated value" in message
                    # raised directly in _aggregate_multiple_funcs
                    or "no results" in message
                ):
                    continue
                raise
            results.append(new_res)
            keys.append(col)

    # if we are empty
    if not results:
        raise ValueError("no results")

    try:
        return concat(results, keys=keys, axis=1, sort=False)
    except TypeError as err:
        # we are concatting non-NDFrame objects,
        # e.g. a list of scalars
        from pandas import Series

        combined = Series(results, index=keys, name=obj.name)
        if is_nested_object(combined):
            raise ValueError(
                "cannot combine transform and aggregation operations"
            ) from err
        return combined
def agg_dict_like(
    obj: AggObjType,
    arg: AggFuncTypeDict,
    _axis: int,
) -> FrameOrSeriesUnion:
    """
    Compute aggregation in the case of a dict-like argument.

    Parameters
    ----------
    obj : Pandas object to compute aggregation on.
    arg : dict
        label-aggregation pairs to compute.
    _axis : int, 0 or 1
        Axis to compute aggregation on.

    Returns
    -------
    Result of aggregation.

    Raises
    ------
    ValueError
        If ``_axis`` is not 0, or if the per-key results mix NDFrame objects
        and scalars.
    SpecificationError
        If a nested renamer is passed, or if keys are missing from the
        columns of a DataFrame selection.
    KeyError
        If a key mapped to a list-like aggregation is not a column of a
        DataFrame selection.
    """
    is_aggregator = lambda x: isinstance(x, (list, tuple, dict))

    if _axis != 0:  # pragma: no cover
        raise ValueError("Can only pass dict with axis=0")

    selected_obj = obj._selected_obj

    # if we have a dict of any non-scalars
    # eg. {'A' : ['mean']}, normalize all to
    # be list-likes
    if any(is_aggregator(x) for x in arg.values()):
        new_arg: AggFuncTypeDict = {}
        for k, v in arg.items():
            # wrap scalars so every value of new_arg is a list/tuple/dict
            if not isinstance(v, (tuple, list, dict)):
                new_arg[k] = [v]
            else:
                new_arg[k] = v

            # the keys must be in the columns
            # for ndim=2, or renamers for ndim=1

            # ok for now, but deprecated
            # {'A': { 'ra': 'mean' }}
            # {'A': { 'ra': ['mean'] }}
            # {'ra': ['mean']}

            # not ok
            # {'ra' : { 'A' : 'mean' }}
            if isinstance(v, dict):
                raise SpecificationError("nested renamer is not supported")
            elif isinstance(selected_obj, ABCSeries):
                raise SpecificationError("nested renamer is not supported")
            elif (
                isinstance(selected_obj, ABCDataFrame) and k not in selected_obj.columns
            ):
                raise KeyError(f"Column '{k}' does not exist!")

        arg = new_arg

    else:
        # deprecation of renaming keys
        # GH 15931
        keys = list(arg.keys())
        # every key must match a column when the selection is a DataFrame
        if isinstance(selected_obj, ABCDataFrame) and len(
            selected_obj.columns.intersection(keys)
        ) != len(keys):
            cols = sorted(set(keys) - set(selected_obj.columns.intersection(keys)))
            raise SpecificationError(f"Column(s) {cols} do not exist")

    from pandas.core.reshape.concat import concat

    if selected_obj.ndim == 1:
        # key only used for output
        colg = obj._gotitem(obj._selection, ndim=1)
        results = {key: colg.agg(how) for key, how in arg.items()}
    else:
        # key used for column selection and output
        results = {key: obj._gotitem(key, ndim=1).agg(how) for key, how in arg.items()}

    # set the final keys
    keys = list(arg.keys())

    # Avoid making two isinstance calls in all and any below
    is_ndframe = [isinstance(r, ABCNDFrame) for r in results.values()]

    # combine results
    if all(is_ndframe):
        keys_to_use = [k for k in keys if not results[k].empty]
        # Have to check, if at least one DataFrame is not empty.
        keys_to_use = keys_to_use if keys_to_use != [] else keys
        # results are concatenated along axis 0 for a Series obj, else axis 1
        axis = 0 if isinstance(obj, ABCSeries) else 1
        result = concat({k: results[k] for k in keys_to_use}, axis=axis)
    elif any(is_ndframe):
        # There is a mix of NDFrames and scalars
        raise ValueError(
            "cannot perform both aggregation "
            "and transformation operations "
            "simultaneously"
        )
    else:
        from pandas import Series

        # we have a dict of scalars
        # GH 36212 use name only if obj is a series
        if obj.ndim == 1:
            obj = cast("Series", obj)
            name = obj.name
        else:
            name = None

        result = Series(results, name=name)

    return result

View file

@ -2,17 +2,15 @@
Generic data algorithms. This module is experimental at the moment and not
intended for public consumption
"""
from __future__ import annotations
import operator
from textwrap import dedent
from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union, cast
from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union
from warnings import catch_warnings, simplefilter, warn
import numpy as np
from pandas._libs import Timestamp, algos, hashtable as htable, iNaT, lib
from pandas._typing import AnyArrayLike, ArrayLike, DtypeObj, FrameOrSeriesUnion
from pandas._typing import AnyArrayLike, ArrayLike, DtypeObj
from pandas.util._decorators import doc
from pandas.core.dtypes.cast import (
@ -50,9 +48,9 @@ from pandas.core.dtypes.common import (
from pandas.core.dtypes.generic import (
ABCDatetimeArray,
ABCExtensionArray,
ABCIndex,
ABCIndexClass,
ABCMultiIndex,
ABCRangeIndex,
ABCSeries,
ABCTimedeltaArray,
)
@ -62,7 +60,7 @@ from pandas.core.construction import array, extract_array
from pandas.core.indexers import validate_indices
if TYPE_CHECKING:
from pandas import Categorical, DataFrame, Index, Series
from pandas import Series
_shared_docs: Dict[str, str] = {}
@ -71,7 +69,7 @@ _shared_docs: Dict[str, str] = {}
# dtype access #
# --------------- #
def _ensure_data(
values: ArrayLike, dtype: Optional[DtypeObj] = None
values, dtype: Optional[DtypeObj] = None
) -> Tuple[np.ndarray, DtypeObj]:
"""
routine to ensure that our data is of the correct
@ -97,12 +95,6 @@ def _ensure_data(
pandas_dtype : np.dtype or ExtensionDtype
"""
if dtype is not None:
# We only have non-None dtype when called from `isin`, and
# both Datetimelike and Categorical dispatch before getting here.
assert not needs_i8_conversion(dtype)
assert not is_categorical_dtype(dtype)
if not isinstance(values, ABCMultiIndex):
# extract_array would raise
values = extract_array(values, extract_numpy=True)
@ -139,20 +131,21 @@ def _ensure_data(
return ensure_object(values), np.dtype("object")
# datetimelike
if needs_i8_conversion(values.dtype) or needs_i8_conversion(dtype):
if is_period_dtype(values.dtype) or is_period_dtype(dtype):
vals_dtype = getattr(values, "dtype", None)
if needs_i8_conversion(vals_dtype) or needs_i8_conversion(dtype):
if is_period_dtype(vals_dtype) or is_period_dtype(dtype):
from pandas import PeriodIndex
values = PeriodIndex(values)._data
values = PeriodIndex(values)
dtype = values.dtype
elif is_timedelta64_dtype(values.dtype) or is_timedelta64_dtype(dtype):
elif is_timedelta64_dtype(vals_dtype) or is_timedelta64_dtype(dtype):
from pandas import TimedeltaIndex
values = TimedeltaIndex(values)._data
values = TimedeltaIndex(values)
dtype = values.dtype
else:
# Datetime
if values.ndim > 1 and is_datetime64_ns_dtype(values.dtype):
if values.ndim > 1 and is_datetime64_ns_dtype(vals_dtype):
# Avoid calling the DatetimeIndex constructor as it is 1D only
# Note: this is reached by DataFrame.rank calls GH#27027
# TODO(EA2D): special case not needed with 2D EAs
@ -162,15 +155,14 @@ def _ensure_data(
from pandas import DatetimeIndex
values = DatetimeIndex(values)._data
values = DatetimeIndex(values)
dtype = values.dtype
return values.asi8, dtype
elif is_categorical_dtype(values.dtype) and (
elif is_categorical_dtype(vals_dtype) and (
is_categorical_dtype(dtype) or dtype is None
):
values = cast("Categorical", values)
values = values.codes
dtype = pandas_dtype("category")
@ -234,8 +226,7 @@ def _ensure_arraylike(values):
"""
if not is_array_like(values):
inferred = lib.infer_dtype(values, skipna=False)
if inferred in ["mixed", "string", "mixed-integer"]:
# "mixed-integer" to ensure we do not cast ["ss", 42] to str GH#22160
if inferred in ["mixed", "string"]:
if isinstance(values, tuple):
values = list(values)
values = construct_1d_object_array_from_listlike(values)
@ -253,11 +244,11 @@ _hashtables = {
}
def _get_hashtable_algo(values: np.ndarray):
def _get_hashtable_algo(values):
"""
Parameters
----------
values : np.ndarray
values : arraylike
Returns
-------
@ -271,15 +262,15 @@ def _get_hashtable_algo(values: np.ndarray):
return htable, values
def _get_values_for_rank(values: ArrayLike):
def _get_values_for_rank(values):
if is_categorical_dtype(values):
values = cast("Categorical", values)._values_for_rank()
values = values._values_for_rank()
values, _ = _ensure_data(values)
return values
def get_data_algo(values: ArrayLike):
def _get_data_algo(values):
values = _get_values_for_rank(values)
ndtype = _check_object_for_strings(values)
@ -295,6 +286,7 @@ def _check_object_for_strings(values) -> str:
Parameters
----------
values : ndarray
ndtype : str
Returns
-------
@ -437,64 +429,54 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray:
f"to isin(), you passed a [{type(values).__name__}]"
)
if not isinstance(
values, (ABCIndexClass, ABCSeries, ABCExtensionArray, np.ndarray)
):
values = _ensure_arraylike(list(values))
elif isinstance(values, ABCMultiIndex):
# Avoid raising in extract_array
values = np.array(values)
else:
values = extract_array(values, extract_numpy=True)
if not isinstance(values, (ABCIndex, ABCSeries, ABCExtensionArray, np.ndarray)):
values = construct_1d_object_array_from_listlike(list(values))
# TODO: could use ensure_arraylike here
comps = _ensure_arraylike(comps)
comps = extract_array(comps, extract_numpy=True)
if is_categorical_dtype(comps.dtype):
if is_categorical_dtype(comps):
# TODO(extension)
# handle categoricals
return cast("Categorical", comps).isin(values)
return comps.isin(values) # type: ignore
if needs_i8_conversion(comps.dtype):
# Dispatch to DatetimeLikeArrayMixin.isin
return array(comps).isin(values)
elif needs_i8_conversion(values.dtype) and not is_object_dtype(comps.dtype):
# e.g. comps are integers and values are datetime64s
return np.zeros(comps.shape, dtype=bool)
# TODO: not quite right ... Sparse/Categorical
elif needs_i8_conversion(values.dtype):
return isin(comps, values.astype(object))
comps, dtype = _ensure_data(comps)
values, _ = _ensure_data(values, dtype=dtype)
elif is_extension_array_dtype(comps.dtype) or is_extension_array_dtype(
values.dtype
):
return isin(np.asarray(comps), np.asarray(values))
# faster for larger cases to use np.in1d
f = htable.ismember_object
# GH16012
# Ensure np.in1d doesn't get object types or it *may* throw an exception
# Albeit hashmap has O(1) look-up (vs. O(logn) in sorted array),
# in1d is faster for small sizes
if len(comps) > 1_000_000 and len(values) <= 26 and not is_object_dtype(comps):
# If the values include nan we need to check for nan explicitly
if len(comps) > 1_000_000 and not is_object_dtype(comps):
# If the values include nan we need to check for nan explicitly
# since np.nan is not equal to np.nan
if isna(values).any():
f = lambda c, v: np.logical_or(np.in1d(c, v), np.isnan(c))
else:
f = np.in1d
elif is_integer_dtype(comps):
try:
values = values.astype("int64", copy=False)
comps = comps.astype("int64", copy=False)
f = htable.ismember_int64
except (TypeError, ValueError, OverflowError):
values = values.astype(object)
comps = comps.astype(object)
else:
common = np.find_common_type([values.dtype, comps.dtype], [])
values = values.astype(common, copy=False)
comps = comps.astype(common, copy=False)
name = common.name
if name == "bool":
name = "uint8"
f = getattr(htable, f"ismember_{name}")
elif is_float_dtype(comps):
try:
values = values.astype("float64", copy=False)
comps = comps.astype("float64", copy=False)
f = htable.ismember_float64
except (TypeError, ValueError):
values = values.astype(object)
comps = comps.astype(object)
return f(comps, values)
def factorize_array(
values: np.ndarray, na_sentinel: int = -1, size_hint=None, na_value=None, mask=None
def _factorize_array(
values, na_sentinel: int = -1, size_hint=None, na_value=None, mask=None,
) -> Tuple[np.ndarray, np.ndarray]:
"""
Factorize an array-like to codes and uniques.
@ -522,7 +504,7 @@ def factorize_array(
codes : ndarray
uniques : ndarray
"""
hash_klass, values = get_data_algo(values)
hash_klass, values = _get_data_algo(values)
table = hash_klass(size_hint or len(values))
uniques, codes = table.factorize(
@ -560,7 +542,7 @@ def factorize(
sort: bool = False,
na_sentinel: Optional[int] = -1,
size_hint: Optional[int] = None,
) -> Tuple[np.ndarray, Union[np.ndarray, "Index"]]:
) -> Tuple[np.ndarray, Union[np.ndarray, ABCIndex]]:
"""
Encode the object as an enumerated type or categorical variable.
@ -680,9 +662,6 @@ def factorize(
# responsible only for factorization. All data coercion, sorting and boxing
# should happen here.
if isinstance(values, ABCRangeIndex):
return values.factorize(sort=sort)
values = _ensure_arraylike(values)
original = values
if not isinstance(values, ABCMultiIndex):
@ -719,7 +698,7 @@ def factorize(
else:
na_value = None
codes, uniques = factorize_array(
codes, uniques = _factorize_array(
values, na_sentinel=na_sentinel, size_hint=size_hint, na_value=na_value
)
@ -740,8 +719,6 @@ def factorize(
# return original tenor
if isinstance(original, ABCIndexClass):
if original.dtype.kind in ["m", "M"] and isinstance(uniques, np.ndarray):
uniques = type(original._data)._simple_new(uniques, dtype=original.dtype)
uniques = original._shallow_copy(uniques, name=None)
elif isinstance(original, ABCSeries):
from pandas import Index
@ -758,7 +735,7 @@ def value_counts(
normalize: bool = False,
bins=None,
dropna: bool = True,
) -> Series:
) -> "Series":
"""
Compute a histogram of the counts of non-null values.
@ -817,7 +794,7 @@ def value_counts(
counts = result._values
else:
keys, counts = value_counts_arraylike(values, dropna)
keys, counts = _value_counts_arraylike(values, dropna)
result = Series(counts, index=keys, name=name)
@ -830,8 +807,8 @@ def value_counts(
return result
# Called once from SparseArray, otherwise could be private
def value_counts_arraylike(values, dropna: bool):
# Called once from SparseArray
def _value_counts_arraylike(values, dropna: bool):
"""
Parameters
----------
@ -875,7 +852,7 @@ def value_counts_arraylike(values, dropna: bool):
return keys, counts
def duplicated(values: ArrayLike, keep: str = "first") -> np.ndarray:
def duplicated(values, keep="first") -> np.ndarray:
"""
Return boolean ndarray denoting duplicate values.
@ -900,7 +877,7 @@ def duplicated(values: ArrayLike, keep: str = "first") -> np.ndarray:
return f(values, keep=keep)
def mode(values, dropna: bool = True) -> Series:
def mode(values, dropna: bool = True) -> "Series":
"""
Returns the mode(s) of an array.
@ -1068,10 +1045,11 @@ def checked_add_with_arr(arr, b, arr_mask=None, b_mask=None):
to_raise = ((np.iinfo(np.int64).max - b2 < arr) & not_nan).any()
else:
to_raise = (
(np.iinfo(np.int64).max - b2[mask1] < arr[mask1]) & not_nan[mask1]
).any() or (
((np.iinfo(np.int64).max - b2[mask1] < arr[mask1]) & not_nan[mask1]).any()
or (
(np.iinfo(np.int64).min - b2[mask2] > arr[mask2]) & not_nan[mask2]
).any()
)
if to_raise:
raise OverflowError("Overflow in int64 addition")
@ -1176,9 +1154,6 @@ class SelectN:
if self.keep not in ("first", "last", "all"):
raise ValueError('keep must be either "first", "last" or "all"')
def compute(self, method: str) -> FrameOrSeriesUnion:
raise NotImplementedError
def nlargest(self):
return self.compute("nlargest")
@ -1211,7 +1186,7 @@ class SelectNSeries(SelectN):
nordered : Series
"""
def compute(self, method: str) -> Series:
def compute(self, method):
n = self.n
dtype = self.obj.dtype
@ -1225,8 +1200,10 @@ class SelectNSeries(SelectN):
# slow method
if n >= len(self.obj):
reverse_it = self.keep == "last" or method == "nlargest"
ascending = method == "nsmallest"
return dropped.sort_values(ascending=ascending).head(n)
slc = np.s_[::-1] if reverse_it else np.s_[:]
return dropped[slc].sort_values(ascending=ascending).head(n)
# fast method
arr, pandas_dtype = _ensure_data(dropped.values)
@ -1283,7 +1260,7 @@ class SelectNFrame(SelectN):
columns = list(columns)
self.columns = columns
def compute(self, method: str) -> DataFrame:
def compute(self, method):
from pandas import Int64Index
@ -1571,6 +1548,8 @@ def take(arr, indices, axis: int = 0, allow_fill: bool = False, fill_value=None)
"""
Take elements from an array.
.. versionadded:: 0.23.0
Parameters
----------
arr : sequence
@ -1588,7 +1567,7 @@ def take(arr, indices, axis: int = 0, allow_fill: bool = False, fill_value=None)
* True: negative values in `indices` indicate
missing values. These values are set to `fill_value`. Any other
negative values raise a ``ValueError``.
other negative values raise a ``ValueError``.
fill_value : any, optional
Fill value to use for NA-indices when `allow_fill` is True.
@ -1694,8 +1673,7 @@ def take_nd(
"""
mask_info = None
if isinstance(arr, ABCExtensionArray):
# Check for EA to catch DatetimeArray, TimedeltaArray
if is_extension_array_dtype(arr):
return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
arr = extract_array(arr)
@ -1826,7 +1804,7 @@ def take_2d_multi(arr, indexer, fill_value=np.nan):
# ------------ #
def searchsorted(arr, value, side="left", sorter=None) -> np.ndarray:
def searchsorted(arr, value, side="left", sorter=None):
"""
Find indices where elements should be inserted to maintain order.
@ -1875,7 +1853,7 @@ def searchsorted(arr, value, side="left", sorter=None) -> np.ndarray:
if (
isinstance(arr, np.ndarray)
and is_integer_dtype(arr.dtype)
and is_integer_dtype(arr)
and (is_integer(value) or is_integer_dtype(value))
):
# if `arr` and `value` have different dtypes, `arr` would be
@ -1953,8 +1931,6 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3):
if is_extension_array_dtype(dtype):
if hasattr(arr, f"__{op.__name__}__"):
if axis != 0:
raise ValueError(f"cannot diff {type(arr).__name__} on axis={axis}")
return op(arr, arr.shift(n))
else:
warn(
@ -1969,26 +1945,18 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3):
is_timedelta = False
is_bool = False
if needs_i8_conversion(arr.dtype):
dtype = np.int64
dtype = np.float64
arr = arr.view("i8")
na = iNaT
is_timedelta = True
elif is_bool_dtype(dtype):
# We have to cast in order to be able to hold np.nan
dtype = np.object_
is_bool = True
elif is_integer_dtype(dtype):
# We have to cast in order to be able to hold np.nan
dtype = np.float64
orig_ndim = arr.ndim
if orig_ndim == 1:
# reshape so we can always use algos.diff_2d
arr = arr.reshape(-1, 1)
# TODO: require axis == 0
dtype = np.dtype(dtype)
out_arr = np.empty(arr.shape, dtype=dtype)
@ -1999,7 +1967,7 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3):
if arr.ndim == 2 and arr.dtype.name in _diff_special:
# TODO: can diff_2d dtype specialization troubles be fixed by defining
# out_arr inside diff_2d?
algos.diff_2d(arr, out_arr, n, axis, datetimelike=is_timedelta)
algos.diff_2d(arr, out_arr, n, axis)
else:
# To keep mypy happy, _res_indexer is a list while res_indexer is
# a tuple, ditto for lag_indexer.
@ -2033,10 +2001,8 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3):
out_arr[res_indexer] = arr[res_indexer] - arr[lag_indexer]
if is_timedelta:
out_arr = out_arr.view("timedelta64[ns]")
out_arr = out_arr.astype("int64").view("timedelta64[ns]")
if orig_ndim == 1:
out_arr = out_arr[:, 0]
return out_arr
@ -2100,30 +2066,32 @@ def safe_sort(
"Only list-like objects are allowed to be passed to safe_sort as values"
)
if not isinstance(values, (np.ndarray, ABCExtensionArray)):
if not isinstance(values, np.ndarray) and not is_extension_array_dtype(values):
# don't convert to string types
dtype, _ = infer_dtype_from_array(values)
values = np.asarray(values, dtype=dtype)
sorter = None
def sort_mixed(values):
# order ints before strings, safe in py3
str_pos = np.array([isinstance(x, str) for x in values], dtype=bool)
nums = np.sort(values[~str_pos])
strs = np.sort(values[str_pos])
return np.concatenate([nums, np.asarray(strs, dtype=object)])
sorter = None
if (
not is_extension_array_dtype(values)
and lib.infer_dtype(values, skipna=False) == "mixed-integer"
):
ordered = _sort_mixed(values)
# unorderable in py3 if mixed str/int
ordered = sort_mixed(values)
else:
try:
sorter = values.argsort()
ordered = values.take(sorter)
except TypeError:
# Previous sorters failed or were not applicable, try `_sort_mixed`
# which would work, but which fails for special case of 1d arrays
# with tuples.
if values.size and isinstance(values[0], tuple):
ordered = _sort_tuples(values)
else:
ordered = _sort_mixed(values)
# try this anyway
ordered = sort_mixed(values)
# codes:
@ -2142,7 +2110,7 @@ def safe_sort(
if sorter is None:
# mixed types
hash_klass, values = get_data_algo(values)
hash_klass, values = _get_data_algo(values)
t = hash_klass(len(values))
t.map_locations(values)
sorter = ensure_platform_int(t.lookup(ordered))
@ -2170,26 +2138,3 @@ def safe_sort(
np.putmask(new_codes, mask, na_sentinel)
return ordered, ensure_platform_int(new_codes)
def _sort_mixed(values):
""" order ints before strings in 1d arrays, safe in py3 """
str_pos = np.array([isinstance(x, str) for x in values], dtype=bool)
nums = np.sort(values[~str_pos])
strs = np.sort(values[str_pos])
return np.concatenate([nums, np.asarray(strs, dtype=object)])
def _sort_tuples(values: np.ndarray[tuple]):
"""
Convert array of tuples (1d) to array or array (2d).
We need to keep the columns separately as they contain different types and
nans (can't use `np.sort` as it may fail when str and nan are mixed in a
column as types cannot be compared).
"""
from pandas.core.internals.construction import to_arrays
from pandas.core.sorting import lexsort_indexer
arrays, _ = to_arrays(values, None)
indexer = lexsort_indexer(arrays, orders=True)
return values[indexer]

View file

@ -14,7 +14,6 @@ from pandas.core.dtypes.missing import isna, isnull, notna, notnull
from pandas.core.algorithms import factorize, unique, value_counts
from pandas.core.arrays import Categorical
from pandas.core.arrays.boolean import BooleanDtype
from pandas.core.arrays.floating import Float32Dtype, Float64Dtype
from pandas.core.arrays.integer import (
Int8Dtype,
Int16Dtype,
@ -27,7 +26,6 @@ from pandas.core.arrays.integer import (
)
from pandas.core.arrays.string_ import StringDtype
from pandas.core.construction import array
from pandas.core.flags import Flags
from pandas.core.groupby import Grouper, NamedAgg
from pandas.core.indexes.api import (
CategoricalIndex,

View file

@ -1,12 +1,12 @@
import abc
import inspect
from typing import TYPE_CHECKING, Any, Dict, Iterator, Optional, Tuple, Type
from typing import TYPE_CHECKING, Any, Dict, Iterator, Optional, Tuple, Type, Union
import numpy as np
from pandas._config import option_context
from pandas._typing import Axis, FrameOrSeriesUnion
from pandas._typing import Axis
from pandas.util._decorators import cache_readonly
from pandas.core.dtypes.common import (
@ -31,6 +31,7 @@ def frame_apply(
axis: Axis = 0,
raw: bool = False,
result_type: Optional[str] = None,
ignore_failures: bool = False,
args=None,
kwds=None,
):
@ -47,6 +48,7 @@ def frame_apply(
func,
raw=raw,
result_type=result_type,
ignore_failures=ignore_failures,
args=args,
kwds=kwds,
)
@ -76,7 +78,7 @@ class FrameApply(metaclass=abc.ABCMeta):
@abc.abstractmethod
def wrap_results_for_axis(
self, results: ResType, res_index: "Index"
) -> FrameOrSeriesUnion:
) -> Union["Series", "DataFrame"]:
pass
# ---------------------------------------------------------------
@ -87,11 +89,13 @@ class FrameApply(metaclass=abc.ABCMeta):
func,
raw: bool,
result_type: Optional[str],
ignore_failures: bool,
args,
kwds,
):
self.obj = obj
self.raw = raw
self.ignore_failures = ignore_failures
self.args = args or ()
self.kwds = kwds or {}
@ -142,11 +146,7 @@ class FrameApply(metaclass=abc.ABCMeta):
""" compute the results """
# dispatch to agg
if is_list_like(self.f) or is_dict_like(self.f):
# pandas\core\apply.py:144: error: "aggregate" of "DataFrame" gets
# multiple values for keyword argument "axis"
return self.obj.aggregate( # type: ignore[misc]
self.f, axis=self.axis, *self.args, **self.kwds
)
return self.obj.aggregate(self.f, axis=self.axis, *self.args, **self.kwds)
# all empty
if len(self.columns) == 0 and len(self.index) == 0:
@ -284,6 +284,21 @@ class FrameApply(metaclass=abc.ABCMeta):
results = {}
if self.ignore_failures:
successes = []
for i, v in enumerate(series_gen):
try:
results[i] = self.f(v)
except Exception:
pass
else:
successes.append(i)
# so will work with MultiIndex
if len(successes) < len(res_index):
res_index = res_index.take(successes)
else:
with option_context("mode.chained_assignment", None):
for i, v in enumerate(series_gen):
# ignore SettingWithCopy here in case the user mutates
@ -295,7 +310,9 @@ class FrameApply(metaclass=abc.ABCMeta):
return results, res_index
def wrap_results(self, results: ResType, res_index: "Index") -> FrameOrSeriesUnion:
def wrap_results(
self, results: ResType, res_index: "Index"
) -> Union["Series", "DataFrame"]:
from pandas import Series
# see if we can infer the results
@ -339,7 +356,7 @@ class FrameRowApply(FrameApply):
def wrap_results_for_axis(
self, results: ResType, res_index: "Index"
) -> FrameOrSeriesUnion:
) -> Union["Series", "DataFrame"]:
""" return the results for the rows """
if self.result_type == "reduce":
@ -352,10 +369,8 @@ class FrameRowApply(FrameApply):
isinstance(x, dict) for x in results.values()
):
# Our operation was a to_dict op e.g.
# test_apply_dict GH#8735, test_apply_reduce_to_dict GH#25196 #37544
res = self.obj._constructor_sliced(results)
res.index = res_index
return res
# test_apply_dict GH#8735, test_apply_reduce_rows_to_dict GH#25196
return self.obj._constructor_sliced(results)
try:
result = self.obj._constructor(data=results)
@ -422,9 +437,9 @@ class FrameColumnApply(FrameApply):
def wrap_results_for_axis(
self, results: ResType, res_index: "Index"
) -> FrameOrSeriesUnion:
) -> Union["Series", "DataFrame"]:
""" return the results for the columns """
result: FrameOrSeriesUnion
result: Union["Series", "DataFrame"]
# we have requested to expand
if self.result_type == "expand":

View file

@ -8,7 +8,7 @@ from typing import Callable
import numpy as np
from pandas._libs import missing as libmissing
from pandas.compat.numpy import np_version_under1p17
from pandas.compat.numpy import _np_version_under1p17
from pandas.core.nanops import check_below_min_count
@ -17,7 +17,6 @@ def _sumprod(
func: Callable,
values: np.ndarray,
mask: np.ndarray,
*,
skipna: bool = True,
min_count: int = 0,
):
@ -47,31 +46,25 @@ def _sumprod(
if check_below_min_count(values.shape, mask, min_count):
return libmissing.NA
if np_version_under1p17:
if _np_version_under1p17:
return func(values[~mask])
else:
return func(values, where=~mask)
def sum(
values: np.ndarray, mask: np.ndarray, *, skipna: bool = True, min_count: int = 0
):
def sum(values: np.ndarray, mask: np.ndarray, skipna: bool = True, min_count: int = 0):
return _sumprod(
np.sum, values=values, mask=mask, skipna=skipna, min_count=min_count
)
def prod(
values: np.ndarray, mask: np.ndarray, *, skipna: bool = True, min_count: int = 0
):
def prod(values: np.ndarray, mask: np.ndarray, skipna: bool = True, min_count: int = 0):
return _sumprod(
np.prod, values=values, mask=mask, skipna=skipna, min_count=min_count
)
def _minmax(
func: Callable, values: np.ndarray, mask: np.ndarray, *, skipna: bool = True
):
def _minmax(func: Callable, values: np.ndarray, mask: np.ndarray, skipna: bool = True):
"""
Reduction for 1D masked array.
@ -101,9 +94,9 @@ def _minmax(
return libmissing.NA
def min(values: np.ndarray, mask: np.ndarray, *, skipna: bool = True):
def min(values: np.ndarray, mask: np.ndarray, skipna: bool = True):
return _minmax(np.min, values=values, mask=mask, skipna=skipna)
def max(values: np.ndarray, mask: np.ndarray, *, skipna: bool = True):
def max(values: np.ndarray, mask: np.ndarray, skipna: bool = True):
return _minmax(np.max, values=values, mask=mask, skipna=skipna)

View file

@ -1,133 +0,0 @@
"""
Methods used by Block.replace and related methods.
"""
import operator
import re
from typing import Optional, Pattern, Union
import numpy as np
from pandas._typing import ArrayLike, Scalar
from pandas.core.dtypes.common import (
is_datetimelike_v_numeric,
is_numeric_v_string_like,
is_re,
is_scalar,
)
from pandas.core.dtypes.missing import isna
def compare_or_regex_search(
    a: ArrayLike, b: Union[Scalar, Pattern], regex: bool, mask: ArrayLike
) -> Union[ArrayLike, bool]:
    """
    Compare two array_like inputs of the same shape or two scalar values

    Calls operator.eq or re.search, depending on regex argument. If regex is
    True, perform an element-wise regex matching.

    Parameters
    ----------
    a : array_like
    b : scalar or regex pattern
    regex : bool
    mask : array_like
        Boolean selector. When ``a`` is an ndarray only ``a[mask]`` is
        compared, and the result is scattered back into an array of
        ``mask.shape`` (positions outside the mask are False).

    Returns
    -------
    mask : array_like of bool
        The scalar ``False`` is returned when ``a``/``b`` form a
        datetime-like vs numeric pair and ``a`` is not an ndarray.

    Raises
    ------
    TypeError
        If ``a`` is an ndarray but the comparison yields a scalar.
    """

    def _check_comparison_types(
        result: Union[ArrayLike, bool], a: ArrayLike, b: Union[Scalar, Pattern]
    ):
        """
        Raises an error if the two arrays (a,b) cannot be compared.
        Otherwise does nothing.
        """
        if is_scalar(result) and isinstance(a, np.ndarray):
            # ``a`` is known to be an ndarray here, so its name is always
            # reported together with its dtype.
            type_names = [f"ndarray(dtype={a.dtype})", type(b).__name__]
            raise TypeError(
                f"Cannot compare types {repr(type_names[0])} and {repr(type_names[1])}"
            )

    if not regex:
        op = lambda x: operator.eq(x, b)
    else:

        def _regex_hit(x) -> bool:
            # only str elements can match, and only against a str/compiled pattern
            if isinstance(x, str) and isinstance(b, (str, Pattern)):
                return bool(re.search(b, x))
            return False

        # ``otypes`` is given explicitly: without it np.vectorize has to call
        # the function on the first element to infer the output dtype and
        # raises ValueError for a zero-size input (e.g. a mask selecting
        # nothing).  For non-empty input the result is a bool array either way.
        op = np.vectorize(_regex_hit, otypes=[bool])

    # GH#32621 use mask to avoid comparing to NAs
    if isinstance(a, np.ndarray):
        a = a[mask]

    if is_numeric_v_string_like(a, b):
        # GH#29553 avoid deprecation warnings from numpy
        return np.zeros(a.shape, dtype=bool)

    elif is_datetimelike_v_numeric(a, b):
        # GH#29553 avoid deprecation warnings from numpy
        # (raises for an ndarray ``a``; otherwise the answer is simply False)
        _check_comparison_types(False, a, b)
        return False

    result = op(a)

    if isinstance(result, np.ndarray) and mask is not None:
        # The shape of the mask can differ to that of the result
        # since we may compare only a subset of a's or b's elements
        tmp = np.zeros(mask.shape, dtype=np.bool_)
        tmp[mask] = result
        result = tmp

    _check_comparison_types(result, a, b)
    return result
def replace_regex(values: ArrayLike, rx: re.Pattern, value, mask: Optional[np.ndarray]):
    """
    Regex-replace the string elements of ``values``.

    Parameters
    ----------
    values : ArrayLike
        Object dtype.
    rx : re.Pattern
    value : Any
        Replacement.  A string is substituted into each match via ``rx.sub``;
        anything else (numeric, nan, object) replaces the whole matching
        element.
    mask : np.ndarray[bool], optional
        When given, only ``values[mask]`` is processed.

    Notes
    -----
    Alters values in-place.
    """
    # A replacement that is not a string cannot be spliced into the matched
    # text, so in that case the entire element is swapped out instead.
    swap_whole_element = isna(value) or not isinstance(value, str)

    def _rewrite(item):
        # non-strings (including nulls) pass through untouched, as does
        # everything when ``rx`` is not a compiled pattern
        if not (is_re(rx) and isinstance(item, str)):
            return item
        if swap_whole_element:
            return item if rx.search(item) is None else value
        return rx.sub(value, item)

    vectorized = np.vectorize(_rewrite, otypes=[values.dtype])

    if mask is not None:
        values[mask] = vectorized(values[mask])
    else:
        values[:] = vectorized(values)

View file

@ -1,284 +0,0 @@
"""
Methods that can be shared by many array-like classes or subclasses:
Series
Index
ExtensionArray
"""
import operator
from typing import Any, Callable
import warnings
import numpy as np
from pandas._libs import lib
from pandas.core.construction import extract_array
from pandas.core.ops import maybe_dispatch_ufunc_to_dunder_op, roperator
from pandas.core.ops.common import unpack_zerodim_and_defer
class OpsMixin:
    """
    Mixin that routes Python's operator dunders to three overridable hooks.

    Every comparison, logical and arithmetic dunder forwards ``(other, op)``
    to ``_cmp_method``, ``_logical_method`` or ``_arith_method`` respectively,
    where ``op`` is the function from ``operator`` (or, for reflected
    operators, from ``roperator``).  The hooks defined here return
    ``NotImplemented``; a class using the mixin overrides the ones it supports.
    """

    # NOTE(review): each dunder is wrapped with ``unpack_zerodim_and_defer``
    # (pandas.core.ops.common) and passed its own name; judging by the name it
    # unpacks zero-dim arrays and defers to other pandas types -- confirm there.

    # -------------------------------------------------------------
    # Comparisons

    def _cmp_method(self, other, op):
        # default hook: comparison not supported
        return NotImplemented

    @unpack_zerodim_and_defer("__eq__")
    def __eq__(self, other):
        return self._cmp_method(other, operator.eq)

    @unpack_zerodim_and_defer("__ne__")
    def __ne__(self, other):
        return self._cmp_method(other, operator.ne)

    @unpack_zerodim_and_defer("__lt__")
    def __lt__(self, other):
        return self._cmp_method(other, operator.lt)

    @unpack_zerodim_and_defer("__le__")
    def __le__(self, other):
        return self._cmp_method(other, operator.le)

    @unpack_zerodim_and_defer("__gt__")
    def __gt__(self, other):
        return self._cmp_method(other, operator.gt)

    @unpack_zerodim_and_defer("__ge__")
    def __ge__(self, other):
        return self._cmp_method(other, operator.ge)

    # -------------------------------------------------------------
    # Logical Methods

    def _logical_method(self, other, op):
        # default hook: logical operators not supported
        return NotImplemented

    @unpack_zerodim_and_defer("__and__")
    def __and__(self, other):
        return self._logical_method(other, operator.and_)

    @unpack_zerodim_and_defer("__rand__")
    def __rand__(self, other):
        return self._logical_method(other, roperator.rand_)

    @unpack_zerodim_and_defer("__or__")
    def __or__(self, other):
        return self._logical_method(other, operator.or_)

    @unpack_zerodim_and_defer("__ror__")
    def __ror__(self, other):
        return self._logical_method(other, roperator.ror_)

    @unpack_zerodim_and_defer("__xor__")
    def __xor__(self, other):
        return self._logical_method(other, operator.xor)

    @unpack_zerodim_and_defer("__rxor__")
    def __rxor__(self, other):
        return self._logical_method(other, roperator.rxor)

    # -------------------------------------------------------------
    # Arithmetic Methods

    def _arith_method(self, other, op):
        # default hook: arithmetic not supported
        return NotImplemented

    @unpack_zerodim_and_defer("__add__")
    def __add__(self, other):
        return self._arith_method(other, operator.add)

    @unpack_zerodim_and_defer("__radd__")
    def __radd__(self, other):
        return self._arith_method(other, roperator.radd)

    @unpack_zerodim_and_defer("__sub__")
    def __sub__(self, other):
        return self._arith_method(other, operator.sub)

    @unpack_zerodim_and_defer("__rsub__")
    def __rsub__(self, other):
        return self._arith_method(other, roperator.rsub)

    @unpack_zerodim_and_defer("__mul__")
    def __mul__(self, other):
        return self._arith_method(other, operator.mul)

    @unpack_zerodim_and_defer("__rmul__")
    def __rmul__(self, other):
        return self._arith_method(other, roperator.rmul)

    @unpack_zerodim_and_defer("__truediv__")
    def __truediv__(self, other):
        return self._arith_method(other, operator.truediv)

    @unpack_zerodim_and_defer("__rtruediv__")
    def __rtruediv__(self, other):
        return self._arith_method(other, roperator.rtruediv)

    @unpack_zerodim_and_defer("__floordiv__")
    def __floordiv__(self, other):
        return self._arith_method(other, operator.floordiv)

    # The name passed here previously lacked its trailing underscores
    # ("__rfloordiv"), unlike every other dunder in this class.
    @unpack_zerodim_and_defer("__rfloordiv__")
    def __rfloordiv__(self, other):
        return self._arith_method(other, roperator.rfloordiv)

    @unpack_zerodim_and_defer("__mod__")
    def __mod__(self, other):
        return self._arith_method(other, operator.mod)

    @unpack_zerodim_and_defer("__rmod__")
    def __rmod__(self, other):
        return self._arith_method(other, roperator.rmod)

    @unpack_zerodim_and_defer("__divmod__")
    def __divmod__(self, other):
        # ``divmod`` is the builtin; ``operator`` has no equivalent
        return self._arith_method(other, divmod)

    @unpack_zerodim_and_defer("__rdivmod__")
    def __rdivmod__(self, other):
        return self._arith_method(other, roperator.rdivmod)

    @unpack_zerodim_and_defer("__pow__")
    def __pow__(self, other):
        return self._arith_method(other, operator.pow)

    @unpack_zerodim_and_defer("__rpow__")
    def __rpow__(self, other):
        return self._arith_method(other, roperator.rpow)
def array_ufunc(self, ufunc: Callable, method: str, *inputs: Any, **kwargs: Any):
    """
    Compatibility with numpy ufuncs.

    Steps, in order: try the dunder-method dispatch; decide whether to defer
    to another input; align NDFrame inputs on the union of their axes; call
    ``getattr(ufunc, method)``; rebuild pandas object(s) from the raw result.

    Parameters
    ----------
    self : object
        The object the ufunc is applied on behalf of.  The body reads its
        ``ndim``, ``axes``, ``_AXIS_ORDERS``, ``_HANDLED_TYPES``,
        ``_constructor`` and ``__array_priority__`` (comments below refer to
        Series and DataFrame).
    ufunc : Callable
        The numpy ufunc.
    method : str
        Name of the ufunc method to invoke; ``"outer"`` gets special handling
        when the result changes dimensionality.
    *inputs : Any
        Operands of the ufunc.
    **kwargs : Any
        Passed to the dunder dispatch and to the 1-dimensional branch.

    Returns
    -------
    object
        ``NotImplemented`` when deferring; otherwise a scalar, an ndarray, a
        rebuilt pandas object, or a tuple of those when ``ufunc.nout > 1``.

    Raises
    ------
    NotImplementedError
        If more than one NDFrame input is present and the input types are not
        all identical, or if ``method == "outer"`` changes the number of
        dimensions and ``self.ndim != 2``.

    See Also
    --------
    numpy.org/doc/stable/reference/arrays.classes.html#numpy.class.__array_ufunc__
    """
    # imported locally rather than at module level
    # NOTE(review): presumably to avoid a circular import -- confirm
    from pandas.core.generic import NDFrame
    from pandas.core.internals import BlockManager

    cls = type(self)

    # for binary ops, use our custom dunder methods
    result = maybe_dispatch_ufunc_to_dunder_op(self, ufunc, method, *inputs, **kwargs)
    if result is not NotImplemented:
        return result

    # Determine if we should defer.
    # Inputs whose __array_ufunc__ is one of these are never deferred to.
    no_defer = (np.ndarray.__array_ufunc__, cls.__array_ufunc__)

    for item in inputs:
        higher_priority = (
            hasattr(item, "__array_priority__")
            and item.__array_priority__ > self.__array_priority__
        )
        has_array_ufunc = (
            hasattr(item, "__array_ufunc__")
            and type(item).__array_ufunc__ not in no_defer
            and not isinstance(item, self._HANDLED_TYPES)
        )
        if higher_priority or has_array_ufunc:
            return NotImplemented

    # align all the inputs.
    types = tuple(type(x) for x in inputs)
    alignable = [x for x, t in zip(inputs, types) if issubclass(t, NDFrame)]

    if len(alignable) > 1:
        # This triggers alignment.
        # At the moment, there aren't any ufuncs with more than two inputs
        # so this ends up just being x1.index | x2.index, but we write
        # it to handle *args.

        if len(set(types)) > 1:
            # We currently don't handle ufunc(DataFrame, Series)
            # well. Previously this raised an internal ValueError. We might
            # support it someday, so raise a NotImplementedError.
            raise NotImplementedError(
                "Cannot apply ufunc {} to mixed DataFrame and Series "
                "inputs.".format(ufunc)
            )
        axes = self.axes
        for obj in alignable[1:]:
            # this relies on the fact that we aren't handling mixed
            # series / frame ufuncs.
            for i, (ax1, ax2) in enumerate(zip(axes, obj.axes)):
                axes[i] = ax1.union(ax2)

        reconstruct_axes = dict(zip(self._AXIS_ORDERS, axes))
        # reindex every NDFrame input onto the unioned axes; leave the rest as-is
        inputs = tuple(
            x.reindex(**reconstruct_axes) if issubclass(t, NDFrame) else x
            for x, t in zip(inputs, types)
        )
    else:
        reconstruct_axes = dict(zip(self._AXIS_ORDERS, self.axes))

    if self.ndim == 1:
        # keep the name only when all named inputs agree on it
        names = [getattr(x, "name") for x in inputs if hasattr(x, "name")]
        name = names[0] if len(set(names)) == 1 else None
        reconstruct_kwargs = {"name": name}
    else:
        reconstruct_kwargs = {}

    def reconstruct(result):
        # Turn one raw ufunc output back into a pandas object where possible.
        if lib.is_scalar(result):
            return result
        if result.ndim != self.ndim:
            # dimensionality changed: the raw result is handed back unwrapped
            if method == "outer":
                if self.ndim == 2:
                    # we already deprecated for Series
                    msg = (
                        "outer method for ufunc {} is not implemented on "
                        "pandas objects. Returning an ndarray, but in the "
                        "future this will raise a 'NotImplementedError'. "
                        "Consider explicitly converting the DataFrame "
                        "to an array with '.to_numpy()' first."
                    )
                    warnings.warn(msg.format(ufunc), FutureWarning, stacklevel=4)
                    return result
                raise NotImplementedError
            return result
        if isinstance(result, BlockManager):
            # we went through BlockManager.apply
            result = self._constructor(result, **reconstruct_kwargs, copy=False)
        else:
            # we converted an array, lost our axes
            result = self._constructor(
                result, **reconstruct_axes, **reconstruct_kwargs, copy=False
            )
        # TODO: When we support multiple values in __finalize__, this
        # should pass alignable to `__finalize__` instead of self.
        # Then `np.add(a, b)` would consider attrs from both a and b
        # when a and b are NDFrames.
        if len(alignable) == 1:
            result = result.__finalize__(self)
        return result

    if self.ndim > 1 and (
        len(inputs) > 1 or ufunc.nout > 1  # type: ignore[attr-defined]
    ):
        # Just give up on preserving types in the complex case.
        # In theory we could preserve them for them.
        # * nout>1 is doable if BlockManager.apply took nout and
        #   returned a Tuple[BlockManager].
        # * len(inputs) > 1 is doable when we know that we have
        #   aligned blocks / dtypes.
        inputs = tuple(np.asarray(x) for x in inputs)
        # NOTE(review): ``kwargs`` is not forwarded on this path -- confirm intended
        result = getattr(ufunc, method)(*inputs)
    elif self.ndim == 1:
        # ufunc(series, ...)
        inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs)
        result = getattr(ufunc, method)(*inputs, **kwargs)
    else:
        # ufunc(dataframe)
        # NOTE(review): ``kwargs`` is not forwarded on this path either
        mgr = inputs[0]._mgr
        result = mgr.apply(getattr(ufunc, method))

    if ufunc.nout > 1:  # type: ignore[attr-defined]
        # multi-output ufunc: rebuild each output separately
        result = tuple(reconstruct(x) for x in result)
    else:
        result = reconstruct(result)
    return result

Some files were not shown because too many files have changed in this diff Show more